From 47f56e4247255da5da66a926d2760af06136a5ea Mon Sep 17 00:00:00 2001
From: samahakk <samahakk@hu-berlin.de>
Date: Thu, 23 Sep 2021 00:51:31 +0200
Subject: [PATCH] Standardabweichungen added

---
 fewshot/emotion_semeval_fewshot.ipynb   | 5935 ++++++++++----------
 fewshot/emotion_yinetall_fewshot.ipynb  | 5890 ++++++++++----------
 fewshot/sentiment_amazon_fewshot.ipynb  | 6563 +++++++++++-----------
 fewshot/sentiment_twitter_fewshot.ipynb | 6035 ++++++++++----------
 fewshot/topic_huffpost_fewshot.ipynb    | 6036 ++++++++++----------
 fewshot/topic_yin_fewshot.ipynb         | 6027 ++++++++++----------
 oneshot/emotion_semeval_oneshot.ipynb   | 4914 ++++++++--------
 oneshot/emotion_yinetall_oneshot.ipynb  | 4182 +++++++-------
 oneshot/sentiment_amazon_oneshot.ipynb  | 3087 +++++-----
 oneshot/sentiment_twitter_oneshot.ipynb | 3654 ++++++------
 oneshot/topic_huffpost_oneshot.ipynb    | 5210 ++++++++---------
 oneshot/topic_yin_oneshot.ipynb         | 6813 ++++++++++++-----------
 12 files changed, 32392 insertions(+), 31954 deletions(-)

diff --git a/fewshot/emotion_semeval_fewshot.ipynb b/fewshot/emotion_semeval_fewshot.ipynb
index d82195c..a11f556 100644
--- a/fewshot/emotion_semeval_fewshot.ipynb
+++ b/fewshot/emotion_semeval_fewshot.ipynb
@@ -39,7 +39,7 @@
    "source": [
     "# GRAKA auswählen\n",
     "import flair, torch\n",
-    "flair.device = torch.device('cuda:1') "
+    "flair.device = torch.device('cuda:0') "
    ]
   },
   {
@@ -74,25 +74,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:21:36,916 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
+      "2021-09-21 21:01:59,276 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:21:43,742 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:02:06,120 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 82/82 [00:00<00:00, 59411.46it/s]"
+      "100%|██████████| 82/82 [00:00<00:00, 60959.40it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:21:43,746 [b'joy', b'surprise', b'love', b'disgust', b'fear', b'anger', b'guilt', b'shame', b'sadness']\n",
-      "2021-09-08 01:21:43,911 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:21:43,913 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:02:06,124 [b'joy', b'surprise', b'love', b'disgust', b'fear', b'anger', b'guilt', b'shame', b'sadness']\n",
+      "2021-09-21 21:02:06,128 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:06,130 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -405,24 +405,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:21:43,913 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:21:43,914 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
-      "2021-09-08 01:21:43,914 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:21:43,914 Parameters:\n",
-      "2021-09-08 01:21:43,915  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:21:43,915  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:21:43,915  - patience: \"3\"\n",
-      "2021-09-08 01:21:43,916  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:21:43,916  - max_epochs: \"10\"\n",
-      "2021-09-08 01:21:43,916  - shuffle: \"True\"\n",
-      "2021-09-08 01:21:43,917  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:21:43,917  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:21:43,917 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:21:43,918 Model training base path: \"None\"\n",
-      "2021-09-08 01:21:43,918 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:21:43,918 Device: cuda:1\n",
-      "2021-09-08 01:21:43,919 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:21:43,919 Embeddings storage mode: cpu\n"
+      "2021-09-21 21:02:06,130 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:06,131 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
+      "2021-09-21 21:02:06,131 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:06,131 Parameters:\n",
+      "2021-09-21 21:02:06,132  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:02:06,132  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:02:06,132  - patience: \"3\"\n",
+      "2021-09-21 21:02:06,132  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:02:06,133  - max_epochs: \"10\"\n",
+      "2021-09-21 21:02:06,133  - shuffle: \"True\"\n",
+      "2021-09-21 21:02:06,133  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:02:06,134  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:02:06,134 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:06,134 Model training base path: \"None\"\n",
+      "2021-09-21 21:02:06,134 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:06,135 Device: cuda:0\n",
+      "2021-09-21 21:02:06,135 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:06,135 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:02:06,142 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -436,215 +437,213 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:21:44,031 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:21:44,377 epoch 1 - iter 7/73 - loss 0.32996139 - samples/sec: 21.39 - lr: 0.020000\n",
-      "2021-09-08 01:21:44,704 epoch 1 - iter 14/73 - loss 0.49719035 - samples/sec: 21.45 - lr: 0.020000\n",
-      "2021-09-08 01:21:45,060 epoch 1 - iter 21/73 - loss 0.53646894 - samples/sec: 19.71 - lr: 0.020000\n",
-      "2021-09-08 01:21:45,392 epoch 1 - iter 28/73 - loss 0.57629372 - samples/sec: 21.15 - lr: 0.020000\n",
-      "2021-09-08 01:21:45,729 epoch 1 - iter 35/73 - loss 0.59419281 - samples/sec: 20.85 - lr: 0.020000\n",
-      "2021-09-08 01:21:46,066 epoch 1 - iter 42/73 - loss 0.57894869 - samples/sec: 20.79 - lr: 0.020000\n",
-      "2021-09-08 01:21:46,411 epoch 1 - iter 49/73 - loss 0.53123762 - samples/sec: 20.36 - lr: 0.020000\n",
-      "2021-09-08 01:21:46,741 epoch 1 - iter 56/73 - loss 0.56065674 - samples/sec: 21.29 - lr: 0.020000\n",
-      "2021-09-08 01:21:47,071 epoch 1 - iter 63/73 - loss 0.57596918 - samples/sec: 21.26 - lr: 0.020000\n",
-      "2021-09-08 01:21:47,396 epoch 1 - iter 70/73 - loss 0.56639758 - samples/sec: 21.60 - lr: 0.020000\n",
-      "2021-09-08 01:21:47,535 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:21:47,535 EPOCH 1 done: loss 0.5492 - lr 0.0200000\n",
-      "2021-09-08 01:21:47,664 DEV : loss 1.069263219833374 - score 0.125\n",
-      "2021-09-08 01:21:47,665 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:02:06,473 epoch 1 - iter 7/73 - loss 0.30112487 - samples/sec: 22.29 - lr: 0.020000\n",
+      "2021-09-21 21:02:06,791 epoch 1 - iter 14/73 - loss 0.35199486 - samples/sec: 22.06 - lr: 0.020000\n",
+      "2021-09-21 21:02:07,105 epoch 1 - iter 21/73 - loss 0.39454277 - samples/sec: 22.31 - lr: 0.020000\n",
+      "2021-09-21 21:02:07,425 epoch 1 - iter 28/73 - loss 0.47428859 - samples/sec: 21.94 - lr: 0.020000\n",
+      "2021-09-21 21:02:07,743 epoch 1 - iter 35/73 - loss 0.52510606 - samples/sec: 22.03 - lr: 0.020000\n",
+      "2021-09-21 21:02:08,068 epoch 1 - iter 42/73 - loss 0.50562697 - samples/sec: 21.60 - lr: 0.020000\n",
+      "2021-09-21 21:02:08,392 epoch 1 - iter 49/73 - loss 0.52155319 - samples/sec: 21.64 - lr: 0.020000\n",
+      "2021-09-21 21:02:08,706 epoch 1 - iter 56/73 - loss 0.55040968 - samples/sec: 22.33 - lr: 0.020000\n",
+      "2021-09-21 21:02:09,018 epoch 1 - iter 63/73 - loss 0.55530092 - samples/sec: 22.46 - lr: 0.020000\n",
+      "2021-09-21 21:02:09,319 epoch 1 - iter 70/73 - loss 0.56495987 - samples/sec: 23.32 - lr: 0.020000\n",
+      "2021-09-21 21:02:09,450 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:09,451 EPOCH 1 done: loss 0.5673 - lr 0.0200000\n",
+      "2021-09-21 21:02:09,585 DEV : loss 0.47073835134506226 - score 0.25\n",
+      "2021-09-21 21:02:09,585 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:21:51,993 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:21:52,340 epoch 2 - iter 7/73 - loss 0.91173007 - samples/sec: 21.23 - lr: 0.020000\n",
-      "2021-09-08 01:21:52,676 epoch 2 - iter 14/73 - loss 0.93345671 - samples/sec: 20.90 - lr: 0.020000\n",
-      "2021-09-08 01:21:53,001 epoch 2 - iter 21/73 - loss 0.85877856 - samples/sec: 21.57 - lr: 0.020000\n",
-      "2021-09-08 01:21:53,319 epoch 2 - iter 28/73 - loss 0.80958569 - samples/sec: 22.10 - lr: 0.020000\n",
-      "2021-09-08 01:21:53,637 epoch 2 - iter 35/73 - loss 0.77971711 - samples/sec: 22.09 - lr: 0.020000\n",
-      "2021-09-08 01:21:53,948 epoch 2 - iter 42/73 - loss 0.75517836 - samples/sec: 22.65 - lr: 0.020000\n",
-      "2021-09-08 01:21:54,252 epoch 2 - iter 49/73 - loss 0.73577904 - samples/sec: 23.08 - lr: 0.020000\n",
-      "2021-09-08 01:21:54,555 epoch 2 - iter 56/73 - loss 0.72402976 - samples/sec: 23.14 - lr: 0.020000\n",
-      "2021-09-08 01:21:54,864 epoch 2 - iter 63/73 - loss 0.70760907 - samples/sec: 22.74 - lr: 0.020000\n",
-      "2021-09-08 01:21:55,171 epoch 2 - iter 70/73 - loss 0.71212455 - samples/sec: 22.85 - lr: 0.020000\n",
-      "2021-09-08 01:21:55,302 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:21:55,303 EPOCH 2 done: loss 0.7084 - lr 0.0200000\n",
-      "2021-09-08 01:21:55,427 DEV : loss 0.44753509759902954 - score 0.125\n",
-      "2021-09-08 01:21:55,428 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:22:15,034 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:22:15,358 epoch 3 - iter 7/73 - loss 0.66365601 - samples/sec: 22.75 - lr: 0.020000\n",
-      "2021-09-08 01:22:15,664 epoch 3 - iter 14/73 - loss 0.65411494 - samples/sec: 22.94 - lr: 0.020000\n",
-      "2021-09-08 01:22:15,969 epoch 3 - iter 21/73 - loss 0.64891718 - samples/sec: 23.00 - lr: 0.020000\n",
-      "2021-09-08 01:22:16,274 epoch 3 - iter 28/73 - loss 0.64594888 - samples/sec: 23.02 - lr: 0.020000\n",
-      "2021-09-08 01:22:16,583 epoch 3 - iter 35/73 - loss 0.65067181 - samples/sec: 22.75 - lr: 0.020000\n",
-      "2021-09-08 01:22:16,891 epoch 3 - iter 42/73 - loss 0.64140087 - samples/sec: 22.83 - lr: 0.020000\n",
-      "2021-09-08 01:22:17,205 epoch 3 - iter 49/73 - loss 0.63886077 - samples/sec: 22.36 - lr: 0.020000\n",
-      "2021-09-08 01:22:17,515 epoch 3 - iter 56/73 - loss 0.64722412 - samples/sec: 22.63 - lr: 0.020000\n",
-      "2021-09-08 01:22:17,840 epoch 3 - iter 63/73 - loss 0.64623682 - samples/sec: 21.60 - lr: 0.020000\n",
-      "2021-09-08 01:22:18,161 epoch 3 - iter 70/73 - loss 0.64439642 - samples/sec: 21.88 - lr: 0.020000\n",
-      "2021-09-08 01:22:18,301 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:22:18,302 EPOCH 3 done: loss 0.6401 - lr 0.0200000\n",
-      "2021-09-08 01:22:18,529 DEV : loss 0.41091054677963257 - score 0.0\n",
-      "2021-09-08 01:22:18,530 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:22:18,605 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:22:18,943 epoch 4 - iter 7/73 - loss 0.67651004 - samples/sec: 21.75 - lr: 0.020000\n",
-      "2021-09-08 01:22:19,273 epoch 4 - iter 14/73 - loss 0.66200518 - samples/sec: 21.23 - lr: 0.020000\n",
-      "2021-09-08 01:22:19,600 epoch 4 - iter 21/73 - loss 0.66139727 - samples/sec: 21.52 - lr: 0.020000\n",
-      "2021-09-08 01:22:19,935 epoch 4 - iter 28/73 - loss 0.66372950 - samples/sec: 20.97 - lr: 0.020000\n",
-      "2021-09-08 01:22:20,279 epoch 4 - iter 35/73 - loss 0.68323504 - samples/sec: 20.42 - lr: 0.020000\n",
-      "2021-09-08 01:22:20,584 epoch 4 - iter 42/73 - loss 0.67431162 - samples/sec: 22.99 - lr: 0.020000\n",
-      "2021-09-08 01:22:20,888 epoch 4 - iter 49/73 - loss 0.66829329 - samples/sec: 23.06 - lr: 0.020000\n",
-      "2021-09-08 01:22:21,197 epoch 4 - iter 56/73 - loss 0.66699603 - samples/sec: 22.72 - lr: 0.020000\n",
-      "2021-09-08 01:22:21,503 epoch 4 - iter 63/73 - loss 0.66331886 - samples/sec: 22.94 - lr: 0.020000\n",
-      "2021-09-08 01:22:21,809 epoch 4 - iter 70/73 - loss 0.65921005 - samples/sec: 22.95 - lr: 0.020000\n",
-      "2021-09-08 01:22:21,941 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:22:21,941 EPOCH 4 done: loss 0.6553 - lr 0.0200000\n",
-      "2021-09-08 01:22:22,069 DEV : loss 0.5215737223625183 - score 0.125\n",
-      "2021-09-08 01:22:22,070 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:22:22,072 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:22:22,390 epoch 5 - iter 7/73 - loss 0.58592498 - samples/sec: 23.03 - lr: 0.020000\n",
-      "2021-09-08 01:22:22,703 epoch 5 - iter 14/73 - loss 0.62627939 - samples/sec: 22.43 - lr: 0.020000\n",
-      "2021-09-08 01:22:23,009 epoch 5 - iter 21/73 - loss 0.63548091 - samples/sec: 22.94 - lr: 0.020000\n",
-      "2021-09-08 01:22:23,316 epoch 5 - iter 28/73 - loss 0.65258885 - samples/sec: 22.87 - lr: 0.020000\n",
-      "2021-09-08 01:22:23,625 epoch 5 - iter 35/73 - loss 0.65865639 - samples/sec: 22.68 - lr: 0.020000\n",
-      "2021-09-08 01:22:23,935 epoch 5 - iter 42/73 - loss 0.65627115 - samples/sec: 22.65 - lr: 0.020000\n",
-      "2021-09-08 01:22:24,245 epoch 5 - iter 49/73 - loss 0.65692026 - samples/sec: 22.68 - lr: 0.020000\n",
-      "2021-09-08 01:22:24,559 epoch 5 - iter 56/73 - loss 0.65850635 - samples/sec: 22.34 - lr: 0.020000\n",
-      "2021-09-08 01:22:24,866 epoch 5 - iter 63/73 - loss 0.65819567 - samples/sec: 22.85 - lr: 0.020000\n",
-      "2021-09-08 01:22:25,173 epoch 5 - iter 70/73 - loss 0.65717002 - samples/sec: 22.89 - lr: 0.020000\n",
-      "2021-09-08 01:22:25,305 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:22:25,306 EPOCH 5 done: loss 0.6561 - lr 0.0200000\n",
-      "2021-09-08 01:22:25,529 DEV : loss 0.5251460075378418 - score 0.125\n",
-      "2021-09-08 01:22:25,530 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:22:25,609 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:22:25,931 epoch 6 - iter 7/73 - loss 0.59428835 - samples/sec: 22.77 - lr: 0.020000\n",
-      "2021-09-08 01:22:26,238 epoch 6 - iter 14/73 - loss 0.59802067 - samples/sec: 22.86 - lr: 0.020000\n",
-      "2021-09-08 01:22:26,547 epoch 6 - iter 21/73 - loss 0.61485472 - samples/sec: 22.72 - lr: 0.020000\n",
-      "2021-09-08 01:22:26,855 epoch 6 - iter 28/73 - loss 0.63060524 - samples/sec: 22.80 - lr: 0.020000\n",
-      "2021-09-08 01:22:27,164 epoch 6 - iter 35/73 - loss 0.64015367 - samples/sec: 22.72 - lr: 0.020000\n",
-      "2021-09-08 01:22:27,471 epoch 6 - iter 42/73 - loss 0.63330423 - samples/sec: 22.87 - lr: 0.020000\n",
-      "2021-09-08 01:22:27,778 epoch 6 - iter 49/73 - loss 0.64058837 - samples/sec: 22.86 - lr: 0.020000\n",
-      "2021-09-08 01:22:28,087 epoch 6 - iter 56/73 - loss 0.64980485 - samples/sec: 22.68 - lr: 0.020000\n",
-      "2021-09-08 01:22:28,395 epoch 6 - iter 63/73 - loss 0.64824977 - samples/sec: 22.80 - lr: 0.020000\n",
-      "2021-09-08 01:22:28,701 epoch 6 - iter 70/73 - loss 0.64705402 - samples/sec: 22.97 - lr: 0.020000\n"
+      "2021-09-21 21:02:19,672 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:19,992 epoch 2 - iter 7/73 - loss 0.64994478 - samples/sec: 23.00 - lr: 0.020000\n",
+      "2021-09-21 21:02:20,297 epoch 2 - iter 14/73 - loss 0.63568074 - samples/sec: 22.98 - lr: 0.020000\n",
+      "2021-09-21 21:02:20,595 epoch 2 - iter 21/73 - loss 0.63750708 - samples/sec: 23.58 - lr: 0.020000\n",
+      "2021-09-21 21:02:20,893 epoch 2 - iter 28/73 - loss 0.63621030 - samples/sec: 23.50 - lr: 0.020000\n",
+      "2021-09-21 21:02:21,196 epoch 2 - iter 35/73 - loss 0.63527067 - samples/sec: 23.15 - lr: 0.020000\n",
+      "2021-09-21 21:02:21,529 epoch 2 - iter 42/73 - loss 0.64369663 - samples/sec: 21.09 - lr: 0.020000\n",
+      "2021-09-21 21:02:21,830 epoch 2 - iter 49/73 - loss 0.64759412 - samples/sec: 23.27 - lr: 0.020000\n",
+      "2021-09-21 21:02:22,169 epoch 2 - iter 56/73 - loss 0.64867044 - samples/sec: 20.71 - lr: 0.020000\n",
+      "2021-09-21 21:02:22,477 epoch 2 - iter 63/73 - loss 0.64778821 - samples/sec: 22.83 - lr: 0.020000\n",
+      "2021-09-21 21:02:22,782 epoch 2 - iter 70/73 - loss 0.64721616 - samples/sec: 22.97 - lr: 0.020000\n",
+      "2021-09-21 21:02:22,914 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:22,915 EPOCH 2 done: loss 0.6458 - lr 0.0200000\n",
+      "2021-09-21 21:02:23,053 DEV : loss 0.5282306671142578 - score 0.25\n",
+      "2021-09-21 21:02:23,054 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:02:23,076 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:23,393 epoch 3 - iter 7/73 - loss 0.64429760 - samples/sec: 23.09 - lr: 0.020000\n",
+      "2021-09-21 21:02:23,692 epoch 3 - iter 14/73 - loss 0.64020328 - samples/sec: 23.47 - lr: 0.020000\n",
+      "2021-09-21 21:02:23,991 epoch 3 - iter 21/73 - loss 0.63812758 - samples/sec: 23.46 - lr: 0.020000\n",
+      "2021-09-21 21:02:24,310 epoch 3 - iter 28/73 - loss 0.63264059 - samples/sec: 21.99 - lr: 0.020000\n",
+      "2021-09-21 21:02:24,621 epoch 3 - iter 35/73 - loss 0.63593222 - samples/sec: 22.60 - lr: 0.020000\n",
+      "2021-09-21 21:02:24,919 epoch 3 - iter 42/73 - loss 0.63776639 - samples/sec: 23.58 - lr: 0.020000\n",
+      "2021-09-21 21:02:25,219 epoch 3 - iter 49/73 - loss 0.64401652 - samples/sec: 23.39 - lr: 0.020000\n",
+      "2021-09-21 21:02:25,516 epoch 3 - iter 56/73 - loss 0.64324300 - samples/sec: 23.56 - lr: 0.020000\n",
+      "2021-09-21 21:02:25,816 epoch 3 - iter 63/73 - loss 0.64206475 - samples/sec: 23.37 - lr: 0.020000\n",
+      "2021-09-21 21:02:26,114 epoch 3 - iter 70/73 - loss 0.64119928 - samples/sec: 23.55 - lr: 0.020000\n",
+      "2021-09-21 21:02:26,244 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:26,245 EPOCH 3 done: loss 0.6349 - lr 0.0200000\n",
+      "2021-09-21 21:02:26,491 DEV : loss 0.4331118166446686 - score 0.0\n",
+      "2021-09-21 21:02:26,492 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:02:26,552 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:26,864 epoch 4 - iter 7/73 - loss 0.68244569 - samples/sec: 23.49 - lr: 0.020000\n",
+      "2021-09-21 21:02:27,170 epoch 4 - iter 14/73 - loss 0.68460797 - samples/sec: 22.90 - lr: 0.020000\n",
+      "2021-09-21 21:02:27,484 epoch 4 - iter 21/73 - loss 0.67047442 - samples/sec: 22.35 - lr: 0.020000\n",
+      "2021-09-21 21:02:27,799 epoch 4 - iter 28/73 - loss 0.67076099 - samples/sec: 22.28 - lr: 0.020000\n",
+      "2021-09-21 21:02:28,126 epoch 4 - iter 35/73 - loss 0.66616665 - samples/sec: 21.46 - lr: 0.020000\n",
+      "2021-09-21 21:02:28,447 epoch 4 - iter 42/73 - loss 0.66315985 - samples/sec: 21.90 - lr: 0.020000\n",
+      "2021-09-21 21:02:28,770 epoch 4 - iter 49/73 - loss 0.66070874 - samples/sec: 21.68 - lr: 0.020000\n",
+      "2021-09-21 21:02:29,091 epoch 4 - iter 56/73 - loss 0.65906959 - samples/sec: 21.89 - lr: 0.020000\n",
+      "2021-09-21 21:02:29,412 epoch 4 - iter 63/73 - loss 0.66077715 - samples/sec: 21.89 - lr: 0.020000\n",
+      "2021-09-21 21:02:29,727 epoch 4 - iter 70/73 - loss 0.66063814 - samples/sec: 22.24 - lr: 0.020000\n",
+      "2021-09-21 21:02:29,869 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:29,869 EPOCH 4 done: loss 0.6598 - lr 0.0200000\n",
+      "2021-09-21 21:02:30,007 DEV : loss 0.45423150062561035 - score 0.125\n",
+      "2021-09-21 21:02:30,008 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:02:30,010 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:30,342 epoch 5 - iter 7/73 - loss 0.66053975 - samples/sec: 22.09 - lr: 0.020000\n",
+      "2021-09-21 21:02:30,662 epoch 5 - iter 14/73 - loss 0.64042121 - samples/sec: 21.92 - lr: 0.020000\n",
+      "2021-09-21 21:02:30,966 epoch 5 - iter 21/73 - loss 0.64733073 - samples/sec: 23.05 - lr: 0.020000\n",
+      "2021-09-21 21:02:31,321 epoch 5 - iter 28/73 - loss 0.64374221 - samples/sec: 19.76 - lr: 0.020000\n",
+      "2021-09-21 21:02:31,683 epoch 5 - iter 35/73 - loss 0.63957469 - samples/sec: 19.39 - lr: 0.020000\n",
+      "2021-09-21 21:02:32,039 epoch 5 - iter 42/73 - loss 0.63977868 - samples/sec: 19.75 - lr: 0.020000\n",
+      "2021-09-21 21:02:32,404 epoch 5 - iter 49/73 - loss 0.63665052 - samples/sec: 19.20 - lr: 0.020000\n",
+      "2021-09-21 21:02:32,768 epoch 5 - iter 56/73 - loss 0.63654760 - samples/sec: 19.29 - lr: 0.020000\n",
+      "2021-09-21 21:02:33,090 epoch 5 - iter 63/73 - loss 0.63473230 - samples/sec: 21.78 - lr: 0.020000\n",
+      "2021-09-21 21:02:33,410 epoch 5 - iter 70/73 - loss 0.63647960 - samples/sec: 21.92 - lr: 0.020000\n",
+      "2021-09-21 21:02:33,545 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:33,546 EPOCH 5 done: loss 0.6368 - lr 0.0200000\n",
+      "2021-09-21 21:02:33,772 DEV : loss 0.4738270044326782 - score 0.25\n",
+      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:02:33,773 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:02:33,846 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:34,160 epoch 6 - iter 7/73 - loss 0.61354278 - samples/sec: 23.36 - lr: 0.010000\n",
+      "2021-09-21 21:02:34,461 epoch 6 - iter 14/73 - loss 0.60498759 - samples/sec: 23.28 - lr: 0.010000\n",
+      "2021-09-21 21:02:34,761 epoch 6 - iter 21/73 - loss 0.64448256 - samples/sec: 23.41 - lr: 0.010000\n",
+      "2021-09-21 21:02:35,058 epoch 6 - iter 28/73 - loss 0.63483782 - samples/sec: 23.61 - lr: 0.010000\n",
+      "2021-09-21 21:02:35,359 epoch 6 - iter 35/73 - loss 0.64293086 - samples/sec: 23.26 - lr: 0.010000\n",
+      "2021-09-21 21:02:35,660 epoch 6 - iter 42/73 - loss 0.64673567 - samples/sec: 23.36 - lr: 0.010000\n",
+      "2021-09-21 21:02:35,957 epoch 6 - iter 49/73 - loss 0.64788195 - samples/sec: 23.59 - lr: 0.010000\n",
+      "2021-09-21 21:02:36,254 epoch 6 - iter 56/73 - loss 0.64358919 - samples/sec: 23.59 - lr: 0.010000\n",
+      "2021-09-21 21:02:36,553 epoch 6 - iter 63/73 - loss 0.65046153 - samples/sec: 23.48 - lr: 0.010000\n",
+      "2021-09-21 21:02:36,850 epoch 6 - iter 70/73 - loss 0.64966088 - samples/sec: 23.63 - lr: 0.010000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:22:28,832 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:22:28,833 EPOCH 6 done: loss 0.6460 - lr 0.0200000\n",
-      "2021-09-08 01:22:28,958 DEV : loss 0.5554381012916565 - score 0.25\n",
-      "2021-09-08 01:22:28,958 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:02:36,978 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:38,422 EPOCH 6 done: loss 0.6505 - lr 0.0100000\n",
+      "2021-09-21 21:02:39,608 DEV : loss 0.47255972027778625 - score 0.25\n",
+      "2021-09-21 21:02:39,610 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:02:39,657 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:40,054 epoch 7 - iter 7/73 - loss 0.61907463 - samples/sec: 18.53 - lr: 0.010000\n",
+      "2021-09-21 21:02:40,390 epoch 7 - iter 14/73 - loss 0.62027066 - samples/sec: 20.94 - lr: 0.010000\n",
+      "2021-09-21 21:02:40,708 epoch 7 - iter 21/73 - loss 0.62739042 - samples/sec: 22.06 - lr: 0.010000\n",
+      "2021-09-21 21:02:41,060 epoch 7 - iter 28/73 - loss 0.62138266 - samples/sec: 19.95 - lr: 0.010000\n",
+      "2021-09-21 21:02:41,361 epoch 7 - iter 35/73 - loss 0.62954712 - samples/sec: 23.28 - lr: 0.010000\n",
+      "2021-09-21 21:02:41,660 epoch 7 - iter 42/73 - loss 0.63726265 - samples/sec: 23.48 - lr: 0.010000\n",
+      "2021-09-21 21:02:41,958 epoch 7 - iter 49/73 - loss 0.63933832 - samples/sec: 23.58 - lr: 0.010000\n",
+      "2021-09-21 21:02:42,261 epoch 7 - iter 56/73 - loss 0.63877314 - samples/sec: 23.12 - lr: 0.010000\n",
+      "2021-09-21 21:02:42,559 epoch 7 - iter 63/73 - loss 0.63748304 - samples/sec: 23.53 - lr: 0.010000\n",
+      "2021-09-21 21:02:42,860 epoch 7 - iter 70/73 - loss 0.63605707 - samples/sec: 23.36 - lr: 0.010000\n",
+      "2021-09-21 21:02:42,989 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:42,989 EPOCH 7 done: loss 0.6367 - lr 0.0100000\n",
+      "2021-09-21 21:02:43,263 DEV : loss 0.523113489151001 - score 0.25\n",
+      "2021-09-21 21:02:43,263 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:02:43,378 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:43,696 epoch 8 - iter 7/73 - loss 0.62236793 - samples/sec: 23.02 - lr: 0.010000\n",
+      "2021-09-21 21:02:43,994 epoch 8 - iter 14/73 - loss 0.60897104 - samples/sec: 23.55 - lr: 0.010000\n",
+      "2021-09-21 21:02:44,295 epoch 8 - iter 21/73 - loss 0.62753214 - samples/sec: 23.28 - lr: 0.010000\n",
+      "2021-09-21 21:02:44,598 epoch 8 - iter 28/73 - loss 0.64098048 - samples/sec: 23.18 - lr: 0.010000\n",
+      "2021-09-21 21:02:44,897 epoch 8 - iter 35/73 - loss 0.64096842 - samples/sec: 23.42 - lr: 0.010000\n",
+      "2021-09-21 21:02:45,198 epoch 8 - iter 42/73 - loss 0.65287740 - samples/sec: 23.36 - lr: 0.010000\n",
+      "2021-09-21 21:02:45,494 epoch 8 - iter 49/73 - loss 0.65649205 - samples/sec: 23.71 - lr: 0.010000\n",
+      "2021-09-21 21:02:45,794 epoch 8 - iter 56/73 - loss 0.65249813 - samples/sec: 23.35 - lr: 0.010000\n",
+      "2021-09-21 21:02:46,164 epoch 8 - iter 63/73 - loss 0.65025191 - samples/sec: 18.96 - lr: 0.010000\n",
+      "2021-09-21 21:02:46,555 epoch 8 - iter 70/73 - loss 0.65071134 - samples/sec: 17.97 - lr: 0.010000\n",
+      "2021-09-21 21:02:46,719 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:46,720 EPOCH 8 done: loss 0.6501 - lr 0.0100000\n",
+      "2021-09-21 21:02:46,907 DEV : loss 0.4976595342159271 - score 0.25\n",
+      "2021-09-21 21:02:46,908 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:02:46,911 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:47,305 epoch 9 - iter 7/73 - loss 0.64224017 - samples/sec: 18.88 - lr: 0.010000\n",
+      "2021-09-21 21:02:47,659 epoch 9 - iter 14/73 - loss 0.63161708 - samples/sec: 19.83 - lr: 0.010000\n",
+      "2021-09-21 21:02:48,024 epoch 9 - iter 21/73 - loss 0.63716978 - samples/sec: 19.21 - lr: 0.010000\n",
+      "2021-09-21 21:02:48,392 epoch 9 - iter 28/73 - loss 0.63593809 - samples/sec: 19.03 - lr: 0.010000\n",
+      "2021-09-21 21:02:48,764 epoch 9 - iter 35/73 - loss 0.63448184 - samples/sec: 18.87 - lr: 0.010000\n",
+      "2021-09-21 21:02:49,136 epoch 9 - iter 42/73 - loss 0.63650731 - samples/sec: 18.86 - lr: 0.010000\n",
+      "2021-09-21 21:02:49,491 epoch 9 - iter 49/73 - loss 0.63829697 - samples/sec: 19.74 - lr: 0.010000\n",
+      "2021-09-21 21:02:49,849 epoch 9 - iter 56/73 - loss 0.63542357 - samples/sec: 19.62 - lr: 0.010000\n",
+      "2021-09-21 21:02:50,234 epoch 9 - iter 63/73 - loss 0.63495477 - samples/sec: 18.22 - lr: 0.010000\n",
+      "2021-09-21 21:02:50,620 epoch 9 - iter 70/73 - loss 0.63570137 - samples/sec: 18.17 - lr: 0.010000\n",
+      "2021-09-21 21:02:50,799 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:50,800 EPOCH 9 done: loss 0.6357 - lr 0.0100000\n",
+      "2021-09-21 21:02:51,021 DEV : loss 0.4642232358455658 - score 0.25\n",
+      "2021-09-21 21:02:51,022 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:22:33,191 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:22:33,517 epoch 7 - iter 7/73 - loss 0.66589883 - samples/sec: 22.59 - lr: 0.020000\n",
-      "2021-09-08 01:22:33,826 epoch 7 - iter 14/73 - loss 0.65144980 - samples/sec: 22.70 - lr: 0.020000\n",
-      "2021-09-08 01:22:34,134 epoch 7 - iter 21/73 - loss 0.65178873 - samples/sec: 22.73 - lr: 0.020000\n",
-      "2021-09-08 01:22:34,448 epoch 7 - iter 28/73 - loss 0.65813513 - samples/sec: 22.38 - lr: 0.020000\n",
-      "2021-09-08 01:22:34,757 epoch 7 - iter 35/73 - loss 0.65619759 - samples/sec: 22.79 - lr: 0.020000\n",
-      "2021-09-08 01:22:35,063 epoch 7 - iter 42/73 - loss 0.65062712 - samples/sec: 22.94 - lr: 0.020000\n",
-      "2021-09-08 01:22:35,368 epoch 7 - iter 49/73 - loss 0.64827206 - samples/sec: 23.01 - lr: 0.020000\n",
-      "2021-09-08 01:22:35,676 epoch 7 - iter 56/73 - loss 0.64619906 - samples/sec: 22.74 - lr: 0.020000\n",
-      "2021-09-08 01:22:35,983 epoch 7 - iter 63/73 - loss 0.64502469 - samples/sec: 22.87 - lr: 0.020000\n",
-      "2021-09-08 01:22:36,287 epoch 7 - iter 70/73 - loss 0.64200450 - samples/sec: 23.08 - lr: 0.020000\n",
-      "2021-09-08 01:22:36,419 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:22:36,420 EPOCH 7 done: loss 0.6415 - lr 0.0200000\n",
-      "2021-09-08 01:22:36,546 DEV : loss 0.45512890815734863 - score 0.125\n",
-      "2021-09-08 01:22:36,547 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:22:36,550 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:22:36,870 epoch 8 - iter 7/73 - loss 0.64451982 - samples/sec: 22.88 - lr: 0.020000\n",
-      "2021-09-08 01:22:37,178 epoch 8 - iter 14/73 - loss 0.64218905 - samples/sec: 22.78 - lr: 0.020000\n",
-      "2021-09-08 01:22:37,488 epoch 8 - iter 21/73 - loss 0.65197425 - samples/sec: 22.64 - lr: 0.020000\n",
-      "2021-09-08 01:22:37,801 epoch 8 - iter 28/73 - loss 0.65329382 - samples/sec: 22.48 - lr: 0.020000\n",
-      "2021-09-08 01:22:38,107 epoch 8 - iter 35/73 - loss 0.65251642 - samples/sec: 22.90 - lr: 0.020000\n",
-      "2021-09-08 01:22:38,415 epoch 8 - iter 42/73 - loss 0.65002729 - samples/sec: 22.78 - lr: 0.020000\n",
-      "2021-09-08 01:22:38,722 epoch 8 - iter 49/73 - loss 0.64937462 - samples/sec: 22.89 - lr: 0.020000\n",
-      "2021-09-08 01:22:39,031 epoch 8 - iter 56/73 - loss 0.64728334 - samples/sec: 22.70 - lr: 0.020000\n",
-      "2021-09-08 01:22:39,337 epoch 8 - iter 63/73 - loss 0.64700615 - samples/sec: 22.90 - lr: 0.020000\n",
-      "2021-09-08 01:22:39,644 epoch 8 - iter 70/73 - loss 0.64656329 - samples/sec: 22.87 - lr: 0.020000\n",
-      "2021-09-08 01:22:39,777 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:22:39,778 EPOCH 8 done: loss 0.6476 - lr 0.0200000\n",
-      "2021-09-08 01:22:40,009 DEV : loss 0.5799890756607056 - score 0.125\n",
-      "2021-09-08 01:22:40,010 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:22:40,101 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:22:40,427 epoch 9 - iter 7/73 - loss 0.65398281 - samples/sec: 22.48 - lr: 0.020000\n",
-      "2021-09-08 01:22:40,734 epoch 9 - iter 14/73 - loss 0.62024407 - samples/sec: 22.90 - lr: 0.020000\n",
-      "2021-09-08 01:22:41,039 epoch 9 - iter 21/73 - loss 0.62922167 - samples/sec: 23.01 - lr: 0.020000\n",
-      "2021-09-08 01:22:41,348 epoch 9 - iter 28/73 - loss 0.61028736 - samples/sec: 22.74 - lr: 0.020000\n",
-      "2021-09-08 01:22:41,654 epoch 9 - iter 35/73 - loss 0.62361822 - samples/sec: 22.93 - lr: 0.020000\n",
-      "2021-09-08 01:22:41,961 epoch 9 - iter 42/73 - loss 0.62123706 - samples/sec: 22.84 - lr: 0.020000\n",
-      "2021-09-08 01:22:42,269 epoch 9 - iter 49/73 - loss 0.62948989 - samples/sec: 22.81 - lr: 0.020000\n",
-      "2021-09-08 01:22:42,573 epoch 9 - iter 56/73 - loss 0.63029128 - samples/sec: 23.04 - lr: 0.020000\n",
-      "2021-09-08 01:22:42,878 epoch 9 - iter 63/73 - loss 0.63524953 - samples/sec: 23.04 - lr: 0.020000\n",
-      "2021-09-08 01:22:43,184 epoch 9 - iter 70/73 - loss 0.63591872 - samples/sec: 22.96 - lr: 0.020000\n",
-      "2021-09-08 01:22:43,319 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:22:43,320 EPOCH 9 done: loss 0.6373 - lr 0.0200000\n",
-      "2021-09-08 01:22:43,449 DEV : loss 0.4553138017654419 - score 0.125\n",
-      "2021-09-08 01:22:43,450 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:22:43,452 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:22:43,778 epoch 10 - iter 7/73 - loss 0.63621052 - samples/sec: 22.52 - lr: 0.020000\n",
-      "2021-09-08 01:22:44,086 epoch 10 - iter 14/73 - loss 0.62486201 - samples/sec: 22.76 - lr: 0.020000\n",
-      "2021-09-08 01:22:44,393 epoch 10 - iter 21/73 - loss 0.62719847 - samples/sec: 22.89 - lr: 0.020000\n",
-      "2021-09-08 01:22:44,703 epoch 10 - iter 28/73 - loss 0.64028598 - samples/sec: 22.62 - lr: 0.020000\n",
-      "2021-09-08 01:22:45,010 epoch 10 - iter 35/73 - loss 0.64130457 - samples/sec: 22.88 - lr: 0.020000\n",
-      "2021-09-08 01:22:45,317 epoch 10 - iter 42/73 - loss 0.63908187 - samples/sec: 22.86 - lr: 0.020000\n",
-      "2021-09-08 01:22:45,627 epoch 10 - iter 49/73 - loss 0.64265331 - samples/sec: 22.67 - lr: 0.020000\n",
-      "2021-09-08 01:22:45,932 epoch 10 - iter 56/73 - loss 0.64319742 - samples/sec: 22.98 - lr: 0.020000\n",
-      "2021-09-08 01:22:46,237 epoch 10 - iter 63/73 - loss 0.64052346 - samples/sec: 22.97 - lr: 0.020000\n",
-      "2021-09-08 01:22:46,545 epoch 10 - iter 70/73 - loss 0.64307304 - samples/sec: 22.84 - lr: 0.020000\n",
-      "2021-09-08 01:22:46,677 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:22:46,677 EPOCH 10 done: loss 0.6436 - lr 0.0200000\n",
-      "2021-09-08 01:22:46,933 DEV : loss 0.48801830410957336 - score 0.0\n",
-      "Epoch    10: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:22:46,934 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:22:52,496 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:22:52,497 Testing using best model ...\n",
-      "2021-09-08 01:22:52,521 loading file None/best-model.pt\n",
+      "2021-09-21 21:02:55,961 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:56,376 epoch 10 - iter 7/73 - loss 0.64028059 - samples/sec: 17.82 - lr: 0.010000\n",
+      "2021-09-21 21:02:56,768 epoch 10 - iter 14/73 - loss 0.64030570 - samples/sec: 17.94 - lr: 0.010000\n",
+      "2021-09-21 21:02:57,150 epoch 10 - iter 21/73 - loss 0.64406672 - samples/sec: 18.37 - lr: 0.010000\n",
+      "2021-09-21 21:02:57,535 epoch 10 - iter 28/73 - loss 0.63636413 - samples/sec: 18.20 - lr: 0.010000\n",
+      "2021-09-21 21:02:57,872 epoch 10 - iter 35/73 - loss 0.65033398 - samples/sec: 20.85 - lr: 0.010000\n",
+      "2021-09-21 21:02:58,179 epoch 10 - iter 42/73 - loss 0.64749379 - samples/sec: 22.92 - lr: 0.010000\n",
+      "2021-09-21 21:02:58,487 epoch 10 - iter 49/73 - loss 0.64296457 - samples/sec: 22.77 - lr: 0.010000\n",
+      "2021-09-21 21:02:58,794 epoch 10 - iter 56/73 - loss 0.64124677 - samples/sec: 22.89 - lr: 0.010000\n",
+      "2021-09-21 21:02:59,102 epoch 10 - iter 63/73 - loss 0.64005832 - samples/sec: 22.80 - lr: 0.010000\n",
+      "2021-09-21 21:02:59,407 epoch 10 - iter 70/73 - loss 0.64037207 - samples/sec: 22.95 - lr: 0.010000\n",
+      "2021-09-21 21:02:59,539 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:59,540 EPOCH 10 done: loss 0.6411 - lr 0.0100000\n",
+      "2021-09-21 21:03:07,813 DEV : loss 0.5069143772125244 - score 0.25\n",
+      "2021-09-21 21:03:07,815 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:03:14,681 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:14,682 Testing using best model ...\n",
+      "2021-09-21 21:03:14,683 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:22:58,217 \t0.0\n",
-      "2021-09-08 01:22:58,218 \n",
+      "2021-09-21 21:03:21,344 \t0.1111\n",
+      "2021-09-21 21:03:21,345 \n",
       "Results:\n",
-      "- F-score (micro) 0.0\n",
-      "- F-score (macro) 0.0\n",
-      "- Accuracy 0.0\n",
+      "- F-score (micro) 0.1111\n",
+      "- F-score (macro) 0.0247\n",
+      "- Accuracy 0.1111\n",
       "\n",
       "By class:\n",
       "              precision    recall  f1-score   support\n",
       "\n",
-      "         joy     0.0000    0.0000    0.0000         1\n",
+      "         joy     0.0000    0.0000    0.0000         2\n",
       "    surprise     0.0000    0.0000    0.0000         1\n",
       "        love     0.0000    0.0000    0.0000         0\n",
       "     disgust     0.0000    0.0000    0.0000         1\n",
-      "        fear     0.0000    0.0000    0.0000         0\n",
+      "        fear     0.0000    0.0000    0.0000         1\n",
       "       anger     0.0000    0.0000    0.0000         1\n",
-      "       guilt     0.0000    0.0000    0.0000         3\n",
-      "       shame     0.0000    0.0000    0.0000         1\n",
+      "       guilt     0.0000    0.0000    0.0000         1\n",
+      "       shame     0.1250    1.0000    0.2222         1\n",
       "     sadness     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "   micro avg     0.0000    0.0000    0.0000         9\n",
-      "   macro avg     0.0000    0.0000    0.0000         9\n",
-      "weighted avg     0.0000    0.0000    0.0000         9\n",
-      " samples avg     0.0000    0.0000    0.0000         9\n",
+      "   micro avg     0.1111    0.1111    0.1111         9\n",
+      "   macro avg     0.0139    0.1111    0.0247         9\n",
+      "weighted avg     0.0139    0.1111    0.0247         9\n",
+      " samples avg     0.1111    0.1111    0.1111         9\n",
       "\n",
-      "2021-09-08 01:22:58,218 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:23:35,890 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
+      "2021-09-21 21:03:21,345 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:18,931 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:23:40,076 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:04:30,239 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 82/82 [00:00<00:00, 48653.69it/s]"
+      "100%|██████████| 82/82 [00:00<00:00, 51051.35it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:23:40,079 [b'joy', b'surprise', b'love', b'disgust', b'fear', b'anger', b'guilt', b'shame', b'sadness']\n",
-      "2021-09-08 01:23:40,095 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:23:40,097 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:04:30,242 [b'joy', b'surprise', b'love', b'disgust', b'fear', b'anger', b'guilt', b'shame', b'sadness']\n",
+      "2021-09-21 21:04:30,373 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:30,375 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -957,25 +956,24 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:23:40,098 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:23:40,098 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
-      "2021-09-08 01:23:40,098 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:23:40,098 Parameters:\n",
-      "2021-09-08 01:23:40,099  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:23:40,099  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:23:40,099  - patience: \"3\"\n",
-      "2021-09-08 01:23:40,100  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:23:40,100  - max_epochs: \"10\"\n",
-      "2021-09-08 01:23:40,100  - shuffle: \"True\"\n",
-      "2021-09-08 01:23:40,100  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:23:40,101  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:23:40,101 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:23:40,101 Model training base path: \"None\"\n",
-      "2021-09-08 01:23:40,102 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:23:40,102 Device: cuda:1\n",
-      "2021-09-08 01:23:40,102 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:23:40,102 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:23:40,130 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:04:30,376 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:30,376 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
+      "2021-09-21 21:04:30,376 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:30,376 Parameters:\n",
+      "2021-09-21 21:04:30,377  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:04:30,377  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:04:30,377  - patience: \"3\"\n",
+      "2021-09-21 21:04:30,378  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:04:30,378  - max_epochs: \"10\"\n",
+      "2021-09-21 21:04:30,378  - shuffle: \"True\"\n",
+      "2021-09-21 21:04:30,378  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:04:30,379  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:04:30,379 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:30,379 Model training base path: \"None\"\n",
+      "2021-09-21 21:04:30,380 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:30,380 Device: cuda:0\n",
+      "2021-09-21 21:04:30,380 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:30,380 Embeddings storage mode: cpu\n"
      ]
     },
     {
@@ -989,214 +987,215 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:23:40,466 epoch 1 - iter 7/73 - loss 0.32473874 - samples/sec: 21.85 - lr: 0.020000\n",
-      "2021-09-08 01:23:40,797 epoch 1 - iter 14/73 - loss 0.56301667 - samples/sec: 21.19 - lr: 0.020000\n",
-      "2021-09-08 01:23:41,125 epoch 1 - iter 21/73 - loss 0.60858424 - samples/sec: 21.40 - lr: 0.020000\n",
-      "2021-09-08 01:23:41,447 epoch 1 - iter 28/73 - loss 0.55296939 - samples/sec: 21.78 - lr: 0.020000\n",
-      "2021-09-08 01:23:41,767 epoch 1 - iter 35/73 - loss 0.49936003 - samples/sec: 21.85 - lr: 0.020000\n",
-      "2021-09-08 01:23:42,100 epoch 1 - iter 42/73 - loss 0.56341036 - samples/sec: 21.09 - lr: 0.020000\n",
-      "2021-09-08 01:23:42,424 epoch 1 - iter 49/73 - loss 0.49576140 - samples/sec: 21.66 - lr: 0.020000\n",
-      "2021-09-08 01:23:42,755 epoch 1 - iter 56/73 - loss 0.52903851 - samples/sec: 21.15 - lr: 0.020000\n",
-      "2021-09-08 01:23:43,084 epoch 1 - iter 63/73 - loss 0.54396308 - samples/sec: 21.29 - lr: 0.020000\n",
-      "2021-09-08 01:23:43,412 epoch 1 - iter 70/73 - loss 0.56172376 - samples/sec: 21.39 - lr: 0.020000\n",
-      "2021-09-08 01:23:43,554 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:23:43,555 EPOCH 1 done: loss 0.5634 - lr 0.0200000\n",
-      "2021-09-08 01:23:43,680 DEV : loss 0.42677468061447144 - score 0.0\n",
-      "2021-09-08 01:23:43,680 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:04:30,561 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:31,153 epoch 1 - iter 7/73 - loss 0.21616299 - samples/sec: 13.64 - lr: 0.020000\n",
+      "2021-09-21 21:04:31,716 epoch 1 - iter 14/73 - loss 0.45696082 - samples/sec: 12.45 - lr: 0.020000\n",
+      "2021-09-21 21:04:32,109 epoch 1 - iter 21/73 - loss 0.50869023 - samples/sec: 17.86 - lr: 0.020000\n",
+      "2021-09-21 21:04:32,616 epoch 1 - iter 28/73 - loss 0.56591847 - samples/sec: 13.81 - lr: 0.020000\n",
+      "2021-09-21 21:04:33,156 epoch 1 - iter 35/73 - loss 0.57796296 - samples/sec: 12.99 - lr: 0.020000\n",
+      "2021-09-21 21:04:33,705 epoch 1 - iter 42/73 - loss 0.59304547 - samples/sec: 12.79 - lr: 0.020000\n",
+      "2021-09-21 21:04:34,251 epoch 1 - iter 49/73 - loss 0.59768341 - samples/sec: 12.82 - lr: 0.020000\n",
+      "2021-09-21 21:04:34,749 epoch 1 - iter 56/73 - loss 0.60628419 - samples/sec: 14.08 - lr: 0.020000\n",
+      "2021-09-21 21:04:35,330 epoch 1 - iter 63/73 - loss 0.61135515 - samples/sec: 12.08 - lr: 0.020000\n",
+      "2021-09-21 21:04:35,898 epoch 1 - iter 70/73 - loss 0.60812958 - samples/sec: 12.34 - lr: 0.020000\n",
+      "2021-09-21 21:04:36,142 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:36,143 EPOCH 1 done: loss 0.6015 - lr 0.0200000\n",
+      "2021-09-21 21:04:36,697 DEV : loss 0.40474802255630493 - score 0.0\n",
+      "2021-09-21 21:04:36,697 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:23:49,504 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:23:49,852 epoch 2 - iter 7/73 - loss 0.61121003 - samples/sec: 21.09 - lr: 0.020000\n",
-      "2021-09-08 01:23:50,181 epoch 2 - iter 14/73 - loss 0.65599348 - samples/sec: 21.34 - lr: 0.020000\n",
-      "2021-09-08 01:23:50,512 epoch 2 - iter 21/73 - loss 0.65463857 - samples/sec: 21.15 - lr: 0.020000\n",
-      "2021-09-08 01:23:50,841 epoch 2 - iter 28/73 - loss 0.65741342 - samples/sec: 21.33 - lr: 0.020000\n",
-      "2021-09-08 01:23:51,169 epoch 2 - iter 35/73 - loss 0.63790455 - samples/sec: 21.37 - lr: 0.020000\n",
-      "2021-09-08 01:23:51,498 epoch 2 - iter 42/73 - loss 0.65219728 - samples/sec: 21.35 - lr: 0.020000\n",
-      "2021-09-08 01:23:51,830 epoch 2 - iter 49/73 - loss 0.64788511 - samples/sec: 21.09 - lr: 0.020000\n",
-      "2021-09-08 01:23:52,151 epoch 2 - iter 56/73 - loss 0.65270368 - samples/sec: 21.88 - lr: 0.020000\n",
-      "2021-09-08 01:23:52,484 epoch 2 - iter 63/73 - loss 0.65037714 - samples/sec: 21.07 - lr: 0.020000\n",
-      "2021-09-08 01:23:52,813 epoch 2 - iter 70/73 - loss 0.65137009 - samples/sec: 21.28 - lr: 0.020000\n",
-      "2021-09-08 01:23:52,955 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:23:52,955 EPOCH 2 done: loss 0.6523 - lr 0.0200000\n",
-      "2021-09-08 01:23:53,078 DEV : loss 0.4587031304836273 - score 0.375\n",
-      "2021-09-08 01:23:53,078 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:04:41,375 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:41,819 epoch 2 - iter 7/73 - loss 0.76301656 - samples/sec: 16.66 - lr: 0.020000\n",
+      "2021-09-21 21:04:42,238 epoch 2 - iter 14/73 - loss 0.60807618 - samples/sec: 16.74 - lr: 0.020000\n",
+      "2021-09-21 21:04:42,684 epoch 2 - iter 21/73 - loss 0.69234089 - samples/sec: 15.73 - lr: 0.020000\n",
+      "2021-09-21 21:04:43,023 epoch 2 - iter 28/73 - loss 0.69505464 - samples/sec: 20.68 - lr: 0.020000\n",
+      "2021-09-21 21:04:43,450 epoch 2 - iter 35/73 - loss 0.69776517 - samples/sec: 16.44 - lr: 0.020000\n",
+      "2021-09-21 21:04:43,894 epoch 2 - iter 42/73 - loss 0.68869813 - samples/sec: 15.81 - lr: 0.020000\n",
+      "2021-09-21 21:04:44,337 epoch 2 - iter 49/73 - loss 0.67775857 - samples/sec: 15.81 - lr: 0.020000\n",
+      "2021-09-21 21:04:44,762 epoch 2 - iter 56/73 - loss 0.66984777 - samples/sec: 16.49 - lr: 0.020000\n",
+      "2021-09-21 21:04:45,125 epoch 2 - iter 63/73 - loss 0.66537450 - samples/sec: 19.33 - lr: 0.020000\n",
+      "2021-09-21 21:04:45,429 epoch 2 - iter 70/73 - loss 0.66261320 - samples/sec: 23.08 - lr: 0.020000\n",
+      "2021-09-21 21:04:45,559 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:45,559 EPOCH 2 done: loss 0.6618 - lr 0.0200000\n",
+      "2021-09-21 21:04:50,324 DEV : loss 0.4740963280200958 - score 0.125\n",
+      "2021-09-21 21:04:50,325 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:23:57,354 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:23:57,687 epoch 3 - iter 7/73 - loss 0.65468221 - samples/sec: 22.05 - lr: 0.020000\n",
-      "2021-09-08 01:23:57,999 epoch 3 - iter 14/73 - loss 0.65019003 - samples/sec: 22.47 - lr: 0.020000\n",
-      "2021-09-08 01:23:58,317 epoch 3 - iter 21/73 - loss 0.64637403 - samples/sec: 22.07 - lr: 0.020000\n",
-      "2021-09-08 01:23:58,642 epoch 3 - iter 28/73 - loss 0.64486923 - samples/sec: 21.59 - lr: 0.020000\n",
-      "2021-09-08 01:23:58,954 epoch 3 - iter 35/73 - loss 0.64792993 - samples/sec: 22.42 - lr: 0.020000\n",
-      "2021-09-08 01:23:59,272 epoch 3 - iter 42/73 - loss 0.64935841 - samples/sec: 22.07 - lr: 0.020000\n",
-      "2021-09-08 01:23:59,584 epoch 3 - iter 49/73 - loss 0.64634693 - samples/sec: 22.48 - lr: 0.020000\n",
-      "2021-09-08 01:23:59,896 epoch 3 - iter 56/73 - loss 0.64925174 - samples/sec: 22.49 - lr: 0.020000\n",
-      "2021-09-08 01:24:00,203 epoch 3 - iter 63/73 - loss 0.64951503 - samples/sec: 22.86 - lr: 0.020000\n",
-      "2021-09-08 01:24:00,508 epoch 3 - iter 70/73 - loss 0.64853890 - samples/sec: 23.00 - lr: 0.020000\n",
-      "2021-09-08 01:24:00,640 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:00,641 EPOCH 3 done: loss 0.6484 - lr 0.0200000\n",
-      "2021-09-08 01:24:00,765 DEV : loss 0.49938708543777466 - score 0.0\n",
-      "2021-09-08 01:24:00,766 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:24:00,768 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:01,087 epoch 4 - iter 7/73 - loss 0.63827620 - samples/sec: 22.93 - lr: 0.020000\n",
-      "2021-09-08 01:24:01,400 epoch 4 - iter 14/73 - loss 0.63078490 - samples/sec: 22.39 - lr: 0.020000\n",
-      "2021-09-08 01:24:01,724 epoch 4 - iter 21/73 - loss 0.63205718 - samples/sec: 21.71 - lr: 0.020000\n",
-      "2021-09-08 01:24:02,045 epoch 4 - iter 28/73 - loss 0.63360753 - samples/sec: 21.86 - lr: 0.020000\n",
-      "2021-09-08 01:24:02,357 epoch 4 - iter 35/73 - loss 0.63269250 - samples/sec: 22.44 - lr: 0.020000\n",
-      "2021-09-08 01:24:02,670 epoch 4 - iter 42/73 - loss 0.63463180 - samples/sec: 22.43 - lr: 0.020000\n",
-      "2021-09-08 01:24:02,983 epoch 4 - iter 49/73 - loss 0.62733297 - samples/sec: 22.44 - lr: 0.020000\n",
-      "2021-09-08 01:24:03,300 epoch 4 - iter 56/73 - loss 0.63125845 - samples/sec: 22.06 - lr: 0.020000\n",
-      "2021-09-08 01:24:03,616 epoch 4 - iter 63/73 - loss 0.63072848 - samples/sec: 22.19 - lr: 0.020000\n",
-      "2021-09-08 01:24:03,938 epoch 4 - iter 70/73 - loss 0.62086706 - samples/sec: 21.82 - lr: 0.020000\n",
-      "2021-09-08 01:24:04,080 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:04,080 EPOCH 4 done: loss 0.6256 - lr 0.0200000\n",
-      "2021-09-08 01:24:04,203 DEV : loss 0.4740659296512604 - score 0.0\n",
-      "2021-09-08 01:24:04,203 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:24:04,205 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:04,542 epoch 5 - iter 7/73 - loss 0.58505256 - samples/sec: 21.65 - lr: 0.020000\n",
-      "2021-09-08 01:24:04,871 epoch 5 - iter 14/73 - loss 0.57120623 - samples/sec: 21.36 - lr: 0.020000\n",
-      "2021-09-08 01:24:05,200 epoch 5 - iter 21/73 - loss 0.59390782 - samples/sec: 21.31 - lr: 0.020000\n",
-      "2021-09-08 01:24:05,527 epoch 5 - iter 28/73 - loss 0.53749890 - samples/sec: 21.43 - lr: 0.020000\n",
-      "2021-09-08 01:24:05,849 epoch 5 - iter 35/73 - loss 0.55941459 - samples/sec: 21.80 - lr: 0.020000\n",
-      "2021-09-08 01:24:06,178 epoch 5 - iter 42/73 - loss 0.56310400 - samples/sec: 21.30 - lr: 0.020000\n",
-      "2021-09-08 01:24:06,508 epoch 5 - iter 49/73 - loss 0.58305261 - samples/sec: 21.22 - lr: 0.020000\n",
-      "2021-09-08 01:24:06,835 epoch 5 - iter 56/73 - loss 0.58835345 - samples/sec: 21.45 - lr: 0.020000\n",
-      "2021-09-08 01:24:07,164 epoch 5 - iter 63/73 - loss 0.59411707 - samples/sec: 21.34 - lr: 0.020000\n",
-      "2021-09-08 01:24:07,496 epoch 5 - iter 70/73 - loss 0.59211524 - samples/sec: 21.13 - lr: 0.020000\n",
-      "2021-09-08 01:24:07,639 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:07,639 EPOCH 5 done: loss 0.5919 - lr 0.0200000\n",
-      "2021-09-08 01:24:07,763 DEV : loss 0.5628487467765808 - score 0.0\n",
-      "2021-09-08 01:24:07,764 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:24:07,767 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:08,110 epoch 6 - iter 7/73 - loss 0.73365537 - samples/sec: 21.29 - lr: 0.020000\n",
-      "2021-09-08 01:24:08,461 epoch 6 - iter 14/73 - loss 0.68827905 - samples/sec: 20.00 - lr: 0.020000\n",
-      "2021-09-08 01:24:08,791 epoch 6 - iter 21/73 - loss 0.65554483 - samples/sec: 21.29 - lr: 0.020000\n",
-      "2021-09-08 01:24:09,124 epoch 6 - iter 28/73 - loss 0.61672528 - samples/sec: 21.04 - lr: 0.020000\n",
-      "2021-09-08 01:24:09,455 epoch 6 - iter 35/73 - loss 0.63036567 - samples/sec: 21.18 - lr: 0.020000\n",
-      "2021-09-08 01:24:09,783 epoch 6 - iter 42/73 - loss 0.63074538 - samples/sec: 21.37 - lr: 0.020000\n",
-      "2021-09-08 01:24:10,114 epoch 6 - iter 49/73 - loss 0.61593333 - samples/sec: 21.19 - lr: 0.020000\n",
-      "2021-09-08 01:24:10,442 epoch 6 - iter 56/73 - loss 0.61175512 - samples/sec: 21.35 - lr: 0.020000\n",
-      "2021-09-08 01:24:10,773 epoch 6 - iter 63/73 - loss 0.61180684 - samples/sec: 21.20 - lr: 0.020000\n",
-      "2021-09-08 01:24:11,113 epoch 6 - iter 70/73 - loss 0.62389163 - samples/sec: 20.61 - lr: 0.020000\n",
-      "2021-09-08 01:24:11,250 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:04:55,560 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:55,966 epoch 3 - iter 7/73 - loss 0.61851670 - samples/sec: 18.34 - lr: 0.020000\n",
+      "2021-09-21 21:04:56,373 epoch 3 - iter 14/73 - loss 0.64058327 - samples/sec: 17.23 - lr: 0.020000\n",
+      "2021-09-21 21:04:56,745 epoch 3 - iter 21/73 - loss 0.64241593 - samples/sec: 18.85 - lr: 0.020000\n",
+      "2021-09-21 21:04:57,153 epoch 3 - iter 28/73 - loss 0.64947990 - samples/sec: 17.21 - lr: 0.020000\n",
+      "2021-09-21 21:04:57,490 epoch 3 - iter 35/73 - loss 0.64563503 - samples/sec: 20.79 - lr: 0.020000\n",
+      "2021-09-21 21:04:57,808 epoch 3 - iter 42/73 - loss 0.64545022 - samples/sec: 22.07 - lr: 0.020000\n",
+      "2021-09-21 21:04:58,106 epoch 3 - iter 49/73 - loss 0.64779512 - samples/sec: 23.56 - lr: 0.020000\n",
+      "2021-09-21 21:04:58,402 epoch 3 - iter 56/73 - loss 0.64997672 - samples/sec: 23.70 - lr: 0.020000\n",
+      "2021-09-21 21:04:58,705 epoch 3 - iter 63/73 - loss 0.64944333 - samples/sec: 23.14 - lr: 0.020000\n",
+      "2021-09-21 21:04:59,003 epoch 3 - iter 70/73 - loss 0.64817214 - samples/sec: 23.49 - lr: 0.020000\n",
+      "2021-09-21 21:04:59,135 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:59,135 EPOCH 3 done: loss 0.6484 - lr 0.0200000\n",
+      "2021-09-21 21:05:02,356 DEV : loss 0.4383474886417389 - score 0.0\n",
+      "2021-09-21 21:05:02,357 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:05:02,507 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:02,911 epoch 4 - iter 7/73 - loss 0.65159323 - samples/sec: 18.29 - lr: 0.020000\n",
+      "2021-09-21 21:05:03,268 epoch 4 - iter 14/73 - loss 0.64606168 - samples/sec: 19.65 - lr: 0.020000\n",
+      "2021-09-21 21:05:03,628 epoch 4 - iter 21/73 - loss 0.64829407 - samples/sec: 19.50 - lr: 0.020000\n",
+      "2021-09-21 21:05:03,932 epoch 4 - iter 28/73 - loss 0.64955141 - samples/sec: 23.12 - lr: 0.020000\n",
+      "2021-09-21 21:05:04,235 epoch 4 - iter 35/73 - loss 0.64959596 - samples/sec: 23.09 - lr: 0.020000\n",
+      "2021-09-21 21:05:04,551 epoch 4 - iter 42/73 - loss 0.65026422 - samples/sec: 22.27 - lr: 0.020000\n",
+      "2021-09-21 21:05:04,852 epoch 4 - iter 49/73 - loss 0.64865435 - samples/sec: 23.30 - lr: 0.020000\n",
+      "2021-09-21 21:05:05,154 epoch 4 - iter 56/73 - loss 0.64542224 - samples/sec: 23.20 - lr: 0.020000\n",
+      "2021-09-21 21:05:05,454 epoch 4 - iter 63/73 - loss 0.64335005 - samples/sec: 23.39 - lr: 0.020000\n",
+      "2021-09-21 21:05:05,765 epoch 4 - iter 70/73 - loss 0.64237314 - samples/sec: 22.60 - lr: 0.020000\n",
+      "2021-09-21 21:05:05,903 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:05,904 EPOCH 4 done: loss 0.6460 - lr 0.0200000\n",
+      "2021-09-21 21:05:06,249 DEV : loss 0.5939858555793762 - score 0.125\n",
+      "2021-09-21 21:05:06,251 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:05:06,305 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:06,669 epoch 5 - iter 7/73 - loss 0.65260588 - samples/sec: 20.14 - lr: 0.020000\n",
+      "2021-09-21 21:05:06,984 epoch 5 - iter 14/73 - loss 0.66543233 - samples/sec: 22.35 - lr: 0.020000\n",
+      "2021-09-21 21:05:07,301 epoch 5 - iter 21/73 - loss 0.66417369 - samples/sec: 22.08 - lr: 0.020000\n",
+      "2021-09-21 21:05:07,608 epoch 5 - iter 28/73 - loss 0.65904224 - samples/sec: 22.88 - lr: 0.020000\n",
+      "2021-09-21 21:05:07,941 epoch 5 - iter 35/73 - loss 0.65916008 - samples/sec: 21.06 - lr: 0.020000\n",
+      "2021-09-21 21:05:08,417 epoch 5 - iter 42/73 - loss 0.65592523 - samples/sec: 14.74 - lr: 0.020000\n",
+      "2021-09-21 21:05:08,884 epoch 5 - iter 49/73 - loss 0.65824924 - samples/sec: 15.03 - lr: 0.020000\n",
+      "2021-09-21 21:05:09,385 epoch 5 - iter 56/73 - loss 0.65200291 - samples/sec: 13.98 - lr: 0.020000\n",
+      "2021-09-21 21:05:09,871 epoch 5 - iter 63/73 - loss 0.65260440 - samples/sec: 14.44 - lr: 0.020000\n",
+      "2021-09-21 21:05:10,254 epoch 5 - iter 70/73 - loss 0.65594912 - samples/sec: 18.31 - lr: 0.020000\n",
+      "2021-09-21 21:05:10,432 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:10,433 EPOCH 5 done: loss 0.6550 - lr 0.0200000\n",
+      "2021-09-21 21:05:10,676 DEV : loss 0.4804273247718811 - score 0.0\n",
+      "2021-09-21 21:05:10,677 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:05:10,679 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:11,123 epoch 6 - iter 7/73 - loss 0.61703262 - samples/sec: 17.30 - lr: 0.020000\n",
+      "2021-09-21 21:05:11,522 epoch 6 - iter 14/73 - loss 0.63321508 - samples/sec: 17.61 - lr: 0.020000\n",
+      "2021-09-21 21:05:11,903 epoch 6 - iter 21/73 - loss 0.63815682 - samples/sec: 18.39 - lr: 0.020000\n",
+      "2021-09-21 21:05:12,303 epoch 6 - iter 28/73 - loss 0.64421795 - samples/sec: 17.55 - lr: 0.020000\n",
+      "2021-09-21 21:05:12,773 epoch 6 - iter 35/73 - loss 0.64710470 - samples/sec: 14.91 - lr: 0.020000\n",
+      "2021-09-21 21:05:13,178 epoch 6 - iter 42/73 - loss 0.65463734 - samples/sec: 17.35 - lr: 0.020000\n",
+      "2021-09-21 21:05:13,545 epoch 6 - iter 49/73 - loss 0.65363478 - samples/sec: 19.13 - lr: 0.020000\n",
+      "2021-09-21 21:05:13,903 epoch 6 - iter 56/73 - loss 0.64732395 - samples/sec: 19.58 - lr: 0.020000\n",
+      "2021-09-21 21:05:14,303 epoch 6 - iter 63/73 - loss 0.64343257 - samples/sec: 17.53 - lr: 0.020000\n",
+      "2021-09-21 21:05:14,686 epoch 6 - iter 70/73 - loss 0.64574453 - samples/sec: 18.32 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:24:11,251 EPOCH 6 done: loss 0.6272 - lr 0.0200000\n",
-      "2021-09-08 01:24:11,484 DEV : loss 0.5678269863128662 - score 0.125\n",
-      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:24:11,484 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:24:11,580 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:11,910 epoch 7 - iter 7/73 - loss 0.61839372 - samples/sec: 22.19 - lr: 0.010000\n",
-      "2021-09-08 01:24:12,225 epoch 7 - iter 14/73 - loss 0.63858519 - samples/sec: 22.27 - lr: 0.010000\n",
-      "2021-09-08 01:24:12,538 epoch 7 - iter 21/73 - loss 0.64097921 - samples/sec: 22.41 - lr: 0.010000\n",
-      "2021-09-08 01:24:12,869 epoch 7 - iter 28/73 - loss 0.62407290 - samples/sec: 21.22 - lr: 0.010000\n",
-      "2021-09-08 01:24:13,199 epoch 7 - iter 35/73 - loss 0.62115527 - samples/sec: 21.27 - lr: 0.010000\n",
-      "2021-09-08 01:24:13,533 epoch 7 - iter 42/73 - loss 0.59856930 - samples/sec: 21.00 - lr: 0.010000\n",
-      "2021-09-08 01:24:13,875 epoch 7 - iter 49/73 - loss 0.56750935 - samples/sec: 20.50 - lr: 0.010000\n",
-      "2021-09-08 01:24:14,209 epoch 7 - iter 56/73 - loss 0.56035704 - samples/sec: 20.98 - lr: 0.010000\n",
-      "2021-09-08 01:24:14,538 epoch 7 - iter 63/73 - loss 0.54379901 - samples/sec: 21.36 - lr: 0.010000\n",
-      "2021-09-08 01:24:14,866 epoch 7 - iter 70/73 - loss 0.53265061 - samples/sec: 21.32 - lr: 0.010000\n",
-      "2021-09-08 01:24:15,008 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:15,009 EPOCH 7 done: loss 0.5316 - lr 0.0100000\n",
-      "2021-09-08 01:24:15,132 DEV : loss 0.7035550475120544 - score 0.25\n",
-      "2021-09-08 01:24:15,133 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:24:15,134 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:15,469 epoch 8 - iter 7/73 - loss 0.47965028 - samples/sec: 21.81 - lr: 0.010000\n",
-      "2021-09-08 01:24:15,797 epoch 8 - iter 14/73 - loss 0.51234131 - samples/sec: 21.39 - lr: 0.010000\n",
-      "2021-09-08 01:24:16,125 epoch 8 - iter 21/73 - loss 0.50952226 - samples/sec: 21.41 - lr: 0.010000\n",
-      "2021-09-08 01:24:16,454 epoch 8 - iter 28/73 - loss 0.53068482 - samples/sec: 21.28 - lr: 0.010000\n",
-      "2021-09-08 01:24:16,794 epoch 8 - iter 35/73 - loss 0.53944033 - samples/sec: 20.66 - lr: 0.010000\n",
-      "2021-09-08 01:24:17,129 epoch 8 - iter 42/73 - loss 0.54090487 - samples/sec: 20.92 - lr: 0.010000\n",
-      "2021-09-08 01:24:17,462 epoch 8 - iter 49/73 - loss 0.50415314 - samples/sec: 21.08 - lr: 0.010000\n",
-      "2021-09-08 01:24:17,795 epoch 8 - iter 56/73 - loss 0.50332371 - samples/sec: 21.02 - lr: 0.010000\n",
-      "2021-09-08 01:24:18,123 epoch 8 - iter 63/73 - loss 0.49138118 - samples/sec: 21.43 - lr: 0.010000\n",
-      "2021-09-08 01:24:18,457 epoch 8 - iter 70/73 - loss 0.49955940 - samples/sec: 21.01 - lr: 0.010000\n",
-      "2021-09-08 01:24:18,609 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:18,610 EPOCH 8 done: loss 0.4941 - lr 0.0100000\n",
-      "2021-09-08 01:24:18,842 DEV : loss 0.4999798834323883 - score 0.125\n",
-      "2021-09-08 01:24:18,843 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:24:18,918 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:19,282 epoch 9 - iter 7/73 - loss 0.40841463 - samples/sec: 20.07 - lr: 0.010000\n",
-      "2021-09-08 01:24:19,614 epoch 9 - iter 14/73 - loss 0.44901214 - samples/sec: 21.16 - lr: 0.010000\n",
-      "2021-09-08 01:24:19,940 epoch 9 - iter 21/73 - loss 0.40408563 - samples/sec: 21.53 - lr: 0.010000\n",
-      "2021-09-08 01:24:20,265 epoch 9 - iter 28/73 - loss 0.42766175 - samples/sec: 21.57 - lr: 0.010000\n",
-      "2021-09-08 01:24:20,594 epoch 9 - iter 35/73 - loss 0.40863298 - samples/sec: 21.34 - lr: 0.010000\n",
-      "2021-09-08 01:24:20,923 epoch 9 - iter 42/73 - loss 0.42557045 - samples/sec: 21.33 - lr: 0.010000\n",
-      "2021-09-08 01:24:21,248 epoch 9 - iter 49/73 - loss 0.41828214 - samples/sec: 21.56 - lr: 0.010000\n",
-      "2021-09-08 01:24:21,703 epoch 9 - iter 56/73 - loss 0.42462344 - samples/sec: 15.40 - lr: 0.010000\n",
-      "2021-09-08 01:24:22,034 epoch 9 - iter 63/73 - loss 0.41564770 - samples/sec: 21.24 - lr: 0.010000\n",
-      "2021-09-08 01:24:22,362 epoch 9 - iter 70/73 - loss 0.44574302 - samples/sec: 21.36 - lr: 0.010000\n",
-      "2021-09-08 01:24:22,503 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:22,504 EPOCH 9 done: loss 0.4428 - lr 0.0100000\n",
-      "2021-09-08 01:24:22,629 DEV : loss 0.6280174255371094 - score 0.25\n",
-      "2021-09-08 01:24:22,630 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:24:22,635 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:22,976 epoch 10 - iter 7/73 - loss 0.38860023 - samples/sec: 21.37 - lr: 0.010000\n",
-      "2021-09-08 01:24:23,306 epoch 10 - iter 14/73 - loss 0.35994627 - samples/sec: 21.25 - lr: 0.010000\n",
-      "2021-09-08 01:24:23,631 epoch 10 - iter 21/73 - loss 0.42528907 - samples/sec: 21.56 - lr: 0.010000\n",
-      "2021-09-08 01:24:23,949 epoch 10 - iter 28/73 - loss 0.39685962 - samples/sec: 22.06 - lr: 0.010000\n",
-      "2021-09-08 01:24:24,288 epoch 10 - iter 35/73 - loss 0.42606765 - samples/sec: 20.69 - lr: 0.010000\n",
-      "2021-09-08 01:24:24,610 epoch 10 - iter 42/73 - loss 0.39655812 - samples/sec: 21.83 - lr: 0.010000\n",
-      "2021-09-08 01:24:24,932 epoch 10 - iter 49/73 - loss 0.39726287 - samples/sec: 21.79 - lr: 0.010000\n",
-      "2021-09-08 01:24:25,259 epoch 10 - iter 56/73 - loss 0.41613849 - samples/sec: 21.42 - lr: 0.010000\n",
-      "2021-09-08 01:24:25,585 epoch 10 - iter 63/73 - loss 0.42739896 - samples/sec: 21.53 - lr: 0.010000\n",
-      "2021-09-08 01:24:25,907 epoch 10 - iter 70/73 - loss 0.41323702 - samples/sec: 21.75 - lr: 0.010000\n",
-      "2021-09-08 01:24:26,050 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:26,051 EPOCH 10 done: loss 0.4317 - lr 0.0100000\n",
-      "2021-09-08 01:24:26,172 DEV : loss 0.6039746999740601 - score 0.25\n",
-      "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 01:24:26,173 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:24:30,250 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:30,250 Testing using best model ...\n",
-      "2021-09-08 01:24:30,252 loading file None/best-model.pt\n",
+      "2021-09-21 21:05:14,819 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:14,819 EPOCH 6 done: loss 0.6458 - lr 0.0200000\n",
+      "2021-09-21 21:05:17,381 DEV : loss 0.46536242961883545 - score 0.25\n",
+      "2021-09-21 21:05:17,382 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:05:22,002 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:22,517 epoch 7 - iter 7/73 - loss 0.64220818 - samples/sec: 14.87 - lr: 0.020000\n",
+      "2021-09-21 21:05:22,956 epoch 7 - iter 14/73 - loss 0.63998557 - samples/sec: 15.96 - lr: 0.020000\n",
+      "2021-09-21 21:05:23,394 epoch 7 - iter 21/73 - loss 0.63599430 - samples/sec: 16.02 - lr: 0.020000\n",
+      "2021-09-21 21:05:23,842 epoch 7 - iter 28/73 - loss 0.62938154 - samples/sec: 15.67 - lr: 0.020000\n",
+      "2021-09-21 21:05:24,268 epoch 7 - iter 35/73 - loss 0.63910054 - samples/sec: 16.46 - lr: 0.020000\n",
+      "2021-09-21 21:05:24,671 epoch 7 - iter 42/73 - loss 0.63858624 - samples/sec: 17.39 - lr: 0.020000\n",
+      "2021-09-21 21:05:25,121 epoch 7 - iter 49/73 - loss 0.64232019 - samples/sec: 15.61 - lr: 0.020000\n",
+      "2021-09-21 21:05:25,579 epoch 7 - iter 56/73 - loss 0.64213889 - samples/sec: 15.30 - lr: 0.020000\n",
+      "2021-09-21 21:05:26,005 epoch 7 - iter 63/73 - loss 0.64687313 - samples/sec: 16.47 - lr: 0.020000\n",
+      "2021-09-21 21:05:26,461 epoch 7 - iter 70/73 - loss 0.64812211 - samples/sec: 15.36 - lr: 0.020000\n",
+      "2021-09-21 21:05:26,626 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:26,627 EPOCH 7 done: loss 0.6478 - lr 0.0200000\n",
+      "2021-09-21 21:05:28,615 DEV : loss 0.5057493448257446 - score 0.125\n",
+      "2021-09-21 21:05:28,616 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:05:28,618 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:29,220 epoch 8 - iter 7/73 - loss 0.66394738 - samples/sec: 12.51 - lr: 0.020000\n",
+      "2021-09-21 21:05:29,702 epoch 8 - iter 14/73 - loss 0.64084664 - samples/sec: 14.55 - lr: 0.020000\n",
+      "2021-09-21 21:05:30,242 epoch 8 - iter 21/73 - loss 0.64031785 - samples/sec: 12.99 - lr: 0.020000\n",
+      "2021-09-21 21:05:30,778 epoch 8 - iter 28/73 - loss 0.63735888 - samples/sec: 13.07 - lr: 0.020000\n",
+      "2021-09-21 21:05:31,302 epoch 8 - iter 35/73 - loss 0.63828011 - samples/sec: 13.39 - lr: 0.020000\n",
+      "2021-09-21 21:05:31,859 epoch 8 - iter 42/73 - loss 0.63209387 - samples/sec: 12.59 - lr: 0.020000\n",
+      "2021-09-21 21:05:32,382 epoch 8 - iter 49/73 - loss 0.62973448 - samples/sec: 13.39 - lr: 0.020000\n",
+      "2021-09-21 21:05:32,923 epoch 8 - iter 56/73 - loss 0.63293393 - samples/sec: 12.96 - lr: 0.020000\n",
+      "2021-09-21 21:05:33,456 epoch 8 - iter 63/73 - loss 0.63516148 - samples/sec: 13.17 - lr: 0.020000\n",
+      "2021-09-21 21:05:33,955 epoch 8 - iter 70/73 - loss 0.63390831 - samples/sec: 14.05 - lr: 0.020000\n",
+      "2021-09-21 21:05:34,146 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:34,147 EPOCH 8 done: loss 0.6320 - lr 0.0200000\n",
+      "2021-09-21 21:05:34,630 DEV : loss 0.5351740717887878 - score 0.125\n",
+      "2021-09-21 21:05:34,631 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:05:34,643 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:35,241 epoch 9 - iter 7/73 - loss 0.69551904 - samples/sec: 12.87 - lr: 0.020000\n",
+      "2021-09-21 21:05:35,736 epoch 9 - iter 14/73 - loss 0.66934855 - samples/sec: 14.16 - lr: 0.020000\n",
+      "2021-09-21 21:05:36,320 epoch 9 - iter 21/73 - loss 0.65497738 - samples/sec: 12.00 - lr: 0.020000\n",
+      "2021-09-21 21:05:36,832 epoch 9 - iter 28/73 - loss 0.63826057 - samples/sec: 13.69 - lr: 0.020000\n",
+      "2021-09-21 21:05:37,384 epoch 9 - iter 35/73 - loss 0.64203716 - samples/sec: 12.69 - lr: 0.020000\n",
+      "2021-09-21 21:05:37,886 epoch 9 - iter 42/73 - loss 0.65535799 - samples/sec: 13.98 - lr: 0.020000\n",
+      "2021-09-21 21:05:38,439 epoch 9 - iter 49/73 - loss 0.65006700 - samples/sec: 12.68 - lr: 0.020000\n",
+      "2021-09-21 21:05:38,998 epoch 9 - iter 56/73 - loss 0.65373526 - samples/sec: 12.55 - lr: 0.020000\n",
+      "2021-09-21 21:05:39,483 epoch 9 - iter 63/73 - loss 0.65211172 - samples/sec: 14.44 - lr: 0.020000\n",
+      "2021-09-21 21:05:40,016 epoch 9 - iter 70/73 - loss 0.65269141 - samples/sec: 13.16 - lr: 0.020000\n",
+      "2021-09-21 21:05:40,261 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:40,261 EPOCH 9 done: loss 0.6520 - lr 0.0200000\n",
+      "2021-09-21 21:05:40,760 DEV : loss 0.4822269380092621 - score 0.0\n",
+      "2021-09-21 21:05:40,761 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:05:40,763 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:41,357 epoch 10 - iter 7/73 - loss 0.66961794 - samples/sec: 13.22 - lr: 0.020000\n",
+      "2021-09-21 21:05:41,875 epoch 10 - iter 14/73 - loss 0.64134533 - samples/sec: 13.52 - lr: 0.020000\n",
+      "2021-09-21 21:05:42,376 epoch 10 - iter 21/73 - loss 0.65218818 - samples/sec: 14.01 - lr: 0.020000\n",
+      "2021-09-21 21:05:42,889 epoch 10 - iter 28/73 - loss 0.64741921 - samples/sec: 13.65 - lr: 0.020000\n",
+      "2021-09-21 21:05:43,388 epoch 10 - iter 35/73 - loss 0.65598952 - samples/sec: 14.07 - lr: 0.020000\n",
+      "2021-09-21 21:05:43,883 epoch 10 - iter 42/73 - loss 0.65418835 - samples/sec: 14.17 - lr: 0.020000\n",
+      "2021-09-21 21:05:44,392 epoch 10 - iter 49/73 - loss 0.65558208 - samples/sec: 13.76 - lr: 0.020000\n",
+      "2021-09-21 21:05:44,805 epoch 10 - iter 56/73 - loss 0.65159580 - samples/sec: 17.01 - lr: 0.020000\n",
+      "2021-09-21 21:05:45,324 epoch 10 - iter 63/73 - loss 0.65016091 - samples/sec: 13.53 - lr: 0.020000\n",
+      "2021-09-21 21:05:45,797 epoch 10 - iter 70/73 - loss 0.65069880 - samples/sec: 14.82 - lr: 0.020000\n",
+      "2021-09-21 21:05:46,017 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:46,017 EPOCH 10 done: loss 0.6501 - lr 0.0200000\n",
+      "2021-09-21 21:05:46,369 DEV : loss 0.45257705450057983 - score 0.125\n",
+      "Epoch    10: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:05:46,370 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:05:52,170 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:52,171 Testing using best model ...\n",
+      "2021-09-21 21:05:52,200 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:24:35,132 \t0.2222\n",
-      "2021-09-08 01:24:35,133 \n",
+      "2021-09-21 21:05:57,255 \t0.0\n",
+      "2021-09-21 21:05:57,256 \n",
       "Results:\n",
-      "- F-score (micro) 0.2222\n",
-      "- F-score (macro) 0.1058\n",
-      "- Accuracy 0.2222\n",
+      "- F-score (micro) 0.0\n",
+      "- F-score (macro) 0.0\n",
+      "- Accuracy 0.0\n",
       "\n",
       "By class:\n",
       "              precision    recall  f1-score   support\n",
       "\n",
-      "         joy     0.0000    0.0000    0.0000         2\n",
-      "    surprise     0.0000    0.0000    0.0000         0\n",
+      "         joy     0.0000    0.0000    0.0000         1\n",
+      "    surprise     0.0000    0.0000    0.0000         1\n",
       "        love     0.0000    0.0000    0.0000         1\n",
-      "     disgust     0.5000    1.0000    0.6667         1\n",
-      "        fear     0.0000    0.0000    0.0000         2\n",
+      "     disgust     0.0000    0.0000    0.0000         1\n",
+      "        fear     0.0000    0.0000    0.0000         1\n",
       "       anger     0.0000    0.0000    0.0000         2\n",
       "       guilt     0.0000    0.0000    0.0000         0\n",
       "       shame     0.0000    0.0000    0.0000         0\n",
-      "     sadness     0.1667    1.0000    0.2857         1\n",
+      "     sadness     0.0000    0.0000    0.0000         2\n",
       "\n",
-      "   micro avg     0.2222    0.2222    0.2222         9\n",
-      "   macro avg     0.0741    0.2222    0.1058         9\n",
-      "weighted avg     0.0741    0.2222    0.1058         9\n",
-      " samples avg     0.2222    0.2222    0.2222         9\n",
+      "   micro avg     0.0000    0.0000    0.0000         9\n",
+      "   macro avg     0.0000    0.0000    0.0000         9\n",
+      "weighted avg     0.0000    0.0000    0.0000         9\n",
+      " samples avg     0.0000    0.0000    0.0000         9\n",
       "\n",
-      "2021-09-08 01:24:35,133 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:11,994 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
+      "2021-09-21 21:05:57,256 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:43,773 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:25:15,989 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:06:48,505 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 82/82 [00:00<00:00, 44550.90it/s]"
+      "100%|██████████| 82/82 [00:00<00:00, 32092.28it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:25:15,992 [b'joy', b'surprise', b'love', b'disgust', b'fear', b'anger', b'guilt', b'shame', b'sadness']\n",
-      "2021-09-08 01:25:16,001 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:16,002 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:06:48,510 [b'joy', b'surprise', b'love', b'disgust', b'fear', b'anger', b'guilt', b'shame', b'sadness']\n",
+      "2021-09-21 21:06:48,528 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:48,530 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -1509,25 +1508,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:25:16,003 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:16,003 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
-      "2021-09-08 01:25:16,004 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:16,004 Parameters:\n",
-      "2021-09-08 01:25:16,004  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:25:16,004  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:25:16,005  - patience: \"3\"\n",
-      "2021-09-08 01:25:16,005  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:25:16,005  - max_epochs: \"10\"\n",
-      "2021-09-08 01:25:16,005  - shuffle: \"True\"\n",
-      "2021-09-08 01:25:16,006  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:25:16,006  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:25:16,006 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:16,007 Model training base path: \"None\"\n",
-      "2021-09-08 01:25:16,007 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:16,007 Device: cuda:1\n",
-      "2021-09-08 01:25:16,007 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:16,008 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:25:16,014 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:06:48,531 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:48,531 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
+      "2021-09-21 21:06:48,531 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:48,532 Parameters:\n",
+      "2021-09-21 21:06:48,532  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:06:48,532  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:06:48,532  - patience: \"3\"\n",
+      "2021-09-21 21:06:48,533  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:06:48,533  - max_epochs: \"10\"\n",
+      "2021-09-21 21:06:48,533  - shuffle: \"True\"\n",
+      "2021-09-21 21:06:48,534  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:06:48,534  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:06:48,534 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:48,534 Model training base path: \"None\"\n",
+      "2021-09-21 21:06:48,535 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:48,535 Device: cuda:0\n",
+      "2021-09-21 21:06:48,535 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:48,536 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:06:48,557 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -1541,216 +1540,214 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:25:16,341 epoch 1 - iter 7/73 - loss 0.40207706 - samples/sec: 22.42 - lr: 0.020000\n",
-      "2021-09-08 01:25:16,670 epoch 1 - iter 14/73 - loss 0.52993988 - samples/sec: 21.33 - lr: 0.020000\n",
-      "2021-09-08 01:25:16,995 epoch 1 - iter 21/73 - loss 0.52060898 - samples/sec: 21.59 - lr: 0.020000\n",
-      "2021-09-08 01:25:17,319 epoch 1 - iter 28/73 - loss 0.49394363 - samples/sec: 21.65 - lr: 0.020000\n",
-      "2021-09-08 01:25:17,649 epoch 1 - iter 35/73 - loss 0.49024153 - samples/sec: 21.24 - lr: 0.020000\n",
-      "2021-09-08 01:25:17,970 epoch 1 - iter 42/73 - loss 0.48790460 - samples/sec: 21.84 - lr: 0.020000\n",
-      "2021-09-08 01:25:18,285 epoch 1 - iter 49/73 - loss 0.50612866 - samples/sec: 22.26 - lr: 0.020000\n",
-      "2021-09-08 01:25:18,619 epoch 1 - iter 56/73 - loss 0.53839817 - samples/sec: 20.98 - lr: 0.020000\n",
-      "2021-09-08 01:25:18,945 epoch 1 - iter 63/73 - loss 0.56252162 - samples/sec: 21.54 - lr: 0.020000\n",
-      "2021-09-08 01:25:19,271 epoch 1 - iter 70/73 - loss 0.56901094 - samples/sec: 21.51 - lr: 0.020000\n",
-      "2021-09-08 01:25:19,414 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:19,414 EPOCH 1 done: loss 0.5711 - lr 0.0200000\n",
-      "2021-09-08 01:25:19,538 DEV : loss 0.47326064109802246 - score 0.0\n",
-      "2021-09-08 01:25:19,539 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:06:49,342 epoch 1 - iter 7/73 - loss 0.46779873 - samples/sec: 10.65 - lr: 0.020000\n",
+      "2021-09-21 21:06:49,950 epoch 1 - iter 14/73 - loss 0.65204082 - samples/sec: 11.53 - lr: 0.020000\n",
+      "2021-09-21 21:06:50,481 epoch 1 - iter 21/73 - loss 0.61974897 - samples/sec: 13.18 - lr: 0.020000\n",
+      "2021-09-21 21:06:51,019 epoch 1 - iter 28/73 - loss 0.62516414 - samples/sec: 13.04 - lr: 0.020000\n",
+      "2021-09-21 21:06:51,555 epoch 1 - iter 35/73 - loss 0.62900433 - samples/sec: 13.07 - lr: 0.020000\n",
+      "2021-09-21 21:06:52,029 epoch 1 - iter 42/73 - loss 0.58749660 - samples/sec: 14.78 - lr: 0.020000\n",
+      "2021-09-21 21:06:52,420 epoch 1 - iter 49/73 - loss 0.61414984 - samples/sec: 17.94 - lr: 0.020000\n",
+      "2021-09-21 21:06:52,839 epoch 1 - iter 56/73 - loss 0.62462073 - samples/sec: 16.75 - lr: 0.020000\n",
+      "2021-09-21 21:06:53,263 epoch 1 - iter 63/73 - loss 0.62542186 - samples/sec: 16.55 - lr: 0.020000\n",
+      "2021-09-21 21:06:53,677 epoch 1 - iter 70/73 - loss 0.62890606 - samples/sec: 16.93 - lr: 0.020000\n",
+      "2021-09-21 21:06:53,854 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:53,855 EPOCH 1 done: loss 0.6269 - lr 0.0200000\n",
+      "2021-09-21 21:07:09,108 DEV : loss 0.47776806354522705 - score 0.0\n",
+      "2021-09-21 21:07:09,109 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:25:23,638 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:23,983 epoch 2 - iter 7/73 - loss 0.64762083 - samples/sec: 21.21 - lr: 0.020000\n",
-      "2021-09-08 01:25:24,314 epoch 2 - iter 14/73 - loss 0.68318910 - samples/sec: 21.23 - lr: 0.020000\n",
-      "2021-09-08 01:25:24,643 epoch 2 - iter 21/73 - loss 0.69282834 - samples/sec: 21.27 - lr: 0.020000\n",
-      "2021-09-08 01:25:24,971 epoch 2 - iter 28/73 - loss 0.68851689 - samples/sec: 21.38 - lr: 0.020000\n",
-      "2021-09-08 01:25:25,299 epoch 2 - iter 35/73 - loss 0.69993622 - samples/sec: 21.45 - lr: 0.020000\n",
-      "2021-09-08 01:25:25,625 epoch 2 - iter 42/73 - loss 0.70821187 - samples/sec: 21.47 - lr: 0.020000\n",
-      "2021-09-08 01:25:25,952 epoch 2 - iter 49/73 - loss 0.69116401 - samples/sec: 21.47 - lr: 0.020000\n",
-      "2021-09-08 01:25:26,278 epoch 2 - iter 56/73 - loss 0.67524281 - samples/sec: 21.50 - lr: 0.020000\n",
-      "2021-09-08 01:25:26,614 epoch 2 - iter 63/73 - loss 0.66321064 - samples/sec: 20.88 - lr: 0.020000\n",
-      "2021-09-08 01:25:26,952 epoch 2 - iter 70/73 - loss 0.65839307 - samples/sec: 20.77 - lr: 0.020000\n",
-      "2021-09-08 01:25:27,097 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:27,098 EPOCH 2 done: loss 0.6491 - lr 0.0200000\n",
-      "2021-09-08 01:25:27,237 DEV : loss 0.3634780943393707 - score 0.375\n",
-      "2021-09-08 01:25:27,238 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:07:15,891 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:16,214 epoch 2 - iter 7/73 - loss 0.67040484 - samples/sec: 22.84 - lr: 0.020000\n",
+      "2021-09-21 21:07:16,516 epoch 2 - iter 14/73 - loss 0.65260005 - samples/sec: 23.22 - lr: 0.020000\n",
+      "2021-09-21 21:07:16,815 epoch 2 - iter 21/73 - loss 0.65154808 - samples/sec: 23.53 - lr: 0.020000\n",
+      "2021-09-21 21:07:17,114 epoch 2 - iter 28/73 - loss 0.64783848 - samples/sec: 23.46 - lr: 0.020000\n",
+      "2021-09-21 21:07:17,416 epoch 2 - iter 35/73 - loss 0.65137045 - samples/sec: 23.19 - lr: 0.020000\n",
+      "2021-09-21 21:07:17,716 epoch 2 - iter 42/73 - loss 0.64487970 - samples/sec: 23.42 - lr: 0.020000\n",
+      "2021-09-21 21:07:18,016 epoch 2 - iter 49/73 - loss 0.64990706 - samples/sec: 23.35 - lr: 0.020000\n",
+      "2021-09-21 21:07:18,317 epoch 2 - iter 56/73 - loss 0.64860068 - samples/sec: 23.33 - lr: 0.020000\n",
+      "2021-09-21 21:07:18,613 epoch 2 - iter 63/73 - loss 0.64515235 - samples/sec: 23.64 - lr: 0.020000\n",
+      "2021-09-21 21:07:18,912 epoch 2 - iter 70/73 - loss 0.64937029 - samples/sec: 23.47 - lr: 0.020000\n",
+      "2021-09-21 21:07:19,041 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:19,041 EPOCH 2 done: loss 0.6509 - lr 0.0200000\n",
+      "2021-09-21 21:07:22,756 DEV : loss 0.45463424921035767 - score 0.0\n",
+      "2021-09-21 21:07:22,759 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:25:31,289 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:31,634 epoch 3 - iter 7/73 - loss 0.52304417 - samples/sec: 21.26 - lr: 0.020000\n",
-      "2021-09-08 01:25:31,960 epoch 3 - iter 14/73 - loss 0.47987687 - samples/sec: 21.48 - lr: 0.020000\n",
-      "2021-09-08 01:25:32,290 epoch 3 - iter 21/73 - loss 0.52662926 - samples/sec: 21.24 - lr: 0.020000\n",
-      "2021-09-08 01:25:32,620 epoch 3 - iter 28/73 - loss 0.56298321 - samples/sec: 21.27 - lr: 0.020000\n",
-      "2021-09-08 01:25:32,947 epoch 3 - iter 35/73 - loss 0.57197134 - samples/sec: 21.45 - lr: 0.020000\n",
-      "2021-09-08 01:25:33,274 epoch 3 - iter 42/73 - loss 0.56237737 - samples/sec: 21.48 - lr: 0.020000\n",
-      "2021-09-08 01:25:33,600 epoch 3 - iter 49/73 - loss 0.56868186 - samples/sec: 21.50 - lr: 0.020000\n",
-      "2021-09-08 01:25:33,921 epoch 3 - iter 56/73 - loss 0.54022306 - samples/sec: 21.85 - lr: 0.020000\n",
-      "2021-09-08 01:25:34,251 epoch 3 - iter 63/73 - loss 0.54182757 - samples/sec: 21.24 - lr: 0.020000\n",
-      "2021-09-08 01:25:34,578 epoch 3 - iter 70/73 - loss 0.54640037 - samples/sec: 21.42 - lr: 0.020000\n",
-      "2021-09-08 01:25:34,719 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:34,719 EPOCH 3 done: loss 0.5426 - lr 0.0200000\n",
-      "2021-09-08 01:25:34,845 DEV : loss 0.547095775604248 - score 0.125\n",
-      "2021-09-08 01:25:34,846 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:25:34,848 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:35,190 epoch 4 - iter 7/73 - loss 0.54077180 - samples/sec: 21.34 - lr: 0.020000\n",
-      "2021-09-08 01:25:35,517 epoch 4 - iter 14/73 - loss 0.50535156 - samples/sec: 21.44 - lr: 0.020000\n",
-      "2021-09-08 01:25:35,841 epoch 4 - iter 21/73 - loss 0.48314396 - samples/sec: 21.65 - lr: 0.020000\n",
-      "2021-09-08 01:25:36,171 epoch 4 - iter 28/73 - loss 0.45219100 - samples/sec: 21.24 - lr: 0.020000\n",
-      "2021-09-08 01:25:36,503 epoch 4 - iter 35/73 - loss 0.52420024 - samples/sec: 21.11 - lr: 0.020000\n",
-      "2021-09-08 01:25:36,831 epoch 4 - iter 42/73 - loss 0.54757146 - samples/sec: 21.43 - lr: 0.020000\n",
-      "2021-09-08 01:25:37,162 epoch 4 - iter 49/73 - loss 0.56870439 - samples/sec: 21.15 - lr: 0.020000\n",
-      "2021-09-08 01:25:37,483 epoch 4 - iter 56/73 - loss 0.54046069 - samples/sec: 21.89 - lr: 0.020000\n",
-      "2021-09-08 01:25:37,811 epoch 4 - iter 63/73 - loss 0.54500981 - samples/sec: 21.35 - lr: 0.020000\n",
-      "2021-09-08 01:25:38,131 epoch 4 - iter 70/73 - loss 0.52250026 - samples/sec: 21.90 - lr: 0.020000\n",
-      "2021-09-08 01:25:38,275 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:38,275 EPOCH 4 done: loss 0.5210 - lr 0.0200000\n",
-      "2021-09-08 01:25:38,400 DEV : loss 0.461135596036911 - score 0.25\n",
-      "2021-09-08 01:25:38,401 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:25:38,403 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:38,743 epoch 5 - iter 7/73 - loss 0.43604296 - samples/sec: 21.45 - lr: 0.020000\n",
-      "2021-09-08 01:25:39,065 epoch 5 - iter 14/73 - loss 0.48577300 - samples/sec: 21.80 - lr: 0.020000\n",
-      "2021-09-08 01:25:39,390 epoch 5 - iter 21/73 - loss 0.57844549 - samples/sec: 21.55 - lr: 0.020000\n",
-      "2021-09-08 01:25:39,719 epoch 5 - iter 28/73 - loss 0.60855083 - samples/sec: 21.35 - lr: 0.020000\n",
-      "2021-09-08 01:25:40,048 epoch 5 - iter 35/73 - loss 0.56992555 - samples/sec: 21.30 - lr: 0.020000\n",
-      "2021-09-08 01:25:40,371 epoch 5 - iter 42/73 - loss 0.54989400 - samples/sec: 21.69 - lr: 0.020000\n",
-      "2021-09-08 01:25:40,693 epoch 5 - iter 49/73 - loss 0.55649009 - samples/sec: 21.76 - lr: 0.020000\n",
-      "2021-09-08 01:25:41,025 epoch 5 - iter 56/73 - loss 0.54624612 - samples/sec: 21.17 - lr: 0.020000\n",
-      "2021-09-08 01:25:41,350 epoch 5 - iter 63/73 - loss 0.54723594 - samples/sec: 21.54 - lr: 0.020000\n",
-      "2021-09-08 01:25:41,675 epoch 5 - iter 70/73 - loss 0.52638988 - samples/sec: 21.60 - lr: 0.020000\n",
-      "2021-09-08 01:25:41,820 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:41,820 EPOCH 5 done: loss 0.5344 - lr 0.0200000\n",
-      "2021-09-08 01:25:41,944 DEV : loss 0.5270930528640747 - score 0.5\n",
-      "2021-09-08 01:25:41,945 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:07:27,292 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:27,894 epoch 3 - iter 7/73 - loss 0.66586444 - samples/sec: 12.72 - lr: 0.020000\n",
+      "2021-09-21 21:07:28,484 epoch 3 - iter 14/73 - loss 0.64508768 - samples/sec: 11.88 - lr: 0.020000\n",
+      "2021-09-21 21:07:29,073 epoch 3 - iter 21/73 - loss 0.65010733 - samples/sec: 11.89 - lr: 0.020000\n",
+      "2021-09-21 21:07:29,688 epoch 3 - iter 28/73 - loss 0.64205908 - samples/sec: 11.40 - lr: 0.020000\n",
+      "2021-09-21 21:07:30,247 epoch 3 - iter 35/73 - loss 0.65092753 - samples/sec: 12.55 - lr: 0.020000\n",
+      "2021-09-21 21:07:30,778 epoch 3 - iter 42/73 - loss 0.65042691 - samples/sec: 13.19 - lr: 0.020000\n",
+      "2021-09-21 21:07:31,344 epoch 3 - iter 49/73 - loss 0.64731581 - samples/sec: 12.38 - lr: 0.020000\n",
+      "2021-09-21 21:07:31,933 epoch 3 - iter 56/73 - loss 0.64651531 - samples/sec: 11.91 - lr: 0.020000\n",
+      "2021-09-21 21:07:32,398 epoch 3 - iter 63/73 - loss 0.64093993 - samples/sec: 15.10 - lr: 0.020000\n",
+      "2021-09-21 21:07:32,817 epoch 3 - iter 70/73 - loss 0.64592940 - samples/sec: 16.72 - lr: 0.020000\n",
+      "2021-09-21 21:07:32,966 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:32,966 EPOCH 3 done: loss 0.6459 - lr 0.0200000\n",
+      "2021-09-21 21:07:33,214 DEV : loss 0.48441219329833984 - score 0.0\n",
+      "2021-09-21 21:07:33,215 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:07:33,286 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:33,646 epoch 4 - iter 7/73 - loss 0.62498255 - samples/sec: 20.51 - lr: 0.020000\n",
+      "2021-09-21 21:07:33,968 epoch 4 - iter 14/73 - loss 0.64681674 - samples/sec: 21.78 - lr: 0.020000\n",
+      "2021-09-21 21:07:34,286 epoch 4 - iter 21/73 - loss 0.63169600 - samples/sec: 22.07 - lr: 0.020000\n",
+      "2021-09-21 21:07:34,607 epoch 4 - iter 28/73 - loss 0.63732720 - samples/sec: 21.87 - lr: 0.020000\n",
+      "2021-09-21 21:07:34,924 epoch 4 - iter 35/73 - loss 0.64255509 - samples/sec: 22.11 - lr: 0.020000\n",
+      "2021-09-21 21:07:35,247 epoch 4 - iter 42/73 - loss 0.64559263 - samples/sec: 21.72 - lr: 0.020000\n",
+      "2021-09-21 21:07:35,562 epoch 4 - iter 49/73 - loss 0.64701305 - samples/sec: 22.31 - lr: 0.020000\n",
+      "2021-09-21 21:07:35,876 epoch 4 - iter 56/73 - loss 0.64517688 - samples/sec: 22.30 - lr: 0.020000\n",
+      "2021-09-21 21:07:36,202 epoch 4 - iter 63/73 - loss 0.64968685 - samples/sec: 21.52 - lr: 0.020000\n",
+      "2021-09-21 21:07:36,521 epoch 4 - iter 70/73 - loss 0.64722491 - samples/sec: 22.06 - lr: 0.020000\n",
+      "2021-09-21 21:07:36,658 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:36,658 EPOCH 4 done: loss 0.6473 - lr 0.0200000\n",
+      "2021-09-21 21:07:36,923 DEV : loss 0.4856971800327301 - score 0.25\n",
+      "2021-09-21 21:07:36,924 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:25:49,744 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:50,087 epoch 6 - iter 7/73 - loss 0.48397700 - samples/sec: 21.38 - lr: 0.020000\n",
-      "2021-09-08 01:25:50,414 epoch 6 - iter 14/73 - loss 0.47723638 - samples/sec: 21.49 - lr: 0.020000\n",
-      "2021-09-08 01:25:50,742 epoch 6 - iter 21/73 - loss 0.47672299 - samples/sec: 21.37 - lr: 0.020000\n",
-      "2021-09-08 01:25:51,061 epoch 6 - iter 28/73 - loss 0.44426194 - samples/sec: 21.99 - lr: 0.020000\n",
-      "2021-09-08 01:25:51,384 epoch 6 - iter 35/73 - loss 0.43335651 - samples/sec: 21.69 - lr: 0.020000\n",
-      "2021-09-08 01:25:51,710 epoch 6 - iter 42/73 - loss 0.43441651 - samples/sec: 21.51 - lr: 0.020000\n",
-      "2021-09-08 01:25:52,032 epoch 6 - iter 49/73 - loss 0.43451411 - samples/sec: 21.81 - lr: 0.020000\n",
-      "2021-09-08 01:25:52,352 epoch 6 - iter 56/73 - loss 0.42183732 - samples/sec: 21.91 - lr: 0.020000\n",
-      "2021-09-08 01:25:52,677 epoch 6 - iter 63/73 - loss 0.43451675 - samples/sec: 21.60 - lr: 0.020000\n",
-      "2021-09-08 01:25:52,999 epoch 6 - iter 70/73 - loss 0.43716672 - samples/sec: 21.76 - lr: 0.020000\n",
-      "2021-09-08 01:25:53,143 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:07:45,773 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:46,094 epoch 5 - iter 7/73 - loss 0.64292043 - samples/sec: 22.91 - lr: 0.020000\n",
+      "2021-09-21 21:07:46,404 epoch 5 - iter 14/73 - loss 0.65858160 - samples/sec: 22.65 - lr: 0.020000\n",
+      "2021-09-21 21:07:46,722 epoch 5 - iter 21/73 - loss 0.64305361 - samples/sec: 22.07 - lr: 0.020000\n",
+      "2021-09-21 21:07:47,024 epoch 5 - iter 28/73 - loss 0.64218577 - samples/sec: 23.17 - lr: 0.020000\n",
+      "2021-09-21 21:07:47,327 epoch 5 - iter 35/73 - loss 0.64199835 - samples/sec: 23.19 - lr: 0.020000\n",
+      "2021-09-21 21:07:47,795 epoch 5 - iter 42/73 - loss 0.64608078 - samples/sec: 14.99 - lr: 0.020000\n",
+      "2021-09-21 21:07:48,213 epoch 5 - iter 49/73 - loss 0.64094363 - samples/sec: 16.77 - lr: 0.020000\n",
+      "2021-09-21 21:07:48,756 epoch 5 - iter 56/73 - loss 0.64646070 - samples/sec: 12.91 - lr: 0.020000\n",
+      "2021-09-21 21:07:49,257 epoch 5 - iter 63/73 - loss 0.64775521 - samples/sec: 13.99 - lr: 0.020000\n",
+      "2021-09-21 21:07:49,713 epoch 5 - iter 70/73 - loss 0.64838055 - samples/sec: 15.37 - lr: 0.020000\n",
+      "2021-09-21 21:07:49,887 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:49,887 EPOCH 5 done: loss 0.6479 - lr 0.0200000\n",
+      "2021-09-21 21:07:50,277 DEV : loss 0.5697077512741089 - score 0.25\n",
+      "2021-09-21 21:07:50,281 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:07:50,352 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:50,778 epoch 6 - iter 7/73 - loss 0.64337329 - samples/sec: 18.27 - lr: 0.020000\n",
+      "2021-09-21 21:07:51,191 epoch 6 - iter 14/73 - loss 0.63001423 - samples/sec: 16.99 - lr: 0.020000\n",
+      "2021-09-21 21:07:51,590 epoch 6 - iter 21/73 - loss 0.62819588 - samples/sec: 17.59 - lr: 0.020000\n",
+      "2021-09-21 21:07:52,036 epoch 6 - iter 28/73 - loss 0.63537390 - samples/sec: 15.70 - lr: 0.020000\n",
+      "2021-09-21 21:07:52,458 epoch 6 - iter 35/73 - loss 0.63774501 - samples/sec: 16.62 - lr: 0.020000\n",
+      "2021-09-21 21:07:52,847 epoch 6 - iter 42/73 - loss 0.63665061 - samples/sec: 18.04 - lr: 0.020000\n",
+      "2021-09-21 21:07:53,177 epoch 6 - iter 49/73 - loss 0.63926868 - samples/sec: 21.26 - lr: 0.020000\n",
+      "2021-09-21 21:07:53,578 epoch 6 - iter 56/73 - loss 0.63739233 - samples/sec: 17.46 - lr: 0.020000\n",
+      "2021-09-21 21:07:54,061 epoch 6 - iter 63/73 - loss 0.63945061 - samples/sec: 14.52 - lr: 0.020000\n",
+      "2021-09-21 21:07:54,599 epoch 6 - iter 70/73 - loss 0.64035537 - samples/sec: 13.03 - lr: 0.020000\n",
+      "2021-09-21 21:07:54,821 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:25:53,144 EPOCH 6 done: loss 0.4377 - lr 0.0200000\n",
-      "2021-09-08 01:25:53,271 DEV : loss 0.6451144218444824 - score 0.25\n",
-      "2021-09-08 01:25:53,272 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:25:53,274 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:53,609 epoch 7 - iter 7/73 - loss 0.36225067 - samples/sec: 21.86 - lr: 0.020000\n",
-      "2021-09-08 01:25:53,932 epoch 7 - iter 14/73 - loss 0.33121878 - samples/sec: 21.70 - lr: 0.020000\n",
-      "2021-09-08 01:25:54,252 epoch 7 - iter 21/73 - loss 0.28581395 - samples/sec: 21.86 - lr: 0.020000\n",
-      "2021-09-08 01:25:54,585 epoch 7 - iter 28/73 - loss 0.33543005 - samples/sec: 21.09 - lr: 0.020000\n",
-      "2021-09-08 01:25:54,916 epoch 7 - iter 35/73 - loss 0.34963095 - samples/sec: 21.18 - lr: 0.020000\n",
-      "2021-09-08 01:25:55,237 epoch 7 - iter 42/73 - loss 0.33099715 - samples/sec: 21.84 - lr: 0.020000\n",
-      "2021-09-08 01:25:55,566 epoch 7 - iter 49/73 - loss 0.36552907 - samples/sec: 21.34 - lr: 0.020000\n",
-      "2021-09-08 01:25:55,891 epoch 7 - iter 56/73 - loss 0.36308135 - samples/sec: 21.56 - lr: 0.020000\n",
-      "2021-09-08 01:25:56,217 epoch 7 - iter 63/73 - loss 0.36510914 - samples/sec: 21.52 - lr: 0.020000\n",
-      "2021-09-08 01:25:56,540 epoch 7 - iter 70/73 - loss 0.36851197 - samples/sec: 21.71 - lr: 0.020000\n",
-      "2021-09-08 01:25:56,678 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:56,679 EPOCH 7 done: loss 0.3753 - lr 0.0200000\n",
-      "2021-09-08 01:25:56,903 DEV : loss 0.6190465688705444 - score 0.5\n",
-      "2021-09-08 01:25:56,904 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:25:56,981 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:57,318 epoch 8 - iter 7/73 - loss 0.34454136 - samples/sec: 21.72 - lr: 0.020000\n",
-      "2021-09-08 01:25:57,648 epoch 8 - iter 14/73 - loss 0.40552326 - samples/sec: 21.25 - lr: 0.020000\n",
-      "2021-09-08 01:25:57,975 epoch 8 - iter 21/73 - loss 0.40003298 - samples/sec: 21.44 - lr: 0.020000\n",
-      "2021-09-08 01:25:58,295 epoch 8 - iter 28/73 - loss 0.36503091 - samples/sec: 21.96 - lr: 0.020000\n",
-      "2021-09-08 01:25:58,618 epoch 8 - iter 35/73 - loss 0.34964780 - samples/sec: 21.66 - lr: 0.020000\n",
-      "2021-09-08 01:25:58,934 epoch 8 - iter 42/73 - loss 0.36134643 - samples/sec: 22.20 - lr: 0.020000\n",
-      "2021-09-08 01:25:59,253 epoch 8 - iter 49/73 - loss 0.35665451 - samples/sec: 22.03 - lr: 0.020000\n",
-      "2021-09-08 01:25:59,575 epoch 8 - iter 56/73 - loss 0.34647274 - samples/sec: 21.73 - lr: 0.020000\n",
-      "2021-09-08 01:25:59,896 epoch 8 - iter 63/73 - loss 0.36300442 - samples/sec: 21.88 - lr: 0.020000\n",
-      "2021-09-08 01:26:00,214 epoch 8 - iter 70/73 - loss 0.34067985 - samples/sec: 22.04 - lr: 0.020000\n",
-      "2021-09-08 01:26:00,354 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:26:00,355 EPOCH 8 done: loss 0.3348 - lr 0.0200000\n",
-      "2021-09-08 01:26:00,478 DEV : loss 0.45142728090286255 - score 0.5\n",
-      "2021-09-08 01:26:00,479 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:26:05,676 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:26:06,025 epoch 9 - iter 7/73 - loss 0.75166850 - samples/sec: 21.01 - lr: 0.020000\n",
-      "2021-09-08 01:26:06,352 epoch 9 - iter 14/73 - loss 0.48675860 - samples/sec: 21.49 - lr: 0.020000\n",
-      "2021-09-08 01:26:06,675 epoch 9 - iter 21/73 - loss 0.51544756 - samples/sec: 21.70 - lr: 0.020000\n",
-      "2021-09-08 01:26:06,996 epoch 9 - iter 28/73 - loss 0.42954128 - samples/sec: 21.87 - lr: 0.020000\n",
-      "2021-09-08 01:26:07,319 epoch 9 - iter 35/73 - loss 0.41050795 - samples/sec: 21.67 - lr: 0.020000\n",
-      "2021-09-08 01:26:07,639 epoch 9 - iter 42/73 - loss 0.39019947 - samples/sec: 21.94 - lr: 0.020000\n",
-      "2021-09-08 01:26:07,960 epoch 9 - iter 49/73 - loss 0.37359943 - samples/sec: 21.86 - lr: 0.020000\n",
-      "2021-09-08 01:26:08,279 epoch 9 - iter 56/73 - loss 0.34611090 - samples/sec: 21.94 - lr: 0.020000\n",
-      "2021-09-08 01:26:08,600 epoch 9 - iter 63/73 - loss 0.34817159 - samples/sec: 21.86 - lr: 0.020000\n",
-      "2021-09-08 01:26:08,923 epoch 9 - iter 70/73 - loss 0.34147295 - samples/sec: 21.71 - lr: 0.020000\n",
-      "2021-09-08 01:26:09,064 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:26:09,064 EPOCH 9 done: loss 0.3435 - lr 0.0200000\n",
-      "2021-09-08 01:26:09,263 DEV : loss 0.6016842126846313 - score 0.625\n",
-      "2021-09-08 01:26:09,264 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:26:13,145 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:26:13,482 epoch 10 - iter 7/73 - loss 0.34476654 - samples/sec: 21.74 - lr: 0.020000\n",
-      "2021-09-08 01:26:13,803 epoch 10 - iter 14/73 - loss 0.29977627 - samples/sec: 21.87 - lr: 0.020000\n",
-      "2021-09-08 01:26:14,122 epoch 10 - iter 21/73 - loss 0.30533496 - samples/sec: 21.98 - lr: 0.020000\n",
-      "2021-09-08 01:26:14,439 epoch 10 - iter 28/73 - loss 0.28993492 - samples/sec: 22.14 - lr: 0.020000\n",
-      "2021-09-08 01:26:14,748 epoch 10 - iter 35/73 - loss 0.25270725 - samples/sec: 22.68 - lr: 0.020000\n",
-      "2021-09-08 01:26:15,078 epoch 10 - iter 42/73 - loss 0.26317101 - samples/sec: 21.26 - lr: 0.020000\n",
-      "2021-09-08 01:26:15,398 epoch 10 - iter 49/73 - loss 0.27898359 - samples/sec: 21.88 - lr: 0.020000\n",
-      "2021-09-08 01:26:15,719 epoch 10 - iter 56/73 - loss 0.26809377 - samples/sec: 21.91 - lr: 0.020000\n",
-      "2021-09-08 01:26:16,037 epoch 10 - iter 63/73 - loss 0.26635346 - samples/sec: 22.02 - lr: 0.020000\n",
-      "2021-09-08 01:26:16,352 epoch 10 - iter 70/73 - loss 0.25036011 - samples/sec: 22.27 - lr: 0.020000\n",
-      "2021-09-08 01:26:16,486 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:26:16,486 EPOCH 10 done: loss 0.2420 - lr 0.0200000\n",
-      "2021-09-08 01:26:16,612 DEV : loss 0.44252529740333557 - score 0.625\n",
-      "2021-09-08 01:26:16,613 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:26:26,855 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:26:26,856 Testing using best model ...\n",
-      "2021-09-08 01:26:26,858 loading file None/best-model.pt\n",
+      "2021-09-21 21:07:54,821 EPOCH 6 done: loss 0.6408 - lr 0.0200000\n",
+      "2021-09-21 21:07:55,339 DEV : loss 0.4790005087852478 - score 0.125\n",
+      "2021-09-21 21:07:55,340 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:07:55,342 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:55,867 epoch 7 - iter 7/73 - loss 0.66416232 - samples/sec: 15.57 - lr: 0.020000\n",
+      "2021-09-21 21:07:56,287 epoch 7 - iter 14/73 - loss 0.64641752 - samples/sec: 16.71 - lr: 0.020000\n",
+      "2021-09-21 21:07:56,701 epoch 7 - iter 21/73 - loss 0.64055791 - samples/sec: 16.93 - lr: 0.020000\n",
+      "2021-09-21 21:07:57,105 epoch 7 - iter 28/73 - loss 0.64438964 - samples/sec: 17.37 - lr: 0.020000\n",
+      "2021-09-21 21:07:57,542 epoch 7 - iter 35/73 - loss 0.64367687 - samples/sec: 16.05 - lr: 0.020000\n",
+      "2021-09-21 21:07:57,953 epoch 7 - iter 42/73 - loss 0.65271863 - samples/sec: 17.05 - lr: 0.020000\n",
+      "2021-09-21 21:07:58,372 epoch 7 - iter 49/73 - loss 0.65067244 - samples/sec: 16.76 - lr: 0.020000\n",
+      "2021-09-21 21:07:58,776 epoch 7 - iter 56/73 - loss 0.65194683 - samples/sec: 17.34 - lr: 0.020000\n",
+      "2021-09-21 21:07:59,200 epoch 7 - iter 63/73 - loss 0.65123360 - samples/sec: 16.53 - lr: 0.020000\n",
+      "2021-09-21 21:07:59,608 epoch 7 - iter 70/73 - loss 0.65190203 - samples/sec: 17.21 - lr: 0.020000\n",
+      "2021-09-21 21:07:59,784 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:59,785 EPOCH 7 done: loss 0.6521 - lr 0.0200000\n",
+      "2021-09-21 21:08:02,078 DEV : loss 0.5715916752815247 - score 0.125\n",
+      "2021-09-21 21:08:02,079 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:08:03,637 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:04,071 epoch 8 - iter 7/73 - loss 0.65039465 - samples/sec: 17.57 - lr: 0.020000\n",
+      "2021-09-21 21:08:04,527 epoch 8 - iter 14/73 - loss 0.64998939 - samples/sec: 15.42 - lr: 0.020000\n",
+      "2021-09-21 21:08:05,033 epoch 8 - iter 21/73 - loss 0.65134763 - samples/sec: 13.87 - lr: 0.020000\n",
+      "2021-09-21 21:08:05,605 epoch 8 - iter 28/73 - loss 0.64647617 - samples/sec: 12.25 - lr: 0.020000\n",
+      "2021-09-21 21:08:06,191 epoch 8 - iter 35/73 - loss 0.64639043 - samples/sec: 11.97 - lr: 0.020000\n",
+      "2021-09-21 21:08:06,842 epoch 8 - iter 42/73 - loss 0.64770596 - samples/sec: 10.76 - lr: 0.020000\n",
+      "2021-09-21 21:08:07,475 epoch 8 - iter 49/73 - loss 0.65092855 - samples/sec: 11.09 - lr: 0.020000\n",
+      "2021-09-21 21:08:08,081 epoch 8 - iter 56/73 - loss 0.64969251 - samples/sec: 11.56 - lr: 0.020000\n",
+      "2021-09-21 21:08:08,507 epoch 8 - iter 63/73 - loss 0.64938568 - samples/sec: 16.47 - lr: 0.020000\n",
+      "2021-09-21 21:08:08,908 epoch 8 - iter 70/73 - loss 0.64871619 - samples/sec: 17.48 - lr: 0.020000\n",
+      "2021-09-21 21:08:09,089 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:09,089 EPOCH 8 done: loss 0.6480 - lr 0.0200000\n",
+      "2021-09-21 21:08:09,486 DEV : loss 0.4332415461540222 - score 0.0\n",
+      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:08:09,487 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:08:09,607 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:10,031 epoch 9 - iter 7/73 - loss 0.64476529 - samples/sec: 17.98 - lr: 0.010000\n",
+      "2021-09-21 21:08:10,421 epoch 9 - iter 14/73 - loss 0.62123793 - samples/sec: 18.00 - lr: 0.010000\n",
+      "2021-09-21 21:08:10,808 epoch 9 - iter 21/73 - loss 0.63694948 - samples/sec: 18.10 - lr: 0.010000\n",
+      "2021-09-21 21:08:11,197 epoch 9 - iter 28/73 - loss 0.63776838 - samples/sec: 18.07 - lr: 0.010000\n",
+      "2021-09-21 21:08:11,570 epoch 9 - iter 35/73 - loss 0.63874133 - samples/sec: 18.82 - lr: 0.010000\n",
+      "2021-09-21 21:08:11,931 epoch 9 - iter 42/73 - loss 0.64167292 - samples/sec: 19.43 - lr: 0.010000\n",
+      "2021-09-21 21:08:12,324 epoch 9 - iter 49/73 - loss 0.63910626 - samples/sec: 17.85 - lr: 0.010000\n",
+      "2021-09-21 21:08:12,715 epoch 9 - iter 56/73 - loss 0.64303088 - samples/sec: 17.91 - lr: 0.010000\n",
+      "2021-09-21 21:08:13,107 epoch 9 - iter 63/73 - loss 0.64363689 - samples/sec: 17.90 - lr: 0.010000\n",
+      "2021-09-21 21:08:13,521 epoch 9 - iter 70/73 - loss 0.64213364 - samples/sec: 16.98 - lr: 0.010000\n",
+      "2021-09-21 21:08:13,685 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:13,685 EPOCH 9 done: loss 0.6424 - lr 0.0100000\n",
+      "2021-09-21 21:08:17,021 DEV : loss 0.46323269605636597 - score 0.0\n",
+      "2021-09-21 21:08:17,023 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:08:17,030 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:17,687 epoch 10 - iter 7/73 - loss 0.64727432 - samples/sec: 12.50 - lr: 0.010000\n",
+      "2021-09-21 21:08:18,171 epoch 10 - iter 14/73 - loss 0.64183708 - samples/sec: 14.52 - lr: 0.010000\n",
+      "2021-09-21 21:08:18,650 epoch 10 - iter 21/73 - loss 0.64026807 - samples/sec: 14.62 - lr: 0.010000\n",
+      "2021-09-21 21:08:19,052 epoch 10 - iter 28/73 - loss 0.63777360 - samples/sec: 17.46 - lr: 0.010000\n",
+      "2021-09-21 21:08:19,504 epoch 10 - iter 35/73 - loss 0.64602519 - samples/sec: 15.51 - lr: 0.010000\n",
+      "2021-09-21 21:08:20,021 epoch 10 - iter 42/73 - loss 0.64310963 - samples/sec: 13.57 - lr: 0.010000\n",
+      "2021-09-21 21:08:20,509 epoch 10 - iter 49/73 - loss 0.64530178 - samples/sec: 14.37 - lr: 0.010000\n",
+      "2021-09-21 21:08:20,983 epoch 10 - iter 56/73 - loss 0.64311583 - samples/sec: 14.76 - lr: 0.010000\n",
+      "2021-09-21 21:08:21,365 epoch 10 - iter 63/73 - loss 0.64532407 - samples/sec: 18.37 - lr: 0.010000\n",
+      "2021-09-21 21:08:21,757 epoch 10 - iter 70/73 - loss 0.64849768 - samples/sec: 17.91 - lr: 0.010000\n",
+      "2021-09-21 21:08:21,984 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:21,985 EPOCH 10 done: loss 0.6487 - lr 0.0100000\n",
+      "2021-09-21 21:08:22,536 DEV : loss 0.5440190434455872 - score 0.125\n",
+      "2021-09-21 21:08:22,537 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:08:26,444 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:26,445 Testing using best model ...\n",
+      "2021-09-21 21:08:26,446 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:26:31,741 \t0.2222\n",
-      "2021-09-08 01:26:31,741 \n",
+      "2021-09-21 21:08:34,512 \t0.1111\n",
+      "2021-09-21 21:08:34,513 \n",
       "Results:\n",
-      "- F-score (micro) 0.2222\n",
-      "- F-score (macro) 0.1111\n",
-      "- Accuracy 0.2222\n",
+      "- F-score (micro) 0.1111\n",
+      "- F-score (macro) 0.0222\n",
+      "- Accuracy 0.1111\n",
       "\n",
       "By class:\n",
       "              precision    recall  f1-score   support\n",
       "\n",
-      "         joy     1.0000    1.0000    1.0000         2\n",
+      "         joy     0.0000    0.0000    0.0000         1\n",
       "    surprise     0.0000    0.0000    0.0000         1\n",
-      "        love     0.0000    0.0000    0.0000         0\n",
-      "     disgust     0.0000    0.0000    0.0000         1\n",
-      "        fear     0.0000    0.0000    0.0000         0\n",
-      "       anger     0.0000    0.0000    0.0000         2\n",
-      "       guilt     0.0000    0.0000    0.0000         0\n",
-      "       shame     0.0000    0.0000    0.0000         0\n",
-      "     sadness     0.0000    0.0000    0.0000         3\n",
+      "        love     0.0000    0.0000    0.0000         1\n",
+      "     disgust     0.0000    0.0000    0.0000         0\n",
+      "        fear     0.0000    0.0000    0.0000         2\n",
+      "       anger     0.0000    0.0000    0.0000         0\n",
+      "       guilt     0.0000    0.0000    0.0000         2\n",
+      "       shame     0.0000    0.0000    0.0000         1\n",
+      "     sadness     0.1111    1.0000    0.2000         1\n",
       "\n",
-      "   micro avg     0.2222    0.2222    0.2222         9\n",
-      "   macro avg     0.1111    0.1111    0.1111         9\n",
-      "weighted avg     0.2222    0.2222    0.2222         9\n",
-      " samples avg     0.2222    0.2222    0.2222         9\n",
+      "   micro avg     0.1111    0.1111    0.1111         9\n",
+      "   macro avg     0.0123    0.1111    0.0222         9\n",
+      "weighted avg     0.0123    0.1111    0.0222         9\n",
+      " samples avg     0.1111    0.1111    0.1111         9\n",
       "\n",
-      "2021-09-08 01:26:31,742 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:27:08,570 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
+      "2021-09-21 21:08:34,513 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:50,665 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:27:12,573 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:09:55,081 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 82/82 [00:00<00:00, 42825.67it/s]"
+      "100%|██████████| 82/82 [00:00<00:00, 44026.23it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:27:12,576 [b'joy', b'surprise', b'love', b'disgust', b'fear', b'anger', b'guilt', b'shame', b'sadness']\n",
-      "2021-09-08 01:27:12,589 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:27:12,591 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:09:55,085 [b'joy', b'surprise', b'love', b'disgust', b'fear', b'anger', b'guilt', b'shame', b'sadness']\n",
+      "2021-09-21 21:09:55,159 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:55,162 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -2063,25 +2060,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:27:12,591 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:27:12,592 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
-      "2021-09-08 01:27:12,592 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:27:12,592 Parameters:\n",
-      "2021-09-08 01:27:12,593  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:27:12,593  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:27:12,593  - patience: \"3\"\n",
-      "2021-09-08 01:27:12,593  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:27:12,594  - max_epochs: \"10\"\n",
-      "2021-09-08 01:27:12,594  - shuffle: \"True\"\n",
-      "2021-09-08 01:27:12,594  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:27:12,595  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:27:12,595 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:27:12,595 Model training base path: \"None\"\n",
-      "2021-09-08 01:27:12,595 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:27:12,596 Device: cuda:1\n",
-      "2021-09-08 01:27:12,596 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:27:12,596 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:27:12,603 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:09:55,162 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:55,163 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
+      "2021-09-21 21:09:55,163 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:55,163 Parameters:\n",
+      "2021-09-21 21:09:55,164  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:09:55,165  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:09:55,165  - patience: \"3\"\n",
+      "2021-09-21 21:09:55,166  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:09:55,166  - max_epochs: \"10\"\n",
+      "2021-09-21 21:09:55,167  - shuffle: \"True\"\n",
+      "2021-09-21 21:09:55,167  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:09:55,168  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:09:55,168 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:55,169 Model training base path: \"None\"\n",
+      "2021-09-21 21:09:55,169 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:55,169 Device: cuda:0\n",
+      "2021-09-21 21:09:55,170 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:55,170 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:09:55,189 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -2095,215 +2092,213 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:27:12,933 epoch 1 - iter 7/73 - loss 0.28788943 - samples/sec: 22.17 - lr: 0.020000\n",
-      "2021-09-08 01:27:13,255 epoch 1 - iter 14/73 - loss 0.56215744 - samples/sec: 21.80 - lr: 0.020000\n",
-      "2021-09-08 01:27:13,577 epoch 1 - iter 21/73 - loss 0.58205043 - samples/sec: 21.78 - lr: 0.020000\n",
-      "2021-09-08 01:27:13,906 epoch 1 - iter 28/73 - loss 0.60152627 - samples/sec: 21.35 - lr: 0.020000\n",
-      "2021-09-08 01:27:14,233 epoch 1 - iter 35/73 - loss 0.58883908 - samples/sec: 21.44 - lr: 0.020000\n",
-      "2021-09-08 01:27:14,562 epoch 1 - iter 42/73 - loss 0.61306346 - samples/sec: 21.30 - lr: 0.020000\n",
-      "2021-09-08 01:27:14,879 epoch 1 - iter 49/73 - loss 0.61879267 - samples/sec: 22.13 - lr: 0.020000\n",
-      "2021-09-08 01:27:15,207 epoch 1 - iter 56/73 - loss 0.62434413 - samples/sec: 21.42 - lr: 0.020000\n",
-      "2021-09-08 01:27:15,541 epoch 1 - iter 63/73 - loss 0.62542305 - samples/sec: 20.99 - lr: 0.020000\n",
-      "2021-09-08 01:27:15,867 epoch 1 - iter 70/73 - loss 0.63638327 - samples/sec: 21.57 - lr: 0.020000\n",
-      "2021-09-08 01:27:16,007 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:27:16,007 EPOCH 1 done: loss 0.6377 - lr 0.0200000\n",
-      "2021-09-08 01:27:16,150 DEV : loss 0.43607085943222046 - score 0.125\n",
-      "2021-09-08 01:27:16,151 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:09:55,626 epoch 1 - iter 7/73 - loss 0.39125076 - samples/sec: 16.76 - lr: 0.020000\n",
+      "2021-09-21 21:09:55,997 epoch 1 - iter 14/73 - loss 0.41410592 - samples/sec: 18.90 - lr: 0.020000\n",
+      "2021-09-21 21:09:56,414 epoch 1 - iter 21/73 - loss 0.53928602 - samples/sec: 16.78 - lr: 0.020000\n",
+      "2021-09-21 21:09:56,830 epoch 1 - iter 28/73 - loss 0.56058227 - samples/sec: 16.89 - lr: 0.020000\n",
+      "2021-09-21 21:09:57,254 epoch 1 - iter 35/73 - loss 0.56418748 - samples/sec: 16.51 - lr: 0.020000\n",
+      "2021-09-21 21:09:57,705 epoch 1 - iter 42/73 - loss 0.55133454 - samples/sec: 15.56 - lr: 0.020000\n",
+      "2021-09-21 21:09:58,143 epoch 1 - iter 49/73 - loss 0.53337176 - samples/sec: 16.02 - lr: 0.020000\n",
+      "2021-09-21 21:09:58,640 epoch 1 - iter 56/73 - loss 0.53890676 - samples/sec: 14.11 - lr: 0.020000\n",
+      "2021-09-21 21:09:59,151 epoch 1 - iter 63/73 - loss 0.56078526 - samples/sec: 13.71 - lr: 0.020000\n",
+      "2021-09-21 21:09:59,642 epoch 1 - iter 70/73 - loss 0.58560330 - samples/sec: 14.30 - lr: 0.020000\n",
+      "2021-09-21 21:09:59,861 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:59,862 EPOCH 1 done: loss 0.5828 - lr 0.0200000\n",
+      "2021-09-21 21:10:00,266 DEV : loss 0.5954540967941284 - score 0.25\n",
+      "2021-09-21 21:10:00,267 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:27:21,473 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:27:21,817 epoch 2 - iter 7/73 - loss 0.65919658 - samples/sec: 21.29 - lr: 0.020000\n",
-      "2021-09-08 01:27:22,142 epoch 2 - iter 14/73 - loss 0.65575151 - samples/sec: 21.60 - lr: 0.020000\n",
-      "2021-09-08 01:27:22,474 epoch 2 - iter 21/73 - loss 0.68042924 - samples/sec: 21.09 - lr: 0.020000\n",
-      "2021-09-08 01:27:22,799 epoch 2 - iter 28/73 - loss 0.68027282 - samples/sec: 21.60 - lr: 0.020000\n",
-      "2021-09-08 01:27:23,127 epoch 2 - iter 35/73 - loss 0.68903641 - samples/sec: 21.40 - lr: 0.020000\n",
-      "2021-09-08 01:27:23,451 epoch 2 - iter 42/73 - loss 0.68881871 - samples/sec: 21.65 - lr: 0.020000\n",
-      "2021-09-08 01:27:23,772 epoch 2 - iter 49/73 - loss 0.67967770 - samples/sec: 21.83 - lr: 0.020000\n",
-      "2021-09-08 01:27:24,083 epoch 2 - iter 56/73 - loss 0.67491236 - samples/sec: 22.55 - lr: 0.020000\n",
-      "2021-09-08 01:27:24,389 epoch 2 - iter 63/73 - loss 0.66873934 - samples/sec: 22.96 - lr: 0.020000\n",
-      "2021-09-08 01:27:24,691 epoch 2 - iter 70/73 - loss 0.66323060 - samples/sec: 23.25 - lr: 0.020000\n",
-      "2021-09-08 01:27:24,823 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:27:24,824 EPOCH 2 done: loss 0.6612 - lr 0.0200000\n",
-      "2021-09-08 01:27:24,945 DEV : loss 0.5294440984725952 - score 0.25\n",
-      "2021-09-08 01:27:24,946 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:27:28,719 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:27:29,045 epoch 3 - iter 7/73 - loss 0.69710058 - samples/sec: 22.54 - lr: 0.020000\n",
-      "2021-09-08 01:27:29,350 epoch 3 - iter 14/73 - loss 0.68503063 - samples/sec: 23.04 - lr: 0.020000\n",
-      "2021-09-08 01:27:29,659 epoch 3 - iter 21/73 - loss 0.67916793 - samples/sec: 22.69 - lr: 0.020000\n",
-      "2021-09-08 01:27:29,964 epoch 3 - iter 28/73 - loss 0.68513043 - samples/sec: 22.99 - lr: 0.020000\n",
-      "2021-09-08 01:27:30,272 epoch 3 - iter 35/73 - loss 0.68227640 - samples/sec: 22.80 - lr: 0.020000\n",
-      "2021-09-08 01:27:30,573 epoch 3 - iter 42/73 - loss 0.67624606 - samples/sec: 23.29 - lr: 0.020000\n",
-      "2021-09-08 01:27:30,876 epoch 3 - iter 49/73 - loss 0.67128466 - samples/sec: 23.16 - lr: 0.020000\n",
-      "2021-09-08 01:27:31,186 epoch 3 - iter 56/73 - loss 0.66705713 - samples/sec: 22.59 - lr: 0.020000\n",
-      "2021-09-08 01:27:31,499 epoch 3 - iter 63/73 - loss 0.67212374 - samples/sec: 22.46 - lr: 0.020000\n",
-      "2021-09-08 01:27:31,825 epoch 3 - iter 70/73 - loss 0.67238136 - samples/sec: 21.47 - lr: 0.020000\n",
-      "2021-09-08 01:27:31,966 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:27:31,967 EPOCH 3 done: loss 0.6779 - lr 0.0200000\n",
-      "2021-09-08 01:27:32,088 DEV : loss 0.42358916997909546 - score 0.0\n",
-      "2021-09-08 01:27:32,089 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:27:32,091 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:27:32,430 epoch 4 - iter 7/73 - loss 0.69188413 - samples/sec: 21.53 - lr: 0.020000\n",
-      "2021-09-08 01:27:32,757 epoch 4 - iter 14/73 - loss 0.67487537 - samples/sec: 21.43 - lr: 0.020000\n",
-      "2021-09-08 01:27:33,084 epoch 4 - iter 21/73 - loss 0.66397373 - samples/sec: 21.49 - lr: 0.020000\n",
-      "2021-09-08 01:27:33,415 epoch 4 - iter 28/73 - loss 0.66925665 - samples/sec: 21.20 - lr: 0.020000\n",
-      "2021-09-08 01:27:33,747 epoch 4 - iter 35/73 - loss 0.67014577 - samples/sec: 21.14 - lr: 0.020000\n",
-      "2021-09-08 01:27:34,075 epoch 4 - iter 42/73 - loss 0.66700969 - samples/sec: 21.38 - lr: 0.020000\n",
-      "2021-09-08 01:27:34,393 epoch 4 - iter 49/73 - loss 0.67209374 - samples/sec: 22.05 - lr: 0.020000\n",
-      "2021-09-08 01:27:34,702 epoch 4 - iter 56/73 - loss 0.67123023 - samples/sec: 22.68 - lr: 0.020000\n",
-      "2021-09-08 01:27:35,007 epoch 4 - iter 63/73 - loss 0.66859658 - samples/sec: 23.05 - lr: 0.020000\n",
-      "2021-09-08 01:27:35,309 epoch 4 - iter 70/73 - loss 0.66397639 - samples/sec: 23.17 - lr: 0.020000\n",
-      "2021-09-08 01:27:35,440 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:27:35,440 EPOCH 4 done: loss 0.6612 - lr 0.0200000\n",
-      "2021-09-08 01:27:35,561 DEV : loss 0.4637017846107483 - score 0.25\n",
-      "2021-09-08 01:27:35,562 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:27:39,615 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:27:39,935 epoch 5 - iter 7/73 - loss 0.65879209 - samples/sec: 22.96 - lr: 0.020000\n",
-      "2021-09-08 01:27:40,238 epoch 5 - iter 14/73 - loss 0.65836236 - samples/sec: 23.17 - lr: 0.020000\n",
-      "2021-09-08 01:27:40,540 epoch 5 - iter 21/73 - loss 0.64803568 - samples/sec: 23.22 - lr: 0.020000\n",
-      "2021-09-08 01:27:40,844 epoch 5 - iter 28/73 - loss 0.65579387 - samples/sec: 23.06 - lr: 0.020000\n",
-      "2021-09-08 01:27:41,145 epoch 5 - iter 35/73 - loss 0.65613149 - samples/sec: 23.28 - lr: 0.020000\n",
-      "2021-09-08 01:27:41,447 epoch 5 - iter 42/73 - loss 0.65073598 - samples/sec: 23.26 - lr: 0.020000\n",
-      "2021-09-08 01:27:41,753 epoch 5 - iter 49/73 - loss 0.64486494 - samples/sec: 22.94 - lr: 0.020000\n",
-      "2021-09-08 01:27:42,055 epoch 5 - iter 56/73 - loss 0.64915728 - samples/sec: 23.21 - lr: 0.020000\n",
-      "2021-09-08 01:27:42,358 epoch 5 - iter 63/73 - loss 0.64486900 - samples/sec: 23.14 - lr: 0.020000\n",
-      "2021-09-08 01:27:42,660 epoch 5 - iter 70/73 - loss 0.64310799 - samples/sec: 23.27 - lr: 0.020000\n",
-      "2021-09-08 01:27:42,792 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:27:42,792 EPOCH 5 done: loss 0.6417 - lr 0.0200000\n",
-      "2021-09-08 01:27:42,913 DEV : loss 0.44266071915626526 - score 0.25\n",
-      "2021-09-08 01:27:42,914 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:27:46,869 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:27:47,190 epoch 6 - iter 7/73 - loss 0.61688410 - samples/sec: 22.88 - lr: 0.020000\n",
-      "2021-09-08 01:27:47,496 epoch 6 - iter 14/73 - loss 0.60387811 - samples/sec: 22.93 - lr: 0.020000\n",
-      "2021-09-08 01:27:47,802 epoch 6 - iter 21/73 - loss 0.61156395 - samples/sec: 22.99 - lr: 0.020000\n",
-      "2021-09-08 01:27:48,111 epoch 6 - iter 28/73 - loss 0.60481918 - samples/sec: 22.70 - lr: 0.020000\n",
-      "2021-09-08 01:27:48,428 epoch 6 - iter 35/73 - loss 0.62922449 - samples/sec: 22.13 - lr: 0.020000\n",
-      "2021-09-08 01:27:48,731 epoch 6 - iter 42/73 - loss 0.63373828 - samples/sec: 23.22 - lr: 0.020000\n",
-      "2021-09-08 01:27:49,036 epoch 6 - iter 49/73 - loss 0.63662476 - samples/sec: 22.99 - lr: 0.020000\n",
-      "2021-09-08 01:27:49,337 epoch 6 - iter 56/73 - loss 0.63572421 - samples/sec: 23.27 - lr: 0.020000\n",
-      "2021-09-08 01:27:49,639 epoch 6 - iter 63/73 - loss 0.63477239 - samples/sec: 23.20 - lr: 0.020000\n",
-      "2021-09-08 01:27:49,944 epoch 6 - iter 70/73 - loss 0.63688185 - samples/sec: 22.99 - lr: 0.020000\n"
+      "2021-09-21 21:10:04,373 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:04,987 epoch 2 - iter 7/73 - loss 0.71403321 - samples/sec: 13.30 - lr: 0.020000\n",
+      "2021-09-21 21:10:05,513 epoch 2 - iter 14/73 - loss 0.74160068 - samples/sec: 13.31 - lr: 0.020000\n",
+      "2021-09-21 21:10:06,028 epoch 2 - iter 21/73 - loss 0.70878254 - samples/sec: 13.62 - lr: 0.020000\n",
+      "2021-09-21 21:10:06,505 epoch 2 - iter 28/73 - loss 0.69992147 - samples/sec: 14.71 - lr: 0.020000\n",
+      "2021-09-21 21:10:06,990 epoch 2 - iter 35/73 - loss 0.69666527 - samples/sec: 14.46 - lr: 0.020000\n",
+      "2021-09-21 21:10:07,508 epoch 2 - iter 42/73 - loss 0.69048391 - samples/sec: 13.53 - lr: 0.020000\n",
+      "2021-09-21 21:10:07,959 epoch 2 - iter 49/73 - loss 0.69200322 - samples/sec: 15.53 - lr: 0.020000\n",
+      "2021-09-21 21:10:08,427 epoch 2 - iter 56/73 - loss 0.69145967 - samples/sec: 15.01 - lr: 0.020000\n",
+      "2021-09-21 21:10:08,953 epoch 2 - iter 63/73 - loss 0.68791389 - samples/sec: 13.32 - lr: 0.020000\n",
+      "2021-09-21 21:10:09,432 epoch 2 - iter 70/73 - loss 0.69098973 - samples/sec: 14.63 - lr: 0.020000\n",
+      "2021-09-21 21:10:09,640 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:09,641 EPOCH 2 done: loss 0.6901 - lr 0.0200000\n",
+      "2021-09-21 21:10:10,025 DEV : loss 0.44727611541748047 - score 0.0\n",
+      "2021-09-21 21:10:10,026 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:10:10,028 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:10,565 epoch 3 - iter 7/73 - loss 0.62759684 - samples/sec: 13.93 - lr: 0.020000\n",
+      "2021-09-21 21:10:11,018 epoch 3 - iter 14/73 - loss 0.61704546 - samples/sec: 15.49 - lr: 0.020000\n",
+      "2021-09-21 21:10:11,480 epoch 3 - iter 21/73 - loss 0.62312042 - samples/sec: 15.16 - lr: 0.020000\n",
+      "2021-09-21 21:10:11,956 epoch 3 - iter 28/73 - loss 0.63802445 - samples/sec: 14.73 - lr: 0.020000\n",
+      "2021-09-21 21:10:12,456 epoch 3 - iter 35/73 - loss 0.63674093 - samples/sec: 14.03 - lr: 0.020000\n",
+      "2021-09-21 21:10:12,889 epoch 3 - iter 42/73 - loss 0.64042206 - samples/sec: 16.17 - lr: 0.020000\n",
+      "2021-09-21 21:10:13,336 epoch 3 - iter 49/73 - loss 0.63718517 - samples/sec: 15.69 - lr: 0.020000\n",
+      "2021-09-21 21:10:13,806 epoch 3 - iter 56/73 - loss 0.63885780 - samples/sec: 14.92 - lr: 0.020000\n",
+      "2021-09-21 21:10:14,264 epoch 3 - iter 63/73 - loss 0.63882106 - samples/sec: 15.32 - lr: 0.020000\n",
+      "2021-09-21 21:10:14,759 epoch 3 - iter 70/73 - loss 0.63697260 - samples/sec: 14.16 - lr: 0.020000\n",
+      "2021-09-21 21:10:14,946 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:14,946 EPOCH 3 done: loss 0.6398 - lr 0.0200000\n",
+      "2021-09-21 21:10:15,325 DEV : loss 0.46896597743034363 - score 0.0\n",
+      "2021-09-21 21:10:15,325 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:10:15,327 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:15,831 epoch 4 - iter 7/73 - loss 0.63151926 - samples/sec: 15.63 - lr: 0.020000\n",
+      "2021-09-21 21:10:16,299 epoch 4 - iter 14/73 - loss 0.65255367 - samples/sec: 15.00 - lr: 0.020000\n",
+      "2021-09-21 21:10:16,734 epoch 4 - iter 21/73 - loss 0.65087845 - samples/sec: 16.09 - lr: 0.020000\n",
+      "2021-09-21 21:10:17,169 epoch 4 - iter 28/73 - loss 0.64592531 - samples/sec: 16.16 - lr: 0.020000\n",
+      "2021-09-21 21:10:17,577 epoch 4 - iter 35/73 - loss 0.64518102 - samples/sec: 17.17 - lr: 0.020000\n",
+      "2021-09-21 21:10:17,947 epoch 4 - iter 42/73 - loss 0.63679413 - samples/sec: 18.94 - lr: 0.020000\n",
+      "2021-09-21 21:10:18,353 epoch 4 - iter 49/73 - loss 0.62996128 - samples/sec: 17.26 - lr: 0.020000\n",
+      "2021-09-21 21:10:18,742 epoch 4 - iter 56/73 - loss 0.62869438 - samples/sec: 18.04 - lr: 0.020000\n",
+      "2021-09-21 21:10:19,159 epoch 4 - iter 63/73 - loss 0.62501260 - samples/sec: 16.82 - lr: 0.020000\n",
+      "2021-09-21 21:10:19,570 epoch 4 - iter 70/73 - loss 0.62541440 - samples/sec: 17.05 - lr: 0.020000\n",
+      "2021-09-21 21:10:19,751 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:19,751 EPOCH 4 done: loss 0.6302 - lr 0.0200000\n",
+      "2021-09-21 21:10:20,080 DEV : loss 0.5219247937202454 - score 0.0\n",
+      "2021-09-21 21:10:20,080 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:10:20,082 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:20,566 epoch 5 - iter 7/73 - loss 0.74065331 - samples/sec: 15.92 - lr: 0.020000\n",
+      "2021-09-21 21:10:20,993 epoch 5 - iter 14/73 - loss 0.71480943 - samples/sec: 16.42 - lr: 0.020000\n",
+      "2021-09-21 21:10:21,446 epoch 5 - iter 21/73 - loss 0.69098875 - samples/sec: 15.47 - lr: 0.020000\n",
+      "2021-09-21 21:10:21,974 epoch 5 - iter 28/73 - loss 0.68429350 - samples/sec: 13.28 - lr: 0.020000\n",
+      "2021-09-21 21:10:22,541 epoch 5 - iter 35/73 - loss 0.67029809 - samples/sec: 12.36 - lr: 0.020000\n",
+      "2021-09-21 21:10:23,036 epoch 5 - iter 42/73 - loss 0.66250738 - samples/sec: 14.18 - lr: 0.020000\n",
+      "2021-09-21 21:10:23,545 epoch 5 - iter 49/73 - loss 0.65614937 - samples/sec: 13.77 - lr: 0.020000\n",
+      "2021-09-21 21:10:24,022 epoch 5 - iter 56/73 - loss 0.65598524 - samples/sec: 14.73 - lr: 0.020000\n",
+      "2021-09-21 21:10:24,419 epoch 5 - iter 63/73 - loss 0.65651278 - samples/sec: 17.65 - lr: 0.020000\n",
+      "2021-09-21 21:10:24,833 epoch 5 - iter 70/73 - loss 0.65545653 - samples/sec: 16.94 - lr: 0.020000\n",
+      "2021-09-21 21:10:25,012 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:25,012 EPOCH 5 done: loss 0.6538 - lr 0.0200000\n",
+      "2021-09-21 21:10:25,353 DEV : loss 0.4752102792263031 - score 0.0\n",
+      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:10:25,354 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:10:25,423 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:25,836 epoch 6 - iter 7/73 - loss 0.62414263 - samples/sec: 18.35 - lr: 0.010000\n",
+      "2021-09-21 21:10:26,244 epoch 6 - iter 14/73 - loss 0.63035108 - samples/sec: 17.17 - lr: 0.010000\n",
+      "2021-09-21 21:10:26,642 epoch 6 - iter 21/73 - loss 0.64608531 - samples/sec: 17.65 - lr: 0.010000\n",
+      "2021-09-21 21:10:27,050 epoch 6 - iter 28/73 - loss 0.64413130 - samples/sec: 17.19 - lr: 0.010000\n",
+      "2021-09-21 21:10:27,464 epoch 6 - iter 35/73 - loss 0.63795214 - samples/sec: 16.93 - lr: 0.010000\n",
+      "2021-09-21 21:10:27,847 epoch 6 - iter 42/73 - loss 0.63894246 - samples/sec: 18.33 - lr: 0.010000\n",
+      "2021-09-21 21:10:28,263 epoch 6 - iter 49/73 - loss 0.63835692 - samples/sec: 16.85 - lr: 0.010000\n",
+      "2021-09-21 21:10:28,764 epoch 6 - iter 56/73 - loss 0.63711567 - samples/sec: 14.01 - lr: 0.010000\n",
+      "2021-09-21 21:10:29,234 epoch 6 - iter 63/73 - loss 0.63844968 - samples/sec: 14.91 - lr: 0.010000\n",
+      "2021-09-21 21:10:29,750 epoch 6 - iter 70/73 - loss 0.64069313 - samples/sec: 13.59 - lr: 0.010000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:27:50,075 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:27:50,075 EPOCH 6 done: loss 0.6373 - lr 0.0200000\n",
-      "2021-09-08 01:27:50,194 DEV : loss 0.4454892873764038 - score 0.0\n",
-      "2021-09-08 01:27:50,195 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:27:50,197 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:27:50,514 epoch 7 - iter 7/73 - loss 0.63690720 - samples/sec: 23.04 - lr: 0.020000\n",
-      "2021-09-08 01:27:50,817 epoch 7 - iter 14/73 - loss 0.63284745 - samples/sec: 23.18 - lr: 0.020000\n",
-      "2021-09-08 01:27:51,117 epoch 7 - iter 21/73 - loss 0.64008319 - samples/sec: 23.33 - lr: 0.020000\n",
-      "2021-09-08 01:27:51,424 epoch 7 - iter 28/73 - loss 0.63678512 - samples/sec: 22.87 - lr: 0.020000\n",
-      "2021-09-08 01:27:51,726 epoch 7 - iter 35/73 - loss 0.63600731 - samples/sec: 23.20 - lr: 0.020000\n",
-      "2021-09-08 01:27:52,029 epoch 7 - iter 42/73 - loss 0.63300960 - samples/sec: 23.14 - lr: 0.020000\n",
-      "2021-09-08 01:27:52,336 epoch 7 - iter 49/73 - loss 0.64171268 - samples/sec: 22.87 - lr: 0.020000\n",
-      "2021-09-08 01:27:52,637 epoch 7 - iter 56/73 - loss 0.63998368 - samples/sec: 23.26 - lr: 0.020000\n",
-      "2021-09-08 01:27:52,940 epoch 7 - iter 63/73 - loss 0.64145012 - samples/sec: 23.20 - lr: 0.020000\n",
-      "2021-09-08 01:27:53,242 epoch 7 - iter 70/73 - loss 0.64017243 - samples/sec: 23.22 - lr: 0.020000\n",
-      "2021-09-08 01:27:53,375 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:27:53,376 EPOCH 7 done: loss 0.6398 - lr 0.0200000\n",
-      "2021-09-08 01:27:53,495 DEV : loss 0.4793142080307007 - score 0.125\n",
-      "2021-09-08 01:27:53,495 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:27:53,497 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:27:53,815 epoch 8 - iter 7/73 - loss 0.65119651 - samples/sec: 23.04 - lr: 0.020000\n",
-      "2021-09-08 01:27:54,117 epoch 8 - iter 14/73 - loss 0.65842677 - samples/sec: 23.20 - lr: 0.020000\n",
-      "2021-09-08 01:27:54,420 epoch 8 - iter 21/73 - loss 0.65896009 - samples/sec: 23.17 - lr: 0.020000\n",
-      "2021-09-08 01:27:54,722 epoch 8 - iter 28/73 - loss 0.65612277 - samples/sec: 23.23 - lr: 0.020000\n",
-      "2021-09-08 01:27:55,026 epoch 8 - iter 35/73 - loss 0.65273509 - samples/sec: 23.05 - lr: 0.020000\n",
-      "2021-09-08 01:27:55,329 epoch 8 - iter 42/73 - loss 0.65062642 - samples/sec: 23.13 - lr: 0.020000\n",
-      "2021-09-08 01:27:55,631 epoch 8 - iter 49/73 - loss 0.64689101 - samples/sec: 23.22 - lr: 0.020000\n",
-      "2021-09-08 01:27:55,938 epoch 8 - iter 56/73 - loss 0.64285210 - samples/sec: 22.87 - lr: 0.020000\n",
-      "2021-09-08 01:27:56,240 epoch 8 - iter 63/73 - loss 0.64191243 - samples/sec: 23.23 - lr: 0.020000\n",
-      "2021-09-08 01:27:56,543 epoch 8 - iter 70/73 - loss 0.64263105 - samples/sec: 23.17 - lr: 0.020000\n",
-      "2021-09-08 01:27:56,676 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:27:56,676 EPOCH 8 done: loss 0.6432 - lr 0.0200000\n",
-      "2021-09-08 01:27:56,796 DEV : loss 0.4682483375072479 - score 0.0\n",
-      "2021-09-08 01:27:56,797 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:27:56,798 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:27:57,115 epoch 9 - iter 7/73 - loss 0.62611251 - samples/sec: 23.07 - lr: 0.020000\n",
-      "2021-09-08 01:27:57,418 epoch 9 - iter 14/73 - loss 0.63735968 - samples/sec: 23.21 - lr: 0.020000\n",
-      "2021-09-08 01:27:57,721 epoch 9 - iter 21/73 - loss 0.64514548 - samples/sec: 23.14 - lr: 0.020000\n",
-      "2021-09-08 01:27:58,025 epoch 9 - iter 28/73 - loss 0.64509344 - samples/sec: 23.04 - lr: 0.020000\n",
-      "2021-09-08 01:27:58,326 epoch 9 - iter 35/73 - loss 0.64247971 - samples/sec: 23.34 - lr: 0.020000\n",
-      "2021-09-08 01:27:58,626 epoch 9 - iter 42/73 - loss 0.64133931 - samples/sec: 23.34 - lr: 0.020000\n",
-      "2021-09-08 01:27:58,928 epoch 9 - iter 49/73 - loss 0.63860324 - samples/sec: 23.25 - lr: 0.020000\n",
-      "2021-09-08 01:27:59,233 epoch 9 - iter 56/73 - loss 0.63762040 - samples/sec: 22.95 - lr: 0.020000\n",
-      "2021-09-08 01:27:59,535 epoch 9 - iter 63/73 - loss 0.63735116 - samples/sec: 23.22 - lr: 0.020000\n",
-      "2021-09-08 01:27:59,839 epoch 9 - iter 70/73 - loss 0.63941860 - samples/sec: 23.09 - lr: 0.020000\n",
-      "2021-09-08 01:27:59,969 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:27:59,970 EPOCH 9 done: loss 0.6391 - lr 0.0200000\n",
-      "2021-09-08 01:28:00,090 DEV : loss 0.49601566791534424 - score 0.0\n",
-      "Epoch     9: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:28:00,091 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:28:00,093 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:28:00,412 epoch 10 - iter 7/73 - loss 0.63184532 - samples/sec: 22.93 - lr: 0.010000\n",
-      "2021-09-08 01:28:00,714 epoch 10 - iter 14/73 - loss 0.65507312 - samples/sec: 23.19 - lr: 0.010000\n",
-      "2021-09-08 01:28:01,016 epoch 10 - iter 21/73 - loss 0.64807729 - samples/sec: 23.25 - lr: 0.010000\n",
-      "2021-09-08 01:28:01,329 epoch 10 - iter 28/73 - loss 0.64923048 - samples/sec: 22.40 - lr: 0.010000\n",
-      "2021-09-08 01:28:01,640 epoch 10 - iter 35/73 - loss 0.64949476 - samples/sec: 22.56 - lr: 0.010000\n",
-      "2021-09-08 01:28:01,943 epoch 10 - iter 42/73 - loss 0.65040479 - samples/sec: 23.18 - lr: 0.010000\n",
-      "2021-09-08 01:28:02,244 epoch 10 - iter 49/73 - loss 0.65061202 - samples/sec: 23.26 - lr: 0.010000\n",
-      "2021-09-08 01:28:02,550 epoch 10 - iter 56/73 - loss 0.64703687 - samples/sec: 22.95 - lr: 0.010000\n",
-      "2021-09-08 01:28:02,855 epoch 10 - iter 63/73 - loss 0.64662252 - samples/sec: 22.99 - lr: 0.010000\n",
-      "2021-09-08 01:28:03,157 epoch 10 - iter 70/73 - loss 0.64446857 - samples/sec: 23.20 - lr: 0.010000\n",
-      "2021-09-08 01:28:03,288 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:28:03,288 EPOCH 10 done: loss 0.6445 - lr 0.0100000\n",
-      "2021-09-08 01:28:03,410 DEV : loss 0.4921010434627533 - score 0.0\n",
-      "2021-09-08 01:28:03,410 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:28:07,417 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:28:07,418 Testing using best model ...\n",
-      "2021-09-08 01:28:07,419 loading file None/best-model.pt\n",
+      "2021-09-21 21:10:29,954 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:29,954 EPOCH 6 done: loss 0.6405 - lr 0.0100000\n",
+      "2021-09-21 21:10:30,143 DEV : loss 0.4846979081630707 - score 0.125\n",
+      "2021-09-21 21:10:30,144 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:10:30,146 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:30,525 epoch 7 - iter 7/73 - loss 0.64981685 - samples/sec: 19.61 - lr: 0.010000\n",
+      "2021-09-21 21:10:30,853 epoch 7 - iter 14/73 - loss 0.62100358 - samples/sec: 21.41 - lr: 0.010000\n",
+      "2021-09-21 21:10:31,177 epoch 7 - iter 21/73 - loss 0.61753611 - samples/sec: 21.69 - lr: 0.010000\n",
+      "2021-09-21 21:10:31,483 epoch 7 - iter 28/73 - loss 0.62985229 - samples/sec: 22.91 - lr: 0.010000\n",
+      "2021-09-21 21:10:31,789 epoch 7 - iter 35/73 - loss 0.63156674 - samples/sec: 22.96 - lr: 0.010000\n",
+      "2021-09-21 21:10:32,091 epoch 7 - iter 42/73 - loss 0.63087614 - samples/sec: 23.24 - lr: 0.010000\n",
+      "2021-09-21 21:10:32,393 epoch 7 - iter 49/73 - loss 0.63525469 - samples/sec: 23.24 - lr: 0.010000\n",
+      "2021-09-21 21:10:32,700 epoch 7 - iter 56/73 - loss 0.63449700 - samples/sec: 22.87 - lr: 0.010000\n",
+      "2021-09-21 21:10:33,002 epoch 7 - iter 63/73 - loss 0.63487606 - samples/sec: 23.22 - lr: 0.010000\n",
+      "2021-09-21 21:10:33,306 epoch 7 - iter 70/73 - loss 0.63379942 - samples/sec: 23.12 - lr: 0.010000\n",
+      "2021-09-21 21:10:33,436 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:33,437 EPOCH 7 done: loss 0.6339 - lr 0.0100000\n",
+      "2021-09-21 21:10:33,691 DEV : loss 0.5189858078956604 - score 0.0\n",
+      "2021-09-21 21:10:33,691 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:10:33,791 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:34,113 epoch 8 - iter 7/73 - loss 0.61806442 - samples/sec: 22.73 - lr: 0.010000\n",
+      "2021-09-21 21:10:34,427 epoch 8 - iter 14/73 - loss 0.62250458 - samples/sec: 22.37 - lr: 0.010000\n",
+      "2021-09-21 21:10:34,736 epoch 8 - iter 21/73 - loss 0.63290192 - samples/sec: 22.76 - lr: 0.010000\n",
+      "2021-09-21 21:10:35,047 epoch 8 - iter 28/73 - loss 0.63301670 - samples/sec: 22.53 - lr: 0.010000\n",
+      "2021-09-21 21:10:35,463 epoch 8 - iter 35/73 - loss 0.64621642 - samples/sec: 16.87 - lr: 0.010000\n",
+      "2021-09-21 21:10:35,876 epoch 8 - iter 42/73 - loss 0.64786637 - samples/sec: 17.00 - lr: 0.010000\n",
+      "2021-09-21 21:10:36,285 epoch 8 - iter 49/73 - loss 0.64833792 - samples/sec: 17.16 - lr: 0.010000\n",
+      "2021-09-21 21:10:36,714 epoch 8 - iter 56/73 - loss 0.64473282 - samples/sec: 16.32 - lr: 0.010000\n",
+      "2021-09-21 21:10:37,126 epoch 8 - iter 63/73 - loss 0.64548868 - samples/sec: 17.05 - lr: 0.010000\n",
+      "2021-09-21 21:10:37,506 epoch 8 - iter 70/73 - loss 0.64771180 - samples/sec: 18.42 - lr: 0.010000\n",
+      "2021-09-21 21:10:37,682 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:37,683 EPOCH 8 done: loss 0.6470 - lr 0.0100000\n",
+      "2021-09-21 21:10:38,857 DEV : loss 0.4779745638370514 - score 0.0\n",
+      "2021-09-21 21:10:38,858 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:10:38,861 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:39,430 epoch 9 - iter 7/73 - loss 0.63044201 - samples/sec: 13.74 - lr: 0.010000\n",
+      "2021-09-21 21:10:39,919 epoch 9 - iter 14/73 - loss 0.63771769 - samples/sec: 14.34 - lr: 0.010000\n",
+      "2021-09-21 21:10:40,334 epoch 9 - iter 21/73 - loss 0.63402528 - samples/sec: 16.90 - lr: 0.010000\n",
+      "2021-09-21 21:10:40,709 epoch 9 - iter 28/73 - loss 0.64566473 - samples/sec: 18.71 - lr: 0.010000\n",
+      "2021-09-21 21:10:41,098 epoch 9 - iter 35/73 - loss 0.64138597 - samples/sec: 18.01 - lr: 0.010000\n",
+      "2021-09-21 21:10:41,476 epoch 9 - iter 42/73 - loss 0.63773129 - samples/sec: 18.56 - lr: 0.010000\n",
+      "2021-09-21 21:10:41,798 epoch 9 - iter 49/73 - loss 0.63540185 - samples/sec: 21.83 - lr: 0.010000\n",
+      "2021-09-21 21:10:42,164 epoch 9 - iter 56/73 - loss 0.63505250 - samples/sec: 19.17 - lr: 0.010000\n",
+      "2021-09-21 21:10:42,540 epoch 9 - iter 63/73 - loss 0.63294742 - samples/sec: 18.65 - lr: 0.010000\n",
+      "2021-09-21 21:10:42,901 epoch 9 - iter 70/73 - loss 0.63296172 - samples/sec: 19.45 - lr: 0.010000\n",
+      "2021-09-21 21:10:43,056 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:43,057 EPOCH 9 done: loss 0.6341 - lr 0.0100000\n",
+      "2021-09-21 21:10:44,012 DEV : loss 0.5053138136863708 - score 0.125\n",
+      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 21:10:44,013 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:10:44,127 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:44,720 epoch 10 - iter 7/73 - loss 0.63588603 - samples/sec: 13.11 - lr: 0.005000\n",
+      "2021-09-21 21:10:45,369 epoch 10 - iter 14/73 - loss 0.64064209 - samples/sec: 10.80 - lr: 0.005000\n",
+      "2021-09-21 21:10:45,890 epoch 10 - iter 21/73 - loss 0.64675525 - samples/sec: 13.45 - lr: 0.005000\n",
+      "2021-09-21 21:10:46,300 epoch 10 - iter 28/73 - loss 0.64858207 - samples/sec: 17.13 - lr: 0.005000\n",
+      "2021-09-21 21:10:46,688 epoch 10 - iter 35/73 - loss 0.64593202 - samples/sec: 18.05 - lr: 0.005000\n",
+      "2021-09-21 21:10:47,344 epoch 10 - iter 42/73 - loss 0.64358349 - samples/sec: 10.68 - lr: 0.005000\n",
+      "2021-09-21 21:10:48,048 epoch 10 - iter 49/73 - loss 0.64292862 - samples/sec: 9.96 - lr: 0.005000\n",
+      "2021-09-21 21:10:48,735 epoch 10 - iter 56/73 - loss 0.64130632 - samples/sec: 10.19 - lr: 0.005000\n",
+      "2021-09-21 21:10:49,256 epoch 10 - iter 63/73 - loss 0.63924581 - samples/sec: 13.45 - lr: 0.005000\n",
+      "2021-09-21 21:10:49,605 epoch 10 - iter 70/73 - loss 0.63887054 - samples/sec: 20.11 - lr: 0.005000\n",
+      "2021-09-21 21:10:49,737 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:49,737 EPOCH 10 done: loss 0.6381 - lr 0.0050000\n",
+      "2021-09-21 21:10:51,180 DEV : loss 0.4815981388092041 - score 0.0\n",
+      "2021-09-21 21:10:51,181 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:10:59,337 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:01,939 Testing using best model ...\n",
+      "2021-09-21 21:11:01,941 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:28:12,589 \t0.1111\n",
-      "2021-09-08 01:28:12,589 \n",
+      "2021-09-21 21:11:07,972 \t0.2222\n",
+      "2021-09-21 21:11:07,973 \n",
       "Results:\n",
-      "- F-score (micro) 0.1111\n",
-      "- F-score (macro) 0.0247\n",
-      "- Accuracy 0.1111\n",
+      "- F-score (micro) 0.2222\n",
+      "- F-score (macro) 0.0404\n",
+      "- Accuracy 0.2222\n",
       "\n",
       "By class:\n",
       "              precision    recall  f1-score   support\n",
       "\n",
       "         joy     0.0000    0.0000    0.0000         0\n",
       "    surprise     0.0000    0.0000    0.0000         2\n",
-      "        love     0.0000    0.0000    0.0000         0\n",
+      "        love     0.0000    0.0000    0.0000         1\n",
       "     disgust     0.0000    0.0000    0.0000         1\n",
-      "        fear     0.0000    0.0000    0.0000         4\n",
-      "       anger     0.1250    1.0000    0.2222         1\n",
-      "       guilt     0.0000    0.0000    0.0000         0\n",
-      "       shame     0.0000    0.0000    0.0000         1\n",
-      "     sadness     0.0000    0.0000    0.0000         0\n",
+      "        fear     0.0000    0.0000    0.0000         0\n",
+      "       anger     0.0000    0.0000    0.0000         2\n",
+      "       guilt     0.0000    0.0000    0.0000         1\n",
+      "       shame     0.0000    0.0000    0.0000         0\n",
+      "     sadness     0.2222    1.0000    0.3636         2\n",
       "\n",
-      "   micro avg     0.1111    0.1111    0.1111         9\n",
-      "   macro avg     0.0139    0.1111    0.0247         9\n",
-      "weighted avg     0.0139    0.1111    0.0247         9\n",
-      " samples avg     0.1111    0.1111    0.1111         9\n",
+      "   micro avg     0.2222    0.2222    0.2222         9\n",
+      "   macro avg     0.0247    0.1111    0.0404         9\n",
+      "weighted avg     0.0494    0.2222    0.0808         9\n",
+      " samples avg     0.2222    0.2222    0.2222         9\n",
       "\n",
-      "2021-09-08 01:28:12,589 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:28:49,168 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
+      "2021-09-21 21:11:07,973 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:14,835 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:28:53,114 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:12:20,246 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 82/82 [00:00<00:00, 41603.11it/s]"
+      "100%|██████████| 82/82 [00:00<00:00, 47471.76it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:28:53,118 [b'joy', b'surprise', b'love', b'disgust', b'fear', b'anger', b'guilt', b'shame', b'sadness']\n",
-      "2021-09-08 01:28:53,128 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:28:53,130 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:12:20,250 [b'joy', b'surprise', b'love', b'disgust', b'fear', b'anger', b'guilt', b'shame', b'sadness']\n",
+      "2021-09-21 21:12:20,258 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:20,260 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -2616,25 +2611,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:28:53,130 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:28:53,131 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
-      "2021-09-08 01:28:53,131 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:28:53,131 Parameters:\n",
-      "2021-09-08 01:28:53,132  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:28:53,132  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:28:53,132  - patience: \"3\"\n",
-      "2021-09-08 01:28:53,132  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:28:53,133  - max_epochs: \"10\"\n",
-      "2021-09-08 01:28:53,133  - shuffle: \"True\"\n",
-      "2021-09-08 01:28:53,133  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:28:53,133  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:28:53,134 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:28:53,134 Model training base path: \"None\"\n",
-      "2021-09-08 01:28:53,134 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:28:53,135 Device: cuda:1\n",
-      "2021-09-08 01:28:53,135 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:28:53,135 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:28:53,143 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:12:20,261 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:20,261 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
+      "2021-09-21 21:12:20,261 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:20,261 Parameters:\n",
+      "2021-09-21 21:12:20,262  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:12:20,262  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:12:20,262  - patience: \"3\"\n",
+      "2021-09-21 21:12:20,263  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:12:20,263  - max_epochs: \"10\"\n",
+      "2021-09-21 21:12:20,263  - shuffle: \"True\"\n",
+      "2021-09-21 21:12:20,263  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:12:20,264  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:12:20,264 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:20,264 Model training base path: \"None\"\n",
+      "2021-09-21 21:12:20,265 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:20,265 Device: cuda:0\n",
+      "2021-09-21 21:12:20,265 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:20,265 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:12:20,272 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -2648,170 +2643,171 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:28:53,468 epoch 1 - iter 7/73 - loss 0.23876814 - samples/sec: 22.47 - lr: 0.020000\n",
-      "2021-09-08 01:28:53,783 epoch 1 - iter 14/73 - loss 0.32276276 - samples/sec: 22.29 - lr: 0.020000\n",
-      "2021-09-08 01:28:54,103 epoch 1 - iter 21/73 - loss 0.42814781 - samples/sec: 21.92 - lr: 0.020000\n",
-      "2021-09-08 01:28:54,425 epoch 1 - iter 28/73 - loss 0.50544199 - samples/sec: 21.75 - lr: 0.020000\n",
-      "2021-09-08 01:28:54,756 epoch 1 - iter 35/73 - loss 0.53652171 - samples/sec: 21.20 - lr: 0.020000\n",
-      "2021-09-08 01:28:55,081 epoch 1 - iter 42/73 - loss 0.52783523 - samples/sec: 21.56 - lr: 0.020000\n",
-      "2021-09-08 01:28:55,406 epoch 1 - iter 49/73 - loss 0.53648697 - samples/sec: 21.60 - lr: 0.020000\n",
-      "2021-09-08 01:28:55,738 epoch 1 - iter 56/73 - loss 0.53021576 - samples/sec: 21.13 - lr: 0.020000\n",
-      "2021-09-08 01:28:56,063 epoch 1 - iter 63/73 - loss 0.53694693 - samples/sec: 21.57 - lr: 0.020000\n",
-      "2021-09-08 01:28:56,388 epoch 1 - iter 70/73 - loss 0.54757246 - samples/sec: 21.57 - lr: 0.020000\n",
-      "2021-09-08 01:28:56,529 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:28:56,529 EPOCH 1 done: loss 0.5385 - lr 0.0200000\n",
-      "2021-09-08 01:28:56,650 DEV : loss 0.6110658645629883 - score 0.25\n",
-      "2021-09-08 01:28:56,651 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:12:20,930 epoch 1 - iter 7/73 - loss 0.25694623 - samples/sec: 11.87 - lr: 0.020000\n",
+      "2021-09-21 21:12:21,552 epoch 1 - iter 14/73 - loss 0.49573464 - samples/sec: 11.27 - lr: 0.020000\n",
+      "2021-09-21 21:12:22,210 epoch 1 - iter 21/73 - loss 0.59173575 - samples/sec: 10.65 - lr: 0.020000\n",
+      "2021-09-21 21:12:22,747 epoch 1 - iter 28/73 - loss 0.58853854 - samples/sec: 13.05 - lr: 0.020000\n",
+      "2021-09-21 21:12:23,263 epoch 1 - iter 35/73 - loss 0.62341923 - samples/sec: 13.59 - lr: 0.020000\n",
+      "2021-09-21 21:12:23,696 epoch 1 - iter 42/73 - loss 0.59580203 - samples/sec: 16.20 - lr: 0.020000\n",
+      "2021-09-21 21:12:24,138 epoch 1 - iter 49/73 - loss 0.62555333 - samples/sec: 15.87 - lr: 0.020000\n",
+      "2021-09-21 21:12:24,592 epoch 1 - iter 56/73 - loss 0.63064514 - samples/sec: 15.47 - lr: 0.020000\n",
+      "2021-09-21 21:12:25,026 epoch 1 - iter 63/73 - loss 0.62159365 - samples/sec: 16.13 - lr: 0.020000\n",
+      "2021-09-21 21:12:25,451 epoch 1 - iter 70/73 - loss 0.62609827 - samples/sec: 16.53 - lr: 0.020000\n",
+      "2021-09-21 21:12:25,616 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:25,617 EPOCH 1 done: loss 0.6084 - lr 0.0200000\n",
+      "2021-09-21 21:12:25,972 DEV : loss 0.7548813223838806 - score 0.125\n",
+      "2021-09-21 21:12:25,973 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:29:00,907 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:29:01,252 epoch 2 - iter 7/73 - loss 0.60649565 - samples/sec: 21.23 - lr: 0.020000\n",
-      "2021-09-08 01:29:01,574 epoch 2 - iter 14/73 - loss 0.55035456 - samples/sec: 21.79 - lr: 0.020000\n",
-      "2021-09-08 01:29:01,909 epoch 2 - iter 21/73 - loss 0.62543427 - samples/sec: 20.93 - lr: 0.020000\n",
-      "2021-09-08 01:29:02,244 epoch 2 - iter 28/73 - loss 0.64529084 - samples/sec: 20.91 - lr: 0.020000\n",
-      "2021-09-08 01:29:02,575 epoch 2 - iter 35/73 - loss 0.63923235 - samples/sec: 21.24 - lr: 0.020000\n",
-      "2021-09-08 01:29:02,905 epoch 2 - iter 42/73 - loss 0.64694443 - samples/sec: 21.26 - lr: 0.020000\n",
-      "2021-09-08 01:29:03,235 epoch 2 - iter 49/73 - loss 0.66621607 - samples/sec: 21.22 - lr: 0.020000\n",
-      "2021-09-08 01:29:03,560 epoch 2 - iter 56/73 - loss 0.67248531 - samples/sec: 21.58 - lr: 0.020000\n",
-      "2021-09-08 01:29:03,889 epoch 2 - iter 63/73 - loss 0.66505194 - samples/sec: 21.37 - lr: 0.020000\n",
-      "2021-09-08 01:29:04,214 epoch 2 - iter 70/73 - loss 0.67009984 - samples/sec: 21.55 - lr: 0.020000\n",
-      "2021-09-08 01:29:04,355 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:29:04,355 EPOCH 2 done: loss 0.6668 - lr 0.0200000\n",
-      "2021-09-08 01:29:04,475 DEV : loss 0.6492359638214111 - score 0.125\n",
-      "2021-09-08 01:29:04,476 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:29:04,479 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:29:04,823 epoch 3 - iter 7/73 - loss 0.46343111 - samples/sec: 21.22 - lr: 0.020000\n",
-      "2021-09-08 01:29:05,150 epoch 3 - iter 14/73 - loss 0.60957015 - samples/sec: 21.46 - lr: 0.020000\n",
-      "2021-09-08 01:29:05,476 epoch 3 - iter 21/73 - loss 0.59081929 - samples/sec: 21.49 - lr: 0.020000\n",
-      "2021-09-08 01:29:05,797 epoch 3 - iter 28/73 - loss 0.60278356 - samples/sec: 21.84 - lr: 0.020000\n",
-      "2021-09-08 01:29:06,126 epoch 3 - iter 35/73 - loss 0.58830182 - samples/sec: 21.32 - lr: 0.020000\n",
-      "2021-09-08 01:29:06,448 epoch 3 - iter 42/73 - loss 0.58957395 - samples/sec: 21.78 - lr: 0.020000\n",
-      "2021-09-08 01:29:06,773 epoch 3 - iter 49/73 - loss 0.61918100 - samples/sec: 21.60 - lr: 0.020000\n",
-      "2021-09-08 01:29:07,101 epoch 3 - iter 56/73 - loss 0.62936418 - samples/sec: 21.43 - lr: 0.020000\n",
-      "2021-09-08 01:29:07,410 epoch 3 - iter 63/73 - loss 0.63370801 - samples/sec: 22.65 - lr: 0.020000\n",
-      "2021-09-08 01:29:07,715 epoch 3 - iter 70/73 - loss 0.63499420 - samples/sec: 23.03 - lr: 0.020000\n",
-      "2021-09-08 01:29:07,848 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:29:07,849 EPOCH 3 done: loss 0.6351 - lr 0.0200000\n",
-      "2021-09-08 01:29:07,976 DEV : loss 0.4963236153125763 - score 0.0\n",
-      "2021-09-08 01:29:07,977 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:29:07,979 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:29:08,297 epoch 4 - iter 7/73 - loss 0.63015831 - samples/sec: 22.98 - lr: 0.020000\n",
-      "2021-09-08 01:29:08,600 epoch 4 - iter 14/73 - loss 0.62568058 - samples/sec: 23.14 - lr: 0.020000\n",
-      "2021-09-08 01:29:08,903 epoch 4 - iter 21/73 - loss 0.64769521 - samples/sec: 23.19 - lr: 0.020000\n",
-      "2021-09-08 01:29:09,204 epoch 4 - iter 28/73 - loss 0.62206252 - samples/sec: 23.28 - lr: 0.020000\n",
-      "2021-09-08 01:29:09,513 epoch 4 - iter 35/73 - loss 0.63319906 - samples/sec: 22.65 - lr: 0.020000\n",
-      "2021-09-08 01:29:09,819 epoch 4 - iter 42/73 - loss 0.64083170 - samples/sec: 22.96 - lr: 0.020000\n",
-      "2021-09-08 01:29:10,123 epoch 4 - iter 49/73 - loss 0.64380927 - samples/sec: 23.06 - lr: 0.020000\n",
-      "2021-09-08 01:29:10,426 epoch 4 - iter 56/73 - loss 0.64272120 - samples/sec: 23.15 - lr: 0.020000\n",
-      "2021-09-08 01:29:10,729 epoch 4 - iter 63/73 - loss 0.64456476 - samples/sec: 23.13 - lr: 0.020000\n",
-      "2021-09-08 01:29:11,039 epoch 4 - iter 70/73 - loss 0.64481689 - samples/sec: 22.67 - lr: 0.020000\n",
-      "2021-09-08 01:29:11,169 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:29:11,170 EPOCH 4 done: loss 0.6444 - lr 0.0200000\n",
-      "2021-09-08 01:29:11,290 DEV : loss 0.5055067539215088 - score 0.0\n",
-      "2021-09-08 01:29:11,291 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:29:11,292 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:29:11,614 epoch 5 - iter 7/73 - loss 0.63613628 - samples/sec: 22.71 - lr: 0.020000\n",
-      "2021-09-08 01:29:11,918 epoch 5 - iter 14/73 - loss 0.63481075 - samples/sec: 23.11 - lr: 0.020000\n",
-      "2021-09-08 01:29:12,221 epoch 5 - iter 21/73 - loss 0.63686167 - samples/sec: 23.11 - lr: 0.020000\n",
-      "2021-09-08 01:29:12,525 epoch 5 - iter 28/73 - loss 0.64749105 - samples/sec: 23.12 - lr: 0.020000\n",
-      "2021-09-08 01:29:12,830 epoch 5 - iter 35/73 - loss 0.64457820 - samples/sec: 22.97 - lr: 0.020000\n",
-      "2021-09-08 01:29:13,133 epoch 5 - iter 42/73 - loss 0.64036971 - samples/sec: 23.16 - lr: 0.020000\n",
-      "2021-09-08 01:29:13,435 epoch 5 - iter 49/73 - loss 0.64541000 - samples/sec: 23.20 - lr: 0.020000\n",
-      "2021-09-08 01:29:13,740 epoch 5 - iter 56/73 - loss 0.64615412 - samples/sec: 22.97 - lr: 0.020000\n",
-      "2021-09-08 01:29:14,044 epoch 5 - iter 63/73 - loss 0.64544858 - samples/sec: 23.13 - lr: 0.020000\n",
-      "2021-09-08 01:29:14,348 epoch 5 - iter 70/73 - loss 0.64712893 - samples/sec: 23.06 - lr: 0.020000\n",
-      "2021-09-08 01:29:14,480 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:29:14,481 EPOCH 5 done: loss 0.6481 - lr 0.0200000\n",
-      "2021-09-08 01:29:14,600 DEV : loss 0.4674176573753357 - score 0.125\n",
-      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:29:14,601 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:29:14,603 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:29:14,920 epoch 6 - iter 7/73 - loss 0.64369017 - samples/sec: 23.10 - lr: 0.010000\n",
-      "2021-09-08 01:29:15,226 epoch 6 - iter 14/73 - loss 0.64370124 - samples/sec: 22.96 - lr: 0.010000\n",
-      "2021-09-08 01:29:15,529 epoch 6 - iter 21/73 - loss 0.64057152 - samples/sec: 23.13 - lr: 0.010000\n",
-      "2021-09-08 01:29:15,835 epoch 6 - iter 28/73 - loss 0.64363836 - samples/sec: 22.88 - lr: 0.010000\n",
-      "2021-09-08 01:29:16,139 epoch 6 - iter 35/73 - loss 0.64071922 - samples/sec: 23.11 - lr: 0.010000\n",
-      "2021-09-08 01:29:16,441 epoch 6 - iter 42/73 - loss 0.63829453 - samples/sec: 23.21 - lr: 0.010000\n",
-      "2021-09-08 01:29:16,752 epoch 6 - iter 49/73 - loss 0.64028731 - samples/sec: 22.53 - lr: 0.010000\n",
-      "2021-09-08 01:29:17,055 epoch 6 - iter 56/73 - loss 0.63984869 - samples/sec: 23.16 - lr: 0.010000\n",
-      "2021-09-08 01:29:17,359 epoch 6 - iter 63/73 - loss 0.64097439 - samples/sec: 23.09 - lr: 0.010000\n",
-      "2021-09-08 01:29:17,664 epoch 6 - iter 70/73 - loss 0.63998109 - samples/sec: 22.97 - lr: 0.010000\n"
+      "2021-09-21 21:12:35,860 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:36,407 epoch 2 - iter 7/73 - loss 0.73323954 - samples/sec: 13.52 - lr: 0.020000\n",
+      "2021-09-21 21:12:36,924 epoch 2 - iter 14/73 - loss 0.79220708 - samples/sec: 13.56 - lr: 0.020000\n",
+      "2021-09-21 21:12:37,441 epoch 2 - iter 21/73 - loss 0.75087617 - samples/sec: 13.57 - lr: 0.020000\n",
+      "2021-09-21 21:12:37,991 epoch 2 - iter 28/73 - loss 0.72456113 - samples/sec: 12.76 - lr: 0.020000\n",
+      "2021-09-21 21:12:38,418 epoch 2 - iter 35/73 - loss 0.72238724 - samples/sec: 16.43 - lr: 0.020000\n",
+      "2021-09-21 21:12:38,852 epoch 2 - iter 42/73 - loss 0.73384776 - samples/sec: 16.16 - lr: 0.020000\n",
+      "2021-09-21 21:12:39,280 epoch 2 - iter 49/73 - loss 0.72480748 - samples/sec: 16.37 - lr: 0.020000\n",
+      "2021-09-21 21:12:39,706 epoch 2 - iter 56/73 - loss 0.71746915 - samples/sec: 16.46 - lr: 0.020000\n",
+      "2021-09-21 21:12:40,122 epoch 2 - iter 63/73 - loss 0.70503469 - samples/sec: 16.88 - lr: 0.020000\n",
+      "2021-09-21 21:12:40,501 epoch 2 - iter 70/73 - loss 0.69917177 - samples/sec: 18.50 - lr: 0.020000\n",
+      "2021-09-21 21:12:40,689 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:40,689 EPOCH 2 done: loss 0.6976 - lr 0.0200000\n",
+      "2021-09-21 21:12:41,039 DEV : loss 0.46116989850997925 - score 0.0\n",
+      "2021-09-21 21:12:41,040 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:12:41,856 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:42,335 epoch 3 - iter 7/73 - loss 0.68987708 - samples/sec: 15.91 - lr: 0.020000\n",
+      "2021-09-21 21:12:42,734 epoch 3 - iter 14/73 - loss 0.67381467 - samples/sec: 17.57 - lr: 0.020000\n",
+      "2021-09-21 21:12:43,203 epoch 3 - iter 21/73 - loss 0.65730130 - samples/sec: 14.96 - lr: 0.020000\n",
+      "2021-09-21 21:12:43,654 epoch 3 - iter 28/73 - loss 0.65957037 - samples/sec: 15.54 - lr: 0.020000\n",
+      "2021-09-21 21:12:44,110 epoch 3 - iter 35/73 - loss 0.66328493 - samples/sec: 15.39 - lr: 0.020000\n",
+      "2021-09-21 21:12:44,588 epoch 3 - iter 42/73 - loss 0.65825854 - samples/sec: 14.64 - lr: 0.020000\n",
+      "2021-09-21 21:12:45,035 epoch 3 - iter 49/73 - loss 0.66236484 - samples/sec: 15.70 - lr: 0.020000\n",
+      "2021-09-21 21:12:45,511 epoch 3 - iter 56/73 - loss 0.66715549 - samples/sec: 14.70 - lr: 0.020000\n",
+      "2021-09-21 21:12:45,972 epoch 3 - iter 63/73 - loss 0.67223616 - samples/sec: 15.23 - lr: 0.020000\n",
+      "2021-09-21 21:12:46,431 epoch 3 - iter 70/73 - loss 0.66916297 - samples/sec: 15.28 - lr: 0.020000\n",
+      "2021-09-21 21:12:46,638 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:46,639 EPOCH 3 done: loss 0.6730 - lr 0.0200000\n",
+      "2021-09-21 21:12:46,961 DEV : loss 0.656056821346283 - score 0.125\n",
+      "2021-09-21 21:12:46,962 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:12:51,083 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:51,589 epoch 4 - iter 7/73 - loss 0.68784047 - samples/sec: 15.37 - lr: 0.020000\n",
+      "2021-09-21 21:12:52,062 epoch 4 - iter 14/73 - loss 0.66989616 - samples/sec: 14.83 - lr: 0.020000\n",
+      "2021-09-21 21:12:52,542 epoch 4 - iter 21/73 - loss 0.65627086 - samples/sec: 14.61 - lr: 0.020000\n",
+      "2021-09-21 21:12:52,903 epoch 4 - iter 28/73 - loss 0.65978134 - samples/sec: 19.39 - lr: 0.020000\n",
+      "2021-09-21 21:12:53,227 epoch 4 - iter 35/73 - loss 0.66186461 - samples/sec: 21.70 - lr: 0.020000\n",
+      "2021-09-21 21:12:53,539 epoch 4 - iter 42/73 - loss 0.65768065 - samples/sec: 22.43 - lr: 0.020000\n",
+      "2021-09-21 21:12:53,874 epoch 4 - iter 49/73 - loss 0.65539018 - samples/sec: 20.97 - lr: 0.020000\n",
+      "2021-09-21 21:12:54,189 epoch 4 - iter 56/73 - loss 0.65207687 - samples/sec: 22.24 - lr: 0.020000\n",
+      "2021-09-21 21:12:54,506 epoch 4 - iter 63/73 - loss 0.66041438 - samples/sec: 22.12 - lr: 0.020000\n",
+      "2021-09-21 21:12:54,831 epoch 4 - iter 70/73 - loss 0.65755109 - samples/sec: 21.58 - lr: 0.020000\n",
+      "2021-09-21 21:12:54,978 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:54,979 EPOCH 4 done: loss 0.6563 - lr 0.0200000\n",
+      "2021-09-21 21:12:55,147 DEV : loss 0.47160691022872925 - score 0.25\n",
+      "2021-09-21 21:12:55,148 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:12:58,913 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:59,421 epoch 5 - iter 7/73 - loss 0.61142268 - samples/sec: 14.56 - lr: 0.020000\n",
+      "2021-09-21 21:12:59,896 epoch 5 - iter 14/73 - loss 0.61865744 - samples/sec: 14.77 - lr: 0.020000\n",
+      "2021-09-21 21:13:00,369 epoch 5 - iter 21/73 - loss 0.62887100 - samples/sec: 14.80 - lr: 0.020000\n",
+      "2021-09-21 21:13:00,836 epoch 5 - iter 28/73 - loss 0.63587013 - samples/sec: 15.02 - lr: 0.020000\n",
+      "2021-09-21 21:13:01,289 epoch 5 - iter 35/73 - loss 0.63899005 - samples/sec: 15.50 - lr: 0.020000\n",
+      "2021-09-21 21:13:01,778 epoch 5 - iter 42/73 - loss 0.63630236 - samples/sec: 14.34 - lr: 0.020000\n",
+      "2021-09-21 21:13:02,256 epoch 5 - iter 49/73 - loss 0.63462382 - samples/sec: 14.67 - lr: 0.020000\n",
+      "2021-09-21 21:13:02,680 epoch 5 - iter 56/73 - loss 0.63878343 - samples/sec: 16.54 - lr: 0.020000\n",
+      "2021-09-21 21:13:03,248 epoch 5 - iter 63/73 - loss 0.63740028 - samples/sec: 12.34 - lr: 0.020000\n",
+      "2021-09-21 21:13:03,730 epoch 5 - iter 70/73 - loss 0.64177328 - samples/sec: 14.55 - lr: 0.020000\n",
+      "2021-09-21 21:13:03,921 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:03,921 EPOCH 5 done: loss 0.6442 - lr 0.0200000\n",
+      "2021-09-21 21:13:04,242 DEV : loss 0.41698533296585083 - score 0.0\n",
+      "2021-09-21 21:13:04,243 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:13:04,246 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:04,728 epoch 6 - iter 7/73 - loss 0.63879772 - samples/sec: 15.97 - lr: 0.020000\n",
+      "2021-09-21 21:13:05,122 epoch 6 - iter 14/73 - loss 0.65747938 - samples/sec: 17.81 - lr: 0.020000\n",
+      "2021-09-21 21:13:05,489 epoch 6 - iter 21/73 - loss 0.65829938 - samples/sec: 19.14 - lr: 0.020000\n",
+      "2021-09-21 21:13:05,907 epoch 6 - iter 28/73 - loss 0.65401948 - samples/sec: 16.76 - lr: 0.020000\n",
+      "2021-09-21 21:13:06,346 epoch 6 - iter 35/73 - loss 0.65146093 - samples/sec: 15.97 - lr: 0.020000\n",
+      "2021-09-21 21:13:06,787 epoch 6 - iter 42/73 - loss 0.65259923 - samples/sec: 15.92 - lr: 0.020000\n",
+      "2021-09-21 21:13:07,200 epoch 6 - iter 49/73 - loss 0.65086350 - samples/sec: 16.95 - lr: 0.020000\n",
+      "2021-09-21 21:13:07,614 epoch 6 - iter 56/73 - loss 0.64996324 - samples/sec: 16.97 - lr: 0.020000\n",
+      "2021-09-21 21:13:08,070 epoch 6 - iter 63/73 - loss 0.64909553 - samples/sec: 15.37 - lr: 0.020000\n",
+      "2021-09-21 21:13:08,668 epoch 6 - iter 70/73 - loss 0.65122119 - samples/sec: 11.71 - lr: 0.020000\n",
+      "2021-09-21 21:13:08,936 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:29:17,795 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:29:17,796 EPOCH 6 done: loss 0.6408 - lr 0.0100000\n",
-      "2021-09-08 01:29:17,918 DEV : loss 0.5398433804512024 - score 0.125\n",
-      "2021-09-08 01:29:17,918 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:29:17,922 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:29:18,240 epoch 7 - iter 7/73 - loss 0.63033612 - samples/sec: 23.00 - lr: 0.010000\n",
-      "2021-09-08 01:29:18,547 epoch 7 - iter 14/73 - loss 0.63029660 - samples/sec: 22.83 - lr: 0.010000\n",
-      "2021-09-08 01:29:18,851 epoch 7 - iter 21/73 - loss 0.63754397 - samples/sec: 23.08 - lr: 0.010000\n",
-      "2021-09-08 01:29:19,153 epoch 7 - iter 28/73 - loss 0.64069129 - samples/sec: 23.26 - lr: 0.010000\n",
-      "2021-09-08 01:29:19,459 epoch 7 - iter 35/73 - loss 0.64148872 - samples/sec: 22.85 - lr: 0.010000\n",
-      "2021-09-08 01:29:19,762 epoch 7 - iter 42/73 - loss 0.64172109 - samples/sec: 23.17 - lr: 0.010000\n",
-      "2021-09-08 01:29:20,064 epoch 7 - iter 49/73 - loss 0.64102915 - samples/sec: 23.21 - lr: 0.010000\n",
-      "2021-09-08 01:29:20,366 epoch 7 - iter 56/73 - loss 0.64306208 - samples/sec: 23.23 - lr: 0.010000\n",
-      "2021-09-08 01:29:20,672 epoch 7 - iter 63/73 - loss 0.64275841 - samples/sec: 22.95 - lr: 0.010000\n",
-      "2021-09-08 01:29:20,979 epoch 7 - iter 70/73 - loss 0.64248952 - samples/sec: 22.83 - lr: 0.010000\n",
-      "2021-09-08 01:29:21,110 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:29:21,110 EPOCH 7 done: loss 0.6413 - lr 0.0100000\n",
-      "2021-09-08 01:29:21,232 DEV : loss 0.4448155462741852 - score 0.125\n",
-      "2021-09-08 01:29:21,233 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:29:21,248 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:29:21,565 epoch 8 - iter 7/73 - loss 0.62741014 - samples/sec: 23.07 - lr: 0.010000\n",
-      "2021-09-08 01:29:21,870 epoch 8 - iter 14/73 - loss 0.64078136 - samples/sec: 22.97 - lr: 0.010000\n",
-      "2021-09-08 01:29:22,174 epoch 8 - iter 21/73 - loss 0.64065053 - samples/sec: 23.14 - lr: 0.010000\n",
-      "2021-09-08 01:29:22,478 epoch 8 - iter 28/73 - loss 0.63944484 - samples/sec: 23.07 - lr: 0.010000\n",
-      "2021-09-08 01:29:22,783 epoch 8 - iter 35/73 - loss 0.64701864 - samples/sec: 22.94 - lr: 0.010000\n",
-      "2021-09-08 01:29:23,084 epoch 8 - iter 42/73 - loss 0.64288828 - samples/sec: 23.29 - lr: 0.010000\n",
-      "2021-09-08 01:29:23,389 epoch 8 - iter 49/73 - loss 0.64604187 - samples/sec: 23.05 - lr: 0.010000\n",
-      "2021-09-08 01:29:23,692 epoch 8 - iter 56/73 - loss 0.64991011 - samples/sec: 23.10 - lr: 0.010000\n",
-      "2021-09-08 01:29:23,995 epoch 8 - iter 63/73 - loss 0.64937402 - samples/sec: 23.19 - lr: 0.010000\n",
-      "2021-09-08 01:29:24,303 epoch 8 - iter 70/73 - loss 0.64690757 - samples/sec: 22.74 - lr: 0.010000\n",
-      "2021-09-08 01:29:24,434 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:29:24,434 EPOCH 8 done: loss 0.6469 - lr 0.0100000\n",
-      "2021-09-08 01:29:24,553 DEV : loss 0.46597737073898315 - score 0.125\n",
-      "2021-09-08 01:29:24,554 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:29:24,556 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:29:24,871 epoch 9 - iter 7/73 - loss 0.65594962 - samples/sec: 23.18 - lr: 0.010000\n",
-      "2021-09-08 01:29:25,173 epoch 9 - iter 14/73 - loss 0.64038363 - samples/sec: 23.26 - lr: 0.010000\n",
-      "2021-09-08 01:29:25,481 epoch 9 - iter 21/73 - loss 0.64735121 - samples/sec: 22.76 - lr: 0.010000\n",
-      "2021-09-08 01:29:25,782 epoch 9 - iter 28/73 - loss 0.64112355 - samples/sec: 23.34 - lr: 0.010000\n",
-      "2021-09-08 01:29:26,086 epoch 9 - iter 35/73 - loss 0.64210069 - samples/sec: 23.05 - lr: 0.010000\n",
-      "2021-09-08 01:29:26,387 epoch 9 - iter 42/73 - loss 0.64388116 - samples/sec: 23.31 - lr: 0.010000\n",
-      "2021-09-08 01:29:26,688 epoch 9 - iter 49/73 - loss 0.64787416 - samples/sec: 23.24 - lr: 0.010000\n",
-      "2021-09-08 01:29:26,991 epoch 9 - iter 56/73 - loss 0.64990306 - samples/sec: 23.18 - lr: 0.010000\n",
-      "2021-09-08 01:29:27,297 epoch 9 - iter 63/73 - loss 0.64774060 - samples/sec: 22.89 - lr: 0.010000\n",
-      "2021-09-08 01:29:27,598 epoch 9 - iter 70/73 - loss 0.64797341 - samples/sec: 23.30 - lr: 0.010000\n",
-      "2021-09-08 01:29:27,729 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:29:27,730 EPOCH 9 done: loss 0.6466 - lr 0.0100000\n",
-      "2021-09-08 01:29:27,855 DEV : loss 0.46965938806533813 - score 0.125\n",
-      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 01:29:27,856 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:29:27,876 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:29:28,195 epoch 10 - iter 7/73 - loss 0.67699916 - samples/sec: 22.89 - lr: 0.005000\n",
-      "2021-09-08 01:29:28,499 epoch 10 - iter 14/73 - loss 0.65570215 - samples/sec: 23.12 - lr: 0.005000\n",
-      "2021-09-08 01:29:28,803 epoch 10 - iter 21/73 - loss 0.65420459 - samples/sec: 23.04 - lr: 0.005000\n",
-      "2021-09-08 01:29:29,115 epoch 10 - iter 28/73 - loss 0.65059579 - samples/sec: 22.50 - lr: 0.005000\n",
-      "2021-09-08 01:29:29,421 epoch 10 - iter 35/73 - loss 0.64799130 - samples/sec: 22.88 - lr: 0.005000\n",
-      "2021-09-08 01:29:29,726 epoch 10 - iter 42/73 - loss 0.64579198 - samples/sec: 23.04 - lr: 0.005000\n",
-      "2021-09-08 01:29:30,038 epoch 10 - iter 49/73 - loss 0.64530286 - samples/sec: 22.46 - lr: 0.005000\n",
-      "2021-09-08 01:29:30,344 epoch 10 - iter 56/73 - loss 0.64604338 - samples/sec: 22.91 - lr: 0.005000\n",
-      "2021-09-08 01:29:30,648 epoch 10 - iter 63/73 - loss 0.64481295 - samples/sec: 23.13 - lr: 0.005000\n",
-      "2021-09-08 01:29:30,954 epoch 10 - iter 70/73 - loss 0.64360894 - samples/sec: 22.90 - lr: 0.005000\n",
-      "2021-09-08 01:29:31,085 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:29:31,085 EPOCH 10 done: loss 0.6447 - lr 0.0050000\n",
-      "2021-09-08 01:29:31,208 DEV : loss 0.5132418870925903 - score 0.125\n",
-      "2021-09-08 01:29:31,209 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:29:35,389 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:29:35,390 Testing using best model ...\n",
-      "2021-09-08 01:29:35,391 loading file None/best-model.pt\n",
+      "2021-09-21 21:13:08,937 EPOCH 6 done: loss 0.6510 - lr 0.0200000\n",
+      "2021-09-21 21:13:09,564 DEV : loss 0.4668577313423157 - score 0.125\n",
+      "2021-09-21 21:13:09,565 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:13:09,567 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:10,214 epoch 7 - iter 7/73 - loss 0.63776035 - samples/sec: 12.38 - lr: 0.020000\n",
+      "2021-09-21 21:13:10,793 epoch 7 - iter 14/73 - loss 0.64207342 - samples/sec: 12.10 - lr: 0.020000\n",
+      "2021-09-21 21:13:11,349 epoch 7 - iter 21/73 - loss 0.64834629 - samples/sec: 12.60 - lr: 0.020000\n",
+      "2021-09-21 21:13:11,951 epoch 7 - iter 28/73 - loss 0.65419660 - samples/sec: 11.64 - lr: 0.020000\n",
+      "2021-09-21 21:13:12,540 epoch 7 - iter 35/73 - loss 0.65158163 - samples/sec: 11.90 - lr: 0.020000\n",
+      "2021-09-21 21:13:13,151 epoch 7 - iter 42/73 - loss 0.64928518 - samples/sec: 11.47 - lr: 0.020000\n",
+      "2021-09-21 21:13:13,633 epoch 7 - iter 49/73 - loss 0.65043046 - samples/sec: 14.58 - lr: 0.020000\n",
+      "2021-09-21 21:13:14,148 epoch 7 - iter 56/73 - loss 0.65398139 - samples/sec: 13.61 - lr: 0.020000\n",
+      "2021-09-21 21:13:14,666 epoch 7 - iter 63/73 - loss 0.65230192 - samples/sec: 13.53 - lr: 0.020000\n",
+      "2021-09-21 21:13:15,178 epoch 7 - iter 70/73 - loss 0.65462674 - samples/sec: 13.71 - lr: 0.020000\n",
+      "2021-09-21 21:13:15,403 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:15,404 EPOCH 7 done: loss 0.6535 - lr 0.0200000\n",
+      "2021-09-21 21:13:15,856 DEV : loss 0.4284491539001465 - score 0.125\n",
+      "2021-09-21 21:13:15,857 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:13:15,859 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:16,415 epoch 8 - iter 7/73 - loss 0.64563208 - samples/sec: 13.41 - lr: 0.020000\n",
+      "2021-09-21 21:13:16,914 epoch 8 - iter 14/73 - loss 0.64797464 - samples/sec: 14.05 - lr: 0.020000\n",
+      "2021-09-21 21:13:17,439 epoch 8 - iter 21/73 - loss 0.64377384 - samples/sec: 13.34 - lr: 0.020000\n",
+      "2021-09-21 21:13:17,998 epoch 8 - iter 28/73 - loss 0.63146285 - samples/sec: 12.54 - lr: 0.020000\n",
+      "2021-09-21 21:13:18,423 epoch 8 - iter 35/73 - loss 0.63435779 - samples/sec: 16.49 - lr: 0.020000\n",
+      "2021-09-21 21:13:18,877 epoch 8 - iter 42/73 - loss 0.63476857 - samples/sec: 15.47 - lr: 0.020000\n",
+      "2021-09-21 21:13:19,273 epoch 8 - iter 49/73 - loss 0.64591815 - samples/sec: 17.70 - lr: 0.020000\n",
+      "2021-09-21 21:13:19,771 epoch 8 - iter 56/73 - loss 0.64528897 - samples/sec: 14.09 - lr: 0.020000\n",
+      "2021-09-21 21:13:20,213 epoch 8 - iter 63/73 - loss 0.64407621 - samples/sec: 15.84 - lr: 0.020000\n",
+      "2021-09-21 21:13:20,571 epoch 8 - iter 70/73 - loss 0.64287142 - samples/sec: 19.61 - lr: 0.020000\n",
+      "2021-09-21 21:13:20,740 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:20,741 EPOCH 8 done: loss 0.6431 - lr 0.0200000\n",
+      "2021-09-21 21:13:21,057 DEV : loss 0.4826224148273468 - score 0.0\n",
+      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:13:21,058 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:13:21,176 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:21,519 epoch 9 - iter 7/73 - loss 0.60927130 - samples/sec: 22.14 - lr: 0.010000\n",
+      "2021-09-21 21:13:21,855 epoch 9 - iter 14/73 - loss 0.61879651 - samples/sec: 20.83 - lr: 0.010000\n",
+      "2021-09-21 21:13:22,197 epoch 9 - iter 21/73 - loss 0.62691597 - samples/sec: 20.55 - lr: 0.010000\n",
+      "2021-09-21 21:13:22,543 epoch 9 - iter 28/73 - loss 0.63184529 - samples/sec: 20.25 - lr: 0.010000\n",
+      "2021-09-21 21:13:22,888 epoch 9 - iter 35/73 - loss 0.63147716 - samples/sec: 20.34 - lr: 0.010000\n",
+      "2021-09-21 21:13:23,267 epoch 9 - iter 42/73 - loss 0.62703692 - samples/sec: 18.50 - lr: 0.010000\n",
+      "2021-09-21 21:13:23,756 epoch 9 - iter 49/73 - loss 0.63747274 - samples/sec: 14.34 - lr: 0.010000\n",
+      "2021-09-21 21:13:24,276 epoch 9 - iter 56/73 - loss 0.63643625 - samples/sec: 13.47 - lr: 0.010000\n",
+      "2021-09-21 21:13:24,833 epoch 9 - iter 63/73 - loss 0.63870504 - samples/sec: 12.59 - lr: 0.010000\n",
+      "2021-09-21 21:13:25,353 epoch 9 - iter 70/73 - loss 0.63798260 - samples/sec: 13.47 - lr: 0.010000\n",
+      "2021-09-21 21:13:25,639 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:25,640 EPOCH 9 done: loss 0.6363 - lr 0.0100000\n",
+      "2021-09-21 21:13:26,558 DEV : loss 0.46695923805236816 - score 0.0\n",
+      "2021-09-21 21:13:26,559 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:13:26,561 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:27,148 epoch 10 - iter 7/73 - loss 0.66228910 - samples/sec: 13.72 - lr: 0.010000\n",
+      "2021-09-21 21:13:27,576 epoch 10 - iter 14/73 - loss 0.64378260 - samples/sec: 16.39 - lr: 0.010000\n",
+      "2021-09-21 21:13:27,971 epoch 10 - iter 21/73 - loss 0.63099406 - samples/sec: 17.77 - lr: 0.010000\n",
+      "2021-09-21 21:13:28,303 epoch 10 - iter 28/73 - loss 0.63462594 - samples/sec: 21.18 - lr: 0.010000\n",
+      "2021-09-21 21:13:28,610 epoch 10 - iter 35/73 - loss 0.63229683 - samples/sec: 22.85 - lr: 0.010000\n",
+      "2021-09-21 21:13:28,919 epoch 10 - iter 42/73 - loss 0.63036140 - samples/sec: 22.72 - lr: 0.010000\n",
+      "2021-09-21 21:13:29,220 epoch 10 - iter 49/73 - loss 0.62728788 - samples/sec: 23.31 - lr: 0.010000\n",
+      "2021-09-21 21:13:29,519 epoch 10 - iter 56/73 - loss 0.62918830 - samples/sec: 23.49 - lr: 0.010000\n",
+      "2021-09-21 21:13:29,818 epoch 10 - iter 63/73 - loss 0.63070542 - samples/sec: 23.46 - lr: 0.010000\n",
+      "2021-09-21 21:13:30,119 epoch 10 - iter 70/73 - loss 0.62989588 - samples/sec: 23.32 - lr: 0.010000\n",
+      "2021-09-21 21:13:30,249 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:30,249 EPOCH 10 done: loss 0.6299 - lr 0.0100000\n",
+      "2021-09-21 21:13:30,514 DEV : loss 0.49625030159950256 - score 0.0\n",
+      "2021-09-21 21:13:30,514 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:13:43,970 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:43,970 Testing using best model ...\n",
+      "2021-09-21 21:13:43,972 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:29:40,337 \t0.0\n",
-      "2021-09-08 01:29:40,338 \n",
+      "2021-09-21 21:13:49,067 \t0.0\n",
+      "2021-09-21 21:13:49,068 \n",
       "Results:\n",
       "- F-score (micro) 0.0\n",
       "- F-score (macro) 0.0\n",
@@ -2820,23 +2816,23 @@
       "By class:\n",
       "              precision    recall  f1-score   support\n",
       "\n",
-      "         joy     0.0000    0.0000    0.0000         0\n",
-      "    surprise     0.0000    0.0000    0.0000         1\n",
-      "        love     0.0000    0.0000    0.0000         1\n",
+      "         joy     0.0000    0.0000    0.0000         1\n",
+      "    surprise     0.0000    0.0000    0.0000         2\n",
+      "        love     0.0000    0.0000    0.0000         0\n",
       "     disgust     0.0000    0.0000    0.0000         2\n",
-      "        fear     0.0000    0.0000    0.0000         2\n",
-      "       anger     0.0000    0.0000    0.0000         1\n",
-      "       guilt     0.0000    0.0000    0.0000         0\n",
-      "       shame     0.0000    0.0000    0.0000         2\n",
-      "     sadness     0.0000    0.0000    0.0000         0\n",
+      "        fear     0.0000    0.0000    0.0000         0\n",
+      "       anger     0.0000    0.0000    0.0000         2\n",
+      "       guilt     0.0000    0.0000    0.0000         1\n",
+      "       shame     0.0000    0.0000    0.0000         0\n",
+      "     sadness     0.0000    0.0000    0.0000         1\n",
       "\n",
       "   micro avg     0.0000    0.0000    0.0000         9\n",
       "   macro avg     0.0000    0.0000    0.0000         9\n",
       "weighted avg     0.0000    0.0000    0.0000         9\n",
       " samples avg     0.0000    0.0000    0.0000         9\n",
       "\n",
-      "2021-09-08 01:29:40,338 ----------------------------------------------------------------------------------------------------\n",
-      "Accuracy Durchschnitt: 0.17677032596478082\n"
+      "2021-09-21 21:13:49,068 ----------------------------------------------------------------------------------------------------\n",
+      "Accuracy Durchschnitt: 0.1265642562757587\n"
      ]
     }
    ],
@@ -2912,6 +2908,26 @@
     "print(f'Accuracy Durchschnitt: {statistics.mean(avg_acc_list)}')"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "7a802128",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.11614837017609592, 0.12663919070813037, 0.11877107530910454, 0.15698763581865868, 0.11427500936680404]\n",
+      "0.015785040895670312\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(avg_acc_list)\n",
+    "print(statistics.pstdev(avg_acc_list))"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "440b1d3b",
@@ -2922,7 +2938,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 6,
    "id": "ef4da272",
    "metadata": {},
    "outputs": [
@@ -2930,25 +2946,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:30:17,366 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
+      "2021-09-21 21:15:15,428 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:30:21,352 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:15:19,480 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 82/82 [00:00<00:00, 50585.81it/s]"
+      "100%|██████████| 82/82 [00:00<00:00, 39266.23it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:30:21,355 [b'this text expresses joy', b'this text expresses surprise', b'this text expresses love', b'this text expresses disgust', b'this text expresses fear', b'this text expresses anger', b'this text expresses guilt', b'this text expresses shame', b'this text expresses sadness']\n",
-      "2021-09-08 01:30:21,364 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:30:21,366 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:15:19,484 [b'this text expresses joy', b'this text expresses surprise', b'this text expresses love', b'this text expresses disgust', b'this text expresses fear', b'this text expresses anger', b'this text expresses guilt', b'this text expresses shame', b'this text expresses sadness']\n",
+      "2021-09-21 21:15:19,656 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:19,659 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -3261,25 +3277,24 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:30:21,367 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:30:21,367 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
-      "2021-09-08 01:30:21,367 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:30:21,368 Parameters:\n",
-      "2021-09-08 01:30:21,368  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:30:21,368  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:30:21,368  - patience: \"3\"\n",
-      "2021-09-08 01:30:21,369  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:30:21,369  - max_epochs: \"10\"\n",
-      "2021-09-08 01:30:21,369  - shuffle: \"True\"\n",
-      "2021-09-08 01:30:21,370  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:30:21,370  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:30:21,370 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:30:21,370 Model training base path: \"None\"\n",
-      "2021-09-08 01:30:21,371 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:30:21,371 Device: cuda:1\n",
-      "2021-09-08 01:30:21,371 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:30:21,371 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:30:21,378 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:15:19,659 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:19,660 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
+      "2021-09-21 21:15:19,660 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:19,660 Parameters:\n",
+      "2021-09-21 21:15:19,660  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:15:19,661  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:15:19,661  - patience: \"3\"\n",
+      "2021-09-21 21:15:19,661  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:15:19,661  - max_epochs: \"10\"\n",
+      "2021-09-21 21:15:19,662  - shuffle: \"True\"\n",
+      "2021-09-21 21:15:19,662  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:15:19,662  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:15:19,663 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:19,663 Model training base path: \"None\"\n",
+      "2021-09-21 21:15:19,663 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:19,663 Device: cuda:0\n",
+      "2021-09-21 21:15:19,664 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:19,664 Embeddings storage mode: cpu\n"
      ]
     },
     {
@@ -3293,216 +3308,215 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:30:21,701 epoch 1 - iter 7/73 - loss 0.33897385 - samples/sec: 22.72 - lr: 0.020000\n",
-      "2021-09-08 01:30:22,010 epoch 1 - iter 14/73 - loss 0.48995271 - samples/sec: 22.65 - lr: 0.020000\n",
-      "2021-09-08 01:30:22,319 epoch 1 - iter 21/73 - loss 0.55203828 - samples/sec: 22.76 - lr: 0.020000\n",
-      "2021-09-08 01:30:22,622 epoch 1 - iter 28/73 - loss 0.57408074 - samples/sec: 23.14 - lr: 0.020000\n",
-      "2021-09-08 01:30:22,925 epoch 1 - iter 35/73 - loss 0.57985425 - samples/sec: 23.10 - lr: 0.020000\n",
-      "2021-09-08 01:30:23,232 epoch 1 - iter 42/73 - loss 0.59509451 - samples/sec: 22.83 - lr: 0.020000\n",
-      "2021-09-08 01:30:23,539 epoch 1 - iter 49/73 - loss 0.60386412 - samples/sec: 22.91 - lr: 0.020000\n",
-      "2021-09-08 01:30:23,852 epoch 1 - iter 56/73 - loss 0.60734121 - samples/sec: 22.39 - lr: 0.020000\n",
-      "2021-09-08 01:30:24,157 epoch 1 - iter 63/73 - loss 0.60981473 - samples/sec: 22.99 - lr: 0.020000\n",
-      "2021-09-08 01:30:24,459 epoch 1 - iter 70/73 - loss 0.61142401 - samples/sec: 23.19 - lr: 0.020000\n",
-      "2021-09-08 01:30:24,589 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:30:24,590 EPOCH 1 done: loss 0.6119 - lr 0.0200000\n",
-      "2021-09-08 01:30:24,731 DEV : loss 0.430604487657547 - score 0.0\n",
-      "2021-09-08 01:30:24,732 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:15:19,935 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:20,319 epoch 1 - iter 7/73 - loss 0.21549459 - samples/sec: 19.25 - lr: 0.020000\n",
+      "2021-09-21 21:15:20,708 epoch 1 - iter 14/73 - loss 0.39190590 - samples/sec: 18.02 - lr: 0.020000\n",
+      "2021-09-21 21:15:21,093 epoch 1 - iter 21/73 - loss 0.61741196 - samples/sec: 18.22 - lr: 0.020000\n",
+      "2021-09-21 21:15:21,441 epoch 1 - iter 28/73 - loss 0.62279264 - samples/sec: 20.16 - lr: 0.020000\n",
+      "2021-09-21 21:15:21,781 epoch 1 - iter 35/73 - loss 0.63883696 - samples/sec: 20.66 - lr: 0.020000\n",
+      "2021-09-21 21:15:22,104 epoch 1 - iter 42/73 - loss 0.61529677 - samples/sec: 21.71 - lr: 0.020000\n",
+      "2021-09-21 21:15:22,437 epoch 1 - iter 49/73 - loss 0.62288902 - samples/sec: 21.07 - lr: 0.020000\n",
+      "2021-09-21 21:15:22,796 epoch 1 - iter 56/73 - loss 0.60434669 - samples/sec: 19.52 - lr: 0.020000\n",
+      "2021-09-21 21:15:23,214 epoch 1 - iter 63/73 - loss 0.62219411 - samples/sec: 16.81 - lr: 0.020000\n",
+      "2021-09-21 21:15:23,632 epoch 1 - iter 70/73 - loss 0.61062218 - samples/sec: 16.76 - lr: 0.020000\n",
+      "2021-09-21 21:15:23,807 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:23,808 EPOCH 1 done: loss 0.5963 - lr 0.0200000\n",
+      "2021-09-21 21:15:24,156 DEV : loss 1.0767606496810913 - score 0.0\n",
+      "2021-09-21 21:15:24,157 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:30:28,937 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:30:29,262 epoch 2 - iter 7/73 - loss 0.65680492 - samples/sec: 22.68 - lr: 0.020000\n",
-      "2021-09-08 01:30:29,567 epoch 2 - iter 14/73 - loss 0.64733071 - samples/sec: 23.02 - lr: 0.020000\n",
-      "2021-09-08 01:30:29,869 epoch 2 - iter 21/73 - loss 0.65828021 - samples/sec: 23.21 - lr: 0.020000\n",
-      "2021-09-08 01:30:30,174 epoch 2 - iter 28/73 - loss 0.65651572 - samples/sec: 22.96 - lr: 0.020000\n",
-      "2021-09-08 01:30:30,480 epoch 2 - iter 35/73 - loss 0.65880889 - samples/sec: 22.91 - lr: 0.020000\n",
-      "2021-09-08 01:30:30,790 epoch 2 - iter 42/73 - loss 0.67588733 - samples/sec: 22.68 - lr: 0.020000\n",
-      "2021-09-08 01:30:31,101 epoch 2 - iter 49/73 - loss 0.67229751 - samples/sec: 22.50 - lr: 0.020000\n",
-      "2021-09-08 01:30:31,425 epoch 2 - iter 56/73 - loss 0.68778107 - samples/sec: 21.70 - lr: 0.020000\n",
-      "2021-09-08 01:30:31,738 epoch 2 - iter 63/73 - loss 0.68540896 - samples/sec: 22.41 - lr: 0.020000\n",
-      "2021-09-08 01:30:32,039 epoch 2 - iter 70/73 - loss 0.67609115 - samples/sec: 23.28 - lr: 0.020000\n",
-      "2021-09-08 01:30:32,170 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:30:32,170 EPOCH 2 done: loss 0.6757 - lr 0.0200000\n",
-      "2021-09-08 01:30:32,308 DEV : loss 0.39158201217651367 - score 0.0\n",
-      "2021-09-08 01:30:32,309 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:15:29,471 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:29,830 epoch 2 - iter 7/73 - loss 1.29684072 - samples/sec: 20.34 - lr: 0.020000\n",
+      "2021-09-21 21:15:30,186 epoch 2 - iter 14/73 - loss 0.92057427 - samples/sec: 19.75 - lr: 0.020000\n",
+      "2021-09-21 21:15:30,511 epoch 2 - iter 21/73 - loss 0.87298133 - samples/sec: 21.63 - lr: 0.020000\n",
+      "2021-09-21 21:15:30,842 epoch 2 - iter 28/73 - loss 0.84699326 - samples/sec: 21.18 - lr: 0.020000\n",
+      "2021-09-21 21:15:31,165 epoch 2 - iter 35/73 - loss 0.78214669 - samples/sec: 21.68 - lr: 0.020000\n",
+      "2021-09-21 21:15:31,495 epoch 2 - iter 42/73 - loss 0.76911609 - samples/sec: 21.30 - lr: 0.020000\n",
+      "2021-09-21 21:15:31,825 epoch 2 - iter 49/73 - loss 0.75215226 - samples/sec: 21.29 - lr: 0.020000\n",
+      "2021-09-21 21:15:32,152 epoch 2 - iter 56/73 - loss 0.72644923 - samples/sec: 21.44 - lr: 0.020000\n",
+      "2021-09-21 21:15:32,483 epoch 2 - iter 63/73 - loss 0.74168313 - samples/sec: 21.18 - lr: 0.020000\n",
+      "2021-09-21 21:15:32,816 epoch 2 - iter 70/73 - loss 0.73549372 - samples/sec: 21.11 - lr: 0.020000\n",
+      "2021-09-21 21:15:32,954 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:32,955 EPOCH 2 done: loss 0.7308 - lr 0.0200000\n",
+      "2021-09-21 21:15:33,236 DEV : loss 0.3919820785522461 - score 0.125\n",
+      "2021-09-21 21:15:33,237 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:30:36,462 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:30:36,793 epoch 3 - iter 7/73 - loss 0.70759311 - samples/sec: 22.23 - lr: 0.020000\n",
-      "2021-09-08 01:30:37,097 epoch 3 - iter 14/73 - loss 0.66495270 - samples/sec: 23.11 - lr: 0.020000\n",
-      "2021-09-08 01:30:37,399 epoch 3 - iter 21/73 - loss 0.65346730 - samples/sec: 23.18 - lr: 0.020000\n",
-      "2021-09-08 01:30:37,702 epoch 3 - iter 28/73 - loss 0.64550992 - samples/sec: 23.14 - lr: 0.020000\n",
-      "2021-09-08 01:30:38,012 epoch 3 - iter 35/73 - loss 0.63801852 - samples/sec: 22.68 - lr: 0.020000\n",
-      "2021-09-08 01:30:38,314 epoch 3 - iter 42/73 - loss 0.63628127 - samples/sec: 23.15 - lr: 0.020000\n",
-      "2021-09-08 01:30:38,617 epoch 3 - iter 49/73 - loss 0.63651072 - samples/sec: 23.22 - lr: 0.020000\n",
-      "2021-09-08 01:30:38,926 epoch 3 - iter 56/73 - loss 0.64788769 - samples/sec: 22.70 - lr: 0.020000\n",
-      "2021-09-08 01:30:39,230 epoch 3 - iter 63/73 - loss 0.64887156 - samples/sec: 23.07 - lr: 0.020000\n",
-      "2021-09-08 01:30:39,535 epoch 3 - iter 70/73 - loss 0.65171096 - samples/sec: 22.96 - lr: 0.020000\n",
-      "2021-09-08 01:30:39,669 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:30:39,669 EPOCH 3 done: loss 0.6512 - lr 0.0200000\n",
-      "2021-09-08 01:30:39,799 DEV : loss 0.526430606842041 - score 0.25\n",
-      "2021-09-08 01:30:39,799 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:30:47,216 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:30:47,534 epoch 4 - iter 7/73 - loss 0.66269416 - samples/sec: 23.14 - lr: 0.020000\n",
-      "2021-09-08 01:30:47,838 epoch 4 - iter 14/73 - loss 0.65763854 - samples/sec: 23.06 - lr: 0.020000\n",
-      "2021-09-08 01:30:48,144 epoch 4 - iter 21/73 - loss 0.65629976 - samples/sec: 22.99 - lr: 0.020000\n",
-      "2021-09-08 01:30:48,448 epoch 4 - iter 28/73 - loss 0.65768617 - samples/sec: 23.04 - lr: 0.020000\n",
-      "2021-09-08 01:30:48,757 epoch 4 - iter 35/73 - loss 0.64658646 - samples/sec: 22.67 - lr: 0.020000\n",
-      "2021-09-08 01:30:49,062 epoch 4 - iter 42/73 - loss 0.65139355 - samples/sec: 23.02 - lr: 0.020000\n",
-      "2021-09-08 01:30:49,364 epoch 4 - iter 49/73 - loss 0.65363612 - samples/sec: 23.27 - lr: 0.020000\n",
-      "2021-09-08 01:30:49,668 epoch 4 - iter 56/73 - loss 0.65031715 - samples/sec: 23.03 - lr: 0.020000\n",
-      "2021-09-08 01:30:49,970 epoch 4 - iter 63/73 - loss 0.65068369 - samples/sec: 23.25 - lr: 0.020000\n",
-      "2021-09-08 01:30:50,271 epoch 4 - iter 70/73 - loss 0.64868302 - samples/sec: 23.31 - lr: 0.020000\n",
-      "2021-09-08 01:30:50,403 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:30:50,404 EPOCH 4 done: loss 0.6485 - lr 0.0200000\n",
-      "2021-09-08 01:30:50,534 DEV : loss 0.43458306789398193 - score 0.125\n",
-      "2021-09-08 01:30:50,535 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:30:50,538 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:30:50,852 epoch 5 - iter 7/73 - loss 0.67551439 - samples/sec: 23.31 - lr: 0.020000\n",
-      "2021-09-08 01:30:51,155 epoch 5 - iter 14/73 - loss 0.67734582 - samples/sec: 23.14 - lr: 0.020000\n",
-      "2021-09-08 01:30:51,457 epoch 5 - iter 21/73 - loss 0.65114393 - samples/sec: 23.24 - lr: 0.020000\n",
-      "2021-09-08 01:30:51,762 epoch 5 - iter 28/73 - loss 0.65482066 - samples/sec: 22.97 - lr: 0.020000\n",
-      "2021-09-08 01:30:52,068 epoch 5 - iter 35/73 - loss 0.65519856 - samples/sec: 22.90 - lr: 0.020000\n",
-      "2021-09-08 01:30:52,371 epoch 5 - iter 42/73 - loss 0.65892897 - samples/sec: 23.20 - lr: 0.020000\n",
-      "2021-09-08 01:30:52,673 epoch 5 - iter 49/73 - loss 0.65606677 - samples/sec: 23.20 - lr: 0.020000\n",
-      "2021-09-08 01:30:52,976 epoch 5 - iter 56/73 - loss 0.65533761 - samples/sec: 23.13 - lr: 0.020000\n",
-      "2021-09-08 01:30:53,280 epoch 5 - iter 63/73 - loss 0.65581059 - samples/sec: 23.10 - lr: 0.020000\n",
-      "2021-09-08 01:30:53,582 epoch 5 - iter 70/73 - loss 0.65595885 - samples/sec: 23.20 - lr: 0.020000\n",
-      "2021-09-08 01:30:53,715 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:30:53,716 EPOCH 5 done: loss 0.6535 - lr 0.0200000\n",
-      "2021-09-08 01:30:53,851 DEV : loss 0.4717434346675873 - score 0.375\n",
-      "2021-09-08 01:30:53,852 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:30:58,007 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:30:58,327 epoch 6 - iter 7/73 - loss 0.64050734 - samples/sec: 22.96 - lr: 0.020000\n",
-      "2021-09-08 01:30:58,629 epoch 6 - iter 14/73 - loss 0.63952669 - samples/sec: 23.22 - lr: 0.020000\n",
-      "2021-09-08 01:30:58,930 epoch 6 - iter 21/73 - loss 0.63873243 - samples/sec: 23.31 - lr: 0.020000\n",
-      "2021-09-08 01:30:59,230 epoch 6 - iter 28/73 - loss 0.64501821 - samples/sec: 23.37 - lr: 0.020000\n",
-      "2021-09-08 01:30:59,534 epoch 6 - iter 35/73 - loss 0.64335346 - samples/sec: 23.07 - lr: 0.020000\n",
-      "2021-09-08 01:30:59,835 epoch 6 - iter 42/73 - loss 0.64410692 - samples/sec: 23.33 - lr: 0.020000\n",
-      "2021-09-08 01:31:00,135 epoch 6 - iter 49/73 - loss 0.64008715 - samples/sec: 23.39 - lr: 0.020000\n",
-      "2021-09-08 01:31:00,439 epoch 6 - iter 56/73 - loss 0.63593475 - samples/sec: 23.03 - lr: 0.020000\n",
-      "2021-09-08 01:31:00,741 epoch 6 - iter 63/73 - loss 0.64312465 - samples/sec: 23.28 - lr: 0.020000\n",
-      "2021-09-08 01:31:01,040 epoch 6 - iter 70/73 - loss 0.63996170 - samples/sec: 23.42 - lr: 0.020000\n",
-      "2021-09-08 01:31:01,169 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:15:40,996 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:41,688 epoch 3 - iter 7/73 - loss 0.63928178 - samples/sec: 11.33 - lr: 0.020000\n",
+      "2021-09-21 21:15:42,287 epoch 3 - iter 14/73 - loss 0.53799939 - samples/sec: 11.71 - lr: 0.020000\n",
+      "2021-09-21 21:15:43,081 epoch 3 - iter 21/73 - loss 0.59477884 - samples/sec: 8.82 - lr: 0.020000\n",
+      "2021-09-21 21:15:43,718 epoch 3 - iter 28/73 - loss 0.59914804 - samples/sec: 10.99 - lr: 0.020000\n",
+      "2021-09-21 21:15:44,308 epoch 3 - iter 35/73 - loss 0.61082378 - samples/sec: 11.88 - lr: 0.020000\n",
+      "2021-09-21 21:15:44,836 epoch 3 - iter 42/73 - loss 0.62681681 - samples/sec: 13.29 - lr: 0.020000\n",
+      "2021-09-21 21:15:45,336 epoch 3 - iter 49/73 - loss 0.63338062 - samples/sec: 14.01 - lr: 0.020000\n",
+      "2021-09-21 21:15:45,815 epoch 3 - iter 56/73 - loss 0.65436656 - samples/sec: 14.63 - lr: 0.020000\n",
+      "2021-09-21 21:15:46,344 epoch 3 - iter 63/73 - loss 0.66844556 - samples/sec: 13.27 - lr: 0.020000\n",
+      "2021-09-21 21:15:46,798 epoch 3 - iter 70/73 - loss 0.66328708 - samples/sec: 15.43 - lr: 0.020000\n",
+      "2021-09-21 21:15:46,969 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:46,970 EPOCH 3 done: loss 0.6653 - lr 0.0200000\n",
+      "2021-09-21 21:15:47,318 DEV : loss 0.4861763119697571 - score 0.125\n",
+      "2021-09-21 21:15:47,321 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:15:47,740 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:48,390 epoch 4 - iter 7/73 - loss 0.59935724 - samples/sec: 12.42 - lr: 0.020000\n",
+      "2021-09-21 21:15:49,043 epoch 4 - iter 14/73 - loss 0.66036564 - samples/sec: 10.73 - lr: 0.020000\n",
+      "2021-09-21 21:15:49,686 epoch 4 - iter 21/73 - loss 0.63814537 - samples/sec: 10.90 - lr: 0.020000\n",
+      "2021-09-21 21:15:50,339 epoch 4 - iter 28/73 - loss 0.62810078 - samples/sec: 10.73 - lr: 0.020000\n",
+      "2021-09-21 21:15:50,922 epoch 4 - iter 35/73 - loss 0.61173837 - samples/sec: 12.03 - lr: 0.020000\n",
+      "2021-09-21 21:15:51,553 epoch 4 - iter 42/73 - loss 0.61915406 - samples/sec: 11.10 - lr: 0.020000\n",
+      "2021-09-21 21:15:52,235 epoch 4 - iter 49/73 - loss 0.62702859 - samples/sec: 10.29 - lr: 0.020000\n",
+      "2021-09-21 21:15:52,827 epoch 4 - iter 56/73 - loss 0.62487170 - samples/sec: 11.83 - lr: 0.020000\n",
+      "2021-09-21 21:15:53,479 epoch 4 - iter 63/73 - loss 0.62331072 - samples/sec: 10.76 - lr: 0.020000\n",
+      "2021-09-21 21:15:54,083 epoch 4 - iter 70/73 - loss 0.63354892 - samples/sec: 11.60 - lr: 0.020000\n",
+      "2021-09-21 21:15:54,364 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:54,365 EPOCH 4 done: loss 0.6262 - lr 0.0200000\n",
+      "2021-09-21 21:15:54,982 DEV : loss 0.5817396640777588 - score 0.125\n",
+      "2021-09-21 21:15:54,984 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:15:54,986 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:55,727 epoch 5 - iter 7/73 - loss 0.59545952 - samples/sec: 10.78 - lr: 0.020000\n",
+      "2021-09-21 21:15:56,352 epoch 5 - iter 14/73 - loss 0.56218026 - samples/sec: 11.23 - lr: 0.020000\n",
+      "2021-09-21 21:15:56,975 epoch 5 - iter 21/73 - loss 0.55683632 - samples/sec: 11.25 - lr: 0.020000\n",
+      "2021-09-21 21:15:57,622 epoch 5 - iter 28/73 - loss 0.56190848 - samples/sec: 10.82 - lr: 0.020000\n",
+      "2021-09-21 21:15:58,254 epoch 5 - iter 35/73 - loss 0.56877957 - samples/sec: 11.10 - lr: 0.020000\n",
+      "2021-09-21 21:15:58,865 epoch 5 - iter 42/73 - loss 0.56788718 - samples/sec: 11.46 - lr: 0.020000\n",
+      "2021-09-21 21:15:59,503 epoch 5 - iter 49/73 - loss 0.58665577 - samples/sec: 11.00 - lr: 0.020000\n",
+      "2021-09-21 21:16:00,098 epoch 5 - iter 56/73 - loss 0.58880308 - samples/sec: 11.77 - lr: 0.020000\n",
+      "2021-09-21 21:16:00,713 epoch 5 - iter 63/73 - loss 0.59774105 - samples/sec: 11.40 - lr: 0.020000\n",
+      "2021-09-21 21:16:01,387 epoch 5 - iter 70/73 - loss 0.60283885 - samples/sec: 10.40 - lr: 0.020000\n",
+      "2021-09-21 21:16:01,691 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:01,692 EPOCH 5 done: loss 0.6076 - lr 0.0200000\n",
+      "2021-09-21 21:16:02,292 DEV : loss 0.4245604872703552 - score 0.125\n",
+      "2021-09-21 21:16:02,297 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:16:02,299 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:02,927 epoch 6 - iter 7/73 - loss 0.65068152 - samples/sec: 11.97 - lr: 0.020000\n",
+      "2021-09-21 21:16:03,424 epoch 6 - iter 14/73 - loss 0.55202150 - samples/sec: 14.10 - lr: 0.020000\n",
+      "2021-09-21 21:16:03,871 epoch 6 - iter 21/73 - loss 0.50941002 - samples/sec: 15.68 - lr: 0.020000\n",
+      "2021-09-21 21:16:04,293 epoch 6 - iter 28/73 - loss 0.45470828 - samples/sec: 16.63 - lr: 0.020000\n",
+      "2021-09-21 21:16:04,759 epoch 6 - iter 35/73 - loss 0.47935053 - samples/sec: 15.04 - lr: 0.020000\n",
+      "2021-09-21 21:16:05,224 epoch 6 - iter 42/73 - loss 0.49863742 - samples/sec: 15.10 - lr: 0.020000\n",
+      "2021-09-21 21:16:05,763 epoch 6 - iter 49/73 - loss 0.49951768 - samples/sec: 13.00 - lr: 0.020000\n",
+      "2021-09-21 21:16:06,252 epoch 6 - iter 56/73 - loss 0.50267935 - samples/sec: 14.34 - lr: 0.020000\n",
+      "2021-09-21 21:16:06,741 epoch 6 - iter 63/73 - loss 0.47673682 - samples/sec: 14.32 - lr: 0.020000\n",
+      "2021-09-21 21:16:07,211 epoch 6 - iter 70/73 - loss 0.50490348 - samples/sec: 14.92 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:31:01,170 EPOCH 6 done: loss 0.6406 - lr 0.0200000\n",
-      "2021-09-08 01:31:01,306 DEV : loss 0.44844409823417664 - score 0.375\n",
-      "2021-09-08 01:31:01,307 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:16:07,431 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:07,432 EPOCH 6 done: loss 0.5164 - lr 0.0200000\n",
+      "2021-09-21 21:16:07,997 DEV : loss 0.44274091720581055 - score 0.125\n",
+      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:16:08,000 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:16:08,052 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:08,501 epoch 7 - iter 7/73 - loss 0.41971121 - samples/sec: 17.22 - lr: 0.010000\n",
+      "2021-09-21 21:16:08,856 epoch 7 - iter 14/73 - loss 0.39500793 - samples/sec: 19.77 - lr: 0.010000\n",
+      "2021-09-21 21:16:09,206 epoch 7 - iter 21/73 - loss 0.44519561 - samples/sec: 20.01 - lr: 0.010000\n",
+      "2021-09-21 21:16:09,555 epoch 7 - iter 28/73 - loss 0.41302858 - samples/sec: 20.10 - lr: 0.010000\n",
+      "2021-09-21 21:16:09,899 epoch 7 - iter 35/73 - loss 0.39280346 - samples/sec: 20.43 - lr: 0.010000\n",
+      "2021-09-21 21:16:10,249 epoch 7 - iter 42/73 - loss 0.39353089 - samples/sec: 20.01 - lr: 0.010000\n",
+      "2021-09-21 21:16:10,596 epoch 7 - iter 49/73 - loss 0.36557573 - samples/sec: 20.22 - lr: 0.010000\n",
+      "2021-09-21 21:16:10,943 epoch 7 - iter 56/73 - loss 0.35685966 - samples/sec: 20.24 - lr: 0.010000\n",
+      "2021-09-21 21:16:11,291 epoch 7 - iter 63/73 - loss 0.34849233 - samples/sec: 20.12 - lr: 0.010000\n",
+      "2021-09-21 21:16:11,623 epoch 7 - iter 70/73 - loss 0.35588150 - samples/sec: 21.12 - lr: 0.010000\n",
+      "2021-09-21 21:16:11,780 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:11,781 EPOCH 7 done: loss 0.3618 - lr 0.0100000\n",
+      "2021-09-21 21:16:15,465 DEV : loss 0.4860753118991852 - score 0.25\n",
+      "2021-09-21 21:16:15,466 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:31:05,391 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:31:05,711 epoch 7 - iter 7/73 - loss 0.66296744 - samples/sec: 23.02 - lr: 0.020000\n",
-      "2021-09-08 01:31:06,014 epoch 7 - iter 14/73 - loss 0.66401065 - samples/sec: 23.13 - lr: 0.020000\n",
-      "2021-09-08 01:31:06,316 epoch 7 - iter 21/73 - loss 0.64969359 - samples/sec: 23.25 - lr: 0.020000\n",
-      "2021-09-08 01:31:06,620 epoch 7 - iter 28/73 - loss 0.66701912 - samples/sec: 23.11 - lr: 0.020000\n",
-      "2021-09-08 01:31:06,931 epoch 7 - iter 35/73 - loss 0.67501700 - samples/sec: 22.49 - lr: 0.020000\n",
-      "2021-09-08 01:31:07,233 epoch 7 - iter 42/73 - loss 0.66772349 - samples/sec: 23.27 - lr: 0.020000\n",
-      "2021-09-08 01:31:07,539 epoch 7 - iter 49/73 - loss 0.66719895 - samples/sec: 22.95 - lr: 0.020000\n",
-      "2021-09-08 01:31:07,842 epoch 7 - iter 56/73 - loss 0.66118678 - samples/sec: 23.10 - lr: 0.020000\n",
-      "2021-09-08 01:31:08,144 epoch 7 - iter 63/73 - loss 0.65713208 - samples/sec: 23.25 - lr: 0.020000\n",
-      "2021-09-08 01:31:08,446 epoch 7 - iter 70/73 - loss 0.65582522 - samples/sec: 23.21 - lr: 0.020000\n",
-      "2021-09-08 01:31:08,584 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:31:08,584 EPOCH 7 done: loss 0.6538 - lr 0.0200000\n",
-      "2021-09-08 01:31:08,716 DEV : loss 0.5219605565071106 - score 0.25\n",
-      "2021-09-08 01:31:08,717 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:31:08,719 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:31:09,035 epoch 8 - iter 7/73 - loss 0.65247984 - samples/sec: 23.23 - lr: 0.020000\n",
-      "2021-09-08 01:31:09,338 epoch 8 - iter 14/73 - loss 0.65901604 - samples/sec: 23.08 - lr: 0.020000\n",
-      "2021-09-08 01:31:09,639 epoch 8 - iter 21/73 - loss 0.64804977 - samples/sec: 23.32 - lr: 0.020000\n",
-      "2021-09-08 01:31:09,940 epoch 8 - iter 28/73 - loss 0.63983310 - samples/sec: 23.32 - lr: 0.020000\n",
-      "2021-09-08 01:31:10,245 epoch 8 - iter 35/73 - loss 0.63958359 - samples/sec: 23.00 - lr: 0.020000\n",
-      "2021-09-08 01:31:10,546 epoch 8 - iter 42/73 - loss 0.63330250 - samples/sec: 23.26 - lr: 0.020000\n",
-      "2021-09-08 01:31:10,848 epoch 8 - iter 49/73 - loss 0.63600202 - samples/sec: 23.27 - lr: 0.020000\n",
-      "2021-09-08 01:31:11,152 epoch 8 - iter 56/73 - loss 0.63860464 - samples/sec: 23.07 - lr: 0.020000\n",
-      "2021-09-08 01:31:11,454 epoch 8 - iter 63/73 - loss 0.64077771 - samples/sec: 23.23 - lr: 0.020000\n",
-      "2021-09-08 01:31:11,756 epoch 8 - iter 70/73 - loss 0.64261733 - samples/sec: 23.18 - lr: 0.020000\n",
-      "2021-09-08 01:31:11,888 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:31:11,888 EPOCH 8 done: loss 0.6433 - lr 0.0200000\n",
-      "2021-09-08 01:31:12,021 DEV : loss 0.5209428668022156 - score 0.125\n",
-      "2021-09-08 01:31:12,022 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:31:12,024 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:31:12,343 epoch 9 - iter 7/73 - loss 0.65387509 - samples/sec: 22.97 - lr: 0.020000\n",
-      "2021-09-08 01:31:12,661 epoch 9 - iter 14/73 - loss 0.64957325 - samples/sec: 22.05 - lr: 0.020000\n",
-      "2021-09-08 01:31:12,986 epoch 9 - iter 21/73 - loss 0.64387962 - samples/sec: 21.56 - lr: 0.020000\n",
-      "2021-09-08 01:31:13,305 epoch 9 - iter 28/73 - loss 0.64312953 - samples/sec: 22.05 - lr: 0.020000\n",
-      "2021-09-08 01:31:13,626 epoch 9 - iter 35/73 - loss 0.64843349 - samples/sec: 21.83 - lr: 0.020000\n",
-      "2021-09-08 01:31:13,933 epoch 9 - iter 42/73 - loss 0.65427114 - samples/sec: 22.84 - lr: 0.020000\n",
-      "2021-09-08 01:31:14,235 epoch 9 - iter 49/73 - loss 0.65461298 - samples/sec: 23.25 - lr: 0.020000\n",
-      "2021-09-08 01:31:14,537 epoch 9 - iter 56/73 - loss 0.65611931 - samples/sec: 23.23 - lr: 0.020000\n",
-      "2021-09-08 01:31:14,843 epoch 9 - iter 63/73 - loss 0.65634517 - samples/sec: 22.94 - lr: 0.020000\n",
-      "2021-09-08 01:31:15,148 epoch 9 - iter 70/73 - loss 0.65180651 - samples/sec: 23.02 - lr: 0.020000\n",
-      "2021-09-08 01:31:15,279 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:31:15,279 EPOCH 9 done: loss 0.6510 - lr 0.0200000\n",
-      "2021-09-08 01:31:15,417 DEV : loss 0.5108868479728699 - score 0.125\n",
-      "2021-09-08 01:31:15,417 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:31:15,420 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:31:15,736 epoch 10 - iter 7/73 - loss 0.69671963 - samples/sec: 23.17 - lr: 0.020000\n",
-      "2021-09-08 01:31:16,037 epoch 10 - iter 14/73 - loss 0.66263632 - samples/sec: 23.26 - lr: 0.020000\n",
-      "2021-09-08 01:31:16,342 epoch 10 - iter 21/73 - loss 0.63411541 - samples/sec: 23.04 - lr: 0.020000\n",
-      "2021-09-08 01:31:16,642 epoch 10 - iter 28/73 - loss 0.64037134 - samples/sec: 23.36 - lr: 0.020000\n",
-      "2021-09-08 01:31:16,945 epoch 10 - iter 35/73 - loss 0.64530977 - samples/sec: 23.17 - lr: 0.020000\n",
-      "2021-09-08 01:31:17,250 epoch 10 - iter 42/73 - loss 0.64258005 - samples/sec: 22.96 - lr: 0.020000\n",
-      "2021-09-08 01:31:17,556 epoch 10 - iter 49/73 - loss 0.64703455 - samples/sec: 22.93 - lr: 0.020000\n",
-      "2021-09-08 01:31:17,859 epoch 10 - iter 56/73 - loss 0.64624253 - samples/sec: 23.12 - lr: 0.020000\n",
-      "2021-09-08 01:31:18,171 epoch 10 - iter 63/73 - loss 0.64587006 - samples/sec: 22.49 - lr: 0.020000\n",
-      "2021-09-08 01:31:18,474 epoch 10 - iter 70/73 - loss 0.64762896 - samples/sec: 23.20 - lr: 0.020000\n",
-      "2021-09-08 01:31:18,605 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:31:18,605 EPOCH 10 done: loss 0.6458 - lr 0.0200000\n",
-      "2021-09-08 01:31:18,738 DEV : loss 0.4554923176765442 - score 0.125\n",
-      "Epoch    10: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:31:18,739 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:31:23,288 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:31:23,289 Testing using best model ...\n",
-      "2021-09-08 01:31:23,337 loading file None/best-model.pt\n",
+      "2021-09-21 21:16:19,568 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:20,065 epoch 8 - iter 7/73 - loss 0.25006783 - samples/sec: 14.70 - lr: 0.010000\n",
+      "2021-09-21 21:16:20,545 epoch 8 - iter 14/73 - loss 0.26364503 - samples/sec: 14.61 - lr: 0.010000\n",
+      "2021-09-21 21:16:21,020 epoch 8 - iter 21/73 - loss 0.28925448 - samples/sec: 14.77 - lr: 0.010000\n",
+      "2021-09-21 21:16:21,505 epoch 8 - iter 28/73 - loss 0.32835909 - samples/sec: 14.47 - lr: 0.010000\n",
+      "2021-09-21 21:16:21,967 epoch 8 - iter 35/73 - loss 0.31734009 - samples/sec: 15.16 - lr: 0.010000\n",
+      "2021-09-21 21:16:22,456 epoch 8 - iter 42/73 - loss 0.31160337 - samples/sec: 14.34 - lr: 0.010000\n",
+      "2021-09-21 21:16:23,118 epoch 8 - iter 49/73 - loss 0.29443190 - samples/sec: 10.58 - lr: 0.010000\n",
+      "2021-09-21 21:16:23,795 epoch 8 - iter 56/73 - loss 0.30616593 - samples/sec: 10.35 - lr: 0.010000\n",
+      "2021-09-21 21:16:24,437 epoch 8 - iter 63/73 - loss 0.30647696 - samples/sec: 10.93 - lr: 0.010000\n",
+      "2021-09-21 21:16:24,918 epoch 8 - iter 70/73 - loss 0.30002609 - samples/sec: 14.56 - lr: 0.010000\n",
+      "2021-09-21 21:16:25,133 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:25,134 EPOCH 8 done: loss 0.3021 - lr 0.0100000\n",
+      "2021-09-21 21:16:25,658 DEV : loss 0.500405490398407 - score 0.25\n",
+      "2021-09-21 21:16:25,661 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:16:25,754 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:26,263 epoch 9 - iter 7/73 - loss 0.34290521 - samples/sec: 14.38 - lr: 0.010000\n",
+      "2021-09-21 21:16:26,736 epoch 9 - iter 14/73 - loss 0.32326843 - samples/sec: 14.84 - lr: 0.010000\n",
+      "2021-09-21 21:16:27,218 epoch 9 - iter 21/73 - loss 0.30337570 - samples/sec: 14.56 - lr: 0.010000\n",
+      "2021-09-21 21:16:27,688 epoch 9 - iter 28/73 - loss 0.30212631 - samples/sec: 14.92 - lr: 0.010000\n",
+      "2021-09-21 21:16:28,145 epoch 9 - iter 35/73 - loss 0.29614319 - samples/sec: 15.31 - lr: 0.010000\n",
+      "2021-09-21 21:16:28,620 epoch 9 - iter 42/73 - loss 0.30421377 - samples/sec: 14.76 - lr: 0.010000\n",
+      "2021-09-21 21:16:29,090 epoch 9 - iter 49/73 - loss 0.29229629 - samples/sec: 14.91 - lr: 0.010000\n",
+      "2021-09-21 21:16:29,555 epoch 9 - iter 56/73 - loss 0.28530929 - samples/sec: 15.08 - lr: 0.010000\n",
+      "2021-09-21 21:16:30,022 epoch 9 - iter 63/73 - loss 0.27917081 - samples/sec: 15.03 - lr: 0.010000\n",
+      "2021-09-21 21:16:30,442 epoch 9 - iter 70/73 - loss 0.29996353 - samples/sec: 16.69 - lr: 0.010000\n",
+      "2021-09-21 21:16:30,610 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:30,611 EPOCH 9 done: loss 0.2932 - lr 0.0100000\n",
+      "2021-09-21 21:16:31,025 DEV : loss 0.5393446087837219 - score 0.125\n",
+      "2021-09-21 21:16:31,026 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:16:31,100 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:31,512 epoch 10 - iter 7/73 - loss 0.21617642 - samples/sec: 17.95 - lr: 0.010000\n",
+      "2021-09-21 21:16:31,919 epoch 10 - iter 14/73 - loss 0.31190383 - samples/sec: 17.21 - lr: 0.010000\n",
+      "2021-09-21 21:16:32,277 epoch 10 - iter 21/73 - loss 0.25842391 - samples/sec: 19.61 - lr: 0.010000\n",
+      "2021-09-21 21:16:32,673 epoch 10 - iter 28/73 - loss 0.23741434 - samples/sec: 17.71 - lr: 0.010000\n",
+      "2021-09-21 21:16:33,060 epoch 10 - iter 35/73 - loss 0.21027259 - samples/sec: 18.13 - lr: 0.010000\n",
+      "2021-09-21 21:16:33,474 epoch 10 - iter 42/73 - loss 0.23275715 - samples/sec: 16.94 - lr: 0.010000\n",
+      "2021-09-21 21:16:33,803 epoch 10 - iter 49/73 - loss 0.21629802 - samples/sec: 21.32 - lr: 0.010000\n",
+      "2021-09-21 21:16:34,131 epoch 10 - iter 56/73 - loss 0.20627190 - samples/sec: 21.38 - lr: 0.010000\n",
+      "2021-09-21 21:16:34,474 epoch 10 - iter 63/73 - loss 0.23856212 - samples/sec: 20.45 - lr: 0.010000\n",
+      "2021-09-21 21:16:34,813 epoch 10 - iter 70/73 - loss 0.24514582 - samples/sec: 20.70 - lr: 0.010000\n",
+      "2021-09-21 21:16:34,961 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:34,961 EPOCH 10 done: loss 0.2410 - lr 0.0100000\n",
+      "2021-09-21 21:16:35,172 DEV : loss 0.5350471138954163 - score 0.125\n",
+      "2021-09-21 21:16:35,173 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:16:39,842 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:39,844 Testing using best model ...\n",
+      "2021-09-21 21:16:39,881 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:31:30,222 \t0.1111\n",
-      "2021-09-08 01:31:30,223 \n",
+      "2021-09-21 21:16:51,831 \t0.0\n",
+      "2021-09-21 21:16:51,832 \n",
       "Results:\n",
-      "- F-score (micro) 0.1111\n",
-      "- F-score (macro) 0.0556\n",
-      "- Accuracy 0.1111\n",
+      "- F-score (micro) 0.0\n",
+      "- F-score (macro) 0.0\n",
+      "- Accuracy 0.0\n",
       "\n",
       "By class:\n",
       "                              precision    recall  f1-score   support\n",
       "\n",
-      "     this text expresses joy     0.0000    0.0000    0.0000         0\n",
-      "this text expresses surprise     0.0000    0.0000    0.0000         1\n",
-      "    this text expresses love     1.0000    0.3333    0.5000         3\n",
-      " this text expresses disgust     0.0000    0.0000    0.0000         2\n",
+      "     this text expresses joy     0.0000    0.0000    0.0000         1\n",
+      "this text expresses surprise     0.0000    0.0000    0.0000         0\n",
+      "    this text expresses love     0.0000    0.0000    0.0000         2\n",
+      " this text expresses disgust     0.0000    0.0000    0.0000         0\n",
       "    this text expresses fear     0.0000    0.0000    0.0000         0\n",
-      "   this text expresses anger     0.0000    0.0000    0.0000         1\n",
-      "   this text expresses guilt     0.0000    0.0000    0.0000         1\n",
-      "   this text expresses shame     0.0000    0.0000    0.0000         0\n",
-      " this text expresses sadness     0.0000    0.0000    0.0000         1\n",
+      "   this text expresses anger     0.0000    0.0000    0.0000         0\n",
+      "   this text expresses guilt     0.0000    0.0000    0.0000         3\n",
+      "   this text expresses shame     0.0000    0.0000    0.0000         1\n",
+      " this text expresses sadness     0.0000    0.0000    0.0000         2\n",
       "\n",
-      "                   micro avg     0.1111    0.1111    0.1111         9\n",
-      "                   macro avg     0.1111    0.0370    0.0556         9\n",
-      "                weighted avg     0.3333    0.1111    0.1667         9\n",
-      "                 samples avg     0.1111    0.1111    0.1111         9\n",
+      "                   micro avg     0.0000    0.0000    0.0000         9\n",
+      "                   macro avg     0.0000    0.0000    0.0000         9\n",
+      "                weighted avg     0.0000    0.0000    0.0000         9\n",
+      "                 samples avg     0.0000    0.0000    0.0000         9\n",
       "\n",
-      "2021-09-08 01:31:30,223 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:15,591 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
+      "2021-09-21 21:16:51,832 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:34,353 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:32:20,018 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:18:38,958 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 82/82 [00:00<00:00, 48264.51it/s]"
+      "100%|██████████| 82/82 [00:00<00:00, 49615.25it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:32:20,022 [b'this text expresses joy', b'this text expresses surprise', b'this text expresses love', b'this text expresses disgust', b'this text expresses fear', b'this text expresses anger', b'this text expresses guilt', b'this text expresses shame', b'this text expresses sadness']\n",
-      "2021-09-08 01:32:20,031 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:20,033 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:18:38,961 [b'this text expresses joy', b'this text expresses surprise', b'this text expresses love', b'this text expresses disgust', b'this text expresses fear', b'this text expresses anger', b'this text expresses guilt', b'this text expresses shame', b'this text expresses sadness']\n",
+      "2021-09-21 21:18:38,971 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:38,972 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -3815,25 +3829,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:32:20,033 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:20,034 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
-      "2021-09-08 01:32:20,034 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:20,034 Parameters:\n",
-      "2021-09-08 01:32:20,035  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:32:20,035  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:32:20,035  - patience: \"3\"\n",
-      "2021-09-08 01:32:20,035  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:32:20,036  - max_epochs: \"10\"\n",
-      "2021-09-08 01:32:20,036  - shuffle: \"True\"\n",
-      "2021-09-08 01:32:20,036  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:32:20,037  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:32:20,037 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:20,037 Model training base path: \"None\"\n",
-      "2021-09-08 01:32:20,037 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:20,038 Device: cuda:1\n",
-      "2021-09-08 01:32:20,038 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:20,038 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:32:20,045 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:18:38,973 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:38,973 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
+      "2021-09-21 21:18:38,974 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:38,974 Parameters:\n",
+      "2021-09-21 21:18:38,974  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:18:38,975  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:18:38,975  - patience: \"3\"\n",
+      "2021-09-21 21:18:38,975  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:18:38,975  - max_epochs: \"10\"\n",
+      "2021-09-21 21:18:38,976  - shuffle: \"True\"\n",
+      "2021-09-21 21:18:38,976  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:18:38,976  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:18:38,977 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:38,977 Model training base path: \"None\"\n",
+      "2021-09-21 21:18:38,977 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:38,977 Device: cuda:0\n",
+      "2021-09-21 21:18:38,978 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:38,978 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:18:38,984 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -3847,215 +3861,213 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:32:20,377 epoch 1 - iter 7/73 - loss 0.38571989 - samples/sec: 22.04 - lr: 0.020000\n",
-      "2021-09-08 01:32:20,689 epoch 1 - iter 14/73 - loss 0.41962651 - samples/sec: 22.49 - lr: 0.020000\n",
-      "2021-09-08 01:32:21,011 epoch 1 - iter 21/73 - loss 0.62394506 - samples/sec: 21.72 - lr: 0.020000\n",
-      "2021-09-08 01:32:21,333 epoch 1 - iter 28/73 - loss 0.65033065 - samples/sec: 21.80 - lr: 0.020000\n",
-      "2021-09-08 01:32:21,660 epoch 1 - iter 35/73 - loss 0.63667491 - samples/sec: 21.49 - lr: 0.020000\n",
-      "2021-09-08 01:32:21,982 epoch 1 - iter 42/73 - loss 0.61711947 - samples/sec: 21.74 - lr: 0.020000\n",
-      "2021-09-08 01:32:22,307 epoch 1 - iter 49/73 - loss 0.59346928 - samples/sec: 21.61 - lr: 0.020000\n",
-      "2021-09-08 01:32:22,631 epoch 1 - iter 56/73 - loss 0.63465078 - samples/sec: 21.61 - lr: 0.020000\n",
-      "2021-09-08 01:32:22,960 epoch 1 - iter 63/73 - loss 0.63008972 - samples/sec: 21.35 - lr: 0.020000\n",
-      "2021-09-08 01:32:23,287 epoch 1 - iter 70/73 - loss 0.63282103 - samples/sec: 21.45 - lr: 0.020000\n",
-      "2021-09-08 01:32:23,427 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:23,427 EPOCH 1 done: loss 0.6213 - lr 0.0200000\n",
-      "2021-09-08 01:32:23,558 DEV : loss 0.9471192359924316 - score 0.125\n",
-      "2021-09-08 01:32:23,558 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:32:27,227 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:27,575 epoch 2 - iter 7/73 - loss 0.64477235 - samples/sec: 21.31 - lr: 0.020000\n",
-      "2021-09-08 01:32:27,903 epoch 2 - iter 14/73 - loss 0.68491439 - samples/sec: 21.40 - lr: 0.020000\n",
-      "2021-09-08 01:32:28,233 epoch 2 - iter 21/73 - loss 0.67179056 - samples/sec: 21.27 - lr: 0.020000\n",
-      "2021-09-08 01:32:28,560 epoch 2 - iter 28/73 - loss 0.68459645 - samples/sec: 21.40 - lr: 0.020000\n",
-      "2021-09-08 01:32:28,889 epoch 2 - iter 35/73 - loss 0.68974448 - samples/sec: 21.32 - lr: 0.020000\n",
-      "2021-09-08 01:32:29,219 epoch 2 - iter 42/73 - loss 0.69832149 - samples/sec: 21.28 - lr: 0.020000\n",
-      "2021-09-08 01:32:29,547 epoch 2 - iter 49/73 - loss 0.70001554 - samples/sec: 21.39 - lr: 0.020000\n",
-      "2021-09-08 01:32:29,874 epoch 2 - iter 56/73 - loss 0.69728956 - samples/sec: 21.40 - lr: 0.020000\n",
-      "2021-09-08 01:32:30,203 epoch 2 - iter 63/73 - loss 0.68953981 - samples/sec: 21.31 - lr: 0.020000\n",
-      "2021-09-08 01:32:30,536 epoch 2 - iter 70/73 - loss 0.68943098 - samples/sec: 21.10 - lr: 0.020000\n",
-      "2021-09-08 01:32:30,680 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:30,681 EPOCH 2 done: loss 0.6889 - lr 0.0200000\n",
-      "2021-09-08 01:32:30,816 DEV : loss 0.5280443429946899 - score 0.0\n",
-      "2021-09-08 01:32:30,817 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:32:30,819 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:31,162 epoch 3 - iter 7/73 - loss 0.70317824 - samples/sec: 21.30 - lr: 0.020000\n",
-      "2021-09-08 01:32:31,489 epoch 3 - iter 14/73 - loss 0.68096086 - samples/sec: 21.43 - lr: 0.020000\n",
-      "2021-09-08 01:32:31,817 epoch 3 - iter 21/73 - loss 0.67389532 - samples/sec: 21.38 - lr: 0.020000\n",
-      "2021-09-08 01:32:32,140 epoch 3 - iter 28/73 - loss 0.67688548 - samples/sec: 21.70 - lr: 0.020000\n",
-      "2021-09-08 01:32:32,470 epoch 3 - iter 35/73 - loss 0.67813119 - samples/sec: 21.28 - lr: 0.020000\n",
-      "2021-09-08 01:32:32,798 epoch 3 - iter 42/73 - loss 0.67327349 - samples/sec: 21.40 - lr: 0.020000\n",
-      "2021-09-08 01:32:33,122 epoch 3 - iter 49/73 - loss 0.67898354 - samples/sec: 21.67 - lr: 0.020000\n",
-      "2021-09-08 01:32:33,440 epoch 3 - iter 56/73 - loss 0.67739264 - samples/sec: 22.01 - lr: 0.020000\n",
-      "2021-09-08 01:32:33,764 epoch 3 - iter 63/73 - loss 0.67711178 - samples/sec: 21.65 - lr: 0.020000\n",
-      "2021-09-08 01:32:34,088 epoch 3 - iter 70/73 - loss 0.68055668 - samples/sec: 21.67 - lr: 0.020000\n",
-      "2021-09-08 01:32:34,231 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:34,231 EPOCH 3 done: loss 0.6809 - lr 0.0200000\n",
-      "2021-09-08 01:32:34,360 DEV : loss 0.5465387105941772 - score 0.125\n",
-      "2021-09-08 01:32:34,361 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:18:39,680 epoch 1 - iter 7/73 - loss 0.21462611 - samples/sec: 12.20 - lr: 0.020000\n",
+      "2021-09-21 21:18:40,288 epoch 1 - iter 14/73 - loss 0.48099213 - samples/sec: 11.54 - lr: 0.020000\n",
+      "2021-09-21 21:18:40,924 epoch 1 - iter 21/73 - loss 0.60575631 - samples/sec: 11.02 - lr: 0.020000\n",
+      "2021-09-21 21:18:41,566 epoch 1 - iter 28/73 - loss 0.63697268 - samples/sec: 10.90 - lr: 0.020000\n",
+      "2021-09-21 21:18:42,244 epoch 1 - iter 35/73 - loss 0.60401564 - samples/sec: 10.34 - lr: 0.020000\n",
+      "2021-09-21 21:18:42,874 epoch 1 - iter 42/73 - loss 0.60258795 - samples/sec: 11.12 - lr: 0.020000\n",
+      "2021-09-21 21:18:43,536 epoch 1 - iter 49/73 - loss 0.58741920 - samples/sec: 10.59 - lr: 0.020000\n",
+      "2021-09-21 21:18:44,168 epoch 1 - iter 56/73 - loss 0.61352651 - samples/sec: 11.08 - lr: 0.020000\n",
+      "2021-09-21 21:18:44,828 epoch 1 - iter 63/73 - loss 0.61623027 - samples/sec: 10.61 - lr: 0.020000\n",
+      "2021-09-21 21:18:45,486 epoch 1 - iter 70/73 - loss 0.61499879 - samples/sec: 10.65 - lr: 0.020000\n",
+      "2021-09-21 21:18:45,760 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:45,760 EPOCH 1 done: loss 0.6051 - lr 0.0200000\n",
+      "2021-09-21 21:18:46,325 DEV : loss 0.4780804514884949 - score 0.125\n",
+      "2021-09-21 21:18:46,326 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:32:38,532 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:38,871 epoch 4 - iter 7/73 - loss 0.64205480 - samples/sec: 21.61 - lr: 0.020000\n",
-      "2021-09-08 01:32:39,196 epoch 4 - iter 14/73 - loss 0.66911005 - samples/sec: 21.57 - lr: 0.020000\n",
-      "2021-09-08 01:32:39,521 epoch 4 - iter 21/73 - loss 0.65249652 - samples/sec: 21.63 - lr: 0.020000\n",
-      "2021-09-08 01:32:39,844 epoch 4 - iter 28/73 - loss 0.66123202 - samples/sec: 21.70 - lr: 0.020000\n",
-      "2021-09-08 01:32:40,174 epoch 4 - iter 35/73 - loss 0.66858729 - samples/sec: 21.28 - lr: 0.020000\n",
-      "2021-09-08 01:32:40,502 epoch 4 - iter 42/73 - loss 0.66742423 - samples/sec: 21.34 - lr: 0.020000\n",
-      "2021-09-08 01:32:40,825 epoch 4 - iter 49/73 - loss 0.66554755 - samples/sec: 21.71 - lr: 0.020000\n",
-      "2021-09-08 01:32:41,131 epoch 4 - iter 56/73 - loss 0.66381306 - samples/sec: 22.94 - lr: 0.020000\n",
-      "2021-09-08 01:32:41,434 epoch 4 - iter 63/73 - loss 0.66423484 - samples/sec: 23.16 - lr: 0.020000\n",
-      "2021-09-08 01:32:41,737 epoch 4 - iter 70/73 - loss 0.66246638 - samples/sec: 23.15 - lr: 0.020000\n",
-      "2021-09-08 01:32:41,872 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:41,872 EPOCH 4 done: loss 0.6653 - lr 0.0200000\n",
-      "2021-09-08 01:32:42,129 DEV : loss 0.5247688889503479 - score 0.125\n",
-      "2021-09-08 01:32:42,130 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:32:47,619 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:47,938 epoch 5 - iter 7/73 - loss 0.65498515 - samples/sec: 23.05 - lr: 0.020000\n",
-      "2021-09-08 01:32:48,244 epoch 5 - iter 14/73 - loss 0.66326145 - samples/sec: 22.98 - lr: 0.020000\n",
-      "2021-09-08 01:32:48,552 epoch 5 - iter 21/73 - loss 0.64786627 - samples/sec: 22.74 - lr: 0.020000\n",
-      "2021-09-08 01:32:48,862 epoch 5 - iter 28/73 - loss 0.65185025 - samples/sec: 22.67 - lr: 0.020000\n",
-      "2021-09-08 01:32:49,166 epoch 5 - iter 35/73 - loss 0.65096742 - samples/sec: 23.07 - lr: 0.020000\n",
-      "2021-09-08 01:32:49,474 epoch 5 - iter 42/73 - loss 0.64744990 - samples/sec: 22.76 - lr: 0.020000\n",
-      "2021-09-08 01:32:49,779 epoch 5 - iter 49/73 - loss 0.64739107 - samples/sec: 23.04 - lr: 0.020000\n",
-      "2021-09-08 01:32:50,083 epoch 5 - iter 56/73 - loss 0.64716557 - samples/sec: 23.05 - lr: 0.020000\n",
-      "2021-09-08 01:32:50,391 epoch 5 - iter 63/73 - loss 0.64497125 - samples/sec: 22.77 - lr: 0.020000\n",
-      "2021-09-08 01:32:50,696 epoch 5 - iter 70/73 - loss 0.64725537 - samples/sec: 23.01 - lr: 0.020000\n",
-      "2021-09-08 01:32:50,829 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:50,830 EPOCH 5 done: loss 0.6466 - lr 0.0200000\n",
-      "2021-09-08 01:32:51,055 DEV : loss 0.47357094287872314 - score 0.125\n",
-      "2021-09-08 01:32:51,056 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:32:55,429 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:55,748 epoch 6 - iter 7/73 - loss 0.65486043 - samples/sec: 23.10 - lr: 0.020000\n",
-      "2021-09-08 01:32:56,056 epoch 6 - iter 14/73 - loss 0.65557006 - samples/sec: 22.78 - lr: 0.020000\n",
-      "2021-09-08 01:32:56,360 epoch 6 - iter 21/73 - loss 0.66276182 - samples/sec: 23.02 - lr: 0.020000\n",
-      "2021-09-08 01:32:56,664 epoch 6 - iter 28/73 - loss 0.65905822 - samples/sec: 23.07 - lr: 0.020000\n",
-      "2021-09-08 01:32:56,967 epoch 6 - iter 35/73 - loss 0.65972203 - samples/sec: 23.21 - lr: 0.020000\n",
-      "2021-09-08 01:32:57,267 epoch 6 - iter 42/73 - loss 0.65542009 - samples/sec: 23.36 - lr: 0.020000\n",
-      "2021-09-08 01:32:57,571 epoch 6 - iter 49/73 - loss 0.65308502 - samples/sec: 23.02 - lr: 0.020000\n",
-      "2021-09-08 01:32:57,872 epoch 6 - iter 56/73 - loss 0.64952729 - samples/sec: 23.32 - lr: 0.020000\n",
-      "2021-09-08 01:32:58,176 epoch 6 - iter 63/73 - loss 0.65126398 - samples/sec: 23.09 - lr: 0.020000\n",
-      "2021-09-08 01:32:58,484 epoch 6 - iter 70/73 - loss 0.64948345 - samples/sec: 22.80 - lr: 0.020000\n"
+      "2021-09-21 21:18:50,496 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:50,933 epoch 2 - iter 7/73 - loss 0.71037379 - samples/sec: 16.62 - lr: 0.020000\n",
+      "2021-09-21 21:18:51,389 epoch 2 - iter 14/73 - loss 0.71402517 - samples/sec: 15.38 - lr: 0.020000\n",
+      "2021-09-21 21:18:51,832 epoch 2 - iter 21/73 - loss 0.69076326 - samples/sec: 15.81 - lr: 0.020000\n",
+      "2021-09-21 21:18:52,287 epoch 2 - iter 28/73 - loss 0.68395885 - samples/sec: 15.44 - lr: 0.020000\n",
+      "2021-09-21 21:18:52,745 epoch 2 - iter 35/73 - loss 0.68205057 - samples/sec: 15.28 - lr: 0.020000\n",
+      "2021-09-21 21:18:53,204 epoch 2 - iter 42/73 - loss 0.67676260 - samples/sec: 15.30 - lr: 0.020000\n",
+      "2021-09-21 21:18:53,676 epoch 2 - iter 49/73 - loss 0.67300602 - samples/sec: 14.86 - lr: 0.020000\n",
+      "2021-09-21 21:18:54,106 epoch 2 - iter 56/73 - loss 0.68455171 - samples/sec: 16.29 - lr: 0.020000\n",
+      "2021-09-21 21:18:54,505 epoch 2 - iter 63/73 - loss 0.68145936 - samples/sec: 17.58 - lr: 0.020000\n",
+      "2021-09-21 21:18:54,881 epoch 2 - iter 70/73 - loss 0.68269018 - samples/sec: 18.66 - lr: 0.020000\n",
+      "2021-09-21 21:18:55,043 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:55,044 EPOCH 2 done: loss 0.6823 - lr 0.0200000\n",
+      "2021-09-21 21:18:55,352 DEV : loss 0.5445137619972229 - score 0.125\n",
+      "2021-09-21 21:18:55,353 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:18:55,418 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:55,827 epoch 3 - iter 7/73 - loss 0.69195399 - samples/sec: 18.10 - lr: 0.020000\n",
+      "2021-09-21 21:18:56,155 epoch 3 - iter 14/73 - loss 0.66422002 - samples/sec: 21.39 - lr: 0.020000\n",
+      "2021-09-21 21:18:56,494 epoch 3 - iter 21/73 - loss 0.66560070 - samples/sec: 20.67 - lr: 0.020000\n",
+      "2021-09-21 21:18:56,829 epoch 3 - iter 28/73 - loss 0.66026985 - samples/sec: 20.95 - lr: 0.020000\n",
+      "2021-09-21 21:18:57,156 epoch 3 - iter 35/73 - loss 0.65554657 - samples/sec: 21.51 - lr: 0.020000\n",
+      "2021-09-21 21:18:57,493 epoch 3 - iter 42/73 - loss 0.65672907 - samples/sec: 20.80 - lr: 0.020000\n",
+      "2021-09-21 21:18:57,829 epoch 3 - iter 49/73 - loss 0.65477447 - samples/sec: 20.87 - lr: 0.020000\n",
+      "2021-09-21 21:18:58,165 epoch 3 - iter 56/73 - loss 0.65575417 - samples/sec: 20.92 - lr: 0.020000\n",
+      "2021-09-21 21:18:58,511 epoch 3 - iter 63/73 - loss 0.65785181 - samples/sec: 20.29 - lr: 0.020000\n",
+      "2021-09-21 21:18:58,850 epoch 3 - iter 70/73 - loss 0.65941031 - samples/sec: 20.71 - lr: 0.020000\n",
+      "2021-09-21 21:18:59,000 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:59,001 EPOCH 3 done: loss 0.6613 - lr 0.0200000\n",
+      "2021-09-21 21:19:13,737 DEV : loss 0.44899922609329224 - score 0.0\n",
+      "2021-09-21 21:19:13,799 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:19:13,860 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:14,509 epoch 4 - iter 7/73 - loss 0.65031983 - samples/sec: 11.40 - lr: 0.020000\n",
+      "2021-09-21 21:19:15,075 epoch 4 - iter 14/73 - loss 0.65307499 - samples/sec: 12.39 - lr: 0.020000\n",
+      "2021-09-21 21:19:15,718 epoch 4 - iter 21/73 - loss 0.65373349 - samples/sec: 10.89 - lr: 0.020000\n",
+      "2021-09-21 21:19:16,298 epoch 4 - iter 28/73 - loss 0.65706056 - samples/sec: 12.08 - lr: 0.020000\n",
+      "2021-09-21 21:19:16,857 epoch 4 - iter 35/73 - loss 0.64854700 - samples/sec: 12.53 - lr: 0.020000\n",
+      "2021-09-21 21:19:17,341 epoch 4 - iter 42/73 - loss 0.64645065 - samples/sec: 14.50 - lr: 0.020000\n",
+      "2021-09-21 21:19:17,862 epoch 4 - iter 49/73 - loss 0.64651504 - samples/sec: 13.45 - lr: 0.020000\n",
+      "2021-09-21 21:19:18,338 epoch 4 - iter 56/73 - loss 0.64427808 - samples/sec: 14.73 - lr: 0.020000\n",
+      "2021-09-21 21:19:18,762 epoch 4 - iter 63/73 - loss 0.64372925 - samples/sec: 16.54 - lr: 0.020000\n",
+      "2021-09-21 21:19:19,238 epoch 4 - iter 70/73 - loss 0.64736314 - samples/sec: 14.73 - lr: 0.020000\n",
+      "2021-09-21 21:19:19,479 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:19,479 EPOCH 4 done: loss 0.6476 - lr 0.0200000\n",
+      "2021-09-21 21:19:20,018 DEV : loss 0.4645424485206604 - score 0.0\n",
+      "2021-09-21 21:19:20,019 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:19:20,103 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:20,616 epoch 5 - iter 7/73 - loss 0.61270991 - samples/sec: 14.83 - lr: 0.020000\n",
+      "2021-09-21 21:19:21,000 epoch 5 - iter 14/73 - loss 0.62831724 - samples/sec: 18.25 - lr: 0.020000\n",
+      "2021-09-21 21:19:21,433 epoch 5 - iter 21/73 - loss 0.64750273 - samples/sec: 16.23 - lr: 0.020000\n",
+      "2021-09-21 21:19:21,844 epoch 5 - iter 28/73 - loss 0.64462716 - samples/sec: 17.06 - lr: 0.020000\n",
+      "2021-09-21 21:19:22,234 epoch 5 - iter 35/73 - loss 0.64809588 - samples/sec: 18.00 - lr: 0.020000\n",
+      "2021-09-21 21:19:22,642 epoch 5 - iter 42/73 - loss 0.65193417 - samples/sec: 17.17 - lr: 0.020000\n",
+      "2021-09-21 21:19:23,060 epoch 5 - iter 49/73 - loss 0.64831122 - samples/sec: 16.78 - lr: 0.020000\n",
+      "2021-09-21 21:19:23,424 epoch 5 - iter 56/73 - loss 0.65092074 - samples/sec: 19.29 - lr: 0.020000\n",
+      "2021-09-21 21:19:23,814 epoch 5 - iter 63/73 - loss 0.65058128 - samples/sec: 17.97 - lr: 0.020000\n",
+      "2021-09-21 21:19:24,222 epoch 5 - iter 70/73 - loss 0.64977367 - samples/sec: 17.19 - lr: 0.020000\n",
+      "2021-09-21 21:19:24,422 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:24,423 EPOCH 5 done: loss 0.6496 - lr 0.0200000\n",
+      "2021-09-21 21:19:25,922 DEV : loss 0.4820827543735504 - score 0.0\n",
+      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:19:25,923 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:19:25,938 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:26,578 epoch 6 - iter 7/73 - loss 0.61965593 - samples/sec: 12.77 - lr: 0.010000\n",
+      "2021-09-21 21:19:27,082 epoch 6 - iter 14/73 - loss 0.63178096 - samples/sec: 13.90 - lr: 0.010000\n",
+      "2021-09-21 21:19:27,609 epoch 6 - iter 21/73 - loss 0.63369872 - samples/sec: 13.29 - lr: 0.010000\n",
+      "2021-09-21 21:19:28,093 epoch 6 - iter 28/73 - loss 0.63182316 - samples/sec: 14.49 - lr: 0.010000\n",
+      "2021-09-21 21:19:28,604 epoch 6 - iter 35/73 - loss 0.63525176 - samples/sec: 13.73 - lr: 0.010000\n",
+      "2021-09-21 21:19:29,138 epoch 6 - iter 42/73 - loss 0.63803559 - samples/sec: 13.13 - lr: 0.010000\n",
+      "2021-09-21 21:19:29,641 epoch 6 - iter 49/73 - loss 0.63972143 - samples/sec: 13.92 - lr: 0.010000\n",
+      "2021-09-21 21:19:30,131 epoch 6 - iter 56/73 - loss 0.64063132 - samples/sec: 14.31 - lr: 0.010000\n",
+      "2021-09-21 21:19:30,586 epoch 6 - iter 63/73 - loss 0.64278809 - samples/sec: 15.39 - lr: 0.010000\n",
+      "2021-09-21 21:19:31,219 epoch 6 - iter 70/73 - loss 0.64306540 - samples/sec: 11.08 - lr: 0.010000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:32:58,616 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:58,617 EPOCH 6 done: loss 0.6488 - lr 0.0200000\n",
-      "2021-09-08 01:32:58,746 DEV : loss 0.48398590087890625 - score 0.125\n",
-      "2021-09-08 01:32:58,747 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:32:58,749 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:59,067 epoch 7 - iter 7/73 - loss 0.62977975 - samples/sec: 23.04 - lr: 0.020000\n",
-      "2021-09-08 01:32:59,402 epoch 7 - iter 14/73 - loss 0.66592367 - samples/sec: 20.90 - lr: 0.020000\n",
-      "2021-09-08 01:32:59,725 epoch 7 - iter 21/73 - loss 0.67564270 - samples/sec: 21.76 - lr: 0.020000\n",
-      "2021-09-08 01:33:00,047 epoch 7 - iter 28/73 - loss 0.68729149 - samples/sec: 21.77 - lr: 0.020000\n",
-      "2021-09-08 01:33:00,401 epoch 7 - iter 35/73 - loss 0.67880883 - samples/sec: 19.85 - lr: 0.020000\n",
-      "2021-09-08 01:33:00,706 epoch 7 - iter 42/73 - loss 0.67180673 - samples/sec: 23.00 - lr: 0.020000\n",
-      "2021-09-08 01:33:01,014 epoch 7 - iter 49/73 - loss 0.66801186 - samples/sec: 22.72 - lr: 0.020000\n",
-      "2021-09-08 01:33:01,324 epoch 7 - iter 56/73 - loss 0.66362848 - samples/sec: 22.64 - lr: 0.020000\n",
-      "2021-09-08 01:33:01,640 epoch 7 - iter 63/73 - loss 0.66126333 - samples/sec: 22.23 - lr: 0.020000\n",
-      "2021-09-08 01:33:01,962 epoch 7 - iter 70/73 - loss 0.66224408 - samples/sec: 21.77 - lr: 0.020000\n",
-      "2021-09-08 01:33:02,095 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:02,095 EPOCH 7 done: loss 0.6624 - lr 0.0200000\n",
-      "2021-09-08 01:33:02,272 DEV : loss 0.46075505018234253 - score 0.25\n",
-      "2021-09-08 01:33:02,273 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:33:07,659 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:07,976 epoch 8 - iter 7/73 - loss 0.65582129 - samples/sec: 23.16 - lr: 0.020000\n",
-      "2021-09-08 01:33:08,280 epoch 8 - iter 14/73 - loss 0.65174540 - samples/sec: 23.14 - lr: 0.020000\n",
-      "2021-09-08 01:33:08,585 epoch 8 - iter 21/73 - loss 0.64800986 - samples/sec: 22.97 - lr: 0.020000\n",
-      "2021-09-08 01:33:08,895 epoch 8 - iter 28/73 - loss 0.64430286 - samples/sec: 22.61 - lr: 0.020000\n",
-      "2021-09-08 01:33:09,202 epoch 8 - iter 35/73 - loss 0.64223085 - samples/sec: 22.84 - lr: 0.020000\n",
-      "2021-09-08 01:33:09,507 epoch 8 - iter 42/73 - loss 0.64241782 - samples/sec: 23.01 - lr: 0.020000\n",
-      "2021-09-08 01:33:09,810 epoch 8 - iter 49/73 - loss 0.63801996 - samples/sec: 23.18 - lr: 0.020000\n",
-      "2021-09-08 01:33:10,123 epoch 8 - iter 56/73 - loss 0.64116413 - samples/sec: 22.42 - lr: 0.020000\n",
-      "2021-09-08 01:33:10,445 epoch 8 - iter 63/73 - loss 0.64467635 - samples/sec: 21.80 - lr: 0.020000\n",
-      "2021-09-08 01:33:10,764 epoch 8 - iter 70/73 - loss 0.64540552 - samples/sec: 21.96 - lr: 0.020000\n",
-      "2021-09-08 01:33:10,917 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:10,917 EPOCH 8 done: loss 0.6462 - lr 0.0200000\n",
-      "2021-09-08 01:33:11,149 DEV : loss 0.7708401083946228 - score 0.125\n",
-      "2021-09-08 01:33:11,150 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:33:11,221 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:11,566 epoch 9 - iter 7/73 - loss 0.69150519 - samples/sec: 21.32 - lr: 0.020000\n",
-      "2021-09-08 01:33:11,893 epoch 9 - iter 14/73 - loss 0.70418798 - samples/sec: 21.41 - lr: 0.020000\n",
-      "2021-09-08 01:33:12,212 epoch 9 - iter 21/73 - loss 0.68489087 - samples/sec: 22.05 - lr: 0.020000\n",
-      "2021-09-08 01:33:12,536 epoch 9 - iter 28/73 - loss 0.67436714 - samples/sec: 21.66 - lr: 0.020000\n",
-      "2021-09-08 01:33:12,859 epoch 9 - iter 35/73 - loss 0.65879619 - samples/sec: 21.70 - lr: 0.020000\n",
-      "2021-09-08 01:33:13,182 epoch 9 - iter 42/73 - loss 0.65812095 - samples/sec: 21.77 - lr: 0.020000\n",
-      "2021-09-08 01:33:13,499 epoch 9 - iter 49/73 - loss 0.65951360 - samples/sec: 22.14 - lr: 0.020000\n",
-      "2021-09-08 01:33:13,821 epoch 9 - iter 56/73 - loss 0.65870859 - samples/sec: 21.76 - lr: 0.020000\n",
-      "2021-09-08 01:33:14,130 epoch 9 - iter 63/73 - loss 0.65521355 - samples/sec: 22.76 - lr: 0.020000\n",
-      "2021-09-08 01:33:14,485 epoch 9 - iter 70/73 - loss 0.65900223 - samples/sec: 19.72 - lr: 0.020000\n",
-      "2021-09-08 01:33:14,644 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:14,645 EPOCH 9 done: loss 0.6587 - lr 0.0200000\n",
-      "2021-09-08 01:33:14,839 DEV : loss 0.497514933347702 - score 0.125\n",
-      "2021-09-08 01:33:14,840 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:33:14,842 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:15,229 epoch 10 - iter 7/73 - loss 0.64216816 - samples/sec: 19.40 - lr: 0.020000\n",
-      "2021-09-08 01:33:15,555 epoch 10 - iter 14/73 - loss 0.63279872 - samples/sec: 21.50 - lr: 0.020000\n",
-      "2021-09-08 01:33:15,886 epoch 10 - iter 21/73 - loss 0.63485121 - samples/sec: 21.22 - lr: 0.020000\n",
-      "2021-09-08 01:33:16,200 epoch 10 - iter 28/73 - loss 0.64654573 - samples/sec: 22.34 - lr: 0.020000\n",
-      "2021-09-08 01:33:16,503 epoch 10 - iter 35/73 - loss 0.64793131 - samples/sec: 23.13 - lr: 0.020000\n",
-      "2021-09-08 01:33:16,805 epoch 10 - iter 42/73 - loss 0.64800040 - samples/sec: 23.24 - lr: 0.020000\n",
-      "2021-09-08 01:33:17,113 epoch 10 - iter 49/73 - loss 0.64662290 - samples/sec: 22.78 - lr: 0.020000\n",
-      "2021-09-08 01:33:17,417 epoch 10 - iter 56/73 - loss 0.65077279 - samples/sec: 23.11 - lr: 0.020000\n",
-      "2021-09-08 01:33:17,727 epoch 10 - iter 63/73 - loss 0.64922940 - samples/sec: 22.64 - lr: 0.020000\n",
-      "2021-09-08 01:33:18,037 epoch 10 - iter 70/73 - loss 0.65198871 - samples/sec: 22.63 - lr: 0.020000\n",
-      "2021-09-08 01:33:18,170 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:18,171 EPOCH 10 done: loss 0.6515 - lr 0.0200000\n",
-      "2021-09-08 01:33:18,427 DEV : loss 0.45541641116142273 - score 0.125\n",
-      "2021-09-08 01:33:18,428 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:33:22,527 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:22,527 Testing using best model ...\n",
-      "2021-09-08 01:33:22,529 loading file None/best-model.pt\n",
+      "2021-09-21 21:19:31,512 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:31,512 EPOCH 6 done: loss 0.6420 - lr 0.0100000\n",
+      "2021-09-21 21:19:32,158 DEV : loss 0.47756069898605347 - score 0.0\n",
+      "2021-09-21 21:19:32,159 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:19:32,160 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:32,882 epoch 7 - iter 7/73 - loss 0.62555244 - samples/sec: 11.27 - lr: 0.010000\n",
+      "2021-09-21 21:19:33,468 epoch 7 - iter 14/73 - loss 0.63623785 - samples/sec: 11.96 - lr: 0.010000\n",
+      "2021-09-21 21:19:33,994 epoch 7 - iter 21/73 - loss 0.64393338 - samples/sec: 13.34 - lr: 0.010000\n",
+      "2021-09-21 21:19:34,535 epoch 7 - iter 28/73 - loss 0.64606505 - samples/sec: 12.96 - lr: 0.010000\n",
+      "2021-09-21 21:19:35,019 epoch 7 - iter 35/73 - loss 0.64699602 - samples/sec: 14.48 - lr: 0.010000\n",
+      "2021-09-21 21:19:35,515 epoch 7 - iter 42/73 - loss 0.64960957 - samples/sec: 14.12 - lr: 0.010000\n",
+      "2021-09-21 21:19:36,006 epoch 7 - iter 49/73 - loss 0.65202534 - samples/sec: 14.30 - lr: 0.010000\n",
+      "2021-09-21 21:19:36,502 epoch 7 - iter 56/73 - loss 0.65034274 - samples/sec: 14.12 - lr: 0.010000\n",
+      "2021-09-21 21:19:37,025 epoch 7 - iter 63/73 - loss 0.65045163 - samples/sec: 13.40 - lr: 0.010000\n",
+      "2021-09-21 21:19:37,455 epoch 7 - iter 70/73 - loss 0.64947024 - samples/sec: 16.30 - lr: 0.010000\n",
+      "2021-09-21 21:19:37,631 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:37,631 EPOCH 7 done: loss 0.6507 - lr 0.0100000\n",
+      "2021-09-21 21:19:38,060 DEV : loss 0.5081771016120911 - score 0.0\n",
+      "2021-09-21 21:19:38,061 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:19:38,063 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:38,630 epoch 8 - iter 7/73 - loss 0.64161723 - samples/sec: 13.28 - lr: 0.010000\n",
+      "2021-09-21 21:19:39,145 epoch 8 - iter 14/73 - loss 0.63534157 - samples/sec: 13.61 - lr: 0.010000\n",
+      "2021-09-21 21:19:39,628 epoch 8 - iter 21/73 - loss 0.63498471 - samples/sec: 14.52 - lr: 0.010000\n",
+      "2021-09-21 21:19:40,073 epoch 8 - iter 28/73 - loss 0.63642628 - samples/sec: 15.75 - lr: 0.010000\n",
+      "2021-09-21 21:19:40,519 epoch 8 - iter 35/73 - loss 0.63730904 - samples/sec: 15.75 - lr: 0.010000\n",
+      "2021-09-21 21:19:41,034 epoch 8 - iter 42/73 - loss 0.63965743 - samples/sec: 13.61 - lr: 0.010000\n",
+      "2021-09-21 21:19:44,120 epoch 8 - iter 49/73 - loss 0.63710055 - samples/sec: 16.54 - lr: 0.010000\n",
+      "2021-09-21 21:19:44,551 epoch 8 - iter 56/73 - loss 0.63529345 - samples/sec: 16.29 - lr: 0.010000\n",
+      "2021-09-21 21:19:44,985 epoch 8 - iter 63/73 - loss 0.63718890 - samples/sec: 16.16 - lr: 0.010000\n",
+      "2021-09-21 21:19:45,405 epoch 8 - iter 70/73 - loss 0.63688653 - samples/sec: 16.67 - lr: 0.010000\n",
+      "2021-09-21 21:19:45,587 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:45,588 EPOCH 8 done: loss 0.6366 - lr 0.0100000\n",
+      "2021-09-21 21:19:46,088 DEV : loss 0.4733853340148926 - score 0.0\n",
+      "2021-09-21 21:19:46,089 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:19:46,263 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:46,684 epoch 9 - iter 7/73 - loss 0.63504334 - samples/sec: 17.69 - lr: 0.010000\n",
+      "2021-09-21 21:19:47,093 epoch 9 - iter 14/73 - loss 0.63923649 - samples/sec: 17.13 - lr: 0.010000\n",
+      "2021-09-21 21:19:47,464 epoch 9 - iter 21/73 - loss 0.63789078 - samples/sec: 18.92 - lr: 0.010000\n",
+      "2021-09-21 21:19:47,851 epoch 9 - iter 28/73 - loss 0.63703653 - samples/sec: 18.13 - lr: 0.010000\n",
+      "2021-09-21 21:19:48,294 epoch 9 - iter 35/73 - loss 0.64008872 - samples/sec: 15.80 - lr: 0.010000\n",
+      "2021-09-21 21:19:48,778 epoch 9 - iter 42/73 - loss 0.63763642 - samples/sec: 14.49 - lr: 0.010000\n",
+      "2021-09-21 21:19:49,275 epoch 9 - iter 49/73 - loss 0.63742938 - samples/sec: 14.11 - lr: 0.010000\n",
+      "2021-09-21 21:19:49,745 epoch 9 - iter 56/73 - loss 0.64091110 - samples/sec: 14.92 - lr: 0.010000\n",
+      "2021-09-21 21:19:50,263 epoch 9 - iter 63/73 - loss 0.64222279 - samples/sec: 13.55 - lr: 0.010000\n",
+      "2021-09-21 21:19:50,778 epoch 9 - iter 70/73 - loss 0.64435179 - samples/sec: 13.60 - lr: 0.010000\n",
+      "2021-09-21 21:19:51,007 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:51,008 EPOCH 9 done: loss 0.6456 - lr 0.0100000\n",
+      "2021-09-21 21:19:51,468 DEV : loss 0.5129958391189575 - score 0.0\n",
+      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 21:19:51,469 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:19:51,471 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:52,088 epoch 10 - iter 7/73 - loss 0.66042258 - samples/sec: 12.68 - lr: 0.005000\n",
+      "2021-09-21 21:19:52,668 epoch 10 - iter 14/73 - loss 0.64219600 - samples/sec: 12.10 - lr: 0.005000\n",
+      "2021-09-21 21:19:53,177 epoch 10 - iter 21/73 - loss 0.64368479 - samples/sec: 13.75 - lr: 0.005000\n",
+      "2021-09-21 21:19:53,720 epoch 10 - iter 28/73 - loss 0.64724014 - samples/sec: 12.91 - lr: 0.005000\n",
+      "2021-09-21 21:19:54,257 epoch 10 - iter 35/73 - loss 0.64376574 - samples/sec: 13.05 - lr: 0.005000\n",
+      "2021-09-21 21:19:54,789 epoch 10 - iter 42/73 - loss 0.64713065 - samples/sec: 13.20 - lr: 0.005000\n",
+      "2021-09-21 21:19:55,316 epoch 10 - iter 49/73 - loss 0.64926793 - samples/sec: 13.28 - lr: 0.005000\n",
+      "2021-09-21 21:19:55,864 epoch 10 - iter 56/73 - loss 0.64514535 - samples/sec: 12.81 - lr: 0.005000\n",
+      "2021-09-21 21:19:56,411 epoch 10 - iter 63/73 - loss 0.64375780 - samples/sec: 12.82 - lr: 0.005000\n",
+      "2021-09-21 21:19:56,961 epoch 10 - iter 70/73 - loss 0.64591271 - samples/sec: 12.74 - lr: 0.005000\n",
+      "2021-09-21 21:19:57,179 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:57,180 EPOCH 10 done: loss 0.6458 - lr 0.0050000\n",
+      "2021-09-21 21:19:57,721 DEV : loss 0.4925203323364258 - score 0.0\n",
+      "2021-09-21 21:19:57,722 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:20:01,554 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:01,554 Testing using best model ...\n",
+      "2021-09-21 21:20:01,556 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:33:29,485 \t0.1111\n",
-      "2021-09-08 01:33:29,486 \n",
+      "2021-09-21 21:20:14,465 \t0.0\n",
+      "2021-09-21 21:20:14,466 \n",
       "Results:\n",
-      "- F-score (micro) 0.1111\n",
-      "- F-score (macro) 0.0556\n",
-      "- Accuracy 0.1111\n",
+      "- F-score (micro) 0.0\n",
+      "- F-score (macro) 0.0\n",
+      "- Accuracy 0.0\n",
       "\n",
       "By class:\n",
       "                              precision    recall  f1-score   support\n",
       "\n",
-      "     this text expresses joy     1.0000    0.3333    0.5000         3\n",
+      "     this text expresses joy     0.0000    0.0000    0.0000         2\n",
       "this text expresses surprise     0.0000    0.0000    0.0000         0\n",
-      "    this text expresses love     0.0000    0.0000    0.0000         2\n",
-      " this text expresses disgust     0.0000    0.0000    0.0000         3\n",
+      "    this text expresses love     0.0000    0.0000    0.0000         1\n",
+      " this text expresses disgust     0.0000    0.0000    0.0000         1\n",
       "    this text expresses fear     0.0000    0.0000    0.0000         0\n",
       "   this text expresses anger     0.0000    0.0000    0.0000         0\n",
-      "   this text expresses guilt     0.0000    0.0000    0.0000         0\n",
-      "   this text expresses shame     0.0000    0.0000    0.0000         1\n",
+      "   this text expresses guilt     0.0000    0.0000    0.0000         2\n",
+      "   this text expresses shame     0.0000    0.0000    0.0000         3\n",
       " this text expresses sadness     0.0000    0.0000    0.0000         0\n",
       "\n",
-      "                   micro avg     0.1111    0.1111    0.1111         9\n",
-      "                   macro avg     0.1111    0.0370    0.0556         9\n",
-      "                weighted avg     0.3333    0.1111    0.1667         9\n",
-      "                 samples avg     0.1111    0.1111    0.1111         9\n",
+      "                   micro avg     0.0000    0.0000    0.0000         9\n",
+      "                   macro avg     0.0000    0.0000    0.0000         9\n",
+      "                weighted avg     0.0000    0.0000    0.0000         9\n",
+      "                 samples avg     0.0000    0.0000    0.0000         9\n",
       "\n",
-      "2021-09-08 01:33:29,486 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:19,144 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
+      "2021-09-21 21:20:14,466 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:40,752 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:34:23,460 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:21:45,233 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 82/82 [00:00<00:00, 42221.08it/s]"
+      "100%|██████████| 82/82 [00:00<00:00, 43992.44it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:34:23,464 [b'this text expresses joy', b'this text expresses surprise', b'this text expresses love', b'this text expresses disgust', b'this text expresses fear', b'this text expresses anger', b'this text expresses guilt', b'this text expresses shame', b'this text expresses sadness']\n",
-      "2021-09-08 01:34:23,474 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:23,476 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:21:45,237 [b'this text expresses joy', b'this text expresses surprise', b'this text expresses love', b'this text expresses disgust', b'this text expresses fear', b'this text expresses anger', b'this text expresses guilt', b'this text expresses shame', b'this text expresses sadness']\n",
+      "2021-09-21 21:21:45,246 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:45,248 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -4368,25 +4380,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:34:23,476 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:23,477 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
-      "2021-09-08 01:34:23,477 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:23,477 Parameters:\n",
-      "2021-09-08 01:34:23,478  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:34:23,478  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:34:23,478  - patience: \"3\"\n",
-      "2021-09-08 01:34:23,478  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:34:23,479  - max_epochs: \"10\"\n",
-      "2021-09-08 01:34:23,479  - shuffle: \"True\"\n",
-      "2021-09-08 01:34:23,479  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:34:23,480  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:34:23,480 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:23,480 Model training base path: \"None\"\n",
-      "2021-09-08 01:34:23,480 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:23,481 Device: cuda:1\n",
-      "2021-09-08 01:34:23,481 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:23,481 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:34:23,492 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:21:45,248 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:45,249 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
+      "2021-09-21 21:21:45,249 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:45,249 Parameters:\n",
+      "2021-09-21 21:21:45,250  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:21:45,250  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:21:45,250  - patience: \"3\"\n",
+      "2021-09-21 21:21:45,250  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:21:45,251  - max_epochs: \"10\"\n",
+      "2021-09-21 21:21:45,251  - shuffle: \"True\"\n",
+      "2021-09-21 21:21:45,251  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:21:45,252  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:21:45,252 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:45,252 Model training base path: \"None\"\n",
+      "2021-09-21 21:21:45,252 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:45,253 Device: cuda:0\n",
+      "2021-09-21 21:21:45,253 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:45,253 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:21:45,260 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -4400,214 +4412,214 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:34:23,865 epoch 1 - iter 7/73 - loss 0.17883855 - samples/sec: 19.90 - lr: 0.020000\n",
-      "2021-09-08 01:34:24,254 epoch 1 - iter 14/73 - loss 0.44436928 - samples/sec: 18.01 - lr: 0.020000\n",
-      "2021-09-08 01:34:24,622 epoch 1 - iter 21/73 - loss 0.50614354 - samples/sec: 19.04 - lr: 0.020000\n",
-      "2021-09-08 01:34:25,000 epoch 1 - iter 28/73 - loss 0.58563138 - samples/sec: 18.59 - lr: 0.020000\n",
-      "2021-09-08 01:34:25,382 epoch 1 - iter 35/73 - loss 0.59715359 - samples/sec: 18.36 - lr: 0.020000\n",
-      "2021-09-08 01:34:25,752 epoch 1 - iter 42/73 - loss 0.58533628 - samples/sec: 18.95 - lr: 0.020000\n",
-      "2021-09-08 01:34:26,126 epoch 1 - iter 49/73 - loss 0.56663384 - samples/sec: 18.77 - lr: 0.020000\n",
-      "2021-09-08 01:34:26,520 epoch 1 - iter 56/73 - loss 0.58298861 - samples/sec: 17.79 - lr: 0.020000\n",
-      "2021-09-08 01:34:26,905 epoch 1 - iter 63/73 - loss 0.58238564 - samples/sec: 18.21 - lr: 0.020000\n",
-      "2021-09-08 01:34:27,289 epoch 1 - iter 70/73 - loss 0.57970956 - samples/sec: 18.27 - lr: 0.020000\n",
-      "2021-09-08 01:34:27,433 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:27,433 EPOCH 1 done: loss 0.5620 - lr 0.0200000\n",
-      "2021-09-08 01:34:27,664 DEV : loss 0.7548017501831055 - score 0.0\n",
-      "2021-09-08 01:34:27,665 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:21:45,720 epoch 1 - iter 7/73 - loss 0.28290112 - samples/sec: 15.92 - lr: 0.020000\n",
+      "2021-09-21 21:21:46,170 epoch 1 - iter 14/73 - loss 0.52943827 - samples/sec: 15.56 - lr: 0.020000\n",
+      "2021-09-21 21:21:46,667 epoch 1 - iter 21/73 - loss 0.57797801 - samples/sec: 14.11 - lr: 0.020000\n",
+      "2021-09-21 21:21:47,100 epoch 1 - iter 28/73 - loss 0.58855656 - samples/sec: 16.20 - lr: 0.020000\n",
+      "2021-09-21 21:21:47,564 epoch 1 - iter 35/73 - loss 0.61553086 - samples/sec: 15.10 - lr: 0.020000\n",
+      "2021-09-21 21:21:48,041 epoch 1 - iter 42/73 - loss 0.62515821 - samples/sec: 14.69 - lr: 0.020000\n",
+      "2021-09-21 21:21:48,504 epoch 1 - iter 49/73 - loss 0.59856316 - samples/sec: 15.15 - lr: 0.020000\n",
+      "2021-09-21 21:21:48,980 epoch 1 - iter 56/73 - loss 0.60939007 - samples/sec: 14.74 - lr: 0.020000\n",
+      "2021-09-21 21:21:49,501 epoch 1 - iter 63/73 - loss 0.61602693 - samples/sec: 13.46 - lr: 0.020000\n",
+      "2021-09-21 21:21:50,042 epoch 1 - iter 70/73 - loss 0.62391948 - samples/sec: 12.95 - lr: 0.020000\n",
+      "2021-09-21 21:21:50,292 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:50,292 EPOCH 1 done: loss 0.6251 - lr 0.0200000\n",
+      "2021-09-21 21:21:50,755 DEV : loss 0.4624209702014923 - score 0.125\n",
+      "2021-09-21 21:21:50,756 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:34:34,423 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:34,854 epoch 2 - iter 7/73 - loss 0.89594376 - samples/sec: 16.98 - lr: 0.020000\n",
-      "2021-09-08 01:34:35,251 epoch 2 - iter 14/73 - loss 0.87761060 - samples/sec: 17.66 - lr: 0.020000\n",
-      "2021-09-08 01:34:35,661 epoch 2 - iter 21/73 - loss 0.81764150 - samples/sec: 17.09 - lr: 0.020000\n",
-      "2021-09-08 01:34:36,010 epoch 2 - iter 28/73 - loss 0.77287129 - samples/sec: 20.13 - lr: 0.020000\n",
-      "2021-09-08 01:34:36,360 epoch 2 - iter 35/73 - loss 0.76759550 - samples/sec: 20.07 - lr: 0.020000\n",
-      "2021-09-08 01:34:36,691 epoch 2 - iter 42/73 - loss 0.75298266 - samples/sec: 21.22 - lr: 0.020000\n",
-      "2021-09-08 01:34:37,020 epoch 2 - iter 49/73 - loss 0.74365661 - samples/sec: 21.31 - lr: 0.020000\n",
-      "2021-09-08 01:34:37,337 epoch 2 - iter 56/73 - loss 0.73487317 - samples/sec: 22.10 - lr: 0.020000\n",
-      "2021-09-08 01:34:37,648 epoch 2 - iter 63/73 - loss 0.72374759 - samples/sec: 22.61 - lr: 0.020000\n",
-      "2021-09-08 01:34:37,958 epoch 2 - iter 70/73 - loss 0.71621386 - samples/sec: 22.59 - lr: 0.020000\n",
-      "2021-09-08 01:34:38,088 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:38,089 EPOCH 2 done: loss 0.7121 - lr 0.0200000\n",
-      "2021-09-08 01:34:38,318 DEV : loss 0.4555271565914154 - score 0.125\n",
-      "2021-09-08 01:34:38,319 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:21:55,031 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:55,661 epoch 2 - iter 7/73 - loss 0.64191852 - samples/sec: 12.53 - lr: 0.020000\n",
+      "2021-09-21 21:21:56,243 epoch 2 - iter 14/73 - loss 0.68830195 - samples/sec: 12.04 - lr: 0.020000\n",
+      "2021-09-21 21:21:56,853 epoch 2 - iter 21/73 - loss 0.67539909 - samples/sec: 11.49 - lr: 0.020000\n",
+      "2021-09-21 21:21:57,366 epoch 2 - iter 28/73 - loss 0.69628086 - samples/sec: 13.65 - lr: 0.020000\n",
+      "2021-09-21 21:21:57,944 epoch 2 - iter 35/73 - loss 0.68593554 - samples/sec: 12.14 - lr: 0.020000\n",
+      "2021-09-21 21:21:58,500 epoch 2 - iter 42/73 - loss 0.68452011 - samples/sec: 12.60 - lr: 0.020000\n",
+      "2021-09-21 21:21:59,070 epoch 2 - iter 49/73 - loss 0.67760464 - samples/sec: 12.29 - lr: 0.020000\n",
+      "2021-09-21 21:21:59,655 epoch 2 - iter 56/73 - loss 0.68557404 - samples/sec: 11.98 - lr: 0.020000\n",
+      "2021-09-21 21:22:00,243 epoch 2 - iter 63/73 - loss 0.68587299 - samples/sec: 11.92 - lr: 0.020000\n",
+      "2021-09-21 21:22:00,845 epoch 2 - iter 70/73 - loss 0.68823457 - samples/sec: 11.64 - lr: 0.020000\n",
+      "2021-09-21 21:22:01,059 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:01,059 EPOCH 2 done: loss 0.6856 - lr 0.0200000\n",
+      "2021-09-21 21:22:01,425 DEV : loss 0.40174269676208496 - score 0.25\n",
+      "2021-09-21 21:22:01,426 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:34:44,692 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:45,030 epoch 3 - iter 7/73 - loss 0.65066094 - samples/sec: 21.65 - lr: 0.020000\n",
-      "2021-09-08 01:34:45,356 epoch 3 - iter 14/73 - loss 0.63643845 - samples/sec: 21.56 - lr: 0.020000\n",
-      "2021-09-08 01:34:45,659 epoch 3 - iter 21/73 - loss 0.63804312 - samples/sec: 23.17 - lr: 0.020000\n",
-      "2021-09-08 01:34:45,972 epoch 3 - iter 28/73 - loss 0.63810484 - samples/sec: 22.43 - lr: 0.020000\n",
-      "2021-09-08 01:34:46,277 epoch 3 - iter 35/73 - loss 0.64187504 - samples/sec: 23.01 - lr: 0.020000\n",
-      "2021-09-08 01:34:46,581 epoch 3 - iter 42/73 - loss 0.64136223 - samples/sec: 23.09 - lr: 0.020000\n",
-      "2021-09-08 01:34:46,884 epoch 3 - iter 49/73 - loss 0.64209329 - samples/sec: 23.18 - lr: 0.020000\n",
-      "2021-09-08 01:34:47,188 epoch 3 - iter 56/73 - loss 0.64171865 - samples/sec: 23.07 - lr: 0.020000\n",
-      "2021-09-08 01:34:47,499 epoch 3 - iter 63/73 - loss 0.64041292 - samples/sec: 22.57 - lr: 0.020000\n",
-      "2021-09-08 01:34:47,828 epoch 3 - iter 70/73 - loss 0.64244473 - samples/sec: 21.36 - lr: 0.020000\n",
-      "2021-09-08 01:34:47,963 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:47,964 EPOCH 3 done: loss 0.6425 - lr 0.0200000\n",
-      "2021-09-08 01:34:48,215 DEV : loss 0.4767349362373352 - score 0.125\n",
-      "2021-09-08 01:34:48,216 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:34:48,304 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:48,642 epoch 4 - iter 7/73 - loss 0.63974715 - samples/sec: 21.73 - lr: 0.020000\n",
-      "2021-09-08 01:34:48,969 epoch 4 - iter 14/73 - loss 0.64051830 - samples/sec: 21.47 - lr: 0.020000\n",
-      "2021-09-08 01:34:49,282 epoch 4 - iter 21/73 - loss 0.63449273 - samples/sec: 22.39 - lr: 0.020000\n",
-      "2021-09-08 01:34:49,594 epoch 4 - iter 28/73 - loss 0.63533972 - samples/sec: 22.51 - lr: 0.020000\n",
-      "2021-09-08 01:34:49,901 epoch 4 - iter 35/73 - loss 0.63465311 - samples/sec: 22.87 - lr: 0.020000\n",
-      "2021-09-08 01:34:50,209 epoch 4 - iter 42/73 - loss 0.63600506 - samples/sec: 22.75 - lr: 0.020000\n",
-      "2021-09-08 01:34:50,516 epoch 4 - iter 49/73 - loss 0.63602597 - samples/sec: 22.85 - lr: 0.020000\n",
-      "2021-09-08 01:34:50,825 epoch 4 - iter 56/73 - loss 0.63750365 - samples/sec: 22.76 - lr: 0.020000\n",
-      "2021-09-08 01:34:51,129 epoch 4 - iter 63/73 - loss 0.63719326 - samples/sec: 23.05 - lr: 0.020000\n",
-      "2021-09-08 01:34:51,434 epoch 4 - iter 70/73 - loss 0.63739763 - samples/sec: 23.00 - lr: 0.020000\n",
-      "2021-09-08 01:34:51,565 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:51,565 EPOCH 4 done: loss 0.6377 - lr 0.0200000\n",
-      "2021-09-08 01:35:00,116 DEV : loss 0.4777582585811615 - score 0.25\n",
-      "2021-09-08 01:35:00,117 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:35:08,657 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:09,065 epoch 5 - iter 7/73 - loss 0.63585475 - samples/sec: 17.92 - lr: 0.020000\n",
-      "2021-09-08 01:35:09,428 epoch 5 - iter 14/73 - loss 0.63464553 - samples/sec: 19.32 - lr: 0.020000\n",
-      "2021-09-08 01:35:09,786 epoch 5 - iter 21/73 - loss 0.63630336 - samples/sec: 19.62 - lr: 0.020000\n",
-      "2021-09-08 01:35:10,131 epoch 5 - iter 28/73 - loss 0.63746525 - samples/sec: 20.34 - lr: 0.020000\n",
-      "2021-09-08 01:35:10,488 epoch 5 - iter 35/73 - loss 0.63424920 - samples/sec: 19.65 - lr: 0.020000\n",
-      "2021-09-08 01:35:10,850 epoch 5 - iter 42/73 - loss 0.63355228 - samples/sec: 19.36 - lr: 0.020000\n",
-      "2021-09-08 01:35:11,231 epoch 5 - iter 49/73 - loss 0.63670312 - samples/sec: 18.39 - lr: 0.020000\n",
-      "2021-09-08 01:35:11,600 epoch 5 - iter 56/73 - loss 0.63589049 - samples/sec: 19.01 - lr: 0.020000\n",
-      "2021-09-08 01:35:11,961 epoch 5 - iter 63/73 - loss 0.63455452 - samples/sec: 19.42 - lr: 0.020000\n",
-      "2021-09-08 01:35:12,319 epoch 5 - iter 70/73 - loss 0.63545715 - samples/sec: 19.60 - lr: 0.020000\n",
-      "2021-09-08 01:35:12,488 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:12,489 EPOCH 5 done: loss 0.6379 - lr 0.0200000\n",
-      "2021-09-08 01:35:12,662 DEV : loss 0.4693751037120819 - score 0.125\n",
-      "2021-09-08 01:35:12,663 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:35:12,665 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:13,022 epoch 6 - iter 7/73 - loss 0.65063900 - samples/sec: 20.55 - lr: 0.020000\n",
-      "2021-09-08 01:35:13,414 epoch 6 - iter 14/73 - loss 0.63645360 - samples/sec: 17.89 - lr: 0.020000\n",
-      "2021-09-08 01:35:13,788 epoch 6 - iter 21/73 - loss 0.64038323 - samples/sec: 18.76 - lr: 0.020000\n",
-      "2021-09-08 01:35:14,166 epoch 6 - iter 28/73 - loss 0.63871216 - samples/sec: 18.58 - lr: 0.020000\n",
-      "2021-09-08 01:35:14,509 epoch 6 - iter 35/73 - loss 0.63645448 - samples/sec: 20.43 - lr: 0.020000\n",
-      "2021-09-08 01:35:14,872 epoch 6 - iter 42/73 - loss 0.63556931 - samples/sec: 19.32 - lr: 0.020000\n",
-      "2021-09-08 01:35:15,234 epoch 6 - iter 49/73 - loss 0.63511970 - samples/sec: 19.37 - lr: 0.020000\n",
-      "2021-09-08 01:35:15,586 epoch 6 - iter 56/73 - loss 0.63320366 - samples/sec: 19.94 - lr: 0.020000\n",
-      "2021-09-08 01:35:15,931 epoch 6 - iter 63/73 - loss 0.63508020 - samples/sec: 20.35 - lr: 0.020000\n",
-      "2021-09-08 01:35:16,304 epoch 6 - iter 70/73 - loss 0.63603511 - samples/sec: 18.81 - lr: 0.020000\n",
-      "2021-09-08 01:35:16,459 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:22:06,599 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:06,983 epoch 3 - iter 7/73 - loss 0.64642934 - samples/sec: 19.04 - lr: 0.020000\n",
+      "2021-09-21 21:22:07,371 epoch 3 - iter 14/73 - loss 0.62966753 - samples/sec: 18.08 - lr: 0.020000\n",
+      "2021-09-21 21:22:07,746 epoch 3 - iter 21/73 - loss 0.65045346 - samples/sec: 18.69 - lr: 0.020000\n",
+      "2021-09-21 21:22:08,135 epoch 3 - iter 28/73 - loss 0.65168490 - samples/sec: 18.02 - lr: 0.020000\n",
+      "2021-09-21 21:22:08,595 epoch 3 - iter 35/73 - loss 0.64647605 - samples/sec: 15.23 - lr: 0.020000\n",
+      "2021-09-21 21:22:09,094 epoch 3 - iter 42/73 - loss 0.64533769 - samples/sec: 14.05 - lr: 0.020000\n",
+      "2021-09-21 21:22:09,597 epoch 3 - iter 49/73 - loss 0.65190237 - samples/sec: 13.95 - lr: 0.020000\n",
+      "2021-09-21 21:22:10,112 epoch 3 - iter 56/73 - loss 0.64954855 - samples/sec: 13.60 - lr: 0.020000\n",
+      "2021-09-21 21:22:10,548 epoch 3 - iter 63/73 - loss 0.65021906 - samples/sec: 16.06 - lr: 0.020000\n",
+      "2021-09-21 21:22:10,958 epoch 3 - iter 70/73 - loss 0.64873473 - samples/sec: 17.10 - lr: 0.020000\n",
+      "2021-09-21 21:22:11,122 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:11,123 EPOCH 3 done: loss 0.6478 - lr 0.0200000\n",
+      "2021-09-21 21:22:11,463 DEV : loss 0.4932517409324646 - score 0.0\n",
+      "2021-09-21 21:22:11,464 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:22:11,541 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:11,967 epoch 4 - iter 7/73 - loss 0.66272666 - samples/sec: 17.48 - lr: 0.020000\n",
+      "2021-09-21 21:22:12,344 epoch 4 - iter 14/73 - loss 0.65992247 - samples/sec: 18.59 - lr: 0.020000\n",
+      "2021-09-21 21:22:12,748 epoch 4 - iter 21/73 - loss 0.65836074 - samples/sec: 17.38 - lr: 0.020000\n",
+      "2021-09-21 21:22:13,157 epoch 4 - iter 28/73 - loss 0.66223062 - samples/sec: 17.12 - lr: 0.020000\n",
+      "2021-09-21 21:22:13,544 epoch 4 - iter 35/73 - loss 0.65724337 - samples/sec: 18.16 - lr: 0.020000\n",
+      "2021-09-21 21:22:13,931 epoch 4 - iter 42/73 - loss 0.65715288 - samples/sec: 18.11 - lr: 0.020000\n",
+      "2021-09-21 21:22:14,351 epoch 4 - iter 49/73 - loss 0.65667159 - samples/sec: 16.68 - lr: 0.020000\n",
+      "2021-09-21 21:22:14,731 epoch 4 - iter 56/73 - loss 0.65497417 - samples/sec: 18.46 - lr: 0.020000\n",
+      "2021-09-21 21:22:15,146 epoch 4 - iter 63/73 - loss 0.65120894 - samples/sec: 16.93 - lr: 0.020000\n",
+      "2021-09-21 21:22:15,533 epoch 4 - iter 70/73 - loss 0.65616339 - samples/sec: 18.13 - lr: 0.020000\n",
+      "2021-09-21 21:22:15,705 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:15,706 EPOCH 4 done: loss 0.6557 - lr 0.0200000\n",
+      "2021-09-21 21:22:15,902 DEV : loss 0.5202808976173401 - score 0.0\n",
+      "2021-09-21 21:22:15,903 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:22:15,904 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:16,325 epoch 5 - iter 7/73 - loss 0.64304472 - samples/sec: 17.77 - lr: 0.020000\n",
+      "2021-09-21 21:22:16,636 epoch 5 - iter 14/73 - loss 0.64535298 - samples/sec: 22.58 - lr: 0.020000\n",
+      "2021-09-21 21:22:17,017 epoch 5 - iter 21/73 - loss 0.63694565 - samples/sec: 18.39 - lr: 0.020000\n",
+      "2021-09-21 21:22:17,415 epoch 5 - iter 28/73 - loss 0.63863359 - samples/sec: 17.63 - lr: 0.020000\n",
+      "2021-09-21 21:22:17,785 epoch 5 - iter 35/73 - loss 0.65172405 - samples/sec: 18.96 - lr: 0.020000\n",
+      "2021-09-21 21:22:18,202 epoch 5 - iter 42/73 - loss 0.65310155 - samples/sec: 16.81 - lr: 0.020000\n",
+      "2021-09-21 21:22:18,616 epoch 5 - iter 49/73 - loss 0.65072842 - samples/sec: 16.95 - lr: 0.020000\n",
+      "2021-09-21 21:22:19,009 epoch 5 - iter 56/73 - loss 0.65389069 - samples/sec: 17.84 - lr: 0.020000\n",
+      "2021-09-21 21:22:19,453 epoch 5 - iter 63/73 - loss 0.65261860 - samples/sec: 15.80 - lr: 0.020000\n",
+      "2021-09-21 21:22:19,882 epoch 5 - iter 70/73 - loss 0.65443454 - samples/sec: 16.33 - lr: 0.020000\n",
+      "2021-09-21 21:22:20,072 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:20,072 EPOCH 5 done: loss 0.6556 - lr 0.0200000\n",
+      "2021-09-21 21:22:20,454 DEV : loss 0.49763166904449463 - score 0.125\n",
+      "2021-09-21 21:22:20,454 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:22:20,458 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:20,970 epoch 6 - iter 7/73 - loss 0.66722825 - samples/sec: 15.61 - lr: 0.020000\n",
+      "2021-09-21 21:22:21,417 epoch 6 - iter 14/73 - loss 0.64442052 - samples/sec: 15.71 - lr: 0.020000\n",
+      "2021-09-21 21:22:21,824 epoch 6 - iter 21/73 - loss 0.64565357 - samples/sec: 17.22 - lr: 0.020000\n",
+      "2021-09-21 21:22:22,300 epoch 6 - iter 28/73 - loss 0.64442490 - samples/sec: 14.71 - lr: 0.020000\n",
+      "2021-09-21 21:22:22,717 epoch 6 - iter 35/73 - loss 0.65219977 - samples/sec: 16.81 - lr: 0.020000\n",
+      "2021-09-21 21:22:23,148 epoch 6 - iter 42/73 - loss 0.65010880 - samples/sec: 16.28 - lr: 0.020000\n",
+      "2021-09-21 21:22:23,595 epoch 6 - iter 49/73 - loss 0.64897934 - samples/sec: 15.70 - lr: 0.020000\n",
+      "2021-09-21 21:22:24,016 epoch 6 - iter 56/73 - loss 0.64932173 - samples/sec: 16.65 - lr: 0.020000\n",
+      "2021-09-21 21:22:24,443 epoch 6 - iter 63/73 - loss 0.64806498 - samples/sec: 16.40 - lr: 0.020000\n",
+      "2021-09-21 21:22:24,892 epoch 6 - iter 70/73 - loss 0.65040501 - samples/sec: 15.60 - lr: 0.020000\n",
+      "2021-09-21 21:22:25,088 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:35:16,460 EPOCH 6 done: loss 0.6349 - lr 0.0200000\n",
-      "2021-09-08 01:35:16,631 DEV : loss 0.48847073316574097 - score 0.125\n",
-      "2021-09-08 01:35:16,631 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:35:16,633 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:17,026 epoch 7 - iter 7/73 - loss 0.65535713 - samples/sec: 18.84 - lr: 0.020000\n",
-      "2021-09-08 01:35:17,381 epoch 7 - iter 14/73 - loss 0.64790069 - samples/sec: 19.71 - lr: 0.020000\n",
-      "2021-09-08 01:35:17,755 epoch 7 - iter 21/73 - loss 0.64796776 - samples/sec: 18.75 - lr: 0.020000\n",
-      "2021-09-08 01:35:18,122 epoch 7 - iter 28/73 - loss 0.64718318 - samples/sec: 19.11 - lr: 0.020000\n",
-      "2021-09-08 01:35:18,481 epoch 7 - iter 35/73 - loss 0.64384397 - samples/sec: 19.59 - lr: 0.020000\n",
-      "2021-09-08 01:35:18,803 epoch 7 - iter 42/73 - loss 0.64400484 - samples/sec: 21.72 - lr: 0.020000\n",
-      "2021-09-08 01:35:19,108 epoch 7 - iter 49/73 - loss 0.64117352 - samples/sec: 23.03 - lr: 0.020000\n",
-      "2021-09-08 01:35:19,417 epoch 7 - iter 56/73 - loss 0.63827511 - samples/sec: 22.75 - lr: 0.020000\n",
-      "2021-09-08 01:35:19,751 epoch 7 - iter 63/73 - loss 0.63872377 - samples/sec: 20.99 - lr: 0.020000\n",
-      "2021-09-08 01:35:20,084 epoch 7 - iter 70/73 - loss 0.63850492 - samples/sec: 21.04 - lr: 0.020000\n",
-      "2021-09-08 01:35:20,226 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:20,227 EPOCH 7 done: loss 0.6390 - lr 0.0200000\n",
-      "2021-09-08 01:35:20,462 DEV : loss 0.4580986797809601 - score 0.125\n",
-      "2021-09-08 01:35:20,462 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:35:20,558 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:20,877 epoch 8 - iter 7/73 - loss 0.63863508 - samples/sec: 23.03 - lr: 0.020000\n",
-      "2021-09-08 01:35:21,183 epoch 8 - iter 14/73 - loss 0.64048653 - samples/sec: 22.89 - lr: 0.020000\n",
-      "2021-09-08 01:35:21,508 epoch 8 - iter 21/73 - loss 0.64113920 - samples/sec: 21.57 - lr: 0.020000\n",
-      "2021-09-08 01:35:21,844 epoch 8 - iter 28/73 - loss 0.63983271 - samples/sec: 20.91 - lr: 0.020000\n",
-      "2021-09-08 01:35:22,174 epoch 8 - iter 35/73 - loss 0.63811882 - samples/sec: 21.28 - lr: 0.020000\n",
-      "2021-09-08 01:35:22,504 epoch 8 - iter 42/73 - loss 0.63563187 - samples/sec: 21.26 - lr: 0.020000\n",
-      "2021-09-08 01:35:22,831 epoch 8 - iter 49/73 - loss 0.64097975 - samples/sec: 21.43 - lr: 0.020000\n",
-      "2021-09-08 01:35:23,136 epoch 8 - iter 56/73 - loss 0.64021420 - samples/sec: 23.04 - lr: 0.020000\n",
-      "2021-09-08 01:35:23,443 epoch 8 - iter 63/73 - loss 0.63991365 - samples/sec: 22.83 - lr: 0.020000\n",
-      "2021-09-08 01:35:23,763 epoch 8 - iter 70/73 - loss 0.63956214 - samples/sec: 21.94 - lr: 0.020000\n",
-      "2021-09-08 01:35:23,905 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:23,905 EPOCH 8 done: loss 0.6390 - lr 0.0200000\n",
-      "2021-09-08 01:35:24,182 DEV : loss 0.5054582953453064 - score 0.125\n",
-      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:35:24,183 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:35:25,570 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:25,891 epoch 9 - iter 7/73 - loss 0.63981341 - samples/sec: 22.91 - lr: 0.010000\n",
-      "2021-09-08 01:35:26,192 epoch 9 - iter 14/73 - loss 0.63877561 - samples/sec: 23.26 - lr: 0.010000\n",
-      "2021-09-08 01:35:26,492 epoch 9 - iter 21/73 - loss 0.63620705 - samples/sec: 23.43 - lr: 0.010000\n",
-      "2021-09-08 01:35:26,796 epoch 9 - iter 28/73 - loss 0.63543553 - samples/sec: 23.07 - lr: 0.010000\n",
-      "2021-09-08 01:35:27,097 epoch 9 - iter 35/73 - loss 0.64225036 - samples/sec: 23.26 - lr: 0.010000\n",
-      "2021-09-08 01:35:27,406 epoch 9 - iter 42/73 - loss 0.64159157 - samples/sec: 22.76 - lr: 0.010000\n",
-      "2021-09-08 01:35:27,708 epoch 9 - iter 49/73 - loss 0.63970000 - samples/sec: 23.24 - lr: 0.010000\n",
-      "2021-09-08 01:35:28,015 epoch 9 - iter 56/73 - loss 0.63984054 - samples/sec: 22.81 - lr: 0.010000\n",
-      "2021-09-08 01:35:28,326 epoch 9 - iter 63/73 - loss 0.64097414 - samples/sec: 22.58 - lr: 0.010000\n",
-      "2021-09-08 01:35:28,633 epoch 9 - iter 70/73 - loss 0.64065072 - samples/sec: 22.86 - lr: 0.010000\n",
-      "2021-09-08 01:35:28,768 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:28,769 EPOCH 9 done: loss 0.6405 - lr 0.0100000\n",
-      "2021-09-08 01:35:28,984 DEV : loss 0.493093341588974 - score 0.125\n",
-      "2021-09-08 01:35:28,985 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:35:29,059 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:29,378 epoch 10 - iter 7/73 - loss 0.62397434 - samples/sec: 23.02 - lr: 0.010000\n",
-      "2021-09-08 01:35:29,701 epoch 10 - iter 14/73 - loss 0.63626219 - samples/sec: 21.70 - lr: 0.010000\n",
-      "2021-09-08 01:35:30,008 epoch 10 - iter 21/73 - loss 0.63542901 - samples/sec: 22.82 - lr: 0.010000\n",
-      "2021-09-08 01:35:30,328 epoch 10 - iter 28/73 - loss 0.63526319 - samples/sec: 21.95 - lr: 0.010000\n",
-      "2021-09-08 01:35:30,656 epoch 10 - iter 35/73 - loss 0.63272464 - samples/sec: 21.42 - lr: 0.010000\n",
-      "2021-09-08 01:35:30,990 epoch 10 - iter 42/73 - loss 0.63257080 - samples/sec: 20.98 - lr: 0.010000\n",
-      "2021-09-08 01:35:31,310 epoch 10 - iter 49/73 - loss 0.63472986 - samples/sec: 21.96 - lr: 0.010000\n",
-      "2021-09-08 01:35:31,629 epoch 10 - iter 56/73 - loss 0.63503198 - samples/sec: 21.97 - lr: 0.010000\n",
-      "2021-09-08 01:35:31,944 epoch 10 - iter 63/73 - loss 0.63556897 - samples/sec: 22.27 - lr: 0.010000\n",
-      "2021-09-08 01:35:32,267 epoch 10 - iter 70/73 - loss 0.63580022 - samples/sec: 21.75 - lr: 0.010000\n",
-      "2021-09-08 01:35:32,402 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:32,403 EPOCH 10 done: loss 0.6353 - lr 0.0100000\n",
-      "2021-09-08 01:35:32,559 DEV : loss 0.4762292802333832 - score 0.125\n",
-      "2021-09-08 01:35:32,560 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:35:37,197 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:37,197 Testing using best model ...\n",
-      "2021-09-08 01:35:37,222 loading file None/best-model.pt\n",
+      "2021-09-21 21:22:25,089 EPOCH 6 done: loss 0.6484 - lr 0.0200000\n",
+      "2021-09-21 21:22:25,393 DEV : loss 0.4444483518600464 - score 0.125\n",
+      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:22:25,394 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:22:25,396 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:25,874 epoch 7 - iter 7/73 - loss 0.64217235 - samples/sec: 15.86 - lr: 0.010000\n",
+      "2021-09-21 21:22:26,281 epoch 7 - iter 14/73 - loss 0.64666532 - samples/sec: 17.24 - lr: 0.010000\n",
+      "2021-09-21 21:22:26,731 epoch 7 - iter 21/73 - loss 0.64161834 - samples/sec: 15.57 - lr: 0.010000\n",
+      "2021-09-21 21:22:27,178 epoch 7 - iter 28/73 - loss 0.64036487 - samples/sec: 15.68 - lr: 0.010000\n",
+      "2021-09-21 21:22:27,624 epoch 7 - iter 35/73 - loss 0.63340984 - samples/sec: 15.74 - lr: 0.010000\n",
+      "2021-09-21 21:22:28,086 epoch 7 - iter 42/73 - loss 0.63157519 - samples/sec: 15.16 - lr: 0.010000\n",
+      "2021-09-21 21:22:28,477 epoch 7 - iter 49/73 - loss 0.63309299 - samples/sec: 17.91 - lr: 0.010000\n",
+      "2021-09-21 21:22:28,891 epoch 7 - iter 56/73 - loss 0.63403552 - samples/sec: 16.95 - lr: 0.010000\n",
+      "2021-09-21 21:22:29,314 epoch 7 - iter 63/73 - loss 0.63193388 - samples/sec: 16.59 - lr: 0.010000\n",
+      "2021-09-21 21:22:29,742 epoch 7 - iter 70/73 - loss 0.63474656 - samples/sec: 16.35 - lr: 0.010000\n",
+      "2021-09-21 21:22:29,941 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:29,941 EPOCH 7 done: loss 0.6354 - lr 0.0100000\n",
+      "2021-09-21 21:22:30,213 DEV : loss 0.43806469440460205 - score 0.5\n",
+      "2021-09-21 21:22:30,214 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:22:34,132 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:34,564 epoch 8 - iter 7/73 - loss 0.62752321 - samples/sec: 17.50 - lr: 0.010000\n",
+      "2021-09-21 21:22:35,000 epoch 8 - iter 14/73 - loss 0.63639122 - samples/sec: 16.07 - lr: 0.010000\n",
+      "2021-09-21 21:22:35,428 epoch 8 - iter 21/73 - loss 0.63158758 - samples/sec: 16.39 - lr: 0.010000\n",
+      "2021-09-21 21:22:35,900 epoch 8 - iter 28/73 - loss 0.62230042 - samples/sec: 14.87 - lr: 0.010000\n",
+      "2021-09-21 21:22:36,359 epoch 8 - iter 35/73 - loss 0.62392192 - samples/sec: 15.27 - lr: 0.010000\n",
+      "2021-09-21 21:22:36,774 epoch 8 - iter 42/73 - loss 0.62407705 - samples/sec: 16.90 - lr: 0.010000\n",
+      "2021-09-21 21:22:37,150 epoch 8 - iter 49/73 - loss 0.62816472 - samples/sec: 18.65 - lr: 0.010000\n",
+      "2021-09-21 21:22:37,611 epoch 8 - iter 56/73 - loss 0.63086611 - samples/sec: 15.20 - lr: 0.010000\n",
+      "2021-09-21 21:22:38,062 epoch 8 - iter 63/73 - loss 0.63039749 - samples/sec: 15.55 - lr: 0.010000\n",
+      "2021-09-21 21:22:38,488 epoch 8 - iter 70/73 - loss 0.63409179 - samples/sec: 16.47 - lr: 0.010000\n",
+      "2021-09-21 21:22:38,677 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:38,677 EPOCH 8 done: loss 0.6345 - lr 0.0100000\n",
+      "2021-09-21 21:22:38,983 DEV : loss 0.4820363223552704 - score 0.25\n",
+      "2021-09-21 21:22:38,984 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:22:38,986 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:39,474 epoch 9 - iter 7/73 - loss 0.66275723 - samples/sec: 15.55 - lr: 0.010000\n",
+      "2021-09-21 21:22:39,875 epoch 9 - iter 14/73 - loss 0.68134958 - samples/sec: 17.51 - lr: 0.010000\n",
+      "2021-09-21 21:22:40,260 epoch 9 - iter 21/73 - loss 0.66833438 - samples/sec: 18.21 - lr: 0.010000\n",
+      "2021-09-21 21:22:40,580 epoch 9 - iter 28/73 - loss 0.66281712 - samples/sec: 21.93 - lr: 0.010000\n",
+      "2021-09-21 21:22:40,903 epoch 9 - iter 35/73 - loss 0.66165579 - samples/sec: 21.71 - lr: 0.010000\n",
+      "2021-09-21 21:22:41,230 epoch 9 - iter 42/73 - loss 0.66648522 - samples/sec: 21.43 - lr: 0.010000\n",
+      "2021-09-21 21:22:41,550 epoch 9 - iter 49/73 - loss 0.66462095 - samples/sec: 21.93 - lr: 0.010000\n",
+      "2021-09-21 21:22:41,899 epoch 9 - iter 56/73 - loss 0.66127894 - samples/sec: 20.07 - lr: 0.010000\n",
+      "2021-09-21 21:22:42,215 epoch 9 - iter 63/73 - loss 0.65813759 - samples/sec: 22.18 - lr: 0.010000\n",
+      "2021-09-21 21:22:42,536 epoch 9 - iter 70/73 - loss 0.65938704 - samples/sec: 21.88 - lr: 0.010000\n",
+      "2021-09-21 21:22:42,675 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:42,675 EPOCH 9 done: loss 0.6578 - lr 0.0100000\n",
+      "2021-09-21 21:22:42,858 DEV : loss 0.49949485063552856 - score 0.125\n",
+      "2021-09-21 21:22:42,858 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:22:42,860 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:43,204 epoch 10 - iter 7/73 - loss 0.64134650 - samples/sec: 21.36 - lr: 0.010000\n",
+      "2021-09-21 21:22:43,520 epoch 10 - iter 14/73 - loss 0.64807813 - samples/sec: 22.18 - lr: 0.010000\n",
+      "2021-09-21 21:22:43,838 epoch 10 - iter 21/73 - loss 0.63824590 - samples/sec: 22.07 - lr: 0.010000\n",
+      "2021-09-21 21:22:44,156 epoch 10 - iter 28/73 - loss 0.63898090 - samples/sec: 22.03 - lr: 0.010000\n",
+      "2021-09-21 21:22:44,454 epoch 10 - iter 35/73 - loss 0.64030554 - samples/sec: 23.59 - lr: 0.010000\n",
+      "2021-09-21 21:22:44,769 epoch 10 - iter 42/73 - loss 0.63467361 - samples/sec: 22.25 - lr: 0.010000\n",
+      "2021-09-21 21:22:45,084 epoch 10 - iter 49/73 - loss 0.63932118 - samples/sec: 22.31 - lr: 0.010000\n",
+      "2021-09-21 21:22:45,382 epoch 10 - iter 56/73 - loss 0.63581017 - samples/sec: 23.55 - lr: 0.010000\n",
+      "2021-09-21 21:22:45,681 epoch 10 - iter 63/73 - loss 0.63709968 - samples/sec: 23.43 - lr: 0.010000\n",
+      "2021-09-21 21:22:45,982 epoch 10 - iter 70/73 - loss 0.64220019 - samples/sec: 23.36 - lr: 0.010000\n",
+      "2021-09-21 21:22:46,110 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:46,110 EPOCH 10 done: loss 0.6402 - lr 0.0100000\n",
+      "2021-09-21 21:22:46,241 DEV : loss 0.47376948595046997 - score 0.0\n",
+      "2021-09-21 21:22:46,242 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:22:50,280 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:50,281 Testing using best model ...\n",
+      "2021-09-21 21:22:50,310 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:35:43,849 \t0.2222\n",
-      "2021-09-08 01:35:43,850 \n",
+      "2021-09-21 21:23:02,365 \t0.1111\n",
+      "2021-09-21 21:23:02,366 \n",
       "Results:\n",
-      "- F-score (micro) 0.2222\n",
-      "- F-score (macro) 0.0873\n",
-      "- Accuracy 0.2222\n",
+      "- F-score (micro) 0.1111\n",
+      "- F-score (macro) 0.0317\n",
+      "- Accuracy 0.1111\n",
       "\n",
       "By class:\n",
       "                              precision    recall  f1-score   support\n",
       "\n",
-      "     this text expresses joy     0.3333    1.0000    0.5000         1\n",
-      "this text expresses surprise     0.0000    0.0000    0.0000         0\n",
-      "    this text expresses love     0.0000    0.0000    0.0000         1\n",
-      " this text expresses disgust     0.0000    0.0000    0.0000         1\n",
-      "    this text expresses fear     0.0000    0.0000    0.0000         1\n",
-      "   this text expresses anger     0.0000    0.0000    0.0000         1\n",
+      "     this text expresses joy     0.0000    0.0000    0.0000         0\n",
+      "this text expresses surprise     0.0000    0.0000    0.0000         1\n",
+      "    this text expresses love     0.1667    1.0000    0.2857         1\n",
+      " this text expresses disgust     0.0000    0.0000    0.0000         2\n",
+      "    this text expresses fear     0.0000    0.0000    0.0000         2\n",
+      "   this text expresses anger     0.0000    0.0000    0.0000         0\n",
       "   this text expresses guilt     0.0000    0.0000    0.0000         2\n",
-      "   this text expresses shame     0.1667    1.0000    0.2857         1\n",
-      " this text expresses sadness     0.0000    0.0000    0.0000         1\n",
+      "   this text expresses shame     0.0000    0.0000    0.0000         1\n",
+      " this text expresses sadness     0.0000    0.0000    0.0000         0\n",
       "\n",
-      "                   micro avg     0.2222    0.2222    0.2222         9\n",
-      "                   macro avg     0.0556    0.2222    0.0873         9\n",
-      "                weighted avg     0.0556    0.2222    0.0873         9\n",
-      "                 samples avg     0.2222    0.2222    0.2222         9\n",
+      "                   micro avg     0.1111    0.1111    0.1111         9\n",
+      "                   macro avg     0.0185    0.1111    0.0317         9\n",
+      "                weighted avg     0.0185    0.1111    0.0317         9\n",
+      "                 samples avg     0.1111    0.1111    0.1111         9\n",
       "\n",
-      "2021-09-08 01:35:43,850 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:36,039 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
+      "2021-09-21 21:23:02,367 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:24:28,178 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:36:40,023 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:24:32,078 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 82/82 [00:00<00:00, 51765.94it/s]"
+      "100%|██████████| 82/82 [00:00<00:00, 47478.32it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:36:40,027 [b'this text expresses joy', b'this text expresses surprise', b'this text expresses love', b'this text expresses disgust', b'this text expresses fear', b'this text expresses anger', b'this text expresses guilt', b'this text expresses shame', b'this text expresses sadness']\n",
-      "2021-09-08 01:36:40,037 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:40,038 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:24:32,081 [b'this text expresses joy', b'this text expresses surprise', b'this text expresses love', b'this text expresses disgust', b'this text expresses fear', b'this text expresses anger', b'this text expresses guilt', b'this text expresses shame', b'this text expresses sadness']\n",
+      "2021-09-21 21:24:32,090 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:24:32,092 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -4920,25 +4932,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:36:40,039 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:40,039 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
-      "2021-09-08 01:36:40,039 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:40,040 Parameters:\n",
-      "2021-09-08 01:36:40,040  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:36:40,040  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:36:40,040  - patience: \"3\"\n",
-      "2021-09-08 01:36:40,041  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:36:40,041  - max_epochs: \"10\"\n",
-      "2021-09-08 01:36:40,041  - shuffle: \"True\"\n",
-      "2021-09-08 01:36:40,042  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:36:40,042  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:36:40,042 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:40,042 Model training base path: \"None\"\n",
-      "2021-09-08 01:36:40,043 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:40,043 Device: cuda:1\n",
-      "2021-09-08 01:36:40,043 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:40,044 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:36:40,050 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:24:32,093 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:24:32,093 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
+      "2021-09-21 21:24:32,093 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:24:32,093 Parameters:\n",
+      "2021-09-21 21:24:32,094  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:24:32,094  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:24:32,094  - patience: \"3\"\n",
+      "2021-09-21 21:24:32,095  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:24:32,095  - max_epochs: \"10\"\n",
+      "2021-09-21 21:24:32,095  - shuffle: \"True\"\n",
+      "2021-09-21 21:24:32,095  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:24:32,096  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:24:32,096 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:24:32,096 Model training base path: \"None\"\n",
+      "2021-09-21 21:24:32,097 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:24:32,097 Device: cuda:0\n",
+      "2021-09-21 21:24:32,097 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:24:32,097 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:24:32,104 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -4952,172 +4964,171 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:36:40,375 epoch 1 - iter 7/73 - loss 0.20435529 - samples/sec: 22.56 - lr: 0.020000\n",
-      "2021-09-08 01:36:40,693 epoch 1 - iter 14/73 - loss 0.40761804 - samples/sec: 22.04 - lr: 0.020000\n",
-      "2021-09-08 01:36:41,018 epoch 1 - iter 21/73 - loss 0.56544322 - samples/sec: 21.60 - lr: 0.020000\n",
-      "2021-09-08 01:36:41,346 epoch 1 - iter 28/73 - loss 0.56733712 - samples/sec: 21.36 - lr: 0.020000\n",
-      "2021-09-08 01:36:41,667 epoch 1 - iter 35/73 - loss 0.56167973 - samples/sec: 21.87 - lr: 0.020000\n",
-      "2021-09-08 01:36:41,996 epoch 1 - iter 42/73 - loss 0.54051508 - samples/sec: 21.29 - lr: 0.020000\n",
-      "2021-09-08 01:36:42,381 epoch 1 - iter 49/73 - loss 0.49888428 - samples/sec: 18.22 - lr: 0.020000\n",
-      "2021-09-08 01:36:42,701 epoch 1 - iter 56/73 - loss 0.49400580 - samples/sec: 21.99 - lr: 0.020000\n",
-      "2021-09-08 01:36:43,033 epoch 1 - iter 63/73 - loss 0.52415359 - samples/sec: 21.08 - lr: 0.020000\n",
-      "2021-09-08 01:36:43,392 epoch 1 - iter 70/73 - loss 0.54677845 - samples/sec: 19.57 - lr: 0.020000\n",
-      "2021-09-08 01:36:43,538 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:43,538 EPOCH 1 done: loss 0.5317 - lr 0.0200000\n",
-      "2021-09-08 01:36:43,807 DEV : loss 0.8380573391914368 - score 0.125\n",
-      "2021-09-08 01:36:43,808 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:36:51,765 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:52,113 epoch 2 - iter 7/73 - loss 0.85073719 - samples/sec: 21.22 - lr: 0.020000\n",
-      "2021-09-08 01:36:52,448 epoch 2 - iter 14/73 - loss 0.88567971 - samples/sec: 20.95 - lr: 0.020000\n",
-      "2021-09-08 01:36:52,787 epoch 2 - iter 21/73 - loss 0.75688462 - samples/sec: 20.72 - lr: 0.020000\n",
-      "2021-09-08 01:36:53,122 epoch 2 - iter 28/73 - loss 0.75761931 - samples/sec: 20.95 - lr: 0.020000\n",
-      "2021-09-08 01:36:53,453 epoch 2 - iter 35/73 - loss 0.73250758 - samples/sec: 21.17 - lr: 0.020000\n",
-      "2021-09-08 01:36:53,792 epoch 2 - iter 42/73 - loss 0.74408322 - samples/sec: 20.72 - lr: 0.020000\n",
-      "2021-09-08 01:36:54,185 epoch 2 - iter 49/73 - loss 0.72467239 - samples/sec: 17.87 - lr: 0.020000\n",
-      "2021-09-08 01:36:54,557 epoch 2 - iter 56/73 - loss 0.72268506 - samples/sec: 18.82 - lr: 0.020000\n",
-      "2021-09-08 01:36:54,933 epoch 2 - iter 63/73 - loss 0.71997220 - samples/sec: 18.68 - lr: 0.020000\n",
-      "2021-09-08 01:36:55,288 epoch 2 - iter 70/73 - loss 0.71692709 - samples/sec: 19.78 - lr: 0.020000\n",
-      "2021-09-08 01:36:55,429 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:55,430 EPOCH 2 done: loss 0.7125 - lr 0.0200000\n",
-      "2021-09-08 01:36:55,955 DEV : loss 0.7146743535995483 - score 0.125\n",
-      "2021-09-08 01:36:55,956 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:24:32,425 epoch 1 - iter 7/73 - loss 0.23468122 - samples/sec: 22.84 - lr: 0.020000\n",
+      "2021-09-21 21:24:32,750 epoch 1 - iter 14/73 - loss 0.38677943 - samples/sec: 21.56 - lr: 0.020000\n",
+      "2021-09-21 21:24:33,074 epoch 1 - iter 21/73 - loss 0.50871604 - samples/sec: 21.66 - lr: 0.020000\n",
+      "2021-09-21 21:24:33,393 epoch 1 - iter 28/73 - loss 0.50640031 - samples/sec: 21.96 - lr: 0.020000\n",
+      "2021-09-21 21:24:33,717 epoch 1 - iter 35/73 - loss 0.55570869 - samples/sec: 21.71 - lr: 0.020000\n",
+      "2021-09-21 21:24:34,040 epoch 1 - iter 42/73 - loss 0.52623001 - samples/sec: 21.71 - lr: 0.020000\n",
+      "2021-09-21 21:24:34,359 epoch 1 - iter 49/73 - loss 0.51952403 - samples/sec: 21.96 - lr: 0.020000\n",
+      "2021-09-21 21:24:34,679 epoch 1 - iter 56/73 - loss 0.52797136 - samples/sec: 21.94 - lr: 0.020000\n",
+      "2021-09-21 21:24:35,062 epoch 1 - iter 63/73 - loss 0.56485122 - samples/sec: 18.31 - lr: 0.020000\n",
+      "2021-09-21 21:24:35,490 epoch 1 - iter 70/73 - loss 0.57830138 - samples/sec: 16.35 - lr: 0.020000\n",
+      "2021-09-21 21:24:35,698 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:24:35,699 EPOCH 1 done: loss 0.5639 - lr 0.0200000\n",
+      "2021-09-21 21:24:35,938 DEV : loss 0.5643942356109619 - score 0.125\n",
+      "2021-09-21 21:24:35,939 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:37:04,808 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:05,207 epoch 3 - iter 7/73 - loss 0.73432084 - samples/sec: 18.47 - lr: 0.020000\n",
-      "2021-09-08 01:37:05,592 epoch 3 - iter 14/73 - loss 0.75879261 - samples/sec: 18.20 - lr: 0.020000\n",
-      "2021-09-08 01:37:05,970 epoch 3 - iter 21/73 - loss 0.75630089 - samples/sec: 18.56 - lr: 0.020000\n",
-      "2021-09-08 01:37:06,329 epoch 3 - iter 28/73 - loss 0.74738824 - samples/sec: 19.57 - lr: 0.020000\n",
-      "2021-09-08 01:37:06,706 epoch 3 - iter 35/73 - loss 0.73911303 - samples/sec: 18.58 - lr: 0.020000\n",
-      "2021-09-08 01:37:07,073 epoch 3 - iter 42/73 - loss 0.73169225 - samples/sec: 19.10 - lr: 0.020000\n",
-      "2021-09-08 01:37:07,421 epoch 3 - iter 49/73 - loss 0.71890284 - samples/sec: 20.17 - lr: 0.020000\n",
-      "2021-09-08 01:37:07,786 epoch 3 - iter 56/73 - loss 0.72207902 - samples/sec: 19.20 - lr: 0.020000\n",
-      "2021-09-08 01:37:08,145 epoch 3 - iter 63/73 - loss 0.71165398 - samples/sec: 19.56 - lr: 0.020000\n",
-      "2021-09-08 01:37:08,498 epoch 3 - iter 70/73 - loss 0.70615169 - samples/sec: 19.87 - lr: 0.020000\n",
-      "2021-09-08 01:37:08,652 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:08,653 EPOCH 3 done: loss 0.7050 - lr 0.0200000\n",
-      "2021-09-08 01:37:08,829 DEV : loss 0.4977949857711792 - score 0.125\n",
-      "2021-09-08 01:37:08,830 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:24:40,072 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:24:40,416 epoch 2 - iter 7/73 - loss 0.89779369 - samples/sec: 21.34 - lr: 0.020000\n",
+      "2021-09-21 21:24:40,743 epoch 2 - iter 14/73 - loss 0.77142232 - samples/sec: 21.46 - lr: 0.020000\n",
+      "2021-09-21 21:24:41,066 epoch 2 - iter 21/73 - loss 0.74842360 - samples/sec: 21.71 - lr: 0.020000\n",
+      "2021-09-21 21:24:41,395 epoch 2 - iter 28/73 - loss 0.73275727 - samples/sec: 21.30 - lr: 0.020000\n",
+      "2021-09-21 21:24:41,719 epoch 2 - iter 35/73 - loss 0.72049182 - samples/sec: 21.64 - lr: 0.020000\n",
+      "2021-09-21 21:24:42,042 epoch 2 - iter 42/73 - loss 0.70981527 - samples/sec: 21.76 - lr: 0.020000\n",
+      "2021-09-21 21:24:42,367 epoch 2 - iter 49/73 - loss 0.70042421 - samples/sec: 21.54 - lr: 0.020000\n",
+      "2021-09-21 21:24:42,754 epoch 2 - iter 56/73 - loss 0.70055135 - samples/sec: 18.13 - lr: 0.020000\n",
+      "2021-09-21 21:24:43,194 epoch 2 - iter 63/73 - loss 0.69606855 - samples/sec: 15.92 - lr: 0.020000\n",
+      "2021-09-21 21:24:43,618 epoch 2 - iter 70/73 - loss 0.69088519 - samples/sec: 16.56 - lr: 0.020000\n",
+      "2021-09-21 21:24:43,806 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:24:43,806 EPOCH 2 done: loss 0.6889 - lr 0.0200000\n",
+      "2021-09-21 21:24:44,035 DEV : loss 0.43145865201950073 - score 0.25\n",
+      "2021-09-21 21:24:44,035 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:37:12,886 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:13,254 epoch 4 - iter 7/73 - loss 0.60072394 - samples/sec: 19.90 - lr: 0.020000\n",
-      "2021-09-08 01:37:13,622 epoch 4 - iter 14/73 - loss 0.61923205 - samples/sec: 19.07 - lr: 0.020000\n",
-      "2021-09-08 01:37:13,995 epoch 4 - iter 21/73 - loss 0.64379448 - samples/sec: 18.79 - lr: 0.020000\n",
-      "2021-09-08 01:37:14,342 epoch 4 - iter 28/73 - loss 0.64346946 - samples/sec: 20.18 - lr: 0.020000\n",
-      "2021-09-08 01:37:14,646 epoch 4 - iter 35/73 - loss 0.63443113 - samples/sec: 23.09 - lr: 0.020000\n",
-      "2021-09-08 01:37:14,951 epoch 4 - iter 42/73 - loss 0.63936951 - samples/sec: 23.04 - lr: 0.020000\n",
-      "2021-09-08 01:37:15,259 epoch 4 - iter 49/73 - loss 0.64180236 - samples/sec: 22.77 - lr: 0.020000\n",
-      "2021-09-08 01:37:15,561 epoch 4 - iter 56/73 - loss 0.64904413 - samples/sec: 23.26 - lr: 0.020000\n",
-      "2021-09-08 01:37:15,878 epoch 4 - iter 63/73 - loss 0.64494424 - samples/sec: 22.10 - lr: 0.020000\n",
-      "2021-09-08 01:37:16,195 epoch 4 - iter 70/73 - loss 0.64371044 - samples/sec: 22.15 - lr: 0.020000\n",
-      "2021-09-08 01:37:16,331 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:16,331 EPOCH 4 done: loss 0.6424 - lr 0.0200000\n",
-      "2021-09-08 01:37:16,555 DEV : loss 0.47742918133735657 - score 0.125\n",
-      "2021-09-08 01:37:16,556 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:37:24,880 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:25,206 epoch 5 - iter 7/73 - loss 0.66405131 - samples/sec: 22.55 - lr: 0.020000\n",
-      "2021-09-08 01:37:25,515 epoch 5 - iter 14/73 - loss 0.66490327 - samples/sec: 22.78 - lr: 0.020000\n",
-      "2021-09-08 01:37:25,817 epoch 5 - iter 21/73 - loss 0.65510884 - samples/sec: 23.27 - lr: 0.020000\n",
-      "2021-09-08 01:37:26,117 epoch 5 - iter 28/73 - loss 0.65316595 - samples/sec: 23.37 - lr: 0.020000\n",
-      "2021-09-08 01:37:26,421 epoch 5 - iter 35/73 - loss 0.65275325 - samples/sec: 23.02 - lr: 0.020000\n",
-      "2021-09-08 01:37:26,726 epoch 5 - iter 42/73 - loss 0.65526504 - samples/sec: 23.06 - lr: 0.020000\n",
-      "2021-09-08 01:37:27,028 epoch 5 - iter 49/73 - loss 0.65545089 - samples/sec: 23.23 - lr: 0.020000\n",
-      "2021-09-08 01:37:27,330 epoch 5 - iter 56/73 - loss 0.65452235 - samples/sec: 23.17 - lr: 0.020000\n",
-      "2021-09-08 01:37:27,638 epoch 5 - iter 63/73 - loss 0.65440420 - samples/sec: 22.81 - lr: 0.020000\n",
-      "2021-09-08 01:37:27,939 epoch 5 - iter 70/73 - loss 0.65174983 - samples/sec: 23.28 - lr: 0.020000\n",
-      "2021-09-08 01:37:28,069 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:28,070 EPOCH 5 done: loss 0.6528 - lr 0.0200000\n",
-      "2021-09-08 01:37:28,197 DEV : loss 0.475519061088562 - score 0.0\n",
-      "2021-09-08 01:37:28,197 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:37:28,199 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:28,515 epoch 6 - iter 7/73 - loss 0.67039237 - samples/sec: 23.23 - lr: 0.020000\n",
-      "2021-09-08 01:37:28,828 epoch 6 - iter 14/73 - loss 0.66865754 - samples/sec: 22.40 - lr: 0.020000\n",
-      "2021-09-08 01:37:29,147 epoch 6 - iter 21/73 - loss 0.65602912 - samples/sec: 22.01 - lr: 0.020000\n",
-      "2021-09-08 01:37:29,463 epoch 6 - iter 28/73 - loss 0.65383137 - samples/sec: 22.19 - lr: 0.020000\n",
-      "2021-09-08 01:37:29,784 epoch 6 - iter 35/73 - loss 0.65251558 - samples/sec: 21.86 - lr: 0.020000\n",
-      "2021-09-08 01:37:30,108 epoch 6 - iter 42/73 - loss 0.65112994 - samples/sec: 21.69 - lr: 0.020000\n",
-      "2021-09-08 01:37:30,432 epoch 6 - iter 49/73 - loss 0.65458995 - samples/sec: 21.61 - lr: 0.020000\n",
-      "2021-09-08 01:37:30,749 epoch 6 - iter 56/73 - loss 0.64673975 - samples/sec: 22.16 - lr: 0.020000\n",
-      "2021-09-08 01:37:31,068 epoch 6 - iter 63/73 - loss 0.64773465 - samples/sec: 21.97 - lr: 0.020000\n",
-      "2021-09-08 01:37:31,389 epoch 6 - iter 70/73 - loss 0.64397361 - samples/sec: 21.89 - lr: 0.020000\n"
+      "2021-09-21 21:24:53,343 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:24:53,682 epoch 3 - iter 7/73 - loss 0.72794563 - samples/sec: 21.65 - lr: 0.020000\n",
+      "2021-09-21 21:24:54,002 epoch 3 - iter 14/73 - loss 0.70026405 - samples/sec: 21.95 - lr: 0.020000\n",
+      "2021-09-21 21:24:54,317 epoch 3 - iter 21/73 - loss 0.68830445 - samples/sec: 22.21 - lr: 0.020000\n",
+      "2021-09-21 21:24:54,623 epoch 3 - iter 28/73 - loss 0.68536397 - samples/sec: 22.93 - lr: 0.020000\n",
+      "2021-09-21 21:24:54,928 epoch 3 - iter 35/73 - loss 0.67779317 - samples/sec: 23.05 - lr: 0.020000\n",
+      "2021-09-21 21:24:55,249 epoch 3 - iter 42/73 - loss 0.67167253 - samples/sec: 21.81 - lr: 0.020000\n",
+      "2021-09-21 21:24:55,646 epoch 3 - iter 49/73 - loss 0.66050629 - samples/sec: 17.69 - lr: 0.020000\n",
+      "2021-09-21 21:24:56,049 epoch 3 - iter 56/73 - loss 0.65937814 - samples/sec: 17.38 - lr: 0.020000\n",
+      "2021-09-21 21:24:56,446 epoch 3 - iter 63/73 - loss 0.65654600 - samples/sec: 17.68 - lr: 0.020000\n",
+      "2021-09-21 21:24:56,885 epoch 3 - iter 70/73 - loss 0.65785544 - samples/sec: 15.95 - lr: 0.020000\n",
+      "2021-09-21 21:24:57,047 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:24:57,047 EPOCH 3 done: loss 0.6577 - lr 0.0200000\n",
+      "2021-09-21 21:24:57,290 DEV : loss 0.4548851251602173 - score 0.125\n",
+      "2021-09-21 21:24:57,291 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:24:57,293 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:24:57,715 epoch 4 - iter 7/73 - loss 0.68136258 - samples/sec: 17.64 - lr: 0.020000\n",
+      "2021-09-21 21:24:58,094 epoch 4 - iter 14/73 - loss 0.66028830 - samples/sec: 18.51 - lr: 0.020000\n",
+      "2021-09-21 21:24:58,489 epoch 4 - iter 21/73 - loss 0.65385871 - samples/sec: 17.74 - lr: 0.020000\n",
+      "2021-09-21 21:24:58,881 epoch 4 - iter 28/73 - loss 0.64126066 - samples/sec: 17.87 - lr: 0.020000\n",
+      "2021-09-21 21:24:59,281 epoch 4 - iter 35/73 - loss 0.64710904 - samples/sec: 17.55 - lr: 0.020000\n",
+      "2021-09-21 21:24:59,667 epoch 4 - iter 42/73 - loss 0.64625522 - samples/sec: 18.17 - lr: 0.020000\n",
+      "2021-09-21 21:25:00,055 epoch 4 - iter 49/73 - loss 0.64696444 - samples/sec: 18.06 - lr: 0.020000\n",
+      "2021-09-21 21:25:00,422 epoch 4 - iter 56/73 - loss 0.64606383 - samples/sec: 19.12 - lr: 0.020000\n",
+      "2021-09-21 21:25:00,809 epoch 4 - iter 63/73 - loss 0.64359474 - samples/sec: 18.10 - lr: 0.020000\n",
+      "2021-09-21 21:25:01,252 epoch 4 - iter 70/73 - loss 0.64526469 - samples/sec: 15.82 - lr: 0.020000\n",
+      "2021-09-21 21:25:01,428 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:25:01,428 EPOCH 4 done: loss 0.6453 - lr 0.0200000\n",
+      "2021-09-21 21:25:01,635 DEV : loss 0.4095485210418701 - score 0.125\n",
+      "2021-09-21 21:25:01,635 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:25:01,637 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:25:02,094 epoch 5 - iter 7/73 - loss 0.64321974 - samples/sec: 17.89 - lr: 0.020000\n",
+      "2021-09-21 21:25:02,481 epoch 5 - iter 14/73 - loss 0.65389643 - samples/sec: 18.12 - lr: 0.020000\n",
+      "2021-09-21 21:25:02,909 epoch 5 - iter 21/73 - loss 0.64755077 - samples/sec: 16.39 - lr: 0.020000\n",
+      "2021-09-21 21:25:03,296 epoch 5 - iter 28/73 - loss 0.66439183 - samples/sec: 18.10 - lr: 0.020000\n",
+      "2021-09-21 21:25:03,703 epoch 5 - iter 35/73 - loss 0.66541375 - samples/sec: 17.24 - lr: 0.020000\n",
+      "2021-09-21 21:25:04,090 epoch 5 - iter 42/73 - loss 0.66291380 - samples/sec: 18.11 - lr: 0.020000\n",
+      "2021-09-21 21:25:04,464 epoch 5 - iter 49/73 - loss 0.66218646 - samples/sec: 18.72 - lr: 0.020000\n",
+      "2021-09-21 21:25:04,852 epoch 5 - iter 56/73 - loss 0.66127242 - samples/sec: 18.08 - lr: 0.020000\n",
+      "2021-09-21 21:25:05,249 epoch 5 - iter 63/73 - loss 0.65998583 - samples/sec: 17.67 - lr: 0.020000\n",
+      "2021-09-21 21:25:05,647 epoch 5 - iter 70/73 - loss 0.66117906 - samples/sec: 17.60 - lr: 0.020000\n",
+      "2021-09-21 21:25:05,797 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:25:05,798 EPOCH 5 done: loss 0.6587 - lr 0.0200000\n",
+      "2021-09-21 21:25:06,011 DEV : loss 0.48157185316085815 - score 0.0\n",
+      "2021-09-21 21:25:06,011 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:25:06,013 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:25:06,451 epoch 6 - iter 7/73 - loss 0.66738948 - samples/sec: 16.61 - lr: 0.020000\n",
+      "2021-09-21 21:25:06,858 epoch 6 - iter 14/73 - loss 0.66554035 - samples/sec: 17.23 - lr: 0.020000\n",
+      "2021-09-21 21:25:07,258 epoch 6 - iter 21/73 - loss 0.65220175 - samples/sec: 17.50 - lr: 0.020000\n",
+      "2021-09-21 21:25:07,628 epoch 6 - iter 28/73 - loss 0.66357571 - samples/sec: 19.00 - lr: 0.020000\n",
+      "2021-09-21 21:25:08,021 epoch 6 - iter 35/73 - loss 0.66019049 - samples/sec: 17.81 - lr: 0.020000\n",
+      "2021-09-21 21:25:08,410 epoch 6 - iter 42/73 - loss 0.65569606 - samples/sec: 18.05 - lr: 0.020000\n",
+      "2021-09-21 21:25:08,803 epoch 6 - iter 49/73 - loss 0.65515326 - samples/sec: 17.81 - lr: 0.020000\n",
+      "2021-09-21 21:25:09,180 epoch 6 - iter 56/73 - loss 0.65026907 - samples/sec: 18.60 - lr: 0.020000\n",
+      "2021-09-21 21:25:09,575 epoch 6 - iter 63/73 - loss 0.64982278 - samples/sec: 17.77 - lr: 0.020000\n",
+      "2021-09-21 21:25:09,964 epoch 6 - iter 70/73 - loss 0.64764972 - samples/sec: 18.03 - lr: 0.020000\n",
+      "2021-09-21 21:25:10,137 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:37:31,527 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:31,527 EPOCH 6 done: loss 0.6456 - lr 0.0200000\n",
-      "2021-09-08 01:37:31,688 DEV : loss 0.4331866502761841 - score 0.0\n",
-      "2021-09-08 01:37:31,689 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:37:31,691 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:32,048 epoch 7 - iter 7/73 - loss 0.68721094 - samples/sec: 20.69 - lr: 0.020000\n",
-      "2021-09-08 01:37:32,396 epoch 7 - iter 14/73 - loss 0.66096921 - samples/sec: 20.18 - lr: 0.020000\n",
-      "2021-09-08 01:37:32,747 epoch 7 - iter 21/73 - loss 0.65852905 - samples/sec: 19.97 - lr: 0.020000\n",
-      "2021-09-08 01:37:33,095 epoch 7 - iter 28/73 - loss 0.66232742 - samples/sec: 20.15 - lr: 0.020000\n",
-      "2021-09-08 01:37:33,433 epoch 7 - iter 35/73 - loss 0.66859616 - samples/sec: 20.79 - lr: 0.020000\n",
-      "2021-09-08 01:37:33,772 epoch 7 - iter 42/73 - loss 0.68029982 - samples/sec: 20.65 - lr: 0.020000\n",
-      "2021-09-08 01:37:34,102 epoch 7 - iter 49/73 - loss 0.67500708 - samples/sec: 21.31 - lr: 0.020000\n",
-      "2021-09-08 01:37:34,436 epoch 7 - iter 56/73 - loss 0.66989776 - samples/sec: 20.97 - lr: 0.020000\n",
-      "2021-09-08 01:37:34,758 epoch 7 - iter 63/73 - loss 0.66686697 - samples/sec: 21.79 - lr: 0.020000\n",
-      "2021-09-08 01:37:35,105 epoch 7 - iter 70/73 - loss 0.66105312 - samples/sec: 20.22 - lr: 0.020000\n",
-      "2021-09-08 01:37:35,254 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:35,255 EPOCH 7 done: loss 0.6631 - lr 0.0200000\n",
-      "2021-09-08 01:37:35,417 DEV : loss 0.4871489703655243 - score 0.0\n",
-      "2021-09-08 01:37:35,418 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:37:35,419 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:35,783 epoch 8 - iter 7/73 - loss 0.61030952 - samples/sec: 20.39 - lr: 0.020000\n",
-      "2021-09-08 01:37:36,103 epoch 8 - iter 14/73 - loss 0.59504198 - samples/sec: 21.93 - lr: 0.020000\n",
-      "2021-09-08 01:37:36,443 epoch 8 - iter 21/73 - loss 0.60371654 - samples/sec: 20.60 - lr: 0.020000\n",
-      "2021-09-08 01:37:36,781 epoch 8 - iter 28/73 - loss 0.61834092 - samples/sec: 20.77 - lr: 0.020000\n",
-      "2021-09-08 01:37:37,119 epoch 8 - iter 35/73 - loss 0.63479232 - samples/sec: 20.74 - lr: 0.020000\n",
-      "2021-09-08 01:37:37,471 epoch 8 - iter 42/73 - loss 0.65595793 - samples/sec: 19.94 - lr: 0.020000\n",
-      "2021-09-08 01:37:37,814 epoch 8 - iter 49/73 - loss 0.64644500 - samples/sec: 20.44 - lr: 0.020000\n",
-      "2021-09-08 01:37:38,152 epoch 8 - iter 56/73 - loss 0.66069557 - samples/sec: 20.75 - lr: 0.020000\n",
-      "2021-09-08 01:37:38,497 epoch 8 - iter 63/73 - loss 0.65418725 - samples/sec: 20.36 - lr: 0.020000\n",
-      "2021-09-08 01:37:38,840 epoch 8 - iter 70/73 - loss 0.64977826 - samples/sec: 20.45 - lr: 0.020000\n",
-      "2021-09-08 01:37:38,990 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:38,990 EPOCH 8 done: loss 0.6523 - lr 0.0200000\n",
-      "2021-09-08 01:37:39,150 DEV : loss 0.4803068935871124 - score 0.0\n",
-      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:37:39,150 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:37:39,152 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:39,511 epoch 9 - iter 7/73 - loss 0.50174062 - samples/sec: 20.60 - lr: 0.010000\n",
-      "2021-09-08 01:37:39,856 epoch 9 - iter 14/73 - loss 0.53423765 - samples/sec: 20.33 - lr: 0.010000\n",
-      "2021-09-08 01:37:40,200 epoch 9 - iter 21/73 - loss 0.57235479 - samples/sec: 20.40 - lr: 0.010000\n",
-      "2021-09-08 01:37:40,544 epoch 9 - iter 28/73 - loss 0.60279159 - samples/sec: 20.38 - lr: 0.010000\n",
-      "2021-09-08 01:37:40,892 epoch 9 - iter 35/73 - loss 0.60034533 - samples/sec: 20.18 - lr: 0.010000\n",
-      "2021-09-08 01:37:41,235 epoch 9 - iter 42/73 - loss 0.57295466 - samples/sec: 20.42 - lr: 0.010000\n",
-      "2021-09-08 01:37:41,579 epoch 9 - iter 49/73 - loss 0.56794792 - samples/sec: 20.44 - lr: 0.010000\n",
-      "2021-09-08 01:37:41,926 epoch 9 - iter 56/73 - loss 0.55874092 - samples/sec: 20.19 - lr: 0.010000\n",
-      "2021-09-08 01:37:42,269 epoch 9 - iter 63/73 - loss 0.57391341 - samples/sec: 20.48 - lr: 0.010000\n",
-      "2021-09-08 01:37:42,615 epoch 9 - iter 70/73 - loss 0.56662729 - samples/sec: 20.27 - lr: 0.010000\n",
-      "2021-09-08 01:37:42,764 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:42,765 EPOCH 9 done: loss 0.5609 - lr 0.0100000\n",
-      "2021-09-08 01:37:42,933 DEV : loss 0.5747549533843994 - score 0.0\n",
-      "2021-09-08 01:37:42,933 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:37:42,935 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:43,303 epoch 10 - iter 7/73 - loss 0.53211733 - samples/sec: 20.14 - lr: 0.010000\n",
-      "2021-09-08 01:37:43,642 epoch 10 - iter 14/73 - loss 0.55803193 - samples/sec: 20.69 - lr: 0.010000\n",
-      "2021-09-08 01:37:43,975 epoch 10 - iter 21/73 - loss 0.48837573 - samples/sec: 21.10 - lr: 0.010000\n",
-      "2021-09-08 01:37:44,301 epoch 10 - iter 28/73 - loss 0.52733300 - samples/sec: 21.49 - lr: 0.010000\n",
-      "2021-09-08 01:37:44,626 epoch 10 - iter 35/73 - loss 0.50669924 - samples/sec: 21.60 - lr: 0.010000\n",
-      "2021-09-08 01:37:44,950 epoch 10 - iter 42/73 - loss 0.51575765 - samples/sec: 21.65 - lr: 0.010000\n",
-      "2021-09-08 01:37:45,278 epoch 10 - iter 49/73 - loss 0.51735241 - samples/sec: 21.39 - lr: 0.010000\n",
-      "2021-09-08 01:37:45,602 epoch 10 - iter 56/73 - loss 0.53410097 - samples/sec: 21.66 - lr: 0.010000\n",
-      "2021-09-08 01:37:45,931 epoch 10 - iter 63/73 - loss 0.53267756 - samples/sec: 21.31 - lr: 0.010000\n",
-      "2021-09-08 01:37:46,343 epoch 10 - iter 70/73 - loss 0.52615961 - samples/sec: 17.03 - lr: 0.010000\n",
-      "2021-09-08 01:37:46,519 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:46,519 EPOCH 10 done: loss 0.5288 - lr 0.0100000\n",
-      "2021-09-08 01:37:46,662 DEV : loss 0.48601672053337097 - score 0.0\n",
-      "2021-09-08 01:37:46,663 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:37:50,910 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:50,911 Testing using best model ...\n",
-      "2021-09-08 01:37:50,935 loading file None/best-model.pt\n",
+      "2021-09-21 21:25:10,137 EPOCH 6 done: loss 0.6470 - lr 0.0200000\n",
+      "2021-09-21 21:25:10,328 DEV : loss 0.4679498076438904 - score 0.0\n",
+      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:25:10,329 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:25:10,331 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:25:10,752 epoch 7 - iter 7/73 - loss 0.63708541 - samples/sec: 17.63 - lr: 0.010000\n",
+      "2021-09-21 21:25:11,172 epoch 7 - iter 14/73 - loss 0.63185867 - samples/sec: 16.71 - lr: 0.010000\n",
+      "2021-09-21 21:25:11,561 epoch 7 - iter 21/73 - loss 0.63237265 - samples/sec: 18.01 - lr: 0.010000\n",
+      "2021-09-21 21:25:11,970 epoch 7 - iter 28/73 - loss 0.63006329 - samples/sec: 17.14 - lr: 0.010000\n",
+      "2021-09-21 21:25:12,350 epoch 7 - iter 35/73 - loss 0.63880245 - samples/sec: 18.47 - lr: 0.010000\n",
+      "2021-09-21 21:25:12,705 epoch 7 - iter 42/73 - loss 0.64084282 - samples/sec: 19.74 - lr: 0.010000\n",
+      "2021-09-21 21:25:13,014 epoch 7 - iter 49/73 - loss 0.64073641 - samples/sec: 22.71 - lr: 0.010000\n",
+      "2021-09-21 21:25:13,320 epoch 7 - iter 56/73 - loss 0.64022312 - samples/sec: 22.87 - lr: 0.010000\n",
+      "2021-09-21 21:25:13,620 epoch 7 - iter 63/73 - loss 0.64000373 - samples/sec: 23.43 - lr: 0.010000\n",
+      "2021-09-21 21:25:13,923 epoch 7 - iter 70/73 - loss 0.64164395 - samples/sec: 23.12 - lr: 0.010000\n",
+      "2021-09-21 21:25:14,053 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:25:14,053 EPOCH 7 done: loss 0.6397 - lr 0.0100000\n",
+      "2021-09-21 21:25:14,272 DEV : loss 0.472800076007843 - score 0.0\n",
+      "2021-09-21 21:25:14,273 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:25:14,349 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:25:14,670 epoch 8 - iter 7/73 - loss 0.64717945 - samples/sec: 22.80 - lr: 0.010000\n",
+      "2021-09-21 21:25:14,981 epoch 8 - iter 14/73 - loss 0.63502289 - samples/sec: 22.54 - lr: 0.010000\n",
+      "2021-09-21 21:25:15,300 epoch 8 - iter 21/73 - loss 0.63187285 - samples/sec: 22.00 - lr: 0.010000\n",
+      "2021-09-21 21:25:15,614 epoch 8 - iter 28/73 - loss 0.63009570 - samples/sec: 22.36 - lr: 0.010000\n",
+      "2021-09-21 21:25:15,926 epoch 8 - iter 35/73 - loss 0.63207512 - samples/sec: 22.45 - lr: 0.010000\n",
+      "2021-09-21 21:25:16,233 epoch 8 - iter 42/73 - loss 0.63476508 - samples/sec: 22.87 - lr: 0.010000\n",
+      "2021-09-21 21:25:16,535 epoch 8 - iter 49/73 - loss 0.63432678 - samples/sec: 23.22 - lr: 0.010000\n",
+      "2021-09-21 21:25:16,837 epoch 8 - iter 56/73 - loss 0.63192881 - samples/sec: 23.29 - lr: 0.010000\n",
+      "2021-09-21 21:25:17,195 epoch 8 - iter 63/73 - loss 0.63053877 - samples/sec: 19.56 - lr: 0.010000\n",
+      "2021-09-21 21:25:17,584 epoch 8 - iter 70/73 - loss 0.62781163 - samples/sec: 18.02 - lr: 0.010000\n",
+      "2021-09-21 21:25:17,762 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:25:17,763 EPOCH 8 done: loss 0.6285 - lr 0.0100000\n",
+      "2021-09-21 21:25:17,994 DEV : loss 0.4771851897239685 - score 0.0\n",
+      "2021-09-21 21:25:17,994 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:25:17,996 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:25:18,431 epoch 9 - iter 7/73 - loss 0.65236766 - samples/sec: 17.27 - lr: 0.010000\n",
+      "2021-09-21 21:25:18,854 epoch 9 - iter 14/73 - loss 0.64906346 - samples/sec: 16.56 - lr: 0.010000\n",
+      "2021-09-21 21:25:19,247 epoch 9 - iter 21/73 - loss 0.64942538 - samples/sec: 17.85 - lr: 0.010000\n",
+      "2021-09-21 21:25:19,665 epoch 9 - iter 28/73 - loss 0.64604598 - samples/sec: 16.77 - lr: 0.010000\n",
+      "2021-09-21 21:25:20,043 epoch 9 - iter 35/73 - loss 0.64644902 - samples/sec: 18.55 - lr: 0.010000\n",
+      "2021-09-21 21:25:20,420 epoch 9 - iter 42/73 - loss 0.64672647 - samples/sec: 18.61 - lr: 0.010000\n",
+      "2021-09-21 21:25:20,809 epoch 9 - iter 49/73 - loss 0.64792109 - samples/sec: 18.04 - lr: 0.010000\n",
+      "2021-09-21 21:25:21,183 epoch 9 - iter 56/73 - loss 0.64482397 - samples/sec: 18.72 - lr: 0.010000\n",
+      "2021-09-21 21:25:21,566 epoch 9 - iter 63/73 - loss 0.64305638 - samples/sec: 18.31 - lr: 0.010000\n",
+      "2021-09-21 21:25:21,953 epoch 9 - iter 70/73 - loss 0.64347304 - samples/sec: 18.12 - lr: 0.010000\n",
+      "2021-09-21 21:25:22,131 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:25:22,132 EPOCH 9 done: loss 0.6423 - lr 0.0100000\n",
+      "2021-09-21 21:25:22,337 DEV : loss 0.4662077724933624 - score 0.0\n",
+      "2021-09-21 21:25:22,337 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:25:22,339 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:25:22,800 epoch 10 - iter 7/73 - loss 0.62694217 - samples/sec: 17.20 - lr: 0.010000\n",
+      "2021-09-21 21:25:23,215 epoch 10 - iter 14/73 - loss 0.62627828 - samples/sec: 16.86 - lr: 0.010000\n",
+      "2021-09-21 21:25:23,617 epoch 10 - iter 21/73 - loss 0.62771467 - samples/sec: 17.48 - lr: 0.010000\n",
+      "2021-09-21 21:25:23,924 epoch 10 - iter 28/73 - loss 0.62958723 - samples/sec: 22.83 - lr: 0.010000\n",
+      "2021-09-21 21:25:24,232 epoch 10 - iter 35/73 - loss 0.63155815 - samples/sec: 22.72 - lr: 0.010000\n",
+      "2021-09-21 21:25:24,548 epoch 10 - iter 42/73 - loss 0.63381940 - samples/sec: 22.22 - lr: 0.010000\n",
+      "2021-09-21 21:25:24,865 epoch 10 - iter 49/73 - loss 0.63804812 - samples/sec: 22.11 - lr: 0.010000\n",
+      "2021-09-21 21:25:25,169 epoch 10 - iter 56/73 - loss 0.64153528 - samples/sec: 23.12 - lr: 0.010000\n",
+      "2021-09-21 21:25:25,469 epoch 10 - iter 63/73 - loss 0.64004214 - samples/sec: 23.37 - lr: 0.010000\n",
+      "2021-09-21 21:25:25,774 epoch 10 - iter 70/73 - loss 0.63974548 - samples/sec: 23.03 - lr: 0.010000\n",
+      "2021-09-21 21:25:25,903 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:25:25,904 EPOCH 10 done: loss 0.6388 - lr 0.0100000\n",
+      "2021-09-21 21:25:26,148 DEV : loss 0.4685164988040924 - score 0.0\n",
+      "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 21:25:26,149 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:25:30,541 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:25:30,542 Testing using best model ...\n",
+      "2021-09-21 21:25:30,544 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:38:03,086 \t0.0\n",
-      "2021-09-08 01:38:03,087 \n",
+      "2021-09-21 21:25:36,946 \t0.0\n",
+      "2021-09-21 21:25:36,946 \n",
       "Results:\n",
       "- F-score (micro) 0.0\n",
       "- F-score (macro) 0.0\n",
@@ -5126,14 +5137,14 @@
       "By class:\n",
       "                              precision    recall  f1-score   support\n",
       "\n",
-      "     this text expresses joy     0.0000    0.0000    0.0000         1\n",
+      "     this text expresses joy     0.0000    0.0000    0.0000         2\n",
       "this text expresses surprise     0.0000    0.0000    0.0000         1\n",
-      "    this text expresses love     0.0000    0.0000    0.0000         2\n",
-      " this text expresses disgust     0.0000    0.0000    0.0000         0\n",
-      "    this text expresses fear     0.0000    0.0000    0.0000         3\n",
-      "   this text expresses anger     0.0000    0.0000    0.0000         1\n",
+      "    this text expresses love     0.0000    0.0000    0.0000         1\n",
+      " this text expresses disgust     0.0000    0.0000    0.0000         2\n",
+      "    this text expresses fear     0.0000    0.0000    0.0000         1\n",
+      "   this text expresses anger     0.0000    0.0000    0.0000         0\n",
       "   this text expresses guilt     0.0000    0.0000    0.0000         1\n",
-      "   this text expresses shame     0.0000    0.0000    0.0000         0\n",
+      "   this text expresses shame     0.0000    0.0000    0.0000         1\n",
       " this text expresses sadness     0.0000    0.0000    0.0000         0\n",
       "\n",
       "                   micro avg     0.0000    0.0000    0.0000         9\n",
@@ -5141,39 +5152,26 @@
       "                weighted avg     0.0000    0.0000    0.0000         9\n",
       "                 samples avg     0.0000    0.0000    0.0000         9\n",
       "\n",
-      "2021-09-08 01:38:03,088 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:53,699 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
+      "2021-09-21 21:25:36,947 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:26:46,137 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:38:58,011 Computing label dictionary. Progress:\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "100%|██████████| 82/82 [00:00<00:00, 46972.54it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 01:38:58,014 [b'this text expresses joy', b'this text expresses surprise', b'this text expresses love', b'this text expresses disgust', b'this text expresses fear', b'this text expresses anger', b'this text expresses guilt', b'this text expresses shame', b'this text expresses sadness']\n"
+      "2021-09-21 21:26:50,285 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\n"
+      "100%|██████████| 82/82 [00:00<00:00, 46433.50it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:38:58,738 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:58,740 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:26:50,288 [b'this text expresses joy', b'this text expresses surprise', b'this text expresses love', b'this text expresses disgust', b'this text expresses fear', b'this text expresses anger', b'this text expresses guilt', b'this text expresses shame', b'this text expresses sadness']\n",
+      "2021-09-21 21:26:50,299 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:26:50,301 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -5486,190 +5484,203 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:38:58,740 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:58,740 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
-      "2021-09-08 01:38:58,741 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:58,741 Parameters:\n",
-      "2021-09-08 01:38:58,741  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:38:58,742  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:38:58,742  - patience: \"3\"\n",
-      "2021-09-08 01:38:58,742  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:38:58,742  - max_epochs: \"10\"\n",
-      "2021-09-08 01:38:58,743  - shuffle: \"True\"\n",
-      "2021-09-08 01:38:58,743  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:38:58,743  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:38:58,744 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:58,744 Model training base path: \"None\"\n",
-      "2021-09-08 01:38:58,744 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:58,744 Device: cuda:1\n",
-      "2021-09-08 01:38:58,745 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:58,745 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:38:58,754 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:59,106 epoch 1 - iter 7/73 - loss 0.25370236 - samples/sec: 20.92 - lr: 0.020000\n",
-      "2021-09-08 01:38:59,504 epoch 1 - iter 14/73 - loss 0.47778849 - samples/sec: 17.62 - lr: 0.020000\n",
-      "2021-09-08 01:38:59,871 epoch 1 - iter 21/73 - loss 0.44134775 - samples/sec: 19.17 - lr: 0.020000\n",
-      "2021-09-08 01:39:00,208 epoch 1 - iter 28/73 - loss 0.50302666 - samples/sec: 20.80 - lr: 0.020000\n",
-      "2021-09-08 01:39:00,550 epoch 1 - iter 35/73 - loss 0.45045414 - samples/sec: 20.51 - lr: 0.020000\n",
-      "2021-09-08 01:39:00,891 epoch 1 - iter 42/73 - loss 0.50504821 - samples/sec: 20.53 - lr: 0.020000\n",
-      "2021-09-08 01:39:01,214 epoch 1 - iter 49/73 - loss 0.51663693 - samples/sec: 21.73 - lr: 0.020000\n",
-      "2021-09-08 01:39:01,541 epoch 1 - iter 56/73 - loss 0.52306179 - samples/sec: 21.48 - lr: 0.020000\n",
-      "2021-09-08 01:39:01,888 epoch 1 - iter 63/73 - loss 0.54061365 - samples/sec: 20.24 - lr: 0.020000\n",
-      "2021-09-08 01:39:02,218 epoch 1 - iter 70/73 - loss 0.52946169 - samples/sec: 21.25 - lr: 0.020000\n",
-      "2021-09-08 01:39:02,359 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:02,359 EPOCH 1 done: loss 0.5194 - lr 0.0200000\n",
-      "2021-09-08 01:39:02,590 DEV : loss 0.7571791410446167 - score 0.125\n",
-      "2021-09-08 01:39:02,590 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:26:50,301 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:26:50,302 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
+      "2021-09-21 21:26:50,302 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:26:50,302 Parameters:\n",
+      "2021-09-21 21:26:50,302  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:26:50,303  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:26:50,303  - patience: \"3\"\n",
+      "2021-09-21 21:26:50,303  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:26:50,304  - max_epochs: \"10\"\n",
+      "2021-09-21 21:26:50,304  - shuffle: \"True\"\n",
+      "2021-09-21 21:26:50,304  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:26:50,304  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:26:50,305 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:26:50,305 Model training base path: \"None\"\n",
+      "2021-09-21 21:26:50,305 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:26:50,306 Device: cuda:0\n",
+      "2021-09-21 21:26:50,306 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:26:50,306 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:26:50,313 ----------------------------------------------------------------------------------------------------\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 21:26:50,733 epoch 1 - iter 7/73 - loss 0.19651182 - samples/sec: 17.43 - lr: 0.020000\n",
+      "2021-09-21 21:26:51,140 epoch 1 - iter 14/73 - loss 0.48059402 - samples/sec: 17.23 - lr: 0.020000\n",
+      "2021-09-21 21:26:51,561 epoch 1 - iter 21/73 - loss 0.49245556 - samples/sec: 16.68 - lr: 0.020000\n",
+      "2021-09-21 21:26:51,973 epoch 1 - iter 28/73 - loss 0.48690660 - samples/sec: 17.04 - lr: 0.020000\n",
+      "2021-09-21 21:26:52,402 epoch 1 - iter 35/73 - loss 0.56832949 - samples/sec: 16.35 - lr: 0.020000\n",
+      "2021-09-21 21:26:52,854 epoch 1 - iter 42/73 - loss 0.56880989 - samples/sec: 15.49 - lr: 0.020000\n",
+      "2021-09-21 21:26:53,273 epoch 1 - iter 49/73 - loss 0.55766894 - samples/sec: 16.75 - lr: 0.020000\n",
+      "2021-09-21 21:26:53,697 epoch 1 - iter 56/73 - loss 0.56517452 - samples/sec: 16.54 - lr: 0.020000\n",
+      "2021-09-21 21:26:54,127 epoch 1 - iter 63/73 - loss 0.60745058 - samples/sec: 16.30 - lr: 0.020000\n",
+      "2021-09-21 21:26:54,554 epoch 1 - iter 70/73 - loss 0.58327295 - samples/sec: 16.43 - lr: 0.020000\n",
+      "2021-09-21 21:26:54,738 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:26:54,739 EPOCH 1 done: loss 0.5633 - lr 0.0200000\n",
+      "2021-09-21 21:26:55,040 DEV : loss 1.0621553659439087 - score 0.25\n",
+      "2021-09-21 21:26:55,041 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:39:07,451 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:07,824 epoch 2 - iter 7/73 - loss 1.14044368 - samples/sec: 19.84 - lr: 0.020000\n",
-      "2021-09-08 01:39:08,156 epoch 2 - iter 14/73 - loss 0.85931114 - samples/sec: 21.14 - lr: 0.020000\n",
-      "2021-09-08 01:39:08,490 epoch 2 - iter 21/73 - loss 0.86647334 - samples/sec: 20.96 - lr: 0.020000\n",
-      "2021-09-08 01:39:08,821 epoch 2 - iter 28/73 - loss 0.83428743 - samples/sec: 21.20 - lr: 0.020000\n",
-      "2021-09-08 01:39:09,134 epoch 2 - iter 35/73 - loss 0.79448490 - samples/sec: 22.45 - lr: 0.020000\n",
-      "2021-09-08 01:39:09,444 epoch 2 - iter 42/73 - loss 0.77105481 - samples/sec: 22.66 - lr: 0.020000\n",
-      "2021-09-08 01:39:09,753 epoch 2 - iter 49/73 - loss 0.75118223 - samples/sec: 22.68 - lr: 0.020000\n",
-      "2021-09-08 01:39:10,062 epoch 2 - iter 56/73 - loss 0.73705973 - samples/sec: 22.68 - lr: 0.020000\n",
-      "2021-09-08 01:39:10,367 epoch 2 - iter 63/73 - loss 0.72547731 - samples/sec: 23.01 - lr: 0.020000\n",
-      "2021-09-08 01:39:10,674 epoch 2 - iter 70/73 - loss 0.71743479 - samples/sec: 22.87 - lr: 0.020000\n",
-      "2021-09-08 01:39:10,807 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:10,808 EPOCH 2 done: loss 0.7149 - lr 0.0200000\n",
-      "2021-09-08 01:39:11,534 DEV : loss 0.3682263195514679 - score 0.125\n",
-      "2021-09-08 01:39:11,536 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:26:58,924 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:26:59,431 epoch 2 - iter 7/73 - loss 1.12987294 - samples/sec: 16.22 - lr: 0.020000\n",
+      "2021-09-21 21:26:59,833 epoch 2 - iter 14/73 - loss 0.93119148 - samples/sec: 17.44 - lr: 0.020000\n",
+      "2021-09-21 21:27:00,265 epoch 2 - iter 21/73 - loss 0.82587740 - samples/sec: 16.21 - lr: 0.020000\n",
+      "2021-09-21 21:27:00,683 epoch 2 - iter 28/73 - loss 0.78138213 - samples/sec: 16.77 - lr: 0.020000\n",
+      "2021-09-21 21:27:01,096 epoch 2 - iter 35/73 - loss 0.76127366 - samples/sec: 17.00 - lr: 0.020000\n",
+      "2021-09-21 21:27:01,517 epoch 2 - iter 42/73 - loss 0.74675402 - samples/sec: 16.63 - lr: 0.020000\n",
+      "2021-09-21 21:27:01,913 epoch 2 - iter 49/73 - loss 0.73351024 - samples/sec: 17.73 - lr: 0.020000\n",
+      "2021-09-21 21:27:02,323 epoch 2 - iter 56/73 - loss 0.72443425 - samples/sec: 17.09 - lr: 0.020000\n",
+      "2021-09-21 21:27:02,695 epoch 2 - iter 63/73 - loss 0.71518154 - samples/sec: 18.86 - lr: 0.020000\n",
+      "2021-09-21 21:27:03,082 epoch 2 - iter 70/73 - loss 0.70678454 - samples/sec: 18.15 - lr: 0.020000\n",
+      "2021-09-21 21:27:03,211 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:27:03,211 EPOCH 2 done: loss 0.7090 - lr 0.0200000\n",
+      "2021-09-21 21:27:03,379 DEV : loss 0.46807441115379333 - score 0.25\n",
+      "2021-09-21 21:27:03,380 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:39:15,907 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:16,251 epoch 3 - iter 7/73 - loss 0.64082874 - samples/sec: 21.59 - lr: 0.020000\n",
-      "2021-09-08 01:39:16,558 epoch 3 - iter 14/73 - loss 0.65085692 - samples/sec: 22.84 - lr: 0.020000\n",
-      "2021-09-08 01:39:16,861 epoch 3 - iter 21/73 - loss 0.64894274 - samples/sec: 23.13 - lr: 0.020000\n",
-      "2021-09-08 01:39:17,174 epoch 3 - iter 28/73 - loss 0.63950856 - samples/sec: 22.37 - lr: 0.020000\n",
-      "2021-09-08 01:39:17,494 epoch 3 - iter 35/73 - loss 0.64181722 - samples/sec: 21.92 - lr: 0.020000\n",
-      "2021-09-08 01:39:17,808 epoch 3 - iter 42/73 - loss 0.64434138 - samples/sec: 22.36 - lr: 0.020000\n",
-      "2021-09-08 01:39:18,111 epoch 3 - iter 49/73 - loss 0.64232861 - samples/sec: 23.19 - lr: 0.020000\n",
-      "2021-09-08 01:39:18,415 epoch 3 - iter 56/73 - loss 0.64507027 - samples/sec: 23.04 - lr: 0.020000\n",
-      "2021-09-08 01:39:18,717 epoch 3 - iter 63/73 - loss 0.64313232 - samples/sec: 23.23 - lr: 0.020000\n",
-      "2021-09-08 01:39:19,028 epoch 3 - iter 70/73 - loss 0.64473548 - samples/sec: 22.60 - lr: 0.020000\n",
-      "2021-09-08 01:39:19,164 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:19,165 EPOCH 3 done: loss 0.6460 - lr 0.0200000\n",
-      "2021-09-08 01:39:19,299 DEV : loss 0.4451117515563965 - score 0.125\n",
-      "2021-09-08 01:39:19,300 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:39:19,303 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:19,619 epoch 4 - iter 7/73 - loss 0.64357607 - samples/sec: 23.24 - lr: 0.020000\n",
-      "2021-09-08 01:39:19,921 epoch 4 - iter 14/73 - loss 0.65336453 - samples/sec: 23.25 - lr: 0.020000\n",
-      "2021-09-08 01:39:20,226 epoch 4 - iter 21/73 - loss 0.65937691 - samples/sec: 22.98 - lr: 0.020000\n",
-      "2021-09-08 01:39:20,528 epoch 4 - iter 28/73 - loss 0.66607002 - samples/sec: 23.24 - lr: 0.020000\n",
-      "2021-09-08 01:39:20,832 epoch 4 - iter 35/73 - loss 0.66108753 - samples/sec: 23.01 - lr: 0.020000\n",
-      "2021-09-08 01:39:21,134 epoch 4 - iter 42/73 - loss 0.66028251 - samples/sec: 23.28 - lr: 0.020000\n",
-      "2021-09-08 01:39:21,435 epoch 4 - iter 49/73 - loss 0.65780865 - samples/sec: 23.26 - lr: 0.020000\n",
-      "2021-09-08 01:39:21,740 epoch 4 - iter 56/73 - loss 0.65557279 - samples/sec: 23.05 - lr: 0.020000\n",
-      "2021-09-08 01:39:22,047 epoch 4 - iter 63/73 - loss 0.65057341 - samples/sec: 22.84 - lr: 0.020000\n",
-      "2021-09-08 01:39:22,350 epoch 4 - iter 70/73 - loss 0.65274459 - samples/sec: 23.16 - lr: 0.020000\n",
-      "2021-09-08 01:39:22,483 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:22,483 EPOCH 4 done: loss 0.6525 - lr 0.0200000\n",
-      "2021-09-08 01:39:22,613 DEV : loss 0.4503408968448639 - score 0.125\n",
-      "2021-09-08 01:39:22,614 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:39:22,616 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:22,931 epoch 5 - iter 7/73 - loss 0.64499399 - samples/sec: 23.20 - lr: 0.020000\n",
-      "2021-09-08 01:39:23,235 epoch 5 - iter 14/73 - loss 0.64278936 - samples/sec: 23.13 - lr: 0.020000\n",
-      "2021-09-08 01:39:23,537 epoch 5 - iter 21/73 - loss 0.63397876 - samples/sec: 23.24 - lr: 0.020000\n",
-      "2021-09-08 01:39:23,840 epoch 5 - iter 28/73 - loss 0.63777249 - samples/sec: 23.13 - lr: 0.020000\n",
-      "2021-09-08 01:39:24,140 epoch 5 - iter 35/73 - loss 0.62789329 - samples/sec: 23.33 - lr: 0.020000\n",
-      "2021-09-08 01:39:24,441 epoch 5 - iter 42/73 - loss 0.63407810 - samples/sec: 23.29 - lr: 0.020000\n",
-      "2021-09-08 01:39:24,741 epoch 5 - iter 49/73 - loss 0.63325497 - samples/sec: 23.39 - lr: 0.020000\n",
-      "2021-09-08 01:39:25,045 epoch 5 - iter 56/73 - loss 0.63578221 - samples/sec: 23.10 - lr: 0.020000\n",
-      "2021-09-08 01:39:25,346 epoch 5 - iter 63/73 - loss 0.63471274 - samples/sec: 23.28 - lr: 0.020000\n",
-      "2021-09-08 01:39:25,647 epoch 5 - iter 70/73 - loss 0.63391561 - samples/sec: 23.33 - lr: 0.020000\n",
-      "2021-09-08 01:39:25,781 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:27:07,292 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:27:07,693 epoch 3 - iter 7/73 - loss 0.64325751 - samples/sec: 18.28 - lr: 0.020000\n",
+      "2021-09-21 21:27:08,101 epoch 3 - iter 14/73 - loss 0.65705944 - samples/sec: 17.19 - lr: 0.020000\n",
+      "2021-09-21 21:27:08,494 epoch 3 - iter 21/73 - loss 0.66417775 - samples/sec: 17.83 - lr: 0.020000\n",
+      "2021-09-21 21:27:08,880 epoch 3 - iter 28/73 - loss 0.65650396 - samples/sec: 18.20 - lr: 0.020000\n",
+      "2021-09-21 21:27:09,298 epoch 3 - iter 35/73 - loss 0.65404588 - samples/sec: 16.76 - lr: 0.020000\n",
+      "2021-09-21 21:27:09,691 epoch 3 - iter 42/73 - loss 0.65564157 - samples/sec: 17.85 - lr: 0.020000\n",
+      "2021-09-21 21:27:10,090 epoch 3 - iter 49/73 - loss 0.65536002 - samples/sec: 17.56 - lr: 0.020000\n",
+      "2021-09-21 21:27:10,466 epoch 3 - iter 56/73 - loss 0.65393854 - samples/sec: 18.67 - lr: 0.020000\n",
+      "2021-09-21 21:27:10,850 epoch 3 - iter 63/73 - loss 0.65187317 - samples/sec: 18.25 - lr: 0.020000\n",
+      "2021-09-21 21:27:11,242 epoch 3 - iter 70/73 - loss 0.65480871 - samples/sec: 17.90 - lr: 0.020000\n",
+      "2021-09-21 21:27:11,400 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:27:11,400 EPOCH 3 done: loss 0.6518 - lr 0.0200000\n",
+      "2021-09-21 21:27:11,690 DEV : loss 0.4159531593322754 - score 0.0\n",
+      "2021-09-21 21:27:11,691 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:27:11,693 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:27:12,131 epoch 4 - iter 7/73 - loss 0.66650241 - samples/sec: 18.74 - lr: 0.020000\n",
+      "2021-09-21 21:27:12,538 epoch 4 - iter 14/73 - loss 0.64935444 - samples/sec: 17.20 - lr: 0.020000\n",
+      "2021-09-21 21:27:12,945 epoch 4 - iter 21/73 - loss 0.65422613 - samples/sec: 17.24 - lr: 0.020000\n",
+      "2021-09-21 21:27:13,291 epoch 4 - iter 28/73 - loss 0.65840337 - samples/sec: 20.28 - lr: 0.020000\n",
+      "2021-09-21 21:27:13,594 epoch 4 - iter 35/73 - loss 0.65192414 - samples/sec: 23.12 - lr: 0.020000\n",
+      "2021-09-21 21:27:13,899 epoch 4 - iter 42/73 - loss 0.65425631 - samples/sec: 23.03 - lr: 0.020000\n",
+      "2021-09-21 21:27:14,203 epoch 4 - iter 49/73 - loss 0.64894076 - samples/sec: 23.02 - lr: 0.020000\n",
+      "2021-09-21 21:27:14,504 epoch 4 - iter 56/73 - loss 0.64733611 - samples/sec: 23.39 - lr: 0.020000\n",
+      "2021-09-21 21:27:14,802 epoch 4 - iter 63/73 - loss 0.65041987 - samples/sec: 23.51 - lr: 0.020000\n",
+      "2021-09-21 21:27:15,104 epoch 4 - iter 70/73 - loss 0.65217523 - samples/sec: 23.25 - lr: 0.020000\n",
+      "2021-09-21 21:27:15,235 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:27:15,235 EPOCH 4 done: loss 0.6512 - lr 0.0200000\n",
+      "2021-09-21 21:27:15,505 DEV : loss 0.43761467933654785 - score 0.0\n",
+      "2021-09-21 21:27:15,505 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:27:15,591 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:27:15,907 epoch 5 - iter 7/73 - loss 0.65452113 - samples/sec: 23.26 - lr: 0.020000\n",
+      "2021-09-21 21:27:16,206 epoch 5 - iter 14/73 - loss 0.65213251 - samples/sec: 23.45 - lr: 0.020000\n",
+      "2021-09-21 21:27:16,506 epoch 5 - iter 21/73 - loss 0.64243554 - samples/sec: 23.36 - lr: 0.020000\n",
+      "2021-09-21 21:27:16,808 epoch 5 - iter 28/73 - loss 0.63190151 - samples/sec: 23.21 - lr: 0.020000\n",
+      "2021-09-21 21:27:17,109 epoch 5 - iter 35/73 - loss 0.65203007 - samples/sec: 23.30 - lr: 0.020000\n",
+      "2021-09-21 21:27:17,426 epoch 5 - iter 42/73 - loss 0.65125742 - samples/sec: 22.13 - lr: 0.020000\n",
+      "2021-09-21 21:27:17,755 epoch 5 - iter 49/73 - loss 0.64889420 - samples/sec: 21.32 - lr: 0.020000\n",
+      "2021-09-21 21:27:18,082 epoch 5 - iter 56/73 - loss 0.64514526 - samples/sec: 21.52 - lr: 0.020000\n",
+      "2021-09-21 21:27:18,388 epoch 5 - iter 63/73 - loss 0.64338578 - samples/sec: 22.91 - lr: 0.020000\n",
+      "2021-09-21 21:27:18,688 epoch 5 - iter 70/73 - loss 0.64189369 - samples/sec: 23.36 - lr: 0.020000\n",
+      "2021-09-21 21:27:18,817 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:27:18,818 EPOCH 5 done: loss 0.6460 - lr 0.0200000\n",
+      "2021-09-21 21:27:18,950 DEV : loss 0.49936750531196594 - score 0.0\n",
+      "2021-09-21 21:27:18,950 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:27:18,952 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:27:19,267 epoch 6 - iter 7/73 - loss 0.63788287 - samples/sec: 23.29 - lr: 0.020000\n",
+      "2021-09-21 21:27:19,568 epoch 6 - iter 14/73 - loss 0.65559740 - samples/sec: 23.30 - lr: 0.020000\n",
+      "2021-09-21 21:27:19,866 epoch 6 - iter 21/73 - loss 0.67021943 - samples/sec: 23.51 - lr: 0.020000\n",
+      "2021-09-21 21:27:20,166 epoch 6 - iter 28/73 - loss 0.66279025 - samples/sec: 23.33 - lr: 0.020000\n",
+      "2021-09-21 21:27:20,464 epoch 6 - iter 35/73 - loss 0.66437220 - samples/sec: 23.55 - lr: 0.020000\n",
+      "2021-09-21 21:27:20,761 epoch 6 - iter 42/73 - loss 0.66415245 - samples/sec: 23.61 - lr: 0.020000\n",
+      "2021-09-21 21:27:21,062 epoch 6 - iter 49/73 - loss 0.66413641 - samples/sec: 23.37 - lr: 0.020000\n",
+      "2021-09-21 21:27:21,360 epoch 6 - iter 56/73 - loss 0.66115159 - samples/sec: 23.49 - lr: 0.020000\n",
+      "2021-09-21 21:27:21,658 epoch 6 - iter 63/73 - loss 0.65762305 - samples/sec: 23.54 - lr: 0.020000\n",
+      "2021-09-21 21:27:21,981 epoch 6 - iter 70/73 - loss 0.65314137 - samples/sec: 21.70 - lr: 0.020000\n",
+      "2021-09-21 21:27:22,145 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:39:25,781 EPOCH 5 done: loss 0.6369 - lr 0.0200000\n",
-      "2021-09-08 01:39:25,908 DEV : loss 0.4951401352882385 - score 0.125\n",
-      "2021-09-08 01:39:25,909 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:39:25,911 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:26,225 epoch 6 - iter 7/73 - loss 0.64270137 - samples/sec: 23.34 - lr: 0.020000\n",
-      "2021-09-08 01:39:26,529 epoch 6 - iter 14/73 - loss 0.64764659 - samples/sec: 23.06 - lr: 0.020000\n",
-      "2021-09-08 01:39:26,830 epoch 6 - iter 21/73 - loss 0.66620327 - samples/sec: 23.33 - lr: 0.020000\n",
-      "2021-09-08 01:39:27,132 epoch 6 - iter 28/73 - loss 0.65934719 - samples/sec: 23.19 - lr: 0.020000\n",
-      "2021-09-08 01:39:27,438 epoch 6 - iter 35/73 - loss 0.65773366 - samples/sec: 22.97 - lr: 0.020000\n",
-      "2021-09-08 01:39:27,738 epoch 6 - iter 42/73 - loss 0.65274562 - samples/sec: 23.36 - lr: 0.020000\n",
-      "2021-09-08 01:39:28,039 epoch 6 - iter 49/73 - loss 0.65254781 - samples/sec: 23.27 - lr: 0.020000\n",
-      "2021-09-08 01:39:28,342 epoch 6 - iter 56/73 - loss 0.64970090 - samples/sec: 23.15 - lr: 0.020000\n",
-      "2021-09-08 01:39:28,643 epoch 6 - iter 63/73 - loss 0.64599965 - samples/sec: 23.28 - lr: 0.020000\n",
-      "2021-09-08 01:39:28,944 epoch 6 - iter 70/73 - loss 0.64586096 - samples/sec: 23.32 - lr: 0.020000\n",
-      "2021-09-08 01:39:29,074 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:29,075 EPOCH 6 done: loss 0.6453 - lr 0.0200000\n",
-      "2021-09-08 01:39:29,204 DEV : loss 0.4758201241493225 - score 0.125\n",
+      "2021-09-21 21:27:22,145 EPOCH 6 done: loss 0.6532 - lr 0.0200000\n",
+      "2021-09-21 21:27:22,410 DEV : loss 0.47740793228149414 - score 0.0\n",
       "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:39:29,205 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:39:29,207 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:29,522 epoch 7 - iter 7/73 - loss 0.65151402 - samples/sec: 23.26 - lr: 0.010000\n",
-      "2021-09-08 01:39:29,822 epoch 7 - iter 14/73 - loss 0.64004306 - samples/sec: 23.33 - lr: 0.010000\n",
-      "2021-09-08 01:39:30,126 epoch 7 - iter 21/73 - loss 0.63329774 - samples/sec: 23.09 - lr: 0.010000\n",
-      "2021-09-08 01:39:30,432 epoch 7 - iter 28/73 - loss 0.63390489 - samples/sec: 22.95 - lr: 0.010000\n",
-      "2021-09-08 01:39:30,733 epoch 7 - iter 35/73 - loss 0.64459927 - samples/sec: 23.27 - lr: 0.010000\n",
-      "2021-09-08 01:39:31,037 epoch 7 - iter 42/73 - loss 0.63977609 - samples/sec: 23.07 - lr: 0.010000\n",
-      "2021-09-08 01:39:31,339 epoch 7 - iter 49/73 - loss 0.64198234 - samples/sec: 23.28 - lr: 0.010000\n",
-      "2021-09-08 01:39:31,639 epoch 7 - iter 56/73 - loss 0.64115304 - samples/sec: 23.37 - lr: 0.010000\n",
-      "2021-09-08 01:39:31,943 epoch 7 - iter 63/73 - loss 0.64357302 - samples/sec: 23.02 - lr: 0.010000\n",
-      "2021-09-08 01:39:32,243 epoch 7 - iter 70/73 - loss 0.64815119 - samples/sec: 23.36 - lr: 0.010000\n",
-      "2021-09-08 01:39:32,374 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:32,374 EPOCH 7 done: loss 0.6472 - lr 0.0100000\n",
-      "2021-09-08 01:39:32,507 DEV : loss 0.4532211720943451 - score 0.125\n",
-      "2021-09-08 01:39:32,509 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:39:32,510 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:32,826 epoch 8 - iter 7/73 - loss 0.61927841 - samples/sec: 23.22 - lr: 0.010000\n",
-      "2021-09-08 01:39:33,131 epoch 8 - iter 14/73 - loss 0.62935144 - samples/sec: 23.03 - lr: 0.010000\n",
-      "2021-09-08 01:39:33,433 epoch 8 - iter 21/73 - loss 0.61854901 - samples/sec: 23.22 - lr: 0.010000\n",
-      "2021-09-08 01:39:33,734 epoch 8 - iter 28/73 - loss 0.63000393 - samples/sec: 23.28 - lr: 0.010000\n",
-      "2021-09-08 01:39:34,039 epoch 8 - iter 35/73 - loss 0.62600347 - samples/sec: 23.02 - lr: 0.010000\n",
-      "2021-09-08 01:39:34,339 epoch 8 - iter 42/73 - loss 0.62755076 - samples/sec: 23.37 - lr: 0.010000\n",
-      "2021-09-08 01:39:34,642 epoch 8 - iter 49/73 - loss 0.62836965 - samples/sec: 23.17 - lr: 0.010000\n",
-      "2021-09-08 01:39:34,951 epoch 8 - iter 56/73 - loss 0.63186978 - samples/sec: 22.71 - lr: 0.010000\n",
-      "2021-09-08 01:39:35,257 epoch 8 - iter 63/73 - loss 0.63089572 - samples/sec: 22.92 - lr: 0.010000\n",
-      "2021-09-08 01:39:35,560 epoch 8 - iter 70/73 - loss 0.63255918 - samples/sec: 23.12 - lr: 0.010000\n",
-      "2021-09-08 01:39:35,691 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:35,692 EPOCH 8 done: loss 0.6331 - lr 0.0100000\n",
-      "2021-09-08 01:39:35,915 DEV : loss 0.47033536434173584 - score 0.125\n",
-      "2021-09-08 01:39:35,916 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:39:35,987 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:36,305 epoch 9 - iter 7/73 - loss 0.64663512 - samples/sec: 23.03 - lr: 0.010000\n",
-      "2021-09-08 01:39:36,611 epoch 9 - iter 14/73 - loss 0.64737452 - samples/sec: 22.98 - lr: 0.010000\n",
-      "2021-09-08 01:39:36,914 epoch 9 - iter 21/73 - loss 0.64335661 - samples/sec: 23.11 - lr: 0.010000\n",
-      "2021-09-08 01:39:37,218 epoch 9 - iter 28/73 - loss 0.64147442 - samples/sec: 23.13 - lr: 0.010000\n",
-      "2021-09-08 01:39:37,525 epoch 9 - iter 35/73 - loss 0.64338393 - samples/sec: 22.85 - lr: 0.010000\n",
-      "2021-09-08 01:39:37,828 epoch 9 - iter 42/73 - loss 0.64370806 - samples/sec: 23.11 - lr: 0.010000\n",
-      "2021-09-08 01:39:38,129 epoch 9 - iter 49/73 - loss 0.64704257 - samples/sec: 23.30 - lr: 0.010000\n",
-      "2021-09-08 01:39:38,434 epoch 9 - iter 56/73 - loss 0.64370555 - samples/sec: 23.00 - lr: 0.010000\n",
-      "2021-09-08 01:39:38,736 epoch 9 - iter 63/73 - loss 0.64409266 - samples/sec: 23.25 - lr: 0.010000\n",
-      "2021-09-08 01:39:39,042 epoch 9 - iter 70/73 - loss 0.64172048 - samples/sec: 22.90 - lr: 0.010000\n",
-      "2021-09-08 01:39:39,174 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:39,175 EPOCH 9 done: loss 0.6441 - lr 0.0100000\n",
-      "2021-09-08 01:39:39,364 DEV : loss 0.5189850926399231 - score 0.125\n",
-      "2021-09-08 01:39:39,365 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:39:39,370 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:39,699 epoch 10 - iter 7/73 - loss 0.65022145 - samples/sec: 22.30 - lr: 0.010000\n",
-      "2021-09-08 01:39:40,001 epoch 10 - iter 14/73 - loss 0.64893789 - samples/sec: 23.22 - lr: 0.010000\n",
-      "2021-09-08 01:39:40,305 epoch 10 - iter 21/73 - loss 0.64926314 - samples/sec: 23.11 - lr: 0.010000\n",
-      "2021-09-08 01:39:40,605 epoch 10 - iter 28/73 - loss 0.65550424 - samples/sec: 23.31 - lr: 0.010000\n",
-      "2021-09-08 01:39:40,906 epoch 10 - iter 35/73 - loss 0.65702083 - samples/sec: 23.31 - lr: 0.010000\n",
-      "2021-09-08 01:39:41,209 epoch 10 - iter 42/73 - loss 0.65255653 - samples/sec: 23.18 - lr: 0.010000\n",
-      "2021-09-08 01:39:41,511 epoch 10 - iter 49/73 - loss 0.65042191 - samples/sec: 23.21 - lr: 0.010000\n",
-      "2021-09-08 01:39:41,818 epoch 10 - iter 56/73 - loss 0.65453264 - samples/sec: 22.86 - lr: 0.010000\n",
-      "2021-09-08 01:39:42,118 epoch 10 - iter 63/73 - loss 0.64813855 - samples/sec: 23.36 - lr: 0.010000\n",
-      "2021-09-08 01:39:42,420 epoch 10 - iter 70/73 - loss 0.64681581 - samples/sec: 23.26 - lr: 0.010000\n",
-      "2021-09-08 01:39:42,549 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:42,550 EPOCH 10 done: loss 0.6462 - lr 0.0100000\n",
-      "2021-09-08 01:39:42,701 DEV : loss 0.4471508860588074 - score 0.125\n",
+      "2021-09-21 21:27:22,410 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:27:22,412 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:27:22,835 epoch 7 - iter 7/73 - loss 0.64791958 - samples/sec: 18.13 - lr: 0.010000\n",
+      "2021-09-21 21:27:23,221 epoch 7 - iter 14/73 - loss 0.63244747 - samples/sec: 18.13 - lr: 0.010000\n",
+      "2021-09-21 21:27:23,591 epoch 7 - iter 21/73 - loss 0.62275640 - samples/sec: 18.96 - lr: 0.010000\n",
+      "2021-09-21 21:27:23,972 epoch 7 - iter 28/73 - loss 0.61967557 - samples/sec: 18.42 - lr: 0.010000\n",
+      "2021-09-21 21:27:24,386 epoch 7 - iter 35/73 - loss 0.62819433 - samples/sec: 16.91 - lr: 0.010000\n",
+      "2021-09-21 21:27:24,782 epoch 7 - iter 42/73 - loss 0.63914026 - samples/sec: 17.71 - lr: 0.010000\n",
+      "2021-09-21 21:27:25,169 epoch 7 - iter 49/73 - loss 0.63784177 - samples/sec: 18.11 - lr: 0.010000\n",
+      "2021-09-21 21:27:25,604 epoch 7 - iter 56/73 - loss 0.63542091 - samples/sec: 16.13 - lr: 0.010000\n",
+      "2021-09-21 21:27:25,964 epoch 7 - iter 63/73 - loss 0.63479255 - samples/sec: 19.47 - lr: 0.010000\n",
+      "2021-09-21 21:27:26,336 epoch 7 - iter 70/73 - loss 0.63822269 - samples/sec: 18.86 - lr: 0.010000\n",
+      "2021-09-21 21:27:26,524 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:27:26,524 EPOCH 7 done: loss 0.6377 - lr 0.0100000\n",
+      "2021-09-21 21:27:26,790 DEV : loss 0.48390263319015503 - score 0.125\n",
+      "2021-09-21 21:27:26,790 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:27:26,792 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:27:27,209 epoch 8 - iter 7/73 - loss 0.63601545 - samples/sec: 17.84 - lr: 0.010000\n",
+      "2021-09-21 21:27:27,606 epoch 8 - iter 14/73 - loss 0.64914475 - samples/sec: 17.69 - lr: 0.010000\n",
+      "2021-09-21 21:27:28,018 epoch 8 - iter 21/73 - loss 0.65430315 - samples/sec: 16.99 - lr: 0.010000\n",
+      "2021-09-21 21:27:28,423 epoch 8 - iter 28/73 - loss 0.66031122 - samples/sec: 17.33 - lr: 0.010000\n",
+      "2021-09-21 21:27:28,811 epoch 8 - iter 35/73 - loss 0.65075812 - samples/sec: 18.07 - lr: 0.010000\n",
+      "2021-09-21 21:27:29,180 epoch 8 - iter 42/73 - loss 0.65031481 - samples/sec: 19.02 - lr: 0.010000\n",
+      "2021-09-21 21:27:29,569 epoch 8 - iter 49/73 - loss 0.64681385 - samples/sec: 18.01 - lr: 0.010000\n",
+      "2021-09-21 21:27:29,972 epoch 8 - iter 56/73 - loss 0.64277151 - samples/sec: 17.37 - lr: 0.010000\n",
+      "2021-09-21 21:27:30,358 epoch 8 - iter 63/73 - loss 0.64739376 - samples/sec: 18.19 - lr: 0.010000\n",
+      "2021-09-21 21:27:30,752 epoch 8 - iter 70/73 - loss 0.64470829 - samples/sec: 17.80 - lr: 0.010000\n",
+      "2021-09-21 21:27:30,913 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:27:30,913 EPOCH 8 done: loss 0.6442 - lr 0.0100000\n",
+      "2021-09-21 21:27:31,143 DEV : loss 0.47894084453582764 - score 0.125\n",
+      "2021-09-21 21:27:31,144 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:27:31,146 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:27:31,542 epoch 9 - iter 7/73 - loss 0.62018810 - samples/sec: 19.17 - lr: 0.010000\n",
+      "2021-09-21 21:27:31,952 epoch 9 - iter 14/73 - loss 0.59768130 - samples/sec: 17.09 - lr: 0.010000\n",
+      "2021-09-21 21:27:32,332 epoch 9 - iter 21/73 - loss 0.61854534 - samples/sec: 18.46 - lr: 0.010000\n",
+      "2021-09-21 21:27:32,720 epoch 9 - iter 28/73 - loss 0.62704293 - samples/sec: 18.09 - lr: 0.010000\n",
+      "2021-09-21 21:27:33,134 epoch 9 - iter 35/73 - loss 0.62946039 - samples/sec: 16.93 - lr: 0.010000\n",
+      "2021-09-21 21:27:33,571 epoch 9 - iter 42/73 - loss 0.63370177 - samples/sec: 16.04 - lr: 0.010000\n",
+      "2021-09-21 21:27:33,921 epoch 9 - iter 49/73 - loss 0.63512487 - samples/sec: 20.02 - lr: 0.010000\n",
+      "2021-09-21 21:27:34,301 epoch 9 - iter 56/73 - loss 0.63172819 - samples/sec: 18.46 - lr: 0.010000\n",
+      "2021-09-21 21:27:34,684 epoch 9 - iter 63/73 - loss 0.63093170 - samples/sec: 18.29 - lr: 0.010000\n",
+      "2021-09-21 21:27:35,061 epoch 9 - iter 70/73 - loss 0.63630679 - samples/sec: 18.61 - lr: 0.010000\n",
+      "2021-09-21 21:27:35,255 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:27:35,255 EPOCH 9 done: loss 0.6379 - lr 0.0100000\n",
+      "2021-09-21 21:27:35,499 DEV : loss 0.525863528251648 - score 0.0\n",
+      "2021-09-21 21:27:35,499 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:27:35,501 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:27:35,974 epoch 10 - iter 7/73 - loss 0.62369148 - samples/sec: 16.85 - lr: 0.010000\n",
+      "2021-09-21 21:27:36,377 epoch 10 - iter 14/73 - loss 0.63519680 - samples/sec: 17.36 - lr: 0.010000\n",
+      "2021-09-21 21:27:36,781 epoch 10 - iter 21/73 - loss 0.64884091 - samples/sec: 17.38 - lr: 0.010000\n",
+      "2021-09-21 21:27:37,163 epoch 10 - iter 28/73 - loss 0.65108383 - samples/sec: 18.33 - lr: 0.010000\n",
+      "2021-09-21 21:27:37,534 epoch 10 - iter 35/73 - loss 0.64770510 - samples/sec: 18.90 - lr: 0.010000\n",
+      "2021-09-21 21:27:37,900 epoch 10 - iter 42/73 - loss 0.64742828 - samples/sec: 19.18 - lr: 0.010000\n",
+      "2021-09-21 21:27:38,277 epoch 10 - iter 49/73 - loss 0.64790649 - samples/sec: 18.58 - lr: 0.010000\n",
+      "2021-09-21 21:27:38,659 epoch 10 - iter 56/73 - loss 0.64412939 - samples/sec: 18.37 - lr: 0.010000\n",
+      "2021-09-21 21:27:39,030 epoch 10 - iter 63/73 - loss 0.64450728 - samples/sec: 18.88 - lr: 0.010000\n",
+      "2021-09-21 21:27:39,430 epoch 10 - iter 70/73 - loss 0.64601315 - samples/sec: 17.54 - lr: 0.010000\n",
+      "2021-09-21 21:27:39,599 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:27:39,600 EPOCH 10 done: loss 0.6463 - lr 0.0100000\n",
+      "2021-09-21 21:27:39,838 DEV : loss 0.47089263796806335 - score 0.0\n",
       "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 01:39:42,701 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:39:46,801 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:46,802 Testing using best model ...\n",
-      "2021-09-08 01:39:46,803 loading file None/best-model.pt\n",
+      "2021-09-21 21:27:39,839 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:27:43,882 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:27:43,883 Testing using best model ...\n",
+      "2021-09-21 21:27:43,885 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:39:51,622 \t0.1111\n",
-      "2021-09-08 01:39:51,622 \n",
+      "2021-09-21 21:27:48,825 \t0.1111\n",
+      "2021-09-21 21:27:48,826 \n",
       "Results:\n",
       "- F-score (micro) 0.1111\n",
       "- F-score (macro) 0.0222\n",
@@ -5679,28 +5690,22 @@
       "                              precision    recall  f1-score   support\n",
       "\n",
       "     this text expresses joy     0.0000    0.0000    0.0000         0\n",
-      "this text expresses surprise     0.0000    0.0000    0.0000         1\n",
-      "    this text expresses love     0.0000    0.0000    0.0000         1\n",
-      " this text expresses disgust     0.0000    0.0000    0.0000         1\n",
-      "    this text expresses fear     0.0000    0.0000    0.0000         3\n",
-      "   this text expresses anger     0.0000    0.0000    0.0000         1\n",
-      "   this text expresses guilt     0.0000    0.0000    0.0000         1\n",
+      "this text expresses surprise     0.1111    1.0000    0.2000         1\n",
+      "    this text expresses love     0.0000    0.0000    0.0000         2\n",
+      " this text expresses disgust     0.0000    0.0000    0.0000         2\n",
+      "    this text expresses fear     0.0000    0.0000    0.0000         1\n",
+      "   this text expresses anger     0.0000    0.0000    0.0000         2\n",
+      "   this text expresses guilt     0.0000    0.0000    0.0000         0\n",
       "   this text expresses shame     0.0000    0.0000    0.0000         0\n",
-      " this text expresses sadness     0.1111    1.0000    0.2000         1\n",
+      " this text expresses sadness     0.0000    0.0000    0.0000         1\n",
       "\n",
       "                   micro avg     0.1111    0.1111    0.1111         9\n",
       "                   macro avg     0.0123    0.1111    0.0222         9\n",
       "                weighted avg     0.0123    0.1111    0.0222         9\n",
       "                 samples avg     0.1111    0.1111    0.1111         9\n",
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 01:39:51,623 ----------------------------------------------------------------------------------------------------\n",
-      "Accuracy Durchschnitt: 0.133648393194707\n"
+      "\n",
+      "2021-09-21 21:27:48,826 ----------------------------------------------------------------------------------------------------\n",
+      "Accuracy Durchschnitt: 0.15381222432262132\n"
      ]
     }
    ],
@@ -5776,6 +5781,26 @@
     "print(f'Accuracy Durchschnitt: {statistics.mean(avg_acc_list)}')"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "9e942dc3",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.295526149968494, 0.13074984247006932, 0.1042848141146818, 0.11373660995589162, 0.12476370510396975]\n",
+      "0.07143884920160679\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(avg_acc_list)\n",
+    "print(statistics.pstdev(avg_acc_list))"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "7c31f0f3",
@@ -5786,7 +5811,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 8,
    "id": "263ee449",
    "metadata": {},
    "outputs": [
@@ -5794,38 +5819,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:40:36,093 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
+      "2021-09-21 21:28:55,371 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:40:40,061 Computing label dictionary. Progress:\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "100%|██████████| 82/82 [00:00<00:00, 32221.56it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 01:40:40,065 [b'the emotion of great happiness', b'the astonishment you feel when something totally unexpected happens to you', b'a strong positive emotion of regard and affection', b'strong feelings of dislike', b'an emotion experienced in anticipation of some specific pain or danger', b'a feeling that is oriented toward some real or supposed grievance', b'remorse caused by feeling responsible for some offense', b'a painful emotion resulting from an awareness of inadequacy or guilt', b'emotions experienced when not in a state of well-being']\n"
+      "2021-09-21 21:28:59,523 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\n"
+      "100%|██████████| 82/82 [00:00<00:00, 49091.20it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:40:40,277 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:40,279 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:28:59,526 [b'the emotion of great happiness', b'the astonishment you feel when something totally unexpected happens to you', b'a strong positive emotion of regard and affection', b'strong feelings of dislike', b'an emotion experienced in anticipation of some specific pain or danger', b'a feeling that is oriented toward some real or supposed grievance', b'remorse caused by feeling responsible for some offense', b'a painful emotion resulting from an awareness of inadequacy or guilt', b'emotions experienced when not in a state of well-being']\n",
+      "2021-09-21 21:28:59,535 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:28:59,538 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -6138,240 +6150,251 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:40:40,280 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:40,280 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
-      "2021-09-08 01:40:40,280 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:40,281 Parameters:\n",
-      "2021-09-08 01:40:40,281  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:40:40,281  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:40:40,282  - patience: \"3\"\n",
-      "2021-09-08 01:40:40,282  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:40:40,282  - max_epochs: \"10\"\n",
-      "2021-09-08 01:40:40,282  - shuffle: \"True\"\n",
-      "2021-09-08 01:40:40,283  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:40:40,283  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:40:40,283 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:40,284 Model training base path: \"None\"\n",
-      "2021-09-08 01:40:40,284 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:40,284 Device: cuda:1\n",
-      "2021-09-08 01:40:40,285 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:40,285 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:40:40,503 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:40,833 epoch 1 - iter 7/73 - loss 0.31445513 - samples/sec: 22.25 - lr: 0.020000\n",
-      "2021-09-08 01:40:41,150 epoch 1 - iter 14/73 - loss 0.26697978 - samples/sec: 22.16 - lr: 0.020000\n",
-      "2021-09-08 01:40:41,463 epoch 1 - iter 21/73 - loss 0.40493239 - samples/sec: 22.38 - lr: 0.020000\n",
-      "2021-09-08 01:40:41,778 epoch 1 - iter 28/73 - loss 0.46285103 - samples/sec: 22.27 - lr: 0.020000\n",
-      "2021-09-08 01:40:42,101 epoch 1 - iter 35/73 - loss 0.50455432 - samples/sec: 21.74 - lr: 0.020000\n",
-      "2021-09-08 01:40:42,424 epoch 1 - iter 42/73 - loss 0.49885474 - samples/sec: 21.72 - lr: 0.020000\n",
-      "2021-09-08 01:40:42,751 epoch 1 - iter 49/73 - loss 0.48341176 - samples/sec: 21.47 - lr: 0.020000\n",
-      "2021-09-08 01:40:43,072 epoch 1 - iter 56/73 - loss 0.49223960 - samples/sec: 21.87 - lr: 0.020000\n",
-      "2021-09-08 01:40:43,399 epoch 1 - iter 63/73 - loss 0.50349652 - samples/sec: 21.45 - lr: 0.020000\n",
-      "2021-09-08 01:40:43,723 epoch 1 - iter 70/73 - loss 0.51298830 - samples/sec: 21.66 - lr: 0.020000\n",
-      "2021-09-08 01:40:43,859 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:43,859 EPOCH 1 done: loss 0.4957 - lr 0.0200000\n",
-      "2021-09-08 01:40:44,108 DEV : loss 0.961652934551239 - score 0.0\n",
-      "2021-09-08 01:40:44,109 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:40:52,407 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:52,747 epoch 2 - iter 7/73 - loss 0.85454197 - samples/sec: 21.57 - lr: 0.020000\n",
-      "2021-09-08 01:40:53,073 epoch 2 - iter 14/73 - loss 0.87972904 - samples/sec: 21.53 - lr: 0.020000\n",
-      "2021-09-08 01:40:53,396 epoch 2 - iter 21/73 - loss 0.88283010 - samples/sec: 21.74 - lr: 0.020000\n",
-      "2021-09-08 01:40:53,720 epoch 2 - iter 28/73 - loss 0.86812932 - samples/sec: 21.58 - lr: 0.020000\n",
-      "2021-09-08 01:40:54,042 epoch 2 - iter 35/73 - loss 0.83163451 - samples/sec: 21.82 - lr: 0.020000\n",
-      "2021-09-08 01:40:54,364 epoch 2 - iter 42/73 - loss 0.82625453 - samples/sec: 21.79 - lr: 0.020000\n",
-      "2021-09-08 01:40:54,690 epoch 2 - iter 49/73 - loss 0.81351965 - samples/sec: 21.49 - lr: 0.020000\n",
-      "2021-09-08 01:40:55,003 epoch 2 - iter 56/73 - loss 0.80627793 - samples/sec: 22.42 - lr: 0.020000\n",
-      "2021-09-08 01:40:55,309 epoch 2 - iter 63/73 - loss 0.79087939 - samples/sec: 22.87 - lr: 0.020000\n",
-      "2021-09-08 01:40:55,613 epoch 2 - iter 70/73 - loss 0.77758797 - samples/sec: 23.08 - lr: 0.020000\n",
-      "2021-09-08 01:40:55,756 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:55,757 EPOCH 2 done: loss 0.7730 - lr 0.0200000\n",
-      "2021-09-08 01:40:55,892 DEV : loss 0.4698299765586853 - score 0.0\n",
-      "2021-09-08 01:40:55,893 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:41:00,077 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:00,402 epoch 3 - iter 7/73 - loss 0.66198920 - samples/sec: 22.68 - lr: 0.020000\n",
-      "2021-09-08 01:41:00,702 epoch 3 - iter 14/73 - loss 0.64705781 - samples/sec: 23.34 - lr: 0.020000\n",
-      "2021-09-08 01:41:01,008 epoch 3 - iter 21/73 - loss 0.63507833 - samples/sec: 22.98 - lr: 0.020000\n",
-      "2021-09-08 01:41:01,308 epoch 3 - iter 28/73 - loss 0.63807219 - samples/sec: 23.33 - lr: 0.020000\n",
-      "2021-09-08 01:41:01,615 epoch 3 - iter 35/73 - loss 0.64075292 - samples/sec: 22.86 - lr: 0.020000\n",
-      "2021-09-08 01:41:01,919 epoch 3 - iter 42/73 - loss 0.64005837 - samples/sec: 23.07 - lr: 0.020000\n",
-      "2021-09-08 01:41:02,237 epoch 3 - iter 49/73 - loss 0.64545041 - samples/sec: 22.08 - lr: 0.020000\n",
-      "2021-09-08 01:41:02,543 epoch 3 - iter 56/73 - loss 0.64285781 - samples/sec: 22.94 - lr: 0.020000\n",
-      "2021-09-08 01:41:02,843 epoch 3 - iter 63/73 - loss 0.64615205 - samples/sec: 23.42 - lr: 0.020000\n",
-      "2021-09-08 01:41:03,143 epoch 3 - iter 70/73 - loss 0.64498012 - samples/sec: 23.34 - lr: 0.020000\n",
-      "2021-09-08 01:41:03,279 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:03,279 EPOCH 3 done: loss 0.6441 - lr 0.0200000\n",
-      "2021-09-08 01:41:03,410 DEV : loss 0.48148301243782043 - score 0.125\n",
-      "2021-09-08 01:41:03,410 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:41:07,747 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:08,065 epoch 4 - iter 7/73 - loss 0.63937513 - samples/sec: 23.21 - lr: 0.020000\n",
-      "2021-09-08 01:41:08,366 epoch 4 - iter 14/73 - loss 0.66915060 - samples/sec: 23.26 - lr: 0.020000\n",
-      "2021-09-08 01:41:08,668 epoch 4 - iter 21/73 - loss 0.66474911 - samples/sec: 23.27 - lr: 0.020000\n",
-      "2021-09-08 01:41:08,969 epoch 4 - iter 28/73 - loss 0.65674110 - samples/sec: 23.29 - lr: 0.020000\n",
-      "2021-09-08 01:41:09,293 epoch 4 - iter 35/73 - loss 0.66095119 - samples/sec: 21.67 - lr: 0.020000\n",
-      "2021-09-08 01:41:09,594 epoch 4 - iter 42/73 - loss 0.65928914 - samples/sec: 23.28 - lr: 0.020000\n",
-      "2021-09-08 01:41:09,898 epoch 4 - iter 49/73 - loss 0.66304257 - samples/sec: 23.11 - lr: 0.020000\n",
-      "2021-09-08 01:41:10,198 epoch 4 - iter 56/73 - loss 0.65811105 - samples/sec: 23.36 - lr: 0.020000\n",
-      "2021-09-08 01:41:10,502 epoch 4 - iter 63/73 - loss 0.65594544 - samples/sec: 23.10 - lr: 0.020000\n",
-      "2021-09-08 01:41:10,804 epoch 4 - iter 70/73 - loss 0.65919752 - samples/sec: 23.19 - lr: 0.020000\n",
-      "2021-09-08 01:41:10,933 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:10,934 EPOCH 4 done: loss 0.6578 - lr 0.0200000\n",
-      "2021-09-08 01:41:11,066 DEV : loss 0.4523374140262604 - score 0.0\n",
-      "2021-09-08 01:41:11,067 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:41:11,073 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:11,391 epoch 5 - iter 7/73 - loss 0.65903234 - samples/sec: 23.15 - lr: 0.020000\n",
-      "2021-09-08 01:41:11,690 epoch 5 - iter 14/73 - loss 0.64621720 - samples/sec: 23.43 - lr: 0.020000\n",
-      "2021-09-08 01:41:11,997 epoch 5 - iter 21/73 - loss 0.65626620 - samples/sec: 22.88 - lr: 0.020000\n",
-      "2021-09-08 01:41:12,322 epoch 5 - iter 28/73 - loss 0.65524253 - samples/sec: 21.54 - lr: 0.020000\n",
-      "2021-09-08 01:41:12,653 epoch 5 - iter 35/73 - loss 0.65261315 - samples/sec: 21.17 - lr: 0.020000\n",
-      "2021-09-08 01:41:12,969 epoch 5 - iter 42/73 - loss 0.64887154 - samples/sec: 22.22 - lr: 0.020000\n",
-      "2021-09-08 01:41:13,270 epoch 5 - iter 49/73 - loss 0.65061646 - samples/sec: 23.33 - lr: 0.020000\n",
-      "2021-09-08 01:41:13,575 epoch 5 - iter 56/73 - loss 0.64693763 - samples/sec: 22.96 - lr: 0.020000\n",
-      "2021-09-08 01:41:13,878 epoch 5 - iter 63/73 - loss 0.65075811 - samples/sec: 23.21 - lr: 0.020000\n",
-      "2021-09-08 01:41:14,182 epoch 5 - iter 70/73 - loss 0.65098849 - samples/sec: 23.03 - lr: 0.020000\n",
-      "2021-09-08 01:41:14,312 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:28:59,538 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:28:59,538 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
+      "2021-09-21 21:28:59,539 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:28:59,539 Parameters:\n",
+      "2021-09-21 21:28:59,539  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:28:59,540  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:28:59,540  - patience: \"3\"\n",
+      "2021-09-21 21:28:59,540  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:28:59,540  - max_epochs: \"10\"\n",
+      "2021-09-21 21:28:59,541  - shuffle: \"True\"\n",
+      "2021-09-21 21:28:59,541  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:28:59,541  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:28:59,541 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:28:59,542 Model training base path: \"None\"\n",
+      "2021-09-21 21:28:59,542 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:28:59,542 Device: cuda:0\n",
+      "2021-09-21 21:28:59,543 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:28:59,543 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:28:59,550 ----------------------------------------------------------------------------------------------------\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:41:14,313 EPOCH 5 done: loss 0.6506 - lr 0.0200000\n",
-      "2021-09-08 01:41:14,446 DEV : loss 0.5037050247192383 - score 0.0\n",
-      "2021-09-08 01:41:14,446 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:41:14,448 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:14,768 epoch 6 - iter 7/73 - loss 0.63097889 - samples/sec: 22.92 - lr: 0.020000\n",
-      "2021-09-08 01:41:15,072 epoch 6 - iter 14/73 - loss 0.63299631 - samples/sec: 23.07 - lr: 0.020000\n",
-      "2021-09-08 01:41:15,383 epoch 6 - iter 21/73 - loss 0.63697092 - samples/sec: 22.60 - lr: 0.020000\n",
-      "2021-09-08 01:41:15,698 epoch 6 - iter 28/73 - loss 0.64518498 - samples/sec: 22.26 - lr: 0.020000\n",
-      "2021-09-08 01:41:16,001 epoch 6 - iter 35/73 - loss 0.64100409 - samples/sec: 23.21 - lr: 0.020000\n",
-      "2021-09-08 01:41:16,310 epoch 6 - iter 42/73 - loss 0.63909366 - samples/sec: 22.68 - lr: 0.020000\n",
-      "2021-09-08 01:41:16,615 epoch 6 - iter 49/73 - loss 0.63917501 - samples/sec: 23.03 - lr: 0.020000\n",
-      "2021-09-08 01:41:16,924 epoch 6 - iter 56/73 - loss 0.63984492 - samples/sec: 22.74 - lr: 0.020000\n",
-      "2021-09-08 01:41:17,234 epoch 6 - iter 63/73 - loss 0.64448695 - samples/sec: 22.63 - lr: 0.020000\n",
-      "2021-09-08 01:41:17,539 epoch 6 - iter 70/73 - loss 0.64395813 - samples/sec: 22.96 - lr: 0.020000\n",
-      "2021-09-08 01:41:17,671 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:17,671 EPOCH 6 done: loss 0.6445 - lr 0.0200000\n",
-      "2021-09-08 01:41:17,921 DEV : loss 0.6920270919799805 - score 0.0\n",
-      "2021-09-08 01:41:17,922 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:41:18,010 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:18,333 epoch 7 - iter 7/73 - loss 0.66544852 - samples/sec: 22.80 - lr: 0.020000\n",
-      "2021-09-08 01:41:18,698 epoch 7 - iter 14/73 - loss 0.64798779 - samples/sec: 19.24 - lr: 0.020000\n",
-      "2021-09-08 01:41:19,025 epoch 7 - iter 21/73 - loss 0.63973487 - samples/sec: 21.47 - lr: 0.020000\n",
-      "2021-09-08 01:41:19,330 epoch 7 - iter 28/73 - loss 0.63992331 - samples/sec: 23.02 - lr: 0.020000\n",
-      "2021-09-08 01:41:19,642 epoch 7 - iter 35/73 - loss 0.64403479 - samples/sec: 22.44 - lr: 0.020000\n",
-      "2021-09-08 01:41:19,957 epoch 7 - iter 42/73 - loss 0.64426754 - samples/sec: 22.28 - lr: 0.020000\n",
-      "2021-09-08 01:41:20,262 epoch 7 - iter 49/73 - loss 0.64634536 - samples/sec: 23.05 - lr: 0.020000\n",
-      "2021-09-08 01:41:20,570 epoch 7 - iter 56/73 - loss 0.64690301 - samples/sec: 22.74 - lr: 0.020000\n",
-      "2021-09-08 01:41:20,873 epoch 7 - iter 63/73 - loss 0.65150756 - samples/sec: 23.21 - lr: 0.020000\n",
-      "2021-09-08 01:41:21,174 epoch 7 - iter 70/73 - loss 0.64918371 - samples/sec: 23.29 - lr: 0.020000\n",
-      "2021-09-08 01:41:21,308 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:21,309 EPOCH 7 done: loss 0.6490 - lr 0.0200000\n",
-      "2021-09-08 01:41:21,440 DEV : loss 0.49308234453201294 - score 0.0\n",
-      "Epoch     7: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:41:21,441 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:41:21,443 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:21,764 epoch 8 - iter 7/73 - loss 0.65153674 - samples/sec: 22.87 - lr: 0.010000\n",
-      "2021-09-08 01:41:22,066 epoch 8 - iter 14/73 - loss 0.64187413 - samples/sec: 23.23 - lr: 0.010000\n",
-      "2021-09-08 01:41:22,367 epoch 8 - iter 21/73 - loss 0.64646864 - samples/sec: 23.30 - lr: 0.010000\n",
-      "2021-09-08 01:41:22,668 epoch 8 - iter 28/73 - loss 0.64427556 - samples/sec: 23.32 - lr: 0.010000\n",
-      "2021-09-08 01:41:22,974 epoch 8 - iter 35/73 - loss 0.64421645 - samples/sec: 22.87 - lr: 0.010000\n",
-      "2021-09-08 01:41:23,274 epoch 8 - iter 42/73 - loss 0.64479862 - samples/sec: 23.38 - lr: 0.010000\n",
-      "2021-09-08 01:41:23,579 epoch 8 - iter 49/73 - loss 0.64300813 - samples/sec: 23.02 - lr: 0.010000\n",
-      "2021-09-08 01:41:23,880 epoch 8 - iter 56/73 - loss 0.64439929 - samples/sec: 23.26 - lr: 0.010000\n",
-      "2021-09-08 01:41:24,183 epoch 8 - iter 63/73 - loss 0.64247060 - samples/sec: 23.18 - lr: 0.010000\n",
-      "2021-09-08 01:41:24,489 epoch 8 - iter 70/73 - loss 0.64268419 - samples/sec: 22.90 - lr: 0.010000\n",
-      "2021-09-08 01:41:24,620 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:24,621 EPOCH 8 done: loss 0.6423 - lr 0.0100000\n",
-      "2021-09-08 01:41:24,754 DEV : loss 0.47623956203460693 - score 0.0\n",
-      "2021-09-08 01:41:24,755 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:41:24,757 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:25,092 epoch 9 - iter 7/73 - loss 0.65017778 - samples/sec: 21.84 - lr: 0.010000\n",
-      "2021-09-08 01:41:25,410 epoch 9 - iter 14/73 - loss 0.65220451 - samples/sec: 22.06 - lr: 0.010000\n",
-      "2021-09-08 01:41:25,726 epoch 9 - iter 21/73 - loss 0.64644643 - samples/sec: 22.21 - lr: 0.010000\n",
-      "2021-09-08 01:41:26,037 epoch 9 - iter 28/73 - loss 0.64745886 - samples/sec: 22.55 - lr: 0.010000\n",
-      "2021-09-08 01:41:26,341 epoch 9 - iter 35/73 - loss 0.64687224 - samples/sec: 23.07 - lr: 0.010000\n",
-      "2021-09-08 01:41:26,641 epoch 9 - iter 42/73 - loss 0.64658532 - samples/sec: 23.38 - lr: 0.010000\n",
-      "2021-09-08 01:41:26,946 epoch 9 - iter 49/73 - loss 0.64525317 - samples/sec: 23.04 - lr: 0.010000\n",
-      "2021-09-08 01:41:27,246 epoch 9 - iter 56/73 - loss 0.64441730 - samples/sec: 23.38 - lr: 0.010000\n",
-      "2021-09-08 01:41:27,549 epoch 9 - iter 63/73 - loss 0.64292112 - samples/sec: 23.08 - lr: 0.010000\n",
-      "2021-09-08 01:41:27,850 epoch 9 - iter 70/73 - loss 0.64258822 - samples/sec: 23.31 - lr: 0.010000\n",
-      "2021-09-08 01:41:27,982 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:27,982 EPOCH 9 done: loss 0.6413 - lr 0.0100000\n",
-      "2021-09-08 01:41:28,115 DEV : loss 0.4629209339618683 - score 0.125\n",
-      "2021-09-08 01:41:28,116 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:28:59,995 epoch 1 - iter 7/73 - loss 0.35846732 - samples/sec: 16.72 - lr: 0.020000\n",
+      "2021-09-21 21:29:00,370 epoch 1 - iter 14/73 - loss 0.44862586 - samples/sec: 18.72 - lr: 0.020000\n",
+      "2021-09-21 21:29:00,793 epoch 1 - iter 21/73 - loss 0.51129487 - samples/sec: 16.55 - lr: 0.020000\n",
+      "2021-09-21 21:29:01,243 epoch 1 - iter 28/73 - loss 0.52696225 - samples/sec: 15.58 - lr: 0.020000\n",
+      "2021-09-21 21:29:01,677 epoch 1 - iter 35/73 - loss 0.51123457 - samples/sec: 16.14 - lr: 0.020000\n",
+      "2021-09-21 21:29:02,106 epoch 1 - iter 42/73 - loss 0.52514791 - samples/sec: 16.36 - lr: 0.020000\n",
+      "2021-09-21 21:29:02,549 epoch 1 - iter 49/73 - loss 0.55017023 - samples/sec: 15.81 - lr: 0.020000\n",
+      "2021-09-21 21:29:02,991 epoch 1 - iter 56/73 - loss 0.57036775 - samples/sec: 15.88 - lr: 0.020000\n",
+      "2021-09-21 21:29:03,377 epoch 1 - iter 63/73 - loss 0.57278875 - samples/sec: 18.16 - lr: 0.020000\n",
+      "2021-09-21 21:29:03,772 epoch 1 - iter 70/73 - loss 0.57482165 - samples/sec: 17.74 - lr: 0.020000\n",
+      "2021-09-21 21:29:03,937 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:29:03,938 EPOCH 1 done: loss 0.5872 - lr 0.0200000\n",
+      "2021-09-21 21:29:04,178 DEV : loss 0.5560257434844971 - score 0.125\n",
+      "2021-09-21 21:29:04,179 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:41:34,050 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:34,373 epoch 10 - iter 7/73 - loss 0.60315203 - samples/sec: 22.79 - lr: 0.010000\n",
-      "2021-09-08 01:41:34,678 epoch 10 - iter 14/73 - loss 0.63047028 - samples/sec: 23.00 - lr: 0.010000\n",
-      "2021-09-08 01:41:34,978 epoch 10 - iter 21/73 - loss 0.62277177 - samples/sec: 23.38 - lr: 0.010000\n",
-      "2021-09-08 01:41:35,279 epoch 10 - iter 28/73 - loss 0.62688987 - samples/sec: 23.33 - lr: 0.010000\n",
-      "2021-09-08 01:41:35,582 epoch 10 - iter 35/73 - loss 0.63119561 - samples/sec: 23.10 - lr: 0.010000\n",
-      "2021-09-08 01:41:35,882 epoch 10 - iter 42/73 - loss 0.63252478 - samples/sec: 23.37 - lr: 0.010000\n",
-      "2021-09-08 01:41:36,183 epoch 10 - iter 49/73 - loss 0.63291263 - samples/sec: 23.35 - lr: 0.010000\n",
-      "2021-09-08 01:41:36,487 epoch 10 - iter 56/73 - loss 0.63463797 - samples/sec: 23.06 - lr: 0.010000\n",
-      "2021-09-08 01:41:36,790 epoch 10 - iter 63/73 - loss 0.63802702 - samples/sec: 23.13 - lr: 0.010000\n",
-      "2021-09-08 01:41:37,094 epoch 10 - iter 70/73 - loss 0.63652486 - samples/sec: 23.04 - lr: 0.010000\n",
-      "2021-09-08 01:41:37,225 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:37,226 EPOCH 10 done: loss 0.6367 - lr 0.0100000\n",
-      "2021-09-08 01:41:38,523 DEV : loss 0.4971274435520172 - score 0.125\n",
-      "2021-09-08 01:41:38,524 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:41:43,154 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:43,154 Testing using best model ...\n",
-      "2021-09-08 01:41:43,212 loading file None/best-model.pt\n",
+      "2021-09-21 21:29:08,081 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:29:08,563 epoch 2 - iter 7/73 - loss 0.58643813 - samples/sec: 16.20 - lr: 0.020000\n",
+      "2021-09-21 21:29:08,980 epoch 2 - iter 14/73 - loss 0.62548319 - samples/sec: 16.84 - lr: 0.020000\n",
+      "2021-09-21 21:29:09,371 epoch 2 - iter 21/73 - loss 0.61651655 - samples/sec: 17.94 - lr: 0.020000\n",
+      "2021-09-21 21:29:09,767 epoch 2 - iter 28/73 - loss 0.61220661 - samples/sec: 17.67 - lr: 0.020000\n",
+      "2021-09-21 21:29:10,159 epoch 2 - iter 35/73 - loss 0.61441522 - samples/sec: 17.89 - lr: 0.020000\n",
+      "2021-09-21 21:29:10,557 epoch 2 - iter 42/73 - loss 0.62302837 - samples/sec: 17.64 - lr: 0.020000\n",
+      "2021-09-21 21:29:10,955 epoch 2 - iter 49/73 - loss 0.62471044 - samples/sec: 17.59 - lr: 0.020000\n",
+      "2021-09-21 21:29:11,350 epoch 2 - iter 56/73 - loss 0.63926861 - samples/sec: 17.76 - lr: 0.020000\n",
+      "2021-09-21 21:29:11,792 epoch 2 - iter 63/73 - loss 0.65556717 - samples/sec: 15.87 - lr: 0.020000\n",
+      "2021-09-21 21:29:12,190 epoch 2 - iter 70/73 - loss 0.65537468 - samples/sec: 17.62 - lr: 0.020000\n",
+      "2021-09-21 21:29:12,371 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:29:12,371 EPOCH 2 done: loss 0.6552 - lr 0.0200000\n",
+      "2021-09-21 21:29:12,641 DEV : loss 0.48085418343544006 - score 0.0\n",
+      "2021-09-21 21:29:12,641 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:29:12,643 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:29:13,097 epoch 3 - iter 7/73 - loss 0.64819651 - samples/sec: 17.94 - lr: 0.020000\n",
+      "2021-09-21 21:29:13,510 epoch 3 - iter 14/73 - loss 0.63554729 - samples/sec: 16.99 - lr: 0.020000\n",
+      "2021-09-21 21:29:13,914 epoch 3 - iter 21/73 - loss 0.64288453 - samples/sec: 17.33 - lr: 0.020000\n",
+      "2021-09-21 21:29:14,299 epoch 3 - iter 28/73 - loss 0.64687670 - samples/sec: 18.23 - lr: 0.020000\n",
+      "2021-09-21 21:29:14,674 epoch 3 - iter 35/73 - loss 0.64803939 - samples/sec: 18.68 - lr: 0.020000\n",
+      "2021-09-21 21:29:15,070 epoch 3 - iter 42/73 - loss 0.65896055 - samples/sec: 17.71 - lr: 0.020000\n",
+      "2021-09-21 21:29:15,485 epoch 3 - iter 49/73 - loss 0.66311883 - samples/sec: 16.90 - lr: 0.020000\n",
+      "2021-09-21 21:29:15,895 epoch 3 - iter 56/73 - loss 0.66255836 - samples/sec: 17.07 - lr: 0.020000\n",
+      "2021-09-21 21:29:16,241 epoch 3 - iter 63/73 - loss 0.66045258 - samples/sec: 20.29 - lr: 0.020000\n",
+      "2021-09-21 21:29:16,648 epoch 3 - iter 70/73 - loss 0.65703209 - samples/sec: 17.23 - lr: 0.020000\n",
+      "2021-09-21 21:29:16,822 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:29:16,822 EPOCH 3 done: loss 0.6555 - lr 0.0200000\n",
+      "2021-09-21 21:29:17,058 DEV : loss 0.48644453287124634 - score 0.0\n",
+      "2021-09-21 21:29:17,058 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:29:17,061 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:29:17,486 epoch 4 - iter 7/73 - loss 0.69109852 - samples/sec: 18.38 - lr: 0.020000\n",
+      "2021-09-21 21:29:17,885 epoch 4 - iter 14/73 - loss 0.67343458 - samples/sec: 17.54 - lr: 0.020000\n",
+      "2021-09-21 21:29:18,287 epoch 4 - iter 21/73 - loss 0.65920901 - samples/sec: 17.45 - lr: 0.020000\n",
+      "2021-09-21 21:29:18,696 epoch 4 - iter 28/73 - loss 0.65972997 - samples/sec: 17.11 - lr: 0.020000\n",
+      "2021-09-21 21:29:19,078 epoch 4 - iter 35/73 - loss 0.65379817 - samples/sec: 18.40 - lr: 0.020000\n",
+      "2021-09-21 21:29:19,494 epoch 4 - iter 42/73 - loss 0.65458734 - samples/sec: 16.84 - lr: 0.020000\n",
+      "2021-09-21 21:29:19,887 epoch 4 - iter 49/73 - loss 0.65091141 - samples/sec: 17.82 - lr: 0.020000\n",
+      "2021-09-21 21:29:20,280 epoch 4 - iter 56/73 - loss 0.64861649 - samples/sec: 17.84 - lr: 0.020000\n",
+      "2021-09-21 21:29:20,641 epoch 4 - iter 63/73 - loss 0.65755096 - samples/sec: 19.42 - lr: 0.020000\n",
+      "2021-09-21 21:29:21,062 epoch 4 - iter 70/73 - loss 0.65576491 - samples/sec: 16.66 - lr: 0.020000\n",
+      "2021-09-21 21:29:21,251 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:29:21,252 EPOCH 4 done: loss 0.6568 - lr 0.0200000\n",
+      "2021-09-21 21:29:21,475 DEV : loss 0.4654969871044159 - score 0.0\n",
+      "2021-09-21 21:29:21,476 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:29:21,478 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:29:21,873 epoch 5 - iter 7/73 - loss 0.61319067 - samples/sec: 19.09 - lr: 0.020000\n",
+      "2021-09-21 21:29:22,274 epoch 5 - iter 14/73 - loss 0.67946359 - samples/sec: 17.48 - lr: 0.020000\n",
+      "2021-09-21 21:29:22,681 epoch 5 - iter 21/73 - loss 0.66376423 - samples/sec: 17.24 - lr: 0.020000\n",
+      "2021-09-21 21:29:23,116 epoch 5 - iter 28/73 - loss 0.67133491 - samples/sec: 16.09 - lr: 0.020000\n",
+      "2021-09-21 21:29:23,519 epoch 5 - iter 35/73 - loss 0.63973959 - samples/sec: 17.40 - lr: 0.020000\n",
+      "2021-09-21 21:29:23,945 epoch 5 - iter 42/73 - loss 0.64897258 - samples/sec: 16.44 - lr: 0.020000\n",
+      "2021-09-21 21:29:24,394 epoch 5 - iter 49/73 - loss 0.65175133 - samples/sec: 15.62 - lr: 0.020000\n",
+      "2021-09-21 21:29:24,742 epoch 5 - iter 56/73 - loss 0.65331085 - samples/sec: 20.14 - lr: 0.020000\n",
+      "2021-09-21 21:29:25,168 epoch 5 - iter 63/73 - loss 0.66131076 - samples/sec: 16.47 - lr: 0.020000\n",
+      "2021-09-21 21:29:25,545 epoch 5 - iter 70/73 - loss 0.65663359 - samples/sec: 18.61 - lr: 0.020000\n",
+      "2021-09-21 21:29:25,716 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:29:25,717 EPOCH 5 done: loss 0.6545 - lr 0.0200000\n",
+      "2021-09-21 21:29:25,932 DEV : loss 0.46789613366127014 - score 0.0\n",
+      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:29:25,932 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:29:25,934 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:29:26,316 epoch 6 - iter 7/73 - loss 0.63719944 - samples/sec: 19.29 - lr: 0.010000\n",
+      "2021-09-21 21:29:26,678 epoch 6 - iter 14/73 - loss 0.64244811 - samples/sec: 19.38 - lr: 0.010000\n",
+      "2021-09-21 21:29:27,089 epoch 6 - iter 21/73 - loss 0.64258232 - samples/sec: 17.06 - lr: 0.010000\n",
+      "2021-09-21 21:29:27,478 epoch 6 - iter 28/73 - loss 0.63469927 - samples/sec: 18.02 - lr: 0.010000\n",
+      "2021-09-21 21:29:27,872 epoch 6 - iter 35/73 - loss 0.64382167 - samples/sec: 17.79 - lr: 0.010000\n",
+      "2021-09-21 21:29:28,262 epoch 6 - iter 42/73 - loss 0.64451334 - samples/sec: 17.98 - lr: 0.010000\n",
+      "2021-09-21 21:29:28,660 epoch 6 - iter 49/73 - loss 0.64456912 - samples/sec: 17.61 - lr: 0.010000\n",
+      "2021-09-21 21:29:29,028 epoch 6 - iter 56/73 - loss 0.64360963 - samples/sec: 19.02 - lr: 0.010000\n",
+      "2021-09-21 21:29:29,404 epoch 6 - iter 63/73 - loss 0.64126543 - samples/sec: 18.68 - lr: 0.010000\n",
+      "2021-09-21 21:29:29,762 epoch 6 - iter 70/73 - loss 0.64295195 - samples/sec: 19.57 - lr: 0.010000\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 21:29:29,939 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:29:29,939 EPOCH 6 done: loss 0.6472 - lr 0.0100000\n",
+      "2021-09-21 21:29:30,162 DEV : loss 0.4768385887145996 - score 0.0\n",
+      "2021-09-21 21:29:30,163 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:29:30,164 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:29:30,590 epoch 7 - iter 7/73 - loss 0.64435980 - samples/sec: 17.75 - lr: 0.010000\n",
+      "2021-09-21 21:29:31,008 epoch 7 - iter 14/73 - loss 0.65372087 - samples/sec: 16.75 - lr: 0.010000\n",
+      "2021-09-21 21:29:31,377 epoch 7 - iter 21/73 - loss 0.65005727 - samples/sec: 19.01 - lr: 0.010000\n",
+      "2021-09-21 21:29:31,747 epoch 7 - iter 28/73 - loss 0.64641656 - samples/sec: 18.96 - lr: 0.010000\n",
+      "2021-09-21 21:29:32,173 epoch 7 - iter 35/73 - loss 0.64335301 - samples/sec: 16.47 - lr: 0.010000\n",
+      "2021-09-21 21:29:32,531 epoch 7 - iter 42/73 - loss 0.64430803 - samples/sec: 19.57 - lr: 0.010000\n",
+      "2021-09-21 21:29:32,925 epoch 7 - iter 49/73 - loss 0.64302351 - samples/sec: 17.80 - lr: 0.010000\n",
+      "2021-09-21 21:29:33,321 epoch 7 - iter 56/73 - loss 0.64429771 - samples/sec: 17.70 - lr: 0.010000\n",
+      "2021-09-21 21:29:33,698 epoch 7 - iter 63/73 - loss 0.64288373 - samples/sec: 18.56 - lr: 0.010000\n",
+      "2021-09-21 21:29:34,083 epoch 7 - iter 70/73 - loss 0.64566676 - samples/sec: 18.24 - lr: 0.010000\n",
+      "2021-09-21 21:29:34,248 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:29:34,249 EPOCH 7 done: loss 0.6448 - lr 0.0100000\n",
+      "2021-09-21 21:29:34,542 DEV : loss 0.4983251690864563 - score 0.0\n",
+      "2021-09-21 21:29:34,543 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:29:34,544 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:29:34,951 epoch 8 - iter 7/73 - loss 0.62999710 - samples/sec: 18.08 - lr: 0.010000\n",
+      "2021-09-21 21:29:35,312 epoch 8 - iter 14/73 - loss 0.63111857 - samples/sec: 19.42 - lr: 0.010000\n",
+      "2021-09-21 21:29:35,693 epoch 8 - iter 21/73 - loss 0.62301795 - samples/sec: 18.41 - lr: 0.010000\n",
+      "2021-09-21 21:29:36,112 epoch 8 - iter 28/73 - loss 0.63014460 - samples/sec: 16.74 - lr: 0.010000\n",
+      "2021-09-21 21:29:36,529 epoch 8 - iter 35/73 - loss 0.63228496 - samples/sec: 16.80 - lr: 0.010000\n",
+      "2021-09-21 21:29:36,921 epoch 8 - iter 42/73 - loss 0.63547309 - samples/sec: 17.89 - lr: 0.010000\n",
+      "2021-09-21 21:29:37,317 epoch 8 - iter 49/73 - loss 0.63646030 - samples/sec: 17.70 - lr: 0.010000\n",
+      "2021-09-21 21:29:37,725 epoch 8 - iter 56/73 - loss 0.63677821 - samples/sec: 17.19 - lr: 0.010000\n",
+      "2021-09-21 21:29:38,122 epoch 8 - iter 63/73 - loss 0.63546287 - samples/sec: 17.65 - lr: 0.010000\n",
+      "2021-09-21 21:29:38,542 epoch 8 - iter 70/73 - loss 0.63761182 - samples/sec: 16.68 - lr: 0.010000\n",
+      "2021-09-21 21:29:38,726 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:29:38,726 EPOCH 8 done: loss 0.6381 - lr 0.0100000\n",
+      "2021-09-21 21:29:38,979 DEV : loss 0.4947577714920044 - score 0.0\n",
+      "2021-09-21 21:29:38,980 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:29:38,982 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:29:39,394 epoch 9 - iter 7/73 - loss 0.62873769 - samples/sec: 18.50 - lr: 0.010000\n",
+      "2021-09-21 21:29:39,818 epoch 9 - iter 14/73 - loss 0.63128405 - samples/sec: 16.55 - lr: 0.010000\n",
+      "2021-09-21 21:29:40,194 epoch 9 - iter 21/73 - loss 0.64060618 - samples/sec: 18.64 - lr: 0.010000\n",
+      "2021-09-21 21:29:40,583 epoch 9 - iter 28/73 - loss 0.64131511 - samples/sec: 18.03 - lr: 0.010000\n",
+      "2021-09-21 21:29:40,978 epoch 9 - iter 35/73 - loss 0.64863121 - samples/sec: 17.75 - lr: 0.010000\n",
+      "2021-09-21 21:29:41,392 epoch 9 - iter 42/73 - loss 0.65192875 - samples/sec: 16.94 - lr: 0.010000\n",
+      "2021-09-21 21:29:41,779 epoch 9 - iter 49/73 - loss 0.65142401 - samples/sec: 18.09 - lr: 0.010000\n",
+      "2021-09-21 21:29:42,195 epoch 9 - iter 56/73 - loss 0.64830065 - samples/sec: 16.88 - lr: 0.010000\n",
+      "2021-09-21 21:29:42,587 epoch 9 - iter 63/73 - loss 0.64814003 - samples/sec: 17.89 - lr: 0.010000\n",
+      "2021-09-21 21:29:42,993 epoch 9 - iter 70/73 - loss 0.64798700 - samples/sec: 17.25 - lr: 0.010000\n",
+      "2021-09-21 21:29:43,158 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:29:43,159 EPOCH 9 done: loss 0.6472 - lr 0.0100000\n",
+      "2021-09-21 21:29:43,427 DEV : loss 0.4582485854625702 - score 0.0\n",
+      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 21:29:43,428 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:29:43,430 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:29:43,756 epoch 10 - iter 7/73 - loss 0.65274419 - samples/sec: 23.27 - lr: 0.005000\n",
+      "2021-09-21 21:29:44,058 epoch 10 - iter 14/73 - loss 0.62823188 - samples/sec: 23.24 - lr: 0.005000\n",
+      "2021-09-21 21:29:44,359 epoch 10 - iter 21/73 - loss 0.62504878 - samples/sec: 23.27 - lr: 0.005000\n",
+      "2021-09-21 21:29:44,659 epoch 10 - iter 28/73 - loss 0.62910970 - samples/sec: 23.39 - lr: 0.005000\n",
+      "2021-09-21 21:29:44,961 epoch 10 - iter 35/73 - loss 0.62941070 - samples/sec: 23.19 - lr: 0.005000\n",
+      "2021-09-21 21:29:45,264 epoch 10 - iter 42/73 - loss 0.62849547 - samples/sec: 23.17 - lr: 0.005000\n",
+      "2021-09-21 21:29:45,563 epoch 10 - iter 49/73 - loss 0.62642842 - samples/sec: 23.49 - lr: 0.005000\n",
+      "2021-09-21 21:29:45,869 epoch 10 - iter 56/73 - loss 0.62396092 - samples/sec: 22.90 - lr: 0.005000\n",
+      "2021-09-21 21:29:46,169 epoch 10 - iter 63/73 - loss 0.62600166 - samples/sec: 23.35 - lr: 0.005000\n",
+      "2021-09-21 21:29:46,472 epoch 10 - iter 70/73 - loss 0.62766614 - samples/sec: 23.21 - lr: 0.005000\n",
+      "2021-09-21 21:29:46,601 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:29:46,601 EPOCH 10 done: loss 0.6262 - lr 0.0050000\n",
+      "2021-09-21 21:29:46,732 DEV : loss 0.49495989084243774 - score 0.0\n",
+      "2021-09-21 21:29:46,733 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:29:50,447 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:29:50,448 Testing using best model ...\n",
+      "2021-09-21 21:29:50,449 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:41:49,995 \t0.0\n",
-      "2021-09-08 01:41:49,996 \n",
+      "2021-09-21 21:29:55,536 \t0.2222\n",
+      "2021-09-21 21:29:55,537 \n",
       "Results:\n",
-      "- F-score (micro) 0.0\n",
-      "- F-score (macro) 0.0\n",
-      "- Accuracy 0.0\n",
+      "- F-score (micro) 0.2222\n",
+      "- F-score (macro) 0.0444\n",
+      "- Accuracy 0.2222\n",
       "\n",
       "By class:\n",
       "                                                                            precision    recall  f1-score   support\n",
       "\n",
-      "                                            the emotion of great happiness     0.0000    0.0000    0.0000         1\n",
-      "the astonishment you feel when something totally unexpected happens to you     0.0000    0.0000    0.0000         1\n",
-      "                         a strong positive emotion of regard and affection     0.0000    0.0000    0.0000         1\n",
+      "                                            the emotion of great happiness     0.0000    0.0000    0.0000         0\n",
+      "the astonishment you feel when something totally unexpected happens to you     0.0000    0.0000    0.0000         0\n",
+      "                         a strong positive emotion of regard and affection     0.0000    0.0000    0.0000         2\n",
       "                                                strong feelings of dislike     0.0000    0.0000    0.0000         1\n",
-      "    an emotion experienced in anticipation of some specific pain or danger     0.0000    0.0000    0.0000         1\n",
+      "    an emotion experienced in anticipation of some specific pain or danger     0.0000    0.0000    0.0000         2\n",
       "         a feeling that is oriented toward some real or supposed grievance     0.0000    0.0000    0.0000         0\n",
       "                    remorse caused by feeling responsible for some offense     0.0000    0.0000    0.0000         1\n",
-      "      a painful emotion resulting from an awareness of inadequacy or guilt     0.0000    0.0000    0.0000         2\n",
-      "                    emotions experienced when not in a state of well-being     0.0000    0.0000    0.0000         1\n",
+      "      a painful emotion resulting from an awareness of inadequacy or guilt     0.0000    0.0000    0.0000         1\n",
+      "                    emotions experienced when not in a state of well-being     0.2500    1.0000    0.4000         2\n",
       "\n",
-      "                                                                 micro avg     0.0000    0.0000    0.0000         9\n",
-      "                                                                 macro avg     0.0000    0.0000    0.0000         9\n",
-      "                                                              weighted avg     0.0000    0.0000    0.0000         9\n",
-      "                                                               samples avg     0.0000    0.0000    0.0000         9\n",
-      "\n"
+      "                                                                 micro avg     0.2222    0.2222    0.2222         9\n",
+      "                                                                 macro avg     0.0278    0.1111    0.0444         9\n",
+      "                                                              weighted avg     0.0556    0.2222    0.0889         9\n",
+      "                                                               samples avg     0.2222    0.2222    0.2222         9\n",
+      "\n",
+      "2021-09-21 21:29:55,537 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:41:49,996 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:42,620 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
+      "2021-09-21 21:30:57,026 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:42:46,788 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:31:01,039 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 82/82 [00:00<00:00, 42831.00it/s]"
+      "100%|██████████| 82/82 [00:00<00:00, 45433.68it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:42:46,792 [b'the emotion of great happiness', b'the astonishment you feel when something totally unexpected happens to you', b'a strong positive emotion of regard and affection', b'strong feelings of dislike', b'an emotion experienced in anticipation of some specific pain or danger', b'a feeling that is oriented toward some real or supposed grievance', b'remorse caused by feeling responsible for some offense', b'a painful emotion resulting from an awareness of inadequacy or guilt', b'emotions experienced when not in a state of well-being']\n",
-      "2021-09-08 01:42:46,962 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:46,965 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:31:01,043 [b'the emotion of great happiness', b'the astonishment you feel when something totally unexpected happens to you', b'a strong positive emotion of regard and affection', b'strong feelings of dislike', b'an emotion experienced in anticipation of some specific pain or danger', b'a feeling that is oriented toward some real or supposed grievance', b'remorse caused by feeling responsible for some offense', b'a painful emotion resulting from an awareness of inadequacy or guilt', b'emotions experienced when not in a state of well-being']\n",
+      "2021-09-21 21:31:01,053 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:31:01,055 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -6684,24 +6707,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:42:46,965 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:46,966 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
-      "2021-09-08 01:42:46,966 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:46,966 Parameters:\n",
-      "2021-09-08 01:42:46,966  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:42:46,967  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:42:46,967  - patience: \"3\"\n",
-      "2021-09-08 01:42:46,967  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:42:46,968  - max_epochs: \"10\"\n",
-      "2021-09-08 01:42:46,968  - shuffle: \"True\"\n",
-      "2021-09-08 01:42:46,968  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:42:46,968  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:42:46,969 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:46,969 Model training base path: \"None\"\n",
-      "2021-09-08 01:42:46,969 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:46,970 Device: cuda:1\n",
-      "2021-09-08 01:42:46,970 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:46,970 Embeddings storage mode: cpu\n"
+      "2021-09-21 21:31:01,056 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:31:01,056 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
+      "2021-09-21 21:31:01,056 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:31:01,056 Parameters:\n",
+      "2021-09-21 21:31:01,057  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:31:01,057  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:31:01,057  - patience: \"3\"\n",
+      "2021-09-21 21:31:01,058  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:31:01,058  - max_epochs: \"10\"\n",
+      "2021-09-21 21:31:01,058  - shuffle: \"True\"\n",
+      "2021-09-21 21:31:01,058  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:31:01,059  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:31:01,059 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:31:01,059 Model training base path: \"None\"\n",
+      "2021-09-21 21:31:01,060 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:31:01,060 Device: cuda:0\n",
+      "2021-09-21 21:31:01,060 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:31:01,060 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:31:01,067 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -6715,175 +6739,174 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:42:47,216 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:47,579 epoch 1 - iter 7/73 - loss 0.43328005 - samples/sec: 20.37 - lr: 0.020000\n",
-      "2021-09-08 01:42:47,920 epoch 1 - iter 14/73 - loss 0.52031919 - samples/sec: 20.53 - lr: 0.020000\n",
-      "2021-09-08 01:42:48,263 epoch 1 - iter 21/73 - loss 0.42066633 - samples/sec: 20.47 - lr: 0.020000\n",
-      "2021-09-08 01:42:48,615 epoch 1 - iter 28/73 - loss 0.45898432 - samples/sec: 19.94 - lr: 0.020000\n",
-      "2021-09-08 01:42:48,960 epoch 1 - iter 35/73 - loss 0.49309573 - samples/sec: 20.30 - lr: 0.020000\n",
-      "2021-09-08 01:42:49,306 epoch 1 - iter 42/73 - loss 0.49269897 - samples/sec: 20.30 - lr: 0.020000\n",
-      "2021-09-08 01:42:49,651 epoch 1 - iter 49/73 - loss 0.52529933 - samples/sec: 20.28 - lr: 0.020000\n",
-      "2021-09-08 01:42:50,003 epoch 1 - iter 56/73 - loss 0.53965580 - samples/sec: 19.95 - lr: 0.020000\n",
-      "2021-09-08 01:42:50,451 epoch 1 - iter 63/73 - loss 0.54999300 - samples/sec: 15.65 - lr: 0.020000\n",
-      "2021-09-08 01:42:50,800 epoch 1 - iter 70/73 - loss 0.55482629 - samples/sec: 20.09 - lr: 0.020000\n",
-      "2021-09-08 01:42:50,948 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:50,949 EPOCH 1 done: loss 0.5399 - lr 0.0200000\n",
-      "2021-09-08 01:42:51,117 DEV : loss 0.663774847984314 - score 0.125\n",
-      "2021-09-08 01:42:51,118 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:43:00,094 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:00,459 epoch 2 - iter 7/73 - loss 0.94029556 - samples/sec: 20.29 - lr: 0.020000\n",
-      "2021-09-08 01:43:00,800 epoch 2 - iter 14/73 - loss 0.82515816 - samples/sec: 20.56 - lr: 0.020000\n",
-      "2021-09-08 01:43:01,149 epoch 2 - iter 21/73 - loss 0.76507775 - samples/sec: 20.08 - lr: 0.020000\n",
-      "2021-09-08 01:43:01,508 epoch 2 - iter 28/73 - loss 0.76125813 - samples/sec: 19.52 - lr: 0.020000\n",
-      "2021-09-08 01:43:01,864 epoch 2 - iter 35/73 - loss 0.74657250 - samples/sec: 19.75 - lr: 0.020000\n",
-      "2021-09-08 01:43:02,228 epoch 2 - iter 42/73 - loss 0.74042466 - samples/sec: 19.25 - lr: 0.020000\n",
-      "2021-09-08 01:43:02,578 epoch 2 - iter 49/73 - loss 0.73429050 - samples/sec: 20.05 - lr: 0.020000\n",
-      "2021-09-08 01:43:02,938 epoch 2 - iter 56/73 - loss 0.72686211 - samples/sec: 19.51 - lr: 0.020000\n",
-      "2021-09-08 01:43:03,274 epoch 2 - iter 63/73 - loss 0.72003850 - samples/sec: 20.84 - lr: 0.020000\n",
-      "2021-09-08 01:43:03,619 epoch 2 - iter 70/73 - loss 0.71548859 - samples/sec: 20.32 - lr: 0.020000\n",
-      "2021-09-08 01:43:03,763 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:03,763 EPOCH 2 done: loss 0.7130 - lr 0.0200000\n",
-      "2021-09-08 01:43:03,893 DEV : loss 0.45949089527130127 - score 0.0\n",
-      "2021-09-08 01:43:03,893 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:43:03,895 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:04,224 epoch 3 - iter 7/73 - loss 0.66513707 - samples/sec: 22.31 - lr: 0.020000\n",
-      "2021-09-08 01:43:04,531 epoch 3 - iter 14/73 - loss 0.65869788 - samples/sec: 22.82 - lr: 0.020000\n",
-      "2021-09-08 01:43:04,832 epoch 3 - iter 21/73 - loss 0.66345732 - samples/sec: 23.27 - lr: 0.020000\n",
-      "2021-09-08 01:43:05,136 epoch 3 - iter 28/73 - loss 0.65695130 - samples/sec: 23.13 - lr: 0.020000\n",
-      "2021-09-08 01:43:05,442 epoch 3 - iter 35/73 - loss 0.65550682 - samples/sec: 22.87 - lr: 0.020000\n",
-      "2021-09-08 01:43:05,743 epoch 3 - iter 42/73 - loss 0.65111697 - samples/sec: 23.35 - lr: 0.020000\n",
-      "2021-09-08 01:43:06,047 epoch 3 - iter 49/73 - loss 0.65221256 - samples/sec: 23.06 - lr: 0.020000\n",
-      "2021-09-08 01:43:06,348 epoch 3 - iter 56/73 - loss 0.65046488 - samples/sec: 23.34 - lr: 0.020000\n",
-      "2021-09-08 01:43:06,647 epoch 3 - iter 63/73 - loss 0.65068002 - samples/sec: 23.43 - lr: 0.020000\n",
-      "2021-09-08 01:43:06,953 epoch 3 - iter 70/73 - loss 0.65000632 - samples/sec: 22.91 - lr: 0.020000\n",
-      "2021-09-08 01:43:07,084 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:07,084 EPOCH 3 done: loss 0.6492 - lr 0.0200000\n",
-      "2021-09-08 01:43:07,219 DEV : loss 0.49618253111839294 - score 0.0\n",
-      "2021-09-08 01:43:07,219 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:43:07,222 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:07,535 epoch 4 - iter 7/73 - loss 0.61751576 - samples/sec: 23.41 - lr: 0.020000\n",
-      "2021-09-08 01:43:07,843 epoch 4 - iter 14/73 - loss 0.62635041 - samples/sec: 22.82 - lr: 0.020000\n",
-      "2021-09-08 01:43:08,157 epoch 4 - iter 21/73 - loss 0.63770811 - samples/sec: 22.32 - lr: 0.020000\n",
-      "2021-09-08 01:43:08,532 epoch 4 - iter 28/73 - loss 0.64038840 - samples/sec: 18.70 - lr: 0.020000\n",
-      "2021-09-08 01:43:08,925 epoch 4 - iter 35/73 - loss 0.64000059 - samples/sec: 17.86 - lr: 0.020000\n",
-      "2021-09-08 01:43:09,326 epoch 4 - iter 42/73 - loss 0.64081897 - samples/sec: 17.47 - lr: 0.020000\n",
-      "2021-09-08 01:43:09,683 epoch 4 - iter 49/73 - loss 0.64078571 - samples/sec: 19.64 - lr: 0.020000\n",
-      "2021-09-08 01:43:10,011 epoch 4 - iter 56/73 - loss 0.64054744 - samples/sec: 21.42 - lr: 0.020000\n",
-      "2021-09-08 01:43:10,374 epoch 4 - iter 63/73 - loss 0.63965719 - samples/sec: 19.34 - lr: 0.020000\n",
-      "2021-09-08 01:43:10,699 epoch 4 - iter 70/73 - loss 0.64328927 - samples/sec: 21.59 - lr: 0.020000\n",
-      "2021-09-08 01:43:10,830 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:10,830 EPOCH 4 done: loss 0.6435 - lr 0.0200000\n",
-      "2021-09-08 01:43:11,074 DEV : loss 0.4804157316684723 - score 0.125\n",
-      "2021-09-08 01:43:11,075 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:31:01,531 epoch 1 - iter 7/73 - loss 0.42319028 - samples/sec: 16.49 - lr: 0.020000\n",
+      "2021-09-21 21:31:01,939 epoch 1 - iter 14/73 - loss 0.51989689 - samples/sec: 17.20 - lr: 0.020000\n",
+      "2021-09-21 21:31:02,343 epoch 1 - iter 21/73 - loss 0.43920739 - samples/sec: 17.37 - lr: 0.020000\n",
+      "2021-09-21 21:31:02,755 epoch 1 - iter 28/73 - loss 0.50256983 - samples/sec: 16.99 - lr: 0.020000\n",
+      "2021-09-21 21:31:03,187 epoch 1 - iter 35/73 - loss 0.50062199 - samples/sec: 16.24 - lr: 0.020000\n",
+      "2021-09-21 21:31:03,625 epoch 1 - iter 42/73 - loss 0.52415730 - samples/sec: 16.02 - lr: 0.020000\n",
+      "2021-09-21 21:31:04,075 epoch 1 - iter 49/73 - loss 0.57110819 - samples/sec: 15.56 - lr: 0.020000\n",
+      "2021-09-21 21:31:04,517 epoch 1 - iter 56/73 - loss 0.54783305 - samples/sec: 15.85 - lr: 0.020000\n",
+      "2021-09-21 21:31:04,961 epoch 1 - iter 63/73 - loss 0.56123564 - samples/sec: 15.81 - lr: 0.020000\n",
+      "2021-09-21 21:31:05,409 epoch 1 - iter 70/73 - loss 0.57325021 - samples/sec: 15.63 - lr: 0.020000\n",
+      "2021-09-21 21:31:05,606 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:31:05,607 EPOCH 1 done: loss 0.5611 - lr 0.0200000\n",
+      "2021-09-21 21:31:05,870 DEV : loss 0.526210367679596 - score 0.25\n",
+      "2021-09-21 21:31:05,871 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:43:18,762 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:19,100 epoch 5 - iter 7/73 - loss 0.63927798 - samples/sec: 21.83 - lr: 0.020000\n",
-      "2021-09-08 01:43:19,412 epoch 5 - iter 14/73 - loss 0.63577371 - samples/sec: 22.48 - lr: 0.020000\n",
-      "2021-09-08 01:43:19,727 epoch 5 - iter 21/73 - loss 0.63213766 - samples/sec: 22.27 - lr: 0.020000\n",
-      "2021-09-08 01:43:20,039 epoch 5 - iter 28/73 - loss 0.63307912 - samples/sec: 22.46 - lr: 0.020000\n",
-      "2021-09-08 01:43:20,343 epoch 5 - iter 35/73 - loss 0.63097475 - samples/sec: 23.11 - lr: 0.020000\n",
-      "2021-09-08 01:43:20,649 epoch 5 - iter 42/73 - loss 0.63403540 - samples/sec: 22.93 - lr: 0.020000\n",
-      "2021-09-08 01:43:20,958 epoch 5 - iter 49/73 - loss 0.63374194 - samples/sec: 22.68 - lr: 0.020000\n",
-      "2021-09-08 01:43:21,270 epoch 5 - iter 56/73 - loss 0.63644101 - samples/sec: 22.48 - lr: 0.020000\n",
-      "2021-09-08 01:43:21,571 epoch 5 - iter 63/73 - loss 0.63683940 - samples/sec: 23.28 - lr: 0.020000\n",
-      "2021-09-08 01:43:21,876 epoch 5 - iter 70/73 - loss 0.63608028 - samples/sec: 23.04 - lr: 0.020000\n",
-      "2021-09-08 01:43:22,011 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:22,012 EPOCH 5 done: loss 0.6367 - lr 0.0200000\n",
-      "2021-09-08 01:43:22,875 DEV : loss 0.4641663134098053 - score 0.0\n",
-      "2021-09-08 01:43:22,876 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:43:22,891 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:23,289 epoch 6 - iter 7/73 - loss 0.63419781 - samples/sec: 18.48 - lr: 0.020000\n",
-      "2021-09-08 01:43:23,667 epoch 6 - iter 14/73 - loss 0.62907954 - samples/sec: 18.55 - lr: 0.020000\n",
-      "2021-09-08 01:43:24,031 epoch 6 - iter 21/73 - loss 0.63463797 - samples/sec: 19.28 - lr: 0.020000\n",
-      "2021-09-08 01:43:24,394 epoch 6 - iter 28/73 - loss 0.63909684 - samples/sec: 19.35 - lr: 0.020000\n",
-      "2021-09-08 01:43:24,781 epoch 6 - iter 35/73 - loss 0.63854070 - samples/sec: 18.12 - lr: 0.020000\n",
-      "2021-09-08 01:43:25,144 epoch 6 - iter 42/73 - loss 0.63700076 - samples/sec: 19.32 - lr: 0.020000\n",
-      "2021-09-08 01:43:25,515 epoch 6 - iter 49/73 - loss 0.63996855 - samples/sec: 18.94 - lr: 0.020000\n",
-      "2021-09-08 01:43:25,895 epoch 6 - iter 56/73 - loss 0.63843439 - samples/sec: 18.43 - lr: 0.020000\n",
-      "2021-09-08 01:43:26,250 epoch 6 - iter 63/73 - loss 0.63877725 - samples/sec: 19.79 - lr: 0.020000\n",
-      "2021-09-08 01:43:26,608 epoch 6 - iter 70/73 - loss 0.63870339 - samples/sec: 19.58 - lr: 0.020000\n"
+      "2021-09-21 21:31:10,020 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:31:10,511 epoch 2 - iter 7/73 - loss 0.77922109 - samples/sec: 15.40 - lr: 0.020000\n",
+      "2021-09-21 21:31:10,933 epoch 2 - iter 14/73 - loss 0.86145802 - samples/sec: 16.62 - lr: 0.020000\n",
+      "2021-09-21 21:31:11,365 epoch 2 - iter 21/73 - loss 0.83143216 - samples/sec: 16.25 - lr: 0.020000\n",
+      "2021-09-21 21:31:11,790 epoch 2 - iter 28/73 - loss 0.80813555 - samples/sec: 16.50 - lr: 0.020000\n",
+      "2021-09-21 21:31:12,239 epoch 2 - iter 35/73 - loss 0.76359489 - samples/sec: 15.60 - lr: 0.020000\n",
+      "2021-09-21 21:31:12,606 epoch 2 - iter 42/73 - loss 0.76487691 - samples/sec: 19.07 - lr: 0.020000\n",
+      "2021-09-21 21:31:13,042 epoch 2 - iter 49/73 - loss 0.74539204 - samples/sec: 16.10 - lr: 0.020000\n",
+      "2021-09-21 21:31:13,495 epoch 2 - iter 56/73 - loss 0.73810217 - samples/sec: 15.46 - lr: 0.020000\n",
+      "2021-09-21 21:31:13,923 epoch 2 - iter 63/73 - loss 0.72407686 - samples/sec: 16.38 - lr: 0.020000\n",
+      "2021-09-21 21:31:14,366 epoch 2 - iter 70/73 - loss 0.71865742 - samples/sec: 15.84 - lr: 0.020000\n",
+      "2021-09-21 21:31:14,524 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:31:14,524 EPOCH 2 done: loss 0.7192 - lr 0.0200000\n",
+      "2021-09-21 21:31:14,735 DEV : loss 0.4953729808330536 - score 0.125\n",
+      "2021-09-21 21:31:14,736 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:31:14,738 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:31:15,221 epoch 3 - iter 7/73 - loss 0.63116727 - samples/sec: 15.18 - lr: 0.020000\n",
+      "2021-09-21 21:31:15,641 epoch 3 - iter 14/73 - loss 0.64155232 - samples/sec: 16.69 - lr: 0.020000\n",
+      "2021-09-21 21:31:16,054 epoch 3 - iter 21/73 - loss 0.64066921 - samples/sec: 16.97 - lr: 0.020000\n",
+      "2021-09-21 21:31:16,468 epoch 3 - iter 28/73 - loss 0.64450424 - samples/sec: 16.93 - lr: 0.020000\n",
+      "2021-09-21 21:31:16,880 epoch 3 - iter 35/73 - loss 0.64321861 - samples/sec: 17.02 - lr: 0.020000\n",
+      "2021-09-21 21:31:17,267 epoch 3 - iter 42/73 - loss 0.63751372 - samples/sec: 18.14 - lr: 0.020000\n",
+      "2021-09-21 21:31:17,667 epoch 3 - iter 49/73 - loss 0.63068993 - samples/sec: 17.53 - lr: 0.020000\n",
+      "2021-09-21 21:31:18,066 epoch 3 - iter 56/73 - loss 0.62941564 - samples/sec: 17.54 - lr: 0.020000\n",
+      "2021-09-21 21:31:18,459 epoch 3 - iter 63/73 - loss 0.63226016 - samples/sec: 17.87 - lr: 0.020000\n",
+      "2021-09-21 21:31:18,850 epoch 3 - iter 70/73 - loss 0.63393830 - samples/sec: 17.90 - lr: 0.020000\n",
+      "2021-09-21 21:31:19,050 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:31:19,050 EPOCH 3 done: loss 0.6408 - lr 0.0200000\n",
+      "2021-09-21 21:31:19,301 DEV : loss 0.4363147020339966 - score 0.0\n",
+      "2021-09-21 21:31:19,302 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:31:19,304 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:31:19,769 epoch 4 - iter 7/73 - loss 0.62841528 - samples/sec: 16.72 - lr: 0.020000\n",
+      "2021-09-21 21:31:20,136 epoch 4 - iter 14/73 - loss 0.64711296 - samples/sec: 19.13 - lr: 0.020000\n",
+      "2021-09-21 21:31:20,539 epoch 4 - iter 21/73 - loss 0.65942557 - samples/sec: 17.38 - lr: 0.020000\n",
+      "2021-09-21 21:31:20,964 epoch 4 - iter 28/73 - loss 0.65611196 - samples/sec: 16.50 - lr: 0.020000\n",
+      "2021-09-21 21:31:21,359 epoch 4 - iter 35/73 - loss 0.66748418 - samples/sec: 17.75 - lr: 0.020000\n",
+      "2021-09-21 21:31:21,767 epoch 4 - iter 42/73 - loss 0.66262870 - samples/sec: 17.16 - lr: 0.020000\n",
+      "2021-09-21 21:31:22,163 epoch 4 - iter 49/73 - loss 0.66140670 - samples/sec: 17.73 - lr: 0.020000\n",
+      "2021-09-21 21:31:22,538 epoch 4 - iter 56/73 - loss 0.65416816 - samples/sec: 18.66 - lr: 0.020000\n",
+      "2021-09-21 21:31:22,881 epoch 4 - iter 63/73 - loss 0.65885357 - samples/sec: 20.45 - lr: 0.020000\n",
+      "2021-09-21 21:31:23,296 epoch 4 - iter 70/73 - loss 0.65952510 - samples/sec: 16.90 - lr: 0.020000\n",
+      "2021-09-21 21:31:23,466 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:31:23,466 EPOCH 4 done: loss 0.6590 - lr 0.0200000\n",
+      "2021-09-21 21:31:23,708 DEV : loss 0.4381343126296997 - score 0.0\n",
+      "2021-09-21 21:31:23,709 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:31:23,711 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:31:24,150 epoch 5 - iter 7/73 - loss 0.67779981 - samples/sec: 17.27 - lr: 0.020000\n",
+      "2021-09-21 21:31:24,504 epoch 5 - iter 14/73 - loss 0.66121703 - samples/sec: 19.78 - lr: 0.020000\n",
+      "2021-09-21 21:31:24,890 epoch 5 - iter 21/73 - loss 0.66140928 - samples/sec: 18.18 - lr: 0.020000\n",
+      "2021-09-21 21:31:25,286 epoch 5 - iter 28/73 - loss 0.66148098 - samples/sec: 17.69 - lr: 0.020000\n",
+      "2021-09-21 21:31:25,693 epoch 5 - iter 35/73 - loss 0.66027235 - samples/sec: 17.26 - lr: 0.020000\n",
+      "2021-09-21 21:31:26,068 epoch 5 - iter 42/73 - loss 0.65179422 - samples/sec: 18.69 - lr: 0.020000\n",
+      "2021-09-21 21:31:26,480 epoch 5 - iter 49/73 - loss 0.65316915 - samples/sec: 17.02 - lr: 0.020000\n",
+      "2021-09-21 21:31:26,904 epoch 5 - iter 56/73 - loss 0.65808804 - samples/sec: 16.54 - lr: 0.020000\n",
+      "2021-09-21 21:31:27,319 epoch 5 - iter 63/73 - loss 0.65485703 - samples/sec: 16.87 - lr: 0.020000\n",
+      "2021-09-21 21:31:27,739 epoch 5 - iter 70/73 - loss 0.65182459 - samples/sec: 16.70 - lr: 0.020000\n",
+      "2021-09-21 21:31:27,899 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:31:27,900 EPOCH 5 done: loss 0.6544 - lr 0.0200000\n",
+      "2021-09-21 21:31:28,160 DEV : loss 0.5032077431678772 - score 0.125\n",
+      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:31:28,161 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:31:28,163 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:31:28,618 epoch 6 - iter 7/73 - loss 0.65898942 - samples/sec: 16.11 - lr: 0.010000\n",
+      "2021-09-21 21:31:29,021 epoch 6 - iter 14/73 - loss 0.64375247 - samples/sec: 17.40 - lr: 0.010000\n",
+      "2021-09-21 21:31:29,399 epoch 6 - iter 21/73 - loss 0.64187612 - samples/sec: 18.54 - lr: 0.010000\n",
+      "2021-09-21 21:31:29,768 epoch 6 - iter 28/73 - loss 0.64341848 - samples/sec: 18.99 - lr: 0.010000\n",
+      "2021-09-21 21:31:30,122 epoch 6 - iter 35/73 - loss 0.64052225 - samples/sec: 19.81 - lr: 0.010000\n",
+      "2021-09-21 21:31:30,483 epoch 6 - iter 42/73 - loss 0.64275572 - samples/sec: 19.42 - lr: 0.010000\n",
+      "2021-09-21 21:31:30,873 epoch 6 - iter 49/73 - loss 0.64476936 - samples/sec: 17.99 - lr: 0.010000\n",
+      "2021-09-21 21:31:31,221 epoch 6 - iter 56/73 - loss 0.64340135 - samples/sec: 20.17 - lr: 0.010000\n",
+      "2021-09-21 21:31:31,628 epoch 6 - iter 63/73 - loss 0.63975915 - samples/sec: 17.21 - lr: 0.010000\n",
+      "2021-09-21 21:31:32,016 epoch 6 - iter 70/73 - loss 0.63600976 - samples/sec: 18.08 - lr: 0.010000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:43:26,761 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:26,761 EPOCH 6 done: loss 0.6373 - lr 0.0200000\n",
-      "2021-09-08 01:43:26,979 DEV : loss 0.478633314371109 - score 0.125\n",
-      "2021-09-08 01:43:26,981 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:31:32,188 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:31:32,188 EPOCH 6 done: loss 0.6360 - lr 0.0100000\n",
+      "2021-09-21 21:31:32,461 DEV : loss 0.4766077995300293 - score 0.125\n",
+      "2021-09-21 21:31:32,461 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:31:32,464 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:31:32,876 epoch 7 - iter 7/73 - loss 0.64191643 - samples/sec: 18.27 - lr: 0.010000\n",
+      "2021-09-21 21:31:33,291 epoch 7 - iter 14/73 - loss 0.61678290 - samples/sec: 16.88 - lr: 0.010000\n",
+      "2021-09-21 21:31:33,642 epoch 7 - iter 21/73 - loss 0.61940096 - samples/sec: 19.96 - lr: 0.010000\n",
+      "2021-09-21 21:31:34,041 epoch 7 - iter 28/73 - loss 0.62041435 - samples/sec: 17.57 - lr: 0.010000\n",
+      "2021-09-21 21:31:34,432 epoch 7 - iter 35/73 - loss 0.63352354 - samples/sec: 17.93 - lr: 0.010000\n",
+      "2021-09-21 21:31:34,830 epoch 7 - iter 42/73 - loss 0.64243401 - samples/sec: 17.62 - lr: 0.010000\n",
+      "2021-09-21 21:31:35,205 epoch 7 - iter 49/73 - loss 0.64470204 - samples/sec: 18.72 - lr: 0.010000\n",
+      "2021-09-21 21:31:35,569 epoch 7 - iter 56/73 - loss 0.64695554 - samples/sec: 19.27 - lr: 0.010000\n",
+      "2021-09-21 21:31:35,977 epoch 7 - iter 63/73 - loss 0.64464587 - samples/sec: 17.15 - lr: 0.010000\n",
+      "2021-09-21 21:31:36,362 epoch 7 - iter 70/73 - loss 0.64337603 - samples/sec: 18.22 - lr: 0.010000\n",
+      "2021-09-21 21:31:36,521 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:31:36,522 EPOCH 7 done: loss 0.6443 - lr 0.0100000\n",
+      "2021-09-21 21:31:36,760 DEV : loss 0.5121702551841736 - score 0.125\n",
+      "2021-09-21 21:31:36,761 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:31:36,763 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:31:37,184 epoch 8 - iter 7/73 - loss 0.64052463 - samples/sec: 18.19 - lr: 0.010000\n",
+      "2021-09-21 21:31:37,567 epoch 8 - iter 14/73 - loss 0.63705850 - samples/sec: 18.30 - lr: 0.010000\n",
+      "2021-09-21 21:31:37,943 epoch 8 - iter 21/73 - loss 0.63751211 - samples/sec: 18.63 - lr: 0.010000\n",
+      "2021-09-21 21:31:38,362 epoch 8 - iter 28/73 - loss 0.63150768 - samples/sec: 16.75 - lr: 0.010000\n",
+      "2021-09-21 21:31:38,751 epoch 8 - iter 35/73 - loss 0.63560494 - samples/sec: 18.02 - lr: 0.010000\n",
+      "2021-09-21 21:31:39,156 epoch 8 - iter 42/73 - loss 0.63867804 - samples/sec: 17.31 - lr: 0.010000\n",
+      "2021-09-21 21:31:39,563 epoch 8 - iter 49/73 - loss 0.64070028 - samples/sec: 17.20 - lr: 0.010000\n",
+      "2021-09-21 21:31:39,997 epoch 8 - iter 56/73 - loss 0.63982983 - samples/sec: 16.18 - lr: 0.010000\n",
+      "2021-09-21 21:31:40,400 epoch 8 - iter 63/73 - loss 0.64321593 - samples/sec: 17.36 - lr: 0.010000\n",
+      "2021-09-21 21:31:40,746 epoch 8 - iter 70/73 - loss 0.64451260 - samples/sec: 20.31 - lr: 0.010000\n",
+      "2021-09-21 21:31:40,938 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:31:40,938 EPOCH 8 done: loss 0.6447 - lr 0.0100000\n",
+      "2021-09-21 21:31:41,190 DEV : loss 0.5046018958091736 - score 0.125\n",
+      "2021-09-21 21:31:41,191 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:31:41,193 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:31:41,641 epoch 9 - iter 7/73 - loss 0.63919005 - samples/sec: 16.96 - lr: 0.010000\n",
+      "2021-09-21 21:31:42,051 epoch 9 - iter 14/73 - loss 0.64670874 - samples/sec: 17.10 - lr: 0.010000\n",
+      "2021-09-21 21:31:42,445 epoch 9 - iter 21/73 - loss 0.64352611 - samples/sec: 17.77 - lr: 0.010000\n",
+      "2021-09-21 21:31:42,847 epoch 9 - iter 28/73 - loss 0.63723836 - samples/sec: 17.46 - lr: 0.010000\n",
+      "2021-09-21 21:31:43,219 epoch 9 - iter 35/73 - loss 0.63611946 - samples/sec: 18.82 - lr: 0.010000\n",
+      "2021-09-21 21:31:43,660 epoch 9 - iter 42/73 - loss 0.63259800 - samples/sec: 15.92 - lr: 0.010000\n",
+      "2021-09-21 21:31:44,044 epoch 9 - iter 49/73 - loss 0.62881292 - samples/sec: 18.25 - lr: 0.010000\n",
+      "2021-09-21 21:31:44,429 epoch 9 - iter 56/73 - loss 0.63046732 - samples/sec: 18.21 - lr: 0.010000\n",
+      "2021-09-21 21:31:44,857 epoch 9 - iter 63/73 - loss 0.62974879 - samples/sec: 16.38 - lr: 0.010000\n",
+      "2021-09-21 21:31:45,279 epoch 9 - iter 70/73 - loss 0.63250785 - samples/sec: 16.60 - lr: 0.010000\n",
+      "2021-09-21 21:31:45,446 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:31:45,446 EPOCH 9 done: loss 0.6325 - lr 0.0100000\n",
+      "2021-09-21 21:31:45,692 DEV : loss 0.48676973581314087 - score 0.25\n",
+      "2021-09-21 21:31:45,695 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:43:31,239 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:31,656 epoch 7 - iter 7/73 - loss 0.64603736 - samples/sec: 17.67 - lr: 0.020000\n",
-      "2021-09-08 01:43:32,066 epoch 7 - iter 14/73 - loss 0.63977477 - samples/sec: 17.13 - lr: 0.020000\n",
-      "2021-09-08 01:43:32,452 epoch 7 - iter 21/73 - loss 0.64020247 - samples/sec: 18.19 - lr: 0.020000\n",
-      "2021-09-08 01:43:32,803 epoch 7 - iter 28/73 - loss 0.63493500 - samples/sec: 19.96 - lr: 0.020000\n",
-      "2021-09-08 01:43:33,173 epoch 7 - iter 35/73 - loss 0.63404234 - samples/sec: 18.96 - lr: 0.020000\n",
-      "2021-09-08 01:43:33,543 epoch 7 - iter 42/73 - loss 0.63471842 - samples/sec: 18.98 - lr: 0.020000\n",
-      "2021-09-08 01:43:33,917 epoch 7 - iter 49/73 - loss 0.63579395 - samples/sec: 18.77 - lr: 0.020000\n",
-      "2021-09-08 01:43:34,284 epoch 7 - iter 56/73 - loss 0.63481663 - samples/sec: 19.06 - lr: 0.020000\n",
-      "2021-09-08 01:43:34,661 epoch 7 - iter 63/73 - loss 0.63388169 - samples/sec: 18.63 - lr: 0.020000\n",
-      "2021-09-08 01:43:35,037 epoch 7 - iter 70/73 - loss 0.63501128 - samples/sec: 18.66 - lr: 0.020000\n",
-      "2021-09-08 01:43:35,186 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:35,186 EPOCH 7 done: loss 0.6348 - lr 0.0200000\n",
-      "2021-09-08 01:43:35,393 DEV : loss 0.4505234658718109 - score 0.0\n",
-      "2021-09-08 01:43:35,394 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:43:35,395 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:35,801 epoch 8 - iter 7/73 - loss 0.63388011 - samples/sec: 18.07 - lr: 0.020000\n",
-      "2021-09-08 01:43:36,106 epoch 8 - iter 14/73 - loss 0.62187641 - samples/sec: 23.07 - lr: 0.020000\n",
-      "2021-09-08 01:43:36,409 epoch 8 - iter 21/73 - loss 0.63714576 - samples/sec: 23.13 - lr: 0.020000\n",
-      "2021-09-08 01:43:36,712 epoch 8 - iter 28/73 - loss 0.63662268 - samples/sec: 23.16 - lr: 0.020000\n",
-      "2021-09-08 01:43:37,026 epoch 8 - iter 35/73 - loss 0.64267277 - samples/sec: 22.31 - lr: 0.020000\n",
-      "2021-09-08 01:43:37,345 epoch 8 - iter 42/73 - loss 0.64074776 - samples/sec: 22.03 - lr: 0.020000\n",
-      "2021-09-08 01:43:37,660 epoch 8 - iter 49/73 - loss 0.64206437 - samples/sec: 22.29 - lr: 0.020000\n",
-      "2021-09-08 01:43:37,985 epoch 8 - iter 56/73 - loss 0.64378812 - samples/sec: 21.56 - lr: 0.020000\n",
-      "2021-09-08 01:43:38,295 epoch 8 - iter 63/73 - loss 0.63917938 - samples/sec: 22.64 - lr: 0.020000\n",
-      "2021-09-08 01:43:38,602 epoch 8 - iter 70/73 - loss 0.63879823 - samples/sec: 22.83 - lr: 0.020000\n",
-      "2021-09-08 01:43:38,735 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:38,736 EPOCH 8 done: loss 0.6391 - lr 0.0200000\n",
-      "2021-09-08 01:43:38,968 DEV : loss 0.43890005350112915 - score 0.0\n",
-      "2021-09-08 01:43:38,969 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:43:39,048 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:39,368 epoch 9 - iter 7/73 - loss 0.66543335 - samples/sec: 23.05 - lr: 0.020000\n",
-      "2021-09-08 01:43:39,670 epoch 9 - iter 14/73 - loss 0.65656097 - samples/sec: 23.25 - lr: 0.020000\n",
-      "2021-09-08 01:43:39,976 epoch 9 - iter 21/73 - loss 0.64649212 - samples/sec: 22.93 - lr: 0.020000\n",
-      "2021-09-08 01:43:40,291 epoch 9 - iter 28/73 - loss 0.65094666 - samples/sec: 22.34 - lr: 0.020000\n",
-      "2021-09-08 01:43:40,601 epoch 9 - iter 35/73 - loss 0.65180656 - samples/sec: 22.59 - lr: 0.020000\n",
-      "2021-09-08 01:43:40,906 epoch 9 - iter 42/73 - loss 0.64906631 - samples/sec: 23.04 - lr: 0.020000\n",
-      "2021-09-08 01:43:41,208 epoch 9 - iter 49/73 - loss 0.64815906 - samples/sec: 23.25 - lr: 0.020000\n",
-      "2021-09-08 01:43:41,530 epoch 9 - iter 56/73 - loss 0.64546476 - samples/sec: 21.77 - lr: 0.020000\n",
-      "2021-09-08 01:43:41,855 epoch 9 - iter 63/73 - loss 0.64310890 - samples/sec: 21.57 - lr: 0.020000\n",
-      "2021-09-08 01:43:42,164 epoch 9 - iter 70/73 - loss 0.64283636 - samples/sec: 22.73 - lr: 0.020000\n",
-      "2021-09-08 01:43:42,296 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:42,296 EPOCH 9 done: loss 0.6424 - lr 0.0200000\n",
-      "2021-09-08 01:43:42,539 DEV : loss 0.5190622806549072 - score 0.0\n",
-      "2021-09-08 01:43:42,539 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:43:42,617 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:42,935 epoch 10 - iter 7/73 - loss 0.64193959 - samples/sec: 23.18 - lr: 0.020000\n",
-      "2021-09-08 01:43:43,236 epoch 10 - iter 14/73 - loss 0.63651942 - samples/sec: 23.30 - lr: 0.020000\n",
-      "2021-09-08 01:43:43,541 epoch 10 - iter 21/73 - loss 0.63356752 - samples/sec: 22.96 - lr: 0.020000\n",
-      "2021-09-08 01:43:43,853 epoch 10 - iter 28/73 - loss 0.63345735 - samples/sec: 22.54 - lr: 0.020000\n",
-      "2021-09-08 01:43:44,163 epoch 10 - iter 35/73 - loss 0.63858930 - samples/sec: 22.63 - lr: 0.020000\n",
-      "2021-09-08 01:43:44,470 epoch 10 - iter 42/73 - loss 0.63446195 - samples/sec: 22.85 - lr: 0.020000\n",
-      "2021-09-08 01:43:44,781 epoch 10 - iter 49/73 - loss 0.63891142 - samples/sec: 22.61 - lr: 0.020000\n",
-      "2021-09-08 01:43:45,085 epoch 10 - iter 56/73 - loss 0.64131086 - samples/sec: 23.08 - lr: 0.020000\n",
-      "2021-09-08 01:43:45,411 epoch 10 - iter 63/73 - loss 0.64055551 - samples/sec: 21.52 - lr: 0.020000\n",
-      "2021-09-08 01:43:45,739 epoch 10 - iter 70/73 - loss 0.63983752 - samples/sec: 21.38 - lr: 0.020000\n",
-      "2021-09-08 01:43:45,880 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:45,880 EPOCH 10 done: loss 0.6392 - lr 0.0200000\n",
-      "2021-09-08 01:43:46,053 DEV : loss 0.5057432055473328 - score 0.0\n",
-      "Epoch    10: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:43:46,054 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:43:49,848 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:49,849 Testing using best model ...\n",
-      "2021-09-08 01:43:49,907 loading file None/best-model.pt\n",
+      "2021-09-21 21:31:49,696 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:31:50,015 epoch 10 - iter 7/73 - loss 0.62222958 - samples/sec: 23.08 - lr: 0.010000\n",
+      "2021-09-21 21:31:50,315 epoch 10 - iter 14/73 - loss 0.62985291 - samples/sec: 23.39 - lr: 0.010000\n",
+      "2021-09-21 21:31:50,619 epoch 10 - iter 21/73 - loss 0.63404395 - samples/sec: 23.09 - lr: 0.010000\n",
+      "2021-09-21 21:31:50,919 epoch 10 - iter 28/73 - loss 0.63291188 - samples/sec: 23.39 - lr: 0.010000\n",
+      "2021-09-21 21:31:51,219 epoch 10 - iter 35/73 - loss 0.62641726 - samples/sec: 23.41 - lr: 0.010000\n",
+      "2021-09-21 21:31:51,520 epoch 10 - iter 42/73 - loss 0.62665153 - samples/sec: 23.27 - lr: 0.010000\n",
+      "2021-09-21 21:31:51,819 epoch 10 - iter 49/73 - loss 0.62509815 - samples/sec: 23.46 - lr: 0.010000\n",
+      "2021-09-21 21:31:52,122 epoch 10 - iter 56/73 - loss 0.62838598 - samples/sec: 23.17 - lr: 0.010000\n",
+      "2021-09-21 21:31:52,422 epoch 10 - iter 63/73 - loss 0.62702962 - samples/sec: 23.37 - lr: 0.010000\n",
+      "2021-09-21 21:31:52,721 epoch 10 - iter 70/73 - loss 0.62644700 - samples/sec: 23.43 - lr: 0.010000\n",
+      "2021-09-21 21:31:52,852 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:31:52,852 EPOCH 10 done: loss 0.6269 - lr 0.0100000\n",
+      "2021-09-21 21:31:52,996 DEV : loss 0.4365270733833313 - score 0.25\n",
+      "2021-09-21 21:31:52,996 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:32:14,838 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:32:14,882 Testing using best model ...\n",
+      "2021-09-21 21:32:14,883 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:43:58,114 \t0.1111\n",
-      "2021-09-08 01:43:58,115 \n",
+      "2021-09-21 21:32:21,360 \t0.1111\n",
+      "2021-09-21 21:32:21,361 \n",
       "Results:\n",
       "- F-score (micro) 0.1111\n",
-      "- F-score (macro) 0.0247\n",
+      "- F-score (macro) 0.0741\n",
       "- Accuracy 0.1111\n",
       "\n",
       "By class:\n",
@@ -6891,45 +6914,45 @@
       "\n",
       "                                            the emotion of great happiness     0.0000    0.0000    0.0000         0\n",
       "the astonishment you feel when something totally unexpected happens to you     0.0000    0.0000    0.0000         0\n",
-      "                         a strong positive emotion of regard and affection     0.0000    0.0000    0.0000         1\n",
-      "                                                strong feelings of dislike     0.0000    0.0000    0.0000         1\n",
+      "                         a strong positive emotion of regard and affection     1.0000    0.5000    0.6667         2\n",
+      "                                                strong feelings of dislike     0.0000    0.0000    0.0000         2\n",
       "    an emotion experienced in anticipation of some specific pain or danger     0.0000    0.0000    0.0000         0\n",
       "         a feeling that is oriented toward some real or supposed grievance     0.0000    0.0000    0.0000         2\n",
-      "                    remorse caused by feeling responsible for some offense     0.0000    0.0000    0.0000         2\n",
-      "      a painful emotion resulting from an awareness of inadequacy or guilt     0.0000    0.0000    0.0000         2\n",
-      "                    emotions experienced when not in a state of well-being     0.1250    1.0000    0.2222         1\n",
+      "                    remorse caused by feeling responsible for some offense     0.0000    0.0000    0.0000         1\n",
+      "      a painful emotion resulting from an awareness of inadequacy or guilt     0.0000    0.0000    0.0000         0\n",
+      "                    emotions experienced when not in a state of well-being     0.0000    0.0000    0.0000         2\n",
       "\n",
       "                                                                 micro avg     0.1111    0.1111    0.1111         9\n",
-      "                                                                 macro avg     0.0139    0.1111    0.0247         9\n",
-      "                                                              weighted avg     0.0139    0.1111    0.0247         9\n",
+      "                                                                 macro avg     0.1111    0.0556    0.0741         9\n",
+      "                                                              weighted avg     0.2222    0.1111    0.1481         9\n",
       "                                                               samples avg     0.1111    0.1111    0.1111         9\n",
       "\n",
-      "2021-09-08 01:43:58,115 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:32:21,362 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:44:51,927 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
+      "2021-09-21 21:33:39,558 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:44:55,872 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:33:43,491 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 82/82 [00:00<00:00, 35471.63it/s]"
+      "100%|██████████| 82/82 [00:00<00:00, 48826.37it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:44:55,877 [b'the emotion of great happiness', b'the astonishment you feel when something totally unexpected happens to you', b'a strong positive emotion of regard and affection', b'strong feelings of dislike', b'an emotion experienced in anticipation of some specific pain or danger', b'a feeling that is oriented toward some real or supposed grievance', b'remorse caused by feeling responsible for some offense', b'a painful emotion resulting from an awareness of inadequacy or guilt', b'emotions experienced when not in a state of well-being']\n",
-      "2021-09-08 01:44:55,897 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:55,899 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:33:43,495 [b'the emotion of great happiness', b'the astonishment you feel when something totally unexpected happens to you', b'a strong positive emotion of regard and affection', b'strong feelings of dislike', b'an emotion experienced in anticipation of some specific pain or danger', b'a feeling that is oriented toward some real or supposed grievance', b'remorse caused by feeling responsible for some offense', b'a painful emotion resulting from an awareness of inadequacy or guilt', b'emotions experienced when not in a state of well-being']\n",
+      "2021-09-21 21:33:43,504 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:33:43,506 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -7242,25 +7265,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:44:55,900 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:55,900 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
-      "2021-09-08 01:44:55,900 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:55,901 Parameters:\n",
-      "2021-09-08 01:44:55,901  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:44:55,901  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:44:55,902  - patience: \"3\"\n",
-      "2021-09-08 01:44:55,902  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:44:55,902  - max_epochs: \"10\"\n",
-      "2021-09-08 01:44:55,903  - shuffle: \"True\"\n",
-      "2021-09-08 01:44:55,903  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:44:55,903  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:44:55,904 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:55,904 Model training base path: \"None\"\n",
-      "2021-09-08 01:44:55,904 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:55,905 Device: cuda:1\n",
-      "2021-09-08 01:44:55,905 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:55,905 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:44:55,912 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:33:43,507 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:33:43,507 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
+      "2021-09-21 21:33:43,507 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:33:43,508 Parameters:\n",
+      "2021-09-21 21:33:43,508  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:33:43,508  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:33:43,509  - patience: \"3\"\n",
+      "2021-09-21 21:33:43,509  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:33:43,509  - max_epochs: \"10\"\n",
+      "2021-09-21 21:33:43,509  - shuffle: \"True\"\n",
+      "2021-09-21 21:33:43,510  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:33:43,510  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:33:43,510 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:33:43,510 Model training base path: \"None\"\n",
+      "2021-09-21 21:33:43,511 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:33:43,511 Device: cuda:0\n",
+      "2021-09-21 21:33:43,511 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:33:43,512 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:33:43,518 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -7274,197 +7297,197 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:44:56,296 epoch 1 - iter 7/73 - loss 0.34384484 - samples/sec: 19.29 - lr: 0.020000\n",
-      "2021-09-08 01:44:56,659 epoch 1 - iter 14/73 - loss 0.47995270 - samples/sec: 19.32 - lr: 0.020000\n",
-      "2021-09-08 01:44:57,029 epoch 1 - iter 21/73 - loss 0.54895419 - samples/sec: 19.01 - lr: 0.020000\n",
-      "2021-09-08 01:44:57,404 epoch 1 - iter 28/73 - loss 0.55421793 - samples/sec: 18.71 - lr: 0.020000\n",
-      "2021-09-08 01:44:57,764 epoch 1 - iter 35/73 - loss 0.55342730 - samples/sec: 19.44 - lr: 0.020000\n",
-      "2021-09-08 01:44:58,137 epoch 1 - iter 42/73 - loss 0.53763504 - samples/sec: 18.85 - lr: 0.020000\n",
-      "2021-09-08 01:44:58,491 epoch 1 - iter 49/73 - loss 0.53983209 - samples/sec: 19.78 - lr: 0.020000\n",
-      "2021-09-08 01:44:58,867 epoch 1 - iter 56/73 - loss 0.54910255 - samples/sec: 18.71 - lr: 0.020000\n",
-      "2021-09-08 01:44:59,240 epoch 1 - iter 63/73 - loss 0.57196225 - samples/sec: 18.79 - lr: 0.020000\n",
-      "2021-09-08 01:44:59,592 epoch 1 - iter 70/73 - loss 0.57120637 - samples/sec: 19.94 - lr: 0.020000\n",
-      "2021-09-08 01:44:59,743 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:59,744 EPOCH 1 done: loss 0.5513 - lr 0.0200000\n",
-      "2021-09-08 01:45:00,071 DEV : loss 0.6199005246162415 - score 0.25\n",
-      "2021-09-08 01:45:00,072 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:33:43,847 epoch 1 - iter 7/73 - loss 0.35006697 - samples/sec: 22.28 - lr: 0.020000\n",
+      "2021-09-21 21:33:44,164 epoch 1 - iter 14/73 - loss 0.59754091 - samples/sec: 22.13 - lr: 0.020000\n",
+      "2021-09-21 21:33:44,486 epoch 1 - iter 21/73 - loss 0.59855393 - samples/sec: 21.77 - lr: 0.020000\n",
+      "2021-09-21 21:33:44,807 epoch 1 - iter 28/73 - loss 0.57816335 - samples/sec: 21.85 - lr: 0.020000\n",
+      "2021-09-21 21:33:45,115 epoch 1 - iter 35/73 - loss 0.51878652 - samples/sec: 22.77 - lr: 0.020000\n",
+      "2021-09-21 21:33:45,440 epoch 1 - iter 42/73 - loss 0.54296132 - samples/sec: 21.58 - lr: 0.020000\n",
+      "2021-09-21 21:33:45,761 epoch 1 - iter 49/73 - loss 0.59832714 - samples/sec: 21.82 - lr: 0.020000\n",
+      "2021-09-21 21:33:46,086 epoch 1 - iter 56/73 - loss 0.58476609 - samples/sec: 21.62 - lr: 0.020000\n",
+      "2021-09-21 21:33:46,407 epoch 1 - iter 63/73 - loss 0.58305293 - samples/sec: 21.83 - lr: 0.020000\n",
+      "2021-09-21 21:33:46,732 epoch 1 - iter 70/73 - loss 0.57693085 - samples/sec: 21.56 - lr: 0.020000\n",
+      "2021-09-21 21:33:46,871 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:33:46,871 EPOCH 1 done: loss 0.5670 - lr 0.0200000\n",
+      "2021-09-21 21:33:47,010 DEV : loss 0.8574800491333008 - score 0.125\n",
+      "2021-09-21 21:33:47,011 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:45:04,119 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:04,480 epoch 2 - iter 7/73 - loss 0.92753983 - samples/sec: 20.71 - lr: 0.020000\n",
-      "2021-09-08 01:45:04,826 epoch 2 - iter 14/73 - loss 0.85242321 - samples/sec: 20.31 - lr: 0.020000\n",
-      "2021-09-08 01:45:05,183 epoch 2 - iter 21/73 - loss 0.84304258 - samples/sec: 19.64 - lr: 0.020000\n",
-      "2021-09-08 01:45:05,521 epoch 2 - iter 28/73 - loss 0.82189026 - samples/sec: 20.79 - lr: 0.020000\n",
-      "2021-09-08 01:45:05,852 epoch 2 - iter 35/73 - loss 0.80672281 - samples/sec: 21.21 - lr: 0.020000\n",
-      "2021-09-08 01:45:06,180 epoch 2 - iter 42/73 - loss 0.79522264 - samples/sec: 21.36 - lr: 0.020000\n",
-      "2021-09-08 01:45:06,496 epoch 2 - iter 49/73 - loss 0.77407063 - samples/sec: 22.19 - lr: 0.020000\n",
-      "2021-09-08 01:45:06,826 epoch 2 - iter 56/73 - loss 0.76625015 - samples/sec: 21.29 - lr: 0.020000\n",
-      "2021-09-08 01:45:07,150 epoch 2 - iter 63/73 - loss 0.76075495 - samples/sec: 21.67 - lr: 0.020000\n",
-      "2021-09-08 01:45:07,486 epoch 2 - iter 70/73 - loss 0.75643355 - samples/sec: 20.85 - lr: 0.020000\n",
-      "2021-09-08 01:45:07,629 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:07,630 EPOCH 2 done: loss 0.7549 - lr 0.0200000\n",
-      "2021-09-08 01:45:07,760 DEV : loss 0.5975160002708435 - score 0.25\n",
-      "2021-09-08 01:45:07,761 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:33:51,033 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:33:51,378 epoch 2 - iter 7/73 - loss 0.90672737 - samples/sec: 21.32 - lr: 0.020000\n",
+      "2021-09-21 21:33:51,704 epoch 2 - iter 14/73 - loss 0.81698565 - samples/sec: 21.49 - lr: 0.020000\n",
+      "2021-09-21 21:33:52,027 epoch 2 - iter 21/73 - loss 0.77647743 - samples/sec: 21.76 - lr: 0.020000\n",
+      "2021-09-21 21:33:52,350 epoch 2 - iter 28/73 - loss 0.74626078 - samples/sec: 21.72 - lr: 0.020000\n",
+      "2021-09-21 21:33:52,675 epoch 2 - iter 35/73 - loss 0.74646311 - samples/sec: 21.57 - lr: 0.020000\n",
+      "2021-09-21 21:33:52,985 epoch 2 - iter 42/73 - loss 0.72368923 - samples/sec: 22.62 - lr: 0.020000\n",
+      "2021-09-21 21:33:53,298 epoch 2 - iter 49/73 - loss 0.71832793 - samples/sec: 22.44 - lr: 0.020000\n",
+      "2021-09-21 21:33:53,599 epoch 2 - iter 56/73 - loss 0.71102621 - samples/sec: 23.30 - lr: 0.020000\n",
+      "2021-09-21 21:33:53,898 epoch 2 - iter 63/73 - loss 0.70078773 - samples/sec: 23.46 - lr: 0.020000\n",
+      "2021-09-21 21:33:54,196 epoch 2 - iter 70/73 - loss 0.69673583 - samples/sec: 23.51 - lr: 0.020000\n",
+      "2021-09-21 21:33:54,327 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:33:54,327 EPOCH 2 done: loss 0.6939 - lr 0.0200000\n",
+      "2021-09-21 21:33:55,634 DEV : loss 0.4205796718597412 - score 0.125\n",
+      "2021-09-21 21:33:55,635 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:45:11,684 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:12,019 epoch 3 - iter 7/73 - loss 0.66084941 - samples/sec: 21.94 - lr: 0.020000\n",
-      "2021-09-08 01:45:12,337 epoch 3 - iter 14/73 - loss 0.65420273 - samples/sec: 22.05 - lr: 0.020000\n",
-      "2021-09-08 01:45:12,658 epoch 3 - iter 21/73 - loss 0.65332291 - samples/sec: 21.85 - lr: 0.020000\n",
-      "2021-09-08 01:45:12,980 epoch 3 - iter 28/73 - loss 0.65440244 - samples/sec: 21.84 - lr: 0.020000\n",
-      "2021-09-08 01:45:13,296 epoch 3 - iter 35/73 - loss 0.65666764 - samples/sec: 22.16 - lr: 0.020000\n",
-      "2021-09-08 01:45:13,619 epoch 3 - iter 42/73 - loss 0.65700602 - samples/sec: 21.76 - lr: 0.020000\n",
-      "2021-09-08 01:45:13,936 epoch 3 - iter 49/73 - loss 0.65481719 - samples/sec: 22.10 - lr: 0.020000\n",
-      "2021-09-08 01:45:14,255 epoch 3 - iter 56/73 - loss 0.65249650 - samples/sec: 21.97 - lr: 0.020000\n",
-      "2021-09-08 01:45:14,570 epoch 3 - iter 63/73 - loss 0.65257421 - samples/sec: 22.31 - lr: 0.020000\n",
-      "2021-09-08 01:45:14,917 epoch 3 - iter 70/73 - loss 0.65548844 - samples/sec: 20.18 - lr: 0.020000\n",
-      "2021-09-08 01:45:15,075 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:15,075 EPOCH 3 done: loss 0.6546 - lr 0.0200000\n",
-      "2021-09-08 01:45:15,276 DEV : loss 0.4714202582836151 - score 0.125\n",
-      "2021-09-08 01:45:15,276 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:45:15,279 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:15,691 epoch 4 - iter 7/73 - loss 0.63046761 - samples/sec: 17.86 - lr: 0.020000\n",
-      "2021-09-08 01:45:16,085 epoch 4 - iter 14/73 - loss 0.64695670 - samples/sec: 17.78 - lr: 0.020000\n",
-      "2021-09-08 01:45:16,404 epoch 4 - iter 21/73 - loss 0.63056739 - samples/sec: 21.98 - lr: 0.020000\n",
-      "2021-09-08 01:45:16,724 epoch 4 - iter 28/73 - loss 0.64248861 - samples/sec: 21.94 - lr: 0.020000\n",
-      "2021-09-08 01:45:17,047 epoch 4 - iter 35/73 - loss 0.65020433 - samples/sec: 21.68 - lr: 0.020000\n",
-      "2021-09-08 01:45:17,370 epoch 4 - iter 42/73 - loss 0.66257059 - samples/sec: 21.74 - lr: 0.020000\n",
-      "2021-09-08 01:45:17,693 epoch 4 - iter 49/73 - loss 0.65860851 - samples/sec: 21.71 - lr: 0.020000\n",
-      "2021-09-08 01:45:18,011 epoch 4 - iter 56/73 - loss 0.65612885 - samples/sec: 22.10 - lr: 0.020000\n",
-      "2021-09-08 01:45:18,326 epoch 4 - iter 63/73 - loss 0.65317420 - samples/sec: 22.24 - lr: 0.020000\n",
-      "2021-09-08 01:45:18,652 epoch 4 - iter 70/73 - loss 0.65059092 - samples/sec: 21.49 - lr: 0.020000\n",
-      "2021-09-08 01:45:18,790 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:18,790 EPOCH 4 done: loss 0.6502 - lr 0.0200000\n",
-      "2021-09-08 01:45:19,027 DEV : loss 0.4908803105354309 - score 0.0\n",
-      "2021-09-08 01:45:19,027 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:45:19,110 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:19,456 epoch 5 - iter 7/73 - loss 0.67672285 - samples/sec: 21.14 - lr: 0.020000\n",
-      "2021-09-08 01:45:19,821 epoch 5 - iter 14/73 - loss 0.66724110 - samples/sec: 19.26 - lr: 0.020000\n",
-      "2021-09-08 01:45:20,181 epoch 5 - iter 21/73 - loss 0.65788987 - samples/sec: 19.49 - lr: 0.020000\n",
-      "2021-09-08 01:45:20,560 epoch 5 - iter 28/73 - loss 0.65177662 - samples/sec: 18.51 - lr: 0.020000\n",
-      "2021-09-08 01:45:20,985 epoch 5 - iter 35/73 - loss 0.64733675 - samples/sec: 16.49 - lr: 0.020000\n",
-      "2021-09-08 01:45:21,426 epoch 5 - iter 42/73 - loss 0.65541098 - samples/sec: 15.91 - lr: 0.020000\n",
-      "2021-09-08 01:45:21,858 epoch 5 - iter 49/73 - loss 0.65941961 - samples/sec: 16.22 - lr: 0.020000\n",
-      "2021-09-08 01:45:22,295 epoch 5 - iter 56/73 - loss 0.66069675 - samples/sec: 16.07 - lr: 0.020000\n",
-      "2021-09-08 01:45:22,728 epoch 5 - iter 63/73 - loss 0.65678527 - samples/sec: 16.24 - lr: 0.020000\n",
-      "2021-09-08 01:45:23,141 epoch 5 - iter 70/73 - loss 0.65680711 - samples/sec: 16.97 - lr: 0.020000\n",
-      "2021-09-08 01:45:23,328 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:23,329 EPOCH 5 done: loss 0.6572 - lr 0.0200000\n",
-      "2021-09-08 01:45:23,554 DEV : loss 0.47426941990852356 - score 0.25\n",
-      "2021-09-08 01:45:23,555 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:33:59,734 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:34:00,168 epoch 3 - iter 7/73 - loss 0.64465838 - samples/sec: 17.25 - lr: 0.020000\n",
+      "2021-09-21 21:34:00,555 epoch 3 - iter 14/73 - loss 0.64246390 - samples/sec: 18.12 - lr: 0.020000\n",
+      "2021-09-21 21:34:00,959 epoch 3 - iter 21/73 - loss 0.64116665 - samples/sec: 17.33 - lr: 0.020000\n",
+      "2021-09-21 21:34:01,375 epoch 3 - iter 28/73 - loss 0.64127005 - samples/sec: 16.85 - lr: 0.020000\n",
+      "2021-09-21 21:34:01,790 epoch 3 - iter 35/73 - loss 0.64166665 - samples/sec: 16.93 - lr: 0.020000\n",
+      "2021-09-21 21:34:02,209 epoch 3 - iter 42/73 - loss 0.64345788 - samples/sec: 16.72 - lr: 0.020000\n",
+      "2021-09-21 21:34:02,627 epoch 3 - iter 49/73 - loss 0.64754528 - samples/sec: 16.79 - lr: 0.020000\n",
+      "2021-09-21 21:34:03,026 epoch 3 - iter 56/73 - loss 0.64928865 - samples/sec: 17.58 - lr: 0.020000\n",
+      "2021-09-21 21:34:03,435 epoch 3 - iter 63/73 - loss 0.64097379 - samples/sec: 17.12 - lr: 0.020000\n",
+      "2021-09-21 21:34:03,816 epoch 3 - iter 70/73 - loss 0.64224150 - samples/sec: 18.43 - lr: 0.020000\n",
+      "2021-09-21 21:34:03,985 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:34:03,985 EPOCH 3 done: loss 0.6436 - lr 0.0200000\n",
+      "2021-09-21 21:34:04,219 DEV : loss 0.41230344772338867 - score 0.125\n",
+      "2021-09-21 21:34:04,220 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:45:27,451 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:27,858 epoch 6 - iter 7/73 - loss 0.68793635 - samples/sec: 18.09 - lr: 0.020000\n",
-      "2021-09-08 01:45:28,241 epoch 6 - iter 14/73 - loss 0.68061173 - samples/sec: 18.29 - lr: 0.020000\n",
-      "2021-09-08 01:45:28,627 epoch 6 - iter 21/73 - loss 0.66449916 - samples/sec: 18.19 - lr: 0.020000\n",
-      "2021-09-08 01:45:28,997 epoch 6 - iter 28/73 - loss 0.65594322 - samples/sec: 18.98 - lr: 0.020000\n",
-      "2021-09-08 01:45:29,403 epoch 6 - iter 35/73 - loss 0.65262372 - samples/sec: 17.28 - lr: 0.020000\n",
-      "2021-09-08 01:45:29,779 epoch 6 - iter 42/73 - loss 0.65695213 - samples/sec: 18.65 - lr: 0.020000\n",
-      "2021-09-08 01:45:30,161 epoch 6 - iter 49/73 - loss 0.66059225 - samples/sec: 18.41 - lr: 0.020000\n",
-      "2021-09-08 01:45:30,535 epoch 6 - iter 56/73 - loss 0.66267919 - samples/sec: 18.75 - lr: 0.020000\n",
-      "2021-09-08 01:45:30,934 epoch 6 - iter 63/73 - loss 0.66217417 - samples/sec: 17.59 - lr: 0.020000\n",
-      "2021-09-08 01:45:31,326 epoch 6 - iter 70/73 - loss 0.66021926 - samples/sec: 17.88 - lr: 0.020000\n",
-      "2021-09-08 01:45:31,495 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:34:08,292 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:34:08,716 epoch 4 - iter 7/73 - loss 0.61185265 - samples/sec: 17.24 - lr: 0.020000\n",
+      "2021-09-21 21:34:09,144 epoch 4 - iter 14/73 - loss 0.62410769 - samples/sec: 16.40 - lr: 0.020000\n",
+      "2021-09-21 21:34:09,569 epoch 4 - iter 21/73 - loss 0.61918439 - samples/sec: 16.49 - lr: 0.020000\n",
+      "2021-09-21 21:34:09,970 epoch 4 - iter 28/73 - loss 0.61970790 - samples/sec: 17.47 - lr: 0.020000\n",
+      "2021-09-21 21:34:10,380 epoch 4 - iter 35/73 - loss 0.63188383 - samples/sec: 17.08 - lr: 0.020000\n",
+      "2021-09-21 21:34:10,785 epoch 4 - iter 42/73 - loss 0.61829918 - samples/sec: 17.34 - lr: 0.020000\n",
+      "2021-09-21 21:34:11,181 epoch 4 - iter 49/73 - loss 0.62018720 - samples/sec: 17.69 - lr: 0.020000\n",
+      "2021-09-21 21:34:11,569 epoch 4 - iter 56/73 - loss 0.61772827 - samples/sec: 18.10 - lr: 0.020000\n",
+      "2021-09-21 21:34:11,967 epoch 4 - iter 63/73 - loss 0.62968625 - samples/sec: 17.57 - lr: 0.020000\n",
+      "2021-09-21 21:34:12,376 epoch 4 - iter 70/73 - loss 0.63297458 - samples/sec: 17.15 - lr: 0.020000\n",
+      "2021-09-21 21:34:12,539 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:34:12,539 EPOCH 4 done: loss 0.6355 - lr 0.0200000\n",
+      "2021-09-21 21:34:12,811 DEV : loss 0.5378357768058777 - score 0.125\n",
+      "2021-09-21 21:34:12,813 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:34:12,815 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:34:13,239 epoch 5 - iter 7/73 - loss 0.68580789 - samples/sec: 17.91 - lr: 0.020000\n",
+      "2021-09-21 21:34:13,658 epoch 5 - iter 14/73 - loss 0.67340912 - samples/sec: 16.71 - lr: 0.020000\n",
+      "2021-09-21 21:34:14,058 epoch 5 - iter 21/73 - loss 0.66167300 - samples/sec: 17.56 - lr: 0.020000\n",
+      "2021-09-21 21:34:14,443 epoch 5 - iter 28/73 - loss 0.65732667 - samples/sec: 18.20 - lr: 0.020000\n",
+      "2021-09-21 21:34:14,840 epoch 5 - iter 35/73 - loss 0.66103690 - samples/sec: 17.66 - lr: 0.020000\n",
+      "2021-09-21 21:34:15,243 epoch 5 - iter 42/73 - loss 0.65751107 - samples/sec: 17.37 - lr: 0.020000\n",
+      "2021-09-21 21:34:15,640 epoch 5 - iter 49/73 - loss 0.65572452 - samples/sec: 17.68 - lr: 0.020000\n",
+      "2021-09-21 21:34:16,062 epoch 5 - iter 56/73 - loss 0.65555269 - samples/sec: 16.59 - lr: 0.020000\n",
+      "2021-09-21 21:34:16,470 epoch 5 - iter 63/73 - loss 0.65295127 - samples/sec: 17.19 - lr: 0.020000\n",
+      "2021-09-21 21:34:16,860 epoch 5 - iter 70/73 - loss 0.65067787 - samples/sec: 17.97 - lr: 0.020000\n",
+      "2021-09-21 21:34:17,035 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:34:17,035 EPOCH 5 done: loss 0.6521 - lr 0.0200000\n",
+      "2021-09-21 21:34:17,275 DEV : loss 0.5072906613349915 - score 0.125\n",
+      "2021-09-21 21:34:17,276 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:34:17,279 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:34:17,702 epoch 6 - iter 7/73 - loss 0.64960836 - samples/sec: 17.20 - lr: 0.020000\n",
+      "2021-09-21 21:34:18,086 epoch 6 - iter 14/73 - loss 0.65922064 - samples/sec: 18.22 - lr: 0.020000\n",
+      "2021-09-21 21:34:18,491 epoch 6 - iter 21/73 - loss 0.64686874 - samples/sec: 17.31 - lr: 0.020000\n",
+      "2021-09-21 21:34:18,894 epoch 6 - iter 28/73 - loss 0.63689160 - samples/sec: 17.41 - lr: 0.020000\n",
+      "2021-09-21 21:34:19,294 epoch 6 - iter 35/73 - loss 0.63908932 - samples/sec: 17.52 - lr: 0.020000\n",
+      "2021-09-21 21:34:19,692 epoch 6 - iter 42/73 - loss 0.64213913 - samples/sec: 17.64 - lr: 0.020000\n",
+      "2021-09-21 21:34:20,095 epoch 6 - iter 49/73 - loss 0.64413485 - samples/sec: 17.39 - lr: 0.020000\n",
+      "2021-09-21 21:34:20,430 epoch 6 - iter 56/73 - loss 0.64217916 - samples/sec: 20.89 - lr: 0.020000\n",
+      "2021-09-21 21:34:20,737 epoch 6 - iter 63/73 - loss 0.64181276 - samples/sec: 22.89 - lr: 0.020000\n",
+      "2021-09-21 21:34:21,044 epoch 6 - iter 70/73 - loss 0.64292878 - samples/sec: 22.86 - lr: 0.020000\n",
+      "2021-09-21 21:34:21,177 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:45:31,495 EPOCH 6 done: loss 0.6592 - lr 0.0200000\n",
-      "2021-09-08 01:45:31,693 DEV : loss 0.46383607387542725 - score 0.25\n",
-      "2021-09-08 01:45:31,694 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:45:38,594 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:38,983 epoch 7 - iter 7/73 - loss 0.63286555 - samples/sec: 19.09 - lr: 0.020000\n",
-      "2021-09-08 01:45:39,323 epoch 7 - iter 14/73 - loss 0.64118510 - samples/sec: 20.66 - lr: 0.020000\n",
-      "2021-09-08 01:45:39,651 epoch 7 - iter 21/73 - loss 0.64617351 - samples/sec: 21.34 - lr: 0.020000\n",
-      "2021-09-08 01:45:39,974 epoch 7 - iter 28/73 - loss 0.65015821 - samples/sec: 21.75 - lr: 0.020000\n",
-      "2021-09-08 01:45:40,296 epoch 7 - iter 35/73 - loss 0.64502523 - samples/sec: 21.81 - lr: 0.020000\n",
-      "2021-09-08 01:45:40,624 epoch 7 - iter 42/73 - loss 0.64395458 - samples/sec: 21.36 - lr: 0.020000\n",
-      "2021-09-08 01:45:40,939 epoch 7 - iter 49/73 - loss 0.64996221 - samples/sec: 22.26 - lr: 0.020000\n",
-      "2021-09-08 01:45:41,251 epoch 7 - iter 56/73 - loss 0.64532085 - samples/sec: 22.56 - lr: 0.020000\n",
-      "2021-09-08 01:45:41,557 epoch 7 - iter 63/73 - loss 0.64072998 - samples/sec: 22.95 - lr: 0.020000\n",
-      "2021-09-08 01:45:41,864 epoch 7 - iter 70/73 - loss 0.64570551 - samples/sec: 22.86 - lr: 0.020000\n",
-      "2021-09-08 01:45:41,998 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:41,999 EPOCH 7 done: loss 0.6469 - lr 0.0200000\n",
-      "2021-09-08 01:45:42,253 DEV : loss 0.44627976417541504 - score 0.25\n",
-      "2021-09-08 01:45:42,254 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:34:21,177 EPOCH 6 done: loss 0.6424 - lr 0.0200000\n",
+      "2021-09-21 21:34:21,429 DEV : loss 0.4405454099178314 - score 0.125\n",
+      "2021-09-21 21:34:21,430 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:34:21,512 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:34:21,835 epoch 7 - iter 7/73 - loss 0.66835338 - samples/sec: 22.76 - lr: 0.020000\n",
+      "2021-09-21 21:34:22,142 epoch 7 - iter 14/73 - loss 0.63594925 - samples/sec: 22.83 - lr: 0.020000\n",
+      "2021-09-21 21:34:22,443 epoch 7 - iter 21/73 - loss 0.63497459 - samples/sec: 23.33 - lr: 0.020000\n",
+      "2021-09-21 21:34:22,745 epoch 7 - iter 28/73 - loss 0.64310668 - samples/sec: 23.23 - lr: 0.020000\n",
+      "2021-09-21 21:34:23,045 epoch 7 - iter 35/73 - loss 0.64114941 - samples/sec: 23.43 - lr: 0.020000\n",
+      "2021-09-21 21:34:23,350 epoch 7 - iter 42/73 - loss 0.63742556 - samples/sec: 22.97 - lr: 0.020000\n",
+      "2021-09-21 21:34:23,649 epoch 7 - iter 49/73 - loss 0.64343768 - samples/sec: 23.46 - lr: 0.020000\n",
+      "2021-09-21 21:34:23,948 epoch 7 - iter 56/73 - loss 0.64244871 - samples/sec: 23.43 - lr: 0.020000\n",
+      "2021-09-21 21:34:24,249 epoch 7 - iter 63/73 - loss 0.64739165 - samples/sec: 23.35 - lr: 0.020000\n",
+      "2021-09-21 21:34:24,550 epoch 7 - iter 70/73 - loss 0.64733744 - samples/sec: 23.28 - lr: 0.020000\n",
+      "2021-09-21 21:34:24,685 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:34:24,685 EPOCH 7 done: loss 0.6469 - lr 0.0200000\n",
+      "2021-09-21 21:34:24,895 DEV : loss 0.47915610671043396 - score 0.125\n",
+      "Epoch     7: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:34:24,896 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:34:24,956 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:34:25,279 epoch 8 - iter 7/73 - loss 0.61772681 - samples/sec: 22.82 - lr: 0.010000\n",
+      "2021-09-21 21:34:25,581 epoch 8 - iter 14/73 - loss 0.61643560 - samples/sec: 23.28 - lr: 0.010000\n",
+      "2021-09-21 21:34:25,879 epoch 8 - iter 21/73 - loss 0.63681775 - samples/sec: 23.48 - lr: 0.010000\n",
+      "2021-09-21 21:34:26,178 epoch 8 - iter 28/73 - loss 0.64276106 - samples/sec: 23.50 - lr: 0.010000\n",
+      "2021-09-21 21:34:26,481 epoch 8 - iter 35/73 - loss 0.63707670 - samples/sec: 23.16 - lr: 0.010000\n",
+      "2021-09-21 21:34:26,779 epoch 8 - iter 42/73 - loss 0.63566218 - samples/sec: 23.50 - lr: 0.010000\n",
+      "2021-09-21 21:34:27,078 epoch 8 - iter 49/73 - loss 0.63442401 - samples/sec: 23.49 - lr: 0.010000\n",
+      "2021-09-21 21:34:27,379 epoch 8 - iter 56/73 - loss 0.63446469 - samples/sec: 23.28 - lr: 0.010000\n",
+      "2021-09-21 21:34:27,678 epoch 8 - iter 63/73 - loss 0.63612194 - samples/sec: 23.44 - lr: 0.010000\n",
+      "2021-09-21 21:34:27,979 epoch 8 - iter 70/73 - loss 0.63495755 - samples/sec: 23.28 - lr: 0.010000\n",
+      "2021-09-21 21:34:28,108 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:34:28,109 EPOCH 8 done: loss 0.6345 - lr 0.0100000\n",
+      "2021-09-21 21:34:28,247 DEV : loss 0.46050897240638733 - score 0.25\n",
+      "2021-09-21 21:34:28,248 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:45:47,977 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:48,302 epoch 8 - iter 7/73 - loss 0.69448127 - samples/sec: 22.66 - lr: 0.020000\n",
-      "2021-09-08 01:45:48,608 epoch 8 - iter 14/73 - loss 0.65726106 - samples/sec: 22.96 - lr: 0.020000\n",
-      "2021-09-08 01:45:48,910 epoch 8 - iter 21/73 - loss 0.65197034 - samples/sec: 23.23 - lr: 0.020000\n",
-      "2021-09-08 01:45:49,220 epoch 8 - iter 28/73 - loss 0.65749129 - samples/sec: 22.62 - lr: 0.020000\n",
-      "2021-09-08 01:45:49,523 epoch 8 - iter 35/73 - loss 0.65320725 - samples/sec: 23.12 - lr: 0.020000\n",
-      "2021-09-08 01:45:49,827 epoch 8 - iter 42/73 - loss 0.65582319 - samples/sec: 23.11 - lr: 0.020000\n",
-      "2021-09-08 01:45:50,135 epoch 8 - iter 49/73 - loss 0.65746145 - samples/sec: 22.74 - lr: 0.020000\n",
-      "2021-09-08 01:45:50,479 epoch 8 - iter 56/73 - loss 0.65380591 - samples/sec: 20.40 - lr: 0.020000\n",
-      "2021-09-08 01:45:50,862 epoch 8 - iter 63/73 - loss 0.65168779 - samples/sec: 18.33 - lr: 0.020000\n",
-      "2021-09-08 01:45:51,236 epoch 8 - iter 70/73 - loss 0.65114012 - samples/sec: 18.72 - lr: 0.020000\n",
-      "2021-09-08 01:45:51,401 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:51,402 EPOCH 8 done: loss 0.6493 - lr 0.0200000\n",
-      "2021-09-08 01:45:51,538 DEV : loss 0.476962149143219 - score 0.125\n",
-      "2021-09-08 01:45:51,539 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:45:51,541 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:51,871 epoch 9 - iter 7/73 - loss 0.66002030 - samples/sec: 22.22 - lr: 0.020000\n",
-      "2021-09-08 01:45:52,196 epoch 9 - iter 14/73 - loss 0.64556522 - samples/sec: 21.53 - lr: 0.020000\n",
-      "2021-09-08 01:45:52,524 epoch 9 - iter 21/73 - loss 0.64275942 - samples/sec: 21.37 - lr: 0.020000\n",
-      "2021-09-08 01:45:52,850 epoch 9 - iter 28/73 - loss 0.64072109 - samples/sec: 21.56 - lr: 0.020000\n",
-      "2021-09-08 01:45:53,174 epoch 9 - iter 35/73 - loss 0.63932885 - samples/sec: 21.66 - lr: 0.020000\n",
-      "2021-09-08 01:45:53,524 epoch 9 - iter 42/73 - loss 0.64204378 - samples/sec: 20.00 - lr: 0.020000\n",
-      "2021-09-08 01:45:53,876 epoch 9 - iter 49/73 - loss 0.64297141 - samples/sec: 19.96 - lr: 0.020000\n",
-      "2021-09-08 01:45:54,200 epoch 9 - iter 56/73 - loss 0.64248894 - samples/sec: 21.63 - lr: 0.020000\n",
-      "2021-09-08 01:45:54,535 epoch 9 - iter 63/73 - loss 0.64298841 - samples/sec: 20.94 - lr: 0.020000\n",
-      "2021-09-08 01:45:54,860 epoch 9 - iter 70/73 - loss 0.64049289 - samples/sec: 21.57 - lr: 0.020000\n",
-      "2021-09-08 01:45:55,003 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:55,003 EPOCH 9 done: loss 0.6396 - lr 0.0200000\n",
-      "2021-09-08 01:45:55,268 DEV : loss 0.5699548721313477 - score 0.125\n",
-      "2021-09-08 01:45:55,269 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:45:55,346 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:55,702 epoch 10 - iter 7/73 - loss 0.63458390 - samples/sec: 20.75 - lr: 0.020000\n",
-      "2021-09-08 01:45:56,019 epoch 10 - iter 14/73 - loss 0.63857087 - samples/sec: 22.16 - lr: 0.020000\n",
-      "2021-09-08 01:45:56,347 epoch 10 - iter 21/73 - loss 0.64033990 - samples/sec: 21.39 - lr: 0.020000\n",
-      "2021-09-08 01:45:56,672 epoch 10 - iter 28/73 - loss 0.63260132 - samples/sec: 21.55 - lr: 0.020000\n",
-      "2021-09-08 01:45:57,022 epoch 10 - iter 35/73 - loss 0.63872341 - samples/sec: 20.04 - lr: 0.020000\n",
-      "2021-09-08 01:45:57,360 epoch 10 - iter 42/73 - loss 0.63819209 - samples/sec: 20.76 - lr: 0.020000\n",
-      "2021-09-08 01:45:57,689 epoch 10 - iter 49/73 - loss 0.63896433 - samples/sec: 21.34 - lr: 0.020000\n",
-      "2021-09-08 01:45:58,016 epoch 10 - iter 56/73 - loss 0.63989470 - samples/sec: 21.40 - lr: 0.020000\n",
-      "2021-09-08 01:45:58,341 epoch 10 - iter 63/73 - loss 0.64093008 - samples/sec: 21.63 - lr: 0.020000\n",
-      "2021-09-08 01:45:58,657 epoch 10 - iter 70/73 - loss 0.63842749 - samples/sec: 22.21 - lr: 0.020000\n",
-      "2021-09-08 01:45:58,792 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:58,793 EPOCH 10 done: loss 0.6384 - lr 0.0200000\n",
-      "2021-09-08 01:45:58,960 DEV : loss 0.528999924659729 - score 0.125\n",
-      "2021-09-08 01:45:58,961 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:46:03,080 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:03,081 Testing using best model ...\n",
-      "2021-09-08 01:46:03,082 loading file None/best-model.pt\n",
+      "2021-09-21 21:34:32,240 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:34:32,689 epoch 9 - iter 7/73 - loss 0.64285843 - samples/sec: 16.23 - lr: 0.010000\n",
+      "2021-09-21 21:34:33,087 epoch 9 - iter 14/73 - loss 0.66579112 - samples/sec: 17.60 - lr: 0.010000\n",
+      "2021-09-21 21:34:33,486 epoch 9 - iter 21/73 - loss 0.65591906 - samples/sec: 17.58 - lr: 0.010000\n",
+      "2021-09-21 21:34:33,923 epoch 9 - iter 28/73 - loss 0.65180436 - samples/sec: 16.03 - lr: 0.010000\n",
+      "2021-09-21 21:34:34,306 epoch 9 - iter 35/73 - loss 0.65412135 - samples/sec: 18.31 - lr: 0.010000\n",
+      "2021-09-21 21:34:34,698 epoch 9 - iter 42/73 - loss 0.65297352 - samples/sec: 17.90 - lr: 0.010000\n",
+      "2021-09-21 21:34:35,101 epoch 9 - iter 49/73 - loss 0.65010901 - samples/sec: 17.41 - lr: 0.010000\n",
+      "2021-09-21 21:34:35,490 epoch 9 - iter 56/73 - loss 0.64661200 - samples/sec: 18.03 - lr: 0.010000\n",
+      "2021-09-21 21:34:35,880 epoch 9 - iter 63/73 - loss 0.64582573 - samples/sec: 17.97 - lr: 0.010000\n",
+      "2021-09-21 21:34:36,263 epoch 9 - iter 70/73 - loss 0.64128887 - samples/sec: 18.31 - lr: 0.010000\n",
+      "2021-09-21 21:34:36,432 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:34:36,433 EPOCH 9 done: loss 0.6401 - lr 0.0100000\n",
+      "2021-09-21 21:34:36,688 DEV : loss 0.4900439977645874 - score 0.125\n",
+      "2021-09-21 21:34:36,689 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:34:36,691 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:34:37,097 epoch 10 - iter 7/73 - loss 0.62473389 - samples/sec: 18.55 - lr: 0.010000\n",
+      "2021-09-21 21:34:37,500 epoch 10 - iter 14/73 - loss 0.64024101 - samples/sec: 17.40 - lr: 0.010000\n",
+      "2021-09-21 21:34:37,897 epoch 10 - iter 21/73 - loss 0.63482298 - samples/sec: 17.68 - lr: 0.010000\n",
+      "2021-09-21 21:34:38,270 epoch 10 - iter 28/73 - loss 0.64118222 - samples/sec: 18.77 - lr: 0.010000\n",
+      "2021-09-21 21:34:38,657 epoch 10 - iter 35/73 - loss 0.64293637 - samples/sec: 18.15 - lr: 0.010000\n",
+      "2021-09-21 21:34:39,018 epoch 10 - iter 42/73 - loss 0.64691965 - samples/sec: 19.39 - lr: 0.010000\n",
+      "2021-09-21 21:34:39,397 epoch 10 - iter 49/73 - loss 0.64644220 - samples/sec: 18.49 - lr: 0.010000\n",
+      "2021-09-21 21:34:39,809 epoch 10 - iter 56/73 - loss 0.64260590 - samples/sec: 17.03 - lr: 0.010000\n",
+      "2021-09-21 21:34:40,213 epoch 10 - iter 63/73 - loss 0.64367491 - samples/sec: 17.36 - lr: 0.010000\n",
+      "2021-09-21 21:34:40,618 epoch 10 - iter 70/73 - loss 0.64546783 - samples/sec: 17.32 - lr: 0.010000\n",
+      "2021-09-21 21:34:40,782 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:34:40,783 EPOCH 10 done: loss 0.6463 - lr 0.0100000\n",
+      "2021-09-21 21:34:41,061 DEV : loss 0.5041202902793884 - score 0.125\n",
+      "2021-09-21 21:34:41,062 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:34:45,115 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:34:45,115 Testing using best model ...\n",
+      "2021-09-21 21:34:45,117 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:46:07,924 \t0.1111\n",
-      "2021-09-08 01:46:07,924 \n",
+      "2021-09-21 21:34:50,340 \t0.1111\n",
+      "2021-09-21 21:34:50,341 \n",
       "Results:\n",
       "- F-score (micro) 0.1111\n",
-      "- F-score (macro) 0.0222\n",
+      "- F-score (macro) 0.0247\n",
       "- Accuracy 0.1111\n",
       "\n",
       "By class:\n",
       "                                                                            precision    recall  f1-score   support\n",
       "\n",
-      "                                            the emotion of great happiness     0.0000    0.0000    0.0000         2\n",
-      "the astonishment you feel when something totally unexpected happens to you     0.0000    0.0000    0.0000         1\n",
-      "                         a strong positive emotion of regard and affection     0.0000    0.0000    0.0000         2\n",
-      "                                                strong feelings of dislike     0.0000    0.0000    0.0000         1\n",
-      "    an emotion experienced in anticipation of some specific pain or danger     0.0000    0.0000    0.0000         0\n",
-      "         a feeling that is oriented toward some real or supposed grievance     0.0000    0.0000    0.0000         1\n",
-      "                    remorse caused by feeling responsible for some offense     0.0000    0.0000    0.0000         0\n",
+      "                                            the emotion of great happiness     0.0000    0.0000    0.0000         1\n",
+      "the astonishment you feel when something totally unexpected happens to you     0.0000    0.0000    0.0000         2\n",
+      "                         a strong positive emotion of regard and affection     0.0000    0.0000    0.0000         0\n",
+      "                                                strong feelings of dislike     0.0000    0.0000    0.0000         0\n",
+      "    an emotion experienced in anticipation of some specific pain or danger     0.0000    0.0000    0.0000         1\n",
+      "         a feeling that is oriented toward some real or supposed grievance     0.1250    1.0000    0.2222         1\n",
+      "                    remorse caused by feeling responsible for some offense     0.0000    0.0000    0.0000         1\n",
       "      a painful emotion resulting from an awareness of inadequacy or guilt     0.0000    0.0000    0.0000         1\n",
-      "                    emotions experienced when not in a state of well-being     0.1111    1.0000    0.2000         1\n",
+      "                    emotions experienced when not in a state of well-being     0.0000    0.0000    0.0000         2\n",
       "\n",
       "                                                                 micro avg     0.1111    0.1111    0.1111         9\n",
-      "                                                                 macro avg     0.0123    0.1111    0.0222         9\n",
-      "                                                              weighted avg     0.0123    0.1111    0.0222         9\n",
+      "                                                                 macro avg     0.0139    0.1111    0.0247         9\n",
+      "                                                              weighted avg     0.0139    0.1111    0.0247         9\n",
       "                                                               samples avg     0.1111    0.1111    0.1111         9\n",
       "\n",
-      "2021-09-08 01:46:07,925 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:04,088 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n"
+      "2021-09-21 21:34:50,341 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:35:45,136 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n"
      ]
     },
     {
@@ -7472,23 +7495,23 @@
      "output_type": "stream",
      "text": [
       "init TARS\n",
-      "2021-09-08 01:47:08,042 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:35:49,043 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 82/82 [00:00<00:00, 33255.94it/s]"
+      "100%|██████████| 82/82 [00:00<00:00, 42529.11it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:47:08,047 [b'the emotion of great happiness', b'the astonishment you feel when something totally unexpected happens to you', b'a strong positive emotion of regard and affection', b'strong feelings of dislike', b'an emotion experienced in anticipation of some specific pain or danger', b'a feeling that is oriented toward some real or supposed grievance', b'remorse caused by feeling responsible for some offense', b'a painful emotion resulting from an awareness of inadequacy or guilt', b'emotions experienced when not in a state of well-being']\n",
-      "2021-09-08 01:47:08,060 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:08,062 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:35:49,047 [b'the emotion of great happiness', b'the astonishment you feel when something totally unexpected happens to you', b'a strong positive emotion of regard and affection', b'strong feelings of dislike', b'an emotion experienced in anticipation of some specific pain or danger', b'a feeling that is oriented toward some real or supposed grievance', b'remorse caused by feeling responsible for some offense', b'a painful emotion resulting from an awareness of inadequacy or guilt', b'emotions experienced when not in a state of well-being']\n",
+      "2021-09-21 21:35:49,058 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:35:49,060 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -7801,25 +7824,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:47:08,062 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:08,063 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
-      "2021-09-08 01:47:08,063 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:08,063 Parameters:\n",
-      "2021-09-08 01:47:08,064  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:47:08,064  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:47:08,064  - patience: \"3\"\n",
-      "2021-09-08 01:47:08,065  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:47:08,065  - max_epochs: \"10\"\n",
-      "2021-09-08 01:47:08,065  - shuffle: \"True\"\n",
-      "2021-09-08 01:47:08,066  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:47:08,066  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:47:08,066 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:08,067 Model training base path: \"None\"\n",
-      "2021-09-08 01:47:08,067 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:08,067 Device: cuda:1\n",
-      "2021-09-08 01:47:08,068 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:08,068 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:47:08,077 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:35:49,061 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:35:49,061 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
+      "2021-09-21 21:35:49,061 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:35:49,061 Parameters:\n",
+      "2021-09-21 21:35:49,062  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:35:49,062  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:35:49,062  - patience: \"3\"\n",
+      "2021-09-21 21:35:49,063  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:35:49,063  - max_epochs: \"10\"\n",
+      "2021-09-21 21:35:49,063  - shuffle: \"True\"\n",
+      "2021-09-21 21:35:49,063  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:35:49,064  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:35:49,064 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:35:49,064 Model training base path: \"None\"\n",
+      "2021-09-21 21:35:49,065 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:35:49,065 Device: cuda:0\n",
+      "2021-09-21 21:35:49,065 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:35:49,065 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:35:49,072 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -7833,220 +7856,220 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:47:08,427 epoch 1 - iter 7/73 - loss 0.37050278 - samples/sec: 20.96 - lr: 0.020000\n",
-      "2021-09-08 01:47:08,765 epoch 1 - iter 14/73 - loss 0.44212076 - samples/sec: 20.74 - lr: 0.020000\n",
-      "2021-09-08 01:47:09,096 epoch 1 - iter 21/73 - loss 0.41895367 - samples/sec: 21.19 - lr: 0.020000\n",
-      "2021-09-08 01:47:09,440 epoch 1 - iter 28/73 - loss 0.48445163 - samples/sec: 20.44 - lr: 0.020000\n",
-      "2021-09-08 01:47:09,783 epoch 1 - iter 35/73 - loss 0.51096196 - samples/sec: 20.42 - lr: 0.020000\n",
-      "2021-09-08 01:47:10,130 epoch 1 - iter 42/73 - loss 0.46898826 - samples/sec: 20.24 - lr: 0.020000\n",
-      "2021-09-08 01:47:10,484 epoch 1 - iter 49/73 - loss 0.49033015 - samples/sec: 19.89 - lr: 0.020000\n",
-      "2021-09-08 01:47:10,831 epoch 1 - iter 56/73 - loss 0.52509309 - samples/sec: 20.23 - lr: 0.020000\n",
-      "2021-09-08 01:47:11,225 epoch 1 - iter 63/73 - loss 0.55627366 - samples/sec: 17.80 - lr: 0.020000\n",
-      "2021-09-08 01:47:11,663 epoch 1 - iter 70/73 - loss 0.57252063 - samples/sec: 16.01 - lr: 0.020000\n",
-      "2021-09-08 01:47:11,847 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:11,847 EPOCH 1 done: loss 0.5665 - lr 0.0200000\n",
-      "2021-09-08 01:47:12,231 DEV : loss 0.7782091498374939 - score 0.125\n",
-      "2021-09-08 01:47:12,232 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:47:19,969 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:20,414 epoch 2 - iter 7/73 - loss 0.74421622 - samples/sec: 16.60 - lr: 0.020000\n",
-      "2021-09-08 01:47:20,813 epoch 2 - iter 14/73 - loss 0.76053019 - samples/sec: 17.57 - lr: 0.020000\n",
-      "2021-09-08 01:47:21,210 epoch 2 - iter 21/73 - loss 0.76419422 - samples/sec: 17.67 - lr: 0.020000\n",
-      "2021-09-08 01:47:21,579 epoch 2 - iter 28/73 - loss 0.75953978 - samples/sec: 19.01 - lr: 0.020000\n",
-      "2021-09-08 01:47:21,921 epoch 2 - iter 35/73 - loss 0.74138973 - samples/sec: 20.52 - lr: 0.020000\n",
-      "2021-09-08 01:47:22,279 epoch 2 - iter 42/73 - loss 0.74387940 - samples/sec: 19.61 - lr: 0.020000\n",
-      "2021-09-08 01:47:22,642 epoch 2 - iter 49/73 - loss 0.74409583 - samples/sec: 19.32 - lr: 0.020000\n",
-      "2021-09-08 01:47:22,992 epoch 2 - iter 56/73 - loss 0.74749629 - samples/sec: 20.04 - lr: 0.020000\n",
-      "2021-09-08 01:47:23,340 epoch 2 - iter 63/73 - loss 0.74771261 - samples/sec: 20.20 - lr: 0.020000\n",
-      "2021-09-08 01:47:23,674 epoch 2 - iter 70/73 - loss 0.74361046 - samples/sec: 21.01 - lr: 0.020000\n",
-      "2021-09-08 01:47:23,821 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:23,822 EPOCH 2 done: loss 0.7389 - lr 0.0200000\n",
-      "2021-09-08 01:47:24,054 DEV : loss 0.42425984144210815 - score 0.25\n",
-      "2021-09-08 01:47:24,055 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:35:49,398 epoch 1 - iter 7/73 - loss 0.31998413 - samples/sec: 22.47 - lr: 0.020000\n",
+      "2021-09-21 21:35:49,720 epoch 1 - iter 14/73 - loss 0.51357424 - samples/sec: 21.81 - lr: 0.020000\n",
+      "2021-09-21 21:35:50,041 epoch 1 - iter 21/73 - loss 0.61155109 - samples/sec: 21.85 - lr: 0.020000\n",
+      "2021-09-21 21:35:50,365 epoch 1 - iter 28/73 - loss 0.62775753 - samples/sec: 21.63 - lr: 0.020000\n",
+      "2021-09-21 21:35:50,680 epoch 1 - iter 35/73 - loss 0.63252208 - samples/sec: 22.25 - lr: 0.020000\n",
+      "2021-09-21 21:35:50,992 epoch 1 - iter 42/73 - loss 0.63964106 - samples/sec: 22.51 - lr: 0.020000\n",
+      "2021-09-21 21:35:51,294 epoch 1 - iter 49/73 - loss 0.63704400 - samples/sec: 23.21 - lr: 0.020000\n",
+      "2021-09-21 21:35:51,611 epoch 1 - iter 56/73 - loss 0.63495947 - samples/sec: 22.11 - lr: 0.020000\n",
+      "2021-09-21 21:35:51,925 epoch 1 - iter 63/73 - loss 0.63135975 - samples/sec: 22.38 - lr: 0.020000\n",
+      "2021-09-21 21:35:52,233 epoch 1 - iter 70/73 - loss 0.63241449 - samples/sec: 22.73 - lr: 0.020000\n",
+      "2021-09-21 21:35:52,369 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:35:52,370 EPOCH 1 done: loss 0.6331 - lr 0.0200000\n",
+      "2021-09-21 21:35:52,496 DEV : loss 0.49112430214881897 - score 0.125\n",
+      "2021-09-21 21:35:52,497 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:47:29,090 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:29,461 epoch 3 - iter 7/73 - loss 0.64924907 - samples/sec: 19.81 - lr: 0.020000\n",
-      "2021-09-08 01:47:29,864 epoch 3 - iter 14/73 - loss 0.64380966 - samples/sec: 17.38 - lr: 0.020000\n",
-      "2021-09-08 01:47:30,272 epoch 3 - iter 21/73 - loss 0.64327524 - samples/sec: 17.20 - lr: 0.020000\n",
-      "2021-09-08 01:47:30,670 epoch 3 - iter 28/73 - loss 0.64574588 - samples/sec: 17.64 - lr: 0.020000\n",
-      "2021-09-08 01:47:30,992 epoch 3 - iter 35/73 - loss 0.64950765 - samples/sec: 21.78 - lr: 0.020000\n",
-      "2021-09-08 01:47:31,318 epoch 3 - iter 42/73 - loss 0.64818786 - samples/sec: 21.52 - lr: 0.020000\n",
-      "2021-09-08 01:47:31,637 epoch 3 - iter 49/73 - loss 0.64618211 - samples/sec: 22.03 - lr: 0.020000\n",
-      "2021-09-08 01:47:31,961 epoch 3 - iter 56/73 - loss 0.64971403 - samples/sec: 21.64 - lr: 0.020000\n",
-      "2021-09-08 01:47:32,287 epoch 3 - iter 63/73 - loss 0.65393201 - samples/sec: 21.50 - lr: 0.020000\n",
-      "2021-09-08 01:47:32,614 epoch 3 - iter 70/73 - loss 0.65023964 - samples/sec: 21.49 - lr: 0.020000\n",
-      "2021-09-08 01:47:32,753 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:32,754 EPOCH 3 done: loss 0.6508 - lr 0.0200000\n",
-      "2021-09-08 01:47:37,869 DEV : loss 0.49002525210380554 - score 0.125\n",
-      "2021-09-08 01:47:37,871 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:47:37,892 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:38,352 epoch 4 - iter 7/73 - loss 0.67727370 - samples/sec: 15.99 - lr: 0.020000\n",
-      "2021-09-08 01:47:38,739 epoch 4 - iter 14/73 - loss 0.67446247 - samples/sec: 18.15 - lr: 0.020000\n",
-      "2021-09-08 01:47:39,146 epoch 4 - iter 21/73 - loss 0.67338559 - samples/sec: 17.23 - lr: 0.020000\n",
-      "2021-09-08 01:47:39,527 epoch 4 - iter 28/73 - loss 0.67076801 - samples/sec: 18.40 - lr: 0.020000\n",
-      "2021-09-08 01:47:39,924 epoch 4 - iter 35/73 - loss 0.66987711 - samples/sec: 17.67 - lr: 0.020000\n",
-      "2021-09-08 01:47:40,314 epoch 4 - iter 42/73 - loss 0.66434786 - samples/sec: 18.02 - lr: 0.020000\n",
-      "2021-09-08 01:47:40,704 epoch 4 - iter 49/73 - loss 0.66339968 - samples/sec: 17.99 - lr: 0.020000\n",
-      "2021-09-08 01:47:41,081 epoch 4 - iter 56/73 - loss 0.66032267 - samples/sec: 18.59 - lr: 0.020000\n",
-      "2021-09-08 01:47:41,505 epoch 4 - iter 63/73 - loss 0.65608941 - samples/sec: 16.55 - lr: 0.020000\n",
-      "2021-09-08 01:47:41,905 epoch 4 - iter 70/73 - loss 0.65622452 - samples/sec: 17.56 - lr: 0.020000\n",
-      "2021-09-08 01:47:42,078 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:42,078 EPOCH 4 done: loss 0.6567 - lr 0.0200000\n",
-      "2021-09-08 01:47:42,377 DEV : loss 0.4152340292930603 - score 0.125\n",
-      "2021-09-08 01:47:42,378 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:47:42,469 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:42,907 epoch 5 - iter 7/73 - loss 0.66874376 - samples/sec: 17.11 - lr: 0.020000\n",
-      "2021-09-08 01:47:43,324 epoch 5 - iter 14/73 - loss 0.68131537 - samples/sec: 16.79 - lr: 0.020000\n",
-      "2021-09-08 01:47:43,722 epoch 5 - iter 21/73 - loss 0.67426155 - samples/sec: 17.65 - lr: 0.020000\n",
-      "2021-09-08 01:47:44,073 epoch 5 - iter 28/73 - loss 0.66633686 - samples/sec: 20.00 - lr: 0.020000\n",
-      "2021-09-08 01:47:44,463 epoch 5 - iter 35/73 - loss 0.65647575 - samples/sec: 17.95 - lr: 0.020000\n",
-      "2021-09-08 01:47:44,853 epoch 5 - iter 42/73 - loss 0.65839452 - samples/sec: 17.98 - lr: 0.020000\n",
-      "2021-09-08 01:47:45,261 epoch 5 - iter 49/73 - loss 0.65577045 - samples/sec: 17.22 - lr: 0.020000\n",
-      "2021-09-08 01:47:45,603 epoch 5 - iter 56/73 - loss 0.65556234 - samples/sec: 20.49 - lr: 0.020000\n",
-      "2021-09-08 01:47:45,942 epoch 5 - iter 63/73 - loss 0.65342484 - samples/sec: 20.72 - lr: 0.020000\n",
-      "2021-09-08 01:47:46,270 epoch 5 - iter 70/73 - loss 0.65225014 - samples/sec: 21.41 - lr: 0.020000\n",
-      "2021-09-08 01:47:46,409 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:46,410 EPOCH 5 done: loss 0.6513 - lr 0.0200000\n",
-      "2021-09-08 01:47:46,635 DEV : loss 0.47850510478019714 - score 0.125\n",
-      "2021-09-08 01:47:46,636 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:47:46,706 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:47,043 epoch 6 - iter 7/73 - loss 0.63327209 - samples/sec: 21.74 - lr: 0.020000\n",
-      "2021-09-08 01:47:47,363 epoch 6 - iter 14/73 - loss 0.64675358 - samples/sec: 21.92 - lr: 0.020000\n",
-      "2021-09-08 01:47:47,689 epoch 6 - iter 21/73 - loss 0.63493538 - samples/sec: 21.52 - lr: 0.020000\n",
-      "2021-09-08 01:47:48,007 epoch 6 - iter 28/73 - loss 0.64884796 - samples/sec: 22.02 - lr: 0.020000\n",
-      "2021-09-08 01:47:48,327 epoch 6 - iter 35/73 - loss 0.65036259 - samples/sec: 21.95 - lr: 0.020000\n",
-      "2021-09-08 01:47:48,650 epoch 6 - iter 42/73 - loss 0.65035533 - samples/sec: 21.75 - lr: 0.020000\n",
-      "2021-09-08 01:47:48,969 epoch 6 - iter 49/73 - loss 0.65145250 - samples/sec: 21.95 - lr: 0.020000\n",
-      "2021-09-08 01:47:49,291 epoch 6 - iter 56/73 - loss 0.65132995 - samples/sec: 21.80 - lr: 0.020000\n",
-      "2021-09-08 01:47:49,611 epoch 6 - iter 63/73 - loss 0.65226251 - samples/sec: 21.91 - lr: 0.020000\n",
-      "2021-09-08 01:47:49,935 epoch 6 - iter 70/73 - loss 0.65062631 - samples/sec: 21.67 - lr: 0.020000\n",
-      "2021-09-08 01:47:50,074 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:35:56,505 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:35:56,826 epoch 2 - iter 7/73 - loss 0.61954533 - samples/sec: 22.96 - lr: 0.020000\n",
+      "2021-09-21 21:35:57,138 epoch 2 - iter 14/73 - loss 0.69700967 - samples/sec: 22.51 - lr: 0.020000\n",
+      "2021-09-21 21:35:57,439 epoch 2 - iter 21/73 - loss 0.67674932 - samples/sec: 23.32 - lr: 0.020000\n",
+      "2021-09-21 21:35:57,746 epoch 2 - iter 28/73 - loss 0.67196484 - samples/sec: 22.81 - lr: 0.020000\n",
+      "2021-09-21 21:35:58,050 epoch 2 - iter 35/73 - loss 0.66612933 - samples/sec: 23.14 - lr: 0.020000\n",
+      "2021-09-21 21:35:58,355 epoch 2 - iter 42/73 - loss 0.66654382 - samples/sec: 22.97 - lr: 0.020000\n",
+      "2021-09-21 21:35:58,657 epoch 2 - iter 49/73 - loss 0.66189867 - samples/sec: 23.25 - lr: 0.020000\n",
+      "2021-09-21 21:35:58,959 epoch 2 - iter 56/73 - loss 0.66034708 - samples/sec: 23.24 - lr: 0.020000\n",
+      "2021-09-21 21:35:59,260 epoch 2 - iter 63/73 - loss 0.65843869 - samples/sec: 23.32 - lr: 0.020000\n",
+      "2021-09-21 21:35:59,558 epoch 2 - iter 70/73 - loss 0.65436035 - samples/sec: 23.49 - lr: 0.020000\n",
+      "2021-09-21 21:35:59,687 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:35:59,688 EPOCH 2 done: loss 0.6542 - lr 0.0200000\n",
+      "2021-09-21 21:35:59,815 DEV : loss 0.525117039680481 - score 0.125\n",
+      "2021-09-21 21:35:59,816 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:35:59,818 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:36:00,138 epoch 3 - iter 7/73 - loss 0.79094013 - samples/sec: 22.89 - lr: 0.020000\n",
+      "2021-09-21 21:36:00,443 epoch 3 - iter 14/73 - loss 0.73034671 - samples/sec: 22.96 - lr: 0.020000\n",
+      "2021-09-21 21:36:00,751 epoch 3 - iter 21/73 - loss 0.68774790 - samples/sec: 22.79 - lr: 0.020000\n",
+      "2021-09-21 21:36:01,048 epoch 3 - iter 28/73 - loss 0.69078691 - samples/sec: 23.56 - lr: 0.020000\n",
+      "2021-09-21 21:36:01,347 epoch 3 - iter 35/73 - loss 0.68361768 - samples/sec: 23.46 - lr: 0.020000\n",
+      "2021-09-21 21:36:01,650 epoch 3 - iter 42/73 - loss 0.67452839 - samples/sec: 23.17 - lr: 0.020000\n",
+      "2021-09-21 21:36:01,960 epoch 3 - iter 49/73 - loss 0.67806936 - samples/sec: 22.67 - lr: 0.020000\n",
+      "2021-09-21 21:36:02,270 epoch 3 - iter 56/73 - loss 0.67536622 - samples/sec: 22.61 - lr: 0.020000\n",
+      "2021-09-21 21:36:02,570 epoch 3 - iter 63/73 - loss 0.66999430 - samples/sec: 23.37 - lr: 0.020000\n",
+      "2021-09-21 21:36:02,871 epoch 3 - iter 70/73 - loss 0.66503211 - samples/sec: 23.33 - lr: 0.020000\n",
+      "2021-09-21 21:36:03,000 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:36:03,001 EPOCH 3 done: loss 0.6659 - lr 0.0200000\n",
+      "2021-09-21 21:36:03,126 DEV : loss 0.480905681848526 - score 0.0\n",
+      "2021-09-21 21:36:03,127 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:36:03,129 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:36:03,442 epoch 4 - iter 7/73 - loss 0.69838786 - samples/sec: 23.45 - lr: 0.020000\n",
+      "2021-09-21 21:36:03,742 epoch 4 - iter 14/73 - loss 0.67737418 - samples/sec: 23.39 - lr: 0.020000\n",
+      "2021-09-21 21:36:04,040 epoch 4 - iter 21/73 - loss 0.66746571 - samples/sec: 23.53 - lr: 0.020000\n",
+      "2021-09-21 21:36:04,340 epoch 4 - iter 28/73 - loss 0.65716543 - samples/sec: 23.37 - lr: 0.020000\n",
+      "2021-09-21 21:36:04,642 epoch 4 - iter 35/73 - loss 0.65404623 - samples/sec: 23.18 - lr: 0.020000\n",
+      "2021-09-21 21:36:04,941 epoch 4 - iter 42/73 - loss 0.65537599 - samples/sec: 23.48 - lr: 0.020000\n",
+      "2021-09-21 21:36:05,242 epoch 4 - iter 49/73 - loss 0.65669043 - samples/sec: 23.31 - lr: 0.020000\n",
+      "2021-09-21 21:36:05,540 epoch 4 - iter 56/73 - loss 0.65792081 - samples/sec: 23.53 - lr: 0.020000\n",
+      "2021-09-21 21:36:05,839 epoch 4 - iter 63/73 - loss 0.65829337 - samples/sec: 23.49 - lr: 0.020000\n",
+      "2021-09-21 21:36:06,140 epoch 4 - iter 70/73 - loss 0.65666414 - samples/sec: 23.27 - lr: 0.020000\n",
+      "2021-09-21 21:36:06,269 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:36:06,269 EPOCH 4 done: loss 0.6573 - lr 0.0200000\n",
+      "2021-09-21 21:36:06,397 DEV : loss 0.5383766889572144 - score 0.0\n",
+      "2021-09-21 21:36:06,397 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:36:06,399 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:36:06,713 epoch 5 - iter 7/73 - loss 0.63464372 - samples/sec: 23.39 - lr: 0.020000\n",
+      "2021-09-21 21:36:07,011 epoch 5 - iter 14/73 - loss 0.63420309 - samples/sec: 23.52 - lr: 0.020000\n",
+      "2021-09-21 21:36:07,312 epoch 5 - iter 21/73 - loss 0.63976049 - samples/sec: 23.26 - lr: 0.020000\n",
+      "2021-09-21 21:36:07,612 epoch 5 - iter 28/73 - loss 0.63665454 - samples/sec: 23.42 - lr: 0.020000\n",
+      "2021-09-21 21:36:07,910 epoch 5 - iter 35/73 - loss 0.60767423 - samples/sec: 23.51 - lr: 0.020000\n",
+      "2021-09-21 21:36:08,210 epoch 5 - iter 42/73 - loss 0.61454665 - samples/sec: 23.38 - lr: 0.020000\n",
+      "2021-09-21 21:36:08,507 epoch 5 - iter 49/73 - loss 0.62786909 - samples/sec: 23.59 - lr: 0.020000\n",
+      "2021-09-21 21:36:08,806 epoch 5 - iter 56/73 - loss 0.63338193 - samples/sec: 23.45 - lr: 0.020000\n",
+      "2021-09-21 21:36:09,108 epoch 5 - iter 63/73 - loss 0.63655195 - samples/sec: 23.22 - lr: 0.020000\n",
+      "2021-09-21 21:36:09,407 epoch 5 - iter 70/73 - loss 0.63957515 - samples/sec: 23.50 - lr: 0.020000\n",
+      "2021-09-21 21:36:09,539 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:36:09,539 EPOCH 5 done: loss 0.6426 - lr 0.0200000\n",
+      "2021-09-21 21:36:09,665 DEV : loss 0.5256564617156982 - score 0.0\n",
+      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:36:09,666 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:36:09,668 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:36:09,981 epoch 6 - iter 7/73 - loss 0.62692437 - samples/sec: 23.45 - lr: 0.010000\n",
+      "2021-09-21 21:36:10,280 epoch 6 - iter 14/73 - loss 0.64091072 - samples/sec: 23.46 - lr: 0.010000\n",
+      "2021-09-21 21:36:10,580 epoch 6 - iter 21/73 - loss 0.63475349 - samples/sec: 23.40 - lr: 0.010000\n",
+      "2021-09-21 21:36:10,878 epoch 6 - iter 28/73 - loss 0.63485271 - samples/sec: 23.52 - lr: 0.010000\n",
+      "2021-09-21 21:36:11,180 epoch 6 - iter 35/73 - loss 0.63905244 - samples/sec: 23.19 - lr: 0.010000\n",
+      "2021-09-21 21:36:11,479 epoch 6 - iter 42/73 - loss 0.64123217 - samples/sec: 23.49 - lr: 0.010000\n",
+      "2021-09-21 21:36:11,779 epoch 6 - iter 49/73 - loss 0.63936400 - samples/sec: 23.36 - lr: 0.010000\n",
+      "2021-09-21 21:36:12,079 epoch 6 - iter 56/73 - loss 0.63893225 - samples/sec: 23.40 - lr: 0.010000\n",
+      "2021-09-21 21:36:12,376 epoch 6 - iter 63/73 - loss 0.63564338 - samples/sec: 23.59 - lr: 0.010000\n",
+      "2021-09-21 21:36:12,678 epoch 6 - iter 70/73 - loss 0.63722085 - samples/sec: 23.25 - lr: 0.010000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:47:50,075 EPOCH 6 done: loss 0.6513 - lr 0.0200000\n",
-      "2021-09-08 01:47:50,298 DEV : loss 0.474532812833786 - score 0.125\n",
-      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:47:50,299 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:47:50,356 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:50,698 epoch 7 - iter 7/73 - loss 0.63721329 - samples/sec: 21.41 - lr: 0.010000\n",
-      "2021-09-08 01:47:51,017 epoch 7 - iter 14/73 - loss 0.64449156 - samples/sec: 22.03 - lr: 0.010000\n",
-      "2021-09-08 01:47:51,338 epoch 7 - iter 21/73 - loss 0.63879531 - samples/sec: 21.86 - lr: 0.010000\n",
-      "2021-09-08 01:47:51,655 epoch 7 - iter 28/73 - loss 0.64449186 - samples/sec: 22.13 - lr: 0.010000\n",
-      "2021-09-08 01:47:51,970 epoch 7 - iter 35/73 - loss 0.64078150 - samples/sec: 22.26 - lr: 0.010000\n",
-      "2021-09-08 01:47:52,292 epoch 7 - iter 42/73 - loss 0.63782640 - samples/sec: 21.80 - lr: 0.010000\n",
-      "2021-09-08 01:47:52,608 epoch 7 - iter 49/73 - loss 0.64201041 - samples/sec: 22.20 - lr: 0.010000\n",
-      "2021-09-08 01:47:52,930 epoch 7 - iter 56/73 - loss 0.64446297 - samples/sec: 21.80 - lr: 0.010000\n",
-      "2021-09-08 01:47:53,247 epoch 7 - iter 63/73 - loss 0.64234946 - samples/sec: 22.10 - lr: 0.010000\n",
-      "2021-09-08 01:47:53,567 epoch 7 - iter 70/73 - loss 0.64316584 - samples/sec: 21.93 - lr: 0.010000\n",
-      "2021-09-08 01:47:53,709 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:53,710 EPOCH 7 done: loss 0.6425 - lr 0.0100000\n",
-      "2021-09-08 01:47:53,845 DEV : loss 0.4798814654350281 - score 0.125\n",
-      "2021-09-08 01:47:53,846 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:47:53,848 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:54,193 epoch 8 - iter 7/73 - loss 0.63188117 - samples/sec: 21.21 - lr: 0.010000\n",
-      "2021-09-08 01:47:54,527 epoch 8 - iter 14/73 - loss 0.62210418 - samples/sec: 21.02 - lr: 0.010000\n",
-      "2021-09-08 01:47:54,862 epoch 8 - iter 21/73 - loss 0.63147947 - samples/sec: 20.94 - lr: 0.010000\n",
-      "2021-09-08 01:47:55,189 epoch 8 - iter 28/73 - loss 0.63284257 - samples/sec: 21.50 - lr: 0.010000\n",
-      "2021-09-08 01:47:55,533 epoch 8 - iter 35/73 - loss 0.63842849 - samples/sec: 20.39 - lr: 0.010000\n",
-      "2021-09-08 01:47:55,896 epoch 8 - iter 42/73 - loss 0.64123438 - samples/sec: 19.36 - lr: 0.010000\n",
-      "2021-09-08 01:47:56,238 epoch 8 - iter 49/73 - loss 0.64377470 - samples/sec: 20.47 - lr: 0.010000\n",
-      "2021-09-08 01:47:56,583 epoch 8 - iter 56/73 - loss 0.64287428 - samples/sec: 20.36 - lr: 0.010000\n",
-      "2021-09-08 01:47:56,920 epoch 8 - iter 63/73 - loss 0.64094091 - samples/sec: 20.83 - lr: 0.010000\n",
-      "2021-09-08 01:47:57,262 epoch 8 - iter 70/73 - loss 0.64117060 - samples/sec: 20.54 - lr: 0.010000\n",
-      "2021-09-08 01:47:57,411 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:57,412 EPOCH 8 done: loss 0.6410 - lr 0.0100000\n",
-      "2021-09-08 01:47:57,594 DEV : loss 0.47959965467453003 - score 0.125\n",
-      "2021-09-08 01:47:57,595 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:47:57,597 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:57,963 epoch 9 - iter 7/73 - loss 0.62452680 - samples/sec: 20.20 - lr: 0.010000\n",
-      "2021-09-08 01:47:58,302 epoch 9 - iter 14/73 - loss 0.63451416 - samples/sec: 20.73 - lr: 0.010000\n",
-      "2021-09-08 01:47:58,654 epoch 9 - iter 21/73 - loss 0.62546699 - samples/sec: 19.89 - lr: 0.010000\n",
-      "2021-09-08 01:47:58,989 epoch 9 - iter 28/73 - loss 0.63777306 - samples/sec: 20.94 - lr: 0.010000\n",
-      "2021-09-08 01:47:59,335 epoch 9 - iter 35/73 - loss 0.64056976 - samples/sec: 20.30 - lr: 0.010000\n",
-      "2021-09-08 01:47:59,688 epoch 9 - iter 42/73 - loss 0.64061201 - samples/sec: 19.85 - lr: 0.010000\n",
-      "2021-09-08 01:48:00,029 epoch 9 - iter 49/73 - loss 0.64039914 - samples/sec: 20.59 - lr: 0.010000\n",
-      "2021-09-08 01:48:00,383 epoch 9 - iter 56/73 - loss 0.63710965 - samples/sec: 19.86 - lr: 0.010000\n",
-      "2021-09-08 01:48:00,735 epoch 9 - iter 63/73 - loss 0.64020353 - samples/sec: 19.91 - lr: 0.010000\n",
-      "2021-09-08 01:48:01,091 epoch 9 - iter 70/73 - loss 0.63822338 - samples/sec: 19.71 - lr: 0.010000\n",
-      "2021-09-08 01:48:01,236 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:01,236 EPOCH 9 done: loss 0.6371 - lr 0.0100000\n",
-      "2021-09-08 01:48:01,425 DEV : loss 0.4740344285964966 - score 0.125\n",
-      "2021-09-08 01:48:01,426 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:48:01,428 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:01,792 epoch 10 - iter 7/73 - loss 0.63143696 - samples/sec: 20.15 - lr: 0.010000\n",
-      "2021-09-08 01:48:02,125 epoch 10 - iter 14/73 - loss 0.64187044 - samples/sec: 21.10 - lr: 0.010000\n",
-      "2021-09-08 01:48:02,485 epoch 10 - iter 21/73 - loss 0.63216276 - samples/sec: 19.49 - lr: 0.010000\n",
-      "2021-09-08 01:48:02,829 epoch 10 - iter 28/73 - loss 0.63668736 - samples/sec: 20.40 - lr: 0.010000\n",
-      "2021-09-08 01:48:03,175 epoch 10 - iter 35/73 - loss 0.63279294 - samples/sec: 20.29 - lr: 0.010000\n",
-      "2021-09-08 01:48:03,522 epoch 10 - iter 42/73 - loss 0.63291470 - samples/sec: 20.20 - lr: 0.010000\n",
-      "2021-09-08 01:48:03,875 epoch 10 - iter 49/73 - loss 0.62908004 - samples/sec: 19.96 - lr: 0.010000\n",
-      "2021-09-08 01:48:04,239 epoch 10 - iter 56/73 - loss 0.63145830 - samples/sec: 19.31 - lr: 0.010000\n",
-      "2021-09-08 01:48:04,609 epoch 10 - iter 63/73 - loss 0.63968979 - samples/sec: 19.00 - lr: 0.010000\n",
-      "2021-09-08 01:48:04,991 epoch 10 - iter 70/73 - loss 0.64106072 - samples/sec: 18.34 - lr: 0.010000\n",
-      "2021-09-08 01:48:05,155 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:05,156 EPOCH 10 done: loss 0.6420 - lr 0.0100000\n",
-      "2021-09-08 01:48:05,333 DEV : loss 0.48521342873573303 - score 0.125\n",
+      "2021-09-21 21:36:12,807 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:36:12,807 EPOCH 6 done: loss 0.6363 - lr 0.0100000\n",
+      "2021-09-21 21:36:12,933 DEV : loss 0.47600993514060974 - score 0.125\n",
+      "2021-09-21 21:36:12,934 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:36:17,130 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:36:17,448 epoch 7 - iter 7/73 - loss 0.63529813 - samples/sec: 23.18 - lr: 0.010000\n",
+      "2021-09-21 21:36:17,748 epoch 7 - iter 14/73 - loss 0.64242051 - samples/sec: 23.40 - lr: 0.010000\n",
+      "2021-09-21 21:36:18,045 epoch 7 - iter 21/73 - loss 0.63900810 - samples/sec: 23.60 - lr: 0.010000\n",
+      "2021-09-21 21:36:18,347 epoch 7 - iter 28/73 - loss 0.64395550 - samples/sec: 23.17 - lr: 0.010000\n",
+      "2021-09-21 21:36:18,646 epoch 7 - iter 35/73 - loss 0.64239688 - samples/sec: 23.52 - lr: 0.010000\n",
+      "2021-09-21 21:36:18,946 epoch 7 - iter 42/73 - loss 0.64283025 - samples/sec: 23.33 - lr: 0.010000\n",
+      "2021-09-21 21:36:19,244 epoch 7 - iter 49/73 - loss 0.64510746 - samples/sec: 23.57 - lr: 0.010000\n",
+      "2021-09-21 21:36:19,542 epoch 7 - iter 56/73 - loss 0.64402088 - samples/sec: 23.49 - lr: 0.010000\n",
+      "2021-09-21 21:36:19,844 epoch 7 - iter 63/73 - loss 0.64608621 - samples/sec: 23.25 - lr: 0.010000\n",
+      "2021-09-21 21:36:20,145 epoch 7 - iter 70/73 - loss 0.64671195 - samples/sec: 23.32 - lr: 0.010000\n",
+      "2021-09-21 21:36:20,273 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:36:20,273 EPOCH 7 done: loss 0.6461 - lr 0.0100000\n",
+      "2021-09-21 21:36:20,401 DEV : loss 0.45898228883743286 - score 0.0\n",
+      "2021-09-21 21:36:20,401 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:36:20,403 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:36:20,718 epoch 8 - iter 7/73 - loss 0.62226072 - samples/sec: 23.34 - lr: 0.010000\n",
+      "2021-09-21 21:36:21,018 epoch 8 - iter 14/73 - loss 0.62955691 - samples/sec: 23.31 - lr: 0.010000\n",
+      "2021-09-21 21:36:21,318 epoch 8 - iter 21/73 - loss 0.63787196 - samples/sec: 23.41 - lr: 0.010000\n",
+      "2021-09-21 21:36:21,617 epoch 8 - iter 28/73 - loss 0.63414474 - samples/sec: 23.45 - lr: 0.010000\n",
+      "2021-09-21 21:36:21,920 epoch 8 - iter 35/73 - loss 0.63416950 - samples/sec: 23.18 - lr: 0.010000\n",
+      "2021-09-21 21:36:22,219 epoch 8 - iter 42/73 - loss 0.63243879 - samples/sec: 23.46 - lr: 0.010000\n",
+      "2021-09-21 21:36:22,519 epoch 8 - iter 49/73 - loss 0.63645448 - samples/sec: 23.36 - lr: 0.010000\n",
+      "2021-09-21 21:36:22,817 epoch 8 - iter 56/73 - loss 0.63666060 - samples/sec: 23.56 - lr: 0.010000\n",
+      "2021-09-21 21:36:23,114 epoch 8 - iter 63/73 - loss 0.63775862 - samples/sec: 23.58 - lr: 0.010000\n",
+      "2021-09-21 21:36:23,415 epoch 8 - iter 70/73 - loss 0.63821707 - samples/sec: 23.32 - lr: 0.010000\n",
+      "2021-09-21 21:36:23,543 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:36:23,544 EPOCH 8 done: loss 0.6371 - lr 0.0100000\n",
+      "2021-09-21 21:36:23,671 DEV : loss 0.46090689301490784 - score 0.0\n",
+      "2021-09-21 21:36:23,672 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:36:23,674 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:36:23,990 epoch 9 - iter 7/73 - loss 0.68006840 - samples/sec: 23.19 - lr: 0.010000\n",
+      "2021-09-21 21:36:24,289 epoch 9 - iter 14/73 - loss 0.66310889 - samples/sec: 23.43 - lr: 0.010000\n",
+      "2021-09-21 21:36:24,589 epoch 9 - iter 21/73 - loss 0.65183624 - samples/sec: 23.43 - lr: 0.010000\n",
+      "2021-09-21 21:36:24,892 epoch 9 - iter 28/73 - loss 0.65090626 - samples/sec: 23.17 - lr: 0.010000\n",
+      "2021-09-21 21:36:25,190 epoch 9 - iter 35/73 - loss 0.65575508 - samples/sec: 23.53 - lr: 0.010000\n",
+      "2021-09-21 21:36:25,488 epoch 9 - iter 42/73 - loss 0.65097153 - samples/sec: 23.52 - lr: 0.010000\n",
+      "2021-09-21 21:36:25,789 epoch 9 - iter 49/73 - loss 0.64710979 - samples/sec: 23.31 - lr: 0.010000\n",
+      "2021-09-21 21:36:26,087 epoch 9 - iter 56/73 - loss 0.65004266 - samples/sec: 23.50 - lr: 0.010000\n",
+      "2021-09-21 21:36:26,388 epoch 9 - iter 63/73 - loss 0.64642189 - samples/sec: 23.28 - lr: 0.010000\n",
+      "2021-09-21 21:36:26,688 epoch 9 - iter 70/73 - loss 0.64653155 - samples/sec: 23.42 - lr: 0.010000\n",
+      "2021-09-21 21:36:26,817 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:36:26,818 EPOCH 9 done: loss 0.6461 - lr 0.0100000\n",
+      "2021-09-21 21:36:26,944 DEV : loss 0.4868776798248291 - score 0.0\n",
+      "2021-09-21 21:36:26,945 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:36:26,947 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:36:27,264 epoch 10 - iter 7/73 - loss 0.64327199 - samples/sec: 23.13 - lr: 0.010000\n",
+      "2021-09-21 21:36:27,563 epoch 10 - iter 14/73 - loss 0.62126430 - samples/sec: 23.46 - lr: 0.010000\n",
+      "2021-09-21 21:36:27,865 epoch 10 - iter 21/73 - loss 0.60613051 - samples/sec: 23.24 - lr: 0.010000\n",
+      "2021-09-21 21:36:28,163 epoch 10 - iter 28/73 - loss 0.61552010 - samples/sec: 23.48 - lr: 0.010000\n",
+      "2021-09-21 21:36:28,461 epoch 10 - iter 35/73 - loss 0.61592121 - samples/sec: 23.61 - lr: 0.010000\n",
+      "2021-09-21 21:36:28,762 epoch 10 - iter 42/73 - loss 0.61949400 - samples/sec: 23.27 - lr: 0.010000\n",
+      "2021-09-21 21:36:29,060 epoch 10 - iter 49/73 - loss 0.62288081 - samples/sec: 23.52 - lr: 0.010000\n",
+      "2021-09-21 21:36:29,360 epoch 10 - iter 56/73 - loss 0.62966004 - samples/sec: 23.36 - lr: 0.010000\n",
+      "2021-09-21 21:36:29,659 epoch 10 - iter 63/73 - loss 0.63348944 - samples/sec: 23.52 - lr: 0.010000\n",
+      "2021-09-21 21:36:29,959 epoch 10 - iter 70/73 - loss 0.63532370 - samples/sec: 23.33 - lr: 0.010000\n",
+      "2021-09-21 21:36:30,088 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:36:30,088 EPOCH 10 done: loss 0.6355 - lr 0.0100000\n",
+      "2021-09-21 21:36:30,213 DEV : loss 0.47869423031806946 - score 0.0\n",
       "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 01:48:05,334 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:48:09,840 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:09,841 Testing using best model ...\n",
-      "2021-09-08 01:48:09,882 loading file None/best-model.pt\n",
+      "2021-09-21 21:36:30,214 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:36:33,858 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:36:33,859 Testing using best model ...\n",
+      "2021-09-21 21:36:33,860 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:48:17,724 \t0.1111\n",
-      "2021-09-08 01:48:17,725 \n",
+      "2021-09-21 21:36:38,700 \t0.1111\n",
+      "2021-09-21 21:36:38,701 \n",
       "Results:\n",
       "- F-score (micro) 0.1111\n",
-      "- F-score (macro) 0.0247\n",
+      "- F-score (macro) 0.0222\n",
       "- Accuracy 0.1111\n",
       "\n",
       "By class:\n",
       "                                                                            precision    recall  f1-score   support\n",
       "\n",
       "                                            the emotion of great happiness     0.0000    0.0000    0.0000         1\n",
-      "the astonishment you feel when something totally unexpected happens to you     0.0000    0.0000    0.0000         0\n",
-      "                         a strong positive emotion of regard and affection     0.1429    0.5000    0.2222         2\n",
+      "the astonishment you feel when something totally unexpected happens to you     0.0000    0.0000    0.0000         2\n",
+      "                         a strong positive emotion of regard and affection     0.0000    0.0000    0.0000         1\n",
       "                                                strong feelings of dislike     0.0000    0.0000    0.0000         0\n",
-      "    an emotion experienced in anticipation of some specific pain or danger     0.0000    0.0000    0.0000         0\n",
-      "         a feeling that is oriented toward some real or supposed grievance     0.0000    0.0000    0.0000         0\n",
-      "                    remorse caused by feeling responsible for some offense     0.0000    0.0000    0.0000         1\n",
-      "      a painful emotion resulting from an awareness of inadequacy or guilt     0.0000    0.0000    0.0000         3\n",
-      "                    emotions experienced when not in a state of well-being     0.0000    0.0000    0.0000         2\n",
+      "    an emotion experienced in anticipation of some specific pain or danger     0.1111    1.0000    0.2000         1\n",
+      "         a feeling that is oriented toward some real or supposed grievance     0.0000    0.0000    0.0000         1\n",
+      "                    remorse caused by feeling responsible for some offense     0.0000    0.0000    0.0000         0\n",
+      "      a painful emotion resulting from an awareness of inadequacy or guilt     0.0000    0.0000    0.0000         2\n",
+      "                    emotions experienced when not in a state of well-being     0.0000    0.0000    0.0000         1\n",
       "\n",
       "                                                                 micro avg     0.1111    0.1111    0.1111         9\n",
-      "                                                                 macro avg     0.0159    0.0556    0.0247         9\n",
-      "                                                              weighted avg     0.0317    0.1111    0.0494         9\n",
+      "                                                                 macro avg     0.0123    0.1111    0.0222         9\n",
+      "                                                              weighted avg     0.0123    0.1111    0.0222         9\n",
       "                                                               samples avg     0.1111    0.1111    0.1111         9\n",
       "\n",
-      "2021-09-08 01:48:17,725 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:16,340 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n"
+      "2021-09-21 21:36:38,701 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "2021-09-21 21:37:24,497 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:49:21,112 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:37:28,415 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 82/82 [00:00<00:00, 27884.95it/s]"
+      "100%|██████████| 82/82 [00:00<00:00, 47043.21it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:49:21,118 [b'the emotion of great happiness', b'the astonishment you feel when something totally unexpected happens to you', b'a strong positive emotion of regard and affection', b'strong feelings of dislike', b'an emotion experienced in anticipation of some specific pain or danger', b'a feeling that is oriented toward some real or supposed grievance', b'remorse caused by feeling responsible for some offense', b'a painful emotion resulting from an awareness of inadequacy or guilt', b'emotions experienced when not in a state of well-being']\n",
-      "2021-09-08 01:49:21,299 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:21,301 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:37:28,419 [b'the emotion of great happiness', b'the astonishment you feel when something totally unexpected happens to you', b'a strong positive emotion of regard and affection', b'strong feelings of dislike', b'an emotion experienced in anticipation of some specific pain or danger', b'a feeling that is oriented toward some real or supposed grievance', b'remorse caused by feeling responsible for some offense', b'a painful emotion resulting from an awareness of inadequacy or guilt', b'emotions experienced when not in a state of well-being']\n",
+      "2021-09-21 21:37:28,430 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:37:28,432 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -8359,24 +8382,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:49:21,302 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:21,302 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
-      "2021-09-08 01:49:21,302 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:21,303 Parameters:\n",
-      "2021-09-08 01:49:21,303  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:49:21,304  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:49:21,304  - patience: \"3\"\n",
-      "2021-09-08 01:49:21,304  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:49:21,305  - max_epochs: \"10\"\n",
-      "2021-09-08 01:49:21,305  - shuffle: \"True\"\n",
-      "2021-09-08 01:49:21,305  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:49:21,306  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:49:21,306 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:21,307 Model training base path: \"None\"\n",
-      "2021-09-08 01:49:21,307 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:21,307 Device: cuda:1\n",
-      "2021-09-08 01:49:21,308 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:21,308 Embeddings storage mode: cpu\n"
+      "2021-09-21 21:37:28,432 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:37:28,432 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
+      "2021-09-21 21:37:28,433 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:37:28,433 Parameters:\n",
+      "2021-09-21 21:37:28,433  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:37:28,434  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:37:28,434  - patience: \"3\"\n",
+      "2021-09-21 21:37:28,434  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:37:28,434  - max_epochs: \"10\"\n",
+      "2021-09-21 21:37:28,435  - shuffle: \"True\"\n",
+      "2021-09-21 21:37:28,435  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:37:28,435  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:37:28,436 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:37:28,436 Model training base path: \"None\"\n",
+      "2021-09-21 21:37:28,436 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:37:28,436 Device: cuda:0\n",
+      "2021-09-21 21:37:28,437 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:37:28,437 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:37:28,443 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -8390,204 +8414,197 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:49:21,510 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:22,073 epoch 1 - iter 7/73 - loss 0.30287902 - samples/sec: 12.85 - lr: 0.020000\n",
-      "2021-09-08 01:49:22,483 epoch 1 - iter 14/73 - loss 0.25878052 - samples/sec: 17.13 - lr: 0.020000\n",
-      "2021-09-08 01:49:22,876 epoch 1 - iter 21/73 - loss 0.49988292 - samples/sec: 17.82 - lr: 0.020000\n",
-      "2021-09-08 01:49:23,239 epoch 1 - iter 28/73 - loss 0.50927227 - samples/sec: 19.33 - lr: 0.020000\n",
-      "2021-09-08 01:49:23,630 epoch 1 - iter 35/73 - loss 0.53737605 - samples/sec: 17.98 - lr: 0.020000\n",
-      "2021-09-08 01:49:24,048 epoch 1 - iter 42/73 - loss 0.55617266 - samples/sec: 16.77 - lr: 0.020000\n",
-      "2021-09-08 01:49:24,451 epoch 1 - iter 49/73 - loss 0.56099333 - samples/sec: 17.38 - lr: 0.020000\n",
-      "2021-09-08 01:49:24,853 epoch 1 - iter 56/73 - loss 0.52068088 - samples/sec: 17.48 - lr: 0.020000\n",
-      "2021-09-08 01:49:25,263 epoch 1 - iter 63/73 - loss 0.54226627 - samples/sec: 17.08 - lr: 0.020000\n",
-      "2021-09-08 01:49:25,666 epoch 1 - iter 70/73 - loss 0.55136197 - samples/sec: 17.43 - lr: 0.020000\n",
-      "2021-09-08 01:49:25,833 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:25,834 EPOCH 1 done: loss 0.5318 - lr 0.0200000\n",
-      "2021-09-08 01:49:26,023 DEV : loss 0.8720791935920715 - score 0.125\n",
-      "2021-09-08 01:49:26,024 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:37:28,773 epoch 1 - iter 7/73 - loss 0.31028479 - samples/sec: 22.26 - lr: 0.020000\n",
+      "2021-09-21 21:37:29,088 epoch 1 - iter 14/73 - loss 0.35496941 - samples/sec: 22.24 - lr: 0.020000\n",
+      "2021-09-21 21:37:29,413 epoch 1 - iter 21/73 - loss 0.48568500 - samples/sec: 21.59 - lr: 0.020000\n",
+      "2021-09-21 21:37:29,729 epoch 1 - iter 28/73 - loss 0.48280019 - samples/sec: 22.20 - lr: 0.020000\n",
+      "2021-09-21 21:37:30,052 epoch 1 - iter 35/73 - loss 0.52856931 - samples/sec: 21.65 - lr: 0.020000\n",
+      "2021-09-21 21:37:30,373 epoch 1 - iter 42/73 - loss 0.56958130 - samples/sec: 21.90 - lr: 0.020000\n",
+      "2021-09-21 21:37:30,693 epoch 1 - iter 49/73 - loss 0.57772834 - samples/sec: 21.89 - lr: 0.020000\n",
+      "2021-09-21 21:37:31,017 epoch 1 - iter 56/73 - loss 0.54905968 - samples/sec: 21.64 - lr: 0.020000\n",
+      "2021-09-21 21:37:31,333 epoch 1 - iter 63/73 - loss 0.54662406 - samples/sec: 22.20 - lr: 0.020000\n",
+      "2021-09-21 21:37:31,650 epoch 1 - iter 70/73 - loss 0.57396337 - samples/sec: 22.11 - lr: 0.020000\n",
+      "2021-09-21 21:37:31,785 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:37:31,785 EPOCH 1 done: loss 0.5748 - lr 0.0200000\n",
+      "2021-09-21 21:37:31,913 DEV : loss 0.7900391221046448 - score 0.0\n",
+      "2021-09-21 21:37:31,914 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:49:39,567 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:39,931 epoch 2 - iter 7/73 - loss 1.31635232 - samples/sec: 20.32 - lr: 0.020000\n",
-      "2021-09-08 01:49:40,282 epoch 2 - iter 14/73 - loss 1.00444612 - samples/sec: 20.00 - lr: 0.020000\n",
-      "2021-09-08 01:49:40,643 epoch 2 - iter 21/73 - loss 0.94608983 - samples/sec: 19.43 - lr: 0.020000\n",
-      "2021-09-08 01:49:41,016 epoch 2 - iter 28/73 - loss 0.91621531 - samples/sec: 18.82 - lr: 0.020000\n",
-      "2021-09-08 01:49:41,372 epoch 2 - iter 35/73 - loss 0.86534877 - samples/sec: 19.72 - lr: 0.020000\n",
-      "2021-09-08 01:49:41,728 epoch 2 - iter 42/73 - loss 0.83901356 - samples/sec: 19.72 - lr: 0.020000\n",
-      "2021-09-08 01:49:42,071 epoch 2 - iter 49/73 - loss 0.81668517 - samples/sec: 20.42 - lr: 0.020000\n",
-      "2021-09-08 01:49:42,410 epoch 2 - iter 56/73 - loss 0.79693083 - samples/sec: 20.72 - lr: 0.020000\n",
-      "2021-09-08 01:49:42,738 epoch 2 - iter 63/73 - loss 0.78061080 - samples/sec: 21.41 - lr: 0.020000\n",
-      "2021-09-08 01:49:43,060 epoch 2 - iter 70/73 - loss 0.76963883 - samples/sec: 21.77 - lr: 0.020000\n",
-      "2021-09-08 01:49:43,199 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:43,200 EPOCH 2 done: loss 0.7654 - lr 0.0200000\n",
-      "2021-09-08 01:49:43,450 DEV : loss 0.45862480998039246 - score 0.125\n",
-      "2021-09-08 01:49:43,451 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:37:36,088 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:37:36,429 epoch 2 - iter 7/73 - loss 0.84197265 - samples/sec: 21.56 - lr: 0.020000\n",
+      "2021-09-21 21:37:36,747 epoch 2 - iter 14/73 - loss 0.78641659 - samples/sec: 22.05 - lr: 0.020000\n",
+      "2021-09-21 21:37:37,048 epoch 2 - iter 21/73 - loss 0.73985867 - samples/sec: 23.24 - lr: 0.020000\n",
+      "2021-09-21 21:37:37,353 epoch 2 - iter 28/73 - loss 0.72869107 - samples/sec: 23.02 - lr: 0.020000\n",
+      "2021-09-21 21:37:37,663 epoch 2 - iter 35/73 - loss 0.71886446 - samples/sec: 22.61 - lr: 0.020000\n",
+      "2021-09-21 21:37:37,964 epoch 2 - iter 42/73 - loss 0.70670752 - samples/sec: 23.30 - lr: 0.020000\n",
+      "2021-09-21 21:37:38,275 epoch 2 - iter 49/73 - loss 0.70560775 - samples/sec: 22.52 - lr: 0.020000\n",
+      "2021-09-21 21:37:38,584 epoch 2 - iter 56/73 - loss 0.70114114 - samples/sec: 22.75 - lr: 0.020000\n",
+      "2021-09-21 21:37:38,883 epoch 2 - iter 63/73 - loss 0.69265116 - samples/sec: 23.47 - lr: 0.020000\n",
+      "2021-09-21 21:37:39,189 epoch 2 - iter 70/73 - loss 0.69302445 - samples/sec: 22.86 - lr: 0.020000\n",
+      "2021-09-21 21:37:39,324 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:37:39,325 EPOCH 2 done: loss 0.6937 - lr 0.0200000\n",
+      "2021-09-21 21:37:39,453 DEV : loss 0.5163292288780212 - score 0.0\n",
+      "2021-09-21 21:37:39,454 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:49:51,638 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:51,975 epoch 3 - iter 7/73 - loss 0.65263834 - samples/sec: 22.00 - lr: 0.020000\n",
-      "2021-09-08 01:49:52,296 epoch 3 - iter 14/73 - loss 0.64635484 - samples/sec: 21.86 - lr: 0.020000\n",
-      "2021-09-08 01:49:52,619 epoch 3 - iter 21/73 - loss 0.64340919 - samples/sec: 21.74 - lr: 0.020000\n",
-      "2021-09-08 01:49:52,934 epoch 3 - iter 28/73 - loss 0.64239701 - samples/sec: 22.27 - lr: 0.020000\n",
-      "2021-09-08 01:49:53,259 epoch 3 - iter 35/73 - loss 0.64901189 - samples/sec: 21.56 - lr: 0.020000\n",
-      "2021-09-08 01:49:53,577 epoch 3 - iter 42/73 - loss 0.65268096 - samples/sec: 22.10 - lr: 0.020000\n",
-      "2021-09-08 01:49:53,903 epoch 3 - iter 49/73 - loss 0.65273955 - samples/sec: 21.53 - lr: 0.020000\n",
-      "2021-09-08 01:49:54,222 epoch 3 - iter 56/73 - loss 0.65027323 - samples/sec: 22.02 - lr: 0.020000\n",
-      "2021-09-08 01:49:54,541 epoch 3 - iter 63/73 - loss 0.64435509 - samples/sec: 21.96 - lr: 0.020000\n",
-      "2021-09-08 01:49:54,866 epoch 3 - iter 70/73 - loss 0.64573176 - samples/sec: 21.60 - lr: 0.020000\n",
-      "2021-09-08 01:49:55,004 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:55,487 EPOCH 3 done: loss 0.6469 - lr 0.0200000\n",
-      "2021-09-08 01:49:55,805 DEV : loss 0.44344109296798706 - score 0.125\n",
-      "2021-09-08 01:49:55,807 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:37:44,028 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:37:44,352 epoch 3 - iter 7/73 - loss 0.65141299 - samples/sec: 22.77 - lr: 0.020000\n",
+      "2021-09-21 21:37:44,654 epoch 3 - iter 14/73 - loss 0.66383963 - samples/sec: 23.23 - lr: 0.020000\n",
+      "2021-09-21 21:37:44,956 epoch 3 - iter 21/73 - loss 0.67627811 - samples/sec: 23.23 - lr: 0.020000\n",
+      "2021-09-21 21:37:45,254 epoch 3 - iter 28/73 - loss 0.66864487 - samples/sec: 23.50 - lr: 0.020000\n",
+      "2021-09-21 21:37:45,552 epoch 3 - iter 35/73 - loss 0.67036496 - samples/sec: 23.52 - lr: 0.020000\n",
+      "2021-09-21 21:37:45,854 epoch 3 - iter 42/73 - loss 0.66742124 - samples/sec: 23.21 - lr: 0.020000\n",
+      "2021-09-21 21:37:46,154 epoch 3 - iter 49/73 - loss 0.66438209 - samples/sec: 23.40 - lr: 0.020000\n",
+      "2021-09-21 21:37:46,456 epoch 3 - iter 56/73 - loss 0.66188735 - samples/sec: 23.18 - lr: 0.020000\n",
+      "2021-09-21 21:37:46,756 epoch 3 - iter 63/73 - loss 0.66055851 - samples/sec: 23.40 - lr: 0.020000\n",
+      "2021-09-21 21:37:47,060 epoch 3 - iter 70/73 - loss 0.66187552 - samples/sec: 23.05 - lr: 0.020000\n",
+      "2021-09-21 21:37:47,192 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:37:47,192 EPOCH 3 done: loss 0.6600 - lr 0.0200000\n",
+      "2021-09-21 21:37:48,323 DEV : loss 0.4540066123008728 - score 0.0\n",
+      "2021-09-21 21:37:48,324 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:50:00,233 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:00,584 epoch 4 - iter 7/73 - loss 0.73430493 - samples/sec: 20.98 - lr: 0.020000\n",
-      "2021-09-08 01:50:00,905 epoch 4 - iter 14/73 - loss 0.69906160 - samples/sec: 21.89 - lr: 0.020000\n",
-      "2021-09-08 01:50:01,241 epoch 4 - iter 21/73 - loss 0.66873572 - samples/sec: 20.88 - lr: 0.020000\n",
-      "2021-09-08 01:50:01,571 epoch 4 - iter 28/73 - loss 0.66003016 - samples/sec: 21.26 - lr: 0.020000\n",
-      "2021-09-08 01:50:01,901 epoch 4 - iter 35/73 - loss 0.65793653 - samples/sec: 21.24 - lr: 0.020000\n",
-      "2021-09-08 01:50:02,248 epoch 4 - iter 42/73 - loss 0.66010787 - samples/sec: 20.20 - lr: 0.020000\n",
-      "2021-09-08 01:50:02,589 epoch 4 - iter 49/73 - loss 0.65658209 - samples/sec: 20.59 - lr: 0.020000\n",
-      "2021-09-08 01:50:02,921 epoch 4 - iter 56/73 - loss 0.65582394 - samples/sec: 21.17 - lr: 0.020000\n",
-      "2021-09-08 01:50:03,266 epoch 4 - iter 63/73 - loss 0.65102209 - samples/sec: 20.32 - lr: 0.020000\n",
-      "2021-09-08 01:50:03,611 epoch 4 - iter 70/73 - loss 0.65984051 - samples/sec: 20.33 - lr: 0.020000\n",
-      "2021-09-08 01:50:03,761 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:03,762 EPOCH 4 done: loss 0.6584 - lr 0.0200000\n",
-      "2021-09-08 01:50:03,933 DEV : loss 0.4936113953590393 - score 0.125\n",
-      "2021-09-08 01:50:03,934 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:50:03,936 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:04,293 epoch 5 - iter 7/73 - loss 0.66968228 - samples/sec: 20.63 - lr: 0.020000\n",
-      "2021-09-08 01:50:04,641 epoch 5 - iter 14/73 - loss 0.65308404 - samples/sec: 20.14 - lr: 0.020000\n",
-      "2021-09-08 01:50:04,986 epoch 5 - iter 21/73 - loss 0.66271102 - samples/sec: 20.34 - lr: 0.020000\n",
-      "2021-09-08 01:50:05,325 epoch 5 - iter 28/73 - loss 0.65938400 - samples/sec: 20.71 - lr: 0.020000\n",
-      "2021-09-08 01:50:05,662 epoch 5 - iter 35/73 - loss 0.65470330 - samples/sec: 20.79 - lr: 0.020000\n",
-      "2021-09-08 01:50:05,989 epoch 5 - iter 42/73 - loss 0.65582337 - samples/sec: 21.48 - lr: 0.020000\n",
-      "2021-09-08 01:50:06,322 epoch 5 - iter 49/73 - loss 0.65310768 - samples/sec: 21.04 - lr: 0.020000\n",
-      "2021-09-08 01:50:06,656 epoch 5 - iter 56/73 - loss 0.65310267 - samples/sec: 21.05 - lr: 0.020000\n",
-      "2021-09-08 01:50:06,982 epoch 5 - iter 63/73 - loss 0.65075085 - samples/sec: 21.59 - lr: 0.020000\n",
-      "2021-09-08 01:50:07,324 epoch 5 - iter 70/73 - loss 0.65122417 - samples/sec: 20.51 - lr: 0.020000\n",
-      "2021-09-08 01:50:07,464 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:07,464 EPOCH 5 done: loss 0.6517 - lr 0.0200000\n",
-      "2021-09-08 01:50:07,639 DEV : loss 0.5007027983665466 - score 0.125\n",
-      "2021-09-08 01:50:07,640 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:50:07,644 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:07,999 epoch 6 - iter 7/73 - loss 0.63558705 - samples/sec: 21.09 - lr: 0.020000\n",
-      "2021-09-08 01:50:08,333 epoch 6 - iter 14/73 - loss 0.64686446 - samples/sec: 21.02 - lr: 0.020000\n",
-      "2021-09-08 01:50:08,654 epoch 6 - iter 21/73 - loss 0.64544940 - samples/sec: 21.84 - lr: 0.020000\n",
-      "2021-09-08 01:50:08,979 epoch 6 - iter 28/73 - loss 0.64050433 - samples/sec: 21.64 - lr: 0.020000\n",
-      "2021-09-08 01:50:09,322 epoch 6 - iter 35/73 - loss 0.64072373 - samples/sec: 20.42 - lr: 0.020000\n",
-      "2021-09-08 01:50:09,664 epoch 6 - iter 42/73 - loss 0.64149453 - samples/sec: 20.54 - lr: 0.020000\n",
-      "2021-09-08 01:50:09,987 epoch 6 - iter 49/73 - loss 0.64333692 - samples/sec: 21.71 - lr: 0.020000\n",
-      "2021-09-08 01:50:10,317 epoch 6 - iter 56/73 - loss 0.64227585 - samples/sec: 21.26 - lr: 0.020000\n",
-      "2021-09-08 01:50:10,642 epoch 6 - iter 63/73 - loss 0.64190552 - samples/sec: 21.62 - lr: 0.020000\n"
+      "2021-09-21 21:37:52,437 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:37:52,756 epoch 4 - iter 7/73 - loss 0.63730583 - samples/sec: 23.19 - lr: 0.020000\n",
+      "2021-09-21 21:37:53,056 epoch 4 - iter 14/73 - loss 0.65680495 - samples/sec: 23.33 - lr: 0.020000\n",
+      "2021-09-21 21:37:53,359 epoch 4 - iter 21/73 - loss 0.64255255 - samples/sec: 23.14 - lr: 0.020000\n",
+      "2021-09-21 21:37:53,658 epoch 4 - iter 28/73 - loss 0.64210371 - samples/sec: 23.53 - lr: 0.020000\n",
+      "2021-09-21 21:37:53,955 epoch 4 - iter 35/73 - loss 0.63749777 - samples/sec: 23.54 - lr: 0.020000\n",
+      "2021-09-21 21:37:54,257 epoch 4 - iter 42/73 - loss 0.64315478 - samples/sec: 23.26 - lr: 0.020000\n",
+      "2021-09-21 21:37:54,556 epoch 4 - iter 49/73 - loss 0.64562716 - samples/sec: 23.47 - lr: 0.020000\n",
+      "2021-09-21 21:37:54,854 epoch 4 - iter 56/73 - loss 0.64568784 - samples/sec: 23.53 - lr: 0.020000\n",
+      "2021-09-21 21:37:55,157 epoch 4 - iter 63/73 - loss 0.64720542 - samples/sec: 23.12 - lr: 0.020000\n",
+      "2021-09-21 21:37:55,455 epoch 4 - iter 70/73 - loss 0.64981344 - samples/sec: 23.51 - lr: 0.020000\n",
+      "2021-09-21 21:37:55,584 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:37:55,584 EPOCH 4 done: loss 0.6514 - lr 0.0200000\n",
+      "2021-09-21 21:37:55,715 DEV : loss 0.4424375593662262 - score 0.0\n",
+      "2021-09-21 21:37:55,716 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:37:59,751 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:38:00,070 epoch 5 - iter 7/73 - loss 0.64537099 - samples/sec: 23.14 - lr: 0.020000\n",
+      "2021-09-21 21:38:00,370 epoch 5 - iter 14/73 - loss 0.65869132 - samples/sec: 23.36 - lr: 0.020000\n",
+      "2021-09-21 21:38:00,671 epoch 5 - iter 21/73 - loss 0.64399725 - samples/sec: 23.30 - lr: 0.020000\n",
+      "2021-09-21 21:38:00,976 epoch 5 - iter 28/73 - loss 0.64341548 - samples/sec: 23.05 - lr: 0.020000\n",
+      "2021-09-21 21:38:01,275 epoch 5 - iter 35/73 - loss 0.63952712 - samples/sec: 23.40 - lr: 0.020000\n",
+      "2021-09-21 21:38:01,573 epoch 5 - iter 42/73 - loss 0.63488572 - samples/sec: 23.57 - lr: 0.020000\n",
+      "2021-09-21 21:38:01,875 epoch 5 - iter 49/73 - loss 0.64200379 - samples/sec: 23.23 - lr: 0.020000\n",
+      "2021-09-21 21:38:02,183 epoch 5 - iter 56/73 - loss 0.64585160 - samples/sec: 22.78 - lr: 0.020000\n",
+      "2021-09-21 21:38:02,497 epoch 5 - iter 63/73 - loss 0.64614934 - samples/sec: 22.35 - lr: 0.020000\n",
+      "2021-09-21 21:38:02,799 epoch 5 - iter 70/73 - loss 0.64729079 - samples/sec: 23.22 - lr: 0.020000\n",
+      "2021-09-21 21:38:02,929 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:38:02,930 EPOCH 5 done: loss 0.6447 - lr 0.0200000\n",
+      "2021-09-21 21:38:03,059 DEV : loss 0.4866659939289093 - score 0.0\n",
+      "2021-09-21 21:38:03,059 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:38:03,061 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:38:03,376 epoch 6 - iter 7/73 - loss 0.64754024 - samples/sec: 23.30 - lr: 0.020000\n",
+      "2021-09-21 21:38:03,676 epoch 6 - iter 14/73 - loss 0.65708193 - samples/sec: 23.39 - lr: 0.020000\n",
+      "2021-09-21 21:38:03,980 epoch 6 - iter 21/73 - loss 0.66131790 - samples/sec: 23.05 - lr: 0.020000\n",
+      "2021-09-21 21:38:04,279 epoch 6 - iter 28/73 - loss 0.65383199 - samples/sec: 23.47 - lr: 0.020000\n",
+      "2021-09-21 21:38:04,578 epoch 6 - iter 35/73 - loss 0.65412542 - samples/sec: 23.44 - lr: 0.020000\n",
+      "2021-09-21 21:38:04,881 epoch 6 - iter 42/73 - loss 0.66118000 - samples/sec: 23.13 - lr: 0.020000\n",
+      "2021-09-21 21:38:05,182 epoch 6 - iter 49/73 - loss 0.65589096 - samples/sec: 23.34 - lr: 0.020000\n",
+      "2021-09-21 21:38:05,486 epoch 6 - iter 56/73 - loss 0.65329748 - samples/sec: 23.03 - lr: 0.020000\n",
+      "2021-09-21 21:38:05,789 epoch 6 - iter 63/73 - loss 0.65128322 - samples/sec: 23.18 - lr: 0.020000\n",
+      "2021-09-21 21:38:06,094 epoch 6 - iter 70/73 - loss 0.65013695 - samples/sec: 23.06 - lr: 0.020000\n",
+      "2021-09-21 21:38:06,224 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:50:10,984 epoch 6 - iter 70/73 - loss 0.64302753 - samples/sec: 20.50 - lr: 0.020000\n",
-      "2021-09-08 01:50:11,143 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:11,144 EPOCH 6 done: loss 0.6422 - lr 0.0200000\n",
-      "2021-09-08 01:50:11,425 DEV : loss 0.49201470613479614 - score 0.25\n",
-      "2021-09-08 01:50:11,426 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:50:17,855 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:18,196 epoch 7 - iter 7/73 - loss 0.63081314 - samples/sec: 21.80 - lr: 0.020000\n",
-      "2021-09-08 01:50:18,496 epoch 7 - iter 14/73 - loss 0.63627798 - samples/sec: 23.40 - lr: 0.020000\n",
-      "2021-09-08 01:50:18,795 epoch 7 - iter 21/73 - loss 0.63746648 - samples/sec: 23.42 - lr: 0.020000\n",
-      "2021-09-08 01:50:19,100 epoch 7 - iter 28/73 - loss 0.63421062 - samples/sec: 23.04 - lr: 0.020000\n",
-      "2021-09-08 01:50:19,398 epoch 7 - iter 35/73 - loss 0.63881237 - samples/sec: 23.55 - lr: 0.020000\n",
-      "2021-09-08 01:50:19,699 epoch 7 - iter 42/73 - loss 0.63965242 - samples/sec: 23.30 - lr: 0.020000\n",
-      "2021-09-08 01:50:19,997 epoch 7 - iter 49/73 - loss 0.63959347 - samples/sec: 23.57 - lr: 0.020000\n",
-      "2021-09-08 01:50:20,295 epoch 7 - iter 56/73 - loss 0.64016286 - samples/sec: 23.53 - lr: 0.020000\n",
-      "2021-09-08 01:50:20,603 epoch 7 - iter 63/73 - loss 0.63946868 - samples/sec: 22.81 - lr: 0.020000\n",
-      "2021-09-08 01:50:20,906 epoch 7 - iter 70/73 - loss 0.64056941 - samples/sec: 23.18 - lr: 0.020000\n",
-      "2021-09-08 01:50:21,036 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:21,037 EPOCH 7 done: loss 0.6396 - lr 0.0200000\n",
-      "2021-09-08 01:50:21,911 DEV : loss 0.49904608726501465 - score 0.125\n",
-      "2021-09-08 01:50:21,912 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:50:21,936 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:22,339 epoch 8 - iter 7/73 - loss 0.65023817 - samples/sec: 18.23 - lr: 0.020000\n",
-      "2021-09-08 01:50:22,733 epoch 8 - iter 14/73 - loss 0.64361717 - samples/sec: 17.81 - lr: 0.020000\n",
-      "2021-09-08 01:50:23,137 epoch 8 - iter 21/73 - loss 0.64078764 - samples/sec: 17.37 - lr: 0.020000\n",
-      "2021-09-08 01:50:23,484 epoch 8 - iter 28/73 - loss 0.64906733 - samples/sec: 20.21 - lr: 0.020000\n",
-      "2021-09-08 01:50:23,824 epoch 8 - iter 35/73 - loss 0.64853238 - samples/sec: 20.73 - lr: 0.020000\n",
-      "2021-09-08 01:50:24,163 epoch 8 - iter 42/73 - loss 0.64718551 - samples/sec: 20.73 - lr: 0.020000\n",
-      "2021-09-08 01:50:24,491 epoch 8 - iter 49/73 - loss 0.64445549 - samples/sec: 21.39 - lr: 0.020000\n",
-      "2021-09-08 01:50:24,824 epoch 8 - iter 56/73 - loss 0.64462611 - samples/sec: 21.09 - lr: 0.020000\n",
-      "2021-09-08 01:50:25,152 epoch 8 - iter 63/73 - loss 0.64387252 - samples/sec: 21.40 - lr: 0.020000\n",
-      "2021-09-08 01:50:25,485 epoch 8 - iter 70/73 - loss 0.64396534 - samples/sec: 21.05 - lr: 0.020000\n",
-      "2021-09-08 01:50:25,627 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:25,628 EPOCH 8 done: loss 0.6434 - lr 0.0200000\n",
-      "2021-09-08 01:50:25,851 DEV : loss 0.46925342082977295 - score 0.125\n",
-      "2021-09-08 01:50:25,852 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:50:25,942 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:26,287 epoch 9 - iter 7/73 - loss 0.62935019 - samples/sec: 21.23 - lr: 0.020000\n",
-      "2021-09-08 01:50:26,618 epoch 9 - iter 14/73 - loss 0.63529413 - samples/sec: 21.17 - lr: 0.020000\n",
-      "2021-09-08 01:50:26,948 epoch 9 - iter 21/73 - loss 0.63261915 - samples/sec: 21.31 - lr: 0.020000\n",
-      "2021-09-08 01:50:27,281 epoch 9 - iter 28/73 - loss 0.64112891 - samples/sec: 21.05 - lr: 0.020000\n",
-      "2021-09-08 01:50:27,652 epoch 9 - iter 35/73 - loss 0.63937008 - samples/sec: 18.94 - lr: 0.020000\n",
-      "2021-09-08 01:50:28,050 epoch 9 - iter 42/73 - loss 0.63999108 - samples/sec: 17.60 - lr: 0.020000\n",
-      "2021-09-08 01:50:28,443 epoch 9 - iter 49/73 - loss 0.63936466 - samples/sec: 17.86 - lr: 0.020000\n",
-      "2021-09-08 01:50:28,823 epoch 9 - iter 56/73 - loss 0.63748582 - samples/sec: 18.45 - lr: 0.020000\n",
-      "2021-09-08 01:50:29,185 epoch 9 - iter 63/73 - loss 0.63876673 - samples/sec: 19.38 - lr: 0.020000\n",
-      "2021-09-08 01:50:29,546 epoch 9 - iter 70/73 - loss 0.63916022 - samples/sec: 19.43 - lr: 0.020000\n",
-      "2021-09-08 01:50:29,688 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:29,689 EPOCH 9 done: loss 0.6397 - lr 0.0200000\n",
-      "2021-09-08 01:50:29,941 DEV : loss 0.5136486887931824 - score 0.0\n",
-      "2021-09-08 01:50:29,942 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:50:30,014 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:30,361 epoch 10 - iter 7/73 - loss 0.66343295 - samples/sec: 21.07 - lr: 0.020000\n",
-      "2021-09-08 01:50:30,689 epoch 10 - iter 14/73 - loss 0.65693479 - samples/sec: 21.39 - lr: 0.020000\n",
-      "2021-09-08 01:50:31,021 epoch 10 - iter 21/73 - loss 0.65068091 - samples/sec: 21.11 - lr: 0.020000\n",
-      "2021-09-08 01:50:31,349 epoch 10 - iter 28/73 - loss 0.65229651 - samples/sec: 21.42 - lr: 0.020000\n",
-      "2021-09-08 01:50:31,681 epoch 10 - iter 35/73 - loss 0.64982299 - samples/sec: 21.14 - lr: 0.020000\n",
-      "2021-09-08 01:50:32,009 epoch 10 - iter 42/73 - loss 0.64450444 - samples/sec: 21.37 - lr: 0.020000\n",
-      "2021-09-08 01:50:32,337 epoch 10 - iter 49/73 - loss 0.64587946 - samples/sec: 21.37 - lr: 0.020000\n",
-      "2021-09-08 01:50:32,670 epoch 10 - iter 56/73 - loss 0.64637408 - samples/sec: 21.10 - lr: 0.020000\n",
-      "2021-09-08 01:50:33,001 epoch 10 - iter 63/73 - loss 0.64505484 - samples/sec: 21.14 - lr: 0.020000\n",
-      "2021-09-08 01:50:33,399 epoch 10 - iter 70/73 - loss 0.64412956 - samples/sec: 17.65 - lr: 0.020000\n",
-      "2021-09-08 01:50:33,569 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:33,569 EPOCH 10 done: loss 0.6431 - lr 0.0200000\n",
-      "2021-09-08 01:50:33,777 DEV : loss 0.48680323362350464 - score 0.0\n",
-      "Epoch    10: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:50:33,778 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:50:37,903 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:37,904 Testing using best model ...\n",
-      "2021-09-08 01:50:37,930 loading file None/best-model.pt\n",
+      "2021-09-21 21:38:06,225 EPOCH 6 done: loss 0.6492 - lr 0.0200000\n",
+      "2021-09-21 21:38:06,353 DEV : loss 0.46900931000709534 - score 0.0\n",
+      "2021-09-21 21:38:06,354 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:38:06,357 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:38:06,670 epoch 7 - iter 7/73 - loss 0.64255115 - samples/sec: 23.42 - lr: 0.020000\n",
+      "2021-09-21 21:38:06,968 epoch 7 - iter 14/73 - loss 0.64488162 - samples/sec: 23.52 - lr: 0.020000\n",
+      "2021-09-21 21:38:07,266 epoch 7 - iter 21/73 - loss 0.64207332 - samples/sec: 23.52 - lr: 0.020000\n",
+      "2021-09-21 21:38:07,566 epoch 7 - iter 28/73 - loss 0.64212112 - samples/sec: 23.39 - lr: 0.020000\n",
+      "2021-09-21 21:38:07,867 epoch 7 - iter 35/73 - loss 0.64104064 - samples/sec: 23.30 - lr: 0.020000\n",
+      "2021-09-21 21:38:08,169 epoch 7 - iter 42/73 - loss 0.65052299 - samples/sec: 23.17 - lr: 0.020000\n",
+      "2021-09-21 21:38:08,470 epoch 7 - iter 49/73 - loss 0.65387400 - samples/sec: 23.32 - lr: 0.020000\n",
+      "2021-09-21 21:38:08,767 epoch 7 - iter 56/73 - loss 0.64969547 - samples/sec: 23.61 - lr: 0.020000\n",
+      "2021-09-21 21:38:09,071 epoch 7 - iter 63/73 - loss 0.64845871 - samples/sec: 23.09 - lr: 0.020000\n",
+      "2021-09-21 21:38:09,369 epoch 7 - iter 70/73 - loss 0.64531211 - samples/sec: 23.53 - lr: 0.020000\n",
+      "2021-09-21 21:38:09,497 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:38:09,498 EPOCH 7 done: loss 0.6477 - lr 0.0200000\n",
+      "2021-09-21 21:38:09,627 DEV : loss 0.4743664264678955 - score 0.0\n",
+      "2021-09-21 21:38:09,628 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:38:09,630 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:38:09,941 epoch 8 - iter 7/73 - loss 0.67011516 - samples/sec: 23.57 - lr: 0.020000\n",
+      "2021-09-21 21:38:10,238 epoch 8 - iter 14/73 - loss 0.65652491 - samples/sec: 23.57 - lr: 0.020000\n",
+      "2021-09-21 21:38:10,538 epoch 8 - iter 21/73 - loss 0.65198557 - samples/sec: 23.40 - lr: 0.020000\n",
+      "2021-09-21 21:38:10,834 epoch 8 - iter 28/73 - loss 0.64893498 - samples/sec: 23.68 - lr: 0.020000\n",
+      "2021-09-21 21:38:11,131 epoch 8 - iter 35/73 - loss 0.64712984 - samples/sec: 23.62 - lr: 0.020000\n",
+      "2021-09-21 21:38:11,434 epoch 8 - iter 42/73 - loss 0.64495250 - samples/sec: 23.16 - lr: 0.020000\n",
+      "2021-09-21 21:38:11,731 epoch 8 - iter 49/73 - loss 0.65293939 - samples/sec: 23.61 - lr: 0.020000\n",
+      "2021-09-21 21:38:12,030 epoch 8 - iter 56/73 - loss 0.64902429 - samples/sec: 23.46 - lr: 0.020000\n",
+      "2021-09-21 21:38:12,328 epoch 8 - iter 63/73 - loss 0.64698390 - samples/sec: 23.55 - lr: 0.020000\n",
+      "2021-09-21 21:38:12,629 epoch 8 - iter 70/73 - loss 0.65448170 - samples/sec: 23.28 - lr: 0.020000\n",
+      "2021-09-21 21:38:12,757 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:38:12,758 EPOCH 8 done: loss 0.6535 - lr 0.0200000\n",
+      "2021-09-21 21:38:12,885 DEV : loss 0.4581139385700226 - score 0.0\n",
+      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:38:12,886 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:38:12,888 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:38:13,201 epoch 9 - iter 7/73 - loss 0.62711780 - samples/sec: 23.45 - lr: 0.010000\n",
+      "2021-09-21 21:38:13,498 epoch 9 - iter 14/73 - loss 0.62908873 - samples/sec: 23.58 - lr: 0.010000\n",
+      "2021-09-21 21:38:13,799 epoch 9 - iter 21/73 - loss 0.65211550 - samples/sec: 23.32 - lr: 0.010000\n",
+      "2021-09-21 21:38:14,095 epoch 9 - iter 28/73 - loss 0.65014576 - samples/sec: 23.65 - lr: 0.010000\n",
+      "2021-09-21 21:38:14,394 epoch 9 - iter 35/73 - loss 0.65092426 - samples/sec: 23.50 - lr: 0.010000\n",
+      "2021-09-21 21:38:14,693 epoch 9 - iter 42/73 - loss 0.64502720 - samples/sec: 23.41 - lr: 0.010000\n",
+      "2021-09-21 21:38:14,993 epoch 9 - iter 49/73 - loss 0.64388401 - samples/sec: 23.43 - lr: 0.010000\n",
+      "2021-09-21 21:38:15,289 epoch 9 - iter 56/73 - loss 0.64186959 - samples/sec: 23.63 - lr: 0.010000\n",
+      "2021-09-21 21:38:15,589 epoch 9 - iter 63/73 - loss 0.63864486 - samples/sec: 23.40 - lr: 0.010000\n",
+      "2021-09-21 21:38:15,888 epoch 9 - iter 70/73 - loss 0.63890536 - samples/sec: 23.49 - lr: 0.010000\n",
+      "2021-09-21 21:38:16,015 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:38:16,015 EPOCH 9 done: loss 0.6398 - lr 0.0100000\n",
+      "2021-09-21 21:38:16,145 DEV : loss 0.5428746938705444 - score 0.0\n",
+      "2021-09-21 21:38:16,146 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:38:16,147 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:38:16,460 epoch 10 - iter 7/73 - loss 0.65379059 - samples/sec: 23.45 - lr: 0.010000\n",
+      "2021-09-21 21:38:16,760 epoch 10 - iter 14/73 - loss 0.66309967 - samples/sec: 23.37 - lr: 0.010000\n",
+      "2021-09-21 21:38:17,056 epoch 10 - iter 21/73 - loss 0.64802162 - samples/sec: 23.70 - lr: 0.010000\n",
+      "2021-09-21 21:38:17,355 epoch 10 - iter 28/73 - loss 0.63199651 - samples/sec: 23.43 - lr: 0.010000\n",
+      "2021-09-21 21:38:17,655 epoch 10 - iter 35/73 - loss 0.63563000 - samples/sec: 23.38 - lr: 0.010000\n",
+      "2021-09-21 21:38:17,953 epoch 10 - iter 42/73 - loss 0.63885581 - samples/sec: 23.52 - lr: 0.010000\n",
+      "2021-09-21 21:38:18,254 epoch 10 - iter 49/73 - loss 0.64393031 - samples/sec: 23.31 - lr: 0.010000\n",
+      "2021-09-21 21:38:18,552 epoch 10 - iter 56/73 - loss 0.63799922 - samples/sec: 23.56 - lr: 0.010000\n",
+      "2021-09-21 21:38:18,853 epoch 10 - iter 63/73 - loss 0.63945285 - samples/sec: 23.29 - lr: 0.010000\n",
+      "2021-09-21 21:38:19,149 epoch 10 - iter 70/73 - loss 0.63889779 - samples/sec: 23.67 - lr: 0.010000\n",
+      "2021-09-21 21:38:19,277 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:38:19,278 EPOCH 10 done: loss 0.6398 - lr 0.0100000\n",
+      "2021-09-21 21:38:19,406 DEV : loss 0.5058469176292419 - score 0.0\n",
+      "2021-09-21 21:38:19,407 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:38:23,324 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:38:23,325 Testing using best model ...\n",
+      "2021-09-21 21:38:23,327 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:50:46,655 \t0.1111\n",
-      "2021-09-08 01:50:46,655 \n",
+      "2021-09-21 21:38:28,068 \t0.1111\n",
+      "2021-09-21 21:38:28,068 \n",
       "Results:\n",
       "- F-score (micro) 0.1111\n",
-      "- F-score (macro) 0.0741\n",
+      "- F-score (macro) 0.0222\n",
       "- Accuracy 0.1111\n",
       "\n",
       "By class:\n",
       "                                                                            precision    recall  f1-score   support\n",
       "\n",
       "                                            the emotion of great happiness     0.0000    0.0000    0.0000         0\n",
-      "the astonishment you feel when something totally unexpected happens to you     0.0000    0.0000    0.0000         1\n",
+      "the astonishment you feel when something totally unexpected happens to you     0.0000    0.0000    0.0000         2\n",
       "                         a strong positive emotion of regard and affection     0.0000    0.0000    0.0000         0\n",
-      "                                                strong feelings of dislike     1.0000    0.5000    0.6667         2\n",
-      "    an emotion experienced in anticipation of some specific pain or danger     0.0000    0.0000    0.0000         2\n",
-      "         a feeling that is oriented toward some real or supposed grievance     0.0000    0.0000    0.0000         2\n",
-      "                    remorse caused by feeling responsible for some offense     0.0000    0.0000    0.0000         1\n",
-      "      a painful emotion resulting from an awareness of inadequacy or guilt     0.0000    0.0000    0.0000         1\n",
-      "                    emotions experienced when not in a state of well-being     0.0000    0.0000    0.0000         0\n",
+      "                                                strong feelings of dislike     0.0000    0.0000    0.0000         2\n",
+      "    an emotion experienced in anticipation of some specific pain or danger     0.0000    0.0000    0.0000         1\n",
+      "         a feeling that is oriented toward some real or supposed grievance     0.0000    0.0000    0.0000         1\n",
+      "                    remorse caused by feeling responsible for some offense     0.0000    0.0000    0.0000         0\n",
+      "      a painful emotion resulting from an awareness of inadequacy or guilt     0.1111    1.0000    0.2000         1\n",
+      "                    emotions experienced when not in a state of well-being     0.0000    0.0000    0.0000         2\n",
       "\n",
       "                                                                 micro avg     0.1111    0.1111    0.1111         9\n",
-      "                                                                 macro avg     0.1111    0.0556    0.0741         9\n",
-      "                                                              weighted avg     0.2222    0.1111    0.1481         9\n",
+      "                                                                 macro avg     0.0123    0.1111    0.0222         9\n",
+      "                                                              weighted avg     0.0123    0.1111    0.0222         9\n",
       "                                                               samples avg     0.1111    0.1111    0.1111         9\n",
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 01:50:46,656 ----------------------------------------------------------------------------------------------------\n",
-      "Accuracy Durchschnitt: 0.11609977324263039\n"
+      "\n",
+      "2021-09-21 21:38:28,068 ----------------------------------------------------------------------------------------------------\n",
+      "Accuracy Durchschnitt: 0.11843213475866537\n"
      ]
     }
    ],
@@ -8661,6 +8678,26 @@
     "\n",
     "print(f'Accuracy Durchschnitt: {statistics.mean(avg_acc_list)}') "
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "7e1bdfb0",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.13216715257531583, 0.11402656300615484, 0.1098153547133139, 0.1198574667962423, 0.11629413670229997]\n",
+      "0.007601464100376415\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(avg_acc_list)\n",
+    "print(statistics.pstdev(avg_acc_list))"
+   ]
   }
  ],
  "metadata": {
diff --git a/fewshot/emotion_yinetall_fewshot.ipynb b/fewshot/emotion_yinetall_fewshot.ipynb
index 330ce42..5d8f984 100644
--- a/fewshot/emotion_yinetall_fewshot.ipynb
+++ b/fewshot/emotion_yinetall_fewshot.ipynb
@@ -70,25 +70,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:21:43,311 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
+      "2021-09-21 21:02:23,968 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:21:53,475 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:02:30,951 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 36/36 [00:00<00:00, 23707.79it/s]"
+      "100%|██████████| 36/36 [00:00<00:00, 54314.73it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:21:53,481 [b'sadness', b'optimism', b'anger', b'joy']\n",
-      "2021-09-08 01:21:53,488 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:21:53,490 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:02:30,954 [b'sadness', b'optimism', b'anger', b'joy']\n",
+      "2021-09-21 21:02:30,957 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:30,959 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -401,25 +401,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:21:53,490 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:21:53,491 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
-      "2021-09-08 01:21:53,491 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:21:53,491 Parameters:\n",
-      "2021-09-08 01:21:53,492  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:21:53,492  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:21:53,492  - patience: \"3\"\n",
-      "2021-09-08 01:21:53,493  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:21:53,493  - max_epochs: \"10\"\n",
-      "2021-09-08 01:21:53,493  - shuffle: \"True\"\n",
-      "2021-09-08 01:21:53,494  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:21:53,494  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:21:53,494 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:21:53,495 Model training base path: \"temp\"\n",
-      "2021-09-08 01:21:53,495 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:21:53,495 Device: cuda:0\n",
-      "2021-09-08 01:21:53,496 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:21:53,496 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:21:53,505 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:02:30,960 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:30,960 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
+      "2021-09-21 21:02:30,960 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:30,960 Parameters:\n",
+      "2021-09-21 21:02:30,961  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:02:30,961  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:02:30,961  - patience: \"3\"\n",
+      "2021-09-21 21:02:30,962  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:02:30,962  - max_epochs: \"10\"\n",
+      "2021-09-21 21:02:30,962  - shuffle: \"True\"\n",
+      "2021-09-21 21:02:30,962  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:02:30,963  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:02:30,963 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:30,963 Model training base path: \"temp\"\n",
+      "2021-09-21 21:02:30,963 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:30,964 Device: cuda:0\n",
+      "2021-09-21 21:02:30,964 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:30,964 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:02:30,971 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -433,212 +433,222 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:21:53,708 epoch 1 - iter 3/32 - loss 1.01867877 - samples/sec: 16.79 - lr: 0.020000\n",
-      "2021-09-08 01:21:53,885 epoch 1 - iter 6/32 - loss 0.58681531 - samples/sec: 17.07 - lr: 0.020000\n",
-      "2021-09-08 01:21:54,028 epoch 1 - iter 9/32 - loss 0.41496814 - samples/sec: 21.11 - lr: 0.020000\n",
-      "2021-09-08 01:21:54,206 epoch 1 - iter 12/32 - loss 0.62932548 - samples/sec: 16.86 - lr: 0.020000\n",
-      "2021-09-08 01:21:54,355 epoch 1 - iter 15/32 - loss 0.70570313 - samples/sec: 20.22 - lr: 0.020000\n",
-      "2021-09-08 01:21:54,547 epoch 1 - iter 18/32 - loss 0.71355491 - samples/sec: 15.69 - lr: 0.020000\n",
-      "2021-09-08 01:21:54,691 epoch 1 - iter 21/32 - loss 0.71131812 - samples/sec: 20.88 - lr: 0.020000\n",
-      "2021-09-08 01:21:54,894 epoch 1 - iter 24/32 - loss 0.66055016 - samples/sec: 14.80 - lr: 0.020000\n",
-      "2021-09-08 01:21:55,048 epoch 1 - iter 27/32 - loss 0.68211372 - samples/sec: 19.67 - lr: 0.020000\n",
-      "2021-09-08 01:21:55,231 epoch 1 - iter 30/32 - loss 0.65115632 - samples/sec: 16.42 - lr: 0.020000\n",
-      "2021-09-08 01:21:55,354 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:21:55,354 EPOCH 1 done: loss 0.6263 - lr 0.0200000\n",
-      "2021-09-08 01:21:55,424 DEV : loss 0.6909924745559692 - score 0.5\n",
-      "2021-09-08 01:21:55,425 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:02:31,163 epoch 1 - iter 3/32 - loss 1.31410356 - samples/sec: 17.21 - lr: 0.020000\n",
+      "2021-09-21 21:02:31,330 epoch 1 - iter 6/32 - loss 0.76244358 - samples/sec: 18.04 - lr: 0.020000\n",
+      "2021-09-21 21:02:31,481 epoch 1 - iter 9/32 - loss 0.55799371 - samples/sec: 19.89 - lr: 0.020000\n",
+      "2021-09-21 21:02:31,643 epoch 1 - iter 12/32 - loss 1.13835438 - samples/sec: 18.63 - lr: 0.020000\n",
+      "2021-09-21 21:02:31,800 epoch 1 - iter 15/32 - loss 1.04471228 - samples/sec: 19.07 - lr: 0.020000\n",
+      "2021-09-21 21:02:31,965 epoch 1 - iter 18/32 - loss 1.08265130 - samples/sec: 18.28 - lr: 0.020000\n",
+      "2021-09-21 21:02:32,119 epoch 1 - iter 21/32 - loss 0.97608914 - samples/sec: 19.54 - lr: 0.020000\n",
+      "2021-09-21 21:02:32,277 epoch 1 - iter 24/32 - loss 0.88435811 - samples/sec: 18.99 - lr: 0.020000\n",
+      "2021-09-21 21:02:32,436 epoch 1 - iter 27/32 - loss 0.92295715 - samples/sec: 19.00 - lr: 0.020000\n",
+      "2021-09-21 21:02:32,611 epoch 1 - iter 30/32 - loss 0.89324757 - samples/sec: 17.17 - lr: 0.020000\n",
+      "2021-09-21 21:02:32,721 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:32,722 EPOCH 1 done: loss 0.8528 - lr 0.0200000\n",
+      "2021-09-21 21:02:32,793 DEV : loss 1.1843901872634888 - score 0.25\n",
+      "2021-09-21 21:02:32,794 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:22:15,039 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:22:15,237 epoch 2 - iter 3/32 - loss 0.57789491 - samples/sec: 16.36 - lr: 0.020000\n",
-      "2021-09-08 01:22:15,387 epoch 2 - iter 6/32 - loss 0.70234103 - samples/sec: 20.20 - lr: 0.020000\n",
-      "2021-09-08 01:22:15,559 epoch 2 - iter 9/32 - loss 0.60920162 - samples/sec: 17.55 - lr: 0.020000\n",
-      "2021-09-08 01:22:15,700 epoch 2 - iter 12/32 - loss 0.55411816 - samples/sec: 21.39 - lr: 0.020000\n",
-      "2021-09-08 01:22:15,896 epoch 2 - iter 15/32 - loss 0.46668748 - samples/sec: 15.32 - lr: 0.020000\n",
-      "2021-09-08 01:22:16,049 epoch 2 - iter 18/32 - loss 0.50349297 - samples/sec: 19.73 - lr: 0.020000\n",
-      "2021-09-08 01:22:16,204 epoch 2 - iter 21/32 - loss 0.54024572 - samples/sec: 19.40 - lr: 0.020000\n",
-      "2021-09-08 01:22:16,409 epoch 2 - iter 24/32 - loss 0.49538995 - samples/sec: 14.70 - lr: 0.020000\n",
-      "2021-09-08 01:22:16,548 epoch 2 - iter 27/32 - loss 0.44612632 - samples/sec: 21.67 - lr: 0.020000\n",
-      "2021-09-08 01:22:16,718 epoch 2 - iter 30/32 - loss 0.45576823 - samples/sec: 17.72 - lr: 0.020000\n",
-      "2021-09-08 01:22:16,830 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:22:16,831 EPOCH 2 done: loss 0.4360 - lr 0.0200000\n",
-      "2021-09-08 01:22:16,897 DEV : loss 0.6510898470878601 - score 0.5\n",
-      "2021-09-08 01:22:16,899 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:02:38,446 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:38,627 epoch 2 - iter 3/32 - loss 0.82969674 - samples/sec: 18.29 - lr: 0.020000\n",
+      "2021-09-21 21:02:38,763 epoch 2 - iter 6/32 - loss 0.52474891 - samples/sec: 22.03 - lr: 0.020000\n",
+      "2021-09-21 21:02:38,900 epoch 2 - iter 9/32 - loss 0.55172017 - samples/sec: 22.02 - lr: 0.020000\n",
+      "2021-09-21 21:02:39,036 epoch 2 - iter 12/32 - loss 0.48693577 - samples/sec: 22.21 - lr: 0.020000\n",
+      "2021-09-21 21:02:39,169 epoch 2 - iter 15/32 - loss 0.40103213 - samples/sec: 22.69 - lr: 0.020000\n",
+      "2021-09-21 21:02:39,304 epoch 2 - iter 18/32 - loss 0.44057886 - samples/sec: 22.35 - lr: 0.020000\n",
+      "2021-09-21 21:02:39,439 epoch 2 - iter 21/32 - loss 0.41605254 - samples/sec: 22.39 - lr: 0.020000\n",
+      "2021-09-21 21:02:39,579 epoch 2 - iter 24/32 - loss 0.47231260 - samples/sec: 21.45 - lr: 0.020000\n",
+      "2021-09-21 21:02:39,727 epoch 2 - iter 27/32 - loss 0.49700968 - samples/sec: 20.38 - lr: 0.020000\n",
+      "2021-09-21 21:02:39,894 epoch 2 - iter 30/32 - loss 0.50988958 - samples/sec: 18.05 - lr: 0.020000\n",
+      "2021-09-21 21:02:40,008 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:40,008 EPOCH 2 done: loss 0.5933 - lr 0.0200000\n",
+      "2021-09-21 21:02:40,095 DEV : loss 0.6711782217025757 - score 0.5\n",
+      "2021-09-21 21:02:40,096 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:22:21,770 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:22:21,971 epoch 3 - iter 3/32 - loss 0.21759303 - samples/sec: 16.20 - lr: 0.020000\n",
-      "2021-09-08 01:22:22,112 epoch 3 - iter 6/32 - loss 0.13862671 - samples/sec: 21.37 - lr: 0.020000\n",
-      "2021-09-08 01:22:22,285 epoch 3 - iter 9/32 - loss 0.28403053 - samples/sec: 17.41 - lr: 0.020000\n",
-      "2021-09-08 01:22:22,439 epoch 3 - iter 12/32 - loss 0.26594156 - samples/sec: 19.53 - lr: 0.020000\n",
-      "2021-09-08 01:22:22,588 epoch 3 - iter 15/32 - loss 0.26916916 - samples/sec: 20.23 - lr: 0.020000\n",
-      "2021-09-08 01:22:22,790 epoch 3 - iter 18/32 - loss 0.33173585 - samples/sec: 14.89 - lr: 0.020000\n",
-      "2021-09-08 01:22:22,934 epoch 3 - iter 21/32 - loss 0.31104073 - samples/sec: 20.86 - lr: 0.020000\n",
-      "2021-09-08 01:22:23,111 epoch 3 - iter 24/32 - loss 0.29549900 - samples/sec: 17.03 - lr: 0.020000\n",
-      "2021-09-08 01:22:23,276 epoch 3 - iter 27/32 - loss 0.29524768 - samples/sec: 18.23 - lr: 0.020000\n",
-      "2021-09-08 01:22:23,428 epoch 3 - iter 30/32 - loss 0.31080321 - samples/sec: 19.90 - lr: 0.020000\n",
-      "2021-09-08 01:22:23,573 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:22:23,573 EPOCH 3 done: loss 0.2986 - lr 0.0200000\n",
-      "2021-09-08 01:22:23,642 DEV : loss 0.23364125192165375 - score 1.0\n",
-      "2021-09-08 01:22:23,643 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:22:27,653 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:22:27,826 epoch 4 - iter 3/32 - loss 0.38370688 - samples/sec: 20.10 - lr: 0.020000\n",
-      "2021-09-08 01:22:27,982 epoch 4 - iter 6/32 - loss 0.28675002 - samples/sec: 19.23 - lr: 0.020000\n",
-      "2021-09-08 01:22:28,149 epoch 4 - iter 9/32 - loss 0.22869163 - samples/sec: 18.11 - lr: 0.020000\n",
-      "2021-09-08 01:22:28,283 epoch 4 - iter 12/32 - loss 0.17298335 - samples/sec: 22.36 - lr: 0.020000\n",
-      "2021-09-08 01:22:28,460 epoch 4 - iter 15/32 - loss 0.16295577 - samples/sec: 17.03 - lr: 0.020000\n",
-      "2021-09-08 01:22:28,609 epoch 4 - iter 18/32 - loss 0.29810427 - samples/sec: 20.29 - lr: 0.020000\n",
-      "2021-09-08 01:22:28,786 epoch 4 - iter 21/32 - loss 0.29872316 - samples/sec: 17.00 - lr: 0.020000\n",
-      "2021-09-08 01:22:28,922 epoch 4 - iter 24/32 - loss 0.26326533 - samples/sec: 22.02 - lr: 0.020000\n",
-      "2021-09-08 01:22:29,123 epoch 4 - iter 27/32 - loss 0.24179438 - samples/sec: 15.03 - lr: 0.020000\n",
-      "2021-09-08 01:22:29,261 epoch 4 - iter 30/32 - loss 0.26180854 - samples/sec: 21.93 - lr: 0.020000\n",
-      "2021-09-08 01:22:29,366 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:22:29,367 EPOCH 4 done: loss 0.2521 - lr 0.0200000\n",
-      "2021-09-08 01:22:29,459 DEV : loss 0.4963023066520691 - score 0.75\n",
-      "2021-09-08 01:22:29,460 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:22:29,462 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:22:29,637 epoch 5 - iter 3/32 - loss 0.18653988 - samples/sec: 19.49 - lr: 0.020000\n",
-      "2021-09-08 01:22:29,786 epoch 5 - iter 6/32 - loss 0.17456159 - samples/sec: 20.26 - lr: 0.020000\n",
-      "2021-09-08 01:22:29,953 epoch 5 - iter 9/32 - loss 0.11698149 - samples/sec: 18.01 - lr: 0.020000\n",
-      "2021-09-08 01:22:30,088 epoch 5 - iter 12/32 - loss 0.08969215 - samples/sec: 22.25 - lr: 0.020000\n",
-      "2021-09-08 01:22:30,272 epoch 5 - iter 15/32 - loss 0.13476736 - samples/sec: 16.42 - lr: 0.020000\n",
-      "2021-09-08 01:22:30,434 epoch 5 - iter 18/32 - loss 0.11749023 - samples/sec: 18.56 - lr: 0.020000\n",
-      "2021-09-08 01:22:30,584 epoch 5 - iter 21/32 - loss 0.10570157 - samples/sec: 20.10 - lr: 0.020000\n",
-      "2021-09-08 01:22:30,759 epoch 5 - iter 24/32 - loss 0.16618766 - samples/sec: 17.25 - lr: 0.020000\n",
-      "2021-09-08 01:22:30,901 epoch 5 - iter 27/32 - loss 0.15630422 - samples/sec: 21.23 - lr: 0.020000\n",
-      "2021-09-08 01:22:31,084 epoch 5 - iter 30/32 - loss 0.16624399 - samples/sec: 16.42 - lr: 0.020000\n",
-      "2021-09-08 01:22:31,178 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:22:31,179 EPOCH 5 done: loss 0.1724 - lr 0.0200000\n",
-      "2021-09-08 01:22:31,355 DEV : loss 0.5717053413391113 - score 0.75\n",
-      "2021-09-08 01:22:31,356 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:22:31,428 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:22:31,640 epoch 6 - iter 3/32 - loss 0.04485154 - samples/sec: 15.26 - lr: 0.020000\n",
-      "2021-09-08 01:22:31,791 epoch 6 - iter 6/32 - loss 0.11718107 - samples/sec: 19.97 - lr: 0.020000\n",
-      "2021-09-08 01:22:31,935 epoch 6 - iter 9/32 - loss 0.09975295 - samples/sec: 20.92 - lr: 0.020000\n",
-      "2021-09-08 01:22:32,101 epoch 6 - iter 12/32 - loss 0.08240182 - samples/sec: 18.09 - lr: 0.020000\n",
-      "2021-09-08 01:22:32,248 epoch 6 - iter 15/32 - loss 0.13749846 - samples/sec: 20.49 - lr: 0.020000\n",
-      "2021-09-08 01:22:32,451 epoch 6 - iter 18/32 - loss 0.12610368 - samples/sec: 14.86 - lr: 0.020000\n",
-      "2021-09-08 01:22:32,588 epoch 6 - iter 21/32 - loss 0.16999891 - samples/sec: 21.91 - lr: 0.020000\n",
-      "2021-09-08 01:22:32,765 epoch 6 - iter 24/32 - loss 0.15197942 - samples/sec: 17.10 - lr: 0.020000\n",
-      "2021-09-08 01:22:32,928 epoch 6 - iter 27/32 - loss 0.16650186 - samples/sec: 18.39 - lr: 0.020000\n",
-      "2021-09-08 01:22:33,077 epoch 6 - iter 30/32 - loss 0.15820654 - samples/sec: 20.34 - lr: 0.020000\n",
-      "2021-09-08 01:22:33,202 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:02:45,827 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:46,008 epoch 3 - iter 3/32 - loss 0.28045478 - samples/sec: 18.08 - lr: 0.020000\n",
+      "2021-09-21 21:02:46,180 epoch 3 - iter 6/32 - loss 0.55472446 - samples/sec: 17.52 - lr: 0.020000\n",
+      "2021-09-21 21:02:46,340 epoch 3 - iter 9/32 - loss 0.52331758 - samples/sec: 18.84 - lr: 0.020000\n",
+      "2021-09-21 21:02:46,502 epoch 3 - iter 12/32 - loss 0.47103918 - samples/sec: 18.58 - lr: 0.020000\n",
+      "2021-09-21 21:02:46,671 epoch 3 - iter 15/32 - loss 0.40125191 - samples/sec: 17.84 - lr: 0.020000\n",
+      "2021-09-21 21:02:46,837 epoch 3 - iter 18/32 - loss 0.50679419 - samples/sec: 18.19 - lr: 0.020000\n",
+      "2021-09-21 21:02:46,990 epoch 3 - iter 21/32 - loss 0.45675570 - samples/sec: 19.77 - lr: 0.020000\n",
+      "2021-09-21 21:02:47,156 epoch 3 - iter 24/32 - loss 0.45404155 - samples/sec: 18.14 - lr: 0.020000\n",
+      "2021-09-21 21:02:47,312 epoch 3 - iter 27/32 - loss 0.40767151 - samples/sec: 19.23 - lr: 0.020000\n",
+      "2021-09-21 21:02:47,467 epoch 3 - iter 30/32 - loss 0.38768642 - samples/sec: 19.45 - lr: 0.020000\n",
+      "2021-09-21 21:02:47,568 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:47,568 EPOCH 3 done: loss 0.3640 - lr 0.0200000\n",
+      "2021-09-21 21:02:47,679 DEV : loss 1.1471188068389893 - score 0.25\n",
+      "2021-09-21 21:02:47,679 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:02:47,682 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:47,861 epoch 4 - iter 3/32 - loss 0.01522250 - samples/sec: 18.59 - lr: 0.020000\n",
+      "2021-09-21 21:02:48,023 epoch 4 - iter 6/32 - loss 0.31893728 - samples/sec: 18.63 - lr: 0.020000\n",
+      "2021-09-21 21:02:48,180 epoch 4 - iter 9/32 - loss 0.37028934 - samples/sec: 19.13 - lr: 0.020000\n",
+      "2021-09-21 21:02:48,336 epoch 4 - iter 12/32 - loss 0.28247752 - samples/sec: 19.27 - lr: 0.020000\n",
+      "2021-09-21 21:02:48,490 epoch 4 - iter 15/32 - loss 0.22703720 - samples/sec: 19.55 - lr: 0.020000\n",
+      "2021-09-21 21:02:48,652 epoch 4 - iter 18/32 - loss 0.28874031 - samples/sec: 18.67 - lr: 0.020000\n",
+      "2021-09-21 21:02:48,809 epoch 4 - iter 21/32 - loss 0.24870897 - samples/sec: 19.17 - lr: 0.020000\n",
+      "2021-09-21 21:02:48,973 epoch 4 - iter 24/32 - loss 0.36685648 - samples/sec: 18.34 - lr: 0.020000\n",
+      "2021-09-21 21:02:49,132 epoch 4 - iter 27/32 - loss 0.33122891 - samples/sec: 18.99 - lr: 0.020000\n",
+      "2021-09-21 21:02:49,294 epoch 4 - iter 30/32 - loss 0.32262887 - samples/sec: 18.54 - lr: 0.020000\n",
+      "2021-09-21 21:02:49,407 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:49,407 EPOCH 4 done: loss 0.3381 - lr 0.0200000\n",
+      "2021-09-21 21:02:49,497 DEV : loss 0.8644821643829346 - score 0.5\n",
+      "2021-09-21 21:02:49,498 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:02:49,511 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:49,695 epoch 5 - iter 3/32 - loss 0.11272039 - samples/sec: 18.32 - lr: 0.020000\n",
+      "2021-09-21 21:02:49,856 epoch 5 - iter 6/32 - loss 0.07152061 - samples/sec: 18.80 - lr: 0.020000\n",
+      "2021-09-21 21:02:50,021 epoch 5 - iter 9/32 - loss 0.12761469 - samples/sec: 18.19 - lr: 0.020000\n",
+      "2021-09-21 21:02:50,187 epoch 5 - iter 12/32 - loss 0.14185830 - samples/sec: 18.22 - lr: 0.020000\n",
+      "2021-09-21 21:02:50,350 epoch 5 - iter 15/32 - loss 0.11394036 - samples/sec: 18.44 - lr: 0.020000\n",
+      "2021-09-21 21:02:50,515 epoch 5 - iter 18/32 - loss 0.09624926 - samples/sec: 18.30 - lr: 0.020000\n",
+      "2021-09-21 21:02:50,681 epoch 5 - iter 21/32 - loss 0.12219017 - samples/sec: 18.19 - lr: 0.020000\n",
+      "2021-09-21 21:02:50,848 epoch 5 - iter 24/32 - loss 0.12033368 - samples/sec: 18.10 - lr: 0.020000\n",
+      "2021-09-21 21:02:51,016 epoch 5 - iter 27/32 - loss 0.13558014 - samples/sec: 17.95 - lr: 0.020000\n",
+      "2021-09-21 21:02:51,153 epoch 5 - iter 30/32 - loss 0.12263978 - samples/sec: 21.87 - lr: 0.020000\n",
+      "2021-09-21 21:02:51,248 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:51,248 EPOCH 5 done: loss 0.1428 - lr 0.0200000\n",
+      "2021-09-21 21:02:51,330 DEV : loss 0.9961050152778625 - score 0.5\n",
+      "2021-09-21 21:02:51,331 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:02:51,334 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:51,485 epoch 6 - iter 3/32 - loss 0.00479265 - samples/sec: 21.84 - lr: 0.020000\n",
+      "2021-09-21 21:02:51,631 epoch 6 - iter 6/32 - loss 0.14342531 - samples/sec: 20.75 - lr: 0.020000\n",
+      "2021-09-21 21:02:51,772 epoch 6 - iter 9/32 - loss 0.21855802 - samples/sec: 21.41 - lr: 0.020000\n",
+      "2021-09-21 21:02:51,913 epoch 6 - iter 12/32 - loss 0.16504804 - samples/sec: 21.39 - lr: 0.020000\n",
+      "2021-09-21 21:02:52,053 epoch 6 - iter 15/32 - loss 0.15428973 - samples/sec: 21.46 - lr: 0.020000\n",
+      "2021-09-21 21:02:52,191 epoch 6 - iter 18/32 - loss 0.12909946 - samples/sec: 21.88 - lr: 0.020000\n",
+      "2021-09-21 21:02:52,332 epoch 6 - iter 21/32 - loss 0.12334244 - samples/sec: 21.34 - lr: 0.020000\n",
+      "2021-09-21 21:02:52,473 epoch 6 - iter 24/32 - loss 0.11069003 - samples/sec: 21.40 - lr: 0.020000\n",
+      "2021-09-21 21:02:52,614 epoch 6 - iter 27/32 - loss 0.12032142 - samples/sec: 21.48 - lr: 0.020000\n",
+      "2021-09-21 21:02:52,754 epoch 6 - iter 30/32 - loss 0.11778079 - samples/sec: 21.46 - lr: 0.020000\n",
+      "2021-09-21 21:02:52,851 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:22:33,202 EPOCH 6 done: loss 0.1484 - lr 0.0200000\n",
-      "2021-09-08 01:22:33,284 DEV : loss 0.3321877717971802 - score 0.75\n",
-      "2021-09-08 01:22:33,284 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:22:33,286 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:22:33,488 epoch 7 - iter 3/32 - loss 0.55122197 - samples/sec: 16.15 - lr: 0.020000\n",
-      "2021-09-08 01:22:33,626 epoch 7 - iter 6/32 - loss 0.28649311 - samples/sec: 21.87 - lr: 0.020000\n",
-      "2021-09-08 01:22:33,807 epoch 7 - iter 9/32 - loss 0.19447283 - samples/sec: 16.62 - lr: 0.020000\n",
-      "2021-09-08 01:22:33,964 epoch 7 - iter 12/32 - loss 0.15579739 - samples/sec: 19.22 - lr: 0.020000\n",
-      "2021-09-08 01:22:34,106 epoch 7 - iter 15/32 - loss 0.12472122 - samples/sec: 21.17 - lr: 0.020000\n",
-      "2021-09-08 01:22:34,283 epoch 7 - iter 18/32 - loss 0.14050221 - samples/sec: 17.00 - lr: 0.020000\n",
-      "2021-09-08 01:22:34,420 epoch 7 - iter 21/32 - loss 0.12140665 - samples/sec: 22.04 - lr: 0.020000\n",
-      "2021-09-08 01:22:34,611 epoch 7 - iter 24/32 - loss 0.10700386 - samples/sec: 15.71 - lr: 0.020000\n",
-      "2021-09-08 01:22:34,752 epoch 7 - iter 27/32 - loss 0.10580448 - samples/sec: 21.35 - lr: 0.020000\n",
-      "2021-09-08 01:22:34,910 epoch 7 - iter 30/32 - loss 0.09626063 - samples/sec: 19.10 - lr: 0.020000\n",
-      "2021-09-08 01:22:35,032 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:22:35,033 EPOCH 7 done: loss 0.1202 - lr 0.0200000\n",
-      "2021-09-08 01:22:35,099 DEV : loss 0.48255717754364014 - score 0.75\n",
-      "Epoch     7: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:22:35,100 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:22:35,103 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:22:35,304 epoch 8 - iter 3/32 - loss 0.09626638 - samples/sec: 16.32 - lr: 0.010000\n",
-      "2021-09-08 01:22:35,441 epoch 8 - iter 6/32 - loss 0.09688847 - samples/sec: 21.93 - lr: 0.010000\n",
-      "2021-09-08 01:22:35,617 epoch 8 - iter 9/32 - loss 0.14374179 - samples/sec: 17.09 - lr: 0.010000\n",
-      "2021-09-08 01:22:35,773 epoch 8 - iter 12/32 - loss 0.11050598 - samples/sec: 19.42 - lr: 0.010000\n",
-      "2021-09-08 01:22:35,919 epoch 8 - iter 15/32 - loss 0.11960812 - samples/sec: 20.52 - lr: 0.010000\n",
-      "2021-09-08 01:22:36,105 epoch 8 - iter 18/32 - loss 0.10041715 - samples/sec: 16.23 - lr: 0.010000\n",
-      "2021-09-08 01:22:36,243 epoch 8 - iter 21/32 - loss 0.09971031 - samples/sec: 21.77 - lr: 0.010000\n",
-      "2021-09-08 01:22:36,401 epoch 8 - iter 24/32 - loss 0.08772263 - samples/sec: 19.14 - lr: 0.010000\n",
-      "2021-09-08 01:22:36,565 epoch 8 - iter 27/32 - loss 0.07800898 - samples/sec: 18.34 - lr: 0.010000\n",
-      "2021-09-08 01:22:36,702 epoch 8 - iter 30/32 - loss 0.07057489 - samples/sec: 21.95 - lr: 0.010000\n",
-      "2021-09-08 01:22:36,835 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:22:36,836 EPOCH 8 done: loss 0.0752 - lr 0.0100000\n",
-      "2021-09-08 01:22:36,936 DEV : loss 0.18235667049884796 - score 1.0\n",
-      "2021-09-08 01:22:36,937 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:22:41,231 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:22:41,428 epoch 9 - iter 3/32 - loss 0.09819789 - samples/sec: 16.45 - lr: 0.010000\n",
-      "2021-09-08 01:22:41,563 epoch 9 - iter 6/32 - loss 0.04970462 - samples/sec: 22.37 - lr: 0.010000\n",
-      "2021-09-08 01:22:41,735 epoch 9 - iter 9/32 - loss 0.04974359 - samples/sec: 17.51 - lr: 0.010000\n",
-      "2021-09-08 01:22:41,899 epoch 9 - iter 12/32 - loss 0.04051941 - samples/sec: 18.36 - lr: 0.010000\n",
-      "2021-09-08 01:22:42,040 epoch 9 - iter 15/32 - loss 0.03327994 - samples/sec: 21.44 - lr: 0.010000\n",
-      "2021-09-08 01:22:42,221 epoch 9 - iter 18/32 - loss 0.02791706 - samples/sec: 16.62 - lr: 0.010000\n",
-      "2021-09-08 01:22:42,356 epoch 9 - iter 21/32 - loss 0.02409175 - samples/sec: 22.28 - lr: 0.010000\n",
-      "2021-09-08 01:22:42,554 epoch 9 - iter 24/32 - loss 0.03228722 - samples/sec: 15.19 - lr: 0.010000\n",
-      "2021-09-08 01:22:42,723 epoch 9 - iter 27/32 - loss 0.03527517 - samples/sec: 17.80 - lr: 0.010000\n",
-      "2021-09-08 01:22:42,874 epoch 9 - iter 30/32 - loss 0.03228143 - samples/sec: 19.93 - lr: 0.010000\n",
-      "2021-09-08 01:22:42,999 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:22:43,000 EPOCH 9 done: loss 0.0303 - lr 0.0100000\n",
-      "2021-09-08 01:22:43,097 DEV : loss 0.0717034637928009 - score 1.0\n",
-      "2021-09-08 01:22:43,098 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:02:52,851 EPOCH 6 done: loss 0.1122 - lr 0.0200000\n",
+      "2021-09-21 21:02:53,027 DEV : loss 0.6627135276794434 - score 0.75\n",
+      "2021-09-21 21:02:53,028 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:22:46,947 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:22:47,103 epoch 10 - iter 3/32 - loss 0.00241309 - samples/sec: 21.96 - lr: 0.010000\n",
-      "2021-09-08 01:22:47,269 epoch 10 - iter 6/32 - loss 0.00541481 - samples/sec: 18.22 - lr: 0.010000\n",
-      "2021-09-08 01:22:47,421 epoch 10 - iter 9/32 - loss 0.06623700 - samples/sec: 19.79 - lr: 0.010000\n",
-      "2021-09-08 01:22:47,591 epoch 10 - iter 12/32 - loss 0.05093894 - samples/sec: 17.72 - lr: 0.010000\n",
-      "2021-09-08 01:22:47,745 epoch 10 - iter 15/32 - loss 0.04116887 - samples/sec: 19.54 - lr: 0.010000\n",
-      "2021-09-08 01:22:47,921 epoch 10 - iter 18/32 - loss 0.04053816 - samples/sec: 17.10 - lr: 0.010000\n",
-      "2021-09-08 01:22:48,094 epoch 10 - iter 21/32 - loss 0.03490909 - samples/sec: 17.42 - lr: 0.010000\n",
-      "2021-09-08 01:22:48,228 epoch 10 - iter 24/32 - loss 0.03095224 - samples/sec: 22.58 - lr: 0.010000\n",
-      "2021-09-08 01:22:48,423 epoch 10 - iter 27/32 - loss 0.02756659 - samples/sec: 15.41 - lr: 0.010000\n",
-      "2021-09-08 01:22:48,558 epoch 10 - iter 30/32 - loss 0.02483469 - samples/sec: 22.37 - lr: 0.010000\n",
-      "2021-09-08 01:22:48,656 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:22:48,656 EPOCH 10 done: loss 0.0268 - lr 0.0100000\n",
-      "2021-09-08 01:22:48,873 DEV : loss 0.042188405990600586 - score 1.0\n",
-      "2021-09-08 01:22:48,874 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:22:59,609 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:22:59,610 Testing using best model ...\n",
-      "2021-09-08 01:22:59,611 loading file temp/best-model.pt\n",
+      "2021-09-21 21:03:07,737 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:07,909 epoch 7 - iter 3/32 - loss 0.00179054 - samples/sec: 19.08 - lr: 0.020000\n",
+      "2021-09-21 21:03:08,072 epoch 7 - iter 6/32 - loss 0.01261651 - samples/sec: 18.42 - lr: 0.020000\n",
+      "2021-09-21 21:03:08,246 epoch 7 - iter 9/32 - loss 0.16873304 - samples/sec: 17.39 - lr: 0.020000\n",
+      "2021-09-21 21:03:08,416 epoch 7 - iter 12/32 - loss 0.19752349 - samples/sec: 17.78 - lr: 0.020000\n",
+      "2021-09-21 21:03:08,569 epoch 7 - iter 15/32 - loss 0.15970135 - samples/sec: 19.65 - lr: 0.020000\n",
+      "2021-09-21 21:03:08,728 epoch 7 - iter 18/32 - loss 0.26456057 - samples/sec: 18.93 - lr: 0.020000\n",
+      "2021-09-21 21:03:08,877 epoch 7 - iter 21/32 - loss 0.22709027 - samples/sec: 20.22 - lr: 0.020000\n",
+      "2021-09-21 21:03:09,040 epoch 7 - iter 24/32 - loss 0.23887155 - samples/sec: 18.41 - lr: 0.020000\n",
+      "2021-09-21 21:03:09,183 epoch 7 - iter 27/32 - loss 0.29893991 - samples/sec: 21.19 - lr: 0.020000\n",
+      "2021-09-21 21:03:09,315 epoch 7 - iter 30/32 - loss 0.27310218 - samples/sec: 22.82 - lr: 0.020000\n",
+      "2021-09-21 21:03:09,405 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:09,405 EPOCH 7 done: loss 0.2584 - lr 0.0200000\n",
+      "2021-09-21 21:03:09,616 DEV : loss 0.7419806718826294 - score 0.75\n",
+      "2021-09-21 21:03:09,617 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:03:09,713 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:09,878 epoch 8 - iter 3/32 - loss 0.00278774 - samples/sec: 20.03 - lr: 0.020000\n",
+      "2021-09-21 21:03:10,041 epoch 8 - iter 6/32 - loss 0.08658148 - samples/sec: 18.47 - lr: 0.020000\n",
+      "2021-09-21 21:03:10,191 epoch 8 - iter 9/32 - loss 0.17843726 - samples/sec: 20.01 - lr: 0.020000\n",
+      "2021-09-21 21:03:10,350 epoch 8 - iter 12/32 - loss 0.20189757 - samples/sec: 19.03 - lr: 0.020000\n",
+      "2021-09-21 21:03:10,519 epoch 8 - iter 15/32 - loss 0.25530468 - samples/sec: 17.77 - lr: 0.020000\n",
+      "2021-09-21 21:03:10,674 epoch 8 - iter 18/32 - loss 0.21439191 - samples/sec: 19.43 - lr: 0.020000\n",
+      "2021-09-21 21:03:10,802 epoch 8 - iter 21/32 - loss 0.18531977 - samples/sec: 23.50 - lr: 0.020000\n",
+      "2021-09-21 21:03:10,934 epoch 8 - iter 24/32 - loss 0.16327802 - samples/sec: 22.85 - lr: 0.020000\n",
+      "2021-09-21 21:03:11,081 epoch 8 - iter 27/32 - loss 0.14529953 - samples/sec: 20.54 - lr: 0.020000\n",
+      "2021-09-21 21:03:11,239 epoch 8 - iter 30/32 - loss 0.15184400 - samples/sec: 19.00 - lr: 0.020000\n",
+      "2021-09-21 21:03:11,345 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:11,346 EPOCH 8 done: loss 0.1424 - lr 0.0200000\n",
+      "2021-09-21 21:03:11,547 DEV : loss 1.1134755611419678 - score 0.5\n",
+      "2021-09-21 21:03:11,548 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:03:11,551 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:11,731 epoch 9 - iter 3/32 - loss 0.09281843 - samples/sec: 18.92 - lr: 0.020000\n",
+      "2021-09-21 21:03:11,892 epoch 9 - iter 6/32 - loss 0.04683923 - samples/sec: 18.73 - lr: 0.020000\n",
+      "2021-09-21 21:03:12,051 epoch 9 - iter 9/32 - loss 0.06267134 - samples/sec: 18.89 - lr: 0.020000\n",
+      "2021-09-21 21:03:12,207 epoch 9 - iter 12/32 - loss 0.04933613 - samples/sec: 19.33 - lr: 0.020000\n",
+      "2021-09-21 21:03:12,355 epoch 9 - iter 15/32 - loss 0.04052587 - samples/sec: 20.30 - lr: 0.020000\n",
+      "2021-09-21 21:03:12,483 epoch 9 - iter 18/32 - loss 0.03440828 - samples/sec: 23.52 - lr: 0.020000\n",
+      "2021-09-21 21:03:12,611 epoch 9 - iter 21/32 - loss 0.02977873 - samples/sec: 23.58 - lr: 0.020000\n",
+      "2021-09-21 21:03:12,739 epoch 9 - iter 24/32 - loss 0.02642372 - samples/sec: 23.45 - lr: 0.020000\n",
+      "2021-09-21 21:03:12,867 epoch 9 - iter 27/32 - loss 0.02384066 - samples/sec: 23.61 - lr: 0.020000\n",
+      "2021-09-21 21:03:12,995 epoch 9 - iter 30/32 - loss 0.02167432 - samples/sec: 23.60 - lr: 0.020000\n",
+      "2021-09-21 21:03:13,080 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:13,081 EPOCH 9 done: loss 0.0204 - lr 0.0200000\n",
+      "2021-09-21 21:03:14,693 DEV : loss 1.0707889795303345 - score 0.5\n",
+      "2021-09-21 21:03:14,694 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:03:14,816 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:15,003 epoch 10 - iter 3/32 - loss 0.06948371 - samples/sec: 17.88 - lr: 0.020000\n",
+      "2021-09-21 21:03:15,165 epoch 10 - iter 6/32 - loss 0.03558833 - samples/sec: 18.64 - lr: 0.020000\n",
+      "2021-09-21 21:03:15,327 epoch 10 - iter 9/32 - loss 0.02461491 - samples/sec: 18.69 - lr: 0.020000\n",
+      "2021-09-21 21:03:15,498 epoch 10 - iter 12/32 - loss 0.01863825 - samples/sec: 17.58 - lr: 0.020000\n",
+      "2021-09-21 21:03:15,665 epoch 10 - iter 15/32 - loss 0.10705170 - samples/sec: 18.04 - lr: 0.020000\n",
+      "2021-09-21 21:03:15,828 epoch 10 - iter 18/32 - loss 0.08926220 - samples/sec: 18.57 - lr: 0.020000\n",
+      "2021-09-21 21:03:16,000 epoch 10 - iter 21/32 - loss 0.07784789 - samples/sec: 17.48 - lr: 0.020000\n",
+      "2021-09-21 21:03:16,158 epoch 10 - iter 24/32 - loss 0.06830046 - samples/sec: 19.02 - lr: 0.020000\n",
+      "2021-09-21 21:03:16,309 epoch 10 - iter 27/32 - loss 0.06092489 - samples/sec: 20.02 - lr: 0.020000\n",
+      "2021-09-21 21:03:16,460 epoch 10 - iter 30/32 - loss 0.05486643 - samples/sec: 19.95 - lr: 0.020000\n",
+      "2021-09-21 21:03:16,566 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:16,567 EPOCH 10 done: loss 0.0517 - lr 0.0200000\n",
+      "2021-09-21 21:03:16,644 DEV : loss 1.028996467590332 - score 0.5\n",
+      "Epoch    10: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:03:16,645 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:03:21,287 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:21,288 Testing using best model ...\n",
+      "2021-09-21 21:03:21,311 loading file temp/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:23:04,732 \t0.5\n",
-      "2021-09-08 01:23:04,733 \n",
+      "2021-09-21 21:03:32,556 \t0.75\n",
+      "2021-09-21 21:03:32,556 \n",
       "Results:\n",
-      "- F-score (micro) 0.5\n",
-      "- F-score (macro) 0.2917\n",
-      "- Accuracy 0.5\n",
+      "- F-score (micro) 0.75\n",
+      "- F-score (macro) 0.6667\n",
+      "- Accuracy 0.75\n",
       "\n",
       "By class:\n",
       "              precision    recall  f1-score   support\n",
       "\n",
-      "     sadness     0.0000    0.0000    0.0000         0\n",
-      "    optimism     1.0000    0.3333    0.5000         3\n",
-      "       anger     0.0000    0.0000    0.0000         0\n",
+      "     sadness     1.0000    1.0000    1.0000         1\n",
+      "    optimism     1.0000    1.0000    1.0000         1\n",
+      "       anger     0.0000    0.0000    0.0000         1\n",
       "         joy     0.5000    1.0000    0.6667         1\n",
       "\n",
-      "   micro avg     0.5000    0.5000    0.5000         4\n",
-      "   macro avg     0.3750    0.3333    0.2917         4\n",
-      "weighted avg     0.8750    0.5000    0.5417         4\n",
-      " samples avg     0.5000    0.5000    0.5000         4\n",
+      "   micro avg     0.7500    0.7500    0.7500         4\n",
+      "   macro avg     0.6250    0.7500    0.6667         4\n",
+      "weighted avg     0.6250    0.7500    0.6667         4\n",
+      " samples avg     0.7500    0.7500    0.7500         4\n",
       "\n",
-      "2021-09-08 01:23:04,733 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:23:12,006 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
+      "2021-09-21 21:03:32,557 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:40,296 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:23:16,209 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:03:46,664 Computing label dictionary. Progress:\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 36/36 [00:00<00:00, 34387.37it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 21:03:46,667 [b'sadness', b'optimism', b'anger', b'joy']\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 36/36 [00:00<00:00, 38568.31it/s]"
+      "\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:23:16,212 [b'sadness', b'optimism', b'anger', b'joy']\n",
-      "2021-09-08 01:23:16,222 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:23:16,224 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:03:46,900 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:46,902 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -951,241 +961,234 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:23:16,225 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:23:16,225 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
-      "2021-09-08 01:23:16,225 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:23:16,226 Parameters:\n",
-      "2021-09-08 01:23:16,226  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:23:16,226  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:23:16,227  - patience: \"3\"\n",
-      "2021-09-08 01:23:16,227  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:23:16,227  - max_epochs: \"10\"\n",
-      "2021-09-08 01:23:16,228  - shuffle: \"True\"\n",
-      "2021-09-08 01:23:16,228  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:23:16,228  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:23:16,229 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:23:16,229 Model training base path: \"temp\"\n",
-      "2021-09-08 01:23:16,229 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:23:16,230 Device: cuda:0\n",
-      "2021-09-08 01:23:16,230 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:23:16,230 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:23:16,239 ----------------------------------------------------------------------------------------------------\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 01:23:16,433 epoch 1 - iter 3/32 - loss 0.44381352 - samples/sec: 18.73 - lr: 0.020000\n",
-      "2021-09-08 01:23:16,578 epoch 1 - iter 6/32 - loss 0.73179172 - samples/sec: 20.73 - lr: 0.020000\n",
-      "2021-09-08 01:23:16,766 epoch 1 - iter 9/32 - loss 0.69857435 - samples/sec: 15.95 - lr: 0.020000\n",
-      "2021-09-08 01:23:16,910 epoch 1 - iter 12/32 - loss 0.83049008 - samples/sec: 20.90 - lr: 0.020000\n",
-      "2021-09-08 01:23:17,098 epoch 1 - iter 15/32 - loss 0.76601903 - samples/sec: 16.04 - lr: 0.020000\n",
-      "2021-09-08 01:23:17,252 epoch 1 - iter 18/32 - loss 0.75335679 - samples/sec: 19.60 - lr: 0.020000\n",
-      "2021-09-08 01:23:17,433 epoch 1 - iter 21/32 - loss 0.77570072 - samples/sec: 16.63 - lr: 0.020000\n",
-      "2021-09-08 01:23:17,587 epoch 1 - iter 24/32 - loss 0.72196673 - samples/sec: 19.59 - lr: 0.020000\n",
-      "2021-09-08 01:23:17,781 epoch 1 - iter 27/32 - loss 0.73620151 - samples/sec: 15.52 - lr: 0.020000\n",
-      "2021-09-08 01:23:17,925 epoch 1 - iter 30/32 - loss 0.72995463 - samples/sec: 20.89 - lr: 0.020000\n",
-      "2021-09-08 01:23:18,043 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:23:18,044 EPOCH 1 done: loss 0.6983 - lr 0.0200000\n",
-      "2021-09-08 01:23:18,143 DEV : loss 1.062010407447815 - score 0.5\n",
-      "2021-09-08 01:23:18,144 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:03:46,902 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:46,903 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
+      "2021-09-21 21:03:46,903 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:46,903 Parameters:\n",
+      "2021-09-21 21:03:46,903  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:03:46,904  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:03:46,904  - patience: \"3\"\n",
+      "2021-09-21 21:03:46,904  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:03:46,904  - max_epochs: \"10\"\n",
+      "2021-09-21 21:03:46,905  - shuffle: \"True\"\n",
+      "2021-09-21 21:03:46,905  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:03:46,905  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:03:46,906 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:46,906 Model training base path: \"temp\"\n",
+      "2021-09-21 21:03:46,906 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:46,906 Device: cuda:0\n",
+      "2021-09-21 21:03:46,907 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:46,907 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:03:47,084 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:47,265 epoch 1 - iter 3/32 - loss 0.79062030 - samples/sec: 18.81 - lr: 0.020000\n",
+      "2021-09-21 21:03:47,433 epoch 1 - iter 6/32 - loss 0.57300419 - samples/sec: 17.95 - lr: 0.020000\n",
+      "2021-09-21 21:03:47,585 epoch 1 - iter 9/32 - loss 0.66075795 - samples/sec: 19.82 - lr: 0.020000\n",
+      "2021-09-21 21:03:47,746 epoch 1 - iter 12/32 - loss 0.57794167 - samples/sec: 18.70 - lr: 0.020000\n",
+      "2021-09-21 21:03:47,916 epoch 1 - iter 15/32 - loss 0.54763509 - samples/sec: 17.80 - lr: 0.020000\n",
+      "2021-09-21 21:03:48,068 epoch 1 - iter 18/32 - loss 0.61391532 - samples/sec: 19.74 - lr: 0.020000\n",
+      "2021-09-21 21:03:48,228 epoch 1 - iter 21/32 - loss 0.61703684 - samples/sec: 18.83 - lr: 0.020000\n",
+      "2021-09-21 21:03:48,397 epoch 1 - iter 24/32 - loss 0.64820205 - samples/sec: 17.89 - lr: 0.020000\n",
+      "2021-09-21 21:03:48,555 epoch 1 - iter 27/32 - loss 0.69760358 - samples/sec: 19.08 - lr: 0.020000\n",
+      "2021-09-21 21:03:48,716 epoch 1 - iter 30/32 - loss 0.67759270 - samples/sec: 18.68 - lr: 0.020000\n",
+      "2021-09-21 21:03:48,825 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:48,825 EPOCH 1 done: loss 0.6421 - lr 0.0200000\n",
+      "2021-09-21 21:03:48,999 DEV : loss 0.46343719959259033 - score 0.75\n",
+      "2021-09-21 21:03:49,000 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:23:25,438 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:23:25,614 epoch 2 - iter 3/32 - loss 1.00224195 - samples/sec: 19.10 - lr: 0.020000\n",
-      "2021-09-08 01:23:25,784 epoch 2 - iter 6/32 - loss 0.97342273 - samples/sec: 17.70 - lr: 0.020000\n",
-      "2021-09-08 01:23:25,940 epoch 2 - iter 9/32 - loss 0.95446195 - samples/sec: 19.34 - lr: 0.020000\n",
-      "2021-09-08 01:23:26,116 epoch 2 - iter 12/32 - loss 0.90604997 - samples/sec: 17.11 - lr: 0.020000\n",
-      "2021-09-08 01:23:26,283 epoch 2 - iter 15/32 - loss 0.89280568 - samples/sec: 17.97 - lr: 0.020000\n",
-      "2021-09-08 01:23:26,441 epoch 2 - iter 18/32 - loss 0.87160017 - samples/sec: 19.12 - lr: 0.020000\n",
-      "2021-09-08 01:23:26,591 epoch 2 - iter 21/32 - loss 0.84804379 - samples/sec: 20.11 - lr: 0.020000\n",
-      "2021-09-08 01:23:26,786 epoch 2 - iter 24/32 - loss 0.84580346 - samples/sec: 15.38 - lr: 0.020000\n",
-      "2021-09-08 01:23:26,931 epoch 2 - iter 27/32 - loss 0.83841469 - samples/sec: 20.89 - lr: 0.020000\n",
-      "2021-09-08 01:23:27,097 epoch 2 - iter 30/32 - loss 0.82225585 - samples/sec: 18.06 - lr: 0.020000\n",
-      "2021-09-08 01:23:27,207 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:23:27,208 EPOCH 2 done: loss 0.8142 - lr 0.0200000\n",
-      "2021-09-08 01:23:27,272 DEV : loss 0.5809824466705322 - score 0.5\n",
-      "2021-09-08 01:23:27,273 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:23:31,480 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:23:31,683 epoch 3 - iter 3/32 - loss 0.81045753 - samples/sec: 16.00 - lr: 0.020000\n",
-      "2021-09-08 01:23:31,822 epoch 3 - iter 6/32 - loss 0.75422652 - samples/sec: 21.75 - lr: 0.020000\n",
-      "2021-09-08 01:23:31,990 epoch 3 - iter 9/32 - loss 0.71745671 - samples/sec: 17.96 - lr: 0.020000\n",
-      "2021-09-08 01:23:32,149 epoch 3 - iter 12/32 - loss 0.71814678 - samples/sec: 18.97 - lr: 0.020000\n",
-      "2021-09-08 01:23:32,293 epoch 3 - iter 15/32 - loss 0.72200278 - samples/sec: 20.87 - lr: 0.020000\n",
-      "2021-09-08 01:23:32,429 epoch 3 - iter 18/32 - loss 0.71735633 - samples/sec: 22.23 - lr: 0.020000\n",
-      "2021-09-08 01:23:32,561 epoch 3 - iter 21/32 - loss 0.70783509 - samples/sec: 22.83 - lr: 0.020000\n",
-      "2021-09-08 01:23:32,692 epoch 3 - iter 24/32 - loss 0.70739561 - samples/sec: 22.86 - lr: 0.020000\n",
-      "2021-09-08 01:23:32,830 epoch 3 - iter 27/32 - loss 0.70915741 - samples/sec: 21.87 - lr: 0.020000\n",
-      "2021-09-08 01:23:32,963 epoch 3 - iter 30/32 - loss 0.70384673 - samples/sec: 22.61 - lr: 0.020000\n",
-      "2021-09-08 01:23:33,057 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:23:33,058 EPOCH 3 done: loss 0.7032 - lr 0.0200000\n",
-      "2021-09-08 01:23:33,119 DEV : loss 0.5856149792671204 - score 0.0\n",
-      "2021-09-08 01:23:33,120 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:23:33,122 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:23:33,269 epoch 4 - iter 3/32 - loss 0.59950564 - samples/sec: 22.24 - lr: 0.020000\n",
-      "2021-09-08 01:23:33,403 epoch 4 - iter 6/32 - loss 0.60581243 - samples/sec: 22.59 - lr: 0.020000\n",
-      "2021-09-08 01:23:33,538 epoch 4 - iter 9/32 - loss 0.64795564 - samples/sec: 22.20 - lr: 0.020000\n",
-      "2021-09-08 01:23:33,671 epoch 4 - iter 12/32 - loss 0.65833097 - samples/sec: 22.72 - lr: 0.020000\n",
-      "2021-09-08 01:23:33,804 epoch 4 - iter 15/32 - loss 0.65472237 - samples/sec: 22.70 - lr: 0.020000\n",
-      "2021-09-08 01:23:33,941 epoch 4 - iter 18/32 - loss 0.66941006 - samples/sec: 22.05 - lr: 0.020000\n",
-      "2021-09-08 01:23:34,074 epoch 4 - iter 21/32 - loss 0.65784119 - samples/sec: 22.69 - lr: 0.020000\n",
-      "2021-09-08 01:23:34,206 epoch 4 - iter 24/32 - loss 0.65765259 - samples/sec: 22.70 - lr: 0.020000\n",
-      "2021-09-08 01:23:34,340 epoch 4 - iter 27/32 - loss 0.65383592 - samples/sec: 22.47 - lr: 0.020000\n",
-      "2021-09-08 01:23:34,480 epoch 4 - iter 30/32 - loss 0.64869397 - samples/sec: 21.60 - lr: 0.020000\n",
-      "2021-09-08 01:23:34,573 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:23:34,574 EPOCH 4 done: loss 0.6483 - lr 0.0200000\n",
-      "2021-09-08 01:23:34,635 DEV : loss 0.5942321419715881 - score 0.5\n",
-      "2021-09-08 01:23:34,636 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:23:34,638 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:23:34,784 epoch 5 - iter 3/32 - loss 0.61654913 - samples/sec: 22.49 - lr: 0.020000\n",
-      "2021-09-08 01:23:34,923 epoch 5 - iter 6/32 - loss 0.69976417 - samples/sec: 21.62 - lr: 0.020000\n",
-      "2021-09-08 01:23:35,062 epoch 5 - iter 9/32 - loss 0.71543525 - samples/sec: 21.72 - lr: 0.020000\n",
-      "2021-09-08 01:23:35,194 epoch 5 - iter 12/32 - loss 0.69940736 - samples/sec: 22.88 - lr: 0.020000\n",
-      "2021-09-08 01:23:35,327 epoch 5 - iter 15/32 - loss 0.69687902 - samples/sec: 22.67 - lr: 0.020000\n",
-      "2021-09-08 01:23:35,463 epoch 5 - iter 18/32 - loss 0.67860282 - samples/sec: 22.13 - lr: 0.020000\n",
-      "2021-09-08 01:23:35,601 epoch 5 - iter 21/32 - loss 0.69710262 - samples/sec: 21.80 - lr: 0.020000\n",
-      "2021-09-08 01:23:35,736 epoch 5 - iter 24/32 - loss 0.68824934 - samples/sec: 22.42 - lr: 0.020000\n",
-      "2021-09-08 01:23:35,871 epoch 5 - iter 27/32 - loss 0.68837840 - samples/sec: 22.24 - lr: 0.020000\n",
-      "2021-09-08 01:23:36,004 epoch 5 - iter 30/32 - loss 0.68424158 - samples/sec: 22.69 - lr: 0.020000\n",
-      "2021-09-08 01:23:36,094 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:23:36,094 EPOCH 5 done: loss 0.6862 - lr 0.0200000\n",
-      "2021-09-08 01:23:36,157 DEV : loss 0.7439314126968384 - score 0.5\n",
-      "2021-09-08 01:23:36,158 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:23:36,160 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:23:36,308 epoch 6 - iter 3/32 - loss 0.68572472 - samples/sec: 22.14 - lr: 0.020000\n",
-      "2021-09-08 01:23:36,441 epoch 6 - iter 6/32 - loss 0.70571419 - samples/sec: 22.65 - lr: 0.020000\n",
-      "2021-09-08 01:23:36,578 epoch 6 - iter 9/32 - loss 0.69748104 - samples/sec: 22.04 - lr: 0.020000\n",
-      "2021-09-08 01:23:36,718 epoch 6 - iter 12/32 - loss 0.69145993 - samples/sec: 21.48 - lr: 0.020000\n",
-      "2021-09-08 01:23:36,851 epoch 6 - iter 15/32 - loss 0.69382688 - samples/sec: 22.65 - lr: 0.020000\n",
-      "2021-09-08 01:23:36,983 epoch 6 - iter 18/32 - loss 0.68322123 - samples/sec: 22.81 - lr: 0.020000\n",
-      "2021-09-08 01:23:37,115 epoch 6 - iter 21/32 - loss 0.67920198 - samples/sec: 22.88 - lr: 0.020000\n",
-      "2021-09-08 01:23:37,249 epoch 6 - iter 24/32 - loss 0.66934155 - samples/sec: 22.53 - lr: 0.020000\n",
-      "2021-09-08 01:23:37,380 epoch 6 - iter 27/32 - loss 0.66969143 - samples/sec: 22.90 - lr: 0.020000\n",
-      "2021-09-08 01:23:37,517 epoch 6 - iter 30/32 - loss 0.66723368 - samples/sec: 22.00 - lr: 0.020000\n",
-      "2021-09-08 01:23:37,606 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:03:56,550 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:56,791 epoch 2 - iter 3/32 - loss 0.38394081 - samples/sec: 14.23 - lr: 0.020000\n",
+      "2021-09-21 21:03:57,001 epoch 2 - iter 6/32 - loss 0.58243245 - samples/sec: 14.36 - lr: 0.020000\n",
+      "2021-09-21 21:03:57,191 epoch 2 - iter 9/32 - loss 0.54135090 - samples/sec: 15.82 - lr: 0.020000\n",
+      "2021-09-21 21:03:57,385 epoch 2 - iter 12/32 - loss 0.48159894 - samples/sec: 15.54 - lr: 0.020000\n",
+      "2021-09-21 21:03:57,562 epoch 2 - iter 15/32 - loss 0.61747905 - samples/sec: 16.98 - lr: 0.020000\n",
+      "2021-09-21 21:03:57,722 epoch 2 - iter 18/32 - loss 0.53312333 - samples/sec: 18.82 - lr: 0.020000\n",
+      "2021-09-21 21:03:57,901 epoch 2 - iter 21/32 - loss 0.53758863 - samples/sec: 16.81 - lr: 0.020000\n",
+      "2021-09-21 21:03:58,069 epoch 2 - iter 24/32 - loss 0.50638828 - samples/sec: 18.00 - lr: 0.020000\n",
+      "2021-09-21 21:03:58,240 epoch 2 - iter 27/32 - loss 0.50083705 - samples/sec: 17.62 - lr: 0.020000\n",
+      "2021-09-21 21:03:58,400 epoch 2 - iter 30/32 - loss 0.48820864 - samples/sec: 18.79 - lr: 0.020000\n",
+      "2021-09-21 21:03:58,513 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:58,513 EPOCH 2 done: loss 0.4643 - lr 0.0200000\n",
+      "2021-09-21 21:03:58,725 DEV : loss 0.5316171050071716 - score 0.75\n",
+      "2021-09-21 21:03:58,726 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:03:58,803 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:59,002 epoch 3 - iter 3/32 - loss 0.64986521 - samples/sec: 16.62 - lr: 0.020000\n",
+      "2021-09-21 21:03:59,181 epoch 3 - iter 6/32 - loss 0.58576384 - samples/sec: 16.79 - lr: 0.020000\n",
+      "2021-09-21 21:03:59,362 epoch 3 - iter 9/32 - loss 0.58424991 - samples/sec: 16.70 - lr: 0.020000\n",
+      "2021-09-21 21:03:59,533 epoch 3 - iter 12/32 - loss 0.46716590 - samples/sec: 17.55 - lr: 0.020000\n",
+      "2021-09-21 21:03:59,703 epoch 3 - iter 15/32 - loss 0.43909702 - samples/sec: 17.68 - lr: 0.020000\n",
+      "2021-09-21 21:03:59,884 epoch 3 - iter 18/32 - loss 0.37494181 - samples/sec: 16.64 - lr: 0.020000\n",
+      "2021-09-21 21:04:00,053 epoch 3 - iter 21/32 - loss 0.38940918 - samples/sec: 17.87 - lr: 0.020000\n",
+      "2021-09-21 21:04:00,217 epoch 3 - iter 24/32 - loss 0.35300970 - samples/sec: 18.28 - lr: 0.020000\n",
+      "2021-09-21 21:04:00,391 epoch 3 - iter 27/32 - loss 0.34535241 - samples/sec: 17.31 - lr: 0.020000\n",
+      "2021-09-21 21:04:00,557 epoch 3 - iter 30/32 - loss 0.34212041 - samples/sec: 18.17 - lr: 0.020000\n",
+      "2021-09-21 21:04:00,678 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:00,679 EPOCH 3 done: loss 0.3379 - lr 0.0200000\n",
+      "2021-09-21 21:04:02,722 DEV : loss 0.8036943078041077 - score 0.75\n",
+      "2021-09-21 21:04:02,723 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:04:02,742 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:03,000 epoch 4 - iter 3/32 - loss 0.07601874 - samples/sec: 12.75 - lr: 0.020000\n",
+      "2021-09-21 21:04:03,220 epoch 4 - iter 6/32 - loss 0.09751596 - samples/sec: 13.65 - lr: 0.020000\n",
+      "2021-09-21 21:04:03,448 epoch 4 - iter 9/32 - loss 0.06766842 - samples/sec: 13.22 - lr: 0.020000\n",
+      "2021-09-21 21:04:03,659 epoch 4 - iter 12/32 - loss 0.05378079 - samples/sec: 14.26 - lr: 0.020000\n",
+      "2021-09-21 21:04:03,887 epoch 4 - iter 15/32 - loss 0.04463035 - samples/sec: 13.15 - lr: 0.020000\n",
+      "2021-09-21 21:04:04,120 epoch 4 - iter 18/32 - loss 0.15992588 - samples/sec: 12.92 - lr: 0.020000\n",
+      "2021-09-21 21:04:04,350 epoch 4 - iter 21/32 - loss 0.14248169 - samples/sec: 13.13 - lr: 0.020000\n",
+      "2021-09-21 21:04:04,582 epoch 4 - iter 24/32 - loss 0.15157140 - samples/sec: 12.94 - lr: 0.020000\n",
+      "2021-09-21 21:04:04,803 epoch 4 - iter 27/32 - loss 0.14155524 - samples/sec: 13.61 - lr: 0.020000\n",
+      "2021-09-21 21:04:05,019 epoch 4 - iter 30/32 - loss 0.20775681 - samples/sec: 13.94 - lr: 0.020000\n",
+      "2021-09-21 21:04:05,156 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:05,157 EPOCH 4 done: loss 0.2642 - lr 0.0200000\n",
+      "2021-09-21 21:04:05,336 DEV : loss 0.7853524684906006 - score 0.5\n",
+      "2021-09-21 21:04:05,337 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:04:05,341 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:05,613 epoch 5 - iter 3/32 - loss 0.26703619 - samples/sec: 12.92 - lr: 0.020000\n",
+      "2021-09-21 21:04:05,825 epoch 5 - iter 6/32 - loss 0.17060493 - samples/sec: 14.17 - lr: 0.020000\n",
+      "2021-09-21 21:04:06,055 epoch 5 - iter 9/32 - loss 0.11525408 - samples/sec: 13.09 - lr: 0.020000\n",
+      "2021-09-21 21:04:06,263 epoch 5 - iter 12/32 - loss 0.18018193 - samples/sec: 14.48 - lr: 0.020000\n",
+      "2021-09-21 21:04:06,472 epoch 5 - iter 15/32 - loss 0.14885089 - samples/sec: 14.40 - lr: 0.020000\n",
+      "2021-09-21 21:04:06,668 epoch 5 - iter 18/32 - loss 0.20471571 - samples/sec: 15.31 - lr: 0.020000\n",
+      "2021-09-21 21:04:06,898 epoch 5 - iter 21/32 - loss 0.25030576 - samples/sec: 13.07 - lr: 0.020000\n",
+      "2021-09-21 21:04:07,120 epoch 5 - iter 24/32 - loss 0.23250699 - samples/sec: 13.55 - lr: 0.020000\n",
+      "2021-09-21 21:04:07,312 epoch 5 - iter 27/32 - loss 0.20789530 - samples/sec: 15.72 - lr: 0.020000\n",
+      "2021-09-21 21:04:07,517 epoch 5 - iter 30/32 - loss 0.18759319 - samples/sec: 14.67 - lr: 0.020000\n",
+      "2021-09-21 21:04:07,644 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:23:37,607 EPOCH 6 done: loss 0.6696 - lr 0.0200000\n",
-      "2021-09-08 01:23:37,670 DEV : loss 0.5791957974433899 - score 0.0\n",
-      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:23:37,671 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:23:37,682 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:23:37,829 epoch 7 - iter 3/32 - loss 0.61327157 - samples/sec: 22.48 - lr: 0.010000\n",
-      "2021-09-08 01:23:37,961 epoch 7 - iter 6/32 - loss 0.62313409 - samples/sec: 22.70 - lr: 0.010000\n",
-      "2021-09-08 01:23:38,093 epoch 7 - iter 9/32 - loss 0.63331111 - samples/sec: 22.83 - lr: 0.010000\n",
-      "2021-09-08 01:23:38,228 epoch 7 - iter 12/32 - loss 0.62948726 - samples/sec: 22.42 - lr: 0.010000\n",
-      "2021-09-08 01:23:38,361 epoch 7 - iter 15/32 - loss 0.62118058 - samples/sec: 22.58 - lr: 0.010000\n",
-      "2021-09-08 01:23:38,503 epoch 7 - iter 18/32 - loss 0.62562685 - samples/sec: 21.29 - lr: 0.010000\n",
-      "2021-09-08 01:23:38,645 epoch 7 - iter 21/32 - loss 0.62378560 - samples/sec: 21.13 - lr: 0.010000\n",
-      "2021-09-08 01:23:38,790 epoch 7 - iter 24/32 - loss 0.62765842 - samples/sec: 20.91 - lr: 0.010000\n",
-      "2021-09-08 01:23:38,934 epoch 7 - iter 27/32 - loss 0.63723295 - samples/sec: 20.93 - lr: 0.010000\n",
-      "2021-09-08 01:23:39,077 epoch 7 - iter 30/32 - loss 0.63730850 - samples/sec: 21.03 - lr: 0.010000\n",
-      "2021-09-08 01:23:39,173 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:23:39,174 EPOCH 7 done: loss 0.6350 - lr 0.0100000\n",
-      "2021-09-08 01:23:39,262 DEV : loss 0.5787413716316223 - score 0.0\n",
-      "2021-09-08 01:23:39,263 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:23:39,271 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:23:39,418 epoch 8 - iter 3/32 - loss 0.64128840 - samples/sec: 22.56 - lr: 0.010000\n",
-      "2021-09-08 01:23:39,553 epoch 8 - iter 6/32 - loss 0.63832331 - samples/sec: 22.35 - lr: 0.010000\n",
-      "2021-09-08 01:23:39,684 epoch 8 - iter 9/32 - loss 0.63252117 - samples/sec: 22.93 - lr: 0.010000\n",
-      "2021-09-08 01:23:39,816 epoch 8 - iter 12/32 - loss 0.63583392 - samples/sec: 22.86 - lr: 0.010000\n",
-      "2021-09-08 01:23:39,952 epoch 8 - iter 15/32 - loss 0.64285599 - samples/sec: 22.59 - lr: 0.010000\n",
-      "2021-09-08 01:23:40,084 epoch 8 - iter 18/32 - loss 0.64708884 - samples/sec: 22.78 - lr: 0.010000\n",
-      "2021-09-08 01:23:40,218 epoch 8 - iter 21/32 - loss 0.64516127 - samples/sec: 22.61 - lr: 0.010000\n",
-      "2021-09-08 01:23:40,352 epoch 8 - iter 24/32 - loss 0.64043632 - samples/sec: 22.55 - lr: 0.010000\n",
-      "2021-09-08 01:23:40,484 epoch 8 - iter 27/32 - loss 0.64034828 - samples/sec: 22.82 - lr: 0.010000\n",
-      "2021-09-08 01:23:40,615 epoch 8 - iter 30/32 - loss 0.64317019 - samples/sec: 22.92 - lr: 0.010000\n",
-      "2021-09-08 01:23:40,707 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:23:40,707 EPOCH 8 done: loss 0.6415 - lr 0.0100000\n",
-      "2021-09-08 01:23:40,772 DEV : loss 0.5654431581497192 - score 0.0\n",
-      "2021-09-08 01:23:40,773 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:23:40,816 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:23:40,961 epoch 9 - iter 3/32 - loss 0.65298223 - samples/sec: 22.83 - lr: 0.010000\n",
-      "2021-09-08 01:23:41,093 epoch 9 - iter 6/32 - loss 0.64938015 - samples/sec: 22.83 - lr: 0.010000\n",
-      "2021-09-08 01:23:41,225 epoch 9 - iter 9/32 - loss 0.64228919 - samples/sec: 22.85 - lr: 0.010000\n",
-      "2021-09-08 01:23:41,357 epoch 9 - iter 12/32 - loss 0.63846252 - samples/sec: 22.87 - lr: 0.010000\n",
-      "2021-09-08 01:23:41,489 epoch 9 - iter 15/32 - loss 0.64038258 - samples/sec: 22.83 - lr: 0.010000\n",
-      "2021-09-08 01:23:41,620 epoch 9 - iter 18/32 - loss 0.64035656 - samples/sec: 22.90 - lr: 0.010000\n",
-      "2021-09-08 01:23:41,755 epoch 9 - iter 21/32 - loss 0.64442338 - samples/sec: 22.42 - lr: 0.010000\n",
-      "2021-09-08 01:23:41,887 epoch 9 - iter 24/32 - loss 0.64300919 - samples/sec: 22.76 - lr: 0.010000\n",
-      "2021-09-08 01:23:42,019 epoch 9 - iter 27/32 - loss 0.64126810 - samples/sec: 22.90 - lr: 0.010000\n",
-      "2021-09-08 01:23:42,150 epoch 9 - iter 30/32 - loss 0.64780942 - samples/sec: 22.89 - lr: 0.010000\n",
-      "2021-09-08 01:23:42,239 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:23:42,240 EPOCH 9 done: loss 0.6461 - lr 0.0100000\n",
-      "2021-09-08 01:23:42,305 DEV : loss 0.5682466626167297 - score 0.0\n",
-      "2021-09-08 01:23:42,306 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:23:42,317 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:23:42,487 epoch 10 - iter 3/32 - loss 0.74058455 - samples/sec: 19.07 - lr: 0.010000\n",
-      "2021-09-08 01:23:42,637 epoch 10 - iter 6/32 - loss 0.70317151 - samples/sec: 20.09 - lr: 0.010000\n",
-      "2021-09-08 01:23:42,799 epoch 10 - iter 9/32 - loss 0.70550117 - samples/sec: 18.65 - lr: 0.010000\n",
-      "2021-09-08 01:23:42,947 epoch 10 - iter 12/32 - loss 0.70719087 - samples/sec: 20.32 - lr: 0.010000\n",
-      "2021-09-08 01:23:43,092 epoch 10 - iter 15/32 - loss 0.69885703 - samples/sec: 20.77 - lr: 0.010000\n",
-      "2021-09-08 01:23:43,247 epoch 10 - iter 18/32 - loss 0.68822849 - samples/sec: 19.36 - lr: 0.010000\n",
-      "2021-09-08 01:23:43,391 epoch 10 - iter 21/32 - loss 0.68700262 - samples/sec: 21.04 - lr: 0.010000\n",
-      "2021-09-08 01:23:43,559 epoch 10 - iter 24/32 - loss 0.68595911 - samples/sec: 17.87 - lr: 0.010000\n",
-      "2021-09-08 01:23:43,698 epoch 10 - iter 27/32 - loss 0.67973580 - samples/sec: 21.67 - lr: 0.010000\n",
-      "2021-09-08 01:23:43,866 epoch 10 - iter 30/32 - loss 0.67618385 - samples/sec: 17.89 - lr: 0.010000\n",
-      "2021-09-08 01:23:43,965 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:23:43,965 EPOCH 10 done: loss 0.6772 - lr 0.0100000\n",
-      "2021-09-08 01:23:44,058 DEV : loss 0.5846861600875854 - score 0.0\n",
+      "2021-09-21 21:04:07,645 EPOCH 5 done: loss 0.1764 - lr 0.0200000\n",
+      "2021-09-21 21:04:07,833 DEV : loss 0.6842709183692932 - score 0.75\n",
+      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:04:07,837 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:04:07,853 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:08,099 epoch 6 - iter 3/32 - loss 0.20468706 - samples/sec: 13.40 - lr: 0.010000\n",
+      "2021-09-21 21:04:08,302 epoch 6 - iter 6/32 - loss 0.10582207 - samples/sec: 14.78 - lr: 0.010000\n",
+      "2021-09-21 21:04:08,522 epoch 6 - iter 9/32 - loss 0.07107986 - samples/sec: 13.73 - lr: 0.010000\n",
+      "2021-09-21 21:04:08,745 epoch 6 - iter 12/32 - loss 0.05391418 - samples/sec: 13.45 - lr: 0.010000\n",
+      "2021-09-21 21:04:08,944 epoch 6 - iter 15/32 - loss 0.05169437 - samples/sec: 15.14 - lr: 0.010000\n",
+      "2021-09-21 21:04:09,182 epoch 6 - iter 18/32 - loss 0.17647674 - samples/sec: 12.64 - lr: 0.010000\n",
+      "2021-09-21 21:04:09,406 epoch 6 - iter 21/32 - loss 0.15226405 - samples/sec: 13.46 - lr: 0.010000\n",
+      "2021-09-21 21:04:09,609 epoch 6 - iter 24/32 - loss 0.13340801 - samples/sec: 14.81 - lr: 0.010000\n",
+      "2021-09-21 21:04:09,833 epoch 6 - iter 27/32 - loss 0.17119948 - samples/sec: 13.40 - lr: 0.010000\n",
+      "2021-09-21 21:04:10,063 epoch 6 - iter 30/32 - loss 0.16960104 - samples/sec: 13.12 - lr: 0.010000\n",
+      "2021-09-21 21:04:10,203 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:10,203 EPOCH 6 done: loss 0.1592 - lr 0.0100000\n",
+      "2021-09-21 21:04:10,352 DEV : loss 0.6700452566146851 - score 1.0\n",
+      "2021-09-21 21:04:10,353 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:04:14,470 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:14,693 epoch 7 - iter 3/32 - loss 0.16044091 - samples/sec: 15.45 - lr: 0.010000\n",
+      "2021-09-21 21:04:14,874 epoch 7 - iter 6/32 - loss 0.11168996 - samples/sec: 16.57 - lr: 0.010000\n",
+      "2021-09-21 21:04:15,031 epoch 7 - iter 9/32 - loss 0.07531135 - samples/sec: 19.21 - lr: 0.010000\n",
+      "2021-09-21 21:04:15,178 epoch 7 - iter 12/32 - loss 0.05673302 - samples/sec: 20.49 - lr: 0.010000\n",
+      "2021-09-21 21:04:15,334 epoch 7 - iter 15/32 - loss 0.04606838 - samples/sec: 19.37 - lr: 0.010000\n",
+      "2021-09-21 21:04:15,482 epoch 7 - iter 18/32 - loss 0.04072279 - samples/sec: 20.34 - lr: 0.010000\n",
+      "2021-09-21 21:04:15,627 epoch 7 - iter 21/32 - loss 0.03722016 - samples/sec: 20.78 - lr: 0.010000\n",
+      "2021-09-21 21:04:15,780 epoch 7 - iter 24/32 - loss 0.03289068 - samples/sec: 19.63 - lr: 0.010000\n",
+      "2021-09-21 21:04:15,922 epoch 7 - iter 27/32 - loss 0.02940723 - samples/sec: 21.20 - lr: 0.010000\n",
+      "2021-09-21 21:04:16,081 epoch 7 - iter 30/32 - loss 0.05881272 - samples/sec: 18.91 - lr: 0.010000\n",
+      "2021-09-21 21:04:16,186 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:16,186 EPOCH 7 done: loss 0.0964 - lr 0.0100000\n",
+      "2021-09-21 21:04:16,371 DEV : loss 0.7547001838684082 - score 1.0\n",
+      "2021-09-21 21:04:16,373 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:04:16,454 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:16,625 epoch 8 - iter 3/32 - loss 0.03342672 - samples/sec: 19.59 - lr: 0.010000\n",
+      "2021-09-21 21:04:16,779 epoch 8 - iter 6/32 - loss 0.19054356 - samples/sec: 19.53 - lr: 0.010000\n",
+      "2021-09-21 21:04:16,936 epoch 8 - iter 9/32 - loss 0.22434962 - samples/sec: 19.19 - lr: 0.010000\n",
+      "2021-09-21 21:04:17,102 epoch 8 - iter 12/32 - loss 0.16958612 - samples/sec: 18.15 - lr: 0.010000\n",
+      "2021-09-21 21:04:17,265 epoch 8 - iter 15/32 - loss 0.13930497 - samples/sec: 18.44 - lr: 0.010000\n",
+      "2021-09-21 21:04:17,438 epoch 8 - iter 18/32 - loss 0.11682988 - samples/sec: 17.49 - lr: 0.010000\n",
+      "2021-09-21 21:04:17,588 epoch 8 - iter 21/32 - loss 0.10072164 - samples/sec: 20.14 - lr: 0.010000\n",
+      "2021-09-21 21:04:17,758 epoch 8 - iter 24/32 - loss 0.10359537 - samples/sec: 17.73 - lr: 0.010000\n",
+      "2021-09-21 21:04:17,897 epoch 8 - iter 27/32 - loss 0.09223072 - samples/sec: 21.72 - lr: 0.010000\n",
+      "2021-09-21 21:04:18,050 epoch 8 - iter 30/32 - loss 0.08392117 - samples/sec: 19.70 - lr: 0.010000\n",
+      "2021-09-21 21:04:18,160 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:18,161 EPOCH 8 done: loss 0.0788 - lr 0.0100000\n",
+      "2021-09-21 21:04:28,013 DEV : loss 0.7507801055908203 - score 0.75\n",
+      "2021-09-21 21:04:28,014 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:04:28,053 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:28,260 epoch 9 - iter 3/32 - loss 0.02441389 - samples/sec: 15.70 - lr: 0.010000\n",
+      "2021-09-21 21:04:28,433 epoch 9 - iter 6/32 - loss 0.03308497 - samples/sec: 17.37 - lr: 0.010000\n",
+      "2021-09-21 21:04:28,611 epoch 9 - iter 9/32 - loss 0.02377836 - samples/sec: 17.01 - lr: 0.010000\n",
+      "2021-09-21 21:04:28,793 epoch 9 - iter 12/32 - loss 0.07210951 - samples/sec: 16.54 - lr: 0.010000\n",
+      "2021-09-21 21:04:28,976 epoch 9 - iter 15/32 - loss 0.05882271 - samples/sec: 16.45 - lr: 0.010000\n",
+      "2021-09-21 21:04:29,111 epoch 9 - iter 18/32 - loss 0.04917819 - samples/sec: 22.27 - lr: 0.010000\n",
+      "2021-09-21 21:04:29,268 epoch 9 - iter 21/32 - loss 0.04232087 - samples/sec: 19.27 - lr: 0.010000\n",
+      "2021-09-21 21:04:29,455 epoch 9 - iter 24/32 - loss 0.04306469 - samples/sec: 16.07 - lr: 0.010000\n",
+      "2021-09-21 21:04:29,636 epoch 9 - iter 27/32 - loss 0.03834233 - samples/sec: 16.68 - lr: 0.010000\n",
+      "2021-09-21 21:04:29,822 epoch 9 - iter 30/32 - loss 0.03865066 - samples/sec: 16.24 - lr: 0.010000\n",
+      "2021-09-21 21:04:29,938 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:29,939 EPOCH 9 done: loss 0.0363 - lr 0.0100000\n",
+      "2021-09-21 21:04:30,162 DEV : loss 0.7106984853744507 - score 0.75\n",
+      "2021-09-21 21:04:30,162 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:04:30,264 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:30,467 epoch 10 - iter 3/32 - loss 0.00179383 - samples/sec: 16.89 - lr: 0.010000\n",
+      "2021-09-21 21:04:30,643 epoch 10 - iter 6/32 - loss 0.00207503 - samples/sec: 17.05 - lr: 0.010000\n",
+      "2021-09-21 21:04:30,856 epoch 10 - iter 9/32 - loss 0.00192284 - samples/sec: 14.11 - lr: 0.010000\n",
+      "2021-09-21 21:04:31,074 epoch 10 - iter 12/32 - loss 0.00169961 - samples/sec: 13.85 - lr: 0.010000\n",
+      "2021-09-21 21:04:31,302 epoch 10 - iter 15/32 - loss 0.00229300 - samples/sec: 13.19 - lr: 0.010000\n",
+      "2021-09-21 21:04:31,536 epoch 10 - iter 18/32 - loss 0.06744659 - samples/sec: 12.87 - lr: 0.010000\n",
+      "2021-09-21 21:04:31,747 epoch 10 - iter 21/32 - loss 0.05870177 - samples/sec: 14.22 - lr: 0.010000\n",
+      "2021-09-21 21:04:31,920 epoch 10 - iter 24/32 - loss 0.05153206 - samples/sec: 17.45 - lr: 0.010000\n",
+      "2021-09-21 21:04:32,081 epoch 10 - iter 27/32 - loss 0.04591975 - samples/sec: 18.73 - lr: 0.010000\n",
+      "2021-09-21 21:04:32,233 epoch 10 - iter 30/32 - loss 0.04140625 - samples/sec: 19.72 - lr: 0.010000\n",
+      "2021-09-21 21:04:32,348 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:32,349 EPOCH 10 done: loss 0.0402 - lr 0.0100000\n",
+      "2021-09-21 21:04:32,566 DEV : loss 0.9140568971633911 - score 0.75\n",
       "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 01:23:44,059 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:23:52,020 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:23:52,020 Testing using best model ...\n",
-      "2021-09-08 01:23:52,022 loading file temp/best-model.pt\n",
+      "2021-09-21 21:04:32,566 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:04:36,705 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:36,705 Testing using best model ...\n",
+      "2021-09-21 21:04:36,707 loading file temp/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:23:57,596 \t0.0\n",
-      "2021-09-08 01:23:57,596 \n",
+      "2021-09-21 21:04:50,326 \t0.5\n",
+      "2021-09-21 21:04:50,327 \n",
       "Results:\n",
-      "- F-score (micro) 0.0\n",
-      "- F-score (macro) 0.0\n",
-      "- Accuracy 0.0\n",
+      "- F-score (micro) 0.5\n",
+      "- F-score (macro) 0.2\n",
+      "- Accuracy 0.5\n",
       "\n",
       "By class:\n",
       "              precision    recall  f1-score   support\n",
       "\n",
-      "     sadness     0.0000    0.0000    0.0000         0\n",
-      "    optimism     0.0000    0.0000    0.0000         1\n",
-      "       anger     0.0000    0.0000    0.0000         2\n",
-      "         joy     0.0000    0.0000    0.0000         1\n",
-      "\n",
-      "   micro avg     0.0000    0.0000    0.0000         4\n",
-      "   macro avg     0.0000    0.0000    0.0000         4\n",
-      "weighted avg     0.0000    0.0000    0.0000         4\n",
-      " samples avg     0.0000    0.0000    0.0000         4\n",
+      "     sadness     0.0000    0.0000    0.0000         2\n",
+      "    optimism     0.0000    0.0000    0.0000         0\n",
+      "       anger     0.6667    1.0000    0.8000         2\n",
+      "         joy     0.0000    0.0000    0.0000         0\n",
       "\n",
-      "2021-09-08 01:23:57,597 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:04,569 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
+      "   micro avg     0.5000    0.5000    0.5000         4\n",
+      "   macro avg     0.1667    0.2500    0.2000         4\n",
+      "weighted avg     0.3333    0.5000    0.4000         4\n",
+      " samples avg     0.5000    0.5000    0.5000         4\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 21:04:50,327 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:02,342 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:24:08,710 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:05:06,865 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 36/36 [00:00<00:00, 42378.60it/s]"
+      "100%|██████████| 36/36 [00:00<00:00, 30878.31it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:24:08,713 [b'sadness', b'optimism', b'anger', b'joy']\n",
-      "2021-09-08 01:24:08,730 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:08,732 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:05:06,869 [b'sadness', b'optimism', b'anger', b'joy']\n",
+      "2021-09-21 21:05:06,919 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:06,921 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -1498,25 +1501,24 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:24:08,732 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:08,732 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
-      "2021-09-08 01:24:08,733 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:08,733 Parameters:\n",
-      "2021-09-08 01:24:08,734  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:24:08,734  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:24:08,734  - patience: \"3\"\n",
-      "2021-09-08 01:24:08,735  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:24:08,735  - max_epochs: \"10\"\n",
-      "2021-09-08 01:24:08,735  - shuffle: \"True\"\n",
-      "2021-09-08 01:24:08,735  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:24:08,736  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:24:08,736 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:08,736 Model training base path: \"temp\"\n",
-      "2021-09-08 01:24:08,737 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:08,737 Device: cuda:0\n",
-      "2021-09-08 01:24:08,737 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:08,738 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:24:08,743 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:05:06,921 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:06,922 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
+      "2021-09-21 21:05:06,922 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:06,922 Parameters:\n",
+      "2021-09-21 21:05:06,923  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:05:06,923  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:05:06,923  - patience: \"3\"\n",
+      "2021-09-21 21:05:06,923  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:05:06,924  - max_epochs: \"10\"\n",
+      "2021-09-21 21:05:06,924  - shuffle: \"True\"\n",
+      "2021-09-21 21:05:06,924  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:05:06,924  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:05:06,925 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:06,925 Model training base path: \"temp\"\n",
+      "2021-09-21 21:05:06,925 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:06,926 Device: cuda:0\n",
+      "2021-09-21 21:05:06,926 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:06,926 Embeddings storage mode: cpu\n"
      ]
     },
     {
@@ -1530,209 +1532,210 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:24:08,943 epoch 1 - iter 3/32 - loss 0.55130753 - samples/sec: 16.44 - lr: 0.020000\n",
-      "2021-09-08 01:24:09,105 epoch 1 - iter 6/32 - loss 0.60386414 - samples/sec: 18.53 - lr: 0.020000\n",
-      "2021-09-08 01:24:09,259 epoch 1 - iter 9/32 - loss 0.74882745 - samples/sec: 19.68 - lr: 0.020000\n",
-      "2021-09-08 01:24:09,447 epoch 1 - iter 12/32 - loss 0.73362441 - samples/sec: 15.97 - lr: 0.020000\n",
-      "2021-09-08 01:24:09,593 epoch 1 - iter 15/32 - loss 0.64645459 - samples/sec: 20.70 - lr: 0.020000\n",
-      "2021-09-08 01:24:09,789 epoch 1 - iter 18/32 - loss 0.75209863 - samples/sec: 15.34 - lr: 0.020000\n",
-      "2021-09-08 01:24:09,951 epoch 1 - iter 21/32 - loss 0.66328455 - samples/sec: 18.58 - lr: 0.020000\n",
-      "2021-09-08 01:24:10,105 epoch 1 - iter 24/32 - loss 0.64651706 - samples/sec: 19.49 - lr: 0.020000\n",
-      "2021-09-08 01:24:10,303 epoch 1 - iter 27/32 - loss 0.69336903 - samples/sec: 15.26 - lr: 0.020000\n",
-      "2021-09-08 01:24:10,466 epoch 1 - iter 30/32 - loss 0.64020187 - samples/sec: 18.45 - lr: 0.020000\n",
-      "2021-09-08 01:24:10,582 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:10,583 EPOCH 1 done: loss 0.6031 - lr 0.0200000\n",
-      "2021-09-08 01:24:10,673 DEV : loss 0.6136522889137268 - score 0.5\n",
-      "2021-09-08 01:24:10,674 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:05:07,751 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:07,933 epoch 1 - iter 3/32 - loss 0.75272397 - samples/sec: 18.94 - lr: 0.020000\n",
+      "2021-09-21 21:05:08,113 epoch 1 - iter 6/32 - loss 0.56085117 - samples/sec: 16.76 - lr: 0.020000\n",
+      "2021-09-21 21:05:08,321 epoch 1 - iter 9/32 - loss 0.67537590 - samples/sec: 14.48 - lr: 0.020000\n",
+      "2021-09-21 21:05:08,509 epoch 1 - iter 12/32 - loss 0.85872416 - samples/sec: 15.98 - lr: 0.020000\n",
+      "2021-09-21 21:05:08,724 epoch 1 - iter 15/32 - loss 0.87570841 - samples/sec: 14.03 - lr: 0.020000\n",
+      "2021-09-21 21:05:08,970 epoch 1 - iter 18/32 - loss 0.87626942 - samples/sec: 12.21 - lr: 0.020000\n",
+      "2021-09-21 21:05:09,182 epoch 1 - iter 21/32 - loss 0.81322210 - samples/sec: 14.17 - lr: 0.020000\n",
+      "2021-09-21 21:05:09,387 epoch 1 - iter 24/32 - loss 0.72391325 - samples/sec: 14.74 - lr: 0.020000\n",
+      "2021-09-21 21:05:09,602 epoch 1 - iter 27/32 - loss 0.67546148 - samples/sec: 13.97 - lr: 0.020000\n",
+      "2021-09-21 21:05:09,793 epoch 1 - iter 30/32 - loss 0.67933350 - samples/sec: 15.70 - lr: 0.020000\n",
+      "2021-09-21 21:05:09,939 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:09,940 EPOCH 1 done: loss 0.6584 - lr 0.0200000\n",
+      "2021-09-21 21:05:10,059 DEV : loss 0.7995777130126953 - score 0.5\n",
+      "2021-09-21 21:05:10,059 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:24:14,564 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:14,766 epoch 2 - iter 3/32 - loss 0.57513378 - samples/sec: 16.48 - lr: 0.020000\n",
-      "2021-09-08 01:24:14,913 epoch 2 - iter 6/32 - loss 0.76863452 - samples/sec: 20.58 - lr: 0.020000\n",
-      "2021-09-08 01:24:15,090 epoch 2 - iter 9/32 - loss 0.61994580 - samples/sec: 17.02 - lr: 0.020000\n",
-      "2021-09-08 01:24:15,264 epoch 2 - iter 12/32 - loss 0.56673980 - samples/sec: 17.29 - lr: 0.020000\n",
-      "2021-09-08 01:24:15,404 epoch 2 - iter 15/32 - loss 0.53863143 - samples/sec: 21.48 - lr: 0.020000\n",
-      "2021-09-08 01:24:15,608 epoch 2 - iter 18/32 - loss 0.53680764 - samples/sec: 14.75 - lr: 0.020000\n",
-      "2021-09-08 01:24:15,752 epoch 2 - iter 21/32 - loss 0.51006634 - samples/sec: 20.89 - lr: 0.020000\n",
-      "2021-09-08 01:24:15,937 epoch 2 - iter 24/32 - loss 0.55919870 - samples/sec: 16.28 - lr: 0.020000\n",
-      "2021-09-08 01:24:16,086 epoch 2 - iter 27/32 - loss 0.60888754 - samples/sec: 20.22 - lr: 0.020000\n",
-      "2021-09-08 01:24:16,246 epoch 2 - iter 30/32 - loss 0.55930541 - samples/sec: 18.84 - lr: 0.020000\n",
-      "2021-09-08 01:24:16,366 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:16,366 EPOCH 2 done: loss 0.5500 - lr 0.0200000\n",
-      "2021-09-08 01:24:16,438 DEV : loss 0.3102327585220337 - score 0.75\n",
-      "2021-09-08 01:24:16,438 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:05:17,380 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:17,607 epoch 2 - iter 3/32 - loss 0.52328988 - samples/sec: 14.71 - lr: 0.020000\n",
+      "2021-09-21 21:05:17,816 epoch 2 - iter 6/32 - loss 0.49114313 - samples/sec: 14.41 - lr: 0.020000\n",
+      "2021-09-21 21:05:18,016 epoch 2 - iter 9/32 - loss 0.49249665 - samples/sec: 15.07 - lr: 0.020000\n",
+      "2021-09-21 21:05:18,208 epoch 2 - iter 12/32 - loss 0.51197894 - samples/sec: 15.69 - lr: 0.020000\n",
+      "2021-09-21 21:05:18,422 epoch 2 - iter 15/32 - loss 0.51056366 - samples/sec: 14.07 - lr: 0.020000\n",
+      "2021-09-21 21:05:18,610 epoch 2 - iter 18/32 - loss 0.54737688 - samples/sec: 16.05 - lr: 0.020000\n",
+      "2021-09-21 21:05:18,794 epoch 2 - iter 21/32 - loss 0.47369621 - samples/sec: 16.30 - lr: 0.020000\n",
+      "2021-09-21 21:05:18,991 epoch 2 - iter 24/32 - loss 0.55285127 - samples/sec: 15.28 - lr: 0.020000\n",
+      "2021-09-21 21:05:19,183 epoch 2 - iter 27/32 - loss 0.52472905 - samples/sec: 15.75 - lr: 0.020000\n",
+      "2021-09-21 21:05:19,359 epoch 2 - iter 30/32 - loss 0.49429947 - samples/sec: 17.11 - lr: 0.020000\n",
+      "2021-09-21 21:05:19,478 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:19,478 EPOCH 2 done: loss 0.4718 - lr 0.0200000\n",
+      "2021-09-21 21:05:19,679 DEV : loss 0.5585904121398926 - score 0.75\n",
+      "2021-09-21 21:05:19,680 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:24:20,482 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:20,642 epoch 3 - iter 3/32 - loss 0.33732248 - samples/sec: 21.30 - lr: 0.020000\n",
-      "2021-09-08 01:24:20,789 epoch 3 - iter 6/32 - loss 0.24775925 - samples/sec: 20.47 - lr: 0.020000\n",
-      "2021-09-08 01:24:20,989 epoch 3 - iter 9/32 - loss 0.20335901 - samples/sec: 15.06 - lr: 0.020000\n",
-      "2021-09-08 01:24:21,127 epoch 3 - iter 12/32 - loss 0.17069361 - samples/sec: 21.83 - lr: 0.020000\n",
-      "2021-09-08 01:24:21,307 epoch 3 - iter 15/32 - loss 0.20226192 - samples/sec: 16.65 - lr: 0.020000\n",
-      "2021-09-08 01:24:21,476 epoch 3 - iter 18/32 - loss 0.17589513 - samples/sec: 17.88 - lr: 0.020000\n",
-      "2021-09-08 01:24:21,632 epoch 3 - iter 21/32 - loss 0.15376805 - samples/sec: 19.29 - lr: 0.020000\n",
-      "2021-09-08 01:24:21,821 epoch 3 - iter 24/32 - loss 0.14016318 - samples/sec: 15.92 - lr: 0.020000\n",
-      "2021-09-08 01:24:21,966 epoch 3 - iter 27/32 - loss 0.23235551 - samples/sec: 20.79 - lr: 0.020000\n",
-      "2021-09-08 01:24:22,127 epoch 3 - iter 30/32 - loss 0.32176162 - samples/sec: 18.66 - lr: 0.020000\n",
-      "2021-09-08 01:24:22,251 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:22,252 EPOCH 3 done: loss 0.3184 - lr 0.0200000\n",
-      "2021-09-08 01:24:22,317 DEV : loss 0.5926705598831177 - score 0.75\n",
-      "2021-09-08 01:24:22,318 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:24:22,322 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:22,526 epoch 4 - iter 3/32 - loss 0.17326387 - samples/sec: 16.19 - lr: 0.020000\n",
-      "2021-09-08 01:24:22,664 epoch 4 - iter 6/32 - loss 0.09237173 - samples/sec: 21.83 - lr: 0.020000\n",
-      "2021-09-08 01:24:22,861 epoch 4 - iter 9/32 - loss 0.36524838 - samples/sec: 15.33 - lr: 0.020000\n",
-      "2021-09-08 01:24:23,013 epoch 4 - iter 12/32 - loss 0.30152763 - samples/sec: 19.74 - lr: 0.020000\n",
-      "2021-09-08 01:24:23,163 epoch 4 - iter 15/32 - loss 0.24807045 - samples/sec: 20.09 - lr: 0.020000\n",
-      "2021-09-08 01:24:23,340 epoch 4 - iter 18/32 - loss 0.21050240 - samples/sec: 17.00 - lr: 0.020000\n",
-      "2021-09-08 01:24:23,483 epoch 4 - iter 21/32 - loss 0.19392760 - samples/sec: 21.04 - lr: 0.020000\n",
-      "2021-09-08 01:24:23,656 epoch 4 - iter 24/32 - loss 0.19567954 - samples/sec: 17.46 - lr: 0.020000\n",
-      "2021-09-08 01:24:23,801 epoch 4 - iter 27/32 - loss 0.21767686 - samples/sec: 20.72 - lr: 0.020000\n",
-      "2021-09-08 01:24:23,963 epoch 4 - iter 30/32 - loss 0.19807006 - samples/sec: 18.61 - lr: 0.020000\n",
-      "2021-09-08 01:24:24,096 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:24,097 EPOCH 4 done: loss 0.2304 - lr 0.0200000\n",
-      "2021-09-08 01:24:24,173 DEV : loss 0.7780532836914062 - score 0.75\n",
-      "2021-09-08 01:24:24,174 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:24:24,176 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:24,359 epoch 5 - iter 3/32 - loss 0.00404626 - samples/sec: 17.73 - lr: 0.020000\n",
-      "2021-09-08 01:24:24,540 epoch 5 - iter 6/32 - loss 0.03939994 - samples/sec: 16.61 - lr: 0.020000\n",
-      "2021-09-08 01:24:24,678 epoch 5 - iter 9/32 - loss 0.04510809 - samples/sec: 21.85 - lr: 0.020000\n",
-      "2021-09-08 01:24:24,836 epoch 5 - iter 12/32 - loss 0.09298844 - samples/sec: 19.15 - lr: 0.020000\n",
-      "2021-09-08 01:24:25,008 epoch 5 - iter 15/32 - loss 0.07492598 - samples/sec: 17.42 - lr: 0.020000\n",
-      "2021-09-08 01:24:25,152 epoch 5 - iter 18/32 - loss 0.06413503 - samples/sec: 20.97 - lr: 0.020000\n",
-      "2021-09-08 01:24:25,341 epoch 5 - iter 21/32 - loss 0.06321793 - samples/sec: 15.96 - lr: 0.020000\n",
-      "2021-09-08 01:24:25,478 epoch 5 - iter 24/32 - loss 0.05820914 - samples/sec: 21.84 - lr: 0.020000\n",
-      "2021-09-08 01:24:25,641 epoch 5 - iter 27/32 - loss 0.05297141 - samples/sec: 18.56 - lr: 0.020000\n",
-      "2021-09-08 01:24:25,792 epoch 5 - iter 30/32 - loss 0.04776627 - samples/sec: 19.95 - lr: 0.020000\n",
-      "2021-09-08 01:24:25,894 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:25,894 EPOCH 5 done: loss 0.0722 - lr 0.0200000\n",
-      "2021-09-08 01:24:25,988 DEV : loss 0.5250722765922546 - score 0.75\n",
-      "2021-09-08 01:24:25,989 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:24:25,992 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:26,176 epoch 6 - iter 3/32 - loss 0.02070920 - samples/sec: 17.96 - lr: 0.020000\n",
-      "2021-09-08 01:24:26,322 epoch 6 - iter 6/32 - loss 0.39747097 - samples/sec: 20.59 - lr: 0.020000\n",
-      "2021-09-08 01:24:26,509 epoch 6 - iter 9/32 - loss 0.27384012 - samples/sec: 16.08 - lr: 0.020000\n",
-      "2021-09-08 01:24:26,647 epoch 6 - iter 12/32 - loss 0.23580686 - samples/sec: 21.85 - lr: 0.020000\n",
-      "2021-09-08 01:24:26,817 epoch 6 - iter 15/32 - loss 0.25313619 - samples/sec: 17.70 - lr: 0.020000\n",
-      "2021-09-08 01:24:26,984 epoch 6 - iter 18/32 - loss 0.21600724 - samples/sec: 18.10 - lr: 0.020000\n",
-      "2021-09-08 01:24:27,118 epoch 6 - iter 21/32 - loss 0.18640196 - samples/sec: 22.41 - lr: 0.020000\n",
-      "2021-09-08 01:24:27,314 epoch 6 - iter 24/32 - loss 0.16427554 - samples/sec: 15.37 - lr: 0.020000\n",
-      "2021-09-08 01:24:27,454 epoch 6 - iter 27/32 - loss 0.14644667 - samples/sec: 21.55 - lr: 0.020000\n",
-      "2021-09-08 01:24:27,645 epoch 6 - iter 30/32 - loss 0.13202931 - samples/sec: 15.78 - lr: 0.020000\n",
-      "2021-09-08 01:24:27,736 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:05:28,614 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:28,864 epoch 3 - iter 3/32 - loss 0.09367975 - samples/sec: 12.91 - lr: 0.020000\n",
+      "2021-09-21 21:05:29,126 epoch 3 - iter 6/32 - loss 0.46921744 - samples/sec: 11.50 - lr: 0.020000\n",
+      "2021-09-21 21:05:29,359 epoch 3 - iter 9/32 - loss 0.43129133 - samples/sec: 12.90 - lr: 0.020000\n",
+      "2021-09-21 21:05:29,547 epoch 3 - iter 12/32 - loss 0.34341310 - samples/sec: 15.97 - lr: 0.020000\n",
+      "2021-09-21 21:05:29,760 epoch 3 - iter 15/32 - loss 0.27677797 - samples/sec: 14.14 - lr: 0.020000\n",
+      "2021-09-21 21:05:29,975 epoch 3 - iter 18/32 - loss 0.26812304 - samples/sec: 13.98 - lr: 0.020000\n",
+      "2021-09-21 21:05:30,243 epoch 3 - iter 21/32 - loss 0.34855411 - samples/sec: 11.21 - lr: 0.020000\n",
+      "2021-09-21 21:05:30,470 epoch 3 - iter 24/32 - loss 0.31291741 - samples/sec: 13.28 - lr: 0.020000\n",
+      "2021-09-21 21:05:30,701 epoch 3 - iter 27/32 - loss 0.27934816 - samples/sec: 13.02 - lr: 0.020000\n",
+      "2021-09-21 21:05:30,944 epoch 3 - iter 30/32 - loss 0.26394234 - samples/sec: 12.35 - lr: 0.020000\n",
+      "2021-09-21 21:05:31,113 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:31,114 EPOCH 3 done: loss 0.2477 - lr 0.0200000\n",
+      "2021-09-21 21:05:31,275 DEV : loss 1.2779505252838135 - score 0.75\n",
+      "2021-09-21 21:05:31,277 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:05:31,280 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:31,546 epoch 4 - iter 3/32 - loss 0.07659558 - samples/sec: 12.42 - lr: 0.020000\n",
+      "2021-09-21 21:05:31,769 epoch 4 - iter 6/32 - loss 0.03907288 - samples/sec: 13.48 - lr: 0.020000\n",
+      "2021-09-21 21:05:32,029 epoch 4 - iter 9/32 - loss 0.37811283 - samples/sec: 11.57 - lr: 0.020000\n",
+      "2021-09-21 21:05:32,278 epoch 4 - iter 12/32 - loss 0.35798333 - samples/sec: 12.08 - lr: 0.020000\n",
+      "2021-09-21 21:05:32,528 epoch 4 - iter 15/32 - loss 0.41814185 - samples/sec: 12.03 - lr: 0.020000\n",
+      "2021-09-21 21:05:32,783 epoch 4 - iter 18/32 - loss 0.34866287 - samples/sec: 11.78 - lr: 0.020000\n",
+      "2021-09-21 21:05:33,050 epoch 4 - iter 21/32 - loss 0.35417195 - samples/sec: 11.27 - lr: 0.020000\n",
+      "2021-09-21 21:05:33,265 epoch 4 - iter 24/32 - loss 0.32326241 - samples/sec: 14.00 - lr: 0.020000\n",
+      "2021-09-21 21:05:33,496 epoch 4 - iter 27/32 - loss 0.37683754 - samples/sec: 13.09 - lr: 0.020000\n",
+      "2021-09-21 21:05:33,732 epoch 4 - iter 30/32 - loss 0.38505630 - samples/sec: 12.72 - lr: 0.020000\n",
+      "2021-09-21 21:05:33,893 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:33,893 EPOCH 4 done: loss 0.3990 - lr 0.0200000\n",
+      "2021-09-21 21:05:34,036 DEV : loss 0.7900415062904358 - score 0.75\n",
+      "2021-09-21 21:05:34,037 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:05:34,078 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:34,335 epoch 5 - iter 3/32 - loss 0.03178862 - samples/sec: 13.41 - lr: 0.020000\n",
+      "2021-09-21 21:05:34,524 epoch 5 - iter 6/32 - loss 0.02507301 - samples/sec: 15.97 - lr: 0.020000\n",
+      "2021-09-21 21:05:34,730 epoch 5 - iter 9/32 - loss 0.03247041 - samples/sec: 14.58 - lr: 0.020000\n",
+      "2021-09-21 21:05:34,949 epoch 5 - iter 12/32 - loss 0.13405869 - samples/sec: 13.76 - lr: 0.020000\n",
+      "2021-09-21 21:05:35,193 epoch 5 - iter 15/32 - loss 0.16633834 - samples/sec: 12.31 - lr: 0.020000\n",
+      "2021-09-21 21:05:35,419 epoch 5 - iter 18/32 - loss 0.18174469 - samples/sec: 13.29 - lr: 0.020000\n",
+      "2021-09-21 21:05:35,657 epoch 5 - iter 21/32 - loss 0.19455581 - samples/sec: 12.68 - lr: 0.020000\n",
+      "2021-09-21 21:05:35,896 epoch 5 - iter 24/32 - loss 0.17925378 - samples/sec: 12.54 - lr: 0.020000\n",
+      "2021-09-21 21:05:36,143 epoch 5 - iter 27/32 - loss 0.16074286 - samples/sec: 12.19 - lr: 0.020000\n",
+      "2021-09-21 21:05:36,382 epoch 5 - iter 30/32 - loss 0.16768393 - samples/sec: 12.58 - lr: 0.020000\n",
+      "2021-09-21 21:05:36,531 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:36,532 EPOCH 5 done: loss 0.2045 - lr 0.0200000\n",
+      "2021-09-21 21:05:36,803 DEV : loss 1.1816476583480835 - score 0.75\n",
+      "2021-09-21 21:05:36,804 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:05:36,806 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:37,083 epoch 6 - iter 3/32 - loss 0.00198364 - samples/sec: 13.30 - lr: 0.020000\n",
+      "2021-09-21 21:05:37,316 epoch 6 - iter 6/32 - loss 0.00159633 - samples/sec: 12.88 - lr: 0.020000\n",
+      "2021-09-21 21:05:37,534 epoch 6 - iter 9/32 - loss 0.02651340 - samples/sec: 13.83 - lr: 0.020000\n",
+      "2021-09-21 21:05:37,738 epoch 6 - iter 12/32 - loss 0.02120441 - samples/sec: 14.74 - lr: 0.020000\n",
+      "2021-09-21 21:05:37,957 epoch 6 - iter 15/32 - loss 0.01844928 - samples/sec: 13.76 - lr: 0.020000\n",
+      "2021-09-21 21:05:38,178 epoch 6 - iter 18/32 - loss 0.11834797 - samples/sec: 13.59 - lr: 0.020000\n",
+      "2021-09-21 21:05:38,403 epoch 6 - iter 21/32 - loss 0.14104363 - samples/sec: 13.36 - lr: 0.020000\n",
+      "2021-09-21 21:05:38,653 epoch 6 - iter 24/32 - loss 0.12862512 - samples/sec: 12.00 - lr: 0.020000\n",
+      "2021-09-21 21:05:38,889 epoch 6 - iter 27/32 - loss 0.11665617 - samples/sec: 12.75 - lr: 0.020000\n",
+      "2021-09-21 21:05:39,146 epoch 6 - iter 30/32 - loss 0.10527793 - samples/sec: 11.74 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:24:27,737 EPOCH 6 done: loss 0.1239 - lr 0.0200000\n",
-      "2021-09-08 01:24:27,910 DEV : loss 0.7249132394790649 - score 0.75\n",
+      "2021-09-21 21:05:39,310 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:39,311 EPOCH 6 done: loss 0.0988 - lr 0.0200000\n",
+      "2021-09-21 21:05:39,462 DEV : loss 1.2546813488006592 - score 0.75\n",
       "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:24:27,911 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:24:27,993 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:28,181 epoch 7 - iter 3/32 - loss 0.00117928 - samples/sec: 17.41 - lr: 0.010000\n",
-      "2021-09-08 01:24:28,328 epoch 7 - iter 6/32 - loss 0.00405952 - samples/sec: 20.50 - lr: 0.010000\n",
-      "2021-09-08 01:24:28,483 epoch 7 - iter 9/32 - loss 0.08347349 - samples/sec: 19.35 - lr: 0.010000\n",
-      "2021-09-08 01:24:28,645 epoch 7 - iter 12/32 - loss 0.06431854 - samples/sec: 18.64 - lr: 0.010000\n",
-      "2021-09-08 01:24:28,786 epoch 7 - iter 15/32 - loss 0.05239422 - samples/sec: 21.36 - lr: 0.010000\n",
-      "2021-09-08 01:24:28,958 epoch 7 - iter 18/32 - loss 0.04500702 - samples/sec: 17.46 - lr: 0.010000\n",
-      "2021-09-08 01:24:29,095 epoch 7 - iter 21/32 - loss 0.03866222 - samples/sec: 21.99 - lr: 0.010000\n",
-      "2021-09-08 01:24:29,270 epoch 7 - iter 24/32 - loss 0.03465270 - samples/sec: 17.22 - lr: 0.010000\n",
-      "2021-09-08 01:24:29,427 epoch 7 - iter 27/32 - loss 0.03092808 - samples/sec: 19.22 - lr: 0.010000\n",
-      "2021-09-08 01:24:29,572 epoch 7 - iter 30/32 - loss 0.02807024 - samples/sec: 20.69 - lr: 0.010000\n",
-      "2021-09-08 01:24:29,715 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:29,716 EPOCH 7 done: loss 0.0266 - lr 0.0100000\n",
-      "2021-09-08 01:24:30,241 DEV : loss 0.5296403169631958 - score 0.75\n",
-      "2021-09-08 01:24:30,243 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:24:30,245 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:30,440 epoch 8 - iter 3/32 - loss 0.00050771 - samples/sec: 18.12 - lr: 0.010000\n",
-      "2021-09-08 01:24:30,576 epoch 8 - iter 6/32 - loss 0.00094490 - samples/sec: 22.19 - lr: 0.010000\n",
-      "2021-09-08 01:24:30,765 epoch 8 - iter 9/32 - loss 0.00090232 - samples/sec: 15.88 - lr: 0.010000\n",
-      "2021-09-08 01:24:30,901 epoch 8 - iter 12/32 - loss 0.00120329 - samples/sec: 22.17 - lr: 0.010000\n",
-      "2021-09-08 01:24:31,050 epoch 8 - iter 15/32 - loss 0.01732361 - samples/sec: 20.30 - lr: 0.010000\n",
-      "2021-09-08 01:24:31,234 epoch 8 - iter 18/32 - loss 0.01456499 - samples/sec: 16.34 - lr: 0.010000\n",
-      "2021-09-08 01:24:31,371 epoch 8 - iter 21/32 - loss 0.01252018 - samples/sec: 21.99 - lr: 0.010000\n",
-      "2021-09-08 01:24:31,550 epoch 8 - iter 24/32 - loss 0.01100024 - samples/sec: 16.86 - lr: 0.010000\n",
-      "2021-09-08 01:24:31,691 epoch 8 - iter 27/32 - loss 0.00987414 - samples/sec: 21.33 - lr: 0.010000\n",
-      "2021-09-08 01:24:31,873 epoch 8 - iter 30/32 - loss 0.00896436 - samples/sec: 16.50 - lr: 0.010000\n",
-      "2021-09-08 01:24:31,964 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:31,964 EPOCH 8 done: loss 0.0085 - lr 0.0100000\n",
-      "2021-09-08 01:24:32,041 DEV : loss 0.5869695544242859 - score 0.75\n",
-      "2021-09-08 01:24:32,042 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:24:32,044 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:32,226 epoch 9 - iter 3/32 - loss 0.00371941 - samples/sec: 17.78 - lr: 0.010000\n",
-      "2021-09-08 01:24:32,361 epoch 9 - iter 6/32 - loss 0.00201547 - samples/sec: 22.35 - lr: 0.010000\n",
-      "2021-09-08 01:24:32,543 epoch 9 - iter 9/32 - loss 0.00218718 - samples/sec: 16.54 - lr: 0.010000\n",
-      "2021-09-08 01:24:32,680 epoch 9 - iter 12/32 - loss 0.00262610 - samples/sec: 22.10 - lr: 0.010000\n",
-      "2021-09-08 01:24:32,842 epoch 9 - iter 15/32 - loss 0.00215192 - samples/sec: 18.58 - lr: 0.010000\n",
-      "2021-09-08 01:24:32,988 epoch 9 - iter 18/32 - loss 0.00184449 - samples/sec: 20.56 - lr: 0.010000\n",
-      "2021-09-08 01:24:33,129 epoch 9 - iter 21/32 - loss 0.00186906 - samples/sec: 21.41 - lr: 0.010000\n",
-      "2021-09-08 01:24:33,327 epoch 9 - iter 24/32 - loss 0.00202402 - samples/sec: 15.19 - lr: 0.010000\n",
-      "2021-09-08 01:24:33,465 epoch 9 - iter 27/32 - loss 0.00712111 - samples/sec: 21.81 - lr: 0.010000\n",
-      "2021-09-08 01:24:33,615 epoch 9 - iter 30/32 - loss 0.00655036 - samples/sec: 20.10 - lr: 0.010000\n",
-      "2021-09-08 01:24:33,741 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:33,742 EPOCH 9 done: loss 0.0067 - lr 0.0100000\n",
-      "2021-09-08 01:24:33,807 DEV : loss 0.5539666414260864 - score 0.75\n",
-      "2021-09-08 01:24:33,808 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:24:33,810 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:33,994 epoch 10 - iter 3/32 - loss 0.00121061 - samples/sec: 17.66 - lr: 0.010000\n",
-      "2021-09-08 01:24:34,151 epoch 10 - iter 6/32 - loss 0.00091417 - samples/sec: 19.24 - lr: 0.010000\n",
-      "2021-09-08 01:24:34,292 epoch 10 - iter 9/32 - loss 0.00150215 - samples/sec: 21.41 - lr: 0.010000\n",
-      "2021-09-08 01:24:34,475 epoch 10 - iter 12/32 - loss 0.00173730 - samples/sec: 16.37 - lr: 0.010000\n",
-      "2021-09-08 01:24:34,620 epoch 10 - iter 15/32 - loss 0.00142867 - samples/sec: 20.81 - lr: 0.010000\n",
-      "2021-09-08 01:24:34,772 epoch 10 - iter 18/32 - loss 0.00132616 - samples/sec: 19.83 - lr: 0.010000\n",
-      "2021-09-08 01:24:34,936 epoch 10 - iter 21/32 - loss 0.00126343 - samples/sec: 18.39 - lr: 0.010000\n",
-      "2021-09-08 01:24:35,089 epoch 10 - iter 24/32 - loss 0.00129437 - samples/sec: 19.71 - lr: 0.010000\n",
-      "2021-09-08 01:24:35,258 epoch 10 - iter 27/32 - loss 0.00124904 - samples/sec: 17.83 - lr: 0.010000\n",
-      "2021-09-08 01:24:35,392 epoch 10 - iter 30/32 - loss 0.00123146 - samples/sec: 22.37 - lr: 0.010000\n",
-      "2021-09-08 01:24:35,512 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:35,512 EPOCH 10 done: loss 0.0012 - lr 0.0100000\n",
-      "2021-09-08 01:24:35,607 DEV : loss 0.573169469833374 - score 0.75\n",
+      "2021-09-21 21:05:39,464 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:05:39,467 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:39,704 epoch 7 - iter 3/32 - loss 0.00478500 - samples/sec: 15.06 - lr: 0.010000\n",
+      "2021-09-21 21:05:39,948 epoch 7 - iter 6/32 - loss 0.00405706 - samples/sec: 12.34 - lr: 0.010000\n",
+      "2021-09-21 21:05:40,180 epoch 7 - iter 9/32 - loss 0.01170961 - samples/sec: 12.92 - lr: 0.010000\n",
+      "2021-09-21 21:05:40,389 epoch 7 - iter 12/32 - loss 0.01026746 - samples/sec: 14.42 - lr: 0.010000\n",
+      "2021-09-21 21:05:40,583 epoch 7 - iter 15/32 - loss 0.00838200 - samples/sec: 15.51 - lr: 0.010000\n",
+      "2021-09-21 21:05:40,783 epoch 7 - iter 18/32 - loss 0.01496361 - samples/sec: 15.00 - lr: 0.010000\n",
+      "2021-09-21 21:05:41,004 epoch 7 - iter 21/32 - loss 0.01819553 - samples/sec: 13.65 - lr: 0.010000\n",
+      "2021-09-21 21:05:41,197 epoch 7 - iter 24/32 - loss 0.01711289 - samples/sec: 15.53 - lr: 0.010000\n",
+      "2021-09-21 21:05:41,409 epoch 7 - iter 27/32 - loss 0.01559932 - samples/sec: 14.21 - lr: 0.010000\n",
+      "2021-09-21 21:05:41,616 epoch 7 - iter 30/32 - loss 0.02021979 - samples/sec: 14.56 - lr: 0.010000\n",
+      "2021-09-21 21:05:41,763 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:41,764 EPOCH 7 done: loss 0.0217 - lr 0.0100000\n",
+      "2021-09-21 21:05:41,908 DEV : loss 1.1554912328720093 - score 0.75\n",
+      "2021-09-21 21:05:41,912 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:05:41,914 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:42,183 epoch 8 - iter 3/32 - loss 0.02879404 - samples/sec: 12.57 - lr: 0.010000\n",
+      "2021-09-21 21:05:42,397 epoch 8 - iter 6/32 - loss 0.01642640 - samples/sec: 14.00 - lr: 0.010000\n",
+      "2021-09-21 21:05:42,615 epoch 8 - iter 9/32 - loss 0.01219625 - samples/sec: 13.82 - lr: 0.010000\n",
+      "2021-09-21 21:05:42,829 epoch 8 - iter 12/32 - loss 0.12268826 - samples/sec: 14.06 - lr: 0.010000\n",
+      "2021-09-21 21:05:43,051 epoch 8 - iter 15/32 - loss 0.09940733 - samples/sec: 13.58 - lr: 0.010000\n",
+      "2021-09-21 21:05:43,247 epoch 8 - iter 18/32 - loss 0.08414427 - samples/sec: 15.35 - lr: 0.010000\n",
+      "2021-09-21 21:05:43,460 epoch 8 - iter 21/32 - loss 0.07266595 - samples/sec: 14.13 - lr: 0.010000\n",
+      "2021-09-21 21:05:43,683 epoch 8 - iter 24/32 - loss 0.06374343 - samples/sec: 13.45 - lr: 0.010000\n",
+      "2021-09-21 21:05:43,901 epoch 8 - iter 27/32 - loss 0.05838072 - samples/sec: 13.83 - lr: 0.010000\n",
+      "2021-09-21 21:05:44,120 epoch 8 - iter 30/32 - loss 0.05275266 - samples/sec: 13.71 - lr: 0.010000\n",
+      "2021-09-21 21:05:44,294 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:44,294 EPOCH 8 done: loss 0.0495 - lr 0.0100000\n",
+      "2021-09-21 21:05:44,408 DEV : loss 1.5200443267822266 - score 0.75\n",
+      "2021-09-21 21:05:44,409 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:05:44,411 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:44,589 epoch 9 - iter 3/32 - loss 0.00439061 - samples/sec: 19.15 - lr: 0.010000\n",
+      "2021-09-21 21:05:44,804 epoch 9 - iter 6/32 - loss 0.00664930 - samples/sec: 14.00 - lr: 0.010000\n",
+      "2021-09-21 21:05:45,046 epoch 9 - iter 9/32 - loss 0.04992530 - samples/sec: 12.42 - lr: 0.010000\n",
+      "2021-09-21 21:05:45,251 epoch 9 - iter 12/32 - loss 0.03863786 - samples/sec: 14.68 - lr: 0.010000\n",
+      "2021-09-21 21:05:45,465 epoch 9 - iter 15/32 - loss 0.03126675 - samples/sec: 14.08 - lr: 0.010000\n",
+      "2021-09-21 21:05:45,665 epoch 9 - iter 18/32 - loss 0.05952520 - samples/sec: 15.03 - lr: 0.010000\n",
+      "2021-09-21 21:05:45,860 epoch 9 - iter 21/32 - loss 0.05120699 - samples/sec: 15.43 - lr: 0.010000\n",
+      "2021-09-21 21:05:46,039 epoch 9 - iter 24/32 - loss 0.04553750 - samples/sec: 16.78 - lr: 0.010000\n",
+      "2021-09-21 21:05:46,219 epoch 9 - iter 27/32 - loss 0.04072309 - samples/sec: 16.70 - lr: 0.010000\n",
+      "2021-09-21 21:05:46,395 epoch 9 - iter 30/32 - loss 0.03703279 - samples/sec: 17.16 - lr: 0.010000\n",
+      "2021-09-21 21:05:46,498 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:46,498 EPOCH 9 done: loss 0.0347 - lr 0.0100000\n",
+      "2021-09-21 21:05:46,624 DEV : loss 1.6488711833953857 - score 0.75\n",
+      "2021-09-21 21:05:46,627 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:05:46,629 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:46,814 epoch 10 - iter 3/32 - loss 0.00928903 - samples/sec: 18.66 - lr: 0.010000\n",
+      "2021-09-21 21:05:46,997 epoch 10 - iter 6/32 - loss 0.00586733 - samples/sec: 16.46 - lr: 0.010000\n",
+      "2021-09-21 21:05:47,184 epoch 10 - iter 9/32 - loss 0.00418007 - samples/sec: 16.17 - lr: 0.010000\n",
+      "2021-09-21 21:05:47,344 epoch 10 - iter 12/32 - loss 0.00349282 - samples/sec: 18.82 - lr: 0.010000\n",
+      "2021-09-21 21:05:47,512 epoch 10 - iter 15/32 - loss 0.00309454 - samples/sec: 17.94 - lr: 0.010000\n",
+      "2021-09-21 21:05:47,667 epoch 10 - iter 18/32 - loss 0.00282471 - samples/sec: 19.34 - lr: 0.010000\n",
+      "2021-09-21 21:05:47,834 epoch 10 - iter 21/32 - loss 0.00259117 - samples/sec: 18.07 - lr: 0.010000\n",
+      "2021-09-21 21:05:48,022 epoch 10 - iter 24/32 - loss 0.00231737 - samples/sec: 16.01 - lr: 0.010000\n",
+      "2021-09-21 21:05:48,199 epoch 10 - iter 27/32 - loss 0.00301723 - samples/sec: 17.03 - lr: 0.010000\n",
+      "2021-09-21 21:05:48,359 epoch 10 - iter 30/32 - loss 0.00278532 - samples/sec: 18.82 - lr: 0.010000\n",
+      "2021-09-21 21:05:48,466 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:48,467 EPOCH 10 done: loss 0.0030 - lr 0.0100000\n",
+      "2021-09-21 21:05:48,676 DEV : loss 1.4668715000152588 - score 0.75\n",
       "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 01:24:35,608 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:24:39,294 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:39,295 Testing using best model ...\n",
-      "2021-09-08 01:24:39,296 loading file temp/best-model.pt\n",
+      "2021-09-21 21:05:48,677 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:05:55,230 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:55,231 Testing using best model ...\n",
+      "2021-09-21 21:05:55,232 loading file temp/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:24:44,110 \t0.75\n",
-      "2021-09-08 01:24:44,110 \n",
+      "2021-09-21 21:06:00,514 \t0.75\n",
+      "2021-09-21 21:06:00,514 \n",
       "Results:\n",
       "- F-score (micro) 0.75\n",
-      "- F-score (macro) 0.5833\n",
+      "- F-score (macro) 0.5\n",
       "- Accuracy 0.75\n",
       "\n",
       "By class:\n",
       "              precision    recall  f1-score   support\n",
       "\n",
-      "     sadness     1.0000    0.5000    0.6667         2\n",
-      "    optimism     0.5000    1.0000    0.6667         1\n",
-      "       anger     1.0000    1.0000    1.0000         1\n",
-      "         joy     0.0000    0.0000    0.0000         0\n",
+      "     sadness     1.0000    1.0000    1.0000         2\n",
+      "    optimism     0.0000    0.0000    0.0000         0\n",
+      "       anger     0.0000    0.0000    0.0000         1\n",
+      "         joy     1.0000    1.0000    1.0000         1\n",
       "\n",
       "   micro avg     0.7500    0.7500    0.7500         4\n",
-      "   macro avg     0.6250    0.6250    0.5833         4\n",
-      "weighted avg     0.8750    0.7500    0.7500         4\n",
+      "   macro avg     0.5000    0.5000    0.5000         4\n",
+      "weighted avg     0.7500    0.7500    0.7500         4\n",
       " samples avg     0.7500    0.7500    0.7500         4\n",
       "\n",
-      "2021-09-08 01:24:44,111 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:51,118 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
+      "2021-09-21 21:06:00,514 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:09,338 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:24:55,162 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:06:21,205 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 36/36 [00:00<00:00, 41861.64it/s]"
+      "100%|██████████| 36/36 [00:00<00:00, 32256.98it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:24:55,165 [b'sadness', b'optimism', b'anger', b'joy']\n",
-      "2021-09-08 01:24:55,176 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:55,178 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:06:21,208 [b'sadness', b'optimism', b'anger', b'joy']\n",
+      "2021-09-21 21:06:21,362 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:21,364 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -2045,26 +2048,24 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:24:55,178 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:55,179 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
-      "2021-09-08 01:24:55,179 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:55,179 Parameters:\n",
-      "2021-09-08 01:24:55,180  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:24:55,180  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:24:55,180  - patience: \"3\"\n",
-      "2021-09-08 01:24:55,180  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:24:55,181  - max_epochs: \"10\"\n",
-      "2021-09-08 01:24:55,181  - shuffle: \"True\"\n",
-      "2021-09-08 01:24:55,181  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:24:55,182  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:24:55,182 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:55,183 Model training base path: \"temp\"\n",
-      "2021-09-08 01:24:55,184 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:55,184 Device: cuda:0\n",
-      "2021-09-08 01:24:55,184 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:55,185 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:24:55,191 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:55,357 epoch 1 - iter 3/32 - loss 0.59879914 - samples/sec: 20.25 - lr: 0.020000\n"
+      "2021-09-21 21:06:21,365 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:21,365 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
+      "2021-09-21 21:06:21,365 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:21,366 Parameters:\n",
+      "2021-09-21 21:06:21,366  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:06:21,366  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:06:21,366  - patience: \"3\"\n",
+      "2021-09-21 21:06:21,367  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:06:21,367  - max_epochs: \"10\"\n",
+      "2021-09-21 21:06:21,367  - shuffle: \"True\"\n",
+      "2021-09-21 21:06:21,367  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:06:21,368  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:06:21,368 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:21,368 Model training base path: \"temp\"\n",
+      "2021-09-21 21:06:21,369 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:21,369 Device: cuda:0\n",
+      "2021-09-21 21:06:21,369 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:21,369 Embeddings storage mode: cpu\n"
      ]
     },
     {
@@ -2078,208 +2079,210 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:24:55,513 epoch 1 - iter 6/32 - loss 0.49308185 - samples/sec: 19.25 - lr: 0.020000\n",
-      "2021-09-08 01:24:55,698 epoch 1 - iter 9/32 - loss 0.53836426 - samples/sec: 16.31 - lr: 0.020000\n",
-      "2021-09-08 01:24:55,845 epoch 1 - iter 12/32 - loss 0.74286436 - samples/sec: 20.35 - lr: 0.020000\n",
-      "2021-09-08 01:24:56,045 epoch 1 - iter 15/32 - loss 0.66976653 - samples/sec: 15.05 - lr: 0.020000\n",
-      "2021-09-08 01:24:56,195 epoch 1 - iter 18/32 - loss 0.69042751 - samples/sec: 20.13 - lr: 0.020000\n",
-      "2021-09-08 01:24:56,378 epoch 1 - iter 21/32 - loss 0.64576201 - samples/sec: 16.39 - lr: 0.020000\n",
-      "2021-09-08 01:24:56,530 epoch 1 - iter 24/32 - loss 0.58760140 - samples/sec: 19.84 - lr: 0.020000\n",
-      "2021-09-08 01:24:56,707 epoch 1 - iter 27/32 - loss 0.56332141 - samples/sec: 17.03 - lr: 0.020000\n",
-      "2021-09-08 01:24:56,872 epoch 1 - iter 30/32 - loss 0.57995175 - samples/sec: 18.20 - lr: 0.020000\n",
-      "2021-09-08 01:24:56,970 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:24:56,970 EPOCH 1 done: loss 0.5639 - lr 0.0200000\n",
-      "2021-09-08 01:24:57,070 DEV : loss 1.325430989265442 - score 0.0\n",
-      "2021-09-08 01:24:57,071 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:06:21,596 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:21,764 epoch 1 - iter 3/32 - loss 0.46104585 - samples/sec: 19.61 - lr: 0.020000\n",
+      "2021-09-21 21:06:21,933 epoch 1 - iter 6/32 - loss 0.31555912 - samples/sec: 17.90 - lr: 0.020000\n",
+      "2021-09-21 21:06:22,083 epoch 1 - iter 9/32 - loss 0.23007436 - samples/sec: 20.05 - lr: 0.020000\n",
+      "2021-09-21 21:06:22,242 epoch 1 - iter 12/32 - loss 0.42166785 - samples/sec: 18.89 - lr: 0.020000\n",
+      "2021-09-21 21:06:22,394 epoch 1 - iter 15/32 - loss 0.42638269 - samples/sec: 19.80 - lr: 0.020000\n",
+      "2021-09-21 21:06:22,549 epoch 1 - iter 18/32 - loss 0.50146800 - samples/sec: 19.50 - lr: 0.020000\n",
+      "2021-09-21 21:06:22,715 epoch 1 - iter 21/32 - loss 0.46008902 - samples/sec: 18.17 - lr: 0.020000\n",
+      "2021-09-21 21:06:22,873 epoch 1 - iter 24/32 - loss 0.46149407 - samples/sec: 19.06 - lr: 0.020000\n",
+      "2021-09-21 21:06:23,030 epoch 1 - iter 27/32 - loss 0.61562901 - samples/sec: 19.16 - lr: 0.020000\n",
+      "2021-09-21 21:06:23,200 epoch 1 - iter 30/32 - loss 0.65073377 - samples/sec: 17.71 - lr: 0.020000\n",
+      "2021-09-21 21:06:23,312 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:23,312 EPOCH 1 done: loss 0.6636 - lr 0.0200000\n",
+      "2021-09-21 21:06:27,393 DEV : loss 1.2977452278137207 - score 0.5\n",
+      "2021-09-21 21:06:27,394 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:06:43,538 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:43,765 epoch 2 - iter 3/32 - loss 0.93373268 - samples/sec: 14.08 - lr: 0.020000\n",
+      "2021-09-21 21:06:44,025 epoch 2 - iter 6/32 - loss 0.57137944 - samples/sec: 11.56 - lr: 0.020000\n",
+      "2021-09-21 21:06:44,268 epoch 2 - iter 9/32 - loss 0.48313582 - samples/sec: 12.41 - lr: 0.020000\n",
+      "2021-09-21 21:06:44,490 epoch 2 - iter 12/32 - loss 0.43665237 - samples/sec: 13.55 - lr: 0.020000\n",
+      "2021-09-21 21:06:44,719 epoch 2 - iter 15/32 - loss 0.36120649 - samples/sec: 13.15 - lr: 0.020000\n",
+      "2021-09-21 21:06:44,948 epoch 2 - iter 18/32 - loss 0.33483602 - samples/sec: 13.15 - lr: 0.020000\n",
+      "2021-09-21 21:06:45,167 epoch 2 - iter 21/32 - loss 0.32076236 - samples/sec: 13.72 - lr: 0.020000\n",
+      "2021-09-21 21:06:45,420 epoch 2 - iter 24/32 - loss 0.33107466 - samples/sec: 11.88 - lr: 0.020000\n",
+      "2021-09-21 21:06:45,624 epoch 2 - iter 27/32 - loss 0.29956287 - samples/sec: 14.71 - lr: 0.020000\n",
+      "2021-09-21 21:06:45,868 epoch 2 - iter 30/32 - loss 0.36475818 - samples/sec: 12.33 - lr: 0.020000\n",
+      "2021-09-21 21:06:46,033 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:46,034 EPOCH 2 done: loss 0.3887 - lr 0.0200000\n",
+      "2021-09-21 21:06:46,199 DEV : loss 0.955615758895874 - score 0.25\n",
+      "2021-09-21 21:06:46,200 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:06:46,202 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:46,459 epoch 3 - iter 3/32 - loss 0.26450687 - samples/sec: 13.22 - lr: 0.020000\n",
+      "2021-09-21 21:06:46,698 epoch 3 - iter 6/32 - loss 0.22182501 - samples/sec: 12.57 - lr: 0.020000\n",
+      "2021-09-21 21:06:46,956 epoch 3 - iter 9/32 - loss 0.23450100 - samples/sec: 11.67 - lr: 0.020000\n",
+      "2021-09-21 21:06:47,166 epoch 3 - iter 12/32 - loss 0.28782270 - samples/sec: 14.33 - lr: 0.020000\n",
+      "2021-09-21 21:06:47,391 epoch 3 - iter 15/32 - loss 0.28440326 - samples/sec: 13.36 - lr: 0.020000\n",
+      "2021-09-21 21:06:47,597 epoch 3 - iter 18/32 - loss 0.23825743 - samples/sec: 14.63 - lr: 0.020000\n",
+      "2021-09-21 21:06:47,801 epoch 3 - iter 21/32 - loss 0.20516876 - samples/sec: 14.75 - lr: 0.020000\n",
+      "2021-09-21 21:06:48,013 epoch 3 - iter 24/32 - loss 0.22416387 - samples/sec: 14.21 - lr: 0.020000\n",
+      "2021-09-21 21:06:48,251 epoch 3 - iter 27/32 - loss 0.27893542 - samples/sec: 12.66 - lr: 0.020000\n",
+      "2021-09-21 21:06:48,484 epoch 3 - iter 30/32 - loss 0.29674185 - samples/sec: 12.91 - lr: 0.020000\n",
+      "2021-09-21 21:06:48,638 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:48,639 EPOCH 3 done: loss 0.2811 - lr 0.0200000\n",
+      "2021-09-21 21:06:48,835 DEV : loss 0.6379813551902771 - score 0.25\n",
+      "2021-09-21 21:06:48,835 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:06:48,838 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:49,214 epoch 4 - iter 3/32 - loss 0.08089394 - samples/sec: 9.38 - lr: 0.020000\n",
+      "2021-09-21 21:06:49,520 epoch 4 - iter 6/32 - loss 0.04163733 - samples/sec: 9.82 - lr: 0.020000\n",
+      "2021-09-21 21:06:49,785 epoch 4 - iter 9/32 - loss 0.10022852 - samples/sec: 11.36 - lr: 0.020000\n",
+      "2021-09-21 21:06:50,022 epoch 4 - iter 12/32 - loss 0.09607406 - samples/sec: 12.71 - lr: 0.020000\n",
+      "2021-09-21 21:06:50,248 epoch 4 - iter 15/32 - loss 0.10142169 - samples/sec: 13.28 - lr: 0.020000\n",
+      "2021-09-21 21:06:50,480 epoch 4 - iter 18/32 - loss 0.09249022 - samples/sec: 12.99 - lr: 0.020000\n",
+      "2021-09-21 21:06:50,700 epoch 4 - iter 21/32 - loss 0.11639922 - samples/sec: 13.69 - lr: 0.020000\n",
+      "2021-09-21 21:06:50,910 epoch 4 - iter 24/32 - loss 0.10278345 - samples/sec: 14.32 - lr: 0.020000\n",
+      "2021-09-21 21:06:51,117 epoch 4 - iter 27/32 - loss 0.09197470 - samples/sec: 14.54 - lr: 0.020000\n",
+      "2021-09-21 21:06:51,354 epoch 4 - iter 30/32 - loss 0.08661702 - samples/sec: 12.70 - lr: 0.020000\n",
+      "2021-09-21 21:06:51,489 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:51,489 EPOCH 4 done: loss 0.0816 - lr 0.0200000\n",
+      "2021-09-21 21:06:51,746 DEV : loss 0.7525721788406372 - score 0.75\n",
+      "2021-09-21 21:06:51,747 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:25:01,324 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:01,493 epoch 2 - iter 3/32 - loss 0.55777244 - samples/sec: 19.93 - lr: 0.020000\n",
-      "2021-09-08 01:25:01,657 epoch 2 - iter 6/32 - loss 0.49627303 - samples/sec: 18.38 - lr: 0.020000\n",
-      "2021-09-08 01:25:01,855 epoch 2 - iter 9/32 - loss 0.44810672 - samples/sec: 15.19 - lr: 0.020000\n",
-      "2021-09-08 01:25:02,000 epoch 2 - iter 12/32 - loss 0.43500227 - samples/sec: 20.79 - lr: 0.020000\n",
-      "2021-09-08 01:25:02,155 epoch 2 - iter 15/32 - loss 0.51584049 - samples/sec: 19.41 - lr: 0.020000\n",
-      "2021-09-08 01:25:02,339 epoch 2 - iter 18/32 - loss 0.48967477 - samples/sec: 16.41 - lr: 0.020000\n",
-      "2021-09-08 01:25:02,494 epoch 2 - iter 21/32 - loss 0.44701313 - samples/sec: 19.48 - lr: 0.020000\n",
-      "2021-09-08 01:25:02,672 epoch 2 - iter 24/32 - loss 0.55100225 - samples/sec: 16.90 - lr: 0.020000\n",
-      "2021-09-08 01:25:02,816 epoch 2 - iter 27/32 - loss 0.55096689 - samples/sec: 20.78 - lr: 0.020000\n",
-      "2021-09-08 01:25:03,015 epoch 2 - iter 30/32 - loss 0.58612518 - samples/sec: 15.17 - lr: 0.020000\n",
-      "2021-09-08 01:25:03,123 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:03,123 EPOCH 2 done: loss 0.5640 - lr 0.0200000\n",
-      "2021-09-08 01:25:03,191 DEV : loss 0.40378642082214355 - score 0.75\n",
-      "2021-09-08 01:25:03,192 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:07:09,119 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:09,294 epoch 5 - iter 3/32 - loss 0.11197447 - samples/sec: 18.56 - lr: 0.020000\n",
+      "2021-09-21 21:07:09,471 epoch 5 - iter 6/32 - loss 0.05954806 - samples/sec: 17.08 - lr: 0.020000\n",
+      "2021-09-21 21:07:09,645 epoch 5 - iter 9/32 - loss 0.04383841 - samples/sec: 17.34 - lr: 0.020000\n",
+      "2021-09-21 21:07:09,828 epoch 5 - iter 12/32 - loss 0.06159198 - samples/sec: 16.44 - lr: 0.020000\n",
+      "2021-09-21 21:07:10,006 epoch 5 - iter 15/32 - loss 0.04951125 - samples/sec: 16.90 - lr: 0.020000\n",
+      "2021-09-21 21:07:10,185 epoch 5 - iter 18/32 - loss 0.05031858 - samples/sec: 16.91 - lr: 0.020000\n",
+      "2021-09-21 21:07:10,368 epoch 5 - iter 21/32 - loss 0.04323301 - samples/sec: 16.44 - lr: 0.020000\n",
+      "2021-09-21 21:07:10,552 epoch 5 - iter 24/32 - loss 0.04094254 - samples/sec: 16.33 - lr: 0.020000\n",
+      "2021-09-21 21:07:10,724 epoch 5 - iter 27/32 - loss 0.03777009 - samples/sec: 17.49 - lr: 0.020000\n",
+      "2021-09-21 21:07:10,917 epoch 5 - iter 30/32 - loss 0.08097326 - samples/sec: 15.65 - lr: 0.020000\n",
+      "2021-09-21 21:07:11,038 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:11,038 EPOCH 5 done: loss 0.0767 - lr 0.0200000\n",
+      "2021-09-21 21:07:11,291 DEV : loss 0.6524877548217773 - score 0.75\n",
+      "2021-09-21 21:07:11,293 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:25:07,200 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:07,404 epoch 3 - iter 3/32 - loss 0.31938088 - samples/sec: 15.88 - lr: 0.020000\n",
-      "2021-09-08 01:25:07,548 epoch 3 - iter 6/32 - loss 0.25096836 - samples/sec: 20.87 - lr: 0.020000\n",
-      "2021-09-08 01:25:07,737 epoch 3 - iter 9/32 - loss 0.29505950 - samples/sec: 15.93 - lr: 0.020000\n",
-      "2021-09-08 01:25:07,893 epoch 3 - iter 12/32 - loss 0.33652296 - samples/sec: 19.31 - lr: 0.020000\n",
-      "2021-09-08 01:25:08,046 epoch 3 - iter 15/32 - loss 0.37739316 - samples/sec: 19.68 - lr: 0.020000\n",
-      "2021-09-08 01:25:08,217 epoch 3 - iter 18/32 - loss 0.34651441 - samples/sec: 17.60 - lr: 0.020000\n",
-      "2021-09-08 01:25:08,367 epoch 3 - iter 21/32 - loss 0.33894814 - samples/sec: 20.19 - lr: 0.020000\n",
-      "2021-09-08 01:25:08,558 epoch 3 - iter 24/32 - loss 0.38229876 - samples/sec: 15.69 - lr: 0.020000\n",
-      "2021-09-08 01:25:08,705 epoch 3 - iter 27/32 - loss 0.37267682 - samples/sec: 20.59 - lr: 0.020000\n",
-      "2021-09-08 01:25:08,888 epoch 3 - iter 30/32 - loss 0.36361732 - samples/sec: 16.40 - lr: 0.020000\n",
-      "2021-09-08 01:25:08,985 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:08,986 EPOCH 3 done: loss 0.3973 - lr 0.0200000\n",
-      "2021-09-08 01:25:09,067 DEV : loss 0.477238267660141 - score 0.25\n",
-      "2021-09-08 01:25:09,068 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:25:09,070 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:09,269 epoch 4 - iter 3/32 - loss 0.23727166 - samples/sec: 16.22 - lr: 0.020000\n",
-      "2021-09-08 01:25:09,411 epoch 4 - iter 6/32 - loss 0.17565876 - samples/sec: 21.18 - lr: 0.020000\n",
-      "2021-09-08 01:25:09,584 epoch 4 - iter 9/32 - loss 0.28965679 - samples/sec: 17.40 - lr: 0.020000\n",
-      "2021-09-08 01:25:09,750 epoch 4 - iter 12/32 - loss 0.28845651 - samples/sec: 18.17 - lr: 0.020000\n",
-      "2021-09-08 01:25:09,910 epoch 4 - iter 15/32 - loss 0.23208577 - samples/sec: 18.86 - lr: 0.020000\n",
-      "2021-09-08 01:25:10,063 epoch 4 - iter 18/32 - loss 0.21785499 - samples/sec: 19.64 - lr: 0.020000\n",
-      "2021-09-08 01:25:10,240 epoch 4 - iter 21/32 - loss 0.19903838 - samples/sec: 17.04 - lr: 0.020000\n",
-      "2021-09-08 01:25:10,381 epoch 4 - iter 24/32 - loss 0.18518899 - samples/sec: 21.35 - lr: 0.020000\n",
-      "2021-09-08 01:25:10,574 epoch 4 - iter 27/32 - loss 0.17078322 - samples/sec: 15.64 - lr: 0.020000\n",
-      "2021-09-08 01:25:10,742 epoch 4 - iter 30/32 - loss 0.19054976 - samples/sec: 17.93 - lr: 0.020000\n",
-      "2021-09-08 01:25:10,836 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:10,836 EPOCH 4 done: loss 0.2012 - lr 0.0200000\n",
-      "2021-09-08 01:25:10,927 DEV : loss 0.5395405292510986 - score 0.5\n",
-      "2021-09-08 01:25:10,928 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:25:10,932 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:11,166 epoch 5 - iter 3/32 - loss 0.22510376 - samples/sec: 15.13 - lr: 0.020000\n",
-      "2021-09-08 01:25:11,310 epoch 5 - iter 6/32 - loss 0.20992342 - samples/sec: 21.00 - lr: 0.020000\n",
-      "2021-09-08 01:25:11,468 epoch 5 - iter 9/32 - loss 0.19041110 - samples/sec: 18.97 - lr: 0.020000\n",
-      "2021-09-08 01:25:11,635 epoch 5 - iter 12/32 - loss 0.14763632 - samples/sec: 18.11 - lr: 0.020000\n",
-      "2021-09-08 01:25:11,780 epoch 5 - iter 15/32 - loss 0.12805313 - samples/sec: 20.71 - lr: 0.020000\n",
-      "2021-09-08 01:25:11,975 epoch 5 - iter 18/32 - loss 0.11340274 - samples/sec: 15.42 - lr: 0.020000\n",
-      "2021-09-08 01:25:12,116 epoch 5 - iter 21/32 - loss 0.10358930 - samples/sec: 21.45 - lr: 0.020000\n",
-      "2021-09-08 01:25:12,319 epoch 5 - iter 24/32 - loss 0.13403350 - samples/sec: 14.81 - lr: 0.020000\n",
-      "2021-09-08 01:25:12,457 epoch 5 - iter 27/32 - loss 0.11975021 - samples/sec: 21.89 - lr: 0.020000\n",
-      "2021-09-08 01:25:12,621 epoch 5 - iter 30/32 - loss 0.15236577 - samples/sec: 18.27 - lr: 0.020000\n",
-      "2021-09-08 01:25:12,737 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:12,738 EPOCH 5 done: loss 0.1576 - lr 0.0200000\n",
-      "2021-09-08 01:25:12,803 DEV : loss 1.323738932609558 - score 0.5\n",
-      "2021-09-08 01:25:12,804 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:25:12,806 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:12,992 epoch 6 - iter 3/32 - loss 0.08611588 - samples/sec: 17.78 - lr: 0.020000\n",
-      "2021-09-08 01:25:13,138 epoch 6 - iter 6/32 - loss 0.20310615 - samples/sec: 20.59 - lr: 0.020000\n",
-      "2021-09-08 01:25:13,312 epoch 6 - iter 9/32 - loss 0.16396115 - samples/sec: 17.36 - lr: 0.020000\n",
-      "2021-09-08 01:25:13,478 epoch 6 - iter 12/32 - loss 0.15700181 - samples/sec: 18.16 - lr: 0.020000\n",
-      "2021-09-08 01:25:13,629 epoch 6 - iter 15/32 - loss 0.18672854 - samples/sec: 19.90 - lr: 0.020000\n",
-      "2021-09-08 01:25:13,805 epoch 6 - iter 18/32 - loss 0.15682625 - samples/sec: 17.07 - lr: 0.020000\n",
-      "2021-09-08 01:25:13,940 epoch 6 - iter 21/32 - loss 0.13530773 - samples/sec: 22.42 - lr: 0.020000\n",
-      "2021-09-08 01:25:14,105 epoch 6 - iter 24/32 - loss 0.11882421 - samples/sec: 18.23 - lr: 0.020000\n",
-      "2021-09-08 01:25:14,259 epoch 6 - iter 27/32 - loss 0.13041860 - samples/sec: 19.49 - lr: 0.020000\n",
-      "2021-09-08 01:25:14,407 epoch 6 - iter 30/32 - loss 0.15041513 - samples/sec: 20.46 - lr: 0.020000\n",
-      "2021-09-08 01:25:14,543 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:14,544 EPOCH 6 done: loss 0.1411 - lr 0.0200000\n"
+      "2021-09-21 21:07:21,406 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:21,551 epoch 6 - iter 3/32 - loss 0.00251468 - samples/sec: 22.83 - lr: 0.020000\n",
+      "2021-09-21 21:07:21,684 epoch 6 - iter 6/32 - loss 0.00891651 - samples/sec: 22.67 - lr: 0.020000\n",
+      "2021-09-21 21:07:21,816 epoch 6 - iter 9/32 - loss 0.07452097 - samples/sec: 22.97 - lr: 0.020000\n",
+      "2021-09-21 21:07:21,947 epoch 6 - iter 12/32 - loss 0.05827426 - samples/sec: 22.89 - lr: 0.020000\n",
+      "2021-09-21 21:07:22,079 epoch 6 - iter 15/32 - loss 0.05572400 - samples/sec: 22.88 - lr: 0.020000\n",
+      "2021-09-21 21:07:22,208 epoch 6 - iter 18/32 - loss 0.04652624 - samples/sec: 23.31 - lr: 0.020000\n",
+      "2021-09-21 21:07:22,338 epoch 6 - iter 21/32 - loss 0.04037562 - samples/sec: 23.17 - lr: 0.020000\n",
+      "2021-09-21 21:07:22,469 epoch 6 - iter 24/32 - loss 0.03755188 - samples/sec: 22.95 - lr: 0.020000\n",
+      "2021-09-21 21:07:22,600 epoch 6 - iter 27/32 - loss 0.09414142 - samples/sec: 22.95 - lr: 0.020000\n",
+      "2021-09-21 21:07:22,728 epoch 6 - iter 30/32 - loss 0.08510973 - samples/sec: 23.48 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:25:14,610 DEV : loss 0.9146150946617126 - score 0.25\n",
-      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:25:14,611 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:25:14,613 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:14,818 epoch 7 - iter 3/32 - loss 0.15999090 - samples/sec: 15.79 - lr: 0.010000\n",
-      "2021-09-08 01:25:14,952 epoch 7 - iter 6/32 - loss 0.08676788 - samples/sec: 22.42 - lr: 0.010000\n",
-      "2021-09-08 01:25:15,121 epoch 7 - iter 9/32 - loss 0.12015262 - samples/sec: 17.82 - lr: 0.010000\n",
-      "2021-09-08 01:25:15,268 epoch 7 - iter 12/32 - loss 0.09308694 - samples/sec: 20.51 - lr: 0.010000\n",
-      "2021-09-08 01:25:15,435 epoch 7 - iter 15/32 - loss 0.07620374 - samples/sec: 18.04 - lr: 0.010000\n",
-      "2021-09-08 01:25:15,585 epoch 7 - iter 18/32 - loss 0.07080594 - samples/sec: 20.09 - lr: 0.010000\n",
-      "2021-09-08 01:25:15,749 epoch 7 - iter 21/32 - loss 0.06123278 - samples/sec: 18.33 - lr: 0.010000\n",
-      "2021-09-08 01:25:15,920 epoch 7 - iter 24/32 - loss 0.05421237 - samples/sec: 17.59 - lr: 0.010000\n",
-      "2021-09-08 01:25:16,063 epoch 7 - iter 27/32 - loss 0.05077834 - samples/sec: 21.04 - lr: 0.010000\n",
-      "2021-09-08 01:25:16,248 epoch 7 - iter 30/32 - loss 0.04594761 - samples/sec: 16.29 - lr: 0.010000\n",
-      "2021-09-08 01:25:16,365 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:16,366 EPOCH 7 done: loss 0.0436 - lr 0.0100000\n",
-      "2021-09-08 01:25:16,434 DEV : loss 1.2424883842468262 - score 0.5\n",
-      "2021-09-08 01:25:16,434 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:25:16,437 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:16,631 epoch 8 - iter 3/32 - loss 0.04057781 - samples/sec: 16.65 - lr: 0.010000\n",
-      "2021-09-08 01:25:16,794 epoch 8 - iter 6/32 - loss 0.09287652 - samples/sec: 18.49 - lr: 0.010000\n",
-      "2021-09-08 01:25:16,946 epoch 8 - iter 9/32 - loss 0.07686170 - samples/sec: 19.79 - lr: 0.010000\n",
-      "2021-09-08 01:25:17,120 epoch 8 - iter 12/32 - loss 0.06258579 - samples/sec: 17.31 - lr: 0.010000\n",
-      "2021-09-08 01:25:17,262 epoch 8 - iter 15/32 - loss 0.05698823 - samples/sec: 21.21 - lr: 0.010000\n",
-      "2021-09-08 01:25:17,433 epoch 8 - iter 18/32 - loss 0.04846384 - samples/sec: 17.59 - lr: 0.010000\n",
-      "2021-09-08 01:25:17,593 epoch 8 - iter 21/32 - loss 0.04169245 - samples/sec: 18.81 - lr: 0.010000\n",
-      "2021-09-08 01:25:17,740 epoch 8 - iter 24/32 - loss 0.07825272 - samples/sec: 20.50 - lr: 0.010000\n",
-      "2021-09-08 01:25:17,912 epoch 8 - iter 27/32 - loss 0.07260939 - samples/sec: 17.48 - lr: 0.010000\n",
-      "2021-09-08 01:25:18,049 epoch 8 - iter 30/32 - loss 0.06619202 - samples/sec: 22.09 - lr: 0.010000\n",
-      "2021-09-08 01:25:18,179 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:18,179 EPOCH 8 done: loss 0.0621 - lr 0.0100000\n",
-      "2021-09-08 01:25:18,245 DEV : loss 1.0585857629776 - score 0.25\n",
-      "2021-09-08 01:25:18,246 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:25:18,248 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:18,442 epoch 9 - iter 3/32 - loss 0.00270315 - samples/sec: 16.72 - lr: 0.010000\n",
-      "2021-09-08 01:25:18,607 epoch 9 - iter 6/32 - loss 0.05793445 - samples/sec: 18.19 - lr: 0.010000\n",
-      "2021-09-08 01:25:18,751 epoch 9 - iter 9/32 - loss 0.04637118 - samples/sec: 20.91 - lr: 0.010000\n",
-      "2021-09-08 01:25:18,948 epoch 9 - iter 12/32 - loss 0.04256559 - samples/sec: 15.24 - lr: 0.010000\n",
-      "2021-09-08 01:25:19,083 epoch 9 - iter 15/32 - loss 0.03445614 - samples/sec: 22.44 - lr: 0.010000\n",
-      "2021-09-08 01:25:19,258 epoch 9 - iter 18/32 - loss 0.02883215 - samples/sec: 17.17 - lr: 0.010000\n",
-      "2021-09-08 01:25:19,406 epoch 9 - iter 21/32 - loss 0.02502831 - samples/sec: 20.45 - lr: 0.010000\n",
-      "2021-09-08 01:25:19,552 epoch 9 - iter 24/32 - loss 0.02228355 - samples/sec: 20.56 - lr: 0.010000\n",
-      "2021-09-08 01:25:19,735 epoch 9 - iter 27/32 - loss 0.01997299 - samples/sec: 16.46 - lr: 0.010000\n",
-      "2021-09-08 01:25:19,873 epoch 9 - iter 30/32 - loss 0.01835286 - samples/sec: 21.84 - lr: 0.010000\n",
-      "2021-09-08 01:25:19,997 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:19,998 EPOCH 9 done: loss 0.0172 - lr 0.0100000\n",
-      "2021-09-08 01:25:20,089 DEV : loss 1.1793416738510132 - score 0.25\n",
-      "2021-09-08 01:25:20,090 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:25:20,092 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:20,248 epoch 10 - iter 3/32 - loss 0.15241284 - samples/sec: 21.10 - lr: 0.010000\n",
-      "2021-09-08 01:25:20,428 epoch 10 - iter 6/32 - loss 0.08577709 - samples/sec: 16.78 - lr: 0.010000\n",
-      "2021-09-08 01:25:20,565 epoch 10 - iter 9/32 - loss 0.05795825 - samples/sec: 21.94 - lr: 0.010000\n",
-      "2021-09-08 01:25:20,758 epoch 10 - iter 12/32 - loss 0.04449205 - samples/sec: 15.55 - lr: 0.010000\n",
-      "2021-09-08 01:25:20,893 epoch 10 - iter 15/32 - loss 0.03608031 - samples/sec: 22.38 - lr: 0.010000\n",
-      "2021-09-08 01:25:21,035 epoch 10 - iter 18/32 - loss 0.03078122 - samples/sec: 21.25 - lr: 0.010000\n",
-      "2021-09-08 01:25:21,229 epoch 10 - iter 21/32 - loss 0.02735957 - samples/sec: 15.47 - lr: 0.010000\n",
-      "2021-09-08 01:25:21,364 epoch 10 - iter 24/32 - loss 0.02418093 - samples/sec: 22.42 - lr: 0.010000\n",
-      "2021-09-08 01:25:21,520 epoch 10 - iter 27/32 - loss 0.02171400 - samples/sec: 19.31 - lr: 0.010000\n",
-      "2021-09-08 01:25:21,710 epoch 10 - iter 30/32 - loss 0.01960396 - samples/sec: 15.81 - lr: 0.010000\n",
-      "2021-09-08 01:25:21,805 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:21,806 EPOCH 10 done: loss 0.0184 - lr 0.0100000\n",
-      "2021-09-08 01:25:21,975 DEV : loss 1.3369457721710205 - score 0.25\n",
-      "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 01:25:21,976 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:25:26,425 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:26,425 Testing using best model ...\n",
-      "2021-09-08 01:25:26,427 loading file temp/best-model.pt\n",
+      "2021-09-21 21:07:22,824 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:22,825 EPOCH 6 done: loss 0.0816 - lr 0.0200000\n",
+      "2021-09-21 21:07:22,964 DEV : loss 1.128293514251709 - score 0.5\n",
+      "2021-09-21 21:07:22,965 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:07:23,062 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:23,301 epoch 7 - iter 3/32 - loss 0.01150059 - samples/sec: 14.65 - lr: 0.020000\n",
+      "2021-09-21 21:07:23,520 epoch 7 - iter 6/32 - loss 0.19406647 - samples/sec: 13.70 - lr: 0.020000\n",
+      "2021-09-21 21:07:23,726 epoch 7 - iter 9/32 - loss 0.13024301 - samples/sec: 14.64 - lr: 0.020000\n",
+      "2021-09-21 21:07:23,925 epoch 7 - iter 12/32 - loss 0.12206750 - samples/sec: 15.15 - lr: 0.020000\n",
+      "2021-09-21 21:07:24,111 epoch 7 - iter 15/32 - loss 0.09769360 - samples/sec: 16.15 - lr: 0.020000\n",
+      "2021-09-21 21:07:24,305 epoch 7 - iter 18/32 - loss 0.08144305 - samples/sec: 15.57 - lr: 0.020000\n",
+      "2021-09-21 21:07:24,484 epoch 7 - iter 21/32 - loss 0.06985399 - samples/sec: 16.79 - lr: 0.020000\n",
+      "2021-09-21 21:07:24,657 epoch 7 - iter 24/32 - loss 0.06118303 - samples/sec: 17.36 - lr: 0.020000\n",
+      "2021-09-21 21:07:24,875 epoch 7 - iter 27/32 - loss 0.05565609 - samples/sec: 13.81 - lr: 0.020000\n",
+      "2021-09-21 21:07:25,073 epoch 7 - iter 30/32 - loss 0.05376972 - samples/sec: 15.23 - lr: 0.020000\n",
+      "2021-09-21 21:07:25,206 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:25,207 EPOCH 7 done: loss 0.0505 - lr 0.0200000\n",
+      "2021-09-21 21:07:25,431 DEV : loss 1.474938154220581 - score 0.25\n",
+      "2021-09-21 21:07:25,432 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:07:25,508 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:25,725 epoch 8 - iter 3/32 - loss 0.01287328 - samples/sec: 15.68 - lr: 0.020000\n",
+      "2021-09-21 21:07:25,913 epoch 8 - iter 6/32 - loss 0.01153342 - samples/sec: 16.02 - lr: 0.020000\n",
+      "2021-09-21 21:07:26,135 epoch 8 - iter 9/32 - loss 0.02423228 - samples/sec: 13.57 - lr: 0.020000\n",
+      "2021-09-21 21:07:26,337 epoch 8 - iter 12/32 - loss 0.01841022 - samples/sec: 14.95 - lr: 0.020000\n",
+      "2021-09-21 21:07:26,533 epoch 8 - iter 15/32 - loss 0.01476593 - samples/sec: 15.32 - lr: 0.020000\n",
+      "2021-09-21 21:07:26,670 epoch 8 - iter 18/32 - loss 0.01340213 - samples/sec: 21.95 - lr: 0.020000\n",
+      "2021-09-21 21:07:26,806 epoch 8 - iter 21/32 - loss 0.01151121 - samples/sec: 22.21 - lr: 0.020000\n",
+      "2021-09-21 21:07:26,941 epoch 8 - iter 24/32 - loss 0.01018220 - samples/sec: 22.26 - lr: 0.020000\n",
+      "2021-09-21 21:07:27,077 epoch 8 - iter 27/32 - loss 0.00907677 - samples/sec: 22.25 - lr: 0.020000\n",
+      "2021-09-21 21:07:27,217 epoch 8 - iter 30/32 - loss 0.03324114 - samples/sec: 21.40 - lr: 0.020000\n",
+      "2021-09-21 21:07:27,313 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:27,314 EPOCH 8 done: loss 0.0313 - lr 0.0200000\n",
+      "2021-09-21 21:07:27,472 DEV : loss 1.0545098781585693 - score 0.75\n",
+      "2021-09-21 21:07:27,478 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:07:27,480 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:27,761 epoch 9 - iter 3/32 - loss 0.00025140 - samples/sec: 11.87 - lr: 0.020000\n",
+      "2021-09-21 21:07:28,010 epoch 9 - iter 6/32 - loss 0.00548272 - samples/sec: 12.10 - lr: 0.020000\n",
+      "2021-09-21 21:07:28,250 epoch 9 - iter 9/32 - loss 0.00392534 - samples/sec: 12.53 - lr: 0.020000\n",
+      "2021-09-21 21:07:28,477 epoch 9 - iter 12/32 - loss 0.00302361 - samples/sec: 13.26 - lr: 0.020000\n",
+      "2021-09-21 21:07:28,728 epoch 9 - iter 15/32 - loss 0.00389103 - samples/sec: 11.97 - lr: 0.020000\n",
+      "2021-09-21 21:07:28,998 epoch 9 - iter 18/32 - loss 0.00382179 - samples/sec: 11.15 - lr: 0.020000\n",
+      "2021-09-21 21:07:29,252 epoch 9 - iter 21/32 - loss 0.00402732 - samples/sec: 11.82 - lr: 0.020000\n",
+      "2021-09-21 21:07:29,508 epoch 9 - iter 24/32 - loss 0.00355070 - samples/sec: 11.77 - lr: 0.020000\n",
+      "2021-09-21 21:07:29,775 epoch 9 - iter 27/32 - loss 0.00372860 - samples/sec: 11.25 - lr: 0.020000\n",
+      "2021-09-21 21:07:30,016 epoch 9 - iter 30/32 - loss 0.05519327 - samples/sec: 12.48 - lr: 0.020000\n",
+      "2021-09-21 21:07:30,191 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:30,192 EPOCH 9 done: loss 0.0518 - lr 0.0200000\n",
+      "2021-09-21 21:07:30,366 DEV : loss 1.5982365608215332 - score 0.5\n",
+      "Epoch     9: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:07:30,367 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:07:30,379 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:30,671 epoch 10 - iter 3/32 - loss 0.00072118 - samples/sec: 11.77 - lr: 0.010000\n",
+      "2021-09-21 21:07:30,922 epoch 10 - iter 6/32 - loss 0.00076921 - samples/sec: 11.96 - lr: 0.010000\n",
+      "2021-09-21 21:07:31,167 epoch 10 - iter 9/32 - loss 0.00058673 - samples/sec: 12.27 - lr: 0.010000\n",
+      "2021-09-21 21:07:31,419 epoch 10 - iter 12/32 - loss 0.00081791 - samples/sec: 11.94 - lr: 0.010000\n",
+      "2021-09-21 21:07:31,653 epoch 10 - iter 15/32 - loss 0.00082718 - samples/sec: 12.88 - lr: 0.010000\n",
+      "2021-09-21 21:07:31,901 epoch 10 - iter 18/32 - loss 0.00074119 - samples/sec: 12.13 - lr: 0.010000\n",
+      "2021-09-21 21:07:32,126 epoch 10 - iter 21/32 - loss 0.00073460 - samples/sec: 13.37 - lr: 0.010000\n",
+      "2021-09-21 21:07:32,337 epoch 10 - iter 24/32 - loss 0.00073952 - samples/sec: 14.22 - lr: 0.010000\n",
+      "2021-09-21 21:07:32,528 epoch 10 - iter 27/32 - loss 0.00082900 - samples/sec: 15.79 - lr: 0.010000\n",
+      "2021-09-21 21:07:32,704 epoch 10 - iter 30/32 - loss 0.00077548 - samples/sec: 17.12 - lr: 0.010000\n",
+      "2021-09-21 21:07:32,829 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:32,829 EPOCH 10 done: loss 0.0009 - lr 0.0100000\n",
+      "2021-09-21 21:07:32,920 DEV : loss 1.5822722911834717 - score 0.5\n",
+      "2021-09-21 21:07:32,921 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:07:39,798 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:39,798 Testing using best model ...\n",
+      "2021-09-21 21:07:39,873 loading file temp/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:25:31,595 \t0.75\n",
-      "2021-09-08 01:25:31,597 \n",
+      "2021-09-21 21:07:48,200 \t0.25\n",
+      "2021-09-21 21:07:48,200 \n",
       "Results:\n",
-      "- F-score (micro) 0.75\n",
-      "- F-score (macro) 0.45\n",
-      "- Accuracy 0.75\n",
+      "- F-score (micro) 0.25\n",
+      "- F-score (macro) 0.25\n",
+      "- Accuracy 0.25\n",
       "\n",
       "By class:\n",
       "              precision    recall  f1-score   support\n",
       "\n",
       "     sadness     0.0000    0.0000    0.0000         0\n",
-      "    optimism     0.0000    0.0000    0.0000         0\n",
-      "       anger     1.0000    0.6667    0.8000         3\n",
-      "         joy     1.0000    1.0000    1.0000         1\n",
+      "    optimism     0.0000    0.0000    0.0000         1\n",
+      "       anger     1.0000    1.0000    1.0000         1\n",
+      "         joy     0.0000    0.0000    0.0000         2\n",
       "\n",
-      "   micro avg     0.7500    0.7500    0.7500         4\n",
-      "   macro avg     0.5000    0.4167    0.4500         4\n",
-      "weighted avg     1.0000    0.7500    0.8500         4\n",
-      " samples avg     0.7500    0.7500    0.7500         4\n",
+      "   micro avg     0.2500    0.2500    0.2500         4\n",
+      "   macro avg     0.2500    0.2500    0.2500         4\n",
+      "weighted avg     0.2500    0.2500    0.2500         4\n",
+      " samples avg     0.2500    0.2500    0.2500         4\n",
       "\n",
-      "2021-09-08 01:25:31,598 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:38,906 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
+      "2021-09-21 21:07:48,201 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:00,347 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:25:42,868 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:08:04,911 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 36/36 [00:00<00:00, 38946.34it/s]"
+      "100%|██████████| 36/36 [00:00<00:00, 46161.71it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:25:42,871 [b'sadness', b'optimism', b'anger', b'joy']\n",
-      "2021-09-08 01:25:43,007 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:43,009 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:08:04,914 [b'sadness', b'optimism', b'anger', b'joy']\n",
+      "2021-09-21 21:08:04,923 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:04,925 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -2592,24 +2595,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:25:43,010 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:43,011 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
-      "2021-09-08 01:25:43,011 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:43,011 Parameters:\n",
-      "2021-09-08 01:25:43,012  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:25:43,012  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:25:43,013  - patience: \"3\"\n",
-      "2021-09-08 01:25:43,013  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:25:43,014  - max_epochs: \"10\"\n",
-      "2021-09-08 01:25:43,014  - shuffle: \"True\"\n",
-      "2021-09-08 01:25:43,015  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:25:43,016  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:25:43,016 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:43,017 Model training base path: \"temp\"\n",
-      "2021-09-08 01:25:43,017 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:43,018 Device: cuda:0\n",
-      "2021-09-08 01:25:43,018 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:43,019 Embeddings storage mode: cpu\n"
+      "2021-09-21 21:08:04,925 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:04,925 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
+      "2021-09-21 21:08:04,926 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:04,926 Parameters:\n",
+      "2021-09-21 21:08:04,926  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:08:04,927  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:08:04,927  - patience: \"3\"\n",
+      "2021-09-21 21:08:04,927  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:08:04,927  - max_epochs: \"10\"\n",
+      "2021-09-21 21:08:04,928  - shuffle: \"True\"\n",
+      "2021-09-21 21:08:04,928  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:08:04,928  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:08:04,929 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:04,929 Model training base path: \"temp\"\n",
+      "2021-09-21 21:08:04,929 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:04,929 Device: cuda:0\n",
+      "2021-09-21 21:08:04,930 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:04,930 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:08:04,936 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -2623,192 +2627,191 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:25:43,205 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:43,394 epoch 1 - iter 3/32 - loss 0.76715110 - samples/sec: 17.54 - lr: 0.020000\n",
-      "2021-09-08 01:25:43,578 epoch 1 - iter 6/32 - loss 0.56301487 - samples/sec: 16.44 - lr: 0.020000\n",
-      "2021-09-08 01:25:43,759 epoch 1 - iter 9/32 - loss 0.58988268 - samples/sec: 16.65 - lr: 0.020000\n",
-      "2021-09-08 01:25:43,912 epoch 1 - iter 12/32 - loss 0.77737564 - samples/sec: 19.75 - lr: 0.020000\n",
-      "2021-09-08 01:25:44,056 epoch 1 - iter 15/32 - loss 0.90260753 - samples/sec: 20.94 - lr: 0.020000\n",
-      "2021-09-08 01:25:44,197 epoch 1 - iter 18/32 - loss 0.87926556 - samples/sec: 21.25 - lr: 0.020000\n",
-      "2021-09-08 01:25:44,339 epoch 1 - iter 21/32 - loss 0.80666746 - samples/sec: 21.29 - lr: 0.020000\n",
-      "2021-09-08 01:25:44,480 epoch 1 - iter 24/32 - loss 0.77145234 - samples/sec: 21.29 - lr: 0.020000\n",
-      "2021-09-08 01:25:44,621 epoch 1 - iter 27/32 - loss 0.74500984 - samples/sec: 21.31 - lr: 0.020000\n",
-      "2021-09-08 01:25:44,763 epoch 1 - iter 30/32 - loss 0.73571341 - samples/sec: 21.19 - lr: 0.020000\n",
-      "2021-09-08 01:25:44,856 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:44,856 EPOCH 1 done: loss 0.6979 - lr 0.0200000\n",
-      "2021-09-08 01:25:45,016 DEV : loss 1.3457818031311035 - score 0.25\n",
-      "2021-09-08 01:25:45,017 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:08:05,238 epoch 1 - iter 3/32 - loss 0.77752727 - samples/sec: 12.31 - lr: 0.020000\n",
+      "2021-09-21 21:08:05,486 epoch 1 - iter 6/32 - loss 0.48543104 - samples/sec: 12.13 - lr: 0.020000\n",
+      "2021-09-21 21:08:05,748 epoch 1 - iter 9/32 - loss 0.53412665 - samples/sec: 11.50 - lr: 0.020000\n",
+      "2021-09-21 21:08:06,039 epoch 1 - iter 12/32 - loss 0.67637629 - samples/sec: 10.31 - lr: 0.020000\n",
+      "2021-09-21 21:08:06,299 epoch 1 - iter 15/32 - loss 0.63181099 - samples/sec: 11.59 - lr: 0.020000\n",
+      "2021-09-21 21:08:06,585 epoch 1 - iter 18/32 - loss 0.67363764 - samples/sec: 10.50 - lr: 0.020000\n",
+      "2021-09-21 21:08:06,867 epoch 1 - iter 21/32 - loss 0.58216206 - samples/sec: 10.66 - lr: 0.020000\n",
+      "2021-09-21 21:08:07,164 epoch 1 - iter 24/32 - loss 0.53093890 - samples/sec: 10.12 - lr: 0.020000\n",
+      "2021-09-21 21:08:07,476 epoch 1 - iter 27/32 - loss 0.67879535 - samples/sec: 9.66 - lr: 0.020000\n",
+      "2021-09-21 21:08:07,741 epoch 1 - iter 30/32 - loss 0.66635434 - samples/sec: 11.36 - lr: 0.020000\n",
+      "2021-09-21 21:08:07,942 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:07,942 EPOCH 1 done: loss 0.6556 - lr 0.0200000\n",
+      "2021-09-21 21:08:08,136 DEV : loss 0.684792697429657 - score 0.25\n",
+      "2021-09-21 21:08:08,137 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:25:52,328 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:52,515 epoch 2 - iter 3/32 - loss 0.44328671 - samples/sec: 18.12 - lr: 0.020000\n",
-      "2021-09-08 01:25:52,673 epoch 2 - iter 6/32 - loss 0.63618314 - samples/sec: 19.01 - lr: 0.020000\n",
-      "2021-09-08 01:25:52,866 epoch 2 - iter 9/32 - loss 0.61383823 - samples/sec: 15.54 - lr: 0.020000\n",
-      "2021-09-08 01:25:53,010 epoch 2 - iter 12/32 - loss 0.52245552 - samples/sec: 20.93 - lr: 0.020000\n",
-      "2021-09-08 01:25:53,202 epoch 2 - iter 15/32 - loss 0.60493502 - samples/sec: 15.67 - lr: 0.020000\n",
-      "2021-09-08 01:25:53,348 epoch 2 - iter 18/32 - loss 0.61219421 - samples/sec: 20.69 - lr: 0.020000\n",
-      "2021-09-08 01:25:53,541 epoch 2 - iter 21/32 - loss 0.57091271 - samples/sec: 15.60 - lr: 0.020000\n",
-      "2021-09-08 01:25:53,684 epoch 2 - iter 24/32 - loss 0.52175019 - samples/sec: 21.09 - lr: 0.020000\n",
-      "2021-09-08 01:25:53,854 epoch 2 - iter 27/32 - loss 0.48451818 - samples/sec: 17.65 - lr: 0.020000\n",
-      "2021-09-08 01:25:54,036 epoch 2 - iter 30/32 - loss 0.56485154 - samples/sec: 16.58 - lr: 0.020000\n",
-      "2021-09-08 01:25:54,132 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:54,132 EPOCH 2 done: loss 0.5358 - lr 0.0200000\n",
-      "2021-09-08 01:25:54,227 DEV : loss 0.9685122966766357 - score 0.25\n",
-      "2021-09-08 01:25:54,228 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:08:15,430 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:15,605 epoch 2 - iter 3/32 - loss 0.07584773 - samples/sec: 19.51 - lr: 0.020000\n",
+      "2021-09-21 21:08:15,771 epoch 2 - iter 6/32 - loss 0.69428561 - samples/sec: 18.18 - lr: 0.020000\n",
+      "2021-09-21 21:08:15,943 epoch 2 - iter 9/32 - loss 0.56877654 - samples/sec: 17.47 - lr: 0.020000\n",
+      "2021-09-21 21:08:16,137 epoch 2 - iter 12/32 - loss 0.53187588 - samples/sec: 15.50 - lr: 0.020000\n",
+      "2021-09-21 21:08:16,304 epoch 2 - iter 15/32 - loss 0.46970950 - samples/sec: 18.00 - lr: 0.020000\n",
+      "2021-09-21 21:08:16,485 epoch 2 - iter 18/32 - loss 0.47759449 - samples/sec: 16.69 - lr: 0.020000\n",
+      "2021-09-21 21:08:16,659 epoch 2 - iter 21/32 - loss 0.51656593 - samples/sec: 17.29 - lr: 0.020000\n",
+      "2021-09-21 21:08:16,828 epoch 2 - iter 24/32 - loss 0.52363336 - samples/sec: 17.80 - lr: 0.020000\n",
+      "2021-09-21 21:08:17,032 epoch 2 - iter 27/32 - loss 0.52106807 - samples/sec: 14.75 - lr: 0.020000\n",
+      "2021-09-21 21:08:17,295 epoch 2 - iter 30/32 - loss 0.51085696 - samples/sec: 11.44 - lr: 0.020000\n",
+      "2021-09-21 21:08:17,477 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:17,478 EPOCH 2 done: loss 0.5095 - lr 0.0200000\n",
+      "2021-09-21 21:08:17,660 DEV : loss 0.7958274483680725 - score 0.75\n",
+      "2021-09-21 21:08:17,661 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:25:58,264 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:25:58,424 epoch 3 - iter 3/32 - loss 0.27546908 - samples/sec: 21.06 - lr: 0.020000\n",
-      "2021-09-08 01:25:58,582 epoch 3 - iter 6/32 - loss 0.61795250 - samples/sec: 19.04 - lr: 0.020000\n",
-      "2021-09-08 01:25:58,775 epoch 3 - iter 9/32 - loss 0.50673457 - samples/sec: 15.58 - lr: 0.020000\n",
-      "2021-09-08 01:25:58,919 epoch 3 - iter 12/32 - loss 0.40462591 - samples/sec: 20.91 - lr: 0.020000\n",
-      "2021-09-08 01:25:59,105 epoch 3 - iter 15/32 - loss 0.35568509 - samples/sec: 16.17 - lr: 0.020000\n",
-      "2021-09-08 01:25:59,252 epoch 3 - iter 18/32 - loss 0.39436972 - samples/sec: 20.56 - lr: 0.020000\n",
-      "2021-09-08 01:25:59,431 epoch 3 - iter 21/32 - loss 0.33973908 - samples/sec: 16.80 - lr: 0.020000\n",
-      "2021-09-08 01:25:59,572 epoch 3 - iter 24/32 - loss 0.35775353 - samples/sec: 21.37 - lr: 0.020000\n",
-      "2021-09-08 01:25:59,740 epoch 3 - iter 27/32 - loss 0.32775745 - samples/sec: 17.84 - lr: 0.020000\n",
-      "2021-09-08 01:25:59,910 epoch 3 - iter 30/32 - loss 0.30590931 - samples/sec: 17.71 - lr: 0.020000\n",
-      "2021-09-08 01:26:00,007 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:26:00,008 EPOCH 3 done: loss 0.2899 - lr 0.0200000\n",
-      "2021-09-08 01:26:00,091 DEV : loss 1.3458237648010254 - score 0.25\n",
-      "2021-09-08 01:26:00,091 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:26:00,093 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:26:00,327 epoch 4 - iter 3/32 - loss 0.53153925 - samples/sec: 15.31 - lr: 0.020000\n",
-      "2021-09-08 01:26:00,472 epoch 4 - iter 6/32 - loss 0.46401686 - samples/sec: 20.79 - lr: 0.020000\n",
-      "2021-09-08 01:26:00,653 epoch 4 - iter 9/32 - loss 0.34194107 - samples/sec: 16.57 - lr: 0.020000\n",
-      "2021-09-08 01:26:00,829 epoch 4 - iter 12/32 - loss 0.26098974 - samples/sec: 17.13 - lr: 0.020000\n",
-      "2021-09-08 01:26:00,977 epoch 4 - iter 15/32 - loss 0.39914505 - samples/sec: 20.32 - lr: 0.020000\n",
-      "2021-09-08 01:26:01,181 epoch 4 - iter 18/32 - loss 0.48258759 - samples/sec: 14.76 - lr: 0.020000\n",
-      "2021-09-08 01:26:01,322 epoch 4 - iter 21/32 - loss 0.52313683 - samples/sec: 21.36 - lr: 0.020000\n",
-      "2021-09-08 01:26:01,499 epoch 4 - iter 24/32 - loss 0.46822439 - samples/sec: 17.00 - lr: 0.020000\n",
-      "2021-09-08 01:26:01,670 epoch 4 - iter 27/32 - loss 0.42232153 - samples/sec: 17.53 - lr: 0.020000\n",
-      "2021-09-08 01:26:01,816 epoch 4 - iter 30/32 - loss 0.38663457 - samples/sec: 20.74 - lr: 0.020000\n",
-      "2021-09-08 01:26:01,939 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:26:01,940 EPOCH 4 done: loss 0.3629 - lr 0.0200000\n",
-      "2021-09-08 01:26:02,125 DEV : loss 1.1504902839660645 - score 0.5\n",
-      "2021-09-08 01:26:02,126 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:26:07,381 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:26:07,538 epoch 5 - iter 3/32 - loss 0.02599002 - samples/sec: 21.56 - lr: 0.020000\n",
-      "2021-09-08 01:26:07,711 epoch 5 - iter 6/32 - loss 0.18128908 - samples/sec: 17.40 - lr: 0.020000\n",
-      "2021-09-08 01:26:07,852 epoch 5 - iter 9/32 - loss 0.13447117 - samples/sec: 21.29 - lr: 0.020000\n",
-      "2021-09-08 01:26:08,037 epoch 5 - iter 12/32 - loss 0.12232198 - samples/sec: 16.34 - lr: 0.020000\n",
-      "2021-09-08 01:26:08,188 epoch 5 - iter 15/32 - loss 0.15843315 - samples/sec: 19.85 - lr: 0.020000\n",
-      "2021-09-08 01:26:08,341 epoch 5 - iter 18/32 - loss 0.17487020 - samples/sec: 19.76 - lr: 0.020000\n",
-      "2021-09-08 01:26:08,527 epoch 5 - iter 21/32 - loss 0.15289653 - samples/sec: 16.13 - lr: 0.020000\n",
-      "2021-09-08 01:26:08,675 epoch 5 - iter 24/32 - loss 0.17976255 - samples/sec: 20.37 - lr: 0.020000\n",
-      "2021-09-08 01:26:08,848 epoch 5 - iter 27/32 - loss 0.17968359 - samples/sec: 17.42 - lr: 0.020000\n",
-      "2021-09-08 01:26:08,985 epoch 5 - iter 30/32 - loss 0.16256983 - samples/sec: 21.99 - lr: 0.020000\n",
-      "2021-09-08 01:26:09,111 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:26:09,111 EPOCH 5 done: loss 0.1965 - lr 0.0200000\n",
-      "2021-09-08 01:26:09,281 DEV : loss 1.716923475265503 - score 0.5\n",
-      "2021-09-08 01:26:09,282 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:26:09,340 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:26:09,491 epoch 6 - iter 3/32 - loss 0.00739745 - samples/sec: 22.32 - lr: 0.020000\n",
-      "2021-09-08 01:26:09,656 epoch 6 - iter 6/32 - loss 0.00610508 - samples/sec: 18.26 - lr: 0.020000\n",
-      "2021-09-08 01:26:09,823 epoch 6 - iter 9/32 - loss 0.01000259 - samples/sec: 18.06 - lr: 0.020000\n",
-      "2021-09-08 01:26:09,961 epoch 6 - iter 12/32 - loss 0.01020740 - samples/sec: 21.68 - lr: 0.020000\n",
-      "2021-09-08 01:26:10,154 epoch 6 - iter 15/32 - loss 0.02827937 - samples/sec: 15.65 - lr: 0.020000\n",
-      "2021-09-08 01:26:10,295 epoch 6 - iter 18/32 - loss 0.04361364 - samples/sec: 21.27 - lr: 0.020000\n",
-      "2021-09-08 01:26:10,465 epoch 6 - iter 21/32 - loss 0.03829589 - samples/sec: 17.73 - lr: 0.020000\n",
-      "2021-09-08 01:26:10,616 epoch 6 - iter 24/32 - loss 0.06738199 - samples/sec: 19.92 - lr: 0.020000\n",
-      "2021-09-08 01:26:10,795 epoch 6 - iter 27/32 - loss 0.06448436 - samples/sec: 16.82 - lr: 0.020000\n",
-      "2021-09-08 01:26:10,939 epoch 6 - iter 30/32 - loss 0.10164234 - samples/sec: 20.95 - lr: 0.020000\n"
+      "2021-09-21 21:08:21,750 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:22,028 epoch 3 - iter 3/32 - loss 0.10860793 - samples/sec: 11.91 - lr: 0.020000\n",
+      "2021-09-21 21:08:22,253 epoch 3 - iter 6/32 - loss 0.09259563 - samples/sec: 13.35 - lr: 0.020000\n",
+      "2021-09-21 21:08:22,480 epoch 3 - iter 9/32 - loss 0.18054448 - samples/sec: 13.25 - lr: 0.020000\n",
+      "2021-09-21 21:08:22,703 epoch 3 - iter 12/32 - loss 0.14609195 - samples/sec: 13.49 - lr: 0.020000\n",
+      "2021-09-21 21:08:22,917 epoch 3 - iter 15/32 - loss 0.13848829 - samples/sec: 14.11 - lr: 0.020000\n",
+      "2021-09-21 21:08:23,152 epoch 3 - iter 18/32 - loss 0.14468652 - samples/sec: 12.81 - lr: 0.020000\n",
+      "2021-09-21 21:08:23,365 epoch 3 - iter 21/32 - loss 0.16130325 - samples/sec: 14.16 - lr: 0.020000\n",
+      "2021-09-21 21:08:23,575 epoch 3 - iter 24/32 - loss 0.18592612 - samples/sec: 14.33 - lr: 0.020000\n",
+      "2021-09-21 21:08:23,755 epoch 3 - iter 27/32 - loss 0.17636554 - samples/sec: 16.71 - lr: 0.020000\n",
+      "2021-09-21 21:08:23,952 epoch 3 - iter 30/32 - loss 0.22870181 - samples/sec: 15.25 - lr: 0.020000\n",
+      "2021-09-21 21:08:24,103 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:24,104 EPOCH 3 done: loss 0.2150 - lr 0.0200000\n",
+      "2021-09-21 21:08:24,305 DEV : loss 0.8611387610435486 - score 0.75\n",
+      "2021-09-21 21:08:24,306 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:08:24,383 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:24,603 epoch 4 - iter 3/32 - loss 0.00167911 - samples/sec: 15.29 - lr: 0.020000\n",
+      "2021-09-21 21:08:24,823 epoch 4 - iter 6/32 - loss 0.07486911 - samples/sec: 13.64 - lr: 0.020000\n",
+      "2021-09-21 21:08:25,036 epoch 4 - iter 9/32 - loss 0.10805938 - samples/sec: 14.15 - lr: 0.020000\n",
+      "2021-09-21 21:08:25,247 epoch 4 - iter 12/32 - loss 0.12147834 - samples/sec: 14.26 - lr: 0.020000\n",
+      "2021-09-21 21:08:25,471 epoch 4 - iter 15/32 - loss 0.13447173 - samples/sec: 13.41 - lr: 0.020000\n",
+      "2021-09-21 21:08:25,654 epoch 4 - iter 18/32 - loss 0.11285694 - samples/sec: 16.48 - lr: 0.020000\n",
+      "2021-09-21 21:08:25,813 epoch 4 - iter 21/32 - loss 0.11100415 - samples/sec: 18.95 - lr: 0.020000\n",
+      "2021-09-21 21:08:25,974 epoch 4 - iter 24/32 - loss 0.10482923 - samples/sec: 18.73 - lr: 0.020000\n",
+      "2021-09-21 21:08:26,137 epoch 4 - iter 27/32 - loss 0.09408224 - samples/sec: 18.39 - lr: 0.020000\n",
+      "2021-09-21 21:08:26,315 epoch 4 - iter 30/32 - loss 0.11411172 - samples/sec: 16.95 - lr: 0.020000\n",
+      "2021-09-21 21:08:26,425 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:26,426 EPOCH 4 done: loss 0.1072 - lr 0.0200000\n",
+      "2021-09-21 21:08:26,758 DEV : loss 1.0842220783233643 - score 0.5\n",
+      "2021-09-21 21:08:26,759 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:08:26,786 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:27,021 epoch 5 - iter 3/32 - loss 0.03219941 - samples/sec: 14.58 - lr: 0.020000\n",
+      "2021-09-21 21:08:27,228 epoch 5 - iter 6/32 - loss 0.04907577 - samples/sec: 14.56 - lr: 0.020000\n",
+      "2021-09-21 21:08:27,440 epoch 5 - iter 9/32 - loss 0.03648977 - samples/sec: 14.20 - lr: 0.020000\n",
+      "2021-09-21 21:08:27,652 epoch 5 - iter 12/32 - loss 0.03410875 - samples/sec: 14.16 - lr: 0.020000\n",
+      "2021-09-21 21:08:27,852 epoch 5 - iter 15/32 - loss 0.02751669 - samples/sec: 15.07 - lr: 0.020000\n",
+      "2021-09-21 21:08:28,099 epoch 5 - iter 18/32 - loss 0.03132479 - samples/sec: 12.18 - lr: 0.020000\n",
+      "2021-09-21 21:08:28,254 epoch 5 - iter 21/32 - loss 0.03035689 - samples/sec: 19.46 - lr: 0.020000\n",
+      "2021-09-21 21:08:28,430 epoch 5 - iter 24/32 - loss 0.03895935 - samples/sec: 17.12 - lr: 0.020000\n",
+      "2021-09-21 21:08:28,596 epoch 5 - iter 27/32 - loss 0.03688652 - samples/sec: 18.17 - lr: 0.020000\n",
+      "2021-09-21 21:08:28,781 epoch 5 - iter 30/32 - loss 0.03466414 - samples/sec: 16.26 - lr: 0.020000\n",
+      "2021-09-21 21:08:28,898 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:28,899 EPOCH 5 done: loss 0.0690 - lr 0.0200000\n",
+      "2021-09-21 21:08:29,094 DEV : loss 1.5508551597595215 - score 0.25\n",
+      "2021-09-21 21:08:29,095 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:08:29,176 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:29,363 epoch 6 - iter 3/32 - loss 0.02436331 - samples/sec: 18.57 - lr: 0.020000\n",
+      "2021-09-21 21:08:29,546 epoch 6 - iter 6/32 - loss 0.05077244 - samples/sec: 16.47 - lr: 0.020000\n",
+      "2021-09-21 21:08:29,719 epoch 6 - iter 9/32 - loss 0.03534304 - samples/sec: 17.34 - lr: 0.020000\n",
+      "2021-09-21 21:08:29,898 epoch 6 - iter 12/32 - loss 0.03023732 - samples/sec: 16.89 - lr: 0.020000\n",
+      "2021-09-21 21:08:30,068 epoch 6 - iter 15/32 - loss 0.08196179 - samples/sec: 17.70 - lr: 0.020000\n",
+      "2021-09-21 21:08:30,263 epoch 6 - iter 18/32 - loss 0.07103619 - samples/sec: 15.46 - lr: 0.020000\n",
+      "2021-09-21 21:08:30,426 epoch 6 - iter 21/32 - loss 0.06419989 - samples/sec: 18.39 - lr: 0.020000\n",
+      "2021-09-21 21:08:30,602 epoch 6 - iter 24/32 - loss 0.15077743 - samples/sec: 17.13 - lr: 0.020000\n",
+      "2021-09-21 21:08:30,780 epoch 6 - iter 27/32 - loss 0.14633515 - samples/sec: 16.98 - lr: 0.020000\n",
+      "2021-09-21 21:08:30,952 epoch 6 - iter 30/32 - loss 0.13189032 - samples/sec: 17.45 - lr: 0.020000\n",
+      "2021-09-21 21:08:31,041 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:26:11,049 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:26:11,049 EPOCH 6 done: loss 0.0961 - lr 0.0200000\n",
-      "2021-09-08 01:26:11,251 DEV : loss 1.621936559677124 - score 0.25\n",
-      "2021-09-08 01:26:11,252 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:26:11,348 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:26:11,567 epoch 7 - iter 3/32 - loss 0.21388194 - samples/sec: 14.68 - lr: 0.020000\n",
-      "2021-09-08 01:26:11,705 epoch 7 - iter 6/32 - loss 0.19381220 - samples/sec: 21.78 - lr: 0.020000\n",
-      "2021-09-08 01:26:11,878 epoch 7 - iter 9/32 - loss 0.28282469 - samples/sec: 17.46 - lr: 0.020000\n",
-      "2021-09-08 01:26:12,032 epoch 7 - iter 12/32 - loss 0.21792116 - samples/sec: 19.49 - lr: 0.020000\n",
-      "2021-09-08 01:26:12,202 epoch 7 - iter 15/32 - loss 0.23320057 - samples/sec: 17.68 - lr: 0.020000\n",
-      "2021-09-08 01:26:12,354 epoch 7 - iter 18/32 - loss 0.19492040 - samples/sec: 19.77 - lr: 0.020000\n",
-      "2021-09-08 01:26:12,495 epoch 7 - iter 21/32 - loss 0.16800357 - samples/sec: 21.39 - lr: 0.020000\n",
-      "2021-09-08 01:26:12,669 epoch 7 - iter 24/32 - loss 0.14801890 - samples/sec: 17.33 - lr: 0.020000\n",
-      "2021-09-08 01:26:12,816 epoch 7 - iter 27/32 - loss 0.13619851 - samples/sec: 20.44 - lr: 0.020000\n",
-      "2021-09-08 01:26:12,994 epoch 7 - iter 30/32 - loss 0.12279581 - samples/sec: 16.94 - lr: 0.020000\n",
-      "2021-09-08 01:26:13,087 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:26:13,087 EPOCH 7 done: loss 0.1159 - lr 0.0200000\n",
-      "2021-09-08 01:26:13,186 DEV : loss 1.6060336828231812 - score 0.25\n",
-      "2021-09-08 01:26:13,186 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:26:13,191 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:26:13,344 epoch 8 - iter 3/32 - loss 0.00934948 - samples/sec: 22.31 - lr: 0.020000\n",
-      "2021-09-08 01:26:13,501 epoch 8 - iter 6/32 - loss 0.00542987 - samples/sec: 19.09 - lr: 0.020000\n",
-      "2021-09-08 01:26:13,651 epoch 8 - iter 9/32 - loss 0.02157687 - samples/sec: 20.11 - lr: 0.020000\n",
-      "2021-09-08 01:26:13,803 epoch 8 - iter 12/32 - loss 0.02030627 - samples/sec: 19.80 - lr: 0.020000\n",
-      "2021-09-08 01:26:13,997 epoch 8 - iter 15/32 - loss 0.01679882 - samples/sec: 15.54 - lr: 0.020000\n",
-      "2021-09-08 01:26:14,131 epoch 8 - iter 18/32 - loss 0.01473011 - samples/sec: 22.35 - lr: 0.020000\n",
-      "2021-09-08 01:26:14,290 epoch 8 - iter 21/32 - loss 0.01287401 - samples/sec: 18.96 - lr: 0.020000\n",
-      "2021-09-08 01:26:14,471 epoch 8 - iter 24/32 - loss 0.01199499 - samples/sec: 16.67 - lr: 0.020000\n",
-      "2021-09-08 01:26:14,609 epoch 8 - iter 27/32 - loss 0.01089776 - samples/sec: 21.80 - lr: 0.020000\n",
-      "2021-09-08 01:26:14,798 epoch 8 - iter 30/32 - loss 0.01350012 - samples/sec: 15.90 - lr: 0.020000\n",
-      "2021-09-08 01:26:14,888 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:26:14,889 EPOCH 8 done: loss 0.0129 - lr 0.0200000\n",
-      "2021-09-08 01:26:14,966 DEV : loss 2.0112709999084473 - score 0.25\n",
-      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:26:14,966 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:26:14,974 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:26:15,163 epoch 9 - iter 3/32 - loss 0.00309505 - samples/sec: 18.87 - lr: 0.010000\n",
-      "2021-09-08 01:26:15,307 epoch 9 - iter 6/32 - loss 0.28357315 - samples/sec: 20.90 - lr: 0.010000\n",
-      "2021-09-08 01:26:15,493 epoch 9 - iter 9/32 - loss 0.18923874 - samples/sec: 16.17 - lr: 0.010000\n",
-      "2021-09-08 01:26:15,681 epoch 9 - iter 12/32 - loss 0.14238027 - samples/sec: 16.04 - lr: 0.010000\n",
-      "2021-09-08 01:26:15,820 epoch 9 - iter 15/32 - loss 0.11398004 - samples/sec: 21.62 - lr: 0.010000\n",
-      "2021-09-08 01:26:15,962 epoch 9 - iter 18/32 - loss 0.09521564 - samples/sec: 21.14 - lr: 0.010000\n",
-      "2021-09-08 01:26:16,143 epoch 9 - iter 21/32 - loss 0.08522976 - samples/sec: 16.66 - lr: 0.010000\n",
-      "2021-09-08 01:26:16,313 epoch 9 - iter 24/32 - loss 0.07461766 - samples/sec: 17.67 - lr: 0.010000\n",
-      "2021-09-08 01:26:16,448 epoch 9 - iter 27/32 - loss 0.06648470 - samples/sec: 22.40 - lr: 0.010000\n",
-      "2021-09-08 01:26:16,617 epoch 9 - iter 30/32 - loss 0.06041789 - samples/sec: 17.79 - lr: 0.010000\n",
-      "2021-09-08 01:26:16,733 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:26:16,733 EPOCH 9 done: loss 0.0567 - lr 0.0100000\n",
-      "2021-09-08 01:26:16,823 DEV : loss 2.0015029907226562 - score 0.0\n",
-      "2021-09-08 01:26:16,824 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:26:16,826 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:26:17,000 epoch 10 - iter 3/32 - loss 0.00106006 - samples/sec: 18.72 - lr: 0.010000\n",
-      "2021-09-08 01:26:17,161 epoch 10 - iter 6/32 - loss 0.00403958 - samples/sec: 18.74 - lr: 0.010000\n",
-      "2021-09-08 01:26:17,300 epoch 10 - iter 9/32 - loss 0.00380155 - samples/sec: 21.65 - lr: 0.010000\n",
-      "2021-09-08 01:26:17,482 epoch 10 - iter 12/32 - loss 0.00311229 - samples/sec: 16.52 - lr: 0.010000\n",
-      "2021-09-08 01:26:17,617 epoch 10 - iter 15/32 - loss 0.00290388 - samples/sec: 22.30 - lr: 0.010000\n",
-      "2021-09-08 01:26:17,806 epoch 10 - iter 18/32 - loss 0.00267917 - samples/sec: 15.94 - lr: 0.010000\n",
-      "2021-09-08 01:26:17,965 epoch 10 - iter 21/32 - loss 0.00233381 - samples/sec: 18.93 - lr: 0.010000\n",
-      "2021-09-08 01:26:18,112 epoch 10 - iter 24/32 - loss 0.00219744 - samples/sec: 20.36 - lr: 0.010000\n",
-      "2021-09-08 01:26:18,281 epoch 10 - iter 27/32 - loss 0.00205996 - samples/sec: 17.85 - lr: 0.010000\n",
-      "2021-09-08 01:26:18,419 epoch 10 - iter 30/32 - loss 0.02102275 - samples/sec: 21.80 - lr: 0.010000\n",
-      "2021-09-08 01:26:18,539 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:26:18,539 EPOCH 10 done: loss 0.0197 - lr 0.0100000\n",
-      "2021-09-08 01:26:18,723 DEV : loss 2.13718581199646 - score 0.0\n",
-      "2021-09-08 01:26:18,724 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:26:23,391 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:26:23,392 Testing using best model ...\n",
-      "2021-09-08 01:26:23,415 loading file temp/best-model.pt\n",
+      "2021-09-21 21:08:31,042 EPOCH 6 done: loss 0.1237 - lr 0.0200000\n",
+      "2021-09-21 21:08:31,260 DEV : loss 0.8336905837059021 - score 0.75\n",
+      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:08:31,263 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:08:31,353 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:31,565 epoch 7 - iter 3/32 - loss 0.00265323 - samples/sec: 17.34 - lr: 0.010000\n",
+      "2021-09-21 21:08:31,749 epoch 7 - iter 6/32 - loss 0.05309793 - samples/sec: 16.27 - lr: 0.010000\n",
+      "2021-09-21 21:08:31,930 epoch 7 - iter 9/32 - loss 0.03704170 - samples/sec: 16.70 - lr: 0.010000\n",
+      "2021-09-21 21:08:32,099 epoch 7 - iter 12/32 - loss 0.02910736 - samples/sec: 17.78 - lr: 0.010000\n",
+      "2021-09-21 21:08:32,270 epoch 7 - iter 15/32 - loss 0.02420548 - samples/sec: 17.59 - lr: 0.010000\n",
+      "2021-09-21 21:08:32,444 epoch 7 - iter 18/32 - loss 0.02025887 - samples/sec: 17.27 - lr: 0.010000\n",
+      "2021-09-21 21:08:32,596 epoch 7 - iter 21/32 - loss 0.01824822 - samples/sec: 19.84 - lr: 0.010000\n",
+      "2021-09-21 21:08:32,780 epoch 7 - iter 24/32 - loss 0.01819268 - samples/sec: 16.32 - lr: 0.010000\n",
+      "2021-09-21 21:08:32,950 epoch 7 - iter 27/32 - loss 0.01643315 - samples/sec: 17.76 - lr: 0.010000\n",
+      "2021-09-21 21:08:33,118 epoch 7 - iter 30/32 - loss 0.01775408 - samples/sec: 17.93 - lr: 0.010000\n",
+      "2021-09-21 21:08:33,224 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:33,224 EPOCH 7 done: loss 0.0219 - lr 0.0100000\n",
+      "2021-09-21 21:08:34,517 DEV : loss 0.8205548524856567 - score 0.75\n",
+      "2021-09-21 21:08:34,518 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:08:34,587 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:34,825 epoch 8 - iter 3/32 - loss 0.00182474 - samples/sec: 13.90 - lr: 0.010000\n",
+      "2021-09-21 21:08:35,020 epoch 8 - iter 6/32 - loss 0.00134446 - samples/sec: 15.47 - lr: 0.010000\n",
+      "2021-09-21 21:08:35,186 epoch 8 - iter 9/32 - loss 0.00373965 - samples/sec: 18.12 - lr: 0.010000\n",
+      "2021-09-21 21:08:35,362 epoch 8 - iter 12/32 - loss 0.00288000 - samples/sec: 17.12 - lr: 0.010000\n",
+      "2021-09-21 21:08:35,537 epoch 8 - iter 15/32 - loss 0.00498116 - samples/sec: 17.23 - lr: 0.010000\n",
+      "2021-09-21 21:08:35,709 epoch 8 - iter 18/32 - loss 0.00422474 - samples/sec: 17.45 - lr: 0.010000\n",
+      "2021-09-21 21:08:35,875 epoch 8 - iter 21/32 - loss 0.00368654 - samples/sec: 18.14 - lr: 0.010000\n",
+      "2021-09-21 21:08:36,014 epoch 8 - iter 24/32 - loss 0.00377114 - samples/sec: 21.77 - lr: 0.010000\n",
+      "2021-09-21 21:08:36,170 epoch 8 - iter 27/32 - loss 0.00399011 - samples/sec: 19.26 - lr: 0.010000\n",
+      "2021-09-21 21:08:36,305 epoch 8 - iter 30/32 - loss 0.00379010 - samples/sec: 22.39 - lr: 0.010000\n",
+      "2021-09-21 21:08:36,395 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:36,396 EPOCH 8 done: loss 0.0040 - lr 0.0100000\n",
+      "2021-09-21 21:08:36,479 DEV : loss 0.9222744703292847 - score 0.75\n",
+      "2021-09-21 21:08:36,481 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:08:36,483 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:36,652 epoch 9 - iter 3/32 - loss 0.00117552 - samples/sec: 20.21 - lr: 0.010000\n",
+      "2021-09-21 21:08:36,792 epoch 9 - iter 6/32 - loss 0.00077372 - samples/sec: 21.49 - lr: 0.010000\n",
+      "2021-09-21 21:08:36,953 epoch 9 - iter 9/32 - loss 0.00356726 - samples/sec: 18.73 - lr: 0.010000\n",
+      "2021-09-21 21:08:37,100 epoch 9 - iter 12/32 - loss 0.00388408 - samples/sec: 20.52 - lr: 0.010000\n",
+      "2021-09-21 21:08:37,261 epoch 9 - iter 15/32 - loss 0.00327815 - samples/sec: 18.68 - lr: 0.010000\n",
+      "2021-09-21 21:08:37,413 epoch 9 - iter 18/32 - loss 0.00280698 - samples/sec: 19.78 - lr: 0.010000\n",
+      "2021-09-21 21:08:37,554 epoch 9 - iter 21/32 - loss 0.00281594 - samples/sec: 21.40 - lr: 0.010000\n",
+      "2021-09-21 21:08:37,702 epoch 9 - iter 24/32 - loss 0.00439276 - samples/sec: 20.36 - lr: 0.010000\n",
+      "2021-09-21 21:08:37,843 epoch 9 - iter 27/32 - loss 0.00396309 - samples/sec: 21.33 - lr: 0.010000\n",
+      "2021-09-21 21:08:37,997 epoch 9 - iter 30/32 - loss 0.00359216 - samples/sec: 19.48 - lr: 0.010000\n",
+      "2021-09-21 21:08:38,105 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:38,106 EPOCH 9 done: loss 0.0150 - lr 0.0100000\n",
+      "2021-09-21 21:08:38,189 DEV : loss 1.0383033752441406 - score 0.5\n",
+      "2021-09-21 21:08:38,190 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:08:38,192 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:38,356 epoch 10 - iter 3/32 - loss 0.00102680 - samples/sec: 20.19 - lr: 0.010000\n",
+      "2021-09-21 21:08:38,497 epoch 10 - iter 6/32 - loss 0.00085375 - samples/sec: 21.36 - lr: 0.010000\n",
+      "2021-09-21 21:08:38,648 epoch 10 - iter 9/32 - loss 0.00086039 - samples/sec: 19.94 - lr: 0.010000\n",
+      "2021-09-21 21:08:38,794 epoch 10 - iter 12/32 - loss 0.00092667 - samples/sec: 20.74 - lr: 0.010000\n",
+      "2021-09-21 21:08:38,943 epoch 10 - iter 15/32 - loss 0.00113882 - samples/sec: 20.15 - lr: 0.010000\n",
+      "2021-09-21 21:08:39,091 epoch 10 - iter 18/32 - loss 0.00563077 - samples/sec: 20.30 - lr: 0.010000\n",
+      "2021-09-21 21:08:39,252 epoch 10 - iter 21/32 - loss 0.00505395 - samples/sec: 18.79 - lr: 0.010000\n",
+      "2021-09-21 21:08:39,423 epoch 10 - iter 24/32 - loss 0.00454509 - samples/sec: 17.57 - lr: 0.010000\n",
+      "2021-09-21 21:08:39,576 epoch 10 - iter 27/32 - loss 0.00410262 - samples/sec: 19.71 - lr: 0.010000\n",
+      "2021-09-21 21:08:39,734 epoch 10 - iter 30/32 - loss 0.00411083 - samples/sec: 19.05 - lr: 0.010000\n",
+      "2021-09-21 21:08:39,844 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:39,844 EPOCH 10 done: loss 0.0040 - lr 0.0100000\n",
+      "2021-09-21 21:08:39,943 DEV : loss 1.2300747632980347 - score 0.5\n",
+      "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 21:08:39,944 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:08:44,942 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:44,942 Testing using best model ...\n",
+      "2021-09-21 21:08:44,944 loading file temp/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:26:30,594 \t0.75\n",
-      "2021-09-08 01:26:30,595 \n",
+      "2021-09-21 21:08:50,409 \t1.0\n",
+      "2021-09-21 21:08:50,409 \n",
       "Results:\n",
-      "- F-score (micro) 0.75\n",
-      "- F-score (macro) 0.5\n",
-      "- Accuracy 0.75\n",
+      "- F-score (micro) 1.0\n",
+      "- F-score (macro) 0.75\n",
+      "- Accuracy 1.0\n",
       "\n",
       "By class:\n",
       "              precision    recall  f1-score   support\n",
       "\n",
-      "     sadness     1.0000    1.0000    1.0000         1\n",
-      "    optimism     0.0000    0.0000    0.0000         1\n",
-      "       anger     0.0000    0.0000    0.0000         0\n",
+      "     sadness     0.0000    0.0000    0.0000         0\n",
+      "    optimism     1.0000    1.0000    1.0000         1\n",
+      "       anger     1.0000    1.0000    1.0000         1\n",
       "         joy     1.0000    1.0000    1.0000         2\n",
       "\n",
-      "   micro avg     0.7500    0.7500    0.7500         4\n",
-      "   macro avg     0.5000    0.5000    0.5000         4\n",
-      "weighted avg     0.7500    0.7500    0.7500         4\n",
-      " samples avg     0.7500    0.7500    0.7500         4\n",
+      "   micro avg     1.0000    1.0000    1.0000         4\n",
+      "   macro avg     0.7500    0.7500    0.7500         4\n",
+      "weighted avg     1.0000    1.0000    1.0000         4\n",
+      " samples avg     1.0000    1.0000    1.0000         4\n",
       "\n",
-      "2021-09-08 01:26:30,595 ----------------------------------------------------------------------------------------------------\n",
-      "Accuracy Durchschnitt: 0.5792325056433408\n"
+      "2021-09-21 21:08:50,409 ----------------------------------------------------------------------------------------------------\n",
+      "Accuracy Durchschnitt: 0.6564334085778781\n"
      ]
     }
    ],
@@ -2881,11 +2884,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "id": "0c4025f0",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.6704288939051919, 0.6433408577878104, 0.654627539503386, 0.6636568848758465, 0.6501128668171557]\n",
+      "0.009619537585852165\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(avg_acc_list)\n",
+    "print(statistics.pstdev(avg_acc_list))"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -2905,25 +2920,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:31:14,370 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
+      "2021-09-21 21:09:02,731 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:31:18,407 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:09:07,020 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 36/36 [00:00<00:00, 43842.90it/s]"
+      "100%|██████████| 36/36 [00:00<00:00, 44241.12it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:31:18,410 [b'this text expresses sadness', b'this text expresses optimism', b'this text expresses anger', b'this text expresses joy']\n",
-      "2021-09-08 01:31:18,413 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:31:18,414 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:09:07,022 [b'this text expresses sadness', b'this text expresses optimism', b'this text expresses anger', b'this text expresses joy']\n",
+      "2021-09-21 21:09:07,211 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:07,213 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -3236,25 +3251,24 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:31:18,415 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:31:18,416 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
-      "2021-09-08 01:31:18,416 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:31:18,416 Parameters:\n",
-      "2021-09-08 01:31:18,416  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:31:18,417  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:31:18,417  - patience: \"3\"\n",
-      "2021-09-08 01:31:18,417  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:31:18,418  - max_epochs: \"10\"\n",
-      "2021-09-08 01:31:18,418  - shuffle: \"True\"\n",
-      "2021-09-08 01:31:18,418  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:31:18,418  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:31:18,419 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:31:18,419 Model training base path: \"temp\"\n",
-      "2021-09-08 01:31:18,419 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:31:18,420 Device: cuda:0\n",
-      "2021-09-08 01:31:18,420 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:31:18,420 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:31:18,426 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:09:07,214 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:07,214 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
+      "2021-09-21 21:09:07,215 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:07,215 Parameters:\n",
+      "2021-09-21 21:09:07,215  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:09:07,215  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:09:07,216  - patience: \"3\"\n",
+      "2021-09-21 21:09:07,216  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:09:07,216  - max_epochs: \"10\"\n",
+      "2021-09-21 21:09:07,216  - shuffle: \"True\"\n",
+      "2021-09-21 21:09:07,217  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:09:07,217  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:09:07,217 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:07,218 Model training base path: \"temp\"\n",
+      "2021-09-21 21:09:07,218 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:07,218 Device: cuda:0\n",
+      "2021-09-21 21:09:07,218 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:07,219 Embeddings storage mode: cpu\n"
      ]
     },
     {
@@ -3268,175 +3282,171 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:31:18,610 epoch 1 - iter 3/32 - loss 0.21058473 - samples/sec: 18.43 - lr: 0.020000\n",
-      "2021-09-08 01:31:18,781 epoch 1 - iter 6/32 - loss 0.14670679 - samples/sec: 17.60 - lr: 0.020000\n",
-      "2021-09-08 01:31:18,992 epoch 1 - iter 9/32 - loss 0.57103992 - samples/sec: 14.23 - lr: 0.020000\n",
-      "2021-09-08 01:31:19,166 epoch 1 - iter 12/32 - loss 0.64325958 - samples/sec: 17.33 - lr: 0.020000\n",
-      "2021-09-08 01:31:19,369 epoch 1 - iter 15/32 - loss 0.66823440 - samples/sec: 14.81 - lr: 0.020000\n",
-      "2021-09-08 01:31:19,539 epoch 1 - iter 18/32 - loss 0.62066307 - samples/sec: 17.74 - lr: 0.020000\n",
-      "2021-09-08 01:31:19,725 epoch 1 - iter 21/32 - loss 0.53842150 - samples/sec: 16.14 - lr: 0.020000\n",
-      "2021-09-08 01:31:19,896 epoch 1 - iter 24/32 - loss 0.52940298 - samples/sec: 17.63 - lr: 0.020000\n",
-      "2021-09-08 01:31:20,079 epoch 1 - iter 27/32 - loss 0.50161061 - samples/sec: 16.48 - lr: 0.020000\n",
-      "2021-09-08 01:31:20,273 epoch 1 - iter 30/32 - loss 0.55593343 - samples/sec: 15.44 - lr: 0.020000\n",
-      "2021-09-08 01:31:20,402 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:31:20,402 EPOCH 1 done: loss 0.5247 - lr 0.0200000\n",
-      "2021-09-08 01:31:20,592 DEV : loss 1.8873531818389893 - score 0.0\n",
-      "2021-09-08 01:31:20,593 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:31:26,627 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:31:26,859 epoch 2 - iter 3/32 - loss 1.50524710 - samples/sec: 13.95 - lr: 0.020000\n",
-      "2021-09-08 01:31:27,029 epoch 2 - iter 6/32 - loss 1.14926818 - samples/sec: 17.74 - lr: 0.020000\n",
-      "2021-09-08 01:31:27,256 epoch 2 - iter 9/32 - loss 0.97547432 - samples/sec: 13.23 - lr: 0.020000\n",
-      "2021-09-08 01:31:27,425 epoch 2 - iter 12/32 - loss 0.93558038 - samples/sec: 17.82 - lr: 0.020000\n",
-      "2021-09-08 01:31:27,627 epoch 2 - iter 15/32 - loss 0.90614016 - samples/sec: 14.91 - lr: 0.020000\n",
-      "2021-09-08 01:31:27,797 epoch 2 - iter 18/32 - loss 0.78737414 - samples/sec: 17.67 - lr: 0.020000\n",
-      "2021-09-08 01:31:27,984 epoch 2 - iter 21/32 - loss 0.71588115 - samples/sec: 16.12 - lr: 0.020000\n",
-      "2021-09-08 01:31:28,157 epoch 2 - iter 24/32 - loss 0.63569909 - samples/sec: 17.34 - lr: 0.020000\n",
-      "2021-09-08 01:31:28,352 epoch 2 - iter 27/32 - loss 0.68397859 - samples/sec: 15.43 - lr: 0.020000\n",
-      "2021-09-08 01:31:28,523 epoch 2 - iter 30/32 - loss 0.69109795 - samples/sec: 17.65 - lr: 0.020000\n",
-      "2021-09-08 01:31:28,670 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:31:28,670 EPOCH 2 done: loss 0.7896 - lr 0.0200000\n",
-      "2021-09-08 01:31:28,772 DEV : loss 0.9687559008598328 - score 0.5\n",
-      "2021-09-08 01:31:28,773 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:09:07,429 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:07,605 epoch 1 - iter 3/32 - loss 0.71191667 - samples/sec: 19.00 - lr: 0.020000\n",
+      "2021-09-21 21:09:07,751 epoch 1 - iter 6/32 - loss 0.47647048 - samples/sec: 20.65 - lr: 0.020000\n",
+      "2021-09-21 21:09:07,917 epoch 1 - iter 9/32 - loss 0.86757185 - samples/sec: 18.12 - lr: 0.020000\n",
+      "2021-09-21 21:09:08,066 epoch 1 - iter 12/32 - loss 1.00311933 - samples/sec: 20.21 - lr: 0.020000\n",
+      "2021-09-21 21:09:08,220 epoch 1 - iter 15/32 - loss 0.93867653 - samples/sec: 19.60 - lr: 0.020000\n",
+      "2021-09-21 21:09:08,380 epoch 1 - iter 18/32 - loss 0.84654028 - samples/sec: 18.78 - lr: 0.020000\n",
+      "2021-09-21 21:09:08,534 epoch 1 - iter 21/32 - loss 0.73851982 - samples/sec: 19.49 - lr: 0.020000\n",
+      "2021-09-21 21:09:08,699 epoch 1 - iter 24/32 - loss 0.65393512 - samples/sec: 18.29 - lr: 0.020000\n",
+      "2021-09-21 21:09:08,856 epoch 1 - iter 27/32 - loss 0.71239063 - samples/sec: 19.15 - lr: 0.020000\n",
+      "2021-09-21 21:09:09,010 epoch 1 - iter 30/32 - loss 0.66060644 - samples/sec: 19.63 - lr: 0.020000\n",
+      "2021-09-21 21:09:09,105 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:09,106 EPOCH 1 done: loss 0.6309 - lr 0.0200000\n",
+      "2021-09-21 21:09:09,371 DEV : loss 0.6700336337089539 - score 0.75\n",
+      "2021-09-21 21:09:09,372 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:31:32,540 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:31:32,734 epoch 3 - iter 3/32 - loss 0.38327900 - samples/sec: 17.27 - lr: 0.020000\n",
-      "2021-09-08 01:31:32,924 epoch 3 - iter 6/32 - loss 0.32351058 - samples/sec: 15.80 - lr: 0.020000\n",
-      "2021-09-08 01:31:33,111 epoch 3 - iter 9/32 - loss 0.24216258 - samples/sec: 16.09 - lr: 0.020000\n",
-      "2021-09-08 01:31:33,294 epoch 3 - iter 12/32 - loss 0.34229395 - samples/sec: 16.48 - lr: 0.020000\n",
-      "2021-09-08 01:31:33,475 epoch 3 - iter 15/32 - loss 0.29841045 - samples/sec: 16.63 - lr: 0.020000\n",
-      "2021-09-08 01:31:33,651 epoch 3 - iter 18/32 - loss 0.39822383 - samples/sec: 17.11 - lr: 0.020000\n",
-      "2021-09-08 01:31:33,842 epoch 3 - iter 21/32 - loss 0.54911585 - samples/sec: 15.76 - lr: 0.020000\n",
-      "2021-09-08 01:31:34,039 epoch 3 - iter 24/32 - loss 0.53642373 - samples/sec: 15.24 - lr: 0.020000\n",
-      "2021-09-08 01:31:34,220 epoch 3 - iter 27/32 - loss 0.49146591 - samples/sec: 16.60 - lr: 0.020000\n",
-      "2021-09-08 01:31:34,412 epoch 3 - iter 30/32 - loss 0.45904283 - samples/sec: 15.67 - lr: 0.020000\n",
-      "2021-09-08 01:31:34,526 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:31:34,527 EPOCH 3 done: loss 0.4445 - lr 0.0200000\n",
-      "2021-09-08 01:31:34,622 DEV : loss 0.34937310218811035 - score 0.5\n",
-      "2021-09-08 01:31:34,623 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:09:13,797 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:14,065 epoch 2 - iter 3/32 - loss 0.28783665 - samples/sec: 13.58 - lr: 0.020000\n",
+      "2021-09-21 21:09:14,332 epoch 2 - iter 6/32 - loss 0.44134976 - samples/sec: 11.26 - lr: 0.020000\n",
+      "2021-09-21 21:09:14,571 epoch 2 - iter 9/32 - loss 0.36776534 - samples/sec: 12.59 - lr: 0.020000\n",
+      "2021-09-21 21:09:14,829 epoch 2 - iter 12/32 - loss 0.39270653 - samples/sec: 11.65 - lr: 0.020000\n",
+      "2021-09-21 21:09:15,088 epoch 2 - iter 15/32 - loss 0.45899592 - samples/sec: 11.60 - lr: 0.020000\n",
+      "2021-09-21 21:09:15,348 epoch 2 - iter 18/32 - loss 0.48384168 - samples/sec: 11.56 - lr: 0.020000\n",
+      "2021-09-21 21:09:15,612 epoch 2 - iter 21/32 - loss 0.61226032 - samples/sec: 11.43 - lr: 0.020000\n",
+      "2021-09-21 21:09:15,852 epoch 2 - iter 24/32 - loss 0.59762559 - samples/sec: 12.51 - lr: 0.020000\n",
+      "2021-09-21 21:09:16,099 epoch 2 - iter 27/32 - loss 0.54410964 - samples/sec: 12.19 - lr: 0.020000\n",
+      "2021-09-21 21:09:16,384 epoch 2 - iter 30/32 - loss 0.56194112 - samples/sec: 10.56 - lr: 0.020000\n",
+      "2021-09-21 21:09:16,564 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:16,565 EPOCH 2 done: loss 0.5395 - lr 0.0200000\n",
+      "2021-09-21 21:09:16,731 DEV : loss 0.7941477298736572 - score 0.25\n",
+      "2021-09-21 21:09:16,732 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:09:16,734 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:17,041 epoch 3 - iter 3/32 - loss 0.31484556 - samples/sec: 11.56 - lr: 0.020000\n",
+      "2021-09-21 21:09:17,294 epoch 3 - iter 6/32 - loss 0.22847354 - samples/sec: 11.89 - lr: 0.020000\n",
+      "2021-09-21 21:09:17,563 epoch 3 - iter 9/32 - loss 0.36785738 - samples/sec: 11.19 - lr: 0.020000\n",
+      "2021-09-21 21:09:17,789 epoch 3 - iter 12/32 - loss 0.27853481 - samples/sec: 13.30 - lr: 0.020000\n",
+      "2021-09-21 21:09:18,014 epoch 3 - iter 15/32 - loss 0.24258920 - samples/sec: 13.38 - lr: 0.020000\n",
+      "2021-09-21 21:09:18,217 epoch 3 - iter 18/32 - loss 0.22544108 - samples/sec: 14.83 - lr: 0.020000\n",
+      "2021-09-21 21:09:18,392 epoch 3 - iter 21/32 - loss 0.27392350 - samples/sec: 17.20 - lr: 0.020000\n",
+      "2021-09-21 21:09:18,599 epoch 3 - iter 24/32 - loss 0.29378000 - samples/sec: 14.52 - lr: 0.020000\n",
+      "2021-09-21 21:09:18,799 epoch 3 - iter 27/32 - loss 0.26307836 - samples/sec: 15.07 - lr: 0.020000\n",
+      "2021-09-21 21:09:19,013 epoch 3 - iter 30/32 - loss 0.27329450 - samples/sec: 14.08 - lr: 0.020000\n",
+      "2021-09-21 21:09:19,156 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:19,156 EPOCH 3 done: loss 0.3401 - lr 0.0200000\n",
+      "2021-09-21 21:09:19,392 DEV : loss 0.6322993040084839 - score 0.75\n",
+      "2021-09-21 21:09:19,395 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:31:38,681 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:31:38,885 epoch 4 - iter 3/32 - loss 0.00755715 - samples/sec: 17.03 - lr: 0.020000\n",
-      "2021-09-08 01:31:39,061 epoch 4 - iter 6/32 - loss 0.05242387 - samples/sec: 17.16 - lr: 0.020000\n",
-      "2021-09-08 01:31:39,267 epoch 4 - iter 9/32 - loss 0.11568798 - samples/sec: 14.62 - lr: 0.020000\n",
-      "2021-09-08 01:31:39,431 epoch 4 - iter 12/32 - loss 0.09612905 - samples/sec: 18.28 - lr: 0.020000\n",
-      "2021-09-08 01:31:39,632 epoch 4 - iter 15/32 - loss 0.19351701 - samples/sec: 14.97 - lr: 0.020000\n",
-      "2021-09-08 01:31:39,820 epoch 4 - iter 18/32 - loss 0.23457969 - samples/sec: 16.02 - lr: 0.020000\n",
-      "2021-09-08 01:31:39,998 epoch 4 - iter 21/32 - loss 0.25673570 - samples/sec: 16.88 - lr: 0.020000\n",
-      "2021-09-08 01:31:40,194 epoch 4 - iter 24/32 - loss 0.23365266 - samples/sec: 15.38 - lr: 0.020000\n",
-      "2021-09-08 01:31:40,379 epoch 4 - iter 27/32 - loss 0.21324745 - samples/sec: 16.27 - lr: 0.020000\n",
-      "2021-09-08 01:31:40,570 epoch 4 - iter 30/32 - loss 0.19223139 - samples/sec: 15.78 - lr: 0.020000\n",
-      "2021-09-08 01:31:40,688 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:31:40,689 EPOCH 4 done: loss 0.1808 - lr 0.0200000\n",
-      "2021-09-08 01:31:40,778 DEV : loss 0.37400388717651367 - score 0.75\n",
-      "2021-09-08 01:31:40,779 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:31:45,015 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:31:45,198 epoch 5 - iter 3/32 - loss 0.00227144 - samples/sec: 18.58 - lr: 0.020000\n",
-      "2021-09-08 01:31:45,398 epoch 5 - iter 6/32 - loss 0.01568968 - samples/sec: 15.05 - lr: 0.020000\n",
-      "2021-09-08 01:31:45,556 epoch 5 - iter 9/32 - loss 0.01229110 - samples/sec: 19.10 - lr: 0.020000\n",
-      "2021-09-08 01:31:45,746 epoch 5 - iter 12/32 - loss 0.01057441 - samples/sec: 15.81 - lr: 0.020000\n",
-      "2021-09-08 01:31:45,923 epoch 5 - iter 15/32 - loss 0.07276175 - samples/sec: 17.05 - lr: 0.020000\n",
-      "2021-09-08 01:31:46,125 epoch 5 - iter 18/32 - loss 0.07812792 - samples/sec: 14.86 - lr: 0.020000\n",
-      "2021-09-08 01:31:46,304 epoch 5 - iter 21/32 - loss 0.15947463 - samples/sec: 16.83 - lr: 0.020000\n",
-      "2021-09-08 01:31:46,498 epoch 5 - iter 24/32 - loss 0.18034311 - samples/sec: 15.55 - lr: 0.020000\n",
-      "2021-09-08 01:31:46,679 epoch 5 - iter 27/32 - loss 0.18411417 - samples/sec: 16.56 - lr: 0.020000\n",
-      "2021-09-08 01:31:46,859 epoch 5 - iter 30/32 - loss 0.19800263 - samples/sec: 16.79 - lr: 0.020000\n",
-      "2021-09-08 01:31:46,974 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:31:46,974 EPOCH 5 done: loss 0.1860 - lr 0.0200000\n",
-      "2021-09-08 01:31:47,089 DEV : loss 0.44691526889801025 - score 0.75\n",
-      "2021-09-08 01:31:47,090 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:31:47,092 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:31:47,270 epoch 6 - iter 3/32 - loss 0.00722715 - samples/sec: 18.51 - lr: 0.020000\n",
-      "2021-09-08 01:31:47,457 epoch 6 - iter 6/32 - loss 0.02078457 - samples/sec: 16.10 - lr: 0.020000\n",
-      "2021-09-08 01:31:47,621 epoch 6 - iter 9/32 - loss 0.01597028 - samples/sec: 18.40 - lr: 0.020000\n",
-      "2021-09-08 01:31:47,815 epoch 6 - iter 12/32 - loss 0.05141907 - samples/sec: 15.49 - lr: 0.020000\n",
-      "2021-09-08 01:31:47,989 epoch 6 - iter 15/32 - loss 0.06655935 - samples/sec: 17.29 - lr: 0.020000\n",
-      "2021-09-08 01:31:48,174 epoch 6 - iter 18/32 - loss 0.06316734 - samples/sec: 16.28 - lr: 0.020000\n",
-      "2021-09-08 01:31:48,345 epoch 6 - iter 21/32 - loss 0.05454023 - samples/sec: 17.64 - lr: 0.020000\n",
-      "2021-09-08 01:31:48,528 epoch 6 - iter 24/32 - loss 0.08940154 - samples/sec: 16.38 - lr: 0.020000\n",
-      "2021-09-08 01:31:48,711 epoch 6 - iter 27/32 - loss 0.08088410 - samples/sec: 16.45 - lr: 0.020000\n",
-      "2021-09-08 01:31:48,885 epoch 6 - iter 30/32 - loss 0.10752826 - samples/sec: 17.36 - lr: 0.020000\n",
-      "2021-09-08 01:31:49,018 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:09:26,774 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:27,016 epoch 4 - iter 3/32 - loss 0.20066331 - samples/sec: 13.71 - lr: 0.020000\n",
+      "2021-09-21 21:09:27,269 epoch 4 - iter 6/32 - loss 0.10732698 - samples/sec: 11.86 - lr: 0.020000\n",
+      "2021-09-21 21:09:27,543 epoch 4 - iter 9/32 - loss 0.07788357 - samples/sec: 10.98 - lr: 0.020000\n",
+      "2021-09-21 21:09:27,791 epoch 4 - iter 12/32 - loss 0.08314149 - samples/sec: 12.13 - lr: 0.020000\n",
+      "2021-09-21 21:09:28,029 epoch 4 - iter 15/32 - loss 0.16572451 - samples/sec: 12.64 - lr: 0.020000\n",
+      "2021-09-21 21:09:28,286 epoch 4 - iter 18/32 - loss 0.14788507 - samples/sec: 11.69 - lr: 0.020000\n",
+      "2021-09-21 21:09:28,559 epoch 4 - iter 21/32 - loss 0.18079135 - samples/sec: 11.02 - lr: 0.020000\n",
+      "2021-09-21 21:09:28,792 epoch 4 - iter 24/32 - loss 0.16201323 - samples/sec: 12.89 - lr: 0.020000\n",
+      "2021-09-21 21:09:29,015 epoch 4 - iter 27/32 - loss 0.14861469 - samples/sec: 13.53 - lr: 0.020000\n",
+      "2021-09-21 21:09:29,280 epoch 4 - iter 30/32 - loss 0.15678842 - samples/sec: 11.34 - lr: 0.020000\n",
+      "2021-09-21 21:09:29,449 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:29,449 EPOCH 4 done: loss 0.1810 - lr 0.0200000\n",
+      "2021-09-21 21:09:29,628 DEV : loss 0.8532770276069641 - score 0.5\n",
+      "2021-09-21 21:09:29,629 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:09:29,631 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:29,932 epoch 5 - iter 3/32 - loss 0.24337673 - samples/sec: 11.69 - lr: 0.020000\n",
+      "2021-09-21 21:09:30,187 epoch 5 - iter 6/32 - loss 0.20455327 - samples/sec: 11.79 - lr: 0.020000\n",
+      "2021-09-21 21:09:30,439 epoch 5 - iter 9/32 - loss 0.13681692 - samples/sec: 11.94 - lr: 0.020000\n",
+      "2021-09-21 21:09:30,692 epoch 5 - iter 12/32 - loss 0.15441843 - samples/sec: 11.92 - lr: 0.020000\n",
+      "2021-09-21 21:09:30,919 epoch 5 - iter 15/32 - loss 0.12458626 - samples/sec: 13.20 - lr: 0.020000\n",
+      "2021-09-21 21:09:31,159 epoch 5 - iter 18/32 - loss 0.12680655 - samples/sec: 12.58 - lr: 0.020000\n",
+      "2021-09-21 21:09:31,395 epoch 5 - iter 21/32 - loss 0.12559633 - samples/sec: 12.73 - lr: 0.020000\n",
+      "2021-09-21 21:09:31,616 epoch 5 - iter 24/32 - loss 0.18469711 - samples/sec: 13.61 - lr: 0.020000\n",
+      "2021-09-21 21:09:31,802 epoch 5 - iter 27/32 - loss 0.16568105 - samples/sec: 16.20 - lr: 0.020000\n",
+      "2021-09-21 21:09:31,999 epoch 5 - iter 30/32 - loss 0.15161804 - samples/sec: 15.23 - lr: 0.020000\n",
+      "2021-09-21 21:09:32,135 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:32,136 EPOCH 5 done: loss 0.1634 - lr 0.0200000\n",
+      "2021-09-21 21:09:32,249 DEV : loss 1.2293845415115356 - score 0.5\n",
+      "2021-09-21 21:09:32,250 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:09:32,252 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:32,497 epoch 6 - iter 3/32 - loss 0.00984539 - samples/sec: 15.01 - lr: 0.020000\n",
+      "2021-09-21 21:09:32,684 epoch 6 - iter 6/32 - loss 0.02145270 - samples/sec: 16.05 - lr: 0.020000\n",
+      "2021-09-21 21:09:32,826 epoch 6 - iter 9/32 - loss 0.01609011 - samples/sec: 21.30 - lr: 0.020000\n",
+      "2021-09-21 21:09:33,022 epoch 6 - iter 12/32 - loss 0.01742040 - samples/sec: 15.33 - lr: 0.020000\n",
+      "2021-09-21 21:09:33,200 epoch 6 - iter 15/32 - loss 0.01422007 - samples/sec: 16.96 - lr: 0.020000\n",
+      "2021-09-21 21:09:33,376 epoch 6 - iter 18/32 - loss 0.01213486 - samples/sec: 17.04 - lr: 0.020000\n",
+      "2021-09-21 21:09:33,564 epoch 6 - iter 21/32 - loss 0.01155743 - samples/sec: 16.02 - lr: 0.020000\n",
+      "2021-09-21 21:09:33,762 epoch 6 - iter 24/32 - loss 0.03802711 - samples/sec: 15.25 - lr: 0.020000\n",
+      "2021-09-21 21:09:33,954 epoch 6 - iter 27/32 - loss 0.03663072 - samples/sec: 15.69 - lr: 0.020000\n",
+      "2021-09-21 21:09:34,147 epoch 6 - iter 30/32 - loss 0.03330640 - samples/sec: 15.57 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:31:49,019 EPOCH 6 done: loss 0.1014 - lr 0.0200000\n",
-      "2021-09-08 01:31:49,128 DEV : loss 0.213444322347641 - score 1.0\n",
-      "2021-09-08 01:31:49,128 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:31:53,028 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:31:53,226 epoch 7 - iter 3/32 - loss 0.00177107 - samples/sec: 17.62 - lr: 0.020000\n",
-      "2021-09-08 01:31:53,383 epoch 7 - iter 6/32 - loss 0.00332509 - samples/sec: 19.20 - lr: 0.020000\n",
-      "2021-09-08 01:31:53,548 epoch 7 - iter 9/32 - loss 0.00660266 - samples/sec: 18.17 - lr: 0.020000\n",
-      "2021-09-08 01:31:53,714 epoch 7 - iter 12/32 - loss 0.10877791 - samples/sec: 18.25 - lr: 0.020000\n",
-      "2021-09-08 01:31:53,872 epoch 7 - iter 15/32 - loss 0.08778880 - samples/sec: 19.04 - lr: 0.020000\n",
-      "2021-09-08 01:31:54,065 epoch 7 - iter 18/32 - loss 0.10374396 - samples/sec: 15.65 - lr: 0.020000\n",
-      "2021-09-08 01:31:54,236 epoch 7 - iter 21/32 - loss 0.09021222 - samples/sec: 17.59 - lr: 0.020000\n",
-      "2021-09-08 01:31:54,434 epoch 7 - iter 24/32 - loss 0.08266628 - samples/sec: 15.15 - lr: 0.020000\n",
-      "2021-09-08 01:31:54,606 epoch 7 - iter 27/32 - loss 0.07437422 - samples/sec: 17.57 - lr: 0.020000\n",
-      "2021-09-08 01:31:54,783 epoch 7 - iter 30/32 - loss 0.06718974 - samples/sec: 16.94 - lr: 0.020000\n",
-      "2021-09-08 01:31:54,911 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:31:54,912 EPOCH 7 done: loss 0.0631 - lr 0.0200000\n",
-      "2021-09-08 01:31:54,987 DEV : loss 0.03662747144699097 - score 1.0\n",
-      "2021-09-08 01:31:54,988 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:31:58,993 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:31:59,182 epoch 8 - iter 3/32 - loss 0.01428422 - samples/sec: 17.73 - lr: 0.020000\n",
-      "2021-09-08 01:31:59,361 epoch 8 - iter 6/32 - loss 0.00804759 - samples/sec: 16.87 - lr: 0.020000\n",
-      "2021-09-08 01:31:59,535 epoch 8 - iter 9/32 - loss 0.05805398 - samples/sec: 17.30 - lr: 0.020000\n",
-      "2021-09-08 01:31:59,727 epoch 8 - iter 12/32 - loss 0.04396867 - samples/sec: 15.66 - lr: 0.020000\n",
-      "2021-09-08 01:31:59,896 epoch 8 - iter 15/32 - loss 0.03620816 - samples/sec: 17.88 - lr: 0.020000\n",
-      "2021-09-08 01:32:00,084 epoch 8 - iter 18/32 - loss 0.03053530 - samples/sec: 16.01 - lr: 0.020000\n",
-      "2021-09-08 01:32:00,258 epoch 8 - iter 21/32 - loss 0.02626083 - samples/sec: 17.29 - lr: 0.020000\n",
-      "2021-09-08 01:32:00,443 epoch 8 - iter 24/32 - loss 0.02325818 - samples/sec: 16.27 - lr: 0.020000\n",
-      "2021-09-08 01:32:00,609 epoch 8 - iter 27/32 - loss 0.02093633 - samples/sec: 18.15 - lr: 0.020000\n",
-      "2021-09-08 01:32:00,800 epoch 8 - iter 30/32 - loss 0.01947877 - samples/sec: 15.75 - lr: 0.020000\n",
-      "2021-09-08 01:32:00,905 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:00,906 EPOCH 8 done: loss 0.0183 - lr 0.0200000\n",
-      "2021-09-08 01:32:00,997 DEV : loss 0.013292904011905193 - score 1.0\n",
-      "2021-09-08 01:32:00,997 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:32:05,157 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:05,367 epoch 9 - iter 3/32 - loss 0.00072679 - samples/sec: 15.59 - lr: 0.020000\n",
-      "2021-09-08 01:32:05,540 epoch 9 - iter 6/32 - loss 0.00616832 - samples/sec: 17.41 - lr: 0.020000\n",
-      "2021-09-08 01:32:05,737 epoch 9 - iter 9/32 - loss 0.00556822 - samples/sec: 15.35 - lr: 0.020000\n",
-      "2021-09-08 01:32:05,899 epoch 9 - iter 12/32 - loss 0.02090265 - samples/sec: 18.57 - lr: 0.020000\n",
-      "2021-09-08 01:32:06,099 epoch 9 - iter 15/32 - loss 0.01784164 - samples/sec: 14.99 - lr: 0.020000\n",
-      "2021-09-08 01:32:06,259 epoch 9 - iter 18/32 - loss 0.01503632 - samples/sec: 18.87 - lr: 0.020000\n",
-      "2021-09-08 01:32:06,470 epoch 9 - iter 21/32 - loss 0.01340483 - samples/sec: 14.23 - lr: 0.020000\n",
-      "2021-09-08 01:32:06,628 epoch 9 - iter 24/32 - loss 0.01185405 - samples/sec: 19.06 - lr: 0.020000\n",
-      "2021-09-08 01:32:06,830 epoch 9 - iter 27/32 - loss 0.01067389 - samples/sec: 14.91 - lr: 0.020000\n",
-      "2021-09-08 01:32:06,987 epoch 9 - iter 30/32 - loss 0.01003788 - samples/sec: 19.21 - lr: 0.020000\n",
-      "2021-09-08 01:32:07,099 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:07,100 EPOCH 9 done: loss 0.0095 - lr 0.0200000\n",
-      "2021-09-08 01:32:07,222 DEV : loss 0.01967196725308895 - score 1.0\n",
-      "2021-09-08 01:32:07,223 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:32:07,225 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:07,421 epoch 10 - iter 3/32 - loss 0.00071595 - samples/sec: 16.63 - lr: 0.020000\n",
-      "2021-09-08 01:32:07,723 epoch 10 - iter 6/32 - loss 0.00088290 - samples/sec: 9.95 - lr: 0.020000\n",
-      "2021-09-08 01:32:07,882 epoch 10 - iter 9/32 - loss 0.00075747 - samples/sec: 19.05 - lr: 0.020000\n",
-      "2021-09-08 01:32:08,085 epoch 10 - iter 12/32 - loss 0.00091807 - samples/sec: 14.82 - lr: 0.020000\n",
-      "2021-09-08 01:32:08,243 epoch 10 - iter 15/32 - loss 0.00131540 - samples/sec: 18.96 - lr: 0.020000\n",
-      "2021-09-08 01:32:08,432 epoch 10 - iter 18/32 - loss 0.00244126 - samples/sec: 15.96 - lr: 0.020000\n",
-      "2021-09-08 01:32:08,595 epoch 10 - iter 21/32 - loss 0.00216508 - samples/sec: 18.53 - lr: 0.020000\n",
-      "2021-09-08 01:32:08,791 epoch 10 - iter 24/32 - loss 0.00193703 - samples/sec: 15.33 - lr: 0.020000\n",
-      "2021-09-08 01:32:08,950 epoch 10 - iter 27/32 - loss 0.00176700 - samples/sec: 18.94 - lr: 0.020000\n",
-      "2021-09-08 01:32:09,149 epoch 10 - iter 30/32 - loss 0.00165936 - samples/sec: 15.12 - lr: 0.020000\n",
-      "2021-09-08 01:32:09,267 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:09,268 EPOCH 10 done: loss 0.0016 - lr 0.0200000\n",
-      "2021-09-08 01:32:09,343 DEV : loss 0.01275169849395752 - score 1.0\n",
-      "2021-09-08 01:32:09,344 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:32:17,203 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:17,203 Testing using best model ...\n",
-      "2021-09-08 01:32:17,205 loading file temp/best-model.pt\n",
+      "2021-09-21 21:09:34,289 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:34,290 EPOCH 6 done: loss 0.0520 - lr 0.0200000\n",
+      "2021-09-21 21:09:34,495 DEV : loss 0.8748328685760498 - score 0.75\n",
+      "2021-09-21 21:09:34,496 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:09:34,568 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:34,758 epoch 7 - iter 3/32 - loss 0.00624406 - samples/sec: 17.92 - lr: 0.020000\n",
+      "2021-09-21 21:09:34,922 epoch 7 - iter 6/32 - loss 0.00348896 - samples/sec: 18.38 - lr: 0.020000\n",
+      "2021-09-21 21:09:35,116 epoch 7 - iter 9/32 - loss 0.14007876 - samples/sec: 15.55 - lr: 0.020000\n",
+      "2021-09-21 21:09:35,307 epoch 7 - iter 12/32 - loss 0.10555085 - samples/sec: 15.72 - lr: 0.020000\n",
+      "2021-09-21 21:09:35,489 epoch 7 - iter 15/32 - loss 0.08458072 - samples/sec: 16.52 - lr: 0.020000\n",
+      "2021-09-21 21:09:35,689 epoch 7 - iter 18/32 - loss 0.07076075 - samples/sec: 15.07 - lr: 0.020000\n",
+      "2021-09-21 21:09:35,875 epoch 7 - iter 21/32 - loss 0.06104002 - samples/sec: 16.18 - lr: 0.020000\n",
+      "2021-09-21 21:09:36,055 epoch 7 - iter 24/32 - loss 0.05426607 - samples/sec: 16.70 - lr: 0.020000\n",
+      "2021-09-21 21:09:36,244 epoch 7 - iter 27/32 - loss 0.11170043 - samples/sec: 15.94 - lr: 0.020000\n",
+      "2021-09-21 21:09:36,400 epoch 7 - iter 30/32 - loss 0.10190578 - samples/sec: 19.35 - lr: 0.020000\n",
+      "2021-09-21 21:09:36,499 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:36,500 EPOCH 7 done: loss 0.0955 - lr 0.0200000\n",
+      "2021-09-21 21:09:36,649 DEV : loss 1.07704758644104 - score 0.75\n",
+      "Epoch     7: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:09:36,650 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:09:36,656 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:36,819 epoch 8 - iter 3/32 - loss 0.00807796 - samples/sec: 21.13 - lr: 0.010000\n",
+      "2021-09-21 21:09:36,978 epoch 8 - iter 6/32 - loss 0.00704075 - samples/sec: 18.97 - lr: 0.010000\n",
+      "2021-09-21 21:09:37,123 epoch 8 - iter 9/32 - loss 0.00480472 - samples/sec: 20.82 - lr: 0.010000\n",
+      "2021-09-21 21:09:37,272 epoch 8 - iter 12/32 - loss 0.00403635 - samples/sec: 20.23 - lr: 0.010000\n",
+      "2021-09-21 21:09:37,427 epoch 8 - iter 15/32 - loss 0.00328017 - samples/sec: 19.40 - lr: 0.010000\n",
+      "2021-09-21 21:09:37,573 epoch 8 - iter 18/32 - loss 0.01260466 - samples/sec: 20.60 - lr: 0.010000\n",
+      "2021-09-21 21:09:37,729 epoch 8 - iter 21/32 - loss 0.01089047 - samples/sec: 19.39 - lr: 0.010000\n",
+      "2021-09-21 21:09:37,872 epoch 8 - iter 24/32 - loss 0.00962114 - samples/sec: 20.93 - lr: 0.010000\n",
+      "2021-09-21 21:09:38,030 epoch 8 - iter 27/32 - loss 0.00861557 - samples/sec: 19.17 - lr: 0.010000\n",
+      "2021-09-21 21:09:38,187 epoch 8 - iter 30/32 - loss 0.00791042 - samples/sec: 19.19 - lr: 0.010000\n",
+      "2021-09-21 21:09:38,296 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:38,296 EPOCH 8 done: loss 0.0075 - lr 0.0100000\n",
+      "2021-09-21 21:09:38,468 DEV : loss 1.55706787109375 - score 0.5\n",
+      "2021-09-21 21:09:38,468 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:09:38,545 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:38,717 epoch 9 - iter 3/32 - loss 0.00035723 - samples/sec: 19.28 - lr: 0.010000\n",
+      "2021-09-21 21:09:38,869 epoch 9 - iter 6/32 - loss 0.00047323 - samples/sec: 19.80 - lr: 0.010000\n",
+      "2021-09-21 21:09:39,021 epoch 9 - iter 9/32 - loss 0.00110460 - samples/sec: 19.80 - lr: 0.010000\n",
+      "2021-09-21 21:09:39,178 epoch 9 - iter 12/32 - loss 0.00109576 - samples/sec: 19.23 - lr: 0.010000\n",
+      "2021-09-21 21:09:39,331 epoch 9 - iter 15/32 - loss 0.00103236 - samples/sec: 19.71 - lr: 0.010000\n",
+      "2021-09-21 21:09:39,479 epoch 9 - iter 18/32 - loss 0.00088984 - samples/sec: 20.28 - lr: 0.010000\n",
+      "2021-09-21 21:09:39,628 epoch 9 - iter 21/32 - loss 0.00087773 - samples/sec: 20.16 - lr: 0.010000\n",
+      "2021-09-21 21:09:39,782 epoch 9 - iter 24/32 - loss 0.00111305 - samples/sec: 19.58 - lr: 0.010000\n",
+      "2021-09-21 21:09:39,939 epoch 9 - iter 27/32 - loss 0.00113913 - samples/sec: 19.23 - lr: 0.010000\n",
+      "2021-09-21 21:09:40,093 epoch 9 - iter 30/32 - loss 0.00108243 - samples/sec: 19.51 - lr: 0.010000\n",
+      "2021-09-21 21:09:40,195 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:40,195 EPOCH 9 done: loss 0.0010 - lr 0.0100000\n",
+      "2021-09-21 21:09:40,639 DEV : loss 1.5962519645690918 - score 0.5\n",
+      "2021-09-21 21:09:40,640 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:09:40,642 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:40,829 epoch 10 - iter 3/32 - loss 0.00051492 - samples/sec: 17.99 - lr: 0.010000\n",
+      "2021-09-21 21:09:40,996 epoch 10 - iter 6/32 - loss 0.00043190 - samples/sec: 18.04 - lr: 0.010000\n",
+      "2021-09-21 21:09:41,183 epoch 10 - iter 9/32 - loss 0.00038406 - samples/sec: 16.09 - lr: 0.010000\n",
+      "2021-09-21 21:09:41,382 epoch 10 - iter 12/32 - loss 0.00065039 - samples/sec: 15.11 - lr: 0.010000\n",
+      "2021-09-21 21:09:41,623 epoch 10 - iter 15/32 - loss 0.00061061 - samples/sec: 12.46 - lr: 0.010000\n",
+      "2021-09-21 21:09:41,820 epoch 10 - iter 18/32 - loss 0.00079259 - samples/sec: 15.27 - lr: 0.010000\n",
+      "2021-09-21 21:09:42,035 epoch 10 - iter 21/32 - loss 0.00105906 - samples/sec: 14.01 - lr: 0.010000\n",
+      "2021-09-21 21:09:42,242 epoch 10 - iter 24/32 - loss 0.00097080 - samples/sec: 14.56 - lr: 0.010000\n",
+      "2021-09-21 21:09:42,463 epoch 10 - iter 27/32 - loss 0.00099738 - samples/sec: 13.60 - lr: 0.010000\n",
+      "2021-09-21 21:09:42,658 epoch 10 - iter 30/32 - loss 0.00093797 - samples/sec: 15.45 - lr: 0.010000\n",
+      "2021-09-21 21:09:42,779 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:42,780 EPOCH 10 done: loss 0.0010 - lr 0.0100000\n",
+      "2021-09-21 21:09:42,919 DEV : loss 1.615719199180603 - score 0.5\n",
+      "2021-09-21 21:09:42,920 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:09:54,162 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:54,162 Testing using best model ...\n",
+      "2021-09-21 21:09:54,164 loading file temp/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:32:22,120 \t0.25\n",
-      "2021-09-08 01:32:22,121 \n",
+      "2021-09-21 21:09:59,713 \t0.25\n",
+      "2021-09-21 21:09:59,714 \n",
       "Results:\n",
       "- F-score (micro) 0.25\n",
       "- F-score (macro) 0.1667\n",
@@ -3445,36 +3455,36 @@
       "By class:\n",
       "                              precision    recall  f1-score   support\n",
       "\n",
-      " this text expresses sadness     0.0000    0.0000    0.0000         1\n",
-      "this text expresses optimism     0.0000    0.0000    0.0000         1\n",
-      "   this text expresses anger     0.5000    1.0000    0.6667         1\n",
-      "     this text expresses joy     0.0000    0.0000    0.0000         1\n",
+      " this text expresses sadness     1.0000    0.5000    0.6667         2\n",
+      "this text expresses optimism     0.0000    0.0000    0.0000         2\n",
+      "   this text expresses anger     0.0000    0.0000    0.0000         0\n",
+      "     this text expresses joy     0.0000    0.0000    0.0000         0\n",
       "\n",
       "                   micro avg     0.2500    0.2500    0.2500         4\n",
-      "                   macro avg     0.1250    0.2500    0.1667         4\n",
-      "                weighted avg     0.1250    0.2500    0.1667         4\n",
+      "                   macro avg     0.2500    0.1250    0.1667         4\n",
+      "                weighted avg     0.5000    0.2500    0.3333         4\n",
       "                 samples avg     0.2500    0.2500    0.2500         4\n",
       "\n",
-      "2021-09-08 01:32:22,121 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:32,886 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
+      "2021-09-21 21:09:59,714 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:16,749 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:32:37,267 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:10:21,170 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 36/36 [00:00<00:00, 43178.42it/s]"
+      "100%|██████████| 36/36 [00:00<00:00, 42994.00it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:32:37,269 [b'this text expresses sadness', b'this text expresses optimism', b'this text expresses anger', b'this text expresses joy']\n",
-      "2021-09-08 01:32:37,461 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:37,463 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:10:21,173 [b'this text expresses sadness', b'this text expresses optimism', b'this text expresses anger', b'this text expresses joy']\n",
+      "2021-09-21 21:10:21,182 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:21,183 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -3787,21 +3797,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:32:37,464 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:37,464 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
-      "2021-09-08 01:32:37,464 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:37,465 Parameters:\n",
-      "2021-09-08 01:32:37,465  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:32:37,465  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:32:37,465  - patience: \"3\"\n",
-      "2021-09-08 01:32:37,466  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:32:37,466  - max_epochs: \"10\"\n",
-      "2021-09-08 01:32:37,466  - shuffle: \"True\"\n",
-      "2021-09-08 01:32:37,467  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:32:37,467  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:32:37,467 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:37,467 Model training base path: \"temp\"\n",
-      "2021-09-08 01:32:37,468 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:10:21,184 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:21,184 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
+      "2021-09-21 21:10:21,185 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:21,185 Parameters:\n",
+      "2021-09-21 21:10:21,185  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:10:21,186  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:10:21,186  - patience: \"3\"\n",
+      "2021-09-21 21:10:21,186  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:10:21,186  - max_epochs: \"10\"\n",
+      "2021-09-21 21:10:21,187  - shuffle: \"True\"\n",
+      "2021-09-21 21:10:21,187  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:10:21,187  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:10:21,187 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:21,188 Model training base path: \"temp\"\n",
+      "2021-09-21 21:10:21,188 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:21,188 Device: cuda:0\n",
+      "2021-09-21 21:10:21,189 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:21,189 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:10:21,196 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -3815,213 +3829,209 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:32:37,468 Device: cuda:0\n",
-      "2021-09-08 01:32:37,468 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:37,469 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:32:37,668 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:37,862 epoch 1 - iter 3/32 - loss 0.33723141 - samples/sec: 16.94 - lr: 0.020000\n",
-      "2021-09-08 01:32:38,075 epoch 1 - iter 6/32 - loss 0.21398394 - samples/sec: 14.15 - lr: 0.020000\n",
-      "2021-09-08 01:32:38,247 epoch 1 - iter 9/32 - loss 0.74879328 - samples/sec: 17.48 - lr: 0.020000\n",
-      "2021-09-08 01:32:38,450 epoch 1 - iter 12/32 - loss 0.74353150 - samples/sec: 14.81 - lr: 0.020000\n",
-      "2021-09-08 01:32:38,631 epoch 1 - iter 15/32 - loss 0.63933971 - samples/sec: 16.63 - lr: 0.020000\n",
-      "2021-09-08 01:32:38,822 epoch 1 - iter 18/32 - loss 0.71976151 - samples/sec: 15.81 - lr: 0.020000\n",
-      "2021-09-08 01:32:39,015 epoch 1 - iter 21/32 - loss 0.74116766 - samples/sec: 15.56 - lr: 0.020000\n",
-      "2021-09-08 01:32:39,210 epoch 1 - iter 24/32 - loss 0.66268140 - samples/sec: 15.41 - lr: 0.020000\n",
-      "2021-09-08 01:32:39,405 epoch 1 - iter 27/32 - loss 0.66556052 - samples/sec: 15.41 - lr: 0.020000\n",
-      "2021-09-08 01:32:39,605 epoch 1 - iter 30/32 - loss 0.70594294 - samples/sec: 15.08 - lr: 0.020000\n",
-      "2021-09-08 01:32:39,720 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:39,720 EPOCH 1 done: loss 0.6752 - lr 0.0200000\n",
-      "2021-09-08 01:32:39,821 DEV : loss 0.5885910987854004 - score 0.5\n",
-      "2021-09-08 01:32:39,822 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:10:21,550 epoch 1 - iter 3/32 - loss 0.25653500 - samples/sec: 9.16 - lr: 0.020000\n",
+      "2021-09-21 21:10:21,773 epoch 1 - iter 6/32 - loss 0.14812205 - samples/sec: 13.45 - lr: 0.020000\n",
+      "2021-09-21 21:10:21,999 epoch 1 - iter 9/32 - loss 0.11475405 - samples/sec: 13.34 - lr: 0.020000\n",
+      "2021-09-21 21:10:22,249 epoch 1 - iter 12/32 - loss 0.57393534 - samples/sec: 12.01 - lr: 0.020000\n",
+      "2021-09-21 21:10:22,534 epoch 1 - iter 15/32 - loss 0.59876388 - samples/sec: 10.55 - lr: 0.020000\n",
+      "2021-09-21 21:10:22,754 epoch 1 - iter 18/32 - loss 0.66144022 - samples/sec: 13.67 - lr: 0.020000\n",
+      "2021-09-21 21:10:22,999 epoch 1 - iter 21/32 - loss 0.70089632 - samples/sec: 12.28 - lr: 0.020000\n",
+      "2021-09-21 21:10:23,224 epoch 1 - iter 24/32 - loss 0.68784688 - samples/sec: 13.37 - lr: 0.020000\n",
+      "2021-09-21 21:10:23,471 epoch 1 - iter 27/32 - loss 0.74744716 - samples/sec: 12.21 - lr: 0.020000\n",
+      "2021-09-21 21:10:23,697 epoch 1 - iter 30/32 - loss 0.71501355 - samples/sec: 13.34 - lr: 0.020000\n",
+      "2021-09-21 21:10:23,840 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:23,840 EPOCH 1 done: loss 0.6812 - lr 0.0200000\n",
+      "2021-09-21 21:10:23,988 DEV : loss 0.7187809944152832 - score 0.25\n",
+      "2021-09-21 21:10:23,989 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:10:28,037 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:28,282 epoch 2 - iter 3/32 - loss 0.06365036 - samples/sec: 13.40 - lr: 0.020000\n",
+      "2021-09-21 21:10:28,497 epoch 2 - iter 6/32 - loss 0.26775222 - samples/sec: 14.01 - lr: 0.020000\n",
+      "2021-09-21 21:10:28,711 epoch 2 - iter 9/32 - loss 0.30496125 - samples/sec: 14.06 - lr: 0.020000\n",
+      "2021-09-21 21:10:28,940 epoch 2 - iter 12/32 - loss 0.39692498 - samples/sec: 13.16 - lr: 0.020000\n",
+      "2021-09-21 21:10:29,148 epoch 2 - iter 15/32 - loss 0.48338928 - samples/sec: 14.46 - lr: 0.020000\n",
+      "2021-09-21 21:10:29,367 epoch 2 - iter 18/32 - loss 0.51443318 - samples/sec: 13.72 - lr: 0.020000\n",
+      "2021-09-21 21:10:29,605 epoch 2 - iter 21/32 - loss 0.50051735 - samples/sec: 12.62 - lr: 0.020000\n",
+      "2021-09-21 21:10:29,841 epoch 2 - iter 24/32 - loss 0.47014819 - samples/sec: 12.75 - lr: 0.020000\n",
+      "2021-09-21 21:10:30,015 epoch 2 - iter 27/32 - loss 0.48909053 - samples/sec: 17.36 - lr: 0.020000\n",
+      "2021-09-21 21:10:30,173 epoch 2 - iter 30/32 - loss 0.49081321 - samples/sec: 19.03 - lr: 0.020000\n",
+      "2021-09-21 21:10:30,286 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:30,287 EPOCH 2 done: loss 0.4679 - lr 0.0200000\n",
+      "2021-09-21 21:10:30,361 DEV : loss 1.1098039150238037 - score 0.5\n",
+      "2021-09-21 21:10:30,362 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:32:44,544 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:44,714 epoch 2 - iter 3/32 - loss 0.57800099 - samples/sec: 19.89 - lr: 0.020000\n",
-      "2021-09-08 01:32:44,876 epoch 2 - iter 6/32 - loss 0.30986411 - samples/sec: 18.68 - lr: 0.020000\n",
-      "2021-09-08 01:32:45,047 epoch 2 - iter 9/32 - loss 0.41787681 - samples/sec: 17.56 - lr: 0.020000\n",
-      "2021-09-08 01:32:45,200 epoch 2 - iter 12/32 - loss 0.37101388 - samples/sec: 19.73 - lr: 0.020000\n",
-      "2021-09-08 01:32:45,391 epoch 2 - iter 15/32 - loss 0.36792070 - samples/sec: 15.78 - lr: 0.020000\n",
-      "2021-09-08 01:32:45,549 epoch 2 - iter 18/32 - loss 0.35934205 - samples/sec: 18.95 - lr: 0.020000\n",
-      "2021-09-08 01:32:45,733 epoch 2 - iter 21/32 - loss 0.49829492 - samples/sec: 16.39 - lr: 0.020000\n",
-      "2021-09-08 01:32:45,878 epoch 2 - iter 24/32 - loss 0.54475002 - samples/sec: 20.82 - lr: 0.020000\n",
-      "2021-09-08 01:32:46,058 epoch 2 - iter 27/32 - loss 0.55530261 - samples/sec: 16.64 - lr: 0.020000\n",
-      "2021-09-08 01:32:46,210 epoch 2 - iter 30/32 - loss 0.53330437 - samples/sec: 19.82 - lr: 0.020000\n",
-      "2021-09-08 01:32:46,320 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:46,321 EPOCH 2 done: loss 0.5355 - lr 0.0200000\n",
-      "2021-09-08 01:32:47,608 DEV : loss 0.2813258767127991 - score 0.75\n",
-      "2021-09-08 01:32:47,609 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:10:38,260 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:38,441 epoch 3 - iter 3/32 - loss 0.10286167 - samples/sec: 18.01 - lr: 0.020000\n",
+      "2021-09-21 21:10:38,612 epoch 3 - iter 6/32 - loss 0.27625258 - samples/sec: 17.59 - lr: 0.020000\n",
+      "2021-09-21 21:10:38,782 epoch 3 - iter 9/32 - loss 0.42098397 - samples/sec: 17.70 - lr: 0.020000\n",
+      "2021-09-21 21:10:38,960 epoch 3 - iter 12/32 - loss 0.32829412 - samples/sec: 16.96 - lr: 0.020000\n",
+      "2021-09-21 21:10:39,171 epoch 3 - iter 15/32 - loss 0.30431091 - samples/sec: 14.23 - lr: 0.020000\n",
+      "2021-09-21 21:10:39,364 epoch 3 - iter 18/32 - loss 0.25477848 - samples/sec: 15.62 - lr: 0.020000\n",
+      "2021-09-21 21:10:39,581 epoch 3 - iter 21/32 - loss 0.28925154 - samples/sec: 13.83 - lr: 0.020000\n",
+      "2021-09-21 21:10:39,805 epoch 3 - iter 24/32 - loss 0.37105435 - samples/sec: 13.49 - lr: 0.020000\n",
+      "2021-09-21 21:10:39,992 epoch 3 - iter 27/32 - loss 0.42302210 - samples/sec: 16.07 - lr: 0.020000\n",
+      "2021-09-21 21:10:40,148 epoch 3 - iter 30/32 - loss 0.42378943 - samples/sec: 19.33 - lr: 0.020000\n",
+      "2021-09-21 21:10:40,297 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:40,297 EPOCH 3 done: loss 0.4033 - lr 0.0200000\n",
+      "2021-09-21 21:10:40,504 DEV : loss 0.35838189721107483 - score 1.0\n",
+      "2021-09-21 21:10:40,504 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:32:52,171 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:52,337 epoch 3 - iter 3/32 - loss 0.09384931 - samples/sec: 20.33 - lr: 0.020000\n",
-      "2021-09-08 01:32:52,519 epoch 3 - iter 6/32 - loss 0.08519685 - samples/sec: 16.49 - lr: 0.020000\n",
-      "2021-09-08 01:32:52,667 epoch 3 - iter 9/32 - loss 0.22566598 - samples/sec: 20.42 - lr: 0.020000\n",
-      "2021-09-08 01:32:52,847 epoch 3 - iter 12/32 - loss 0.32395521 - samples/sec: 16.67 - lr: 0.020000\n",
-      "2021-09-08 01:32:52,994 epoch 3 - iter 15/32 - loss 0.27970919 - samples/sec: 20.46 - lr: 0.020000\n",
-      "2021-09-08 01:32:53,154 epoch 3 - iter 18/32 - loss 0.26742009 - samples/sec: 18.91 - lr: 0.020000\n",
-      "2021-09-08 01:32:53,318 epoch 3 - iter 21/32 - loss 0.24462211 - samples/sec: 18.29 - lr: 0.020000\n",
-      "2021-09-08 01:32:53,486 epoch 3 - iter 24/32 - loss 0.21613201 - samples/sec: 17.96 - lr: 0.020000\n",
-      "2021-09-08 01:32:53,633 epoch 3 - iter 27/32 - loss 0.24157343 - samples/sec: 20.41 - lr: 0.020000\n",
-      "2021-09-08 01:32:53,814 epoch 3 - iter 30/32 - loss 0.24616851 - samples/sec: 16.69 - lr: 0.020000\n",
-      "2021-09-08 01:32:53,929 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:53,929 EPOCH 3 done: loss 0.2711 - lr 0.0200000\n",
-      "2021-09-08 01:32:54,118 DEV : loss 0.5379340052604675 - score 0.75\n",
-      "2021-09-08 01:32:54,119 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:32:54,195 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:54,384 epoch 4 - iter 3/32 - loss 0.00294905 - samples/sec: 17.09 - lr: 0.020000\n",
-      "2021-09-08 01:32:54,544 epoch 4 - iter 6/32 - loss 0.03269417 - samples/sec: 18.83 - lr: 0.020000\n",
-      "2021-09-08 01:32:54,710 epoch 4 - iter 9/32 - loss 0.05844427 - samples/sec: 18.19 - lr: 0.020000\n",
-      "2021-09-08 01:32:54,865 epoch 4 - iter 12/32 - loss 0.04691980 - samples/sec: 19.41 - lr: 0.020000\n",
-      "2021-09-08 01:32:55,015 epoch 4 - iter 15/32 - loss 0.09428338 - samples/sec: 20.12 - lr: 0.020000\n",
-      "2021-09-08 01:32:55,573 epoch 4 - iter 18/32 - loss 0.07968695 - samples/sec: 17.52 - lr: 0.020000\n",
-      "2021-09-08 01:32:55,712 epoch 4 - iter 21/32 - loss 0.07077706 - samples/sec: 21.75 - lr: 0.020000\n",
-      "2021-09-08 01:32:55,870 epoch 4 - iter 24/32 - loss 0.06831417 - samples/sec: 18.95 - lr: 0.020000\n",
-      "2021-09-08 01:32:56,035 epoch 4 - iter 27/32 - loss 0.07667661 - samples/sec: 18.29 - lr: 0.020000\n",
-      "2021-09-08 01:32:56,182 epoch 4 - iter 30/32 - loss 0.07138957 - samples/sec: 20.42 - lr: 0.020000\n",
-      "2021-09-08 01:32:56,308 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:56,308 EPOCH 4 done: loss 0.0796 - lr 0.0200000\n",
-      "2021-09-08 01:32:56,372 DEV : loss 0.6019312143325806 - score 0.75\n",
-      "2021-09-08 01:32:56,373 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:32:56,376 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:56,566 epoch 5 - iter 3/32 - loss 0.00911676 - samples/sec: 17.05 - lr: 0.020000\n",
-      "2021-09-08 01:32:56,713 epoch 5 - iter 6/32 - loss 0.04256178 - samples/sec: 20.51 - lr: 0.020000\n",
-      "2021-09-08 01:32:56,862 epoch 5 - iter 9/32 - loss 0.02893831 - samples/sec: 20.18 - lr: 0.020000\n",
-      "2021-09-08 01:32:57,032 epoch 5 - iter 12/32 - loss 0.05361655 - samples/sec: 17.70 - lr: 0.020000\n",
-      "2021-09-08 01:32:57,181 epoch 5 - iter 15/32 - loss 0.07956024 - samples/sec: 20.27 - lr: 0.020000\n",
-      "2021-09-08 01:32:57,337 epoch 5 - iter 18/32 - loss 0.06810457 - samples/sec: 19.22 - lr: 0.020000\n",
-      "2021-09-08 01:32:57,502 epoch 5 - iter 21/32 - loss 0.06230813 - samples/sec: 18.25 - lr: 0.020000\n",
-      "2021-09-08 01:32:57,646 epoch 5 - iter 24/32 - loss 0.05611290 - samples/sec: 20.96 - lr: 0.020000\n",
-      "2021-09-08 01:32:57,810 epoch 5 - iter 27/32 - loss 0.05056936 - samples/sec: 18.39 - lr: 0.020000\n",
-      "2021-09-08 01:32:57,954 epoch 5 - iter 30/32 - loss 0.08998102 - samples/sec: 20.82 - lr: 0.020000\n",
-      "2021-09-08 01:32:58,075 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:58,076 EPOCH 5 done: loss 0.0848 - lr 0.0200000\n",
-      "2021-09-08 01:32:58,159 DEV : loss 0.7066447138786316 - score 0.75\n",
-      "2021-09-08 01:32:58,160 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:32:58,162 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:32:58,354 epoch 6 - iter 3/32 - loss 0.01326240 - samples/sec: 18.80 - lr: 0.020000\n",
-      "2021-09-08 01:32:58,497 epoch 6 - iter 6/32 - loss 0.01052771 - samples/sec: 20.96 - lr: 0.020000\n",
-      "2021-09-08 01:32:58,678 epoch 6 - iter 9/32 - loss 0.00731043 - samples/sec: 16.69 - lr: 0.020000\n",
-      "2021-09-08 01:32:58,826 epoch 6 - iter 12/32 - loss 0.02780287 - samples/sec: 20.22 - lr: 0.020000\n",
-      "2021-09-08 01:32:59,005 epoch 6 - iter 15/32 - loss 0.02233340 - samples/sec: 16.85 - lr: 0.020000\n",
-      "2021-09-08 01:32:59,189 epoch 6 - iter 18/32 - loss 0.07113912 - samples/sec: 16.34 - lr: 0.020000\n",
-      "2021-09-08 01:32:59,394 epoch 6 - iter 21/32 - loss 0.06103714 - samples/sec: 14.72 - lr: 0.020000\n",
-      "2021-09-08 01:32:59,577 epoch 6 - iter 24/32 - loss 0.05365080 - samples/sec: 16.48 - lr: 0.020000\n"
+      "2021-09-21 21:10:46,677 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:47,005 epoch 4 - iter 3/32 - loss 0.43521270 - samples/sec: 10.07 - lr: 0.020000\n",
+      "2021-09-21 21:10:47,301 epoch 4 - iter 6/32 - loss 0.34415155 - samples/sec: 10.14 - lr: 0.020000\n",
+      "2021-09-21 21:10:47,598 epoch 4 - iter 9/32 - loss 0.26362012 - samples/sec: 10.14 - lr: 0.020000\n",
+      "2021-09-21 21:10:47,885 epoch 4 - iter 12/32 - loss 0.27975010 - samples/sec: 10.47 - lr: 0.020000\n",
+      "2021-09-21 21:10:48,211 epoch 4 - iter 15/32 - loss 0.34858716 - samples/sec: 9.22 - lr: 0.020000\n",
+      "2021-09-21 21:10:48,492 epoch 4 - iter 18/32 - loss 0.29227813 - samples/sec: 10.68 - lr: 0.020000\n",
+      "2021-09-21 21:10:48,765 epoch 4 - iter 21/32 - loss 0.26802992 - samples/sec: 11.01 - lr: 0.020000\n",
+      "2021-09-21 21:10:49,014 epoch 4 - iter 24/32 - loss 0.25832478 - samples/sec: 12.12 - lr: 0.020000\n",
+      "2021-09-21 21:10:49,195 epoch 4 - iter 27/32 - loss 0.27938854 - samples/sec: 16.57 - lr: 0.020000\n",
+      "2021-09-21 21:10:49,360 epoch 4 - iter 30/32 - loss 0.27432709 - samples/sec: 18.29 - lr: 0.020000\n",
+      "2021-09-21 21:10:49,469 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:49,470 EPOCH 4 done: loss 0.2592 - lr 0.0200000\n",
+      "2021-09-21 21:10:51,180 DEV : loss 0.7828958034515381 - score 0.75\n",
+      "2021-09-21 21:10:51,181 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:10:51,222 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:51,403 epoch 5 - iter 3/32 - loss 0.00090361 - samples/sec: 17.97 - lr: 0.020000\n",
+      "2021-09-21 21:10:51,575 epoch 5 - iter 6/32 - loss 0.03188533 - samples/sec: 17.57 - lr: 0.020000\n",
+      "2021-09-21 21:10:51,754 epoch 5 - iter 9/32 - loss 0.07269824 - samples/sec: 16.79 - lr: 0.020000\n",
+      "2021-09-21 21:10:51,921 epoch 5 - iter 12/32 - loss 0.05754236 - samples/sec: 18.09 - lr: 0.020000\n",
+      "2021-09-21 21:10:52,086 epoch 5 - iter 15/32 - loss 0.04820502 - samples/sec: 18.30 - lr: 0.020000\n",
+      "2021-09-21 21:10:52,248 epoch 5 - iter 18/32 - loss 0.04274858 - samples/sec: 18.59 - lr: 0.020000\n",
+      "2021-09-21 21:10:52,410 epoch 5 - iter 21/32 - loss 0.04142043 - samples/sec: 18.63 - lr: 0.020000\n",
+      "2021-09-21 21:10:52,577 epoch 5 - iter 24/32 - loss 0.10854551 - samples/sec: 18.01 - lr: 0.020000\n",
+      "2021-09-21 21:10:52,723 epoch 5 - iter 27/32 - loss 0.21263736 - samples/sec: 20.63 - lr: 0.020000\n",
+      "2021-09-21 21:10:52,861 epoch 5 - iter 30/32 - loss 0.20254563 - samples/sec: 21.84 - lr: 0.020000\n",
+      "2021-09-21 21:10:52,965 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:52,965 EPOCH 5 done: loss 0.1920 - lr 0.0200000\n",
+      "2021-09-21 21:10:53,177 DEV : loss 0.5847264528274536 - score 1.0\n",
+      "2021-09-21 21:10:53,178 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:10:53,268 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:53,442 epoch 6 - iter 3/32 - loss 0.32215274 - samples/sec: 18.87 - lr: 0.020000\n",
+      "2021-09-21 21:10:53,601 epoch 6 - iter 6/32 - loss 0.16854837 - samples/sec: 18.86 - lr: 0.020000\n",
+      "2021-09-21 21:10:53,766 epoch 6 - iter 9/32 - loss 0.12314323 - samples/sec: 18.22 - lr: 0.020000\n",
+      "2021-09-21 21:10:53,922 epoch 6 - iter 12/32 - loss 0.09409321 - samples/sec: 19.37 - lr: 0.020000\n",
+      "2021-09-21 21:10:54,088 epoch 6 - iter 15/32 - loss 0.07925528 - samples/sec: 18.14 - lr: 0.020000\n",
+      "2021-09-21 21:10:54,237 epoch 6 - iter 18/32 - loss 0.06621216 - samples/sec: 20.17 - lr: 0.020000\n",
+      "2021-09-21 21:10:54,367 epoch 6 - iter 21/32 - loss 0.05684406 - samples/sec: 23.33 - lr: 0.020000\n",
+      "2021-09-21 21:10:54,512 epoch 6 - iter 24/32 - loss 0.09266818 - samples/sec: 20.66 - lr: 0.020000\n",
+      "2021-09-21 21:10:54,675 epoch 6 - iter 27/32 - loss 0.09518858 - samples/sec: 18.47 - lr: 0.020000\n",
+      "2021-09-21 21:10:54,843 epoch 6 - iter 30/32 - loss 0.11269618 - samples/sec: 17.95 - lr: 0.020000\n",
+      "2021-09-21 21:10:54,949 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:32:59,749 epoch 6 - iter 27/32 - loss 0.05602754 - samples/sec: 17.49 - lr: 0.020000\n",
-      "2021-09-08 01:32:59,918 epoch 6 - iter 30/32 - loss 0.05056923 - samples/sec: 17.82 - lr: 0.020000\n",
-      "2021-09-08 01:33:00,043 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:00,044 EPOCH 6 done: loss 0.0475 - lr 0.0200000\n",
-      "2021-09-08 01:33:00,124 DEV : loss 0.6864331960678101 - score 0.75\n",
-      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:33:00,125 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:33:00,127 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:00,302 epoch 7 - iter 3/32 - loss 0.00082017 - samples/sec: 18.76 - lr: 0.010000\n",
-      "2021-09-08 01:33:00,467 epoch 7 - iter 6/32 - loss 0.00271062 - samples/sec: 18.27 - lr: 0.010000\n",
-      "2021-09-08 01:33:00,635 epoch 7 - iter 9/32 - loss 0.00193759 - samples/sec: 17.97 - lr: 0.010000\n",
-      "2021-09-08 01:33:00,801 epoch 7 - iter 12/32 - loss 0.16844076 - samples/sec: 18.12 - lr: 0.010000\n",
-      "2021-09-08 01:33:00,976 epoch 7 - iter 15/32 - loss 0.13709523 - samples/sec: 17.20 - lr: 0.010000\n",
-      "2021-09-08 01:33:01,155 epoch 7 - iter 18/32 - loss 0.11468344 - samples/sec: 16.89 - lr: 0.010000\n",
-      "2021-09-08 01:33:01,319 epoch 7 - iter 21/32 - loss 0.09907683 - samples/sec: 18.29 - lr: 0.010000\n",
-      "2021-09-08 01:33:01,503 epoch 7 - iter 24/32 - loss 0.08691114 - samples/sec: 16.40 - lr: 0.010000\n",
-      "2021-09-08 01:33:01,695 epoch 7 - iter 27/32 - loss 0.07742971 - samples/sec: 15.67 - lr: 0.010000\n",
-      "2021-09-08 01:33:01,871 epoch 7 - iter 30/32 - loss 0.06994789 - samples/sec: 17.07 - lr: 0.010000\n",
-      "2021-09-08 01:33:01,988 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:01,989 EPOCH 7 done: loss 0.0656 - lr 0.0100000\n",
-      "2021-09-08 01:33:02,077 DEV : loss 0.8099143505096436 - score 0.75\n",
-      "2021-09-08 01:33:02,079 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:33:02,083 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:02,261 epoch 8 - iter 3/32 - loss 0.00472892 - samples/sec: 18.72 - lr: 0.010000\n",
-      "2021-09-08 01:33:02,405 epoch 8 - iter 6/32 - loss 0.00377411 - samples/sec: 20.93 - lr: 0.010000\n",
-      "2021-09-08 01:33:02,577 epoch 8 - iter 9/32 - loss 0.00290811 - samples/sec: 17.53 - lr: 0.010000\n",
-      "2021-09-08 01:33:02,761 epoch 8 - iter 12/32 - loss 0.00238057 - samples/sec: 16.34 - lr: 0.010000\n",
-      "2021-09-08 01:33:02,925 epoch 8 - iter 15/32 - loss 0.00203961 - samples/sec: 18.35 - lr: 0.010000\n",
-      "2021-09-08 01:33:03,108 epoch 8 - iter 18/32 - loss 0.00218835 - samples/sec: 16.49 - lr: 0.010000\n",
-      "2021-09-08 01:33:03,288 epoch 8 - iter 21/32 - loss 0.00198422 - samples/sec: 16.72 - lr: 0.010000\n",
-      "2021-09-08 01:33:03,456 epoch 8 - iter 24/32 - loss 0.00178474 - samples/sec: 17.98 - lr: 0.010000\n",
-      "2021-09-08 01:33:03,628 epoch 8 - iter 27/32 - loss 0.00161072 - samples/sec: 17.46 - lr: 0.010000\n",
-      "2021-09-08 01:33:03,785 epoch 8 - iter 30/32 - loss 0.00152824 - samples/sec: 19.16 - lr: 0.010000\n",
-      "2021-09-08 01:33:03,890 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:03,890 EPOCH 8 done: loss 0.0014 - lr 0.0100000\n",
-      "2021-09-08 01:33:04,248 DEV : loss 0.8212323784828186 - score 0.75\n",
-      "2021-09-08 01:33:04,249 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:33:04,473 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:04,647 epoch 9 - iter 3/32 - loss 0.00132129 - samples/sec: 19.01 - lr: 0.010000\n",
-      "2021-09-08 01:33:04,804 epoch 9 - iter 6/32 - loss 0.00147067 - samples/sec: 19.23 - lr: 0.010000\n",
-      "2021-09-08 01:33:04,965 epoch 9 - iter 9/32 - loss 0.00162837 - samples/sec: 18.74 - lr: 0.010000\n",
-      "2021-09-08 01:33:05,121 epoch 9 - iter 12/32 - loss 0.00131679 - samples/sec: 19.31 - lr: 0.010000\n",
-      "2021-09-08 01:33:05,278 epoch 9 - iter 15/32 - loss 0.00123513 - samples/sec: 19.26 - lr: 0.010000\n",
-      "2021-09-08 01:33:05,437 epoch 9 - iter 18/32 - loss 0.00145441 - samples/sec: 18.87 - lr: 0.010000\n",
-      "2021-09-08 01:33:05,594 epoch 9 - iter 21/32 - loss 0.00133953 - samples/sec: 19.25 - lr: 0.010000\n",
-      "2021-09-08 01:33:05,752 epoch 9 - iter 24/32 - loss 0.00129148 - samples/sec: 19.11 - lr: 0.010000\n",
-      "2021-09-08 01:33:05,913 epoch 9 - iter 27/32 - loss 0.00120926 - samples/sec: 18.66 - lr: 0.010000\n",
-      "2021-09-08 01:33:06,070 epoch 9 - iter 30/32 - loss 0.00119475 - samples/sec: 19.24 - lr: 0.010000\n",
-      "2021-09-08 01:33:06,175 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:06,175 EPOCH 9 done: loss 0.0011 - lr 0.0100000\n",
-      "2021-09-08 01:33:06,468 DEV : loss 0.8051367402076721 - score 0.75\n",
-      "2021-09-08 01:33:06,469 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:33:06,639 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:06,816 epoch 10 - iter 3/32 - loss 0.00082112 - samples/sec: 18.66 - lr: 0.010000\n",
-      "2021-09-08 01:33:06,976 epoch 10 - iter 6/32 - loss 0.01014609 - samples/sec: 18.78 - lr: 0.010000\n",
-      "2021-09-08 01:33:07,138 epoch 10 - iter 9/32 - loss 0.00883142 - samples/sec: 18.64 - lr: 0.010000\n",
-      "2021-09-08 01:33:07,294 epoch 10 - iter 12/32 - loss 0.00671371 - samples/sec: 19.28 - lr: 0.010000\n",
-      "2021-09-08 01:33:07,450 epoch 10 - iter 15/32 - loss 0.00591603 - samples/sec: 19.31 - lr: 0.010000\n",
-      "2021-09-08 01:33:07,606 epoch 10 - iter 18/32 - loss 0.00496055 - samples/sec: 19.38 - lr: 0.010000\n",
-      "2021-09-08 01:33:07,761 epoch 10 - iter 21/32 - loss 0.00436848 - samples/sec: 19.41 - lr: 0.010000\n",
-      "2021-09-08 01:33:07,922 epoch 10 - iter 24/32 - loss 0.00387854 - samples/sec: 18.66 - lr: 0.010000\n",
-      "2021-09-08 01:33:08,084 epoch 10 - iter 27/32 - loss 0.00351135 - samples/sec: 18.70 - lr: 0.010000\n",
-      "2021-09-08 01:33:08,240 epoch 10 - iter 30/32 - loss 0.00325257 - samples/sec: 19.31 - lr: 0.010000\n",
-      "2021-09-08 01:33:08,344 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:08,344 EPOCH 10 done: loss 0.0031 - lr 0.0100000\n",
-      "2021-09-08 01:33:08,416 DEV : loss 0.8832200765609741 - score 0.75\n",
-      "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 01:33:08,417 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:33:12,399 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:12,400 Testing using best model ...\n",
-      "2021-09-08 01:33:12,401 loading file temp/best-model.pt\n",
+      "2021-09-21 21:10:54,950 EPOCH 6 done: loss 0.1059 - lr 0.0200000\n",
+      "2021-09-21 21:10:55,151 DEV : loss 1.2047182321548462 - score 0.75\n",
+      "2021-09-21 21:10:55,152 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:10:55,241 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:55,425 epoch 7 - iter 3/32 - loss 0.01081119 - samples/sec: 18.48 - lr: 0.020000\n",
+      "2021-09-21 21:10:55,590 epoch 7 - iter 6/32 - loss 0.01310831 - samples/sec: 18.18 - lr: 0.020000\n",
+      "2021-09-21 21:10:55,745 epoch 7 - iter 9/32 - loss 0.01095590 - samples/sec: 19.48 - lr: 0.020000\n",
+      "2021-09-21 21:10:55,876 epoch 7 - iter 12/32 - loss 0.00842154 - samples/sec: 22.93 - lr: 0.020000\n",
+      "2021-09-21 21:10:56,019 epoch 7 - iter 15/32 - loss 0.01520316 - samples/sec: 21.16 - lr: 0.020000\n",
+      "2021-09-21 21:10:56,171 epoch 7 - iter 18/32 - loss 0.01410038 - samples/sec: 19.77 - lr: 0.020000\n",
+      "2021-09-21 21:10:56,325 epoch 7 - iter 21/32 - loss 0.02238803 - samples/sec: 19.56 - lr: 0.020000\n",
+      "2021-09-21 21:10:56,487 epoch 7 - iter 24/32 - loss 0.03743241 - samples/sec: 18.55 - lr: 0.020000\n",
+      "2021-09-21 21:10:56,657 epoch 7 - iter 27/32 - loss 0.03339639 - samples/sec: 17.73 - lr: 0.020000\n",
+      "2021-09-21 21:10:56,819 epoch 7 - iter 30/32 - loss 0.03020217 - samples/sec: 18.56 - lr: 0.020000\n",
+      "2021-09-21 21:10:56,926 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:56,926 EPOCH 7 done: loss 0.0284 - lr 0.0200000\n",
+      "2021-09-21 21:10:59,325 DEV : loss 0.7070633172988892 - score 1.0\n",
+      "Epoch     7: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:10:59,326 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:11:01,939 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:02,089 epoch 8 - iter 3/32 - loss 0.00206234 - samples/sec: 22.19 - lr: 0.010000\n",
+      "2021-09-21 21:11:02,233 epoch 8 - iter 6/32 - loss 0.02467313 - samples/sec: 20.98 - lr: 0.010000\n",
+      "2021-09-21 21:11:02,379 epoch 8 - iter 9/32 - loss 0.02073413 - samples/sec: 20.69 - lr: 0.010000\n",
+      "2021-09-21 21:11:02,518 epoch 8 - iter 12/32 - loss 0.01568631 - samples/sec: 21.65 - lr: 0.010000\n",
+      "2021-09-21 21:11:02,665 epoch 8 - iter 15/32 - loss 0.02516075 - samples/sec: 20.51 - lr: 0.010000\n",
+      "2021-09-21 21:11:02,810 epoch 8 - iter 18/32 - loss 0.02114898 - samples/sec: 20.82 - lr: 0.010000\n",
+      "2021-09-21 21:11:02,955 epoch 8 - iter 21/32 - loss 0.01881028 - samples/sec: 20.91 - lr: 0.010000\n",
+      "2021-09-21 21:11:03,089 epoch 8 - iter 24/32 - loss 0.01762303 - samples/sec: 22.60 - lr: 0.010000\n",
+      "2021-09-21 21:11:03,225 epoch 8 - iter 27/32 - loss 0.02273738 - samples/sec: 22.14 - lr: 0.010000\n",
+      "2021-09-21 21:11:03,355 epoch 8 - iter 30/32 - loss 0.02073351 - samples/sec: 23.06 - lr: 0.010000\n",
+      "2021-09-21 21:11:03,443 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:03,444 EPOCH 8 done: loss 0.0195 - lr 0.0100000\n",
+      "2021-09-21 21:11:03,634 DEV : loss 1.0608856678009033 - score 0.75\n",
+      "2021-09-21 21:11:03,635 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:11:03,719 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:03,865 epoch 9 - iter 3/32 - loss 0.00502943 - samples/sec: 22.72 - lr: 0.010000\n",
+      "2021-09-21 21:11:03,994 epoch 9 - iter 6/32 - loss 0.00267367 - samples/sec: 23.27 - lr: 0.010000\n",
+      "2021-09-21 21:11:04,123 epoch 9 - iter 9/32 - loss 0.00223738 - samples/sec: 23.37 - lr: 0.010000\n",
+      "2021-09-21 21:11:04,255 epoch 9 - iter 12/32 - loss 0.00219651 - samples/sec: 22.90 - lr: 0.010000\n",
+      "2021-09-21 21:11:04,383 epoch 9 - iter 15/32 - loss 0.00183664 - samples/sec: 23.46 - lr: 0.010000\n",
+      "2021-09-21 21:11:04,515 epoch 9 - iter 18/32 - loss 0.00171043 - samples/sec: 22.95 - lr: 0.010000\n",
+      "2021-09-21 21:11:04,646 epoch 9 - iter 21/32 - loss 0.00160317 - samples/sec: 23.03 - lr: 0.010000\n",
+      "2021-09-21 21:11:04,776 epoch 9 - iter 24/32 - loss 0.00144387 - samples/sec: 23.14 - lr: 0.010000\n",
+      "2021-09-21 21:11:04,904 epoch 9 - iter 27/32 - loss 0.00133922 - samples/sec: 23.50 - lr: 0.010000\n",
+      "2021-09-21 21:11:05,034 epoch 9 - iter 30/32 - loss 0.00150378 - samples/sec: 23.13 - lr: 0.010000\n",
+      "2021-09-21 21:11:05,124 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:05,125 EPOCH 9 done: loss 0.0132 - lr 0.0100000\n",
+      "2021-09-21 21:11:05,301 DEV : loss 0.8304525017738342 - score 0.5\n",
+      "2021-09-21 21:11:05,302 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:11:05,399 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:05,541 epoch 10 - iter 3/32 - loss 0.00302132 - samples/sec: 23.28 - lr: 0.010000\n",
+      "2021-09-21 21:11:05,674 epoch 10 - iter 6/32 - loss 0.03586257 - samples/sec: 22.67 - lr: 0.010000\n",
+      "2021-09-21 21:11:05,803 epoch 10 - iter 9/32 - loss 0.02407276 - samples/sec: 23.36 - lr: 0.010000\n",
+      "2021-09-21 21:11:05,937 epoch 10 - iter 12/32 - loss 0.01847824 - samples/sec: 22.53 - lr: 0.010000\n",
+      "2021-09-21 21:11:06,067 epoch 10 - iter 15/32 - loss 0.01524429 - samples/sec: 23.11 - lr: 0.010000\n",
+      "2021-09-21 21:11:06,199 epoch 10 - iter 18/32 - loss 0.01276773 - samples/sec: 22.75 - lr: 0.010000\n",
+      "2021-09-21 21:11:06,330 epoch 10 - iter 21/32 - loss 0.01100461 - samples/sec: 23.06 - lr: 0.010000\n",
+      "2021-09-21 21:11:06,463 epoch 10 - iter 24/32 - loss 0.01461401 - samples/sec: 22.65 - lr: 0.010000\n",
+      "2021-09-21 21:11:06,596 epoch 10 - iter 27/32 - loss 0.01319693 - samples/sec: 22.69 - lr: 0.010000\n",
+      "2021-09-21 21:11:06,734 epoch 10 - iter 30/32 - loss 0.01195024 - samples/sec: 21.92 - lr: 0.010000\n",
+      "2021-09-21 21:11:06,824 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:06,824 EPOCH 10 done: loss 0.0112 - lr 0.0100000\n",
+      "2021-09-21 21:11:06,993 DEV : loss 0.7465540766716003 - score 0.75\n",
+      "2021-09-21 21:11:06,994 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:11:14,265 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:14,266 Testing using best model ...\n",
+      "2021-09-21 21:11:14,267 loading file temp/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:33:17,380 \t1.0\n",
-      "2021-09-08 01:33:17,380 \n",
+      "2021-09-21 21:11:20,848 \t0.75\n",
+      "2021-09-21 21:11:20,849 \n",
       "Results:\n",
-      "- F-score (micro) 1.0\n",
-      "- F-score (macro) 0.75\n",
-      "- Accuracy 1.0\n",
+      "- F-score (micro) 0.75\n",
+      "- F-score (macro) 0.4167\n",
+      "- Accuracy 0.75\n",
       "\n",
       "By class:\n",
       "                              precision    recall  f1-score   support\n",
       "\n",
-      " this text expresses sadness     1.0000    1.0000    1.0000         1\n",
-      "this text expresses optimism     0.0000    0.0000    0.0000         0\n",
-      "   this text expresses anger     1.0000    1.0000    1.0000         1\n",
+      " this text expresses sadness     0.0000    0.0000    0.0000         0\n",
+      "this text expresses optimism     0.0000    0.0000    0.0000         1\n",
+      "   this text expresses anger     0.5000    1.0000    0.6667         1\n",
       "     this text expresses joy     1.0000    1.0000    1.0000         2\n",
       "\n",
-      "                   micro avg     1.0000    1.0000    1.0000         4\n",
-      "                   macro avg     0.7500    0.7500    0.7500         4\n",
-      "                weighted avg     1.0000    1.0000    1.0000         4\n",
-      "                 samples avg     1.0000    1.0000    1.0000         4\n",
+      "                   micro avg     0.7500    0.7500    0.7500         4\n",
+      "                   macro avg     0.3750    0.5000    0.4167         4\n",
+      "                weighted avg     0.6250    0.7500    0.6667         4\n",
+      "                 samples avg     0.7500    0.7500    0.7500         4\n",
       "\n",
-      "2021-09-08 01:33:17,381 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:27,092 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
+      "2021-09-21 21:11:20,849 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:39,052 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:33:31,536 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:11:43,767 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 36/36 [00:00<00:00, 42248.17it/s]"
+      "100%|██████████| 36/36 [00:00<00:00, 39652.03it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:33:31,539 [b'this text expresses sadness', b'this text expresses optimism', b'this text expresses anger', b'this text expresses joy']\n",
-      "2021-09-08 01:33:31,729 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:31,731 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:11:43,769 [b'this text expresses sadness', b'this text expresses optimism', b'this text expresses anger', b'this text expresses joy']\n",
+      "2021-09-21 21:11:43,778 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:43,780 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -4334,24 +4344,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:33:31,731 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:31,732 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
-      "2021-09-08 01:33:31,732 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:31,732 Parameters:\n",
-      "2021-09-08 01:33:31,732  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:33:31,733  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:33:31,733  - patience: \"3\"\n",
-      "2021-09-08 01:33:31,733  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:33:31,734  - max_epochs: \"10\"\n",
-      "2021-09-08 01:33:31,734  - shuffle: \"True\"\n",
-      "2021-09-08 01:33:31,734  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:33:31,734  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:33:31,735 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:31,735 Model training base path: \"temp\"\n",
-      "2021-09-08 01:33:31,735 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:31,736 Device: cuda:0\n",
-      "2021-09-08 01:33:31,736 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:31,736 Embeddings storage mode: cpu\n"
+      "2021-09-21 21:11:43,780 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:43,781 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
+      "2021-09-21 21:11:43,781 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:43,781 Parameters:\n",
+      "2021-09-21 21:11:43,781  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:11:43,782  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:11:43,782  - patience: \"3\"\n",
+      "2021-09-21 21:11:43,782  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:11:43,782  - max_epochs: \"10\"\n",
+      "2021-09-21 21:11:43,783  - shuffle: \"True\"\n",
+      "2021-09-21 21:11:43,783  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:11:43,783  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:11:43,784 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:43,784 Model training base path: \"temp\"\n",
+      "2021-09-21 21:11:43,784 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:43,784 Device: cuda:0\n",
+      "2021-09-21 21:11:43,785 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:43,785 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:11:43,792 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -4365,213 +4376,208 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:33:31,827 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:32,022 epoch 1 - iter 3/32 - loss 0.47656434 - samples/sec: 16.71 - lr: 0.020000\n",
-      "2021-09-08 01:33:32,167 epoch 1 - iter 6/32 - loss 0.49112139 - samples/sec: 20.84 - lr: 0.020000\n",
-      "2021-09-08 01:33:32,343 epoch 1 - iter 9/32 - loss 0.53102874 - samples/sec: 17.10 - lr: 0.020000\n",
-      "2021-09-08 01:33:32,505 epoch 1 - iter 12/32 - loss 0.89799983 - samples/sec: 18.53 - lr: 0.020000\n",
-      "2021-09-08 01:33:32,657 epoch 1 - iter 15/32 - loss 0.87721639 - samples/sec: 19.86 - lr: 0.020000\n",
-      "2021-09-08 01:33:32,858 epoch 1 - iter 18/32 - loss 0.90008734 - samples/sec: 14.99 - lr: 0.020000\n",
-      "2021-09-08 01:33:33,006 epoch 1 - iter 21/32 - loss 0.83587773 - samples/sec: 20.39 - lr: 0.020000\n",
-      "2021-09-08 01:33:33,207 epoch 1 - iter 24/32 - loss 0.76429179 - samples/sec: 14.97 - lr: 0.020000\n",
-      "2021-09-08 01:33:33,370 epoch 1 - iter 27/32 - loss 0.76322734 - samples/sec: 18.46 - lr: 0.020000\n",
-      "2021-09-08 01:33:33,531 epoch 1 - iter 30/32 - loss 0.76025518 - samples/sec: 18.75 - lr: 0.020000\n",
-      "2021-09-08 01:33:33,676 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:33,677 EPOCH 1 done: loss 0.7673 - lr 0.0200000\n",
-      "2021-09-08 01:33:33,743 DEV : loss 0.5577859878540039 - score 0.5\n",
-      "2021-09-08 01:33:33,743 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:33:40,582 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:40,805 epoch 2 - iter 3/32 - loss 0.42206985 - samples/sec: 14.91 - lr: 0.020000\n",
-      "2021-09-08 01:33:40,951 epoch 2 - iter 6/32 - loss 0.60235447 - samples/sec: 20.64 - lr: 0.020000\n",
-      "2021-09-08 01:33:41,132 epoch 2 - iter 9/32 - loss 0.54054906 - samples/sec: 16.66 - lr: 0.020000\n",
-      "2021-09-08 01:33:41,318 epoch 2 - iter 12/32 - loss 0.50007022 - samples/sec: 16.16 - lr: 0.020000\n",
-      "2021-09-08 01:33:41,464 epoch 2 - iter 15/32 - loss 0.55021740 - samples/sec: 20.74 - lr: 0.020000\n",
-      "2021-09-08 01:33:41,643 epoch 2 - iter 18/32 - loss 0.54989514 - samples/sec: 16.77 - lr: 0.020000\n",
-      "2021-09-08 01:33:41,800 epoch 2 - iter 21/32 - loss 0.54641972 - samples/sec: 19.18 - lr: 0.020000\n",
-      "2021-09-08 01:33:41,972 epoch 2 - iter 24/32 - loss 0.51444283 - samples/sec: 17.50 - lr: 0.020000\n",
-      "2021-09-08 01:33:42,130 epoch 2 - iter 27/32 - loss 0.47244542 - samples/sec: 19.08 - lr: 0.020000\n",
-      "2021-09-08 01:33:42,281 epoch 2 - iter 30/32 - loss 0.50252790 - samples/sec: 19.91 - lr: 0.020000\n",
-      "2021-09-08 01:33:42,411 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:42,412 EPOCH 2 done: loss 0.4782 - lr 0.0200000\n",
-      "2021-09-08 01:33:42,507 DEV : loss 0.7360023856163025 - score 0.75\n",
-      "2021-09-08 01:33:42,508 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:11:44,034 epoch 1 - iter 3/32 - loss 0.25179243 - samples/sec: 14.62 - lr: 0.020000\n",
+      "2021-09-21 21:11:44,251 epoch 1 - iter 6/32 - loss 0.37327333 - samples/sec: 13.86 - lr: 0.020000\n",
+      "2021-09-21 21:11:44,454 epoch 1 - iter 9/32 - loss 0.52691178 - samples/sec: 14.80 - lr: 0.020000\n",
+      "2021-09-21 21:11:44,657 epoch 1 - iter 12/32 - loss 0.68052444 - samples/sec: 14.83 - lr: 0.020000\n",
+      "2021-09-21 21:11:44,869 epoch 1 - iter 15/32 - loss 0.65273096 - samples/sec: 14.19 - lr: 0.020000\n",
+      "2021-09-21 21:11:45,084 epoch 1 - iter 18/32 - loss 0.77640923 - samples/sec: 13.98 - lr: 0.020000\n",
+      "2021-09-21 21:11:45,305 epoch 1 - iter 21/32 - loss 0.71569844 - samples/sec: 13.63 - lr: 0.020000\n",
+      "2021-09-21 21:11:45,491 epoch 1 - iter 24/32 - loss 0.62917259 - samples/sec: 16.18 - lr: 0.020000\n",
+      "2021-09-21 21:11:45,689 epoch 1 - iter 27/32 - loss 0.66420768 - samples/sec: 15.22 - lr: 0.020000\n",
+      "2021-09-21 21:11:45,880 epoch 1 - iter 30/32 - loss 0.63298468 - samples/sec: 15.75 - lr: 0.020000\n",
+      "2021-09-21 21:11:46,008 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:46,008 EPOCH 1 done: loss 0.6212 - lr 0.0200000\n",
+      "2021-09-21 21:11:46,084 DEV : loss 0.49044057726860046 - score 1.0\n",
+      "2021-09-21 21:11:46,085 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:33:49,416 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:49,617 epoch 3 - iter 3/32 - loss 0.77319662 - samples/sec: 16.42 - lr: 0.020000\n",
-      "2021-09-08 01:33:49,759 epoch 3 - iter 6/32 - loss 0.45624614 - samples/sec: 21.26 - lr: 0.020000\n",
-      "2021-09-08 01:33:49,920 epoch 3 - iter 9/32 - loss 0.35824524 - samples/sec: 18.74 - lr: 0.020000\n",
-      "2021-09-08 01:33:50,112 epoch 3 - iter 12/32 - loss 0.36283411 - samples/sec: 15.68 - lr: 0.020000\n",
-      "2021-09-08 01:33:50,257 epoch 3 - iter 15/32 - loss 0.39303460 - samples/sec: 20.81 - lr: 0.020000\n",
-      "2021-09-08 01:33:50,450 epoch 3 - iter 18/32 - loss 0.39815234 - samples/sec: 15.56 - lr: 0.020000\n",
-      "2021-09-08 01:33:50,596 epoch 3 - iter 21/32 - loss 0.40913412 - samples/sec: 20.68 - lr: 0.020000\n",
-      "2021-09-08 01:33:50,776 epoch 3 - iter 24/32 - loss 0.38977902 - samples/sec: 16.71 - lr: 0.020000\n",
-      "2021-09-08 01:33:50,941 epoch 3 - iter 27/32 - loss 0.39037174 - samples/sec: 18.31 - lr: 0.020000\n",
-      "2021-09-08 01:33:51,091 epoch 3 - iter 30/32 - loss 0.38981669 - samples/sec: 20.00 - lr: 0.020000\n",
-      "2021-09-08 01:33:51,240 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:51,241 EPOCH 3 done: loss 0.3909 - lr 0.0200000\n",
-      "2021-09-08 01:33:51,797 DEV : loss 0.8167864084243774 - score 0.5\n",
-      "2021-09-08 01:33:51,798 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:33:51,800 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:51,973 epoch 4 - iter 3/32 - loss 0.14320668 - samples/sec: 19.68 - lr: 0.020000\n",
-      "2021-09-08 01:33:52,125 epoch 4 - iter 6/32 - loss 0.15306082 - samples/sec: 19.77 - lr: 0.020000\n",
-      "2021-09-08 01:33:52,304 epoch 4 - iter 9/32 - loss 0.10362381 - samples/sec: 16.83 - lr: 0.020000\n",
-      "2021-09-08 01:33:52,451 epoch 4 - iter 12/32 - loss 0.17121414 - samples/sec: 20.49 - lr: 0.020000\n",
-      "2021-09-08 01:33:52,608 epoch 4 - iter 15/32 - loss 0.13902437 - samples/sec: 19.21 - lr: 0.020000\n",
-      "2021-09-08 01:33:52,756 epoch 4 - iter 18/32 - loss 0.24629772 - samples/sec: 20.39 - lr: 0.020000\n",
-      "2021-09-08 01:33:52,928 epoch 4 - iter 21/32 - loss 0.22896986 - samples/sec: 17.48 - lr: 0.020000\n",
-      "2021-09-08 01:33:53,099 epoch 4 - iter 24/32 - loss 0.25507903 - samples/sec: 17.62 - lr: 0.020000\n",
-      "2021-09-08 01:33:53,239 epoch 4 - iter 27/32 - loss 0.24902135 - samples/sec: 21.45 - lr: 0.020000\n",
-      "2021-09-08 01:33:53,437 epoch 4 - iter 30/32 - loss 0.31493232 - samples/sec: 15.16 - lr: 0.020000\n",
-      "2021-09-08 01:33:53,532 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:53,532 EPOCH 4 done: loss 0.3200 - lr 0.0200000\n",
-      "2021-09-08 01:33:53,609 DEV : loss 0.5641918778419495 - score 0.75\n",
-      "2021-09-08 01:33:53,609 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:34:04,701 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:04,916 epoch 5 - iter 3/32 - loss 0.00531210 - samples/sec: 15.70 - lr: 0.020000\n",
-      "2021-09-08 01:34:05,062 epoch 5 - iter 6/32 - loss 0.20345398 - samples/sec: 20.68 - lr: 0.020000\n",
-      "2021-09-08 01:34:05,240 epoch 5 - iter 9/32 - loss 0.14680246 - samples/sec: 16.92 - lr: 0.020000\n",
-      "2021-09-08 01:34:05,408 epoch 5 - iter 12/32 - loss 0.13085705 - samples/sec: 17.85 - lr: 0.020000\n",
-      "2021-09-08 01:34:05,563 epoch 5 - iter 15/32 - loss 0.16861491 - samples/sec: 19.47 - lr: 0.020000\n",
-      "2021-09-08 01:34:05,746 epoch 5 - iter 18/32 - loss 0.15753033 - samples/sec: 16.48 - lr: 0.020000\n",
-      "2021-09-08 01:34:05,886 epoch 5 - iter 21/32 - loss 0.15449498 - samples/sec: 21.50 - lr: 0.020000\n",
-      "2021-09-08 01:34:06,070 epoch 5 - iter 24/32 - loss 0.14153313 - samples/sec: 16.36 - lr: 0.020000\n",
-      "2021-09-08 01:34:06,212 epoch 5 - iter 27/32 - loss 0.15690650 - samples/sec: 21.19 - lr: 0.020000\n",
-      "2021-09-08 01:34:06,390 epoch 5 - iter 30/32 - loss 0.19041950 - samples/sec: 16.91 - lr: 0.020000\n",
-      "2021-09-08 01:34:06,485 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:06,485 EPOCH 5 done: loss 0.1831 - lr 0.0200000\n",
-      "2021-09-08 01:34:06,905 DEV : loss 0.47327080368995667 - score 0.75\n",
-      "2021-09-08 01:34:07,098 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:34:14,343 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:14,519 epoch 6 - iter 3/32 - loss 0.06835654 - samples/sec: 19.31 - lr: 0.020000\n",
-      "2021-09-08 01:34:14,680 epoch 6 - iter 6/32 - loss 0.09251610 - samples/sec: 18.66 - lr: 0.020000\n",
-      "2021-09-08 01:34:14,840 epoch 6 - iter 9/32 - loss 0.12138673 - samples/sec: 18.84 - lr: 0.020000\n",
-      "2021-09-08 01:34:14,989 epoch 6 - iter 12/32 - loss 0.21048780 - samples/sec: 20.22 - lr: 0.020000\n",
-      "2021-09-08 01:34:15,178 epoch 6 - iter 15/32 - loss 0.18064866 - samples/sec: 15.98 - lr: 0.020000\n",
-      "2021-09-08 01:34:15,316 epoch 6 - iter 18/32 - loss 0.17177113 - samples/sec: 21.71 - lr: 0.020000\n",
-      "2021-09-08 01:34:15,492 epoch 6 - iter 21/32 - loss 0.15042221 - samples/sec: 17.12 - lr: 0.020000\n",
-      "2021-09-08 01:34:15,643 epoch 6 - iter 24/32 - loss 0.13758638 - samples/sec: 20.04 - lr: 0.020000\n",
-      "2021-09-08 01:34:15,790 epoch 6 - iter 27/32 - loss 0.12556856 - samples/sec: 20.48 - lr: 0.020000\n"
+      "2021-09-21 21:11:53,119 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:53,287 epoch 2 - iter 3/32 - loss 0.00807517 - samples/sec: 21.13 - lr: 0.020000\n",
+      "2021-09-21 21:11:53,450 epoch 2 - iter 6/32 - loss 0.61958803 - samples/sec: 18.46 - lr: 0.020000\n",
+      "2021-09-21 21:11:53,600 epoch 2 - iter 9/32 - loss 0.62100765 - samples/sec: 20.20 - lr: 0.020000\n",
+      "2021-09-21 21:11:53,759 epoch 2 - iter 12/32 - loss 0.55300629 - samples/sec: 18.87 - lr: 0.020000\n",
+      "2021-09-21 21:11:53,926 epoch 2 - iter 15/32 - loss 0.50005398 - samples/sec: 18.06 - lr: 0.020000\n",
+      "2021-09-21 21:11:54,072 epoch 2 - iter 18/32 - loss 0.50797269 - samples/sec: 20.55 - lr: 0.020000\n",
+      "2021-09-21 21:11:54,224 epoch 2 - iter 21/32 - loss 0.55599281 - samples/sec: 19.91 - lr: 0.020000\n",
+      "2021-09-21 21:11:54,378 epoch 2 - iter 24/32 - loss 0.50911142 - samples/sec: 19.52 - lr: 0.020000\n",
+      "2021-09-21 21:11:54,533 epoch 2 - iter 27/32 - loss 0.50010165 - samples/sec: 19.39 - lr: 0.020000\n",
+      "2021-09-21 21:11:54,695 epoch 2 - iter 30/32 - loss 0.49391772 - samples/sec: 18.60 - lr: 0.020000\n",
+      "2021-09-21 21:11:54,796 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:54,797 EPOCH 2 done: loss 0.4669 - lr 0.0200000\n",
+      "2021-09-21 21:12:02,046 DEV : loss 0.9204941391944885 - score 0.25\n",
+      "2021-09-21 21:12:02,047 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:12:02,065 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:02,299 epoch 3 - iter 3/32 - loss 0.40637292 - samples/sec: 14.97 - lr: 0.020000\n",
+      "2021-09-21 21:12:02,512 epoch 3 - iter 6/32 - loss 0.31434525 - samples/sec: 14.17 - lr: 0.020000\n",
+      "2021-09-21 21:12:02,695 epoch 3 - iter 9/32 - loss 0.26730262 - samples/sec: 16.41 - lr: 0.020000\n",
+      "2021-09-21 21:12:02,910 epoch 3 - iter 12/32 - loss 0.39788619 - samples/sec: 14.05 - lr: 0.020000\n",
+      "2021-09-21 21:12:03,108 epoch 3 - iter 15/32 - loss 0.35840663 - samples/sec: 15.15 - lr: 0.020000\n",
+      "2021-09-21 21:12:03,283 epoch 3 - iter 18/32 - loss 0.31175440 - samples/sec: 17.20 - lr: 0.020000\n",
+      "2021-09-21 21:12:03,473 epoch 3 - iter 21/32 - loss 0.27794813 - samples/sec: 15.87 - lr: 0.020000\n",
+      "2021-09-21 21:12:03,644 epoch 3 - iter 24/32 - loss 0.31306000 - samples/sec: 17.57 - lr: 0.020000\n",
+      "2021-09-21 21:12:03,818 epoch 3 - iter 27/32 - loss 0.33061331 - samples/sec: 17.38 - lr: 0.020000\n",
+      "2021-09-21 21:12:04,016 epoch 3 - iter 30/32 - loss 0.30048461 - samples/sec: 15.19 - lr: 0.020000\n",
+      "2021-09-21 21:12:04,130 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:04,131 EPOCH 3 done: loss 0.2819 - lr 0.0200000\n",
+      "2021-09-21 21:12:04,237 DEV : loss 1.2922866344451904 - score 0.25\n",
+      "2021-09-21 21:12:04,238 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:12:04,382 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:04,590 epoch 4 - iter 3/32 - loss 0.13288288 - samples/sec: 15.93 - lr: 0.020000\n",
+      "2021-09-21 21:12:04,759 epoch 4 - iter 6/32 - loss 0.07198861 - samples/sec: 17.79 - lr: 0.020000\n",
+      "2021-09-21 21:12:04,936 epoch 4 - iter 9/32 - loss 0.05001990 - samples/sec: 17.06 - lr: 0.020000\n",
+      "2021-09-21 21:12:05,108 epoch 4 - iter 12/32 - loss 0.03917552 - samples/sec: 17.47 - lr: 0.020000\n",
+      "2021-09-21 21:12:05,288 epoch 4 - iter 15/32 - loss 0.07329491 - samples/sec: 16.73 - lr: 0.020000\n",
+      "2021-09-21 21:12:05,464 epoch 4 - iter 18/32 - loss 0.09198607 - samples/sec: 17.16 - lr: 0.020000\n",
+      "2021-09-21 21:12:05,642 epoch 4 - iter 21/32 - loss 0.08355904 - samples/sec: 16.92 - lr: 0.020000\n",
+      "2021-09-21 21:12:05,821 epoch 4 - iter 24/32 - loss 0.14846559 - samples/sec: 16.82 - lr: 0.020000\n",
+      "2021-09-21 21:12:06,005 epoch 4 - iter 27/32 - loss 0.14904605 - samples/sec: 16.37 - lr: 0.020000\n",
+      "2021-09-21 21:12:06,189 epoch 4 - iter 30/32 - loss 0.17269459 - samples/sec: 16.44 - lr: 0.020000\n",
+      "2021-09-21 21:12:06,286 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:06,287 EPOCH 4 done: loss 0.1621 - lr 0.0200000\n",
+      "2021-09-21 21:12:06,480 DEV : loss 0.6959068179130554 - score 0.75\n",
+      "2021-09-21 21:12:06,481 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:12:06,570 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:06,742 epoch 5 - iter 3/32 - loss 0.00196859 - samples/sec: 19.14 - lr: 0.020000\n",
+      "2021-09-21 21:12:06,894 epoch 5 - iter 6/32 - loss 0.01434140 - samples/sec: 19.89 - lr: 0.020000\n",
+      "2021-09-21 21:12:07,055 epoch 5 - iter 9/32 - loss 0.22821640 - samples/sec: 18.65 - lr: 0.020000\n",
+      "2021-09-21 21:12:07,209 epoch 5 - iter 12/32 - loss 0.17237592 - samples/sec: 19.59 - lr: 0.020000\n",
+      "2021-09-21 21:12:07,366 epoch 5 - iter 15/32 - loss 0.14307466 - samples/sec: 19.16 - lr: 0.020000\n",
+      "2021-09-21 21:12:07,516 epoch 5 - iter 18/32 - loss 0.14555492 - samples/sec: 20.02 - lr: 0.020000\n",
+      "2021-09-21 21:12:07,663 epoch 5 - iter 21/32 - loss 0.12615131 - samples/sec: 20.47 - lr: 0.020000\n",
+      "2021-09-21 21:12:07,823 epoch 5 - iter 24/32 - loss 0.11180791 - samples/sec: 18.81 - lr: 0.020000\n",
+      "2021-09-21 21:12:08,001 epoch 5 - iter 27/32 - loss 0.10115719 - samples/sec: 16.95 - lr: 0.020000\n",
+      "2021-09-21 21:12:08,240 epoch 5 - iter 30/32 - loss 0.17102198 - samples/sec: 12.60 - lr: 0.020000\n",
+      "2021-09-21 21:12:08,378 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:08,378 EPOCH 5 done: loss 0.1609 - lr 0.0200000\n",
+      "2021-09-21 21:12:08,532 DEV : loss 0.6803886890411377 - score 0.75\n",
+      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:12:08,532 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:12:08,534 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:08,783 epoch 6 - iter 3/32 - loss 0.00070810 - samples/sec: 13.49 - lr: 0.010000\n",
+      "2021-09-21 21:12:09,051 epoch 6 - iter 6/32 - loss 0.00110030 - samples/sec: 11.25 - lr: 0.010000\n",
+      "2021-09-21 21:12:09,282 epoch 6 - iter 9/32 - loss 0.13435738 - samples/sec: 12.98 - lr: 0.010000\n",
+      "2021-09-21 21:12:09,501 epoch 6 - iter 12/32 - loss 0.10631961 - samples/sec: 13.79 - lr: 0.010000\n",
+      "2021-09-21 21:12:09,723 epoch 6 - iter 15/32 - loss 0.08543166 - samples/sec: 13.54 - lr: 0.010000\n",
+      "2021-09-21 21:12:09,899 epoch 6 - iter 18/32 - loss 0.07162351 - samples/sec: 17.08 - lr: 0.010000\n",
+      "2021-09-21 21:12:10,145 epoch 6 - iter 21/32 - loss 0.10953635 - samples/sec: 12.22 - lr: 0.010000\n",
+      "2021-09-21 21:12:10,333 epoch 6 - iter 24/32 - loss 0.10100746 - samples/sec: 15.99 - lr: 0.010000\n",
+      "2021-09-21 21:12:10,547 epoch 6 - iter 27/32 - loss 0.10014232 - samples/sec: 14.06 - lr: 0.010000\n",
+      "2021-09-21 21:12:10,761 epoch 6 - iter 30/32 - loss 0.09069924 - samples/sec: 14.10 - lr: 0.010000\n",
+      "2021-09-21 21:12:10,908 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:34:15,980 epoch 6 - iter 30/32 - loss 0.11615962 - samples/sec: 15.79 - lr: 0.020000\n",
-      "2021-09-08 01:34:16,074 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:16,075 EPOCH 6 done: loss 0.1282 - lr 0.0200000\n",
-      "2021-09-08 01:34:16,247 DEV : loss 0.38594967126846313 - score 0.75\n",
-      "2021-09-08 01:34:16,248 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:34:22,214 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:22,408 epoch 7 - iter 3/32 - loss 0.02118875 - samples/sec: 16.95 - lr: 0.020000\n",
-      "2021-09-08 01:34:22,561 epoch 7 - iter 6/32 - loss 0.01500578 - samples/sec: 19.70 - lr: 0.020000\n",
-      "2021-09-08 01:34:22,784 epoch 7 - iter 9/32 - loss 0.07722818 - samples/sec: 13.50 - lr: 0.020000\n",
-      "2021-09-08 01:34:23,059 epoch 7 - iter 12/32 - loss 0.06256595 - samples/sec: 10.95 - lr: 0.020000\n",
-      "2021-09-08 01:34:23,239 epoch 7 - iter 15/32 - loss 0.06916494 - samples/sec: 16.71 - lr: 0.020000\n",
-      "2021-09-08 01:34:23,424 epoch 7 - iter 18/32 - loss 0.08000513 - samples/sec: 16.27 - lr: 0.020000\n",
-      "2021-09-08 01:34:23,592 epoch 7 - iter 21/32 - loss 0.06876911 - samples/sec: 17.93 - lr: 0.020000\n",
-      "2021-09-08 01:34:23,779 epoch 7 - iter 24/32 - loss 0.06026621 - samples/sec: 16.10 - lr: 0.020000\n",
-      "2021-09-08 01:34:23,989 epoch 7 - iter 27/32 - loss 0.07311319 - samples/sec: 14.36 - lr: 0.020000\n",
-      "2021-09-08 01:34:24,245 epoch 7 - iter 30/32 - loss 0.09851003 - samples/sec: 11.74 - lr: 0.020000\n",
-      "2021-09-08 01:34:24,360 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:24,361 EPOCH 7 done: loss 0.0933 - lr 0.0200000\n",
-      "2021-09-08 01:34:24,465 DEV : loss 0.42176729440689087 - score 0.75\n",
-      "2021-09-08 01:34:24,466 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:34:24,468 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:24,689 epoch 8 - iter 3/32 - loss 0.02096597 - samples/sec: 14.69 - lr: 0.020000\n",
-      "2021-09-08 01:34:24,856 epoch 8 - iter 6/32 - loss 0.01065829 - samples/sec: 18.03 - lr: 0.020000\n",
-      "2021-09-08 01:34:25,045 epoch 8 - iter 9/32 - loss 0.01049503 - samples/sec: 15.91 - lr: 0.020000\n",
-      "2021-09-08 01:34:25,248 epoch 8 - iter 12/32 - loss 0.01295451 - samples/sec: 14.84 - lr: 0.020000\n",
-      "2021-09-08 01:34:25,499 epoch 8 - iter 15/32 - loss 0.01514471 - samples/sec: 12.00 - lr: 0.020000\n",
-      "2021-09-08 01:34:25,675 epoch 8 - iter 18/32 - loss 0.01506063 - samples/sec: 17.11 - lr: 0.020000\n",
-      "2021-09-08 01:34:25,849 epoch 8 - iter 21/32 - loss 0.05056072 - samples/sec: 17.27 - lr: 0.020000\n",
-      "2021-09-08 01:34:26,043 epoch 8 - iter 24/32 - loss 0.06613574 - samples/sec: 15.55 - lr: 0.020000\n",
-      "2021-09-08 01:34:26,198 epoch 8 - iter 27/32 - loss 0.05894649 - samples/sec: 19.39 - lr: 0.020000\n",
-      "2021-09-08 01:34:26,395 epoch 8 - iter 30/32 - loss 0.05337888 - samples/sec: 15.25 - lr: 0.020000\n",
-      "2021-09-08 01:34:26,546 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:26,547 EPOCH 8 done: loss 0.0501 - lr 0.0200000\n",
-      "2021-09-08 01:34:26,619 DEV : loss 0.22019214928150177 - score 0.75\n",
-      "2021-09-08 01:34:26,621 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:34:33,123 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:33,312 epoch 9 - iter 3/32 - loss 0.00666256 - samples/sec: 17.82 - lr: 0.020000\n",
-      "2021-09-08 01:34:33,459 epoch 9 - iter 6/32 - loss 0.00547421 - samples/sec: 20.49 - lr: 0.020000\n",
-      "2021-09-08 01:34:33,678 epoch 9 - iter 9/32 - loss 0.01023875 - samples/sec: 13.75 - lr: 0.020000\n",
-      "2021-09-08 01:34:33,868 epoch 9 - iter 12/32 - loss 0.01075626 - samples/sec: 15.89 - lr: 0.020000\n",
-      "2021-09-08 01:34:34,045 epoch 9 - iter 15/32 - loss 0.00906453 - samples/sec: 16.97 - lr: 0.020000\n",
-      "2021-09-08 01:34:34,237 epoch 9 - iter 18/32 - loss 0.01251423 - samples/sec: 15.66 - lr: 0.020000\n",
-      "2021-09-08 01:34:34,409 epoch 9 - iter 21/32 - loss 0.01263569 - samples/sec: 17.48 - lr: 0.020000\n",
-      "2021-09-08 01:34:34,592 epoch 9 - iter 24/32 - loss 0.01166586 - samples/sec: 16.53 - lr: 0.020000\n",
-      "2021-09-08 01:34:34,782 epoch 9 - iter 27/32 - loss 0.01039542 - samples/sec: 15.82 - lr: 0.020000\n",
-      "2021-09-08 01:34:34,957 epoch 9 - iter 30/32 - loss 0.00947996 - samples/sec: 17.13 - lr: 0.020000\n",
-      "2021-09-08 01:34:35,082 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:35,082 EPOCH 9 done: loss 0.0384 - lr 0.0200000\n",
-      "2021-09-08 01:34:35,189 DEV : loss 0.12943041324615479 - score 1.0\n",
-      "2021-09-08 01:34:35,190 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:34:41,594 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:41,795 epoch 10 - iter 3/32 - loss 0.01361140 - samples/sec: 18.03 - lr: 0.020000\n",
-      "2021-09-08 01:34:41,968 epoch 10 - iter 6/32 - loss 0.00701215 - samples/sec: 17.44 - lr: 0.020000\n",
-      "2021-09-08 01:34:42,155 epoch 10 - iter 9/32 - loss 0.00473246 - samples/sec: 16.06 - lr: 0.020000\n",
-      "2021-09-08 01:34:42,338 epoch 10 - iter 12/32 - loss 0.00410332 - samples/sec: 16.43 - lr: 0.020000\n",
-      "2021-09-08 01:34:42,564 epoch 10 - iter 15/32 - loss 0.00371847 - samples/sec: 13.31 - lr: 0.020000\n",
-      "2021-09-08 01:34:42,828 epoch 10 - iter 18/32 - loss 0.00311964 - samples/sec: 11.41 - lr: 0.020000\n",
-      "2021-09-08 01:34:43,022 epoch 10 - iter 21/32 - loss 0.01257721 - samples/sec: 15.54 - lr: 0.020000\n",
-      "2021-09-08 01:34:43,179 epoch 10 - iter 24/32 - loss 0.01113361 - samples/sec: 19.20 - lr: 0.020000\n",
-      "2021-09-08 01:34:43,348 epoch 10 - iter 27/32 - loss 0.01011143 - samples/sec: 17.88 - lr: 0.020000\n",
-      "2021-09-08 01:34:43,526 epoch 10 - iter 30/32 - loss 0.00984390 - samples/sec: 16.88 - lr: 0.020000\n",
-      "2021-09-08 01:34:43,628 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:43,628 EPOCH 10 done: loss 0.0094 - lr 0.0200000\n",
-      "2021-09-08 01:34:43,813 DEV : loss 0.500827431678772 - score 1.0\n",
-      "2021-09-08 01:34:43,815 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:34:56,714 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:56,715 Testing using best model ...\n",
-      "2021-09-08 01:34:56,716 loading file temp/best-model.pt\n",
+      "2021-09-21 21:12:10,908 EPOCH 6 done: loss 0.0876 - lr 0.0100000\n",
+      "2021-09-21 21:12:11,076 DEV : loss 1.2372658252716064 - score 0.5\n",
+      "2021-09-21 21:12:11,077 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:12:11,079 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:11,349 epoch 7 - iter 3/32 - loss 0.00081666 - samples/sec: 12.85 - lr: 0.010000\n",
+      "2021-09-21 21:12:11,583 epoch 7 - iter 6/32 - loss 0.02696627 - samples/sec: 12.87 - lr: 0.010000\n",
+      "2021-09-21 21:12:11,784 epoch 7 - iter 9/32 - loss 0.02098756 - samples/sec: 15.01 - lr: 0.010000\n",
+      "2021-09-21 21:12:12,023 epoch 7 - iter 12/32 - loss 0.08593226 - samples/sec: 12.58 - lr: 0.010000\n",
+      "2021-09-21 21:12:12,226 epoch 7 - iter 15/32 - loss 0.07027083 - samples/sec: 14.80 - lr: 0.010000\n",
+      "2021-09-21 21:12:12,433 epoch 7 - iter 18/32 - loss 0.05930431 - samples/sec: 14.58 - lr: 0.010000\n",
+      "2021-09-21 21:12:12,694 epoch 7 - iter 21/32 - loss 0.05113861 - samples/sec: 11.51 - lr: 0.010000\n",
+      "2021-09-21 21:12:12,931 epoch 7 - iter 24/32 - loss 0.04481036 - samples/sec: 12.71 - lr: 0.010000\n",
+      "2021-09-21 21:12:13,161 epoch 7 - iter 27/32 - loss 0.04020261 - samples/sec: 13.05 - lr: 0.010000\n",
+      "2021-09-21 21:12:13,373 epoch 7 - iter 30/32 - loss 0.03693172 - samples/sec: 14.19 - lr: 0.010000\n",
+      "2021-09-21 21:12:13,519 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:13,519 EPOCH 7 done: loss 0.0348 - lr 0.0100000\n",
+      "2021-09-21 21:12:13,645 DEV : loss 1.3090544939041138 - score 0.5\n",
+      "2021-09-21 21:12:13,646 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:12:13,648 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:13,854 epoch 8 - iter 3/32 - loss 0.00100589 - samples/sec: 17.59 - lr: 0.010000\n",
+      "2021-09-21 21:12:14,032 epoch 8 - iter 6/32 - loss 0.00924394 - samples/sec: 16.94 - lr: 0.010000\n",
+      "2021-09-21 21:12:14,214 epoch 8 - iter 9/32 - loss 0.00817365 - samples/sec: 16.61 - lr: 0.010000\n",
+      "2021-09-21 21:12:14,389 epoch 8 - iter 12/32 - loss 0.00670794 - samples/sec: 17.15 - lr: 0.010000\n",
+      "2021-09-21 21:12:14,560 epoch 8 - iter 15/32 - loss 0.00554924 - samples/sec: 17.62 - lr: 0.010000\n",
+      "2021-09-21 21:12:14,734 epoch 8 - iter 18/32 - loss 0.00478237 - samples/sec: 17.32 - lr: 0.010000\n",
+      "2021-09-21 21:12:14,924 epoch 8 - iter 21/32 - loss 0.00415553 - samples/sec: 15.88 - lr: 0.010000\n",
+      "2021-09-21 21:12:15,108 epoch 8 - iter 24/32 - loss 0.00379078 - samples/sec: 16.36 - lr: 0.010000\n",
+      "2021-09-21 21:12:15,279 epoch 8 - iter 27/32 - loss 0.00367884 - samples/sec: 17.62 - lr: 0.010000\n",
+      "2021-09-21 21:12:15,445 epoch 8 - iter 30/32 - loss 0.00360470 - samples/sec: 18.16 - lr: 0.010000\n",
+      "2021-09-21 21:12:15,544 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:16,341 EPOCH 8 done: loss 0.0035 - lr 0.0100000\n",
+      "2021-09-21 21:12:16,449 DEV : loss 1.19693124294281 - score 0.5\n",
+      "2021-09-21 21:12:16,450 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:12:17,260 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:17,478 epoch 9 - iter 3/32 - loss 0.00573265 - samples/sec: 15.82 - lr: 0.010000\n",
+      "2021-09-21 21:12:17,679 epoch 9 - iter 6/32 - loss 0.05265146 - samples/sec: 14.98 - lr: 0.010000\n",
+      "2021-09-21 21:12:17,888 epoch 9 - iter 9/32 - loss 0.03525645 - samples/sec: 14.44 - lr: 0.010000\n",
+      "2021-09-21 21:12:18,046 epoch 9 - iter 12/32 - loss 0.02782958 - samples/sec: 19.09 - lr: 0.010000\n",
+      "2021-09-21 21:12:18,227 epoch 9 - iter 15/32 - loss 0.02242531 - samples/sec: 16.63 - lr: 0.010000\n",
+      "2021-09-21 21:12:18,441 epoch 9 - iter 18/32 - loss 0.01882092 - samples/sec: 14.02 - lr: 0.010000\n",
+      "2021-09-21 21:12:18,641 epoch 9 - iter 21/32 - loss 0.01636433 - samples/sec: 15.06 - lr: 0.010000\n",
+      "2021-09-21 21:12:18,850 epoch 9 - iter 24/32 - loss 0.01460412 - samples/sec: 14.44 - lr: 0.010000\n",
+      "2021-09-21 21:12:19,041 epoch 9 - iter 27/32 - loss 0.01596389 - samples/sec: 15.77 - lr: 0.010000\n",
+      "2021-09-21 21:12:19,247 epoch 9 - iter 30/32 - loss 0.01462403 - samples/sec: 14.59 - lr: 0.010000\n",
+      "2021-09-21 21:12:19,394 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:19,396 EPOCH 9 done: loss 0.0138 - lr 0.0100000\n",
+      "2021-09-21 21:12:19,552 DEV : loss 1.1992971897125244 - score 0.5\n",
+      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 21:12:19,553 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:12:19,556 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:19,787 epoch 10 - iter 3/32 - loss 0.00032438 - samples/sec: 14.77 - lr: 0.005000\n",
+      "2021-09-21 21:12:20,012 epoch 10 - iter 6/32 - loss 0.00959069 - samples/sec: 13.34 - lr: 0.005000\n",
+      "2021-09-21 21:12:20,205 epoch 10 - iter 9/32 - loss 0.00652769 - samples/sec: 15.64 - lr: 0.005000\n",
+      "2021-09-21 21:12:20,442 epoch 10 - iter 12/32 - loss 0.00534990 - samples/sec: 12.67 - lr: 0.005000\n",
+      "2021-09-21 21:12:20,714 epoch 10 - iter 15/32 - loss 0.00435869 - samples/sec: 11.06 - lr: 0.005000\n",
+      "2021-09-21 21:12:20,984 epoch 10 - iter 18/32 - loss 0.00379814 - samples/sec: 11.16 - lr: 0.005000\n",
+      "2021-09-21 21:12:21,231 epoch 10 - iter 21/32 - loss 0.00336582 - samples/sec: 12.17 - lr: 0.005000\n",
+      "2021-09-21 21:12:21,489 epoch 10 - iter 24/32 - loss 0.00308851 - samples/sec: 11.65 - lr: 0.005000\n",
+      "2021-09-21 21:12:21,764 epoch 10 - iter 27/32 - loss 0.00283170 - samples/sec: 10.94 - lr: 0.005000\n",
+      "2021-09-21 21:12:22,031 epoch 10 - iter 30/32 - loss 0.00275489 - samples/sec: 11.25 - lr: 0.005000\n",
+      "2021-09-21 21:12:22,212 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:22,212 EPOCH 10 done: loss 0.0026 - lr 0.0050000\n",
+      "2021-09-21 21:12:22,412 DEV : loss 1.2272155284881592 - score 0.5\n",
+      "2021-09-21 21:12:22,413 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:12:28,905 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:28,906 Testing using best model ...\n",
+      "2021-09-21 21:12:28,941 loading file temp/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:35:01,935 \t0.75\n",
-      "2021-09-08 01:35:01,936 \n",
+      "2021-09-21 21:12:37,470 \t0.25\n",
+      "2021-09-21 21:12:37,471 \n",
       "Results:\n",
-      "- F-score (micro) 0.75\n",
-      "- F-score (macro) 0.5\n",
-      "- Accuracy 0.75\n",
+      "- F-score (micro) 0.25\n",
+      "- F-score (macro) 0.125\n",
+      "- Accuracy 0.25\n",
       "\n",
       "By class:\n",
       "                              precision    recall  f1-score   support\n",
       "\n",
-      " this text expresses sadness     1.0000    1.0000    1.0000         1\n",
-      "this text expresses optimism     0.0000    0.0000    0.0000         0\n",
-      "   this text expresses anger     1.0000    1.0000    1.0000         2\n",
-      "     this text expresses joy     0.0000    0.0000    0.0000         1\n",
+      " this text expresses sadness     0.0000    0.0000    0.0000         1\n",
+      "this text expresses optimism     0.0000    0.0000    0.0000         2\n",
+      "   this text expresses anger     0.0000    0.0000    0.0000         0\n",
+      "     this text expresses joy     0.3333    1.0000    0.5000         1\n",
       "\n",
-      "                   micro avg     0.7500    0.7500    0.7500         4\n",
-      "                   macro avg     0.5000    0.5000    0.5000         4\n",
-      "                weighted avg     0.7500    0.7500    0.7500         4\n",
-      "                 samples avg     0.7500    0.7500    0.7500         4\n",
+      "                   micro avg     0.2500    0.2500    0.2500         4\n",
+      "                   macro avg     0.0833    0.2500    0.1250         4\n",
+      "                weighted avg     0.0833    0.2500    0.1250         4\n",
+      "                 samples avg     0.2500    0.2500    0.2500         4\n",
       "\n",
-      "2021-09-08 01:35:01,936 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:12,631 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
+      "2021-09-21 21:12:37,471 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:52,626 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:35:16,832 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:12:56,689 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 36/36 [00:00<00:00, 41699.79it/s]"
+      "100%|██████████| 36/36 [00:00<00:00, 28739.05it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:35:16,835 [b'this text expresses sadness', b'this text expresses optimism', b'this text expresses anger', b'this text expresses joy']\n",
-      "2021-09-08 01:35:16,844 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:16,846 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:12:56,693 [b'this text expresses sadness', b'this text expresses optimism', b'this text expresses anger', b'this text expresses joy']\n",
+      "2021-09-21 21:12:56,864 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:56,866 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -4884,25 +4890,24 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:35:16,846 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:16,847 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
-      "2021-09-08 01:35:16,847 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:16,847 Parameters:\n",
-      "2021-09-08 01:35:16,848  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:35:16,848  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:35:16,848  - patience: \"3\"\n",
-      "2021-09-08 01:35:16,848  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:35:16,849  - max_epochs: \"10\"\n",
-      "2021-09-08 01:35:16,849  - shuffle: \"True\"\n",
-      "2021-09-08 01:35:16,849  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:35:16,850  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:35:16,850 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:16,850 Model training base path: \"temp\"\n",
-      "2021-09-08 01:35:16,850 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:16,851 Device: cuda:0\n",
-      "2021-09-08 01:35:16,851 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:16,851 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:35:16,857 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:12:56,866 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:56,867 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
+      "2021-09-21 21:12:56,867 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:56,867 Parameters:\n",
+      "2021-09-21 21:12:56,868  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:12:56,868  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:12:56,868  - patience: \"3\"\n",
+      "2021-09-21 21:12:56,869  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:12:56,869  - max_epochs: \"10\"\n",
+      "2021-09-21 21:12:56,869  - shuffle: \"True\"\n",
+      "2021-09-21 21:12:56,870  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:12:56,870  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:12:56,870 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:56,871 Model training base path: \"temp\"\n",
+      "2021-09-21 21:12:56,871 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:56,871 Device: cuda:0\n",
+      "2021-09-21 21:12:56,872 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:56,872 Embeddings storage mode: cpu\n"
      ]
     },
     {
@@ -4916,211 +4921,209 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:35:17,052 epoch 1 - iter 3/32 - loss 0.59522674 - samples/sec: 17.49 - lr: 0.020000\n",
-      "2021-09-08 01:35:17,238 epoch 1 - iter 6/32 - loss 0.44095804 - samples/sec: 16.24 - lr: 0.020000\n",
-      "2021-09-08 01:35:17,450 epoch 1 - iter 9/32 - loss 0.50877496 - samples/sec: 14.17 - lr: 0.020000\n",
-      "2021-09-08 01:35:17,633 epoch 1 - iter 12/32 - loss 0.56286312 - samples/sec: 16.39 - lr: 0.020000\n",
-      "2021-09-08 01:35:17,825 epoch 1 - iter 15/32 - loss 0.65931675 - samples/sec: 15.71 - lr: 0.020000\n",
-      "2021-09-08 01:35:18,044 epoch 1 - iter 18/32 - loss 0.63514693 - samples/sec: 13.77 - lr: 0.020000\n",
-      "2021-09-08 01:35:18,242 epoch 1 - iter 21/32 - loss 0.58719301 - samples/sec: 15.12 - lr: 0.020000\n",
-      "2021-09-08 01:35:18,433 epoch 1 - iter 24/32 - loss 0.51722411 - samples/sec: 15.83 - lr: 0.020000\n",
-      "2021-09-08 01:35:18,637 epoch 1 - iter 27/32 - loss 0.63657779 - samples/sec: 14.74 - lr: 0.020000\n",
-      "2021-09-08 01:35:18,820 epoch 1 - iter 30/32 - loss 0.62578147 - samples/sec: 16.38 - lr: 0.020000\n",
-      "2021-09-08 01:35:18,944 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:18,944 EPOCH 1 done: loss 0.5973 - lr 0.0200000\n",
-      "2021-09-08 01:35:19,047 DEV : loss 1.3793144226074219 - score 0.0\n",
-      "2021-09-08 01:35:19,048 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:35:25,624 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:25,847 epoch 2 - iter 3/32 - loss 0.77079886 - samples/sec: 16.43 - lr: 0.020000\n",
-      "2021-09-08 01:35:26,039 epoch 2 - iter 6/32 - loss 0.61456383 - samples/sec: 15.67 - lr: 0.020000\n",
-      "2021-09-08 01:35:26,214 epoch 2 - iter 9/32 - loss 0.59906505 - samples/sec: 17.24 - lr: 0.020000\n",
-      "2021-09-08 01:35:26,389 epoch 2 - iter 12/32 - loss 0.54868145 - samples/sec: 17.19 - lr: 0.020000\n",
-      "2021-09-08 01:35:26,568 epoch 2 - iter 15/32 - loss 0.44547638 - samples/sec: 16.84 - lr: 0.020000\n",
-      "2021-09-08 01:35:26,745 epoch 2 - iter 18/32 - loss 0.44564144 - samples/sec: 17.00 - lr: 0.020000\n",
-      "2021-09-08 01:35:26,934 epoch 2 - iter 21/32 - loss 0.48929993 - samples/sec: 15.98 - lr: 0.020000\n",
-      "2021-09-08 01:35:27,134 epoch 2 - iter 24/32 - loss 0.49714062 - samples/sec: 15.03 - lr: 0.020000\n",
-      "2021-09-08 01:35:27,341 epoch 2 - iter 27/32 - loss 0.46121114 - samples/sec: 14.57 - lr: 0.020000\n",
-      "2021-09-08 01:35:27,502 epoch 2 - iter 30/32 - loss 0.43286463 - samples/sec: 18.73 - lr: 0.020000\n",
-      "2021-09-08 01:35:27,630 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:27,631 EPOCH 2 done: loss 0.5102 - lr 0.0200000\n",
-      "2021-09-08 01:35:27,732 DEV : loss 0.8504964113235474 - score 0.5\n",
-      "2021-09-08 01:35:27,732 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:35:31,992 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:32,258 epoch 3 - iter 3/32 - loss 0.95195105 - samples/sec: 14.00 - lr: 0.020000\n",
-      "2021-09-08 01:35:32,441 epoch 3 - iter 6/32 - loss 0.64864919 - samples/sec: 16.43 - lr: 0.020000\n",
-      "2021-09-08 01:35:32,617 epoch 3 - iter 9/32 - loss 0.50780987 - samples/sec: 17.13 - lr: 0.020000\n",
-      "2021-09-08 01:35:32,800 epoch 3 - iter 12/32 - loss 0.40585774 - samples/sec: 16.43 - lr: 0.020000\n",
-      "2021-09-08 01:35:33,007 epoch 3 - iter 15/32 - loss 0.41177741 - samples/sec: 14.50 - lr: 0.020000\n",
-      "2021-09-08 01:35:33,201 epoch 3 - iter 18/32 - loss 0.34547881 - samples/sec: 15.59 - lr: 0.020000\n",
-      "2021-09-08 01:35:33,369 epoch 3 - iter 21/32 - loss 0.30900279 - samples/sec: 17.90 - lr: 0.020000\n",
-      "2021-09-08 01:35:33,532 epoch 3 - iter 24/32 - loss 0.27333425 - samples/sec: 18.41 - lr: 0.020000\n",
-      "2021-09-08 01:35:33,717 epoch 3 - iter 27/32 - loss 0.28347179 - samples/sec: 16.32 - lr: 0.020000\n",
-      "2021-09-08 01:35:33,960 epoch 3 - iter 30/32 - loss 0.38130157 - samples/sec: 12.38 - lr: 0.020000\n",
-      "2021-09-08 01:35:34,075 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:34,075 EPOCH 3 done: loss 0.3582 - lr 0.0200000\n",
-      "2021-09-08 01:35:34,267 DEV : loss 0.45580315589904785 - score 0.5\n",
-      "2021-09-08 01:35:34,270 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:12:57,048 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:57,204 epoch 1 - iter 3/32 - loss 0.12388661 - samples/sec: 21.25 - lr: 0.020000\n",
+      "2021-09-21 21:12:57,346 epoch 1 - iter 6/32 - loss 0.22475110 - samples/sec: 21.24 - lr: 0.020000\n",
+      "2021-09-21 21:12:57,492 epoch 1 - iter 9/32 - loss 0.19078605 - samples/sec: 20.55 - lr: 0.020000\n",
+      "2021-09-21 21:12:57,635 epoch 1 - iter 12/32 - loss 0.30961854 - samples/sec: 21.15 - lr: 0.020000\n",
+      "2021-09-21 21:12:57,795 epoch 1 - iter 15/32 - loss 0.44971041 - samples/sec: 18.80 - lr: 0.020000\n",
+      "2021-09-21 21:12:57,941 epoch 1 - iter 18/32 - loss 0.45432852 - samples/sec: 20.67 - lr: 0.020000\n",
+      "2021-09-21 21:12:58,088 epoch 1 - iter 21/32 - loss 0.52053849 - samples/sec: 20.57 - lr: 0.020000\n",
+      "2021-09-21 21:12:58,240 epoch 1 - iter 24/32 - loss 0.46314359 - samples/sec: 19.84 - lr: 0.020000\n",
+      "2021-09-21 21:12:58,381 epoch 1 - iter 27/32 - loss 0.56156701 - samples/sec: 21.34 - lr: 0.020000\n",
+      "2021-09-21 21:12:58,529 epoch 1 - iter 30/32 - loss 0.63756241 - samples/sec: 20.34 - lr: 0.020000\n",
+      "2021-09-21 21:12:58,626 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:58,627 EPOCH 1 done: loss 0.6143 - lr 0.0200000\n",
+      "2021-09-21 21:12:58,910 DEV : loss 1.1150811910629272 - score 0.5\n",
+      "2021-09-21 21:12:58,910 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:35:39,969 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:40,206 epoch 4 - iter 3/32 - loss 0.00517711 - samples/sec: 15.22 - lr: 0.020000\n",
-      "2021-09-08 01:35:40,364 epoch 4 - iter 6/32 - loss 0.00969225 - samples/sec: 19.07 - lr: 0.020000\n",
-      "2021-09-08 01:35:40,548 epoch 4 - iter 9/32 - loss 0.21469934 - samples/sec: 16.32 - lr: 0.020000\n",
-      "2021-09-08 01:35:40,759 epoch 4 - iter 12/32 - loss 0.26494672 - samples/sec: 14.29 - lr: 0.020000\n",
-      "2021-09-08 01:35:41,000 epoch 4 - iter 15/32 - loss 0.25127265 - samples/sec: 12.49 - lr: 0.020000\n",
-      "2021-09-08 01:35:41,187 epoch 4 - iter 18/32 - loss 0.29713675 - samples/sec: 16.09 - lr: 0.020000\n",
-      "2021-09-08 01:35:41,344 epoch 4 - iter 21/32 - loss 0.25617204 - samples/sec: 19.13 - lr: 0.020000\n",
-      "2021-09-08 01:35:41,530 epoch 4 - iter 24/32 - loss 0.35018243 - samples/sec: 16.17 - lr: 0.020000\n",
-      "2021-09-08 01:35:41,709 epoch 4 - iter 27/32 - loss 0.31169708 - samples/sec: 16.82 - lr: 0.020000\n",
-      "2021-09-08 01:35:41,924 epoch 4 - iter 30/32 - loss 0.34042775 - samples/sec: 13.97 - lr: 0.020000\n",
-      "2021-09-08 01:35:42,050 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:42,050 EPOCH 4 done: loss 0.3589 - lr 0.0200000\n",
-      "2021-09-08 01:35:42,171 DEV : loss 0.34378957748413086 - score 0.5\n",
-      "2021-09-08 01:35:42,171 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:35:46,353 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:46,571 epoch 5 - iter 3/32 - loss 0.02355800 - samples/sec: 14.84 - lr: 0.020000\n",
-      "2021-09-08 01:35:46,762 epoch 5 - iter 6/32 - loss 0.02345024 - samples/sec: 15.78 - lr: 0.020000\n",
-      "2021-09-08 01:35:47,005 epoch 5 - iter 9/32 - loss 0.08516081 - samples/sec: 12.39 - lr: 0.020000\n",
-      "2021-09-08 01:35:47,182 epoch 5 - iter 12/32 - loss 0.13460805 - samples/sec: 17.03 - lr: 0.020000\n",
-      "2021-09-08 01:35:47,335 epoch 5 - iter 15/32 - loss 0.10851935 - samples/sec: 19.76 - lr: 0.020000\n",
-      "2021-09-08 01:35:47,506 epoch 5 - iter 18/32 - loss 0.12770550 - samples/sec: 17.62 - lr: 0.020000\n",
-      "2021-09-08 01:35:47,689 epoch 5 - iter 21/32 - loss 0.11120489 - samples/sec: 16.43 - lr: 0.020000\n",
-      "2021-09-08 01:35:47,905 epoch 5 - iter 24/32 - loss 0.09826950 - samples/sec: 13.93 - lr: 0.020000\n",
-      "2021-09-08 01:35:48,092 epoch 5 - iter 27/32 - loss 0.13099054 - samples/sec: 16.11 - lr: 0.020000\n",
-      "2021-09-08 01:35:48,263 epoch 5 - iter 30/32 - loss 0.19630343 - samples/sec: 17.58 - lr: 0.020000\n",
-      "2021-09-08 01:35:48,364 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:48,365 EPOCH 5 done: loss 0.2047 - lr 0.0200000\n",
-      "2021-09-08 01:35:48,457 DEV : loss 0.42573145031929016 - score 0.5\n",
-      "2021-09-08 01:35:48,457 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:35:48,459 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:48,636 epoch 6 - iter 3/32 - loss 0.59735160 - samples/sec: 18.90 - lr: 0.020000\n",
-      "2021-09-08 01:35:48,781 epoch 6 - iter 6/32 - loss 0.30116722 - samples/sec: 20.72 - lr: 0.020000\n",
-      "2021-09-08 01:35:48,974 epoch 6 - iter 9/32 - loss 0.24161902 - samples/sec: 15.57 - lr: 0.020000\n",
-      "2021-09-08 01:35:49,132 epoch 6 - iter 12/32 - loss 0.22476440 - samples/sec: 19.15 - lr: 0.020000\n",
-      "2021-09-08 01:35:49,313 epoch 6 - iter 15/32 - loss 0.18534671 - samples/sec: 16.63 - lr: 0.020000\n",
-      "2021-09-08 01:35:49,475 epoch 6 - iter 18/32 - loss 0.15637160 - samples/sec: 18.63 - lr: 0.020000\n",
-      "2021-09-08 01:35:49,634 epoch 6 - iter 21/32 - loss 0.13420007 - samples/sec: 19.03 - lr: 0.020000\n",
-      "2021-09-08 01:35:49,780 epoch 6 - iter 24/32 - loss 0.18233890 - samples/sec: 20.57 - lr: 0.020000\n",
-      "2021-09-08 01:35:49,954 epoch 6 - iter 27/32 - loss 0.17120460 - samples/sec: 17.31 - lr: 0.020000\n",
-      "2021-09-08 01:35:50,091 epoch 6 - iter 30/32 - loss 0.15505769 - samples/sec: 21.95 - lr: 0.020000\n",
-      "2021-09-08 01:35:50,221 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:13:02,687 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:03,013 epoch 2 - iter 3/32 - loss 0.89075613 - samples/sec: 10.51 - lr: 0.020000\n",
+      "2021-09-21 21:13:03,286 epoch 2 - iter 6/32 - loss 1.02879959 - samples/sec: 11.02 - lr: 0.020000\n",
+      "2021-09-21 21:13:03,522 epoch 2 - iter 9/32 - loss 0.81646121 - samples/sec: 12.71 - lr: 0.020000\n",
+      "2021-09-21 21:13:03,737 epoch 2 - iter 12/32 - loss 0.80838962 - samples/sec: 14.04 - lr: 0.020000\n",
+      "2021-09-21 21:13:03,931 epoch 2 - iter 15/32 - loss 0.76697725 - samples/sec: 15.46 - lr: 0.020000\n",
+      "2021-09-21 21:13:04,091 epoch 2 - iter 18/32 - loss 0.64168389 - samples/sec: 18.86 - lr: 0.020000\n",
+      "2021-09-21 21:13:04,269 epoch 2 - iter 21/32 - loss 0.63206811 - samples/sec: 16.95 - lr: 0.020000\n",
+      "2021-09-21 21:13:04,471 epoch 2 - iter 24/32 - loss 0.60997486 - samples/sec: 14.91 - lr: 0.020000\n",
+      "2021-09-21 21:13:04,674 epoch 2 - iter 27/32 - loss 0.60782369 - samples/sec: 14.83 - lr: 0.020000\n",
+      "2021-09-21 21:13:04,897 epoch 2 - iter 30/32 - loss 0.55854126 - samples/sec: 13.47 - lr: 0.020000\n",
+      "2021-09-21 21:13:05,032 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:05,033 EPOCH 2 done: loss 0.5249 - lr 0.0200000\n",
+      "2021-09-21 21:13:05,252 DEV : loss 1.5453870296478271 - score 0.25\n",
+      "2021-09-21 21:13:05,253 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:13:05,335 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:05,571 epoch 3 - iter 3/32 - loss 0.53643677 - samples/sec: 14.62 - lr: 0.020000\n",
+      "2021-09-21 21:13:05,790 epoch 3 - iter 6/32 - loss 0.38057316 - samples/sec: 13.77 - lr: 0.020000\n",
+      "2021-09-21 21:13:05,990 epoch 3 - iter 9/32 - loss 0.39305959 - samples/sec: 15.04 - lr: 0.020000\n",
+      "2021-09-21 21:13:06,186 epoch 3 - iter 12/32 - loss 0.51556887 - samples/sec: 15.40 - lr: 0.020000\n",
+      "2021-09-21 21:13:06,388 epoch 3 - iter 15/32 - loss 0.45405436 - samples/sec: 14.92 - lr: 0.020000\n",
+      "2021-09-21 21:13:06,602 epoch 3 - iter 18/32 - loss 0.43221019 - samples/sec: 14.03 - lr: 0.020000\n",
+      "2021-09-21 21:13:06,793 epoch 3 - iter 21/32 - loss 0.40746507 - samples/sec: 15.81 - lr: 0.020000\n",
+      "2021-09-21 21:13:06,991 epoch 3 - iter 24/32 - loss 0.39282240 - samples/sec: 15.19 - lr: 0.020000\n",
+      "2021-09-21 21:13:07,194 epoch 3 - iter 27/32 - loss 0.35383110 - samples/sec: 14.82 - lr: 0.020000\n",
+      "2021-09-21 21:13:07,371 epoch 3 - iter 30/32 - loss 0.31906120 - samples/sec: 17.02 - lr: 0.020000\n",
+      "2021-09-21 21:13:07,504 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:07,505 EPOCH 3 done: loss 0.3083 - lr 0.0200000\n",
+      "2021-09-21 21:13:07,739 DEV : loss 1.4968485832214355 - score 0.5\n",
+      "2021-09-21 21:13:07,743 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:13:07,749 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:08,050 epoch 4 - iter 3/32 - loss 0.15992518 - samples/sec: 10.86 - lr: 0.020000\n",
+      "2021-09-21 21:13:08,332 epoch 4 - iter 6/32 - loss 0.41407949 - samples/sec: 10.67 - lr: 0.020000\n",
+      "2021-09-21 21:13:08,598 epoch 4 - iter 9/32 - loss 0.42172739 - samples/sec: 11.30 - lr: 0.020000\n",
+      "2021-09-21 21:13:08,865 epoch 4 - iter 12/32 - loss 0.33490932 - samples/sec: 11.28 - lr: 0.020000\n",
+      "2021-09-21 21:13:09,107 epoch 4 - iter 15/32 - loss 0.31237703 - samples/sec: 12.43 - lr: 0.020000\n",
+      "2021-09-21 21:13:09,350 epoch 4 - iter 18/32 - loss 0.26414775 - samples/sec: 12.36 - lr: 0.020000\n",
+      "2021-09-21 21:13:09,596 epoch 4 - iter 21/32 - loss 0.31373058 - samples/sec: 12.25 - lr: 0.020000\n",
+      "2021-09-21 21:13:09,870 epoch 4 - iter 24/32 - loss 0.33615398 - samples/sec: 10.97 - lr: 0.020000\n",
+      "2021-09-21 21:13:10,138 epoch 4 - iter 27/32 - loss 0.29949427 - samples/sec: 11.21 - lr: 0.020000\n",
+      "2021-09-21 21:13:10,409 epoch 4 - iter 30/32 - loss 0.26982415 - samples/sec: 11.10 - lr: 0.020000\n",
+      "2021-09-21 21:13:10,581 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:10,581 EPOCH 4 done: loss 0.2530 - lr 0.0200000\n",
+      "2021-09-21 21:13:10,766 DEV : loss 1.326778531074524 - score 0.5\n",
+      "2021-09-21 21:13:10,771 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:13:10,773 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:11,077 epoch 5 - iter 3/32 - loss 0.02283344 - samples/sec: 11.63 - lr: 0.020000\n",
+      "2021-09-21 21:13:11,348 epoch 5 - iter 6/32 - loss 0.12573517 - samples/sec: 11.08 - lr: 0.020000\n",
+      "2021-09-21 21:13:11,603 epoch 5 - iter 9/32 - loss 0.14873650 - samples/sec: 11.80 - lr: 0.020000\n",
+      "2021-09-21 21:13:11,873 epoch 5 - iter 12/32 - loss 0.11187152 - samples/sec: 11.15 - lr: 0.020000\n",
+      "2021-09-21 21:13:12,146 epoch 5 - iter 15/32 - loss 0.21805279 - samples/sec: 10.98 - lr: 0.020000\n",
+      "2021-09-21 21:13:12,404 epoch 5 - iter 18/32 - loss 0.18404394 - samples/sec: 11.68 - lr: 0.020000\n",
+      "2021-09-21 21:13:12,680 epoch 5 - iter 21/32 - loss 0.18325138 - samples/sec: 10.89 - lr: 0.020000\n",
+      "2021-09-21 21:13:12,954 epoch 5 - iter 24/32 - loss 0.16744557 - samples/sec: 10.98 - lr: 0.020000\n",
+      "2021-09-21 21:13:13,209 epoch 5 - iter 27/32 - loss 0.14904519 - samples/sec: 11.78 - lr: 0.020000\n",
+      "2021-09-21 21:13:13,440 epoch 5 - iter 30/32 - loss 0.13640158 - samples/sec: 13.04 - lr: 0.020000\n",
+      "2021-09-21 21:13:13,582 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:13,582 EPOCH 5 done: loss 0.1294 - lr 0.0200000\n",
+      "2021-09-21 21:13:13,735 DEV : loss 2.0221452713012695 - score 0.25\n",
+      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:13:13,736 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:13:13,737 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:14,041 epoch 6 - iter 3/32 - loss 1.11442614 - samples/sec: 11.13 - lr: 0.010000\n",
+      "2021-09-21 21:13:14,277 epoch 6 - iter 6/32 - loss 0.55876683 - samples/sec: 12.73 - lr: 0.010000\n",
+      "2021-09-21 21:13:14,516 epoch 6 - iter 9/32 - loss 0.37307483 - samples/sec: 12.62 - lr: 0.010000\n",
+      "2021-09-21 21:13:14,742 epoch 6 - iter 12/32 - loss 0.28091509 - samples/sec: 13.31 - lr: 0.010000\n",
+      "2021-09-21 21:13:14,980 epoch 6 - iter 15/32 - loss 0.28757777 - samples/sec: 12.63 - lr: 0.010000\n",
+      "2021-09-21 21:13:15,235 epoch 6 - iter 18/32 - loss 0.24370015 - samples/sec: 11.79 - lr: 0.010000\n",
+      "2021-09-21 21:13:15,474 epoch 6 - iter 21/32 - loss 0.27597628 - samples/sec: 12.58 - lr: 0.010000\n",
+      "2021-09-21 21:13:15,686 epoch 6 - iter 24/32 - loss 0.30573148 - samples/sec: 14.16 - lr: 0.010000\n",
+      "2021-09-21 21:13:15,905 epoch 6 - iter 27/32 - loss 0.27252505 - samples/sec: 13.74 - lr: 0.010000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:35:50,222 EPOCH 6 done: loss 0.1684 - lr 0.0200000\n",
-      "2021-09-08 01:35:50,417 DEV : loss 0.3507169485092163 - score 1.0\n",
-      "2021-09-08 01:35:50,417 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:35:55,564 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:55,785 epoch 7 - iter 3/32 - loss 0.34091920 - samples/sec: 15.69 - lr: 0.020000\n",
-      "2021-09-08 01:35:55,964 epoch 7 - iter 6/32 - loss 0.18780613 - samples/sec: 16.80 - lr: 0.020000\n",
-      "2021-09-08 01:35:56,143 epoch 7 - iter 9/32 - loss 0.12695216 - samples/sec: 16.78 - lr: 0.020000\n",
-      "2021-09-08 01:35:56,313 epoch 7 - iter 12/32 - loss 0.10409233 - samples/sec: 17.73 - lr: 0.020000\n",
-      "2021-09-08 01:35:56,502 epoch 7 - iter 15/32 - loss 0.22618929 - samples/sec: 15.92 - lr: 0.020000\n",
-      "2021-09-08 01:35:56,704 epoch 7 - iter 18/32 - loss 0.19074578 - samples/sec: 14.88 - lr: 0.020000\n",
-      "2021-09-08 01:35:56,947 epoch 7 - iter 21/32 - loss 0.18060429 - samples/sec: 12.38 - lr: 0.020000\n",
-      "2021-09-08 01:35:57,141 epoch 7 - iter 24/32 - loss 0.16201355 - samples/sec: 15.55 - lr: 0.020000\n",
-      "2021-09-08 01:35:57,338 epoch 7 - iter 27/32 - loss 0.20376408 - samples/sec: 15.27 - lr: 0.020000\n",
-      "2021-09-08 01:35:57,538 epoch 7 - iter 30/32 - loss 0.24126368 - samples/sec: 15.04 - lr: 0.020000\n",
-      "2021-09-08 01:35:57,659 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:57,659 EPOCH 7 done: loss 0.2266 - lr 0.0200000\n",
-      "2021-09-08 01:35:57,853 DEV : loss 0.7636547684669495 - score 0.5\n",
-      "2021-09-08 01:35:57,853 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:35:57,929 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:58,189 epoch 8 - iter 3/32 - loss 0.17356982 - samples/sec: 14.20 - lr: 0.020000\n",
-      "2021-09-08 01:35:58,368 epoch 8 - iter 6/32 - loss 0.09283043 - samples/sec: 16.84 - lr: 0.020000\n",
-      "2021-09-08 01:35:58,560 epoch 8 - iter 9/32 - loss 0.06308704 - samples/sec: 15.68 - lr: 0.020000\n",
-      "2021-09-08 01:35:58,728 epoch 8 - iter 12/32 - loss 0.04773435 - samples/sec: 17.84 - lr: 0.020000\n",
-      "2021-09-08 01:35:58,905 epoch 8 - iter 15/32 - loss 0.03847571 - samples/sec: 17.00 - lr: 0.020000\n",
-      "2021-09-08 01:35:59,086 epoch 8 - iter 18/32 - loss 0.07595168 - samples/sec: 16.59 - lr: 0.020000\n",
-      "2021-09-08 01:35:59,230 epoch 8 - iter 21/32 - loss 0.07719413 - samples/sec: 20.94 - lr: 0.020000\n",
-      "2021-09-08 01:35:59,406 epoch 8 - iter 24/32 - loss 0.06969428 - samples/sec: 17.13 - lr: 0.020000\n",
-      "2021-09-08 01:35:59,549 epoch 8 - iter 27/32 - loss 0.06377073 - samples/sec: 21.03 - lr: 0.020000\n",
-      "2021-09-08 01:35:59,712 epoch 8 - iter 30/32 - loss 0.05787901 - samples/sec: 18.47 - lr: 0.020000\n",
-      "2021-09-08 01:35:59,843 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:59,843 EPOCH 8 done: loss 0.0565 - lr 0.0200000\n",
-      "2021-09-08 01:36:00,029 DEV : loss 0.30507445335388184 - score 0.75\n",
-      "2021-09-08 01:36:00,030 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:36:00,102 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:00,257 epoch 9 - iter 3/32 - loss 0.04173460 - samples/sec: 21.51 - lr: 0.020000\n",
-      "2021-09-08 01:36:00,411 epoch 9 - iter 6/32 - loss 0.11293696 - samples/sec: 19.53 - lr: 0.020000\n",
-      "2021-09-08 01:36:00,592 epoch 9 - iter 9/32 - loss 0.07576849 - samples/sec: 16.67 - lr: 0.020000\n",
-      "2021-09-08 01:36:00,730 epoch 9 - iter 12/32 - loss 0.05687786 - samples/sec: 21.78 - lr: 0.020000\n",
-      "2021-09-08 01:36:00,878 epoch 9 - iter 15/32 - loss 0.04578628 - samples/sec: 20.35 - lr: 0.020000\n",
-      "2021-09-08 01:36:01,030 epoch 9 - iter 18/32 - loss 0.03920951 - samples/sec: 19.81 - lr: 0.020000\n",
-      "2021-09-08 01:36:01,183 epoch 9 - iter 21/32 - loss 0.03377321 - samples/sec: 19.74 - lr: 0.020000\n",
-      "2021-09-08 01:36:01,351 epoch 9 - iter 24/32 - loss 0.03745558 - samples/sec: 17.82 - lr: 0.020000\n",
-      "2021-09-08 01:36:01,491 epoch 9 - iter 27/32 - loss 0.06713416 - samples/sec: 21.67 - lr: 0.020000\n",
-      "2021-09-08 01:36:01,683 epoch 9 - iter 30/32 - loss 0.06087722 - samples/sec: 15.70 - lr: 0.020000\n",
-      "2021-09-08 01:36:01,773 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:01,774 EPOCH 9 done: loss 0.0573 - lr 0.0200000\n",
-      "2021-09-08 01:36:01,960 DEV : loss 0.48653262853622437 - score 0.75\n",
-      "2021-09-08 01:36:01,961 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:36:02,037 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:02,235 epoch 10 - iter 3/32 - loss 0.00037245 - samples/sec: 16.30 - lr: 0.020000\n",
-      "2021-09-08 01:36:02,374 epoch 10 - iter 6/32 - loss 0.06968004 - samples/sec: 21.65 - lr: 0.020000\n",
-      "2021-09-08 01:36:02,557 epoch 10 - iter 9/32 - loss 0.07483667 - samples/sec: 16.49 - lr: 0.020000\n",
-      "2021-09-08 01:36:02,694 epoch 10 - iter 12/32 - loss 0.08519279 - samples/sec: 21.88 - lr: 0.020000\n",
-      "2021-09-08 01:36:02,861 epoch 10 - iter 15/32 - loss 0.07251633 - samples/sec: 18.05 - lr: 0.020000\n",
-      "2021-09-08 01:36:03,045 epoch 10 - iter 18/32 - loss 0.06077087 - samples/sec: 16.33 - lr: 0.020000\n",
-      "2021-09-08 01:36:03,224 epoch 10 - iter 21/32 - loss 0.05392417 - samples/sec: 16.86 - lr: 0.020000\n",
-      "2021-09-08 01:36:03,421 epoch 10 - iter 24/32 - loss 0.04747993 - samples/sec: 15.25 - lr: 0.020000\n",
-      "2021-09-08 01:36:03,592 epoch 10 - iter 27/32 - loss 0.04241135 - samples/sec: 17.62 - lr: 0.020000\n",
-      "2021-09-08 01:36:03,771 epoch 10 - iter 30/32 - loss 0.13696693 - samples/sec: 16.83 - lr: 0.020000\n",
-      "2021-09-08 01:36:03,896 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:03,897 EPOCH 10 done: loss 0.1285 - lr 0.0200000\n",
-      "2021-09-08 01:36:04,025 DEV : loss 0.15648573637008667 - score 1.0\n",
-      "2021-09-08 01:36:04,027 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:36:14,441 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:14,441 Testing using best model ...\n",
-      "2021-09-08 01:36:14,443 loading file temp/best-model.pt\n",
+      "2021-09-21 21:13:16,140 epoch 6 - iter 30/32 - loss 0.29339156 - samples/sec: 12.81 - lr: 0.010000\n",
+      "2021-09-21 21:13:16,306 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:16,306 EPOCH 6 done: loss 0.2772 - lr 0.0100000\n",
+      "2021-09-21 21:13:16,476 DEV : loss 1.821991205215454 - score 0.25\n",
+      "2021-09-21 21:13:16,477 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:13:16,479 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:16,739 epoch 7 - iter 3/32 - loss 0.00378378 - samples/sec: 12.47 - lr: 0.010000\n",
+      "2021-09-21 21:13:16,981 epoch 7 - iter 6/32 - loss 0.00685685 - samples/sec: 12.43 - lr: 0.010000\n",
+      "2021-09-21 21:13:17,223 epoch 7 - iter 9/32 - loss 0.05455886 - samples/sec: 12.41 - lr: 0.010000\n",
+      "2021-09-21 21:13:17,446 epoch 7 - iter 12/32 - loss 0.12402521 - samples/sec: 13.52 - lr: 0.010000\n",
+      "2021-09-21 21:13:17,688 epoch 7 - iter 15/32 - loss 0.09963507 - samples/sec: 12.42 - lr: 0.010000\n",
+      "2021-09-21 21:13:17,933 epoch 7 - iter 18/32 - loss 0.08335643 - samples/sec: 12.29 - lr: 0.010000\n",
+      "2021-09-21 21:13:18,154 epoch 7 - iter 21/32 - loss 0.07537498 - samples/sec: 13.63 - lr: 0.010000\n",
+      "2021-09-21 21:13:18,353 epoch 7 - iter 24/32 - loss 0.06629528 - samples/sec: 15.12 - lr: 0.010000\n",
+      "2021-09-21 21:13:18,541 epoch 7 - iter 27/32 - loss 0.05984876 - samples/sec: 16.05 - lr: 0.010000\n",
+      "2021-09-21 21:13:18,748 epoch 7 - iter 30/32 - loss 0.06636065 - samples/sec: 14.51 - lr: 0.010000\n",
+      "2021-09-21 21:13:18,893 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:18,893 EPOCH 7 done: loss 0.0659 - lr 0.0100000\n",
+      "2021-09-21 21:13:19,127 DEV : loss 1.89614737033844 - score 0.5\n",
+      "2021-09-21 21:13:19,128 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:13:19,203 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:19,479 epoch 8 - iter 3/32 - loss 0.38239970 - samples/sec: 12.38 - lr: 0.010000\n",
+      "2021-09-21 21:13:19,689 epoch 8 - iter 6/32 - loss 0.19972987 - samples/sec: 14.33 - lr: 0.010000\n",
+      "2021-09-21 21:13:19,907 epoch 8 - iter 9/32 - loss 0.13378472 - samples/sec: 13.75 - lr: 0.010000\n",
+      "2021-09-21 21:13:20,112 epoch 8 - iter 12/32 - loss 0.11997146 - samples/sec: 14.71 - lr: 0.010000\n",
+      "2021-09-21 21:13:20,302 epoch 8 - iter 15/32 - loss 0.09627211 - samples/sec: 15.84 - lr: 0.010000\n",
+      "2021-09-21 21:13:20,468 epoch 8 - iter 18/32 - loss 0.08076949 - samples/sec: 18.15 - lr: 0.010000\n",
+      "2021-09-21 21:13:20,638 epoch 8 - iter 21/32 - loss 0.07008090 - samples/sec: 17.67 - lr: 0.010000\n",
+      "2021-09-21 21:13:20,811 epoch 8 - iter 24/32 - loss 0.07157979 - samples/sec: 17.51 - lr: 0.010000\n",
+      "2021-09-21 21:13:20,985 epoch 8 - iter 27/32 - loss 0.06386094 - samples/sec: 17.31 - lr: 0.010000\n",
+      "2021-09-21 21:13:21,147 epoch 8 - iter 30/32 - loss 0.06227872 - samples/sec: 18.59 - lr: 0.010000\n",
+      "2021-09-21 21:13:21,257 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:21,257 EPOCH 8 done: loss 0.0584 - lr 0.0100000\n",
+      "2021-09-21 21:13:21,460 DEV : loss 1.9131041765213013 - score 0.25\n",
+      "2021-09-21 21:13:21,461 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:13:21,543 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:21,730 epoch 9 - iter 3/32 - loss 0.00136974 - samples/sec: 18.35 - lr: 0.010000\n",
+      "2021-09-21 21:13:21,907 epoch 9 - iter 6/32 - loss 0.00125082 - samples/sec: 16.97 - lr: 0.010000\n",
+      "2021-09-21 21:13:22,083 epoch 9 - iter 9/32 - loss 0.03037181 - samples/sec: 17.09 - lr: 0.010000\n",
+      "2021-09-21 21:13:22,262 epoch 9 - iter 12/32 - loss 0.07249122 - samples/sec: 16.83 - lr: 0.010000\n",
+      "2021-09-21 21:13:22,438 epoch 9 - iter 15/32 - loss 0.05823751 - samples/sec: 17.13 - lr: 0.010000\n",
+      "2021-09-21 21:13:22,603 epoch 9 - iter 18/32 - loss 0.04868024 - samples/sec: 18.24 - lr: 0.010000\n",
+      "2021-09-21 21:13:22,784 epoch 9 - iter 21/32 - loss 0.05080581 - samples/sec: 16.63 - lr: 0.010000\n",
+      "2021-09-21 21:13:22,954 epoch 9 - iter 24/32 - loss 0.04520909 - samples/sec: 17.74 - lr: 0.010000\n",
+      "2021-09-21 21:13:23,130 epoch 9 - iter 27/32 - loss 0.04033743 - samples/sec: 17.09 - lr: 0.010000\n",
+      "2021-09-21 21:13:23,361 epoch 9 - iter 30/32 - loss 0.03639317 - samples/sec: 13.04 - lr: 0.010000\n",
+      "2021-09-21 21:13:23,528 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:23,528 EPOCH 9 done: loss 0.0342 - lr 0.0100000\n",
+      "2021-09-21 21:13:23,778 DEV : loss 1.803827166557312 - score 0.25\n",
+      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 21:13:23,779 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:13:23,858 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:24,123 epoch 10 - iter 3/32 - loss 0.00262918 - samples/sec: 13.00 - lr: 0.005000\n",
+      "2021-09-21 21:13:24,354 epoch 10 - iter 6/32 - loss 0.00206236 - samples/sec: 13.02 - lr: 0.005000\n",
+      "2021-09-21 21:13:24,584 epoch 10 - iter 9/32 - loss 0.00185005 - samples/sec: 13.06 - lr: 0.005000\n",
+      "2021-09-21 21:13:24,834 epoch 10 - iter 12/32 - loss 0.02599232 - samples/sec: 12.02 - lr: 0.005000\n",
+      "2021-09-21 21:13:25,057 epoch 10 - iter 15/32 - loss 0.02087351 - samples/sec: 13.53 - lr: 0.005000\n",
+      "2021-09-21 21:13:25,316 epoch 10 - iter 18/32 - loss 0.01933132 - samples/sec: 11.58 - lr: 0.005000\n",
+      "2021-09-21 21:13:25,597 epoch 10 - iter 21/32 - loss 0.01672460 - samples/sec: 10.71 - lr: 0.005000\n",
+      "2021-09-21 21:13:25,856 epoch 10 - iter 24/32 - loss 0.01488753 - samples/sec: 11.61 - lr: 0.005000\n",
+      "2021-09-21 21:13:26,120 epoch 10 - iter 27/32 - loss 0.01331639 - samples/sec: 11.38 - lr: 0.005000\n",
+      "2021-09-21 21:13:26,381 epoch 10 - iter 30/32 - loss 0.01618570 - samples/sec: 11.54 - lr: 0.005000\n",
+      "2021-09-21 21:13:26,553 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:26,554 EPOCH 10 done: loss 0.0154 - lr 0.0050000\n",
+      "2021-09-21 21:13:26,727 DEV : loss 1.7932665348052979 - score 0.25\n",
+      "2021-09-21 21:13:26,728 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:13:37,836 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:37,837 Testing using best model ...\n",
+      "2021-09-21 21:13:37,861 loading file temp/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:36:22,497 \t0.25\n",
-      "2021-09-08 01:36:22,498 \n",
+      "2021-09-21 21:13:47,316 \t0.75\n",
+      "2021-09-21 21:13:47,317 \n",
       "Results:\n",
-      "- F-score (micro) 0.25\n",
-      "- F-score (macro) 0.1667\n",
-      "- Accuracy 0.25\n",
+      "- F-score (micro) 0.75\n",
+      "- F-score (macro) 0.4167\n",
+      "- Accuracy 0.75\n",
       "\n",
       "By class:\n",
       "                              precision    recall  f1-score   support\n",
       "\n",
       " this text expresses sadness     0.0000    0.0000    0.0000         1\n",
-      "this text expresses optimism     0.0000    0.0000    0.0000         1\n",
-      "   this text expresses anger     0.0000    0.0000    0.0000         0\n",
-      "     this text expresses joy     1.0000    0.5000    0.6667         2\n",
+      "this text expresses optimism     0.0000    0.0000    0.0000         0\n",
+      "   this text expresses anger     0.5000    1.0000    0.6667         1\n",
+      "     this text expresses joy     1.0000    1.0000    1.0000         2\n",
       "\n",
-      "                   micro avg     0.2500    0.2500    0.2500         4\n",
-      "                   macro avg     0.2500    0.1250    0.1667         4\n",
-      "                weighted avg     0.5000    0.2500    0.3333         4\n",
-      "                 samples avg     0.2500    0.2500    0.2500         4\n",
+      "                   micro avg     0.7500    0.7500    0.7500         4\n",
+      "                   macro avg     0.3750    0.5000    0.4167         4\n",
+      "                weighted avg     0.6250    0.7500    0.6667         4\n",
+      "                 samples avg     0.7500    0.7500    0.7500         4\n",
       "\n",
-      "2021-09-08 01:36:22,498 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:34,258 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
+      "2021-09-21 21:13:47,317 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:02,198 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:36:38,460 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:14:06,493 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 36/36 [00:00<00:00, 42036.45it/s]"
+      "100%|██████████| 36/36 [00:00<00:00, 43043.03it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:36:38,462 [b'this text expresses sadness', b'this text expresses optimism', b'this text expresses anger', b'this text expresses joy']\n",
-      "2021-09-08 01:36:38,471 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:38,473 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:14:06,496 [b'this text expresses sadness', b'this text expresses optimism', b'this text expresses anger', b'this text expresses joy']\n",
+      "2021-09-21 21:14:06,658 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:06,660 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -5433,25 +5436,24 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:36:38,474 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:38,474 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
-      "2021-09-08 01:36:38,474 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:38,475 Parameters:\n",
-      "2021-09-08 01:36:38,475  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:36:38,475  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:36:38,476  - patience: \"3\"\n",
-      "2021-09-08 01:36:38,476  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:36:38,476  - max_epochs: \"10\"\n",
-      "2021-09-08 01:36:38,476  - shuffle: \"True\"\n",
-      "2021-09-08 01:36:38,477  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:36:38,477  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:36:38,477 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:38,477 Model training base path: \"temp\"\n",
-      "2021-09-08 01:36:38,478 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:38,478 Device: cuda:0\n",
-      "2021-09-08 01:36:38,478 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:38,479 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:36:38,485 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:14:06,661 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:06,661 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
+      "2021-09-21 21:14:06,661 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:06,661 Parameters:\n",
+      "2021-09-21 21:14:06,662  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:14:06,662  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:14:06,662  - patience: \"3\"\n",
+      "2021-09-21 21:14:06,662  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:14:06,663  - max_epochs: \"10\"\n",
+      "2021-09-21 21:14:06,663  - shuffle: \"True\"\n",
+      "2021-09-21 21:14:06,663  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:14:06,664  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:14:06,664 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:06,664 Model training base path: \"temp\"\n",
+      "2021-09-21 21:14:06,664 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:06,665 Device: cuda:0\n",
+      "2021-09-21 21:14:06,665 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:06,665 Embeddings storage mode: cpu\n"
      ]
     },
     {
@@ -5465,190 +5467,192 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:36:38,764 epoch 1 - iter 3/32 - loss 0.62984245 - samples/sec: 13.04 - lr: 0.020000\n",
-      "2021-09-08 01:36:38,953 epoch 1 - iter 6/32 - loss 0.41648177 - samples/sec: 15.92 - lr: 0.020000\n",
-      "2021-09-08 01:36:39,150 epoch 1 - iter 9/32 - loss 0.36945644 - samples/sec: 15.27 - lr: 0.020000\n",
-      "2021-09-08 01:36:39,353 epoch 1 - iter 12/32 - loss 0.64718673 - samples/sec: 14.85 - lr: 0.020000\n",
-      "2021-09-08 01:36:39,554 epoch 1 - iter 15/32 - loss 0.69273999 - samples/sec: 14.95 - lr: 0.020000\n",
-      "2021-09-08 01:36:39,752 epoch 1 - iter 18/32 - loss 0.68727035 - samples/sec: 15.28 - lr: 0.020000\n",
-      "2021-09-08 01:36:40,005 epoch 1 - iter 21/32 - loss 0.65274485 - samples/sec: 11.88 - lr: 0.020000\n",
-      "2021-09-08 01:36:40,181 epoch 1 - iter 24/32 - loss 0.58787756 - samples/sec: 17.08 - lr: 0.020000\n",
-      "2021-09-08 01:36:40,372 epoch 1 - iter 27/32 - loss 0.69697960 - samples/sec: 15.78 - lr: 0.020000\n",
-      "2021-09-08 01:36:40,562 epoch 1 - iter 30/32 - loss 0.68308685 - samples/sec: 15.78 - lr: 0.020000\n",
-      "2021-09-08 01:36:40,691 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:40,691 EPOCH 1 done: loss 0.6454 - lr 0.0200000\n",
-      "2021-09-08 01:36:40,786 DEV : loss 1.197000503540039 - score 0.25\n",
-      "2021-09-08 01:36:40,787 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:14:06,870 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:07,075 epoch 1 - iter 3/32 - loss 0.71595469 - samples/sec: 16.22 - lr: 0.020000\n",
+      "2021-09-21 21:14:07,287 epoch 1 - iter 6/32 - loss 0.49208672 - samples/sec: 14.25 - lr: 0.020000\n",
+      "2021-09-21 21:14:07,478 epoch 1 - iter 9/32 - loss 0.52836839 - samples/sec: 15.76 - lr: 0.020000\n",
+      "2021-09-21 21:14:07,687 epoch 1 - iter 12/32 - loss 0.56539753 - samples/sec: 14.37 - lr: 0.020000\n",
+      "2021-09-21 21:14:07,897 epoch 1 - iter 15/32 - loss 0.51361021 - samples/sec: 14.35 - lr: 0.020000\n",
+      "2021-09-21 21:14:08,129 epoch 1 - iter 18/32 - loss 0.49498024 - samples/sec: 12.93 - lr: 0.020000\n",
+      "2021-09-21 21:14:08,325 epoch 1 - iter 21/32 - loss 0.47416722 - samples/sec: 15.35 - lr: 0.020000\n",
+      "2021-09-21 21:14:08,542 epoch 1 - iter 24/32 - loss 0.42890776 - samples/sec: 13.86 - lr: 0.020000\n",
+      "2021-09-21 21:14:08,757 epoch 1 - iter 27/32 - loss 0.59447541 - samples/sec: 14.05 - lr: 0.020000\n",
+      "2021-09-21 21:14:08,974 epoch 1 - iter 30/32 - loss 0.69549281 - samples/sec: 13.89 - lr: 0.020000\n",
+      "2021-09-21 21:14:09,107 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:09,108 EPOCH 1 done: loss 0.6649 - lr 0.0200000\n",
+      "2021-09-21 21:14:09,338 DEV : loss 0.9043613076210022 - score 0.5\n",
+      "2021-09-21 21:14:09,339 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:14:18,048 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:18,235 epoch 2 - iter 3/32 - loss 0.35644846 - samples/sec: 18.09 - lr: 0.020000\n",
+      "2021-09-21 21:14:18,392 epoch 2 - iter 6/32 - loss 0.47556716 - samples/sec: 19.23 - lr: 0.020000\n",
+      "2021-09-21 21:14:18,559 epoch 2 - iter 9/32 - loss 0.91922221 - samples/sec: 18.07 - lr: 0.020000\n",
+      "2021-09-21 21:14:18,727 epoch 2 - iter 12/32 - loss 0.80018159 - samples/sec: 17.89 - lr: 0.020000\n",
+      "2021-09-21 21:14:18,888 epoch 2 - iter 15/32 - loss 0.68566474 - samples/sec: 18.76 - lr: 0.020000\n",
+      "2021-09-21 21:14:19,059 epoch 2 - iter 18/32 - loss 0.60757729 - samples/sec: 17.59 - lr: 0.020000\n",
+      "2021-09-21 21:14:19,219 epoch 2 - iter 21/32 - loss 0.56824684 - samples/sec: 18.84 - lr: 0.020000\n",
+      "2021-09-21 21:14:19,389 epoch 2 - iter 24/32 - loss 0.60849317 - samples/sec: 17.80 - lr: 0.020000\n",
+      "2021-09-21 21:14:19,547 epoch 2 - iter 27/32 - loss 0.62866710 - samples/sec: 19.07 - lr: 0.020000\n",
+      "2021-09-21 21:14:19,709 epoch 2 - iter 30/32 - loss 0.62892944 - samples/sec: 18.51 - lr: 0.020000\n",
+      "2021-09-21 21:14:19,818 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:19,818 EPOCH 2 done: loss 0.6219 - lr 0.0200000\n",
+      "2021-09-21 21:14:19,899 DEV : loss 0.8734991550445557 - score 0.25\n",
+      "2021-09-21 21:14:19,899 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:14:19,901 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:20,074 epoch 3 - iter 3/32 - loss 0.39134076 - samples/sec: 19.26 - lr: 0.020000\n",
+      "2021-09-21 21:14:20,225 epoch 3 - iter 6/32 - loss 0.27692489 - samples/sec: 19.94 - lr: 0.020000\n",
+      "2021-09-21 21:14:20,378 epoch 3 - iter 9/32 - loss 0.44504789 - samples/sec: 19.63 - lr: 0.020000\n",
+      "2021-09-21 21:14:20,543 epoch 3 - iter 12/32 - loss 0.44455208 - samples/sec: 18.24 - lr: 0.020000\n",
+      "2021-09-21 21:14:20,700 epoch 3 - iter 15/32 - loss 0.41008835 - samples/sec: 19.17 - lr: 0.020000\n",
+      "2021-09-21 21:14:20,860 epoch 3 - iter 18/32 - loss 0.39373976 - samples/sec: 18.83 - lr: 0.020000\n",
+      "2021-09-21 21:14:21,007 epoch 3 - iter 21/32 - loss 0.38750156 - samples/sec: 20.42 - lr: 0.020000\n",
+      "2021-09-21 21:14:21,165 epoch 3 - iter 24/32 - loss 0.43453044 - samples/sec: 19.07 - lr: 0.020000\n",
+      "2021-09-21 21:14:21,336 epoch 3 - iter 27/32 - loss 0.41861581 - samples/sec: 17.61 - lr: 0.020000\n",
+      "2021-09-21 21:14:21,491 epoch 3 - iter 30/32 - loss 0.38404953 - samples/sec: 19.37 - lr: 0.020000\n",
+      "2021-09-21 21:14:21,610 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:21,611 EPOCH 3 done: loss 0.3797 - lr 0.0200000\n",
+      "2021-09-21 21:14:21,694 DEV : loss 0.5616363286972046 - score 0.5\n",
+      "2021-09-21 21:14:21,697 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:36:45,662 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:45,830 epoch 2 - iter 3/32 - loss 0.52761132 - samples/sec: 20.01 - lr: 0.020000\n",
-      "2021-09-08 01:36:46,023 epoch 2 - iter 6/32 - loss 0.38967978 - samples/sec: 15.56 - lr: 0.020000\n",
-      "2021-09-08 01:36:46,190 epoch 2 - iter 9/32 - loss 0.67424931 - samples/sec: 18.11 - lr: 0.020000\n",
-      "2021-09-08 01:36:46,383 epoch 2 - iter 12/32 - loss 0.68209316 - samples/sec: 15.61 - lr: 0.020000\n",
-      "2021-09-08 01:36:46,541 epoch 2 - iter 15/32 - loss 0.74403096 - samples/sec: 19.02 - lr: 0.020000\n",
-      "2021-09-08 01:36:46,728 epoch 2 - iter 18/32 - loss 0.78042627 - samples/sec: 16.09 - lr: 0.020000\n",
-      "2021-09-08 01:36:46,870 epoch 2 - iter 21/32 - loss 0.82437520 - samples/sec: 21.28 - lr: 0.020000\n",
-      "2021-09-08 01:36:47,026 epoch 2 - iter 24/32 - loss 0.77744545 - samples/sec: 19.31 - lr: 0.020000\n",
-      "2021-09-08 01:36:47,204 epoch 2 - iter 27/32 - loss 0.73685366 - samples/sec: 16.87 - lr: 0.020000\n",
-      "2021-09-08 01:36:47,351 epoch 2 - iter 30/32 - loss 0.77638545 - samples/sec: 20.50 - lr: 0.020000\n",
-      "2021-09-08 01:36:47,490 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:47,491 EPOCH 2 done: loss 0.7410 - lr 0.0200000\n",
-      "2021-09-08 01:36:47,874 DEV : loss 0.6680124998092651 - score 0.0\n",
-      "2021-09-08 01:36:47,875 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:36:48,033 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:48,220 epoch 3 - iter 3/32 - loss 0.16511190 - samples/sec: 18.05 - lr: 0.020000\n",
-      "2021-09-08 01:36:48,361 epoch 3 - iter 6/32 - loss 0.40070545 - samples/sec: 21.29 - lr: 0.020000\n",
-      "2021-09-08 01:36:48,538 epoch 3 - iter 9/32 - loss 0.39774336 - samples/sec: 17.03 - lr: 0.020000\n",
-      "2021-09-08 01:36:48,700 epoch 3 - iter 12/32 - loss 0.44346928 - samples/sec: 18.49 - lr: 0.020000\n",
-      "2021-09-08 01:36:48,848 epoch 3 - iter 15/32 - loss 0.55479176 - samples/sec: 20.36 - lr: 0.020000\n",
-      "2021-09-08 01:36:49,039 epoch 3 - iter 18/32 - loss 0.49337776 - samples/sec: 15.78 - lr: 0.020000\n",
-      "2021-09-08 01:36:49,198 epoch 3 - iter 21/32 - loss 0.42965588 - samples/sec: 18.90 - lr: 0.020000\n",
-      "2021-09-08 01:36:49,367 epoch 3 - iter 24/32 - loss 0.43568233 - samples/sec: 17.83 - lr: 0.020000\n",
-      "2021-09-08 01:36:49,538 epoch 3 - iter 27/32 - loss 0.46147875 - samples/sec: 17.67 - lr: 0.020000\n",
-      "2021-09-08 01:36:49,684 epoch 3 - iter 30/32 - loss 0.42989518 - samples/sec: 20.53 - lr: 0.020000\n",
-      "2021-09-08 01:36:49,820 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:49,821 EPOCH 3 done: loss 0.4068 - lr 0.0200000\n",
-      "2021-09-08 01:36:50,163 DEV : loss 0.27876079082489014 - score 1.0\n",
-      "2021-09-08 01:36:50,164 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:14:25,718 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:25,952 epoch 4 - iter 3/32 - loss 0.86966005 - samples/sec: 14.05 - lr: 0.020000\n",
+      "2021-09-21 21:14:26,146 epoch 4 - iter 6/32 - loss 0.51366326 - samples/sec: 15.54 - lr: 0.020000\n",
+      "2021-09-21 21:14:26,352 epoch 4 - iter 9/32 - loss 0.37110817 - samples/sec: 14.59 - lr: 0.020000\n",
+      "2021-09-21 21:14:26,595 epoch 4 - iter 12/32 - loss 0.57313845 - samples/sec: 12.36 - lr: 0.020000\n",
+      "2021-09-21 21:14:26,818 epoch 4 - iter 15/32 - loss 0.49851713 - samples/sec: 13.49 - lr: 0.020000\n",
+      "2021-09-21 21:14:27,055 epoch 4 - iter 18/32 - loss 0.46904736 - samples/sec: 12.70 - lr: 0.020000\n",
+      "2021-09-21 21:14:27,244 epoch 4 - iter 21/32 - loss 0.44754780 - samples/sec: 15.94 - lr: 0.020000\n",
+      "2021-09-21 21:14:27,478 epoch 4 - iter 24/32 - loss 0.43082593 - samples/sec: 12.82 - lr: 0.020000\n",
+      "2021-09-21 21:14:27,708 epoch 4 - iter 27/32 - loss 0.39075853 - samples/sec: 13.08 - lr: 0.020000\n",
+      "2021-09-21 21:14:27,885 epoch 4 - iter 30/32 - loss 0.36593568 - samples/sec: 17.04 - lr: 0.020000\n",
+      "2021-09-21 21:14:27,993 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:27,994 EPOCH 4 done: loss 0.3661 - lr 0.0200000\n",
+      "2021-09-21 21:14:28,153 DEV : loss 0.5338634848594666 - score 0.75\n",
+      "2021-09-21 21:14:28,154 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:36:57,390 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:57,593 epoch 4 - iter 3/32 - loss 0.25478970 - samples/sec: 16.71 - lr: 0.020000\n",
-      "2021-09-08 01:36:57,736 epoch 4 - iter 6/32 - loss 0.20073126 - samples/sec: 21.04 - lr: 0.020000\n",
-      "2021-09-08 01:36:57,929 epoch 4 - iter 9/32 - loss 0.21843118 - samples/sec: 15.62 - lr: 0.020000\n",
-      "2021-09-08 01:36:58,078 epoch 4 - iter 12/32 - loss 0.16778657 - samples/sec: 20.24 - lr: 0.020000\n",
-      "2021-09-08 01:36:58,241 epoch 4 - iter 15/32 - loss 0.14965360 - samples/sec: 18.40 - lr: 0.020000\n",
-      "2021-09-08 01:36:58,410 epoch 4 - iter 18/32 - loss 0.12964899 - samples/sec: 17.82 - lr: 0.020000\n",
-      "2021-09-08 01:36:58,562 epoch 4 - iter 21/32 - loss 0.14783308 - samples/sec: 19.77 - lr: 0.020000\n",
-      "2021-09-08 01:36:58,759 epoch 4 - iter 24/32 - loss 0.14039650 - samples/sec: 15.31 - lr: 0.020000\n",
-      "2021-09-08 01:36:58,899 epoch 4 - iter 27/32 - loss 0.13006387 - samples/sec: 21.44 - lr: 0.020000\n",
-      "2021-09-08 01:36:59,074 epoch 4 - iter 30/32 - loss 0.11952344 - samples/sec: 17.27 - lr: 0.020000\n",
-      "2021-09-08 01:36:59,179 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:59,180 EPOCH 4 done: loss 0.1143 - lr 0.0200000\n",
-      "2021-09-08 01:36:59,376 DEV : loss 1.2285659313201904 - score 0.25\n",
-      "2021-09-08 01:36:59,376 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:36:59,478 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:59,691 epoch 5 - iter 3/32 - loss 0.00168871 - samples/sec: 15.73 - lr: 0.020000\n",
-      "2021-09-08 01:36:59,835 epoch 5 - iter 6/32 - loss 0.10478237 - samples/sec: 20.85 - lr: 0.020000\n",
-      "2021-09-08 01:36:59,986 epoch 5 - iter 9/32 - loss 0.16563721 - samples/sec: 19.92 - lr: 0.020000\n",
-      "2021-09-08 01:37:00,180 epoch 5 - iter 12/32 - loss 0.14471126 - samples/sec: 15.55 - lr: 0.020000\n",
-      "2021-09-08 01:37:00,324 epoch 5 - iter 15/32 - loss 0.14571733 - samples/sec: 20.86 - lr: 0.020000\n",
-      "2021-09-08 01:37:00,512 epoch 5 - iter 18/32 - loss 0.16681566 - samples/sec: 16.02 - lr: 0.020000\n",
-      "2021-09-08 01:37:00,656 epoch 5 - iter 21/32 - loss 0.18032792 - samples/sec: 20.92 - lr: 0.020000\n",
-      "2021-09-08 01:37:00,838 epoch 5 - iter 24/32 - loss 0.17892092 - samples/sec: 16.59 - lr: 0.020000\n",
-      "2021-09-08 01:37:00,986 epoch 5 - iter 27/32 - loss 0.15951078 - samples/sec: 20.36 - lr: 0.020000\n",
-      "2021-09-08 01:37:01,134 epoch 5 - iter 30/32 - loss 0.14520221 - samples/sec: 20.35 - lr: 0.020000\n",
-      "2021-09-08 01:37:01,254 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:01,255 EPOCH 5 done: loss 0.1456 - lr 0.0200000\n",
-      "2021-09-08 01:37:04,795 DEV : loss 0.4611838757991791 - score 0.75\n",
-      "2021-09-08 01:37:04,796 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:37:04,801 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:05,007 epoch 6 - iter 3/32 - loss 0.03818135 - samples/sec: 16.40 - lr: 0.020000\n",
-      "2021-09-08 01:37:05,172 epoch 6 - iter 6/32 - loss 0.43664952 - samples/sec: 18.26 - lr: 0.020000\n",
-      "2021-09-08 01:37:05,335 epoch 6 - iter 9/32 - loss 0.29579521 - samples/sec: 18.43 - lr: 0.020000\n",
-      "2021-09-08 01:37:05,530 epoch 6 - iter 12/32 - loss 0.22719027 - samples/sec: 15.46 - lr: 0.020000\n",
-      "2021-09-08 01:37:05,735 epoch 6 - iter 15/32 - loss 0.22866074 - samples/sec: 14.68 - lr: 0.020000\n",
-      "2021-09-08 01:37:05,920 epoch 6 - iter 18/32 - loss 0.19113447 - samples/sec: 16.28 - lr: 0.020000\n",
-      "2021-09-08 01:37:06,081 epoch 6 - iter 21/32 - loss 0.21754707 - samples/sec: 18.68 - lr: 0.020000\n",
-      "2021-09-08 01:37:06,250 epoch 6 - iter 24/32 - loss 0.19172728 - samples/sec: 17.80 - lr: 0.020000\n",
-      "2021-09-08 01:37:06,419 epoch 6 - iter 27/32 - loss 0.18096380 - samples/sec: 17.75 - lr: 0.020000\n",
-      "2021-09-08 01:37:06,623 epoch 6 - iter 30/32 - loss 0.16352588 - samples/sec: 14.80 - lr: 0.020000\n",
-      "2021-09-08 01:37:06,756 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:14:32,083 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:32,354 epoch 5 - iter 3/32 - loss 0.06675909 - samples/sec: 13.51 - lr: 0.020000\n",
+      "2021-09-21 21:14:32,563 epoch 5 - iter 6/32 - loss 0.03599504 - samples/sec: 14.43 - lr: 0.020000\n",
+      "2021-09-21 21:14:32,751 epoch 5 - iter 9/32 - loss 0.05805874 - samples/sec: 15.95 - lr: 0.020000\n",
+      "2021-09-21 21:14:32,958 epoch 5 - iter 12/32 - loss 0.07671266 - samples/sec: 14.57 - lr: 0.020000\n",
+      "2021-09-21 21:14:33,156 epoch 5 - iter 15/32 - loss 0.08128896 - samples/sec: 15.20 - lr: 0.020000\n",
+      "2021-09-21 21:14:33,374 epoch 5 - iter 18/32 - loss 0.07160438 - samples/sec: 13.80 - lr: 0.020000\n",
+      "2021-09-21 21:14:33,573 epoch 5 - iter 21/32 - loss 0.06697909 - samples/sec: 15.06 - lr: 0.020000\n",
+      "2021-09-21 21:14:33,775 epoch 5 - iter 24/32 - loss 0.07048888 - samples/sec: 14.91 - lr: 0.020000\n",
+      "2021-09-21 21:14:33,976 epoch 5 - iter 27/32 - loss 0.07230523 - samples/sec: 14.98 - lr: 0.020000\n",
+      "2021-09-21 21:14:34,171 epoch 5 - iter 30/32 - loss 0.08160443 - samples/sec: 15.40 - lr: 0.020000\n",
+      "2021-09-21 21:14:34,322 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:34,322 EPOCH 5 done: loss 0.0930 - lr 0.0200000\n",
+      "2021-09-21 21:14:34,484 DEV : loss 0.820777952671051 - score 0.75\n",
+      "2021-09-21 21:14:34,484 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:14:34,486 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:34,713 epoch 6 - iter 3/32 - loss 0.00398288 - samples/sec: 15.74 - lr: 0.020000\n",
+      "2021-09-21 21:14:34,911 epoch 6 - iter 6/32 - loss 0.42484484 - samples/sec: 15.25 - lr: 0.020000\n",
+      "2021-09-21 21:14:35,104 epoch 6 - iter 9/32 - loss 0.34307273 - samples/sec: 15.54 - lr: 0.020000\n",
+      "2021-09-21 21:14:35,317 epoch 6 - iter 12/32 - loss 0.30363576 - samples/sec: 14.14 - lr: 0.020000\n",
+      "2021-09-21 21:14:35,489 epoch 6 - iter 15/32 - loss 0.24465841 - samples/sec: 17.53 - lr: 0.020000\n",
+      "2021-09-21 21:14:35,677 epoch 6 - iter 18/32 - loss 0.27070813 - samples/sec: 15.95 - lr: 0.020000\n",
+      "2021-09-21 21:14:35,869 epoch 6 - iter 21/32 - loss 0.24231662 - samples/sec: 15.65 - lr: 0.020000\n",
+      "2021-09-21 21:14:36,054 epoch 6 - iter 24/32 - loss 0.23494807 - samples/sec: 16.27 - lr: 0.020000\n",
+      "2021-09-21 21:14:36,267 epoch 6 - iter 27/32 - loss 0.22612780 - samples/sec: 14.16 - lr: 0.020000\n",
+      "2021-09-21 21:14:36,480 epoch 6 - iter 30/32 - loss 0.20370851 - samples/sec: 14.13 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:37:06,757 EPOCH 6 done: loss 0.1634 - lr 0.0200000\n",
-      "2021-09-08 01:37:06,879 DEV : loss 0.526609480381012 - score 0.5\n",
-      "2021-09-08 01:37:06,880 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:37:06,882 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:07,078 epoch 7 - iter 3/32 - loss 0.06025163 - samples/sec: 17.23 - lr: 0.020000\n",
-      "2021-09-08 01:37:07,264 epoch 7 - iter 6/32 - loss 0.03618303 - samples/sec: 16.17 - lr: 0.020000\n",
-      "2021-09-08 01:37:07,486 epoch 7 - iter 9/32 - loss 0.03243012 - samples/sec: 13.53 - lr: 0.020000\n",
-      "2021-09-08 01:37:07,644 epoch 7 - iter 12/32 - loss 0.03674883 - samples/sec: 19.09 - lr: 0.020000\n",
-      "2021-09-08 01:37:07,837 epoch 7 - iter 15/32 - loss 0.02946748 - samples/sec: 15.60 - lr: 0.020000\n",
-      "2021-09-08 01:37:08,041 epoch 7 - iter 18/32 - loss 0.02928982 - samples/sec: 14.75 - lr: 0.020000\n",
-      "2021-09-08 01:37:08,241 epoch 7 - iter 21/32 - loss 0.02515434 - samples/sec: 15.04 - lr: 0.020000\n",
-      "2021-09-08 01:37:08,457 epoch 7 - iter 24/32 - loss 0.02206548 - samples/sec: 13.93 - lr: 0.020000\n",
-      "2021-09-08 01:37:08,629 epoch 7 - iter 27/32 - loss 0.02074934 - samples/sec: 17.52 - lr: 0.020000\n",
-      "2021-09-08 01:37:08,797 epoch 7 - iter 30/32 - loss 0.03114931 - samples/sec: 17.88 - lr: 0.020000\n",
-      "2021-09-08 01:37:08,905 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:08,905 EPOCH 7 done: loss 0.0292 - lr 0.0200000\n",
-      "2021-09-08 01:37:09,003 DEV : loss 0.9488797783851624 - score 0.5\n",
-      "Epoch     7: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:37:09,003 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:37:09,005 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:09,244 epoch 8 - iter 3/32 - loss 0.00112420 - samples/sec: 13.71 - lr: 0.010000\n",
-      "2021-09-08 01:37:09,493 epoch 8 - iter 6/32 - loss 0.00100532 - samples/sec: 12.09 - lr: 0.010000\n",
-      "2021-09-08 01:37:09,688 epoch 8 - iter 9/32 - loss 0.01667487 - samples/sec: 15.48 - lr: 0.010000\n",
-      "2021-09-08 01:37:09,855 epoch 8 - iter 12/32 - loss 0.01740329 - samples/sec: 18.04 - lr: 0.010000\n",
-      "2021-09-08 01:37:10,034 epoch 8 - iter 15/32 - loss 0.01690099 - samples/sec: 16.82 - lr: 0.010000\n",
-      "2021-09-08 01:37:10,249 epoch 8 - iter 18/32 - loss 0.01761936 - samples/sec: 13.96 - lr: 0.010000\n",
-      "2021-09-08 01:37:10,429 epoch 8 - iter 21/32 - loss 0.01518837 - samples/sec: 16.77 - lr: 0.010000\n",
-      "2021-09-08 01:37:10,599 epoch 8 - iter 24/32 - loss 0.01337406 - samples/sec: 17.71 - lr: 0.010000\n",
-      "2021-09-08 01:37:10,766 epoch 8 - iter 27/32 - loss 0.02690221 - samples/sec: 17.95 - lr: 0.010000\n",
-      "2021-09-08 01:37:10,969 epoch 8 - iter 30/32 - loss 0.02470234 - samples/sec: 14.89 - lr: 0.010000\n",
-      "2021-09-08 01:37:11,143 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:11,143 EPOCH 8 done: loss 0.0258 - lr 0.0100000\n",
-      "2021-09-08 01:37:11,346 DEV : loss 0.840207576751709 - score 0.5\n",
-      "2021-09-08 01:37:11,347 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:37:11,422 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:11,669 epoch 9 - iter 3/32 - loss 0.00145027 - samples/sec: 14.73 - lr: 0.010000\n",
-      "2021-09-08 01:37:11,839 epoch 9 - iter 6/32 - loss 0.00401996 - samples/sec: 17.73 - lr: 0.010000\n",
-      "2021-09-08 01:37:12,004 epoch 9 - iter 9/32 - loss 0.00282877 - samples/sec: 18.19 - lr: 0.010000\n",
-      "2021-09-08 01:37:12,195 epoch 9 - iter 12/32 - loss 0.00230667 - samples/sec: 15.75 - lr: 0.010000\n",
-      "2021-09-08 01:37:12,393 epoch 9 - iter 15/32 - loss 0.00207519 - samples/sec: 15.25 - lr: 0.010000\n",
-      "2021-09-08 01:37:12,576 epoch 9 - iter 18/32 - loss 0.00199093 - samples/sec: 16.41 - lr: 0.010000\n",
-      "2021-09-08 01:37:12,751 epoch 9 - iter 21/32 - loss 0.00568700 - samples/sec: 17.24 - lr: 0.010000\n",
-      "2021-09-08 01:37:12,924 epoch 9 - iter 24/32 - loss 0.00552896 - samples/sec: 17.41 - lr: 0.010000\n",
-      "2021-09-08 01:37:13,115 epoch 9 - iter 27/32 - loss 0.00500643 - samples/sec: 15.72 - lr: 0.010000\n",
-      "2021-09-08 01:37:13,338 epoch 9 - iter 30/32 - loss 0.00454828 - samples/sec: 13.51 - lr: 0.010000\n",
-      "2021-09-08 01:37:13,442 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:13,442 EPOCH 9 done: loss 0.0045 - lr 0.0100000\n",
-      "2021-09-08 01:37:13,530 DEV : loss 0.9558792114257812 - score 0.5\n",
-      "2021-09-08 01:37:13,530 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:37:13,532 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:13,764 epoch 10 - iter 3/32 - loss 0.00136975 - samples/sec: 15.20 - lr: 0.010000\n",
-      "2021-09-08 01:37:13,995 epoch 10 - iter 6/32 - loss 0.00116295 - samples/sec: 13.00 - lr: 0.010000\n",
-      "2021-09-08 01:37:14,173 epoch 10 - iter 9/32 - loss 0.00098783 - samples/sec: 16.92 - lr: 0.010000\n",
-      "2021-09-08 01:37:14,347 epoch 10 - iter 12/32 - loss 0.00111271 - samples/sec: 17.37 - lr: 0.010000\n",
-      "2021-09-08 01:37:14,533 epoch 10 - iter 15/32 - loss 0.00388398 - samples/sec: 16.19 - lr: 0.010000\n",
-      "2021-09-08 01:37:14,731 epoch 10 - iter 18/32 - loss 0.09942452 - samples/sec: 15.20 - lr: 0.010000\n",
-      "2021-09-08 01:37:14,920 epoch 10 - iter 21/32 - loss 0.08531689 - samples/sec: 15.92 - lr: 0.010000\n",
-      "2021-09-08 01:37:15,090 epoch 10 - iter 24/32 - loss 0.07517424 - samples/sec: 17.75 - lr: 0.010000\n",
-      "2021-09-08 01:37:15,263 epoch 10 - iter 27/32 - loss 0.06713939 - samples/sec: 17.41 - lr: 0.010000\n",
-      "2021-09-08 01:37:15,453 epoch 10 - iter 30/32 - loss 0.06049788 - samples/sec: 15.86 - lr: 0.010000\n",
-      "2021-09-08 01:37:15,587 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:15,588 EPOCH 10 done: loss 0.0568 - lr 0.0100000\n",
-      "2021-09-08 01:37:15,800 DEV : loss 0.9790352582931519 - score 0.5\n",
-      "2021-09-08 01:37:15,801 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:37:24,151 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:24,151 Testing using best model ...\n",
-      "2021-09-08 01:37:24,296 loading file temp/best-model.pt\n",
+      "2021-09-21 21:14:36,635 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:36,636 EPOCH 6 done: loss 0.2034 - lr 0.0200000\n",
+      "2021-09-21 21:14:36,759 DEV : loss 0.5739323496818542 - score 0.75\n",
+      "2021-09-21 21:14:36,760 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:14:36,762 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:37,006 epoch 7 - iter 3/32 - loss 0.33470653 - samples/sec: 13.92 - lr: 0.020000\n",
+      "2021-09-21 21:14:37,189 epoch 7 - iter 6/32 - loss 0.16908896 - samples/sec: 16.47 - lr: 0.020000\n",
+      "2021-09-21 21:14:37,371 epoch 7 - iter 9/32 - loss 0.11534708 - samples/sec: 16.50 - lr: 0.020000\n",
+      "2021-09-21 21:14:37,583 epoch 7 - iter 12/32 - loss 0.10045507 - samples/sec: 14.19 - lr: 0.020000\n",
+      "2021-09-21 21:14:37,809 epoch 7 - iter 15/32 - loss 0.09440770 - samples/sec: 13.30 - lr: 0.020000\n",
+      "2021-09-21 21:14:38,041 epoch 7 - iter 18/32 - loss 0.12965480 - samples/sec: 12.97 - lr: 0.020000\n",
+      "2021-09-21 21:14:38,239 epoch 7 - iter 21/32 - loss 0.11383620 - samples/sec: 15.17 - lr: 0.020000\n",
+      "2021-09-21 21:14:38,443 epoch 7 - iter 24/32 - loss 0.13720959 - samples/sec: 14.77 - lr: 0.020000\n",
+      "2021-09-21 21:14:38,667 epoch 7 - iter 27/32 - loss 0.17288160 - samples/sec: 13.45 - lr: 0.020000\n",
+      "2021-09-21 21:14:38,859 epoch 7 - iter 30/32 - loss 0.15775827 - samples/sec: 15.63 - lr: 0.020000\n",
+      "2021-09-21 21:14:39,017 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:39,017 EPOCH 7 done: loss 0.1516 - lr 0.0200000\n",
+      "2021-09-21 21:14:39,153 DEV : loss 0.5770760774612427 - score 0.75\n",
+      "2021-09-21 21:14:39,158 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:14:39,160 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:39,404 epoch 8 - iter 3/32 - loss 0.14391022 - samples/sec: 15.02 - lr: 0.020000\n",
+      "2021-09-21 21:14:39,604 epoch 8 - iter 6/32 - loss 0.21080880 - samples/sec: 15.09 - lr: 0.020000\n",
+      "2021-09-21 21:14:39,812 epoch 8 - iter 9/32 - loss 0.14195852 - samples/sec: 14.40 - lr: 0.020000\n",
+      "2021-09-21 21:14:40,010 epoch 8 - iter 12/32 - loss 0.11601588 - samples/sec: 15.23 - lr: 0.020000\n",
+      "2021-09-21 21:14:40,213 epoch 8 - iter 15/32 - loss 0.09448339 - samples/sec: 14.84 - lr: 0.020000\n",
+      "2021-09-21 21:14:40,418 epoch 8 - iter 18/32 - loss 0.08587809 - samples/sec: 14.62 - lr: 0.020000\n",
+      "2021-09-21 21:14:40,602 epoch 8 - iter 21/32 - loss 0.09530991 - samples/sec: 16.39 - lr: 0.020000\n",
+      "2021-09-21 21:14:40,796 epoch 8 - iter 24/32 - loss 0.08478382 - samples/sec: 15.48 - lr: 0.020000\n",
+      "2021-09-21 21:14:41,006 epoch 8 - iter 27/32 - loss 0.07566708 - samples/sec: 14.34 - lr: 0.020000\n",
+      "2021-09-21 21:14:41,185 epoch 8 - iter 30/32 - loss 0.06893347 - samples/sec: 16.80 - lr: 0.020000\n",
+      "2021-09-21 21:14:41,344 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:41,344 EPOCH 8 done: loss 0.0649 - lr 0.0200000\n",
+      "2021-09-21 21:14:41,474 DEV : loss 0.5665558576583862 - score 0.75\n",
+      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:14:41,475 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:14:41,477 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:41,719 epoch 9 - iter 3/32 - loss 0.00174791 - samples/sec: 14.31 - lr: 0.010000\n",
+      "2021-09-21 21:14:41,889 epoch 9 - iter 6/32 - loss 0.01391823 - samples/sec: 17.70 - lr: 0.010000\n",
+      "2021-09-21 21:14:42,105 epoch 9 - iter 9/32 - loss 0.02687844 - samples/sec: 13.98 - lr: 0.010000\n",
+      "2021-09-21 21:14:42,315 epoch 9 - iter 12/32 - loss 0.02113558 - samples/sec: 14.31 - lr: 0.010000\n",
+      "2021-09-21 21:14:42,506 epoch 9 - iter 15/32 - loss 0.01709093 - samples/sec: 15.75 - lr: 0.010000\n",
+      "2021-09-21 21:14:42,683 epoch 9 - iter 18/32 - loss 0.01489106 - samples/sec: 16.95 - lr: 0.010000\n",
+      "2021-09-21 21:14:42,898 epoch 9 - iter 21/32 - loss 0.01280553 - samples/sec: 14.02 - lr: 0.010000\n",
+      "2021-09-21 21:14:43,135 epoch 9 - iter 24/32 - loss 0.04946379 - samples/sec: 12.67 - lr: 0.010000\n",
+      "2021-09-21 21:14:43,323 epoch 9 - iter 27/32 - loss 0.04413504 - samples/sec: 16.00 - lr: 0.010000\n",
+      "2021-09-21 21:14:43,530 epoch 9 - iter 30/32 - loss 0.04013067 - samples/sec: 14.56 - lr: 0.010000\n",
+      "2021-09-21 21:14:43,683 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:43,683 EPOCH 9 done: loss 0.0386 - lr 0.0100000\n",
+      "2021-09-21 21:14:43,806 DEV : loss 1.1010756492614746 - score 0.75\n",
+      "2021-09-21 21:14:43,808 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:14:43,810 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:44,082 epoch 10 - iter 3/32 - loss 0.04256178 - samples/sec: 13.09 - lr: 0.010000\n",
+      "2021-09-21 21:14:44,253 epoch 10 - iter 6/32 - loss 0.02701308 - samples/sec: 17.63 - lr: 0.010000\n",
+      "2021-09-21 21:14:44,428 epoch 10 - iter 9/32 - loss 0.02244727 - samples/sec: 17.16 - lr: 0.010000\n",
+      "2021-09-21 21:14:44,643 epoch 10 - iter 12/32 - loss 0.02155382 - samples/sec: 14.02 - lr: 0.010000\n",
+      "2021-09-21 21:14:44,852 epoch 10 - iter 15/32 - loss 0.01746266 - samples/sec: 14.37 - lr: 0.010000\n",
+      "2021-09-21 21:14:45,049 epoch 10 - iter 18/32 - loss 0.01916251 - samples/sec: 15.30 - lr: 0.010000\n",
+      "2021-09-21 21:14:45,286 epoch 10 - iter 21/32 - loss 0.01856172 - samples/sec: 12.69 - lr: 0.010000\n",
+      "2021-09-21 21:14:45,498 epoch 10 - iter 24/32 - loss 0.01654987 - samples/sec: 14.17 - lr: 0.010000\n",
+      "2021-09-21 21:14:45,693 epoch 10 - iter 27/32 - loss 0.01480277 - samples/sec: 15.43 - lr: 0.010000\n",
+      "2021-09-21 21:14:45,918 epoch 10 - iter 30/32 - loss 0.01368571 - samples/sec: 13.40 - lr: 0.010000\n",
+      "2021-09-21 21:14:46,046 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:46,046 EPOCH 10 done: loss 0.0135 - lr 0.0100000\n",
+      "2021-09-21 21:14:46,203 DEV : loss 0.7548995614051819 - score 0.75\n",
+      "2021-09-21 21:14:46,205 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:14:50,205 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:50,205 Testing using best model ...\n",
+      "2021-09-21 21:14:50,207 loading file temp/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:37:29,557 \t0.5\n",
-      "2021-09-08 01:37:29,557 \n",
+      "2021-09-21 21:14:55,316 \t0.75\n",
+      "2021-09-21 21:14:55,317 \n",
       "Results:\n",
-      "- F-score (micro) 0.5\n",
+      "- F-score (micro) 0.75\n",
       "- F-score (macro) 0.4167\n",
-      "- Accuracy 0.5\n",
+      "- Accuracy 0.75\n",
       "\n",
       "By class:\n",
       "                              precision    recall  f1-score   support\n",
       "\n",
-      " this text expresses sadness     0.0000    0.0000    0.0000         1\n",
+      " this text expresses sadness     1.0000    1.0000    1.0000         2\n",
       "this text expresses optimism     0.5000    1.0000    0.6667         1\n",
-      "   this text expresses anger     0.0000    0.0000    0.0000         1\n",
-      "     this text expresses joy     1.0000    1.0000    1.0000         1\n",
+      "   this text expresses anger     0.0000    0.0000    0.0000         0\n",
+      "     this text expresses joy     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "                   micro avg     0.5000    0.5000    0.5000         4\n",
+      "                   micro avg     0.7500    0.7500    0.7500         4\n",
       "                   macro avg     0.3750    0.5000    0.4167         4\n",
-      "                weighted avg     0.3750    0.5000    0.4167         4\n",
-      "                 samples avg     0.5000    0.5000    0.5000         4\n",
+      "                weighted avg     0.6250    0.7500    0.6667         4\n",
+      "                 samples avg     0.7500    0.7500    0.7500         4\n",
       "\n",
-      "2021-09-08 01:37:29,558 ----------------------------------------------------------------------------------------------------\n",
-      "Accuracy Durchschnitt: 0.6288107202680067\n"
+      "2021-09-21 21:14:55,317 ----------------------------------------------------------------------------------------------------\n",
+      "Accuracy Durchschnitt: 0.6485762144053602\n"
      ]
     }
    ],
@@ -5721,11 +5725,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "id": "86988d74",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.6599664991624791, 0.6532663316582915, 0.6767169179229481, 0.626465661641541, 0.626465661641541]\n",
+      "0.019603000118362204\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(avg_acc_list)\n",
+    "print(statistics.pstdev(avg_acc_list))"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -5737,7 +5753,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 8,
    "id": "263ee449",
    "metadata": {},
    "outputs": [
@@ -5745,25 +5761,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:37:44,168 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
+      "2021-09-21 21:15:12,101 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:37:48,356 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:15:16,387 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 36/36 [00:00<00:00, 27578.99it/s]"
+      "100%|██████████| 36/36 [00:00<00:00, 41562.05it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:37:48,359 [b'emotions experienced when not in a state of well-being', b'the optimistic feeling that all is going to turn out well', b'the state of being angry', b'the emotion of great happiness']\n",
-      "2021-09-08 01:37:48,524 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:48,526 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:15:16,390 [b'emotions experienced when not in a state of well-being', b'the optimistic feeling that all is going to turn out well', b'the state of being angry', b'the emotion of great happiness']\n",
+      "2021-09-21 21:15:16,399 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:16,400 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -6076,24 +6092,26 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:37:48,527 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:48,527 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
-      "2021-09-08 01:37:48,527 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:48,528 Parameters:\n",
-      "2021-09-08 01:37:48,528  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:37:48,528  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:37:48,529  - patience: \"3\"\n",
-      "2021-09-08 01:37:48,529  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:37:48,529  - max_epochs: \"10\"\n",
-      "2021-09-08 01:37:48,529  - shuffle: \"True\"\n",
-      "2021-09-08 01:37:48,530  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:37:48,530  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:37:48,530 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:48,530 Model training base path: \"temp\"\n",
-      "2021-09-08 01:37:48,531 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:48,531 Device: cuda:0\n",
-      "2021-09-08 01:37:48,531 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:48,532 Embeddings storage mode: cpu\n"
+      "2021-09-21 21:15:16,401 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:16,401 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
+      "2021-09-21 21:15:16,402 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:16,402 Parameters:\n",
+      "2021-09-21 21:15:16,402  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:15:16,402  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:15:16,403  - patience: \"3\"\n",
+      "2021-09-21 21:15:16,403  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:15:16,403  - max_epochs: \"10\"\n",
+      "2021-09-21 21:15:16,404  - shuffle: \"True\"\n",
+      "2021-09-21 21:15:16,404  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:15:16,404  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:15:16,404 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:16,405 Model training base path: \"temp\"\n",
+      "2021-09-21 21:15:16,405 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:16,405 Device: cuda:0\n",
+      "2021-09-21 21:15:16,406 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:16,406 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:15:16,412 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:16,585 epoch 1 - iter 3/32 - loss 0.16562713 - samples/sec: 19.03 - lr: 0.020000\n"
      ]
     },
     {
@@ -6107,209 +6125,221 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:37:48,716 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:48,883 epoch 1 - iter 3/32 - loss 0.10001060 - samples/sec: 19.98 - lr: 0.020000\n",
-      "2021-09-08 01:37:49,061 epoch 1 - iter 6/32 - loss 0.09776885 - samples/sec: 16.88 - lr: 0.020000\n",
-      "2021-09-08 01:37:49,201 epoch 1 - iter 9/32 - loss 0.27294963 - samples/sec: 21.46 - lr: 0.020000\n",
-      "2021-09-08 01:37:49,387 epoch 1 - iter 12/32 - loss 0.36773472 - samples/sec: 16.25 - lr: 0.020000\n",
-      "2021-09-08 01:37:49,545 epoch 1 - iter 15/32 - loss 0.45340184 - samples/sec: 18.96 - lr: 0.020000\n",
-      "2021-09-08 01:37:49,717 epoch 1 - iter 18/32 - loss 0.59625497 - samples/sec: 17.51 - lr: 0.020000\n",
-      "2021-09-08 01:37:49,884 epoch 1 - iter 21/32 - loss 0.57405911 - samples/sec: 18.05 - lr: 0.020000\n",
-      "2021-09-08 01:37:50,028 epoch 1 - iter 24/32 - loss 0.50313326 - samples/sec: 21.01 - lr: 0.020000\n",
-      "2021-09-08 01:37:50,212 epoch 1 - iter 27/32 - loss 0.65194424 - samples/sec: 16.29 - lr: 0.020000\n",
-      "2021-09-08 01:37:50,366 epoch 1 - iter 30/32 - loss 0.63297886 - samples/sec: 19.66 - lr: 0.020000\n",
-      "2021-09-08 01:37:50,503 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:50,504 EPOCH 1 done: loss 0.6427 - lr 0.0200000\n",
-      "2021-09-08 01:37:50,898 DEV : loss 0.8917031288146973 - score 0.5\n",
-      "2021-09-08 01:37:50,898 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:15:16,746 epoch 1 - iter 6/32 - loss 0.09588316 - samples/sec: 18.68 - lr: 0.020000\n",
+      "2021-09-21 21:15:16,910 epoch 1 - iter 9/32 - loss 0.22379784 - samples/sec: 18.32 - lr: 0.020000\n",
+      "2021-09-21 21:15:17,072 epoch 1 - iter 12/32 - loss 0.58283526 - samples/sec: 18.58 - lr: 0.020000\n",
+      "2021-09-21 21:15:17,237 epoch 1 - iter 15/32 - loss 0.57551888 - samples/sec: 18.29 - lr: 0.020000\n",
+      "2021-09-21 21:15:17,414 epoch 1 - iter 18/32 - loss 0.72499232 - samples/sec: 17.01 - lr: 0.020000\n",
+      "2021-09-21 21:15:17,583 epoch 1 - iter 21/32 - loss 0.65516623 - samples/sec: 17.81 - lr: 0.020000\n",
+      "2021-09-21 21:15:17,755 epoch 1 - iter 24/32 - loss 0.58117421 - samples/sec: 17.52 - lr: 0.020000\n",
+      "2021-09-21 21:15:17,930 epoch 1 - iter 27/32 - loss 0.59479357 - samples/sec: 17.16 - lr: 0.020000\n",
+      "2021-09-21 21:15:18,099 epoch 1 - iter 30/32 - loss 0.57410439 - samples/sec: 17.91 - lr: 0.020000\n",
+      "2021-09-21 21:15:18,217 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:18,218 EPOCH 1 done: loss 0.6039 - lr 0.0200000\n",
+      "2021-09-21 21:15:18,297 DEV : loss 0.4264070391654968 - score 0.75\n",
+      "2021-09-21 21:15:18,297 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:37:59,087 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:59,354 epoch 2 - iter 3/32 - loss 0.23344089 - samples/sec: 12.78 - lr: 0.020000\n",
-      "2021-09-08 01:37:59,524 epoch 2 - iter 6/32 - loss 0.36747226 - samples/sec: 17.78 - lr: 0.020000\n",
-      "2021-09-08 01:37:59,711 epoch 2 - iter 9/32 - loss 0.59278478 - samples/sec: 16.09 - lr: 0.020000\n",
-      "2021-09-08 01:37:59,861 epoch 2 - iter 12/32 - loss 0.55927915 - samples/sec: 20.20 - lr: 0.020000\n",
-      "2021-09-08 01:38:00,050 epoch 2 - iter 15/32 - loss 0.53203406 - samples/sec: 15.94 - lr: 0.020000\n",
-      "2021-09-08 01:38:00,195 epoch 2 - iter 18/32 - loss 0.56176919 - samples/sec: 20.73 - lr: 0.020000\n",
-      "2021-09-08 01:38:00,349 epoch 2 - iter 21/32 - loss 0.49199433 - samples/sec: 19.55 - lr: 0.020000\n",
-      "2021-09-08 01:38:00,517 epoch 2 - iter 24/32 - loss 0.45696642 - samples/sec: 17.92 - lr: 0.020000\n",
-      "2021-09-08 01:38:00,665 epoch 2 - iter 27/32 - loss 0.43448362 - samples/sec: 20.34 - lr: 0.020000\n",
-      "2021-09-08 01:38:00,815 epoch 2 - iter 30/32 - loss 0.40339124 - samples/sec: 20.14 - lr: 0.020000\n",
-      "2021-09-08 01:38:00,937 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:00,937 EPOCH 2 done: loss 0.4544 - lr 0.0200000\n",
-      "2021-09-08 01:38:01,108 DEV : loss 1.3151347637176514 - score 0.25\n",
-      "2021-09-08 01:38:01,109 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:38:01,196 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:01,382 epoch 3 - iter 3/32 - loss 1.34650626 - samples/sec: 17.50 - lr: 0.020000\n",
-      "2021-09-08 01:38:01,552 epoch 3 - iter 6/32 - loss 1.13236462 - samples/sec: 17.77 - lr: 0.020000\n",
-      "2021-09-08 01:38:01,706 epoch 3 - iter 9/32 - loss 0.85318404 - samples/sec: 19.46 - lr: 0.020000\n",
-      "2021-09-08 01:38:01,846 epoch 3 - iter 12/32 - loss 0.64932256 - samples/sec: 21.65 - lr: 0.020000\n",
-      "2021-09-08 01:38:02,008 epoch 3 - iter 15/32 - loss 0.64390112 - samples/sec: 18.60 - lr: 0.020000\n",
-      "2021-09-08 01:38:02,173 epoch 3 - iter 18/32 - loss 0.65678805 - samples/sec: 18.20 - lr: 0.020000\n",
-      "2021-09-08 01:38:02,321 epoch 3 - iter 21/32 - loss 0.57946413 - samples/sec: 20.39 - lr: 0.020000\n",
-      "2021-09-08 01:38:02,470 epoch 3 - iter 24/32 - loss 0.54903313 - samples/sec: 20.18 - lr: 0.020000\n",
-      "2021-09-08 01:38:02,638 epoch 3 - iter 27/32 - loss 0.53936909 - samples/sec: 17.93 - lr: 0.020000\n",
-      "2021-09-08 01:38:02,779 epoch 3 - iter 30/32 - loss 0.49054171 - samples/sec: 21.42 - lr: 0.020000\n",
-      "2021-09-08 01:38:02,901 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:02,902 EPOCH 3 done: loss 0.4614 - lr 0.0200000\n",
-      "2021-09-08 01:38:03,121 DEV : loss 0.5880239605903625 - score 0.75\n",
-      "2021-09-08 01:38:03,129 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:15:22,654 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:22,857 epoch 2 - iter 3/32 - loss 1.04347258 - samples/sec: 17.05 - lr: 0.020000\n",
+      "2021-09-21 21:15:23,042 epoch 2 - iter 6/32 - loss 0.75640448 - samples/sec: 16.26 - lr: 0.020000\n",
+      "2021-09-21 21:15:23,225 epoch 2 - iter 9/32 - loss 0.72848531 - samples/sec: 16.49 - lr: 0.020000\n",
+      "2021-09-21 21:15:23,396 epoch 2 - iter 12/32 - loss 0.66800198 - samples/sec: 17.62 - lr: 0.020000\n",
+      "2021-09-21 21:15:23,575 epoch 2 - iter 15/32 - loss 0.68284794 - samples/sec: 16.73 - lr: 0.020000\n",
+      "2021-09-21 21:15:23,758 epoch 2 - iter 18/32 - loss 0.59508300 - samples/sec: 16.46 - lr: 0.020000\n",
+      "2021-09-21 21:15:23,913 epoch 2 - iter 21/32 - loss 0.61459689 - samples/sec: 19.45 - lr: 0.020000\n",
+      "2021-09-21 21:15:24,052 epoch 2 - iter 24/32 - loss 0.57480608 - samples/sec: 21.67 - lr: 0.020000\n",
+      "2021-09-21 21:15:24,205 epoch 2 - iter 27/32 - loss 0.53386801 - samples/sec: 19.67 - lr: 0.020000\n",
+      "2021-09-21 21:15:24,382 epoch 2 - iter 30/32 - loss 0.51825050 - samples/sec: 17.02 - lr: 0.020000\n",
+      "2021-09-21 21:15:24,514 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:24,515 EPOCH 2 done: loss 0.4925 - lr 0.0200000\n",
+      "2021-09-21 21:15:24,715 DEV : loss 0.5181623697280884 - score 0.75\n",
+      "2021-09-21 21:15:24,716 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:15:24,800 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:25,024 epoch 3 - iter 3/32 - loss 0.04293200 - samples/sec: 15.45 - lr: 0.020000\n",
+      "2021-09-21 21:15:25,208 epoch 3 - iter 6/32 - loss 0.13233890 - samples/sec: 16.38 - lr: 0.020000\n",
+      "2021-09-21 21:15:25,408 epoch 3 - iter 9/32 - loss 0.39390346 - samples/sec: 15.03 - lr: 0.020000\n",
+      "2021-09-21 21:15:25,607 epoch 3 - iter 12/32 - loss 0.40898348 - samples/sec: 15.19 - lr: 0.020000\n",
+      "2021-09-21 21:15:25,781 epoch 3 - iter 15/32 - loss 0.33021346 - samples/sec: 17.26 - lr: 0.020000\n",
+      "2021-09-21 21:15:25,972 epoch 3 - iter 18/32 - loss 0.42259842 - samples/sec: 15.75 - lr: 0.020000\n",
+      "2021-09-21 21:15:26,142 epoch 3 - iter 21/32 - loss 0.40409172 - samples/sec: 17.67 - lr: 0.020000\n",
+      "2021-09-21 21:15:26,351 epoch 3 - iter 24/32 - loss 0.37684577 - samples/sec: 14.42 - lr: 0.020000\n",
+      "2021-09-21 21:15:26,533 epoch 3 - iter 27/32 - loss 0.38081847 - samples/sec: 16.55 - lr: 0.020000\n",
+      "2021-09-21 21:15:26,719 epoch 3 - iter 30/32 - loss 0.38302967 - samples/sec: 16.21 - lr: 0.020000\n",
+      "2021-09-21 21:15:26,827 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:26,827 EPOCH 3 done: loss 0.3788 - lr 0.0200000\n",
+      "2021-09-21 21:15:29,465 DEV : loss 0.18952837586402893 - score 1.0\n",
+      "2021-09-21 21:15:29,466 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:38:09,006 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:09,270 epoch 4 - iter 3/32 - loss 0.00730898 - samples/sec: 13.73 - lr: 0.020000\n",
-      "2021-09-08 01:38:09,471 epoch 4 - iter 6/32 - loss 0.00982183 - samples/sec: 15.10 - lr: 0.020000\n",
-      "2021-09-08 01:38:09,699 epoch 4 - iter 9/32 - loss 0.04366334 - samples/sec: 13.20 - lr: 0.020000\n",
-      "2021-09-08 01:38:09,885 epoch 4 - iter 12/32 - loss 0.10123881 - samples/sec: 16.27 - lr: 0.020000\n",
-      "2021-09-08 01:38:10,075 epoch 4 - iter 15/32 - loss 0.08221725 - samples/sec: 15.82 - lr: 0.020000\n",
-      "2021-09-08 01:38:10,254 epoch 4 - iter 18/32 - loss 0.13027959 - samples/sec: 16.84 - lr: 0.020000\n",
-      "2021-09-08 01:38:10,420 epoch 4 - iter 21/32 - loss 0.11220116 - samples/sec: 18.22 - lr: 0.020000\n",
-      "2021-09-08 01:38:10,576 epoch 4 - iter 24/32 - loss 0.09892501 - samples/sec: 19.26 - lr: 0.020000\n",
-      "2021-09-08 01:38:10,742 epoch 4 - iter 27/32 - loss 0.09454062 - samples/sec: 18.18 - lr: 0.020000\n",
-      "2021-09-08 01:38:10,903 epoch 4 - iter 30/32 - loss 0.10047818 - samples/sec: 18.68 - lr: 0.020000\n",
-      "2021-09-08 01:38:11,013 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:11,013 EPOCH 4 done: loss 0.1211 - lr 0.0200000\n",
-      "2021-09-08 01:38:11,227 DEV : loss 0.781762421131134 - score 0.5\n",
-      "2021-09-08 01:38:11,228 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:38:11,327 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:11,501 epoch 5 - iter 3/32 - loss 0.02058843 - samples/sec: 19.01 - lr: 0.020000\n",
-      "2021-09-08 01:38:11,665 epoch 5 - iter 6/32 - loss 0.03920570 - samples/sec: 18.41 - lr: 0.020000\n",
-      "2021-09-08 01:38:11,831 epoch 5 - iter 9/32 - loss 0.10882326 - samples/sec: 18.07 - lr: 0.020000\n",
-      "2021-09-08 01:38:11,987 epoch 5 - iter 12/32 - loss 0.08449147 - samples/sec: 19.34 - lr: 0.020000\n",
-      "2021-09-08 01:38:12,144 epoch 5 - iter 15/32 - loss 0.06934562 - samples/sec: 19.22 - lr: 0.020000\n",
-      "2021-09-08 01:38:12,309 epoch 5 - iter 18/32 - loss 0.06462923 - samples/sec: 18.26 - lr: 0.020000\n",
-      "2021-09-08 01:38:12,472 epoch 5 - iter 21/32 - loss 0.06134383 - samples/sec: 18.59 - lr: 0.020000\n",
-      "2021-09-08 01:38:12,632 epoch 5 - iter 24/32 - loss 0.05400014 - samples/sec: 18.77 - lr: 0.020000\n",
-      "2021-09-08 01:38:12,795 epoch 5 - iter 27/32 - loss 0.05652255 - samples/sec: 18.54 - lr: 0.020000\n",
-      "2021-09-08 01:38:12,950 epoch 5 - iter 30/32 - loss 0.05261186 - samples/sec: 19.46 - lr: 0.020000\n",
-      "2021-09-08 01:38:13,055 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:13,056 EPOCH 5 done: loss 0.0493 - lr 0.0200000\n",
-      "2021-09-08 01:38:13,257 DEV : loss 1.0319136381149292 - score 0.75\n",
-      "2021-09-08 01:38:13,258 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:38:13,351 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:13,529 epoch 6 - iter 3/32 - loss 0.01055220 - samples/sec: 18.54 - lr: 0.020000\n",
-      "2021-09-08 01:38:13,685 epoch 6 - iter 6/32 - loss 0.00724436 - samples/sec: 19.36 - lr: 0.020000\n",
-      "2021-09-08 01:38:13,852 epoch 6 - iter 9/32 - loss 0.01755540 - samples/sec: 17.98 - lr: 0.020000\n",
-      "2021-09-08 01:38:14,013 epoch 6 - iter 12/32 - loss 0.02427483 - samples/sec: 18.73 - lr: 0.020000\n",
-      "2021-09-08 01:38:14,174 epoch 6 - iter 15/32 - loss 0.09813995 - samples/sec: 18.68 - lr: 0.020000\n",
-      "2021-09-08 01:38:14,343 epoch 6 - iter 18/32 - loss 0.10802097 - samples/sec: 17.84 - lr: 0.020000\n",
-      "2021-09-08 01:38:14,500 epoch 6 - iter 21/32 - loss 0.09362336 - samples/sec: 19.31 - lr: 0.020000\n",
-      "2021-09-08 01:38:14,663 epoch 6 - iter 24/32 - loss 0.09112848 - samples/sec: 18.45 - lr: 0.020000\n",
-      "2021-09-08 01:38:14,825 epoch 6 - iter 27/32 - loss 0.08137804 - samples/sec: 18.60 - lr: 0.020000\n",
-      "2021-09-08 01:38:14,986 epoch 6 - iter 30/32 - loss 0.07334237 - samples/sec: 18.72 - lr: 0.020000\n"
+      "2021-09-21 21:15:37,484 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:37,672 epoch 4 - iter 3/32 - loss 0.17996684 - samples/sec: 17.27 - lr: 0.020000\n",
+      "2021-09-21 21:15:37,871 epoch 4 - iter 6/32 - loss 0.11783091 - samples/sec: 15.17 - lr: 0.020000\n",
+      "2021-09-21 21:15:38,081 epoch 4 - iter 9/32 - loss 0.23511102 - samples/sec: 14.37 - lr: 0.020000\n",
+      "2021-09-21 21:15:38,314 epoch 4 - iter 12/32 - loss 0.27257459 - samples/sec: 12.90 - lr: 0.020000\n",
+      "2021-09-21 21:15:38,510 epoch 4 - iter 15/32 - loss 0.24550256 - samples/sec: 15.35 - lr: 0.020000\n",
+      "2021-09-21 21:15:38,725 epoch 4 - iter 18/32 - loss 0.22825011 - samples/sec: 14.00 - lr: 0.020000\n",
+      "2021-09-21 21:15:38,949 epoch 4 - iter 21/32 - loss 0.20968160 - samples/sec: 13.41 - lr: 0.020000\n",
+      "2021-09-21 21:15:39,163 epoch 4 - iter 24/32 - loss 0.19616815 - samples/sec: 14.06 - lr: 0.020000\n",
+      "2021-09-21 21:15:39,375 epoch 4 - iter 27/32 - loss 0.25518128 - samples/sec: 14.21 - lr: 0.020000\n",
+      "2021-09-21 21:15:39,600 epoch 4 - iter 30/32 - loss 0.26392749 - samples/sec: 13.34 - lr: 0.020000\n",
+      "2021-09-21 21:15:39,744 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:39,745 EPOCH 4 done: loss 0.2484 - lr 0.0200000\n",
+      "2021-09-21 21:15:41,013 DEV : loss 0.4405534863471985 - score 0.75\n",
+      "2021-09-21 21:15:41,013 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:15:41,022 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:41,356 epoch 5 - iter 3/32 - loss 0.02772426 - samples/sec: 10.41 - lr: 0.020000\n",
+      "2021-09-21 21:15:41,622 epoch 5 - iter 6/32 - loss 0.03060679 - samples/sec: 11.33 - lr: 0.020000\n",
+      "2021-09-21 21:15:41,866 epoch 5 - iter 9/32 - loss 0.21307241 - samples/sec: 12.32 - lr: 0.020000\n",
+      "2021-09-21 21:15:42,157 epoch 5 - iter 12/32 - loss 0.27138205 - samples/sec: 10.34 - lr: 0.020000\n",
+      "2021-09-21 21:15:42,378 epoch 5 - iter 15/32 - loss 0.22046534 - samples/sec: 13.63 - lr: 0.020000\n",
+      "2021-09-21 21:15:42,635 epoch 5 - iter 18/32 - loss 0.20502754 - samples/sec: 11.69 - lr: 0.020000\n",
+      "2021-09-21 21:15:42,916 epoch 5 - iter 21/32 - loss 0.20083428 - samples/sec: 10.70 - lr: 0.020000\n",
+      "2021-09-21 21:15:43,183 epoch 5 - iter 24/32 - loss 0.17844640 - samples/sec: 11.23 - lr: 0.020000\n",
+      "2021-09-21 21:15:43,475 epoch 5 - iter 27/32 - loss 0.19526435 - samples/sec: 10.31 - lr: 0.020000\n",
+      "2021-09-21 21:15:43,741 epoch 5 - iter 30/32 - loss 0.17940255 - samples/sec: 11.32 - lr: 0.020000\n",
+      "2021-09-21 21:15:43,920 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:43,920 EPOCH 5 done: loss 0.1692 - lr 0.0200000\n",
+      "2021-09-21 21:15:44,131 DEV : loss 0.008005468174815178 - score 1.0\n",
+      "2021-09-21 21:15:44,134 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:15:47,763 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:48,054 epoch 6 - iter 3/32 - loss 0.02426090 - samples/sec: 12.38 - lr: 0.020000\n",
+      "2021-09-21 21:15:48,257 epoch 6 - iter 6/32 - loss 0.01403287 - samples/sec: 14.80 - lr: 0.020000\n",
+      "2021-09-21 21:15:48,547 epoch 6 - iter 9/32 - loss 0.01356176 - samples/sec: 10.38 - lr: 0.020000\n",
+      "2021-09-21 21:15:48,793 epoch 6 - iter 12/32 - loss 0.01094189 - samples/sec: 12.20 - lr: 0.020000\n",
+      "2021-09-21 21:15:49,083 epoch 6 - iter 15/32 - loss 0.06386234 - samples/sec: 10.37 - lr: 0.020000\n",
+      "2021-09-21 21:15:49,343 epoch 6 - iter 18/32 - loss 0.05402186 - samples/sec: 11.58 - lr: 0.020000\n",
+      "2021-09-21 21:15:49,592 epoch 6 - iter 21/32 - loss 0.05831944 - samples/sec: 12.05 - lr: 0.020000\n",
+      "2021-09-21 21:15:49,862 epoch 6 - iter 24/32 - loss 0.05212279 - samples/sec: 11.16 - lr: 0.020000\n",
+      "2021-09-21 21:15:50,135 epoch 6 - iter 27/32 - loss 0.10941282 - samples/sec: 10.99 - lr: 0.020000\n",
+      "2021-09-21 21:15:50,424 epoch 6 - iter 30/32 - loss 0.10400140 - samples/sec: 10.40 - lr: 0.020000\n",
+      "2021-09-21 21:15:50,606 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:50,607 EPOCH 6 done: loss 0.0987 - lr 0.0200000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:38:15,095 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:15,095 EPOCH 6 done: loss 0.1248 - lr 0.0200000\n",
-      "2021-09-08 01:38:15,936 DEV : loss 0.8882318735122681 - score 0.75\n",
-      "2021-09-08 01:38:15,938 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:38:15,952 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:16,161 epoch 7 - iter 3/32 - loss 0.00234053 - samples/sec: 15.80 - lr: 0.020000\n",
-      "2021-09-08 01:38:16,359 epoch 7 - iter 6/32 - loss 0.00199630 - samples/sec: 15.22 - lr: 0.020000\n",
-      "2021-09-08 01:38:16,579 epoch 7 - iter 9/32 - loss 0.02030264 - samples/sec: 13.72 - lr: 0.020000\n",
-      "2021-09-08 01:38:16,794 epoch 7 - iter 12/32 - loss 0.01531422 - samples/sec: 14.00 - lr: 0.020000\n",
-      "2021-09-08 01:38:17,004 epoch 7 - iter 15/32 - loss 0.02090728 - samples/sec: 14.40 - lr: 0.020000\n",
-      "2021-09-08 01:38:17,209 epoch 7 - iter 18/32 - loss 0.02292771 - samples/sec: 14.73 - lr: 0.020000\n",
-      "2021-09-08 01:38:17,439 epoch 7 - iter 21/32 - loss 0.01978028 - samples/sec: 13.09 - lr: 0.020000\n",
-      "2021-09-08 01:38:17,643 epoch 7 - iter 24/32 - loss 0.01772347 - samples/sec: 14.82 - lr: 0.020000\n",
-      "2021-09-08 01:38:17,838 epoch 7 - iter 27/32 - loss 0.01587750 - samples/sec: 15.39 - lr: 0.020000\n",
-      "2021-09-08 01:38:18,038 epoch 7 - iter 30/32 - loss 0.02249284 - samples/sec: 15.08 - lr: 0.020000\n",
-      "2021-09-08 01:38:18,167 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:18,168 EPOCH 7 done: loss 0.0505 - lr 0.0200000\n",
-      "2021-09-08 01:38:18,377 DEV : loss 0.8316888809204102 - score 0.75\n",
-      "Epoch     7: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:38:18,380 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:38:18,448 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:18,631 epoch 8 - iter 3/32 - loss 0.00273773 - samples/sec: 17.74 - lr: 0.010000\n",
-      "2021-09-08 01:38:18,799 epoch 8 - iter 6/32 - loss 0.00164148 - samples/sec: 17.97 - lr: 0.010000\n",
-      "2021-09-08 01:38:18,984 epoch 8 - iter 9/32 - loss 0.01060770 - samples/sec: 16.28 - lr: 0.010000\n",
-      "2021-09-08 01:38:19,159 epoch 8 - iter 12/32 - loss 0.02342077 - samples/sec: 17.21 - lr: 0.010000\n",
-      "2021-09-08 01:38:19,293 epoch 8 - iter 15/32 - loss 0.01903923 - samples/sec: 22.51 - lr: 0.010000\n",
-      "2021-09-08 01:38:19,469 epoch 8 - iter 18/32 - loss 0.01635486 - samples/sec: 17.11 - lr: 0.010000\n",
-      "2021-09-08 01:38:19,615 epoch 8 - iter 21/32 - loss 0.01417453 - samples/sec: 20.61 - lr: 0.010000\n",
-      "2021-09-08 01:38:19,798 epoch 8 - iter 24/32 - loss 0.01256611 - samples/sec: 16.45 - lr: 0.010000\n",
-      "2021-09-08 01:38:19,940 epoch 8 - iter 27/32 - loss 0.01477405 - samples/sec: 21.26 - lr: 0.010000\n",
-      "2021-09-08 01:38:20,128 epoch 8 - iter 30/32 - loss 0.01624990 - samples/sec: 15.96 - lr: 0.010000\n",
-      "2021-09-08 01:38:20,220 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:20,220 EPOCH 8 done: loss 0.0156 - lr 0.0100000\n",
-      "2021-09-08 01:38:20,423 DEV : loss 0.9374256134033203 - score 0.75\n",
-      "2021-09-08 01:38:20,424 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:38:20,509 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:20,710 epoch 9 - iter 3/32 - loss 0.00142531 - samples/sec: 18.08 - lr: 0.010000\n",
-      "2021-09-08 01:38:20,848 epoch 9 - iter 6/32 - loss 0.00103612 - samples/sec: 21.80 - lr: 0.010000\n",
-      "2021-09-08 01:38:21,027 epoch 9 - iter 9/32 - loss 0.00102158 - samples/sec: 16.85 - lr: 0.010000\n",
-      "2021-09-08 01:38:21,162 epoch 9 - iter 12/32 - loss 0.00219809 - samples/sec: 22.28 - lr: 0.010000\n",
-      "2021-09-08 01:38:21,345 epoch 9 - iter 15/32 - loss 0.00210718 - samples/sec: 16.49 - lr: 0.010000\n",
-      "2021-09-08 01:38:21,483 epoch 9 - iter 18/32 - loss 0.00213591 - samples/sec: 21.86 - lr: 0.010000\n",
-      "2021-09-08 01:38:21,667 epoch 9 - iter 21/32 - loss 0.00205281 - samples/sec: 16.32 - lr: 0.010000\n",
-      "2021-09-08 01:38:21,828 epoch 9 - iter 24/32 - loss 0.00198872 - samples/sec: 18.77 - lr: 0.010000\n",
-      "2021-09-08 01:38:21,977 epoch 9 - iter 27/32 - loss 0.00185146 - samples/sec: 20.13 - lr: 0.010000\n",
-      "2021-09-08 01:38:22,161 epoch 9 - iter 30/32 - loss 0.00195289 - samples/sec: 16.44 - lr: 0.010000\n",
-      "2021-09-08 01:38:22,256 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:22,256 EPOCH 9 done: loss 0.0042 - lr 0.0100000\n",
-      "2021-09-08 01:38:22,435 DEV : loss 0.955722987651825 - score 0.75\n",
-      "2021-09-08 01:38:22,436 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:38:22,530 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:22,732 epoch 10 - iter 3/32 - loss 0.00303827 - samples/sec: 16.96 - lr: 0.010000\n",
-      "2021-09-08 01:38:22,912 epoch 10 - iter 6/32 - loss 0.00190733 - samples/sec: 18.97 - lr: 0.010000\n",
-      "2021-09-08 01:38:23,084 epoch 10 - iter 9/32 - loss 0.00207346 - samples/sec: 17.49 - lr: 0.010000\n",
-      "2021-09-08 01:38:23,220 epoch 10 - iter 12/32 - loss 0.00174240 - samples/sec: 22.30 - lr: 0.010000\n",
-      "2021-09-08 01:38:23,391 epoch 10 - iter 15/32 - loss 0.00412623 - samples/sec: 17.59 - lr: 0.010000\n",
-      "2021-09-08 01:38:23,563 epoch 10 - iter 18/32 - loss 0.00399350 - samples/sec: 17.58 - lr: 0.010000\n",
-      "2021-09-08 01:38:23,700 epoch 10 - iter 21/32 - loss 0.00345895 - samples/sec: 21.99 - lr: 0.010000\n",
-      "2021-09-08 01:38:23,897 epoch 10 - iter 24/32 - loss 0.00512083 - samples/sec: 15.27 - lr: 0.010000\n",
-      "2021-09-08 01:38:24,032 epoch 10 - iter 27/32 - loss 0.00459255 - samples/sec: 22.29 - lr: 0.010000\n",
-      "2021-09-08 01:38:24,188 epoch 10 - iter 30/32 - loss 0.01497796 - samples/sec: 19.27 - lr: 0.010000\n",
-      "2021-09-08 01:38:24,315 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:24,316 EPOCH 10 done: loss 0.0141 - lr 0.0100000\n",
-      "2021-09-08 01:38:24,431 DEV : loss 0.847375750541687 - score 0.75\n",
-      "2021-09-08 01:38:24,434 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:38:29,888 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:29,888 Testing using best model ...\n",
-      "2021-09-08 01:38:29,920 loading file temp/best-model.pt\n",
+      "2021-09-21 21:15:50,781 DEV : loss 0.06271302700042725 - score 1.0\n",
+      "2021-09-21 21:15:50,786 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:15:50,788 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:51,104 epoch 7 - iter 3/32 - loss 0.00412343 - samples/sec: 11.63 - lr: 0.020000\n",
+      "2021-09-21 21:15:51,373 epoch 7 - iter 6/32 - loss 0.00224363 - samples/sec: 11.21 - lr: 0.020000\n",
+      "2021-09-21 21:15:51,612 epoch 7 - iter 9/32 - loss 0.00217233 - samples/sec: 12.59 - lr: 0.020000\n",
+      "2021-09-21 21:15:51,891 epoch 7 - iter 12/32 - loss 0.00854264 - samples/sec: 10.75 - lr: 0.020000\n",
+      "2021-09-21 21:15:52,175 epoch 7 - iter 15/32 - loss 0.01949610 - samples/sec: 10.58 - lr: 0.020000\n",
+      "2021-09-21 21:15:52,415 epoch 7 - iter 18/32 - loss 0.01645554 - samples/sec: 12.58 - lr: 0.020000\n",
+      "2021-09-21 21:15:52,651 epoch 7 - iter 21/32 - loss 0.01617266 - samples/sec: 12.75 - lr: 0.020000\n",
+      "2021-09-21 21:15:52,897 epoch 7 - iter 24/32 - loss 0.01551354 - samples/sec: 12.22 - lr: 0.020000\n",
+      "2021-09-21 21:15:53,161 epoch 7 - iter 27/32 - loss 0.01396453 - samples/sec: 11.39 - lr: 0.020000\n",
+      "2021-09-21 21:15:53,406 epoch 7 - iter 30/32 - loss 0.01261784 - samples/sec: 12.24 - lr: 0.020000\n",
+      "2021-09-21 21:15:53,588 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:53,588 EPOCH 7 done: loss 0.0144 - lr 0.0200000\n",
+      "2021-09-21 21:15:53,788 DEV : loss 0.05891226977109909 - score 1.0\n",
+      "2021-09-21 21:15:53,790 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:15:53,792 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:54,084 epoch 8 - iter 3/32 - loss 0.00042735 - samples/sec: 11.61 - lr: 0.020000\n",
+      "2021-09-21 21:15:54,333 epoch 8 - iter 6/32 - loss 0.00069649 - samples/sec: 12.09 - lr: 0.020000\n",
+      "2021-09-21 21:15:54,544 epoch 8 - iter 9/32 - loss 0.00051690 - samples/sec: 14.21 - lr: 0.020000\n",
+      "2021-09-21 21:15:54,766 epoch 8 - iter 12/32 - loss 0.00055984 - samples/sec: 13.59 - lr: 0.020000\n",
+      "2021-09-21 21:15:54,983 epoch 8 - iter 15/32 - loss 0.00067218 - samples/sec: 13.84 - lr: 0.020000\n",
+      "2021-09-21 21:15:55,258 epoch 8 - iter 18/32 - loss 0.00076713 - samples/sec: 10.93 - lr: 0.020000\n",
+      "2021-09-21 21:15:55,495 epoch 8 - iter 21/32 - loss 0.00156976 - samples/sec: 12.66 - lr: 0.020000\n",
+      "2021-09-21 21:15:55,772 epoch 8 - iter 24/32 - loss 0.00151098 - samples/sec: 10.87 - lr: 0.020000\n",
+      "2021-09-21 21:15:56,041 epoch 8 - iter 27/32 - loss 0.00230834 - samples/sec: 11.20 - lr: 0.020000\n",
+      "2021-09-21 21:15:56,318 epoch 8 - iter 30/32 - loss 0.00279897 - samples/sec: 10.85 - lr: 0.020000\n",
+      "2021-09-21 21:15:56,508 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:56,508 EPOCH 8 done: loss 0.0031 - lr 0.0200000\n",
+      "2021-09-21 21:15:56,717 DEV : loss 0.04847387224435806 - score 1.0\n",
+      "2021-09-21 21:15:56,724 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:15:56,725 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:57,035 epoch 9 - iter 3/32 - loss 0.00116160 - samples/sec: 11.48 - lr: 0.020000\n",
+      "2021-09-21 21:15:57,290 epoch 9 - iter 6/32 - loss 0.00066645 - samples/sec: 11.76 - lr: 0.020000\n",
+      "2021-09-21 21:15:57,562 epoch 9 - iter 9/32 - loss 0.00146791 - samples/sec: 11.06 - lr: 0.020000\n",
+      "2021-09-21 21:15:57,831 epoch 9 - iter 12/32 - loss 0.00124233 - samples/sec: 11.17 - lr: 0.020000\n",
+      "2021-09-21 21:15:58,106 epoch 9 - iter 15/32 - loss 0.00111354 - samples/sec: 10.96 - lr: 0.020000\n",
+      "2021-09-21 21:15:58,347 epoch 9 - iter 18/32 - loss 0.00137050 - samples/sec: 12.46 - lr: 0.020000\n",
+      "2021-09-21 21:15:58,576 epoch 9 - iter 21/32 - loss 0.00121068 - samples/sec: 13.14 - lr: 0.020000\n",
+      "2021-09-21 21:15:58,838 epoch 9 - iter 24/32 - loss 0.00107115 - samples/sec: 11.48 - lr: 0.020000\n",
+      "2021-09-21 21:15:59,084 epoch 9 - iter 27/32 - loss 0.00099761 - samples/sec: 12.19 - lr: 0.020000\n",
+      "2021-09-21 21:15:59,357 epoch 9 - iter 30/32 - loss 0.00093520 - samples/sec: 11.04 - lr: 0.020000\n",
+      "2021-09-21 21:15:59,532 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:59,533 EPOCH 9 done: loss 0.0009 - lr 0.0200000\n",
+      "2021-09-21 21:15:59,734 DEV : loss 0.05983671173453331 - score 1.0\n",
+      "Epoch     9: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:15:59,739 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:15:59,741 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:00,037 epoch 10 - iter 3/32 - loss 0.00110799 - samples/sec: 11.41 - lr: 0.010000\n",
+      "2021-09-21 21:16:00,302 epoch 10 - iter 6/32 - loss 0.00067573 - samples/sec: 11.37 - lr: 0.010000\n",
+      "2021-09-21 21:16:00,570 epoch 10 - iter 9/32 - loss 0.00065679 - samples/sec: 11.21 - lr: 0.010000\n",
+      "2021-09-21 21:16:00,833 epoch 10 - iter 12/32 - loss 0.00073844 - samples/sec: 11.43 - lr: 0.010000\n",
+      "2021-09-21 21:16:01,089 epoch 10 - iter 15/32 - loss 0.00061120 - samples/sec: 11.76 - lr: 0.010000\n",
+      "2021-09-21 21:16:01,352 epoch 10 - iter 18/32 - loss 0.00056841 - samples/sec: 11.40 - lr: 0.010000\n",
+      "2021-09-21 21:16:01,629 epoch 10 - iter 21/32 - loss 0.00055166 - samples/sec: 10.87 - lr: 0.010000\n",
+      "2021-09-21 21:16:01,868 epoch 10 - iter 24/32 - loss 0.00058870 - samples/sec: 12.63 - lr: 0.010000\n",
+      "2021-09-21 21:16:02,077 epoch 10 - iter 27/32 - loss 0.00055473 - samples/sec: 14.38 - lr: 0.010000\n",
+      "2021-09-21 21:16:02,306 epoch 10 - iter 30/32 - loss 0.00064335 - samples/sec: 13.16 - lr: 0.010000\n",
+      "2021-09-21 21:16:02,482 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:02,482 EPOCH 10 done: loss 0.0006 - lr 0.0100000\n",
+      "2021-09-21 21:16:02,687 DEV : loss 0.05355245620012283 - score 1.0\n",
+      "2021-09-21 21:16:02,692 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:16:07,701 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:07,701 Testing using best model ...\n",
+      "2021-09-21 21:16:07,703 loading file temp/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:38:39,304 \t0.5\n",
-      "2021-09-08 01:38:39,305 \n",
+      "2021-09-21 21:16:15,468 \t0.75\n",
+      "2021-09-21 21:16:15,468 \n",
       "Results:\n",
-      "- F-score (micro) 0.5\n",
-      "- F-score (macro) 0.375\n",
-      "- Accuracy 0.5\n",
+      "- F-score (micro) 0.75\n",
+      "- F-score (macro) 0.45\n",
+      "- Accuracy 0.75\n",
       "\n",
       "By class:\n",
       "                                                           precision    recall  f1-score   support\n",
       "\n",
-      "   emotions experienced when not in a state of well-being     1.0000    1.0000    1.0000         1\n",
-      "the optimistic feeling that all is going to turn out well     0.0000    0.0000    0.0000         2\n",
-      "                                 the state of being angry     0.0000    0.0000    0.0000         0\n",
-      "                           the emotion of great happiness     0.3333    1.0000    0.5000         1\n",
+      "   emotions experienced when not in a state of well-being     0.6667    1.0000    0.8000         2\n",
+      "the optimistic feeling that all is going to turn out well     0.0000    0.0000    0.0000         1\n",
+      "                                 the state of being angry     1.0000    1.0000    1.0000         1\n",
+      "                           the emotion of great happiness     0.0000    0.0000    0.0000         0\n",
       "\n",
-      "                                                micro avg     0.5000    0.5000    0.5000         4\n",
-      "                                                macro avg     0.3333    0.5000    0.3750         4\n",
-      "                                             weighted avg     0.3333    0.5000    0.3750         4\n",
-      "                                              samples avg     0.5000    0.5000    0.5000         4\n",
+      "                                                micro avg     0.7500    0.7500    0.7500         4\n",
+      "                                                macro avg     0.4167    0.5000    0.4500         4\n",
+      "                                             weighted avg     0.5833    0.7500    0.6500         4\n",
+      "                                              samples avg     0.7500    0.7500    0.7500         4\n",
       "\n",
-      "2021-09-08 01:38:39,305 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:53,176 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
+      "2021-09-21 21:16:15,469 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:31,470 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:38:57,588 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:16:35,826 Computing label dictionary. Progress:\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 36/36 [00:00<00:00, 45289.43it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 21:16:35,828 [b'emotions experienced when not in a state of well-being', b'the optimistic feeling that all is going to turn out well', b'the state of being angry', b'the emotion of great happiness']\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 36/36 [00:00<00:00, 20424.04it/s]"
+      "\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:38:57,593 [b'emotions experienced when not in a state of well-being', b'the optimistic feeling that all is going to turn out well', b'the state of being angry', b'the emotion of great happiness']\n",
-      "2021-09-08 01:38:57,693 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:57,695 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:16:36,196 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:36,198 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -6622,241 +6652,235 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:38:57,696 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:57,696 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
-      "2021-09-08 01:38:57,696 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:57,697 Parameters:\n",
-      "2021-09-08 01:38:57,697  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:38:57,698  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:38:57,698  - patience: \"3\"\n",
-      "2021-09-08 01:38:57,698  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:38:57,699  - max_epochs: \"10\"\n",
-      "2021-09-08 01:38:57,699  - shuffle: \"True\"\n",
-      "2021-09-08 01:38:57,699  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:38:57,700  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:38:57,700 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:57,700 Model training base path: \"temp\"\n",
-      "2021-09-08 01:38:57,703 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:57,704 Device: cuda:0\n",
-      "2021-09-08 01:38:57,704 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:57,704 Embeddings storage mode: cpu\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
+      "2021-09-21 21:16:36,198 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:36,199 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
+      "2021-09-21 21:16:36,199 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:36,199 Parameters:\n",
+      "2021-09-21 21:16:36,199  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:16:36,200  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:16:36,200  - patience: \"3\"\n",
+      "2021-09-21 21:16:36,200  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:16:36,201  - max_epochs: \"10\"\n",
+      "2021-09-21 21:16:36,201  - shuffle: \"True\"\n",
+      "2021-09-21 21:16:36,201  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:16:36,201  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:16:36,202 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:36,202 Model training base path: \"temp\"\n",
+      "2021-09-21 21:16:36,202 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:36,202 Device: cuda:0\n",
+      "2021-09-21 21:16:36,203 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:36,203 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:16:36,835 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:36,995 epoch 1 - iter 3/32 - loss 0.52741761 - samples/sec: 20.48 - lr: 0.020000\n",
+      "2021-09-21 21:16:37,151 epoch 1 - iter 6/32 - loss 0.28269368 - samples/sec: 19.32 - lr: 0.020000\n",
+      "2021-09-21 21:16:37,329 epoch 1 - iter 9/32 - loss 0.20262182 - samples/sec: 16.96 - lr: 0.020000\n",
+      "2021-09-21 21:16:37,498 epoch 1 - iter 12/32 - loss 0.62248490 - samples/sec: 17.83 - lr: 0.020000\n",
+      "2021-09-21 21:16:37,675 epoch 1 - iter 15/32 - loss 0.57674243 - samples/sec: 17.00 - lr: 0.020000\n",
+      "2021-09-21 21:16:37,860 epoch 1 - iter 18/32 - loss 0.52446697 - samples/sec: 16.20 - lr: 0.020000\n",
+      "2021-09-21 21:16:38,036 epoch 1 - iter 21/32 - loss 0.59168725 - samples/sec: 17.12 - lr: 0.020000\n",
+      "2021-09-21 21:16:38,205 epoch 1 - iter 24/32 - loss 0.53336616 - samples/sec: 17.79 - lr: 0.020000\n",
+      "2021-09-21 21:16:38,363 epoch 1 - iter 27/32 - loss 0.63665644 - samples/sec: 19.12 - lr: 0.020000\n",
+      "2021-09-21 21:16:38,501 epoch 1 - iter 30/32 - loss 0.68755640 - samples/sec: 21.87 - lr: 0.020000\n",
+      "2021-09-21 21:16:38,593 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:38,594 EPOCH 1 done: loss 0.6556 - lr 0.0200000\n",
+      "2021-09-21 21:16:38,772 DEV : loss 0.5403187274932861 - score 0.5\n",
+      "2021-09-21 21:16:38,772 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:16:47,997 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:48,254 epoch 2 - iter 3/32 - loss 0.09433617 - samples/sec: 13.19 - lr: 0.020000\n",
+      "2021-09-21 21:16:48,471 epoch 2 - iter 6/32 - loss 0.49545754 - samples/sec: 13.84 - lr: 0.020000\n",
+      "2021-09-21 21:16:48,681 epoch 2 - iter 9/32 - loss 0.59026733 - samples/sec: 14.32 - lr: 0.020000\n",
+      "2021-09-21 21:16:48,909 epoch 2 - iter 12/32 - loss 0.56653635 - samples/sec: 13.23 - lr: 0.020000\n",
+      "2021-09-21 21:16:49,133 epoch 2 - iter 15/32 - loss 0.61822723 - samples/sec: 13.42 - lr: 0.020000\n",
+      "2021-09-21 21:16:49,352 epoch 2 - iter 18/32 - loss 0.60715951 - samples/sec: 13.74 - lr: 0.020000\n",
+      "2021-09-21 21:16:49,568 epoch 2 - iter 21/32 - loss 0.71740840 - samples/sec: 13.97 - lr: 0.020000\n",
+      "2021-09-21 21:16:49,803 epoch 2 - iter 24/32 - loss 0.68528733 - samples/sec: 12.76 - lr: 0.020000\n",
+      "2021-09-21 21:16:50,018 epoch 2 - iter 27/32 - loss 0.61658459 - samples/sec: 14.04 - lr: 0.020000\n",
+      "2021-09-21 21:16:50,186 epoch 2 - iter 30/32 - loss 0.56308373 - samples/sec: 17.85 - lr: 0.020000\n",
+      "2021-09-21 21:16:50,301 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:50,301 EPOCH 2 done: loss 0.5425 - lr 0.0200000\n",
+      "2021-09-21 21:16:51,831 DEV : loss 0.8261685371398926 - score 0.25\n",
+      "2021-09-21 21:16:51,832 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:16:51,905 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:52,211 epoch 3 - iter 3/32 - loss 0.63156513 - samples/sec: 10.43 - lr: 0.020000\n",
+      "2021-09-21 21:16:52,438 epoch 3 - iter 6/32 - loss 0.67963540 - samples/sec: 13.24 - lr: 0.020000\n",
+      "2021-09-21 21:16:52,671 epoch 3 - iter 9/32 - loss 0.86235402 - samples/sec: 12.93 - lr: 0.020000\n",
+      "2021-09-21 21:16:52,909 epoch 3 - iter 12/32 - loss 0.67469941 - samples/sec: 12.65 - lr: 0.020000\n",
+      "2021-09-21 21:16:53,168 epoch 3 - iter 15/32 - loss 0.61030128 - samples/sec: 11.59 - lr: 0.020000\n",
+      "2021-09-21 21:16:53,424 epoch 3 - iter 18/32 - loss 0.62933783 - samples/sec: 11.74 - lr: 0.020000\n",
+      "2021-09-21 21:16:53,706 epoch 3 - iter 21/32 - loss 0.57039036 - samples/sec: 10.67 - lr: 0.020000\n",
+      "2021-09-21 21:16:53,941 epoch 3 - iter 24/32 - loss 0.50747981 - samples/sec: 12.78 - lr: 0.020000\n",
+      "2021-09-21 21:16:54,175 epoch 3 - iter 27/32 - loss 0.52407884 - samples/sec: 12.87 - lr: 0.020000\n",
+      "2021-09-21 21:16:54,449 epoch 3 - iter 30/32 - loss 0.56132771 - samples/sec: 10.98 - lr: 0.020000\n",
+      "2021-09-21 21:16:54,616 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:54,617 EPOCH 3 done: loss 0.5533 - lr 0.0200000\n",
+      "2021-09-21 21:16:54,777 DEV : loss 0.4672014117240906 - score 0.5\n",
+      "2021-09-21 21:16:54,782 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:16:59,855 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:00,081 epoch 4 - iter 3/32 - loss 0.30251439 - samples/sec: 15.19 - lr: 0.020000\n",
+      "2021-09-21 21:17:00,277 epoch 4 - iter 6/32 - loss 0.24682119 - samples/sec: 15.35 - lr: 0.020000\n",
+      "2021-09-21 21:17:00,488 epoch 4 - iter 9/32 - loss 0.24453968 - samples/sec: 14.27 - lr: 0.020000\n",
+      "2021-09-21 21:17:00,682 epoch 4 - iter 12/32 - loss 0.19128596 - samples/sec: 15.47 - lr: 0.020000\n",
+      "2021-09-21 21:17:00,901 epoch 4 - iter 15/32 - loss 0.17967233 - samples/sec: 13.75 - lr: 0.020000\n",
+      "2021-09-21 21:17:01,122 epoch 4 - iter 18/32 - loss 0.15689660 - samples/sec: 13.62 - lr: 0.020000\n",
+      "2021-09-21 21:17:01,317 epoch 4 - iter 21/32 - loss 0.15514975 - samples/sec: 15.40 - lr: 0.020000\n",
+      "2021-09-21 21:17:01,529 epoch 4 - iter 24/32 - loss 0.23835477 - samples/sec: 14.22 - lr: 0.020000\n",
+      "2021-09-21 21:17:01,739 epoch 4 - iter 27/32 - loss 0.22885436 - samples/sec: 14.32 - lr: 0.020000\n",
+      "2021-09-21 21:17:01,930 epoch 4 - iter 30/32 - loss 0.26688155 - samples/sec: 15.78 - lr: 0.020000\n",
+      "2021-09-21 21:17:02,049 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:02,050 EPOCH 4 done: loss 0.2679 - lr 0.0200000\n",
+      "2021-09-21 21:17:02,207 DEV : loss 0.5861425995826721 - score 0.5\n",
+      "2021-09-21 21:17:02,209 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:17:02,234 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:02,502 epoch 5 - iter 3/32 - loss 0.24044475 - samples/sec: 13.11 - lr: 0.020000\n",
+      "2021-09-21 21:17:02,677 epoch 5 - iter 6/32 - loss 0.15304092 - samples/sec: 17.14 - lr: 0.020000\n",
+      "2021-09-21 21:17:02,869 epoch 5 - iter 9/32 - loss 0.16385225 - samples/sec: 15.68 - lr: 0.020000\n",
+      "2021-09-21 21:17:03,053 epoch 5 - iter 12/32 - loss 0.18229064 - samples/sec: 16.40 - lr: 0.020000\n",
+      "2021-09-21 21:17:03,244 epoch 5 - iter 15/32 - loss 0.15429842 - samples/sec: 15.71 - lr: 0.020000\n",
+      "2021-09-21 21:17:03,430 epoch 5 - iter 18/32 - loss 0.13182100 - samples/sec: 16.21 - lr: 0.020000\n",
+      "2021-09-21 21:17:03,635 epoch 5 - iter 21/32 - loss 0.17321951 - samples/sec: 14.66 - lr: 0.020000\n",
+      "2021-09-21 21:17:03,817 epoch 5 - iter 24/32 - loss 0.15591378 - samples/sec: 16.49 - lr: 0.020000\n",
+      "2021-09-21 21:17:04,029 epoch 5 - iter 27/32 - loss 0.14490251 - samples/sec: 14.25 - lr: 0.020000\n",
+      "2021-09-21 21:17:04,205 epoch 5 - iter 30/32 - loss 0.13568218 - samples/sec: 17.05 - lr: 0.020000\n",
+      "2021-09-21 21:17:04,326 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:38:58,724 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:58,909 epoch 1 - iter 3/32 - loss 0.08797081 - samples/sec: 19.57 - lr: 0.020000\n",
-      "2021-09-08 01:38:59,098 epoch 1 - iter 6/32 - loss 0.16358537 - samples/sec: 15.99 - lr: 0.020000\n",
-      "2021-09-08 01:38:59,236 epoch 1 - iter 9/32 - loss 0.11329872 - samples/sec: 21.82 - lr: 0.020000\n",
-      "2021-09-08 01:38:59,432 epoch 1 - iter 12/32 - loss 0.44152697 - samples/sec: 15.41 - lr: 0.020000\n",
-      "2021-09-08 01:38:59,581 epoch 1 - iter 15/32 - loss 0.61449972 - samples/sec: 20.17 - lr: 0.020000\n",
-      "2021-09-08 01:38:59,771 epoch 1 - iter 18/32 - loss 0.64630047 - samples/sec: 15.84 - lr: 0.020000\n",
-      "2021-09-08 01:38:59,917 epoch 1 - iter 21/32 - loss 0.71988388 - samples/sec: 20.67 - lr: 0.020000\n",
-      "2021-09-08 01:39:00,089 epoch 1 - iter 24/32 - loss 0.63541668 - samples/sec: 17.51 - lr: 0.020000\n",
-      "2021-09-08 01:39:00,250 epoch 1 - iter 27/32 - loss 0.70742014 - samples/sec: 18.67 - lr: 0.020000\n",
-      "2021-09-08 01:39:00,407 epoch 1 - iter 30/32 - loss 0.68259694 - samples/sec: 19.18 - lr: 0.020000\n",
-      "2021-09-08 01:39:00,544 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:00,545 EPOCH 1 done: loss 0.6419 - lr 0.0200000\n",
-      "2021-09-08 01:39:00,643 DEV : loss 0.5899181962013245 - score 0.75\n",
-      "2021-09-08 01:39:00,644 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:17:04,327 EPOCH 5 done: loss 0.1354 - lr 0.0200000\n",
+      "2021-09-21 21:17:04,430 DEV : loss 0.7875989675521851 - score 0.5\n",
+      "2021-09-21 21:17:04,434 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:17:04,436 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:04,673 epoch 6 - iter 3/32 - loss 0.00291364 - samples/sec: 15.31 - lr: 0.020000\n",
+      "2021-09-21 21:17:04,853 epoch 6 - iter 6/32 - loss 0.07927232 - samples/sec: 16.75 - lr: 0.020000\n",
+      "2021-09-21 21:17:05,041 epoch 6 - iter 9/32 - loss 0.05433768 - samples/sec: 15.99 - lr: 0.020000\n",
+      "2021-09-21 21:17:05,217 epoch 6 - iter 12/32 - loss 0.06324878 - samples/sec: 17.10 - lr: 0.020000\n",
+      "2021-09-21 21:17:05,406 epoch 6 - iter 15/32 - loss 0.05612651 - samples/sec: 15.97 - lr: 0.020000\n",
+      "2021-09-21 21:17:05,615 epoch 6 - iter 18/32 - loss 0.04762137 - samples/sec: 14.36 - lr: 0.020000\n",
+      "2021-09-21 21:17:05,798 epoch 6 - iter 21/32 - loss 0.04089475 - samples/sec: 16.49 - lr: 0.020000\n",
+      "2021-09-21 21:17:05,990 epoch 6 - iter 24/32 - loss 0.03701643 - samples/sec: 15.65 - lr: 0.020000\n",
+      "2021-09-21 21:17:06,182 epoch 6 - iter 27/32 - loss 0.03570627 - samples/sec: 15.73 - lr: 0.020000\n",
+      "2021-09-21 21:17:06,372 epoch 6 - iter 30/32 - loss 0.04988087 - samples/sec: 15.85 - lr: 0.020000\n",
+      "2021-09-21 21:17:06,516 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:06,516 EPOCH 6 done: loss 0.0468 - lr 0.0200000\n",
+      "2021-09-21 21:17:06,635 DEV : loss 0.6560462713241577 - score 0.75\n",
+      "2021-09-21 21:17:06,635 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:39:04,841 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:05,096 epoch 2 - iter 3/32 - loss 0.27500453 - samples/sec: 13.73 - lr: 0.020000\n",
-      "2021-09-08 01:39:05,281 epoch 2 - iter 6/32 - loss 0.69853774 - samples/sec: 16.27 - lr: 0.020000\n",
-      "2021-09-08 01:39:05,444 epoch 2 - iter 9/32 - loss 0.48012369 - samples/sec: 18.57 - lr: 0.020000\n",
-      "2021-09-08 01:39:05,613 epoch 2 - iter 12/32 - loss 0.36676995 - samples/sec: 17.75 - lr: 0.020000\n",
-      "2021-09-08 01:39:05,803 epoch 2 - iter 15/32 - loss 0.44152798 - samples/sec: 15.82 - lr: 0.020000\n",
-      "2021-09-08 01:39:06,049 epoch 2 - iter 18/32 - loss 0.43731494 - samples/sec: 12.25 - lr: 0.020000\n",
-      "2021-09-08 01:39:06,226 epoch 2 - iter 21/32 - loss 0.41085897 - samples/sec: 17.03 - lr: 0.020000\n",
-      "2021-09-08 01:39:06,397 epoch 2 - iter 24/32 - loss 0.44385820 - samples/sec: 17.66 - lr: 0.020000\n",
-      "2021-09-08 01:39:06,577 epoch 2 - iter 27/32 - loss 0.46176539 - samples/sec: 16.68 - lr: 0.020000\n",
-      "2021-09-08 01:39:06,762 epoch 2 - iter 30/32 - loss 0.44813231 - samples/sec: 16.29 - lr: 0.020000\n",
-      "2021-09-08 01:39:06,907 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:06,908 EPOCH 2 done: loss 0.4378 - lr 0.0200000\n",
-      "2021-09-08 01:39:07,459 DEV : loss 0.28038230538368225 - score 0.75\n",
-      "2021-09-08 01:39:07,462 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:17:10,790 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:11,017 epoch 7 - iter 3/32 - loss 0.00662823 - samples/sec: 15.07 - lr: 0.020000\n",
+      "2021-09-21 21:17:11,223 epoch 7 - iter 6/32 - loss 0.00391942 - samples/sec: 14.60 - lr: 0.020000\n",
+      "2021-09-21 21:17:11,428 epoch 7 - iter 9/32 - loss 0.13599737 - samples/sec: 14.63 - lr: 0.020000\n",
+      "2021-09-21 21:17:11,663 epoch 7 - iter 12/32 - loss 0.11283954 - samples/sec: 12.83 - lr: 0.020000\n",
+      "2021-09-21 21:17:11,874 epoch 7 - iter 15/32 - loss 0.09236464 - samples/sec: 14.26 - lr: 0.020000\n",
+      "2021-09-21 21:17:12,092 epoch 7 - iter 18/32 - loss 0.08033012 - samples/sec: 13.75 - lr: 0.020000\n",
+      "2021-09-21 21:17:12,305 epoch 7 - iter 21/32 - loss 0.09346098 - samples/sec: 14.17 - lr: 0.020000\n",
+      "2021-09-21 21:17:12,502 epoch 7 - iter 24/32 - loss 0.08250821 - samples/sec: 15.23 - lr: 0.020000\n",
+      "2021-09-21 21:17:12,707 epoch 7 - iter 27/32 - loss 0.07375510 - samples/sec: 14.66 - lr: 0.020000\n",
+      "2021-09-21 21:17:12,924 epoch 7 - iter 30/32 - loss 0.09560881 - samples/sec: 13.91 - lr: 0.020000\n",
+      "2021-09-21 21:17:13,072 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:13,072 EPOCH 7 done: loss 0.0960 - lr 0.0200000\n",
+      "2021-09-21 21:17:13,229 DEV : loss 0.41081225872039795 - score 0.75\n",
+      "2021-09-21 21:17:13,230 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:39:11,549 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:11,797 epoch 3 - iter 3/32 - loss 0.06130242 - samples/sec: 13.88 - lr: 0.020000\n",
-      "2021-09-08 01:39:12,019 epoch 3 - iter 6/32 - loss 0.18906354 - samples/sec: 13.56 - lr: 0.020000\n",
-      "2021-09-08 01:39:12,173 epoch 3 - iter 9/32 - loss 0.26077712 - samples/sec: 19.57 - lr: 0.020000\n",
-      "2021-09-08 01:39:12,355 epoch 3 - iter 12/32 - loss 0.27673759 - samples/sec: 16.58 - lr: 0.020000\n",
-      "2021-09-08 01:39:12,538 epoch 3 - iter 15/32 - loss 0.25021626 - samples/sec: 16.45 - lr: 0.020000\n",
-      "2021-09-08 01:39:12,784 epoch 3 - iter 18/32 - loss 0.26534799 - samples/sec: 12.23 - lr: 0.020000\n",
-      "2021-09-08 01:39:12,967 epoch 3 - iter 21/32 - loss 0.31032934 - samples/sec: 16.42 - lr: 0.020000\n",
-      "2021-09-08 01:39:13,137 epoch 3 - iter 24/32 - loss 0.27267139 - samples/sec: 17.73 - lr: 0.020000\n",
-      "2021-09-08 01:39:13,313 epoch 3 - iter 27/32 - loss 0.31537547 - samples/sec: 17.11 - lr: 0.020000\n",
-      "2021-09-08 01:39:13,487 epoch 3 - iter 30/32 - loss 0.28467115 - samples/sec: 17.22 - lr: 0.020000\n",
-      "2021-09-08 01:39:13,614 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:13,615 EPOCH 3 done: loss 0.3077 - lr 0.0200000\n",
-      "2021-09-08 01:39:13,824 DEV : loss 0.17355231940746307 - score 0.75\n",
-      "2021-09-08 01:39:13,827 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:17:18,699 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:18,939 epoch 8 - iter 3/32 - loss 0.00161496 - samples/sec: 14.65 - lr: 0.020000\n",
+      "2021-09-21 21:17:19,176 epoch 8 - iter 6/32 - loss 0.03580238 - samples/sec: 12.70 - lr: 0.020000\n",
+      "2021-09-21 21:17:19,370 epoch 8 - iter 9/32 - loss 0.02607827 - samples/sec: 15.50 - lr: 0.020000\n",
+      "2021-09-21 21:17:19,568 epoch 8 - iter 12/32 - loss 0.02001659 - samples/sec: 15.21 - lr: 0.020000\n",
+      "2021-09-21 21:17:19,781 epoch 8 - iter 15/32 - loss 0.01611752 - samples/sec: 14.10 - lr: 0.020000\n",
+      "2021-09-21 21:17:20,000 epoch 8 - iter 18/32 - loss 0.02157182 - samples/sec: 13.73 - lr: 0.020000\n",
+      "2021-09-21 21:17:20,209 epoch 8 - iter 21/32 - loss 0.01954387 - samples/sec: 14.45 - lr: 0.020000\n",
+      "2021-09-21 21:17:20,423 epoch 8 - iter 24/32 - loss 0.01719526 - samples/sec: 14.01 - lr: 0.020000\n",
+      "2021-09-21 21:17:20,619 epoch 8 - iter 27/32 - loss 0.01554862 - samples/sec: 15.39 - lr: 0.020000\n",
+      "2021-09-21 21:17:20,807 epoch 8 - iter 30/32 - loss 0.01416585 - samples/sec: 15.95 - lr: 0.020000\n",
+      "2021-09-21 21:17:20,976 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:20,976 EPOCH 8 done: loss 0.0149 - lr 0.0200000\n",
+      "2021-09-21 21:17:21,105 DEV : loss 0.4031255841255188 - score 1.0\n",
+      "2021-09-21 21:17:21,106 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:39:18,684 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:18,941 epoch 4 - iter 3/32 - loss 0.06246408 - samples/sec: 13.05 - lr: 0.020000\n",
-      "2021-09-08 01:39:19,127 epoch 4 - iter 6/32 - loss 0.04308743 - samples/sec: 16.17 - lr: 0.020000\n",
-      "2021-09-08 01:39:19,293 epoch 4 - iter 9/32 - loss 0.03570463 - samples/sec: 18.24 - lr: 0.020000\n",
-      "2021-09-08 01:39:19,453 epoch 4 - iter 12/32 - loss 0.03093646 - samples/sec: 18.78 - lr: 0.020000\n",
-      "2021-09-08 01:39:19,673 epoch 4 - iter 15/32 - loss 0.12141839 - samples/sec: 13.65 - lr: 0.020000\n",
-      "2021-09-08 01:39:19,947 epoch 4 - iter 18/32 - loss 0.10335784 - samples/sec: 11.00 - lr: 0.020000\n",
-      "2021-09-08 01:39:20,138 epoch 4 - iter 21/32 - loss 0.09206273 - samples/sec: 15.78 - lr: 0.020000\n",
-      "2021-09-08 01:39:20,330 epoch 4 - iter 24/32 - loss 0.12909529 - samples/sec: 15.69 - lr: 0.020000\n",
-      "2021-09-08 01:39:20,519 epoch 4 - iter 27/32 - loss 0.11563803 - samples/sec: 15.93 - lr: 0.020000\n",
-      "2021-09-08 01:39:20,739 epoch 4 - iter 30/32 - loss 0.10426263 - samples/sec: 13.64 - lr: 0.020000\n",
-      "2021-09-08 01:39:20,892 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:20,893 EPOCH 4 done: loss 0.0981 - lr 0.0200000\n",
-      "2021-09-08 01:39:21,043 DEV : loss 0.479407399892807 - score 0.75\n",
-      "2021-09-08 01:39:21,044 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:39:21,046 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:21,256 epoch 5 - iter 3/32 - loss 0.00321825 - samples/sec: 16.35 - lr: 0.020000\n",
-      "2021-09-08 01:39:21,416 epoch 5 - iter 6/32 - loss 0.00491238 - samples/sec: 18.81 - lr: 0.020000\n",
-      "2021-09-08 01:39:21,589 epoch 5 - iter 9/32 - loss 0.02037447 - samples/sec: 17.34 - lr: 0.020000\n",
-      "2021-09-08 01:39:21,746 epoch 5 - iter 12/32 - loss 0.01561880 - samples/sec: 19.22 - lr: 0.020000\n",
-      "2021-09-08 01:39:21,922 epoch 5 - iter 15/32 - loss 0.01316789 - samples/sec: 17.15 - lr: 0.020000\n",
-      "2021-09-08 01:39:22,098 epoch 5 - iter 18/32 - loss 0.01162133 - samples/sec: 17.10 - lr: 0.020000\n",
-      "2021-09-08 01:39:22,285 epoch 5 - iter 21/32 - loss 0.01012186 - samples/sec: 16.14 - lr: 0.020000\n",
-      "2021-09-08 01:39:22,495 epoch 5 - iter 24/32 - loss 0.03626833 - samples/sec: 14.35 - lr: 0.020000\n",
-      "2021-09-08 01:39:22,844 epoch 5 - iter 27/32 - loss 0.03523878 - samples/sec: 8.60 - lr: 0.020000\n",
-      "2021-09-08 01:39:23,019 epoch 5 - iter 30/32 - loss 0.03336246 - samples/sec: 17.29 - lr: 0.020000\n",
-      "2021-09-08 01:39:23,159 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:23,160 EPOCH 5 done: loss 0.0322 - lr 0.0200000\n",
-      "2021-09-08 01:39:23,304 DEV : loss 0.6264558434486389 - score 0.75\n",
-      "2021-09-08 01:39:23,308 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:39:23,310 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:23,546 epoch 6 - iter 3/32 - loss 0.00174528 - samples/sec: 17.01 - lr: 0.020000\n",
-      "2021-09-08 01:39:23,745 epoch 6 - iter 6/32 - loss 0.00219853 - samples/sec: 15.12 - lr: 0.020000\n",
-      "2021-09-08 01:39:23,901 epoch 6 - iter 9/32 - loss 0.00180004 - samples/sec: 19.30 - lr: 0.020000\n",
-      "2021-09-08 01:39:24,082 epoch 6 - iter 12/32 - loss 0.00191919 - samples/sec: 16.60 - lr: 0.020000\n",
-      "2021-09-08 01:39:24,325 epoch 6 - iter 15/32 - loss 0.00219584 - samples/sec: 12.37 - lr: 0.020000\n",
-      "2021-09-08 01:39:24,496 epoch 6 - iter 18/32 - loss 0.00229275 - samples/sec: 17.57 - lr: 0.020000\n",
-      "2021-09-08 01:39:24,680 epoch 6 - iter 21/32 - loss 0.00228221 - samples/sec: 16.40 - lr: 0.020000\n",
-      "2021-09-08 01:39:24,873 epoch 6 - iter 24/32 - loss 0.00243030 - samples/sec: 15.61 - lr: 0.020000\n",
-      "2021-09-08 01:39:25,113 epoch 6 - iter 27/32 - loss 0.00270245 - samples/sec: 12.50 - lr: 0.020000\n",
-      "2021-09-08 01:39:25,287 epoch 6 - iter 30/32 - loss 0.00254428 - samples/sec: 17.28 - lr: 0.020000\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 01:39:25,405 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:25,405 EPOCH 6 done: loss 0.0024 - lr 0.0200000\n",
-      "2021-09-08 01:39:25,509 DEV : loss 0.6265194416046143 - score 0.75\n",
-      "2021-09-08 01:39:25,511 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:39:25,513 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:25,717 epoch 7 - iter 3/32 - loss 0.00149569 - samples/sec: 16.93 - lr: 0.020000\n",
-      "2021-09-08 01:39:25,918 epoch 7 - iter 6/32 - loss 0.00145458 - samples/sec: 14.97 - lr: 0.020000\n",
-      "2021-09-08 01:39:26,167 epoch 7 - iter 9/32 - loss 0.00107463 - samples/sec: 12.05 - lr: 0.020000\n",
-      "2021-09-08 01:39:26,342 epoch 7 - iter 12/32 - loss 0.00112112 - samples/sec: 17.26 - lr: 0.020000\n",
-      "2021-09-08 01:39:26,529 epoch 7 - iter 15/32 - loss 0.00140616 - samples/sec: 16.12 - lr: 0.020000\n",
-      "2021-09-08 01:39:26,735 epoch 7 - iter 18/32 - loss 0.00229129 - samples/sec: 14.60 - lr: 0.020000\n",
-      "2021-09-08 01:39:26,904 epoch 7 - iter 21/32 - loss 0.00215296 - samples/sec: 17.80 - lr: 0.020000\n",
-      "2021-09-08 01:39:27,118 epoch 7 - iter 24/32 - loss 0.00438287 - samples/sec: 14.06 - lr: 0.020000\n",
-      "2021-09-08 01:39:27,342 epoch 7 - iter 27/32 - loss 0.00512774 - samples/sec: 13.38 - lr: 0.020000\n",
-      "2021-09-08 01:39:27,533 epoch 7 - iter 30/32 - loss 0.00478599 - samples/sec: 15.77 - lr: 0.020000\n",
-      "2021-09-08 01:39:27,639 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:27,640 EPOCH 7 done: loss 0.0046 - lr 0.0200000\n",
-      "2021-09-08 01:39:27,746 DEV : loss 0.7019645571708679 - score 0.75\n",
-      "Epoch     7: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:39:27,748 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:39:27,751 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:28,009 epoch 8 - iter 3/32 - loss 0.00192687 - samples/sec: 13.63 - lr: 0.010000\n",
-      "2021-09-08 01:39:28,263 epoch 8 - iter 6/32 - loss 0.00156277 - samples/sec: 11.83 - lr: 0.010000\n",
-      "2021-09-08 01:39:28,462 epoch 8 - iter 9/32 - loss 0.00154505 - samples/sec: 15.19 - lr: 0.010000\n",
-      "2021-09-08 01:39:28,619 epoch 8 - iter 12/32 - loss 0.00134438 - samples/sec: 19.09 - lr: 0.010000\n",
-      "2021-09-08 01:39:28,810 epoch 8 - iter 15/32 - loss 0.00238584 - samples/sec: 15.81 - lr: 0.010000\n",
-      "2021-09-08 01:39:29,014 epoch 8 - iter 18/32 - loss 0.00219264 - samples/sec: 14.74 - lr: 0.010000\n",
-      "2021-09-08 01:39:29,252 epoch 8 - iter 21/32 - loss 0.00208553 - samples/sec: 12.61 - lr: 0.010000\n",
-      "2021-09-08 01:39:29,452 epoch 8 - iter 24/32 - loss 0.00194068 - samples/sec: 15.05 - lr: 0.010000\n",
-      "2021-09-08 01:39:29,640 epoch 8 - iter 27/32 - loss 0.00180413 - samples/sec: 16.00 - lr: 0.010000\n",
-      "2021-09-08 01:39:29,819 epoch 8 - iter 30/32 - loss 0.00232986 - samples/sec: 16.85 - lr: 0.010000\n",
-      "2021-09-08 01:39:29,942 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:29,942 EPOCH 8 done: loss 0.0022 - lr 0.0100000\n",
-      "2021-09-08 01:39:30,062 DEV : loss 0.6983410716056824 - score 0.75\n",
-      "2021-09-08 01:39:30,063 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:39:30,065 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:30,259 epoch 9 - iter 3/32 - loss 0.00053494 - samples/sec: 17.21 - lr: 0.010000\n",
-      "2021-09-08 01:39:30,446 epoch 9 - iter 6/32 - loss 0.00475923 - samples/sec: 16.13 - lr: 0.010000\n",
-      "2021-09-08 01:39:30,638 epoch 9 - iter 9/32 - loss 0.00332904 - samples/sec: 15.70 - lr: 0.010000\n",
-      "2021-09-08 01:39:30,861 epoch 9 - iter 12/32 - loss 0.00280281 - samples/sec: 13.47 - lr: 0.010000\n",
-      "2021-09-08 01:39:31,097 epoch 9 - iter 15/32 - loss 0.00250685 - samples/sec: 12.72 - lr: 0.010000\n",
-      "2021-09-08 01:39:31,268 epoch 9 - iter 18/32 - loss 0.00221320 - samples/sec: 17.67 - lr: 0.010000\n",
-      "2021-09-08 01:39:31,444 epoch 9 - iter 21/32 - loss 0.00206865 - samples/sec: 17.11 - lr: 0.010000\n",
-      "2021-09-08 01:39:31,610 epoch 9 - iter 24/32 - loss 0.00203124 - samples/sec: 18.08 - lr: 0.010000\n",
-      "2021-09-08 01:39:31,794 epoch 9 - iter 27/32 - loss 0.00187913 - samples/sec: 16.42 - lr: 0.010000\n",
-      "2021-09-08 01:39:32,010 epoch 9 - iter 30/32 - loss 0.00197515 - samples/sec: 13.88 - lr: 0.010000\n",
-      "2021-09-08 01:39:32,151 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:32,151 EPOCH 9 done: loss 0.0019 - lr 0.0100000\n",
-      "2021-09-08 01:39:32,265 DEV : loss 0.719595730304718 - score 0.75\n",
-      "2021-09-08 01:39:32,266 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:39:32,268 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:32,451 epoch 10 - iter 3/32 - loss 0.00114946 - samples/sec: 17.92 - lr: 0.010000\n",
-      "2021-09-08 01:39:32,664 epoch 10 - iter 6/32 - loss 0.00312535 - samples/sec: 14.14 - lr: 0.010000\n",
-      "2021-09-08 01:39:32,912 epoch 10 - iter 9/32 - loss 0.00224459 - samples/sec: 12.14 - lr: 0.010000\n",
-      "2021-09-08 01:39:33,099 epoch 10 - iter 12/32 - loss 0.00200948 - samples/sec: 16.11 - lr: 0.010000\n",
-      "2021-09-08 01:39:33,281 epoch 10 - iter 15/32 - loss 0.00188646 - samples/sec: 16.52 - lr: 0.010000\n",
-      "2021-09-08 01:39:33,453 epoch 10 - iter 18/32 - loss 0.00235177 - samples/sec: 17.47 - lr: 0.010000\n",
-      "2021-09-08 01:39:33,624 epoch 10 - iter 21/32 - loss 0.00208409 - samples/sec: 17.63 - lr: 0.010000\n",
-      "2021-09-08 01:39:33,851 epoch 10 - iter 24/32 - loss 0.00190618 - samples/sec: 13.23 - lr: 0.010000\n",
-      "2021-09-08 01:39:34,047 epoch 10 - iter 27/32 - loss 0.00173001 - samples/sec: 15.41 - lr: 0.010000\n",
-      "2021-09-08 01:39:34,226 epoch 10 - iter 30/32 - loss 0.00159240 - samples/sec: 16.82 - lr: 0.010000\n",
-      "2021-09-08 01:39:34,363 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:34,364 EPOCH 10 done: loss 0.0016 - lr 0.0100000\n",
-      "2021-09-08 01:39:34,478 DEV : loss 0.6386497616767883 - score 0.5\n",
-      "2021-09-08 01:39:34,479 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:39:38,750 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:38,751 Testing using best model ...\n",
-      "2021-09-08 01:39:38,753 loading file temp/best-model.pt\n",
+      "2021-09-21 21:17:25,091 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:25,358 epoch 9 - iter 3/32 - loss 0.00275684 - samples/sec: 13.75 - lr: 0.020000\n",
+      "2021-09-21 21:17:25,567 epoch 9 - iter 6/32 - loss 0.00231099 - samples/sec: 14.41 - lr: 0.020000\n",
+      "2021-09-21 21:17:25,740 epoch 9 - iter 9/32 - loss 0.00238703 - samples/sec: 17.33 - lr: 0.020000\n",
+      "2021-09-21 21:17:25,956 epoch 9 - iter 12/32 - loss 0.00228089 - samples/sec: 13.98 - lr: 0.020000\n",
+      "2021-09-21 21:17:26,173 epoch 9 - iter 15/32 - loss 0.00215011 - samples/sec: 13.81 - lr: 0.020000\n",
+      "2021-09-21 21:17:26,378 epoch 9 - iter 18/32 - loss 0.00200165 - samples/sec: 14.68 - lr: 0.020000\n",
+      "2021-09-21 21:17:26,587 epoch 9 - iter 21/32 - loss 0.00183856 - samples/sec: 14.45 - lr: 0.020000\n",
+      "2021-09-21 21:17:26,785 epoch 9 - iter 24/32 - loss 0.00167418 - samples/sec: 15.16 - lr: 0.020000\n",
+      "2021-09-21 21:17:26,999 epoch 9 - iter 27/32 - loss 0.00353035 - samples/sec: 14.10 - lr: 0.020000\n",
+      "2021-09-21 21:17:27,201 epoch 9 - iter 30/32 - loss 0.00330457 - samples/sec: 14.85 - lr: 0.020000\n",
+      "2021-09-21 21:17:27,350 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:27,351 EPOCH 9 done: loss 0.0032 - lr 0.0200000\n",
+      "2021-09-21 21:17:27,481 DEV : loss 0.6552219390869141 - score 0.75\n",
+      "2021-09-21 21:17:27,481 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:17:27,483 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:27,721 epoch 10 - iter 3/32 - loss 0.00040169 - samples/sec: 14.65 - lr: 0.020000\n",
+      "2021-09-21 21:17:27,922 epoch 10 - iter 6/32 - loss 0.00235641 - samples/sec: 14.96 - lr: 0.020000\n",
+      "2021-09-21 21:17:28,109 epoch 10 - iter 9/32 - loss 0.00165507 - samples/sec: 16.03 - lr: 0.020000\n",
+      "2021-09-21 21:17:28,320 epoch 10 - iter 12/32 - loss 0.00139491 - samples/sec: 14.29 - lr: 0.020000\n",
+      "2021-09-21 21:17:28,479 epoch 10 - iter 15/32 - loss 0.00144730 - samples/sec: 18.88 - lr: 0.020000\n",
+      "2021-09-21 21:17:28,687 epoch 10 - iter 18/32 - loss 0.05041230 - samples/sec: 14.46 - lr: 0.020000\n",
+      "2021-09-21 21:17:28,874 epoch 10 - iter 21/32 - loss 0.04326646 - samples/sec: 16.13 - lr: 0.020000\n",
+      "2021-09-21 21:17:29,099 epoch 10 - iter 24/32 - loss 0.03805429 - samples/sec: 13.37 - lr: 0.020000\n",
+      "2021-09-21 21:17:29,301 epoch 10 - iter 27/32 - loss 0.03392909 - samples/sec: 14.86 - lr: 0.020000\n",
+      "2021-09-21 21:17:29,486 epoch 10 - iter 30/32 - loss 0.03070913 - samples/sec: 16.25 - lr: 0.020000\n",
+      "2021-09-21 21:17:29,636 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:29,637 EPOCH 10 done: loss 0.0289 - lr 0.0200000\n",
+      "2021-09-21 21:17:29,778 DEV : loss 0.5976288318634033 - score 0.75\n",
+      "2021-09-21 21:17:29,778 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:17:33,887 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:33,888 Testing using best model ...\n",
+      "2021-09-21 21:17:33,889 loading file temp/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:39:43,941 \t1.0\n",
-      "2021-09-08 01:39:43,941 \n",
+      "2021-09-21 21:17:38,747 \t0.75\n",
+      "2021-09-21 21:17:38,747 \n",
       "Results:\n",
-      "- F-score (micro) 1.0\n",
-      "- F-score (macro) 0.75\n",
-      "- Accuracy 1.0\n",
+      "- F-score (micro) 0.75\n",
+      "- F-score (macro) 0.6667\n",
+      "- Accuracy 0.75\n",
       "\n",
       "By class:\n",
       "                                                           precision    recall  f1-score   support\n",
       "\n",
-      "   emotions experienced when not in a state of well-being     0.0000    0.0000    0.0000         0\n",
+      "   emotions experienced when not in a state of well-being     0.5000    1.0000    0.6667         1\n",
       "the optimistic feeling that all is going to turn out well     1.0000    1.0000    1.0000         1\n",
-      "                                 the state of being angry     1.0000    1.0000    1.0000         2\n",
+      "                                 the state of being angry     0.0000    0.0000    0.0000         1\n",
       "                           the emotion of great happiness     1.0000    1.0000    1.0000         1\n",
       "\n",
-      "                                                micro avg     1.0000    1.0000    1.0000         4\n",
-      "                                                macro avg     0.7500    0.7500    0.7500         4\n",
-      "                                             weighted avg     1.0000    1.0000    1.0000         4\n",
-      "                                              samples avg     1.0000    1.0000    1.0000         4\n",
-      "\n",
-      "2021-09-08 01:39:43,942 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:55,206 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
+      "                                                micro avg     0.7500    0.7500    0.7500         4\n",
+      "                                                macro avg     0.6250    0.7500    0.6667         4\n",
+      "                                             weighted avg     0.6250    0.7500    0.6667         4\n",
+      "                                              samples avg     0.7500    0.7500    0.7500         4\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 21:17:38,748 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:51,007 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:39:59,234 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:17:55,767 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 36/36 [00:00<00:00, 42212.73it/s]"
+      "100%|██████████| 36/36 [00:00<00:00, 44752.50it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:39:59,237 [b'emotions experienced when not in a state of well-being', b'the optimistic feeling that all is going to turn out well', b'the state of being angry', b'the emotion of great happiness']\n",
-      "2021-09-08 01:39:59,248 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:59,250 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:17:55,770 [b'emotions experienced when not in a state of well-being', b'the optimistic feeling that all is going to turn out well', b'the state of being angry', b'the emotion of great happiness']\n",
+      "2021-09-21 21:17:55,778 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:55,780 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -7169,25 +7193,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:39:59,250 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:59,251 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
-      "2021-09-08 01:39:59,251 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:59,251 Parameters:\n",
-      "2021-09-08 01:39:59,251  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:39:59,252  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:39:59,252  - patience: \"3\"\n",
-      "2021-09-08 01:39:59,252  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:39:59,253  - max_epochs: \"10\"\n",
-      "2021-09-08 01:39:59,253  - shuffle: \"True\"\n",
-      "2021-09-08 01:39:59,253  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:39:59,253  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:39:59,254 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:59,254 Model training base path: \"temp\"\n",
-      "2021-09-08 01:39:59,254 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:59,255 Device: cuda:0\n",
-      "2021-09-08 01:39:59,255 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:59,255 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:39:59,263 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:17:55,781 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:55,781 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
+      "2021-09-21 21:17:55,781 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:55,782 Parameters:\n",
+      "2021-09-21 21:17:55,782  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:17:55,782  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:17:55,782  - patience: \"3\"\n",
+      "2021-09-21 21:17:55,783  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:17:55,783  - max_epochs: \"10\"\n",
+      "2021-09-21 21:17:55,783  - shuffle: \"True\"\n",
+      "2021-09-21 21:17:55,784  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:17:55,784  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:17:55,784 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:55,784 Model training base path: \"temp\"\n",
+      "2021-09-21 21:17:55,785 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:55,785 Device: cuda:0\n",
+      "2021-09-21 21:17:55,785 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:55,785 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:17:55,792 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -7201,210 +7225,222 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:39:59,444 epoch 1 - iter 3/32 - loss 0.23509853 - samples/sec: 18.11 - lr: 0.020000\n",
-      "2021-09-08 01:39:59,605 epoch 1 - iter 6/32 - loss 0.16075205 - samples/sec: 18.74 - lr: 0.020000\n",
-      "2021-09-08 01:39:59,753 epoch 1 - iter 9/32 - loss 0.49601987 - samples/sec: 20.35 - lr: 0.020000\n",
-      "2021-09-08 01:39:59,944 epoch 1 - iter 12/32 - loss 0.61733919 - samples/sec: 15.72 - lr: 0.020000\n",
-      "2021-09-08 01:40:00,090 epoch 1 - iter 15/32 - loss 0.59231024 - samples/sec: 20.75 - lr: 0.020000\n",
-      "2021-09-08 01:40:00,265 epoch 1 - iter 18/32 - loss 0.59354515 - samples/sec: 17.11 - lr: 0.020000\n",
-      "2021-09-08 01:40:00,454 epoch 1 - iter 21/32 - loss 0.52473193 - samples/sec: 15.99 - lr: 0.020000\n",
-      "2021-09-08 01:40:00,600 epoch 1 - iter 24/32 - loss 0.47911637 - samples/sec: 20.63 - lr: 0.020000\n",
-      "2021-09-08 01:40:00,801 epoch 1 - iter 27/32 - loss 0.61700412 - samples/sec: 14.95 - lr: 0.020000\n",
-      "2021-09-08 01:40:00,946 epoch 1 - iter 30/32 - loss 0.66137432 - samples/sec: 20.74 - lr: 0.020000\n",
-      "2021-09-08 01:40:01,055 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:01,055 EPOCH 1 done: loss 0.6463 - lr 0.0200000\n",
-      "2021-09-08 01:40:01,289 DEV : loss 0.9912522435188293 - score 0.5\n",
-      "2021-09-08 01:40:01,290 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:17:56,093 epoch 1 - iter 3/32 - loss 0.15241034 - samples/sec: 11.95 - lr: 0.020000\n",
+      "2021-09-21 21:17:56,360 epoch 1 - iter 6/32 - loss 0.10080774 - samples/sec: 11.25 - lr: 0.020000\n",
+      "2021-09-21 21:17:56,641 epoch 1 - iter 9/32 - loss 0.26953015 - samples/sec: 10.72 - lr: 0.020000\n",
+      "2021-09-21 21:17:56,928 epoch 1 - iter 12/32 - loss 0.36753742 - samples/sec: 10.47 - lr: 0.020000\n",
+      "2021-09-21 21:17:57,216 epoch 1 - iter 15/32 - loss 0.33124741 - samples/sec: 10.43 - lr: 0.020000\n",
+      "2021-09-21 21:17:57,485 epoch 1 - iter 18/32 - loss 0.44707635 - samples/sec: 11.19 - lr: 0.020000\n",
+      "2021-09-21 21:17:57,758 epoch 1 - iter 21/32 - loss 0.38876515 - samples/sec: 11.03 - lr: 0.020000\n",
+      "2021-09-21 21:17:58,017 epoch 1 - iter 24/32 - loss 0.39913966 - samples/sec: 11.61 - lr: 0.020000\n",
+      "2021-09-21 21:17:58,278 epoch 1 - iter 27/32 - loss 0.51040641 - samples/sec: 11.50 - lr: 0.020000\n",
+      "2021-09-21 21:17:58,540 epoch 1 - iter 30/32 - loss 0.52480194 - samples/sec: 11.47 - lr: 0.020000\n",
+      "2021-09-21 21:17:58,718 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:58,719 EPOCH 1 done: loss 0.5151 - lr 0.0200000\n",
+      "2021-09-21 21:17:58,829 DEV : loss 1.3165942430496216 - score 0.25\n",
+      "2021-09-21 21:17:58,829 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:40:05,706 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:05,934 epoch 2 - iter 3/32 - loss 1.41349445 - samples/sec: 14.79 - lr: 0.020000\n",
-      "2021-09-08 01:40:06,164 epoch 2 - iter 6/32 - loss 1.17405031 - samples/sec: 13.06 - lr: 0.020000\n",
-      "2021-09-08 01:40:06,358 epoch 2 - iter 9/32 - loss 0.85653469 - samples/sec: 15.48 - lr: 0.020000\n",
-      "2021-09-08 01:40:06,552 epoch 2 - iter 12/32 - loss 0.67764450 - samples/sec: 15.55 - lr: 0.020000\n",
-      "2021-09-08 01:40:06,742 epoch 2 - iter 15/32 - loss 0.68898043 - samples/sec: 15.83 - lr: 0.020000\n",
-      "2021-09-08 01:40:06,955 epoch 2 - iter 18/32 - loss 0.63050590 - samples/sec: 14.12 - lr: 0.020000\n",
-      "2021-09-08 01:40:07,130 epoch 2 - iter 21/32 - loss 0.62969217 - samples/sec: 17.21 - lr: 0.020000\n",
-      "2021-09-08 01:40:07,386 epoch 2 - iter 24/32 - loss 0.63150385 - samples/sec: 11.75 - lr: 0.020000\n",
-      "2021-09-08 01:40:07,585 epoch 2 - iter 27/32 - loss 0.63025250 - samples/sec: 15.15 - lr: 0.020000\n",
-      "2021-09-08 01:40:07,786 epoch 2 - iter 30/32 - loss 0.58971762 - samples/sec: 14.94 - lr: 0.020000\n",
-      "2021-09-08 01:40:07,942 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:07,943 EPOCH 2 done: loss 0.6153 - lr 0.0200000\n",
-      "2021-09-08 01:40:08,050 DEV : loss 0.3997902572154999 - score 0.75\n",
-      "2021-09-08 01:40:08,050 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:18:13,283 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:13,508 epoch 2 - iter 3/32 - loss 0.07153117 - samples/sec: 15.15 - lr: 0.020000\n",
+      "2021-09-21 21:18:13,724 epoch 2 - iter 6/32 - loss 0.22817033 - samples/sec: 13.90 - lr: 0.020000\n",
+      "2021-09-21 21:18:13,949 epoch 2 - iter 9/32 - loss 0.73650109 - samples/sec: 13.38 - lr: 0.020000\n",
+      "2021-09-21 21:18:14,158 epoch 2 - iter 12/32 - loss 0.90652082 - samples/sec: 14.43 - lr: 0.020000\n",
+      "2021-09-21 21:18:14,373 epoch 2 - iter 15/32 - loss 0.93170223 - samples/sec: 13.97 - lr: 0.020000\n",
+      "2021-09-21 21:18:14,588 epoch 2 - iter 18/32 - loss 0.90499772 - samples/sec: 14.00 - lr: 0.020000\n",
+      "2021-09-21 21:18:14,788 epoch 2 - iter 21/32 - loss 0.86502572 - samples/sec: 15.09 - lr: 0.020000\n",
+      "2021-09-21 21:18:15,010 epoch 2 - iter 24/32 - loss 0.84416135 - samples/sec: 13.56 - lr: 0.020000\n",
+      "2021-09-21 21:18:15,214 epoch 2 - iter 27/32 - loss 0.81701094 - samples/sec: 14.69 - lr: 0.020000\n",
+      "2021-09-21 21:18:15,437 epoch 2 - iter 30/32 - loss 0.76391647 - samples/sec: 13.52 - lr: 0.020000\n",
+      "2021-09-21 21:18:15,583 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:15,583 EPOCH 2 done: loss 0.7302 - lr 0.0200000\n",
+      "2021-09-21 21:18:15,736 DEV : loss 1.0640723705291748 - score 0.0\n",
+      "2021-09-21 21:18:15,737 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:18:15,739 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:15,992 epoch 3 - iter 3/32 - loss 1.19235176 - samples/sec: 13.94 - lr: 0.020000\n",
+      "2021-09-21 21:18:16,200 epoch 3 - iter 6/32 - loss 0.78291192 - samples/sec: 14.47 - lr: 0.020000\n",
+      "2021-09-21 21:18:16,402 epoch 3 - iter 9/32 - loss 0.62004635 - samples/sec: 14.91 - lr: 0.020000\n",
+      "2021-09-21 21:18:16,611 epoch 3 - iter 12/32 - loss 0.67644488 - samples/sec: 14.37 - lr: 0.020000\n",
+      "2021-09-21 21:18:16,825 epoch 3 - iter 15/32 - loss 0.60917349 - samples/sec: 14.04 - lr: 0.020000\n",
+      "2021-09-21 21:18:17,055 epoch 3 - iter 18/32 - loss 0.57868824 - samples/sec: 13.10 - lr: 0.020000\n",
+      "2021-09-21 21:18:17,260 epoch 3 - iter 21/32 - loss 0.54880550 - samples/sec: 14.69 - lr: 0.020000\n",
+      "2021-09-21 21:18:17,499 epoch 3 - iter 24/32 - loss 0.57465518 - samples/sec: 12.55 - lr: 0.020000\n",
+      "2021-09-21 21:18:17,697 epoch 3 - iter 27/32 - loss 0.53315820 - samples/sec: 15.23 - lr: 0.020000\n",
+      "2021-09-21 21:18:17,896 epoch 3 - iter 30/32 - loss 0.50591902 - samples/sec: 15.10 - lr: 0.020000\n",
+      "2021-09-21 21:18:18,021 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:18,021 EPOCH 3 done: loss 0.4768 - lr 0.0200000\n",
+      "2021-09-21 21:18:18,143 DEV : loss 0.782433271408081 - score 0.25\n",
+      "2021-09-21 21:18:18,144 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:40:12,239 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:12,474 epoch 3 - iter 3/32 - loss 0.02998257 - samples/sec: 14.35 - lr: 0.020000\n",
-      "2021-09-08 01:40:12,704 epoch 3 - iter 6/32 - loss 0.06473235 - samples/sec: 13.09 - lr: 0.020000\n",
-      "2021-09-08 01:40:12,906 epoch 3 - iter 9/32 - loss 0.28950175 - samples/sec: 14.92 - lr: 0.020000\n",
-      "2021-09-08 01:40:13,097 epoch 3 - iter 12/32 - loss 0.29098044 - samples/sec: 15.74 - lr: 0.020000\n",
-      "2021-09-08 01:40:13,306 epoch 3 - iter 15/32 - loss 0.35179045 - samples/sec: 14.45 - lr: 0.020000\n",
-      "2021-09-08 01:40:13,503 epoch 3 - iter 18/32 - loss 0.38971771 - samples/sec: 15.27 - lr: 0.020000\n",
-      "2021-09-08 01:40:13,713 epoch 3 - iter 21/32 - loss 0.34241363 - samples/sec: 14.31 - lr: 0.020000\n",
-      "2021-09-08 01:40:13,991 epoch 3 - iter 24/32 - loss 0.30505702 - samples/sec: 10.83 - lr: 0.020000\n",
-      "2021-09-08 01:40:14,179 epoch 3 - iter 27/32 - loss 0.29311106 - samples/sec: 16.00 - lr: 0.020000\n",
-      "2021-09-08 01:40:14,373 epoch 3 - iter 30/32 - loss 0.26526228 - samples/sec: 15.45 - lr: 0.020000\n",
-      "2021-09-08 01:40:14,490 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:14,490 EPOCH 3 done: loss 0.2505 - lr 0.0200000\n",
-      "2021-09-08 01:40:14,610 DEV : loss 0.38142868876457214 - score 0.75\n",
-      "2021-09-08 01:40:14,611 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:18:22,511 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:22,753 epoch 4 - iter 3/32 - loss 0.04362089 - samples/sec: 13.43 - lr: 0.020000\n",
+      "2021-09-21 21:18:23,021 epoch 4 - iter 6/32 - loss 0.15870032 - samples/sec: 11.19 - lr: 0.020000\n",
+      "2021-09-21 21:18:23,238 epoch 4 - iter 9/32 - loss 0.12989619 - samples/sec: 13.90 - lr: 0.020000\n",
+      "2021-09-21 21:18:23,492 epoch 4 - iter 12/32 - loss 0.11065025 - samples/sec: 11.82 - lr: 0.020000\n",
+      "2021-09-21 21:18:23,716 epoch 4 - iter 15/32 - loss 0.14314296 - samples/sec: 13.46 - lr: 0.020000\n",
+      "2021-09-21 21:18:23,946 epoch 4 - iter 18/32 - loss 0.13156621 - samples/sec: 13.05 - lr: 0.020000\n",
+      "2021-09-21 21:18:24,172 epoch 4 - iter 21/32 - loss 0.12153529 - samples/sec: 13.30 - lr: 0.020000\n",
+      "2021-09-21 21:18:24,417 epoch 4 - iter 24/32 - loss 0.15689907 - samples/sec: 12.27 - lr: 0.020000\n",
+      "2021-09-21 21:18:24,668 epoch 4 - iter 27/32 - loss 0.14799005 - samples/sec: 11.98 - lr: 0.020000\n",
+      "2021-09-21 21:18:24,891 epoch 4 - iter 30/32 - loss 0.16769550 - samples/sec: 13.51 - lr: 0.020000\n",
+      "2021-09-21 21:18:25,061 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:25,061 EPOCH 4 done: loss 0.1761 - lr 0.0200000\n",
+      "2021-09-21 21:18:25,218 DEV : loss 0.24851536750793457 - score 1.0\n",
+      "2021-09-21 21:18:25,219 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:40:18,669 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:18,878 epoch 4 - iter 3/32 - loss 0.12214667 - samples/sec: 15.64 - lr: 0.020000\n",
-      "2021-09-08 01:40:19,059 epoch 4 - iter 6/32 - loss 0.06381015 - samples/sec: 16.65 - lr: 0.020000\n",
-      "2021-09-08 01:40:19,224 epoch 4 - iter 9/32 - loss 0.04533817 - samples/sec: 18.24 - lr: 0.020000\n",
-      "2021-09-08 01:40:19,417 epoch 4 - iter 12/32 - loss 0.14597599 - samples/sec: 15.61 - lr: 0.020000\n",
-      "2021-09-08 01:40:19,630 epoch 4 - iter 15/32 - loss 0.14980515 - samples/sec: 14.14 - lr: 0.020000\n",
-      "2021-09-08 01:40:19,885 epoch 4 - iter 18/32 - loss 0.13233806 - samples/sec: 11.79 - lr: 0.020000\n",
-      "2021-09-08 01:40:20,068 epoch 4 - iter 21/32 - loss 0.12279609 - samples/sec: 16.47 - lr: 0.020000\n",
-      "2021-09-08 01:40:20,260 epoch 4 - iter 24/32 - loss 0.11047583 - samples/sec: 15.71 - lr: 0.020000\n",
-      "2021-09-08 01:40:20,439 epoch 4 - iter 27/32 - loss 0.10362918 - samples/sec: 16.83 - lr: 0.020000\n",
-      "2021-09-08 01:40:20,642 epoch 4 - iter 30/32 - loss 0.09399671 - samples/sec: 14.80 - lr: 0.020000\n",
-      "2021-09-08 01:40:20,788 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:20,788 EPOCH 4 done: loss 0.0883 - lr 0.0200000\n",
-      "2021-09-08 01:40:20,938 DEV : loss 0.43621063232421875 - score 0.75\n",
-      "2021-09-08 01:40:20,940 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:40:20,942 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:21,210 epoch 5 - iter 3/32 - loss 0.02224571 - samples/sec: 14.05 - lr: 0.020000\n",
-      "2021-09-08 01:40:21,400 epoch 5 - iter 6/32 - loss 0.12520063 - samples/sec: 15.83 - lr: 0.020000\n",
-      "2021-09-08 01:40:21,571 epoch 5 - iter 9/32 - loss 0.08438991 - samples/sec: 17.53 - lr: 0.020000\n",
-      "2021-09-08 01:40:21,763 epoch 5 - iter 12/32 - loss 0.06356564 - samples/sec: 15.68 - lr: 0.020000\n",
-      "2021-09-08 01:40:21,955 epoch 5 - iter 15/32 - loss 0.05148206 - samples/sec: 15.70 - lr: 0.020000\n",
-      "2021-09-08 01:40:22,154 epoch 5 - iter 18/32 - loss 0.04339579 - samples/sec: 15.12 - lr: 0.020000\n",
-      "2021-09-08 01:40:22,377 epoch 5 - iter 21/32 - loss 0.04248332 - samples/sec: 13.48 - lr: 0.020000\n",
-      "2021-09-08 01:40:22,652 epoch 5 - iter 24/32 - loss 0.09599469 - samples/sec: 10.96 - lr: 0.020000\n",
-      "2021-09-08 01:40:22,821 epoch 5 - iter 27/32 - loss 0.10070130 - samples/sec: 17.81 - lr: 0.020000\n",
-      "2021-09-08 01:40:23,001 epoch 5 - iter 30/32 - loss 0.09089553 - samples/sec: 16.74 - lr: 0.020000\n",
-      "2021-09-08 01:40:23,139 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:23,139 EPOCH 5 done: loss 0.0859 - lr 0.0200000\n",
-      "2021-09-08 01:40:23,222 DEV : loss 0.19463053345680237 - score 1.0\n",
-      "2021-09-08 01:40:23,223 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:40:27,238 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:27,525 epoch 6 - iter 3/32 - loss 0.01000957 - samples/sec: 12.33 - lr: 0.020000\n",
-      "2021-09-08 01:40:27,701 epoch 6 - iter 6/32 - loss 0.01620824 - samples/sec: 17.15 - lr: 0.020000\n",
-      "2021-09-08 01:40:27,902 epoch 6 - iter 9/32 - loss 0.16166320 - samples/sec: 14.99 - lr: 0.020000\n",
-      "2021-09-08 01:40:28,093 epoch 6 - iter 12/32 - loss 0.12197813 - samples/sec: 15.73 - lr: 0.020000\n",
-      "2021-09-08 01:40:28,279 epoch 6 - iter 15/32 - loss 0.09790677 - samples/sec: 16.24 - lr: 0.020000\n",
-      "2021-09-08 01:40:28,477 epoch 6 - iter 18/32 - loss 0.08178817 - samples/sec: 15.18 - lr: 0.020000\n",
-      "2021-09-08 01:40:28,658 epoch 6 - iter 21/32 - loss 0.07021715 - samples/sec: 16.63 - lr: 0.020000\n",
-      "2021-09-08 01:40:28,879 epoch 6 - iter 24/32 - loss 0.06166178 - samples/sec: 13.62 - lr: 0.020000\n",
-      "2021-09-08 01:40:29,124 epoch 6 - iter 27/32 - loss 0.05500733 - samples/sec: 12.26 - lr: 0.020000\n",
-      "2021-09-08 01:40:29,300 epoch 6 - iter 30/32 - loss 0.04960567 - samples/sec: 17.14 - lr: 0.020000\n",
-      "2021-09-08 01:40:29,418 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:18:29,300 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:29,586 epoch 5 - iter 3/32 - loss 0.00268572 - samples/sec: 13.07 - lr: 0.020000\n",
+      "2021-09-21 21:18:29,818 epoch 5 - iter 6/32 - loss 0.01639540 - samples/sec: 12.98 - lr: 0.020000\n",
+      "2021-09-21 21:18:30,071 epoch 5 - iter 9/32 - loss 0.24718413 - samples/sec: 11.88 - lr: 0.020000\n",
+      "2021-09-21 21:18:30,317 epoch 5 - iter 12/32 - loss 0.26729820 - samples/sec: 12.24 - lr: 0.020000\n",
+      "2021-09-21 21:18:30,566 epoch 5 - iter 15/32 - loss 0.26416300 - samples/sec: 12.09 - lr: 0.020000\n",
+      "2021-09-21 21:18:30,824 epoch 5 - iter 18/32 - loss 0.22353795 - samples/sec: 11.63 - lr: 0.020000\n",
+      "2021-09-21 21:18:31,070 epoch 5 - iter 21/32 - loss 0.24723713 - samples/sec: 12.23 - lr: 0.020000\n",
+      "2021-09-21 21:18:31,319 epoch 5 - iter 24/32 - loss 0.24303305 - samples/sec: 12.05 - lr: 0.020000\n",
+      "2021-09-21 21:18:31,570 epoch 5 - iter 27/32 - loss 0.22346695 - samples/sec: 11.99 - lr: 0.020000\n",
+      "2021-09-21 21:18:31,801 epoch 5 - iter 30/32 - loss 0.21369535 - samples/sec: 13.01 - lr: 0.020000\n",
+      "2021-09-21 21:18:31,940 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:31,941 EPOCH 5 done: loss 0.2090 - lr 0.0200000\n",
+      "2021-09-21 21:18:34,766 DEV : loss 0.7084013223648071 - score 0.5\n",
+      "2021-09-21 21:18:34,767 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:18:34,772 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:34,988 epoch 6 - iter 3/32 - loss 0.10075874 - samples/sec: 15.34 - lr: 0.020000\n",
+      "2021-09-21 21:18:35,199 epoch 6 - iter 6/32 - loss 0.05715195 - samples/sec: 14.26 - lr: 0.020000\n",
+      "2021-09-21 21:18:35,420 epoch 6 - iter 9/32 - loss 0.04049913 - samples/sec: 13.59 - lr: 0.020000\n",
+      "2021-09-21 21:18:35,625 epoch 6 - iter 12/32 - loss 0.03514999 - samples/sec: 14.69 - lr: 0.020000\n",
+      "2021-09-21 21:18:35,862 epoch 6 - iter 15/32 - loss 0.06440344 - samples/sec: 12.65 - lr: 0.020000\n",
+      "2021-09-21 21:18:36,093 epoch 6 - iter 18/32 - loss 0.05542695 - samples/sec: 13.06 - lr: 0.020000\n",
+      "2021-09-21 21:18:36,315 epoch 6 - iter 21/32 - loss 0.07591798 - samples/sec: 13.51 - lr: 0.020000\n",
+      "2021-09-21 21:18:36,519 epoch 6 - iter 24/32 - loss 0.10527222 - samples/sec: 14.77 - lr: 0.020000\n",
+      "2021-09-21 21:18:36,750 epoch 6 - iter 27/32 - loss 0.09409830 - samples/sec: 13.05 - lr: 0.020000\n",
+      "2021-09-21 21:18:36,962 epoch 6 - iter 30/32 - loss 0.08611601 - samples/sec: 14.16 - lr: 0.020000\n",
+      "2021-09-21 21:18:37,107 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:40:29,418 EPOCH 6 done: loss 0.0480 - lr 0.0200000\n",
-      "2021-09-08 01:40:29,525 DEV : loss 0.3471345901489258 - score 0.75\n",
-      "2021-09-08 01:40:29,526 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:40:29,528 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:29,747 epoch 7 - iter 3/32 - loss 0.00161477 - samples/sec: 14.99 - lr: 0.020000\n",
-      "2021-09-08 01:40:29,976 epoch 7 - iter 6/32 - loss 0.00335635 - samples/sec: 13.12 - lr: 0.020000\n",
-      "2021-09-08 01:40:30,188 epoch 7 - iter 9/32 - loss 0.00257427 - samples/sec: 14.22 - lr: 0.020000\n",
-      "2021-09-08 01:40:30,370 epoch 7 - iter 12/32 - loss 0.00234705 - samples/sec: 16.52 - lr: 0.020000\n",
-      "2021-09-08 01:40:30,561 epoch 7 - iter 15/32 - loss 0.00193449 - samples/sec: 15.72 - lr: 0.020000\n",
-      "2021-09-08 01:40:30,750 epoch 7 - iter 18/32 - loss 0.00179316 - samples/sec: 15.97 - lr: 0.020000\n",
-      "2021-09-08 01:40:30,932 epoch 7 - iter 21/32 - loss 0.00188039 - samples/sec: 16.57 - lr: 0.020000\n",
-      "2021-09-08 01:40:31,132 epoch 7 - iter 24/32 - loss 0.00189376 - samples/sec: 15.04 - lr: 0.020000\n",
-      "2021-09-08 01:40:31,375 epoch 7 - iter 27/32 - loss 0.00173776 - samples/sec: 12.38 - lr: 0.020000\n",
-      "2021-09-08 01:40:31,559 epoch 7 - iter 30/32 - loss 0.00170478 - samples/sec: 16.34 - lr: 0.020000\n",
-      "2021-09-08 01:40:31,691 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:31,691 EPOCH 7 done: loss 0.0017 - lr 0.0200000\n",
-      "2021-09-08 01:40:31,783 DEV : loss 0.24884435534477234 - score 1.0\n",
-      "2021-09-08 01:40:31,785 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:40:31,787 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:31,999 epoch 8 - iter 3/32 - loss 0.00226845 - samples/sec: 17.02 - lr: 0.020000\n",
-      "2021-09-08 01:40:32,201 epoch 8 - iter 6/32 - loss 0.00143895 - samples/sec: 14.85 - lr: 0.020000\n",
-      "2021-09-08 01:40:32,474 epoch 8 - iter 9/32 - loss 0.20560948 - samples/sec: 11.02 - lr: 0.020000\n",
-      "2021-09-08 01:40:32,662 epoch 8 - iter 12/32 - loss 0.15448483 - samples/sec: 15.97 - lr: 0.020000\n",
-      "2021-09-08 01:40:32,843 epoch 8 - iter 15/32 - loss 0.12383489 - samples/sec: 16.69 - lr: 0.020000\n",
-      "2021-09-08 01:40:33,012 epoch 8 - iter 18/32 - loss 0.10335083 - samples/sec: 17.83 - lr: 0.020000\n",
-      "2021-09-08 01:40:33,194 epoch 8 - iter 21/32 - loss 0.10711542 - samples/sec: 16.45 - lr: 0.020000\n",
-      "2021-09-08 01:40:33,420 epoch 8 - iter 24/32 - loss 0.09381987 - samples/sec: 13.31 - lr: 0.020000\n",
-      "2021-09-08 01:40:33,640 epoch 8 - iter 27/32 - loss 0.08351227 - samples/sec: 13.69 - lr: 0.020000\n",
-      "2021-09-08 01:40:33,863 epoch 8 - iter 30/32 - loss 0.07541217 - samples/sec: 13.48 - lr: 0.020000\n",
-      "2021-09-08 01:40:33,984 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:33,984 EPOCH 8 done: loss 0.0707 - lr 0.0200000\n",
-      "2021-09-08 01:40:34,089 DEV : loss 0.38110464811325073 - score 0.75\n",
-      "2021-09-08 01:40:34,089 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:40:34,091 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:34,284 epoch 9 - iter 3/32 - loss 0.00711966 - samples/sec: 18.95 - lr: 0.020000\n",
-      "2021-09-08 01:40:34,468 epoch 9 - iter 6/32 - loss 0.00479265 - samples/sec: 16.38 - lr: 0.020000\n",
-      "2021-09-08 01:40:34,695 epoch 9 - iter 9/32 - loss 0.00328003 - samples/sec: 13.23 - lr: 0.020000\n",
-      "2021-09-08 01:40:34,898 epoch 9 - iter 12/32 - loss 0.00274534 - samples/sec: 14.83 - lr: 0.020000\n",
-      "2021-09-08 01:40:35,094 epoch 9 - iter 15/32 - loss 0.01350791 - samples/sec: 15.34 - lr: 0.020000\n",
-      "2021-09-08 01:40:35,263 epoch 9 - iter 18/32 - loss 0.01130377 - samples/sec: 17.84 - lr: 0.020000\n",
-      "2021-09-08 01:40:35,442 epoch 9 - iter 21/32 - loss 0.00979415 - samples/sec: 16.78 - lr: 0.020000\n",
-      "2021-09-08 01:40:35,638 epoch 9 - iter 24/32 - loss 0.00877910 - samples/sec: 15.35 - lr: 0.020000\n",
-      "2021-09-08 01:40:35,893 epoch 9 - iter 27/32 - loss 0.00792600 - samples/sec: 11.82 - lr: 0.020000\n",
-      "2021-09-08 01:40:36,083 epoch 9 - iter 30/32 - loss 0.00718324 - samples/sec: 15.81 - lr: 0.020000\n",
-      "2021-09-08 01:40:36,203 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:36,204 EPOCH 9 done: loss 0.0068 - lr 0.0200000\n",
-      "2021-09-08 01:40:36,323 DEV : loss 0.4194389283657074 - score 0.75\n",
-      "Epoch     9: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:40:36,324 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:40:36,326 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:36,558 epoch 10 - iter 3/32 - loss 0.00228842 - samples/sec: 14.97 - lr: 0.010000\n",
-      "2021-09-08 01:40:36,752 epoch 10 - iter 6/32 - loss 0.00140715 - samples/sec: 15.50 - lr: 0.010000\n",
-      "2021-09-08 01:40:36,960 epoch 10 - iter 9/32 - loss 0.00108428 - samples/sec: 14.46 - lr: 0.010000\n",
-      "2021-09-08 01:40:37,188 epoch 10 - iter 12/32 - loss 0.00094052 - samples/sec: 13.24 - lr: 0.010000\n",
-      "2021-09-08 01:40:37,361 epoch 10 - iter 15/32 - loss 0.00084903 - samples/sec: 17.33 - lr: 0.010000\n",
-      "2021-09-08 01:40:37,552 epoch 10 - iter 18/32 - loss 0.00086264 - samples/sec: 15.78 - lr: 0.010000\n",
-      "2021-09-08 01:40:37,735 epoch 10 - iter 21/32 - loss 0.00079862 - samples/sec: 16.47 - lr: 0.010000\n",
-      "2021-09-08 01:40:37,922 epoch 10 - iter 24/32 - loss 0.00090538 - samples/sec: 16.05 - lr: 0.010000\n",
-      "2021-09-08 01:40:38,111 epoch 10 - iter 27/32 - loss 0.00096667 - samples/sec: 15.93 - lr: 0.010000\n",
-      "2021-09-08 01:40:38,317 epoch 10 - iter 30/32 - loss 0.00095196 - samples/sec: 14.59 - lr: 0.010000\n",
-      "2021-09-08 01:40:38,438 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:38,438 EPOCH 10 done: loss 0.0236 - lr 0.0100000\n",
-      "2021-09-08 01:40:38,505 DEV : loss 0.44921645522117615 - score 1.0\n",
-      "2021-09-08 01:40:38,505 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:40:46,288 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:46,312 Testing using best model ...\n",
-      "2021-09-08 01:40:46,336 loading file temp/best-model.pt\n",
+      "2021-09-21 21:18:37,108 EPOCH 6 done: loss 0.0810 - lr 0.0200000\n",
+      "2021-09-21 21:18:37,249 DEV : loss 0.3946777880191803 - score 0.75\n",
+      "2021-09-21 21:18:37,250 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:18:37,252 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:37,465 epoch 7 - iter 3/32 - loss 0.00101647 - samples/sec: 15.79 - lr: 0.020000\n",
+      "2021-09-21 21:18:37,695 epoch 7 - iter 6/32 - loss 0.06239480 - samples/sec: 13.07 - lr: 0.020000\n",
+      "2021-09-21 21:18:37,890 epoch 7 - iter 9/32 - loss 0.09027460 - samples/sec: 15.46 - lr: 0.020000\n",
+      "2021-09-21 21:18:38,112 epoch 7 - iter 12/32 - loss 0.06803306 - samples/sec: 13.53 - lr: 0.020000\n",
+      "2021-09-21 21:18:38,332 epoch 7 - iter 15/32 - loss 0.05482383 - samples/sec: 13.68 - lr: 0.020000\n",
+      "2021-09-21 21:18:38,534 epoch 7 - iter 18/32 - loss 0.04592164 - samples/sec: 14.94 - lr: 0.020000\n",
+      "2021-09-21 21:18:38,777 epoch 7 - iter 21/32 - loss 0.04996986 - samples/sec: 12.35 - lr: 0.020000\n",
+      "2021-09-21 21:18:38,973 epoch 7 - iter 24/32 - loss 0.04458495 - samples/sec: 15.33 - lr: 0.020000\n",
+      "2021-09-21 21:18:39,222 epoch 7 - iter 27/32 - loss 0.04093982 - samples/sec: 12.11 - lr: 0.020000\n",
+      "2021-09-21 21:18:39,491 epoch 7 - iter 30/32 - loss 0.03719119 - samples/sec: 11.19 - lr: 0.020000\n",
+      "2021-09-21 21:18:39,649 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:39,649 EPOCH 7 done: loss 0.0351 - lr 0.0200000\n",
+      "2021-09-21 21:18:39,829 DEV : loss 1.1225965023040771 - score 0.5\n",
+      "2021-09-21 21:18:39,830 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:18:39,832 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:40,127 epoch 8 - iter 3/32 - loss 0.00503032 - samples/sec: 12.36 - lr: 0.020000\n",
+      "2021-09-21 21:18:40,400 epoch 8 - iter 6/32 - loss 0.00833471 - samples/sec: 11.02 - lr: 0.020000\n",
+      "2021-09-21 21:18:40,678 epoch 8 - iter 9/32 - loss 0.00579947 - samples/sec: 10.81 - lr: 0.020000\n",
+      "2021-09-21 21:18:40,975 epoch 8 - iter 12/32 - loss 0.00500087 - samples/sec: 10.12 - lr: 0.020000\n",
+      "2021-09-21 21:18:41,235 epoch 8 - iter 15/32 - loss 0.00632866 - samples/sec: 11.57 - lr: 0.020000\n",
+      "2021-09-21 21:18:41,518 epoch 8 - iter 18/32 - loss 0.00554399 - samples/sec: 10.59 - lr: 0.020000\n",
+      "2021-09-21 21:18:41,765 epoch 8 - iter 21/32 - loss 0.00476434 - samples/sec: 12.21 - lr: 0.020000\n",
+      "2021-09-21 21:18:42,068 epoch 8 - iter 24/32 - loss 0.00476800 - samples/sec: 9.92 - lr: 0.020000\n",
+      "2021-09-21 21:18:42,344 epoch 8 - iter 27/32 - loss 0.00444320 - samples/sec: 10.88 - lr: 0.020000\n",
+      "2021-09-21 21:18:42,598 epoch 8 - iter 30/32 - loss 0.02312112 - samples/sec: 11.84 - lr: 0.020000\n",
+      "2021-09-21 21:18:42,807 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:42,808 EPOCH 8 done: loss 0.0274 - lr 0.0200000\n",
+      "2021-09-21 21:18:42,992 DEV : loss 0.5910643339157104 - score 0.75\n",
+      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:18:42,992 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:18:42,994 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:43,338 epoch 9 - iter 3/32 - loss 0.00141305 - samples/sec: 10.73 - lr: 0.010000\n",
+      "2021-09-21 21:18:43,623 epoch 9 - iter 6/32 - loss 0.00617909 - samples/sec: 10.54 - lr: 0.010000\n",
+      "2021-09-21 21:18:43,890 epoch 9 - iter 9/32 - loss 0.00529636 - samples/sec: 11.26 - lr: 0.010000\n",
+      "2021-09-21 21:18:44,162 epoch 9 - iter 12/32 - loss 0.00407271 - samples/sec: 11.06 - lr: 0.010000\n",
+      "2021-09-21 21:18:44,420 epoch 9 - iter 15/32 - loss 0.01899125 - samples/sec: 11.65 - lr: 0.010000\n",
+      "2021-09-21 21:18:44,709 epoch 9 - iter 18/32 - loss 0.01667940 - samples/sec: 10.41 - lr: 0.010000\n",
+      "2021-09-21 21:18:44,967 epoch 9 - iter 21/32 - loss 0.01435896 - samples/sec: 11.65 - lr: 0.010000\n",
+      "2021-09-21 21:18:45,225 epoch 9 - iter 24/32 - loss 0.01262328 - samples/sec: 11.65 - lr: 0.010000\n",
+      "2021-09-21 21:18:45,520 epoch 9 - iter 27/32 - loss 0.01135455 - samples/sec: 10.19 - lr: 0.010000\n",
+      "2021-09-21 21:18:45,781 epoch 9 - iter 30/32 - loss 0.01033865 - samples/sec: 11.52 - lr: 0.010000\n",
+      "2021-09-21 21:18:45,918 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:45,918 EPOCH 9 done: loss 0.0099 - lr 0.0100000\n",
+      "2021-09-21 21:18:46,029 DEV : loss 0.822402834892273 - score 0.5\n",
+      "2021-09-21 21:18:46,030 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:18:46,032 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:46,306 epoch 10 - iter 3/32 - loss 0.00174855 - samples/sec: 12.88 - lr: 0.010000\n",
+      "2021-09-21 21:18:46,514 epoch 10 - iter 6/32 - loss 0.00149087 - samples/sec: 14.46 - lr: 0.010000\n",
+      "2021-09-21 21:18:46,749 epoch 10 - iter 9/32 - loss 0.00114722 - samples/sec: 12.81 - lr: 0.010000\n",
+      "2021-09-21 21:18:46,945 epoch 10 - iter 12/32 - loss 0.00095172 - samples/sec: 15.40 - lr: 0.010000\n",
+      "2021-09-21 21:18:47,141 epoch 10 - iter 15/32 - loss 0.00087243 - samples/sec: 15.33 - lr: 0.010000\n",
+      "2021-09-21 21:18:47,348 epoch 10 - iter 18/32 - loss 0.00097785 - samples/sec: 14.56 - lr: 0.010000\n",
+      "2021-09-21 21:18:47,573 epoch 10 - iter 21/32 - loss 0.00111518 - samples/sec: 13.37 - lr: 0.010000\n",
+      "2021-09-21 21:18:47,798 epoch 10 - iter 24/32 - loss 0.00149757 - samples/sec: 13.36 - lr: 0.010000\n",
+      "2021-09-21 21:18:47,993 epoch 10 - iter 27/32 - loss 0.00135709 - samples/sec: 15.44 - lr: 0.010000\n",
+      "2021-09-21 21:18:48,190 epoch 10 - iter 30/32 - loss 0.00127222 - samples/sec: 15.32 - lr: 0.010000\n",
+      "2021-09-21 21:18:48,331 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:48,331 EPOCH 10 done: loss 0.0012 - lr 0.0100000\n",
+      "2021-09-21 21:18:48,559 DEV : loss 0.7030301690101624 - score 0.75\n",
+      "2021-09-21 21:18:48,560 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:18:52,673 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:52,674 Testing using best model ...\n",
+      "2021-09-21 21:18:52,675 loading file temp/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:40:56,166 \t0.75\n",
-      "2021-09-08 01:40:56,166 \n",
+      "2021-09-21 21:18:57,836 \t0.5\n",
+      "2021-09-21 21:18:57,836 \n",
       "Results:\n",
-      "- F-score (micro) 0.75\n",
-      "- F-score (macro) 0.6667\n",
-      "- Accuracy 0.75\n",
+      "- F-score (micro) 0.5\n",
+      "- F-score (macro) 0.4167\n",
+      "- Accuracy 0.5\n",
       "\n",
       "By class:\n",
       "                                                           precision    recall  f1-score   support\n",
       "\n",
-      "   emotions experienced when not in a state of well-being     1.0000    1.0000    1.0000         1\n",
-      "the optimistic feeling that all is going to turn out well     0.0000    0.0000    0.0000         1\n",
-      "                                 the state of being angry     0.5000    1.0000    0.6667         1\n",
-      "                           the emotion of great happiness     1.0000    1.0000    1.0000         1\n",
+      "   emotions experienced when not in a state of well-being     0.0000    0.0000    0.0000         1\n",
+      "the optimistic feeling that all is going to turn out well     0.0000    0.0000    0.0000         0\n",
+      "                                 the state of being angry     1.0000    1.0000    1.0000         1\n",
+      "                           the emotion of great happiness     1.0000    0.5000    0.6667         2\n",
       "\n",
-      "                                                micro avg     0.7500    0.7500    0.7500         4\n",
-      "                                                macro avg     0.6250    0.7500    0.6667         4\n",
-      "                                             weighted avg     0.6250    0.7500    0.6667         4\n",
-      "                                              samples avg     0.7500    0.7500    0.7500         4\n",
+      "                                                micro avg     0.5000    0.5000    0.5000         4\n",
+      "                                                macro avg     0.5000    0.3750    0.4167         4\n",
+      "                                             weighted avg     0.7500    0.5000    0.5833         4\n",
+      "                                              samples avg     0.5000    0.5000    0.5000         4\n",
       "\n",
-      "2021-09-08 01:40:56,167 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:09,565 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
+      "2021-09-21 21:18:57,837 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:04,919 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:41:13,818 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:19:08,886 Computing label dictionary. Progress:\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 36/36 [00:00<00:00, 45811.57it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 21:19:08,889 [b'emotions experienced when not in a state of well-being', b'the optimistic feeling that all is going to turn out well', b'the state of being angry', b'the emotion of great happiness']\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 36/36 [00:00<00:00, 32388.45it/s]"
+      "\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:41:13,823 [b'emotions experienced when not in a state of well-being', b'the optimistic feeling that all is going to turn out well', b'the state of being angry', b'the emotion of great happiness']\n",
-      "2021-09-08 01:41:13,837 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:13,839 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:19:13,725 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:13,727 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -7717,203 +7753,191 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:41:13,840 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:13,840 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
-      "2021-09-08 01:41:13,841 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:13,841 Parameters:\n",
-      "2021-09-08 01:41:13,841  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:41:13,842  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:41:13,842  - patience: \"3\"\n",
-      "2021-09-08 01:41:13,842  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:41:13,842  - max_epochs: \"10\"\n",
-      "2021-09-08 01:41:13,843  - shuffle: \"True\"\n",
-      "2021-09-08 01:41:13,843  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:41:13,843  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:41:13,844 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:13,844 Model training base path: \"temp\"\n",
-      "2021-09-08 01:41:13,844 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:13,844 Device: cuda:0\n",
-      "2021-09-08 01:41:13,845 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:13,845 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:41:13,851 ----------------------------------------------------------------------------------------------------\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 01:41:14,092 epoch 1 - iter 3/32 - loss 0.34047478 - samples/sec: 15.19 - lr: 0.020000\n",
-      "2021-09-08 01:41:14,291 epoch 1 - iter 6/32 - loss 0.18745040 - samples/sec: 15.13 - lr: 0.020000\n",
-      "2021-09-08 01:41:14,525 epoch 1 - iter 9/32 - loss 0.13130198 - samples/sec: 12.84 - lr: 0.020000\n",
-      "2021-09-08 01:41:14,730 epoch 1 - iter 12/32 - loss 0.67518463 - samples/sec: 14.72 - lr: 0.020000\n",
-      "2021-09-08 01:41:14,950 epoch 1 - iter 15/32 - loss 0.75458114 - samples/sec: 13.69 - lr: 0.020000\n",
-      "2021-09-08 01:41:15,172 epoch 1 - iter 18/32 - loss 0.65760783 - samples/sec: 13.55 - lr: 0.020000\n",
-      "2021-09-08 01:41:15,396 epoch 1 - iter 21/32 - loss 0.79086081 - samples/sec: 13.40 - lr: 0.020000\n",
-      "2021-09-08 01:41:15,597 epoch 1 - iter 24/32 - loss 0.74015785 - samples/sec: 15.03 - lr: 0.020000\n",
-      "2021-09-08 01:41:15,817 epoch 1 - iter 27/32 - loss 0.78950802 - samples/sec: 13.70 - lr: 0.020000\n",
-      "2021-09-08 01:41:16,010 epoch 1 - iter 30/32 - loss 0.84201942 - samples/sec: 15.66 - lr: 0.020000\n",
-      "2021-09-08 01:41:16,148 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:16,149 EPOCH 1 done: loss 0.8203 - lr 0.0200000\n",
-      "2021-09-08 01:41:16,273 DEV : loss 0.5618022680282593 - score 0.5\n",
-      "2021-09-08 01:41:16,274 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:19:13,728 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:13,728 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
+      "2021-09-21 21:19:13,729 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:13,729 Parameters:\n",
+      "2021-09-21 21:19:13,729  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:19:13,729  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:19:13,730  - patience: \"3\"\n",
+      "2021-09-21 21:19:13,730  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:19:13,730  - max_epochs: \"10\"\n",
+      "2021-09-21 21:19:13,730  - shuffle: \"True\"\n",
+      "2021-09-21 21:19:13,731  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:19:13,731  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:19:13,731 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:13,732 Model training base path: \"temp\"\n",
+      "2021-09-21 21:19:13,732 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:13,732 Device: cuda:0\n",
+      "2021-09-21 21:19:13,732 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:13,733 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:19:13,802 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:14,025 epoch 1 - iter 3/32 - loss 0.09334417 - samples/sec: 14.32 - lr: 0.020000\n",
+      "2021-09-21 21:19:14,305 epoch 1 - iter 6/32 - loss 0.20574303 - samples/sec: 10.73 - lr: 0.020000\n",
+      "2021-09-21 21:19:14,573 epoch 1 - iter 9/32 - loss 0.61747514 - samples/sec: 11.22 - lr: 0.020000\n",
+      "2021-09-21 21:19:14,871 epoch 1 - iter 12/32 - loss 0.51528499 - samples/sec: 10.10 - lr: 0.020000\n",
+      "2021-09-21 21:19:15,100 epoch 1 - iter 15/32 - loss 0.58402529 - samples/sec: 13.14 - lr: 0.020000\n",
+      "2021-09-21 21:19:15,400 epoch 1 - iter 18/32 - loss 0.50766692 - samples/sec: 10.02 - lr: 0.020000\n",
+      "2021-09-21 21:19:15,669 epoch 1 - iter 21/32 - loss 0.43675446 - samples/sec: 11.14 - lr: 0.020000\n",
+      "2021-09-21 21:19:15,974 epoch 1 - iter 24/32 - loss 0.52729248 - samples/sec: 9.86 - lr: 0.020000\n",
+      "2021-09-21 21:19:16,239 epoch 1 - iter 27/32 - loss 0.51116968 - samples/sec: 11.35 - lr: 0.020000\n",
+      "2021-09-21 21:19:16,489 epoch 1 - iter 30/32 - loss 0.50470032 - samples/sec: 12.06 - lr: 0.020000\n",
+      "2021-09-21 21:19:16,669 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:16,670 EPOCH 1 done: loss 0.5051 - lr 0.0200000\n",
+      "2021-09-21 21:19:16,966 DEV : loss 0.4352363348007202 - score 0.75\n",
+      "2021-09-21 21:19:16,966 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:41:20,548 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:20,788 epoch 2 - iter 3/32 - loss 1.49846128 - samples/sec: 14.47 - lr: 0.020000\n",
-      "2021-09-08 01:41:20,952 epoch 2 - iter 6/32 - loss 1.11205648 - samples/sec: 18.32 - lr: 0.020000\n",
-      "2021-09-08 01:41:21,138 epoch 2 - iter 9/32 - loss 0.85458686 - samples/sec: 16.20 - lr: 0.020000\n",
-      "2021-09-08 01:41:21,324 epoch 2 - iter 12/32 - loss 0.76878354 - samples/sec: 16.21 - lr: 0.020000\n",
-      "2021-09-08 01:41:21,479 epoch 2 - iter 15/32 - loss 0.71447345 - samples/sec: 19.44 - lr: 0.020000\n",
-      "2021-09-08 01:41:21,633 epoch 2 - iter 18/32 - loss 0.62451594 - samples/sec: 19.58 - lr: 0.020000\n",
-      "2021-09-08 01:41:21,814 epoch 2 - iter 21/32 - loss 0.54182614 - samples/sec: 16.65 - lr: 0.020000\n",
-      "2021-09-08 01:41:22,015 epoch 2 - iter 24/32 - loss 0.51863881 - samples/sec: 14.97 - lr: 0.020000\n",
-      "2021-09-08 01:41:22,203 epoch 2 - iter 27/32 - loss 0.58589384 - samples/sec: 16.02 - lr: 0.020000\n",
-      "2021-09-08 01:41:22,387 epoch 2 - iter 30/32 - loss 0.58707372 - samples/sec: 16.42 - lr: 0.020000\n",
-      "2021-09-08 01:41:22,494 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:22,494 EPOCH 2 done: loss 0.5538 - lr 0.0200000\n",
-      "2021-09-08 01:41:22,620 DEV : loss 0.9523144960403442 - score 0.5\n",
-      "2021-09-08 01:41:22,622 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:41:22,624 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:22,826 epoch 3 - iter 3/32 - loss 0.62478455 - samples/sec: 16.16 - lr: 0.020000\n",
-      "2021-09-08 01:41:23,004 epoch 3 - iter 6/32 - loss 0.74420801 - samples/sec: 16.97 - lr: 0.020000\n",
-      "2021-09-08 01:41:23,170 epoch 3 - iter 9/32 - loss 0.56011487 - samples/sec: 18.10 - lr: 0.020000\n",
-      "2021-09-08 01:41:23,336 epoch 3 - iter 12/32 - loss 0.44260020 - samples/sec: 18.08 - lr: 0.020000\n",
-      "2021-09-08 01:41:23,521 epoch 3 - iter 15/32 - loss 0.49547031 - samples/sec: 16.28 - lr: 0.020000\n",
-      "2021-09-08 01:41:23,726 epoch 3 - iter 18/32 - loss 0.44025993 - samples/sec: 14.71 - lr: 0.020000\n",
-      "2021-09-08 01:41:23,913 epoch 3 - iter 21/32 - loss 0.41165417 - samples/sec: 16.10 - lr: 0.020000\n",
-      "2021-09-08 01:41:24,089 epoch 3 - iter 24/32 - loss 0.46339771 - samples/sec: 17.08 - lr: 0.020000\n",
-      "2021-09-08 01:41:24,280 epoch 3 - iter 27/32 - loss 0.44288628 - samples/sec: 15.81 - lr: 0.020000\n",
-      "2021-09-08 01:41:24,462 epoch 3 - iter 30/32 - loss 0.40783815 - samples/sec: 16.52 - lr: 0.020000\n",
-      "2021-09-08 01:41:24,617 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:24,618 EPOCH 3 done: loss 0.4374 - lr 0.0200000\n",
-      "2021-09-08 01:41:24,727 DEV : loss 1.069753885269165 - score 0.25\n",
-      "2021-09-08 01:41:24,728 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:41:24,730 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:24,931 epoch 4 - iter 3/32 - loss 0.06332990 - samples/sec: 15.99 - lr: 0.020000\n",
-      "2021-09-08 01:41:25,095 epoch 4 - iter 6/32 - loss 0.13553993 - samples/sec: 18.35 - lr: 0.020000\n",
-      "2021-09-08 01:41:25,261 epoch 4 - iter 9/32 - loss 0.18551943 - samples/sec: 18.12 - lr: 0.020000\n",
-      "2021-09-08 01:41:25,449 epoch 4 - iter 12/32 - loss 0.25929643 - samples/sec: 16.02 - lr: 0.020000\n",
-      "2021-09-08 01:41:25,619 epoch 4 - iter 15/32 - loss 0.21021906 - samples/sec: 17.67 - lr: 0.020000\n",
-      "2021-09-08 01:41:25,787 epoch 4 - iter 18/32 - loss 0.18753480 - samples/sec: 17.97 - lr: 0.020000\n",
-      "2021-09-08 01:41:25,976 epoch 4 - iter 21/32 - loss 0.21549544 - samples/sec: 15.92 - lr: 0.020000\n",
-      "2021-09-08 01:41:26,171 epoch 4 - iter 24/32 - loss 0.20776891 - samples/sec: 15.41 - lr: 0.020000\n",
-      "2021-09-08 01:41:26,337 epoch 4 - iter 27/32 - loss 0.25498384 - samples/sec: 18.19 - lr: 0.020000\n",
-      "2021-09-08 01:41:26,511 epoch 4 - iter 30/32 - loss 0.25144535 - samples/sec: 17.23 - lr: 0.020000\n",
-      "2021-09-08 01:41:26,638 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:26,639 EPOCH 4 done: loss 0.3417 - lr 0.0200000\n",
-      "2021-09-08 01:41:26,744 DEV : loss 1.109673023223877 - score 0.5\n",
-      "2021-09-08 01:41:26,745 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:41:26,747 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:27,014 epoch 5 - iter 3/32 - loss 0.26064772 - samples/sec: 12.28 - lr: 0.020000\n",
-      "2021-09-08 01:41:27,210 epoch 5 - iter 6/32 - loss 0.14515486 - samples/sec: 15.33 - lr: 0.020000\n",
-      "2021-09-08 01:41:27,365 epoch 5 - iter 9/32 - loss 0.15054906 - samples/sec: 19.42 - lr: 0.020000\n",
-      "2021-09-08 01:41:27,539 epoch 5 - iter 12/32 - loss 0.11384413 - samples/sec: 17.34 - lr: 0.020000\n",
-      "2021-09-08 01:41:27,765 epoch 5 - iter 15/32 - loss 0.09990668 - samples/sec: 13.32 - lr: 0.020000\n",
-      "2021-09-08 01:41:27,973 epoch 5 - iter 18/32 - loss 0.08537935 - samples/sec: 14.49 - lr: 0.020000\n",
-      "2021-09-08 01:41:28,141 epoch 5 - iter 21/32 - loss 0.08226788 - samples/sec: 17.82 - lr: 0.020000\n",
-      "2021-09-08 01:41:28,303 epoch 5 - iter 24/32 - loss 0.10583904 - samples/sec: 18.69 - lr: 0.020000\n",
-      "2021-09-08 01:41:28,472 epoch 5 - iter 27/32 - loss 0.09433962 - samples/sec: 17.75 - lr: 0.020000\n",
-      "2021-09-08 01:41:28,698 epoch 5 - iter 30/32 - loss 0.11612457 - samples/sec: 13.33 - lr: 0.020000\n",
-      "2021-09-08 01:41:28,850 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:28,851 EPOCH 5 done: loss 0.1102 - lr 0.0200000\n",
-      "2021-09-08 01:41:29,041 DEV : loss 0.7625503540039062 - score 0.75\n",
-      "2021-09-08 01:41:29,042 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:19:24,297 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:24,532 epoch 2 - iter 3/32 - loss 0.89383403 - samples/sec: 16.43 - lr: 0.020000\n",
+      "2021-09-21 21:19:24,742 epoch 2 - iter 6/32 - loss 0.69009075 - samples/sec: 14.33 - lr: 0.020000\n",
+      "2021-09-21 21:19:24,915 epoch 2 - iter 9/32 - loss 0.61010204 - samples/sec: 17.40 - lr: 0.020000\n",
+      "2021-09-21 21:19:25,070 epoch 2 - iter 12/32 - loss 0.89556053 - samples/sec: 19.31 - lr: 0.020000\n",
+      "2021-09-21 21:19:25,251 epoch 2 - iter 15/32 - loss 0.75645519 - samples/sec: 16.70 - lr: 0.020000\n",
+      "2021-09-21 21:19:25,438 epoch 2 - iter 18/32 - loss 0.70482240 - samples/sec: 16.07 - lr: 0.020000\n",
+      "2021-09-21 21:19:25,617 epoch 2 - iter 21/32 - loss 0.65829956 - samples/sec: 16.77 - lr: 0.020000\n",
+      "2021-09-21 21:19:25,787 epoch 2 - iter 24/32 - loss 0.67522676 - samples/sec: 17.70 - lr: 0.020000\n",
+      "2021-09-21 21:19:25,970 epoch 2 - iter 27/32 - loss 0.72999308 - samples/sec: 16.41 - lr: 0.020000\n",
+      "2021-09-21 21:19:26,255 epoch 2 - iter 30/32 - loss 0.71760867 - samples/sec: 10.57 - lr: 0.020000\n",
+      "2021-09-21 21:19:26,427 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:26,427 EPOCH 2 done: loss 0.7111 - lr 0.0200000\n",
+      "2021-09-21 21:19:26,570 DEV : loss 0.142171710729599 - score 1.0\n",
+      "2021-09-21 21:19:26,571 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:41:38,532 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:38,698 epoch 6 - iter 3/32 - loss 0.00480340 - samples/sec: 20.58 - lr: 0.020000\n",
-      "2021-09-08 01:41:38,839 epoch 6 - iter 6/32 - loss 0.00409177 - samples/sec: 21.37 - lr: 0.020000\n",
-      "2021-09-08 01:41:39,021 epoch 6 - iter 9/32 - loss 0.06437796 - samples/sec: 16.58 - lr: 0.020000\n",
-      "2021-09-08 01:41:39,164 epoch 6 - iter 12/32 - loss 0.05052849 - samples/sec: 21.00 - lr: 0.020000\n",
-      "2021-09-08 01:41:39,373 epoch 6 - iter 15/32 - loss 0.06750275 - samples/sec: 14.39 - lr: 0.020000\n",
-      "2021-09-08 01:41:39,510 epoch 6 - iter 18/32 - loss 0.05709284 - samples/sec: 22.09 - lr: 0.020000\n",
-      "2021-09-08 01:41:39,671 epoch 6 - iter 21/32 - loss 0.12581200 - samples/sec: 18.67 - lr: 0.020000\n",
-      "2021-09-08 01:41:39,857 epoch 6 - iter 24/32 - loss 0.11030783 - samples/sec: 16.18 - lr: 0.020000\n",
-      "2021-09-08 01:41:39,998 epoch 6 - iter 27/32 - loss 0.11238150 - samples/sec: 21.36 - lr: 0.020000\n",
-      "2021-09-08 01:41:40,176 epoch 6 - iter 30/32 - loss 0.10249704 - samples/sec: 16.94 - lr: 0.020000\n",
-      "2021-09-08 01:41:40,291 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:19:30,466 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:30,783 epoch 3 - iter 3/32 - loss 0.12836329 - samples/sec: 10.80 - lr: 0.020000\n",
+      "2021-09-21 21:19:31,079 epoch 3 - iter 6/32 - loss 0.20608242 - samples/sec: 10.16 - lr: 0.020000\n",
+      "2021-09-21 21:19:31,388 epoch 3 - iter 9/32 - loss 0.15169520 - samples/sec: 9.72 - lr: 0.020000\n",
+      "2021-09-21 21:19:31,631 epoch 3 - iter 12/32 - loss 0.11828149 - samples/sec: 12.42 - lr: 0.020000\n",
+      "2021-09-21 21:19:31,881 epoch 3 - iter 15/32 - loss 0.24637499 - samples/sec: 12.02 - lr: 0.020000\n",
+      "2021-09-21 21:19:32,117 epoch 3 - iter 18/32 - loss 0.20711281 - samples/sec: 12.73 - lr: 0.020000\n",
+      "2021-09-21 21:19:32,373 epoch 3 - iter 21/32 - loss 0.19769672 - samples/sec: 11.76 - lr: 0.020000\n",
+      "2021-09-21 21:19:32,684 epoch 3 - iter 24/32 - loss 0.17350993 - samples/sec: 9.66 - lr: 0.020000\n",
+      "2021-09-21 21:19:32,922 epoch 3 - iter 27/32 - loss 0.18146853 - samples/sec: 12.65 - lr: 0.020000\n",
+      "2021-09-21 21:19:33,216 epoch 3 - iter 30/32 - loss 0.23445575 - samples/sec: 10.21 - lr: 0.020000\n",
+      "2021-09-21 21:19:33,380 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:33,380 EPOCH 3 done: loss 0.2618 - lr 0.0200000\n",
+      "2021-09-21 21:19:33,555 DEV : loss 0.0062782843597233295 - score 1.0\n",
+      "2021-09-21 21:19:33,561 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:19:37,630 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:37,831 epoch 4 - iter 3/32 - loss 0.01196566 - samples/sec: 16.85 - lr: 0.020000\n",
+      "2021-09-21 21:19:38,029 epoch 4 - iter 6/32 - loss 0.02068429 - samples/sec: 15.21 - lr: 0.020000\n",
+      "2021-09-21 21:19:38,211 epoch 4 - iter 9/32 - loss 0.01533757 - samples/sec: 16.52 - lr: 0.020000\n",
+      "2021-09-21 21:19:38,444 epoch 4 - iter 12/32 - loss 0.15879152 - samples/sec: 12.92 - lr: 0.020000\n",
+      "2021-09-21 21:19:38,668 epoch 4 - iter 15/32 - loss 0.13036285 - samples/sec: 13.42 - lr: 0.020000\n",
+      "2021-09-21 21:19:38,911 epoch 4 - iter 18/32 - loss 0.14430237 - samples/sec: 12.37 - lr: 0.020000\n",
+      "2021-09-21 21:19:39,127 epoch 4 - iter 21/32 - loss 0.12766957 - samples/sec: 13.96 - lr: 0.020000\n",
+      "2021-09-21 21:19:39,379 epoch 4 - iter 24/32 - loss 0.15200172 - samples/sec: 11.92 - lr: 0.020000\n",
+      "2021-09-21 21:19:39,581 epoch 4 - iter 27/32 - loss 0.13870011 - samples/sec: 14.92 - lr: 0.020000\n",
+      "2021-09-21 21:19:39,743 epoch 4 - iter 30/32 - loss 0.13882547 - samples/sec: 18.55 - lr: 0.020000\n",
+      "2021-09-21 21:19:39,907 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:39,908 EPOCH 4 done: loss 0.1374 - lr 0.0200000\n",
+      "2021-09-21 21:19:40,162 DEV : loss 0.2308214008808136 - score 1.0\n",
+      "2021-09-21 21:19:40,166 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:19:40,249 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:40,495 epoch 5 - iter 3/32 - loss 0.05045515 - samples/sec: 13.41 - lr: 0.020000\n",
+      "2021-09-21 21:19:40,744 epoch 5 - iter 6/32 - loss 0.04581426 - samples/sec: 12.11 - lr: 0.020000\n",
+      "2021-09-21 21:19:40,967 epoch 5 - iter 9/32 - loss 0.03410822 - samples/sec: 13.50 - lr: 0.020000\n",
+      "2021-09-21 21:19:41,173 epoch 5 - iter 12/32 - loss 0.35846999 - samples/sec: 14.55 - lr: 0.020000\n",
+      "2021-09-21 21:19:41,352 epoch 5 - iter 15/32 - loss 0.29374333 - samples/sec: 16.85 - lr: 0.020000\n",
+      "2021-09-21 21:19:41,517 epoch 5 - iter 18/32 - loss 0.24699684 - samples/sec: 18.26 - lr: 0.020000\n",
+      "2021-09-21 21:19:41,692 epoch 5 - iter 21/32 - loss 0.21210578 - samples/sec: 17.15 - lr: 0.020000\n",
+      "2021-09-21 21:19:41,860 epoch 5 - iter 24/32 - loss 0.22545445 - samples/sec: 17.87 - lr: 0.020000\n",
+      "2021-09-21 21:19:42,034 epoch 5 - iter 27/32 - loss 0.20092307 - samples/sec: 17.35 - lr: 0.020000\n",
+      "2021-09-21 21:19:42,215 epoch 5 - iter 30/32 - loss 0.18110922 - samples/sec: 16.64 - lr: 0.020000\n",
+      "2021-09-21 21:19:42,333 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:41:40,291 EPOCH 6 done: loss 0.1406 - lr 0.0200000\n",
-      "2021-09-08 01:41:40,449 DEV : loss 0.34571486711502075 - score 0.75\n",
-      "2021-09-08 01:41:40,450 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:19:42,334 EPOCH 5 done: loss 0.1698 - lr 0.0200000\n",
+      "2021-09-21 21:19:43,713 DEV : loss 0.0032949820160865784 - score 1.0\n",
+      "2021-09-21 21:19:43,717 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:41:46,347 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:46,588 epoch 7 - iter 3/32 - loss 0.00241450 - samples/sec: 15.73 - lr: 0.020000\n",
-      "2021-09-08 01:41:46,776 epoch 7 - iter 6/32 - loss 0.00356875 - samples/sec: 16.05 - lr: 0.020000\n",
-      "2021-09-08 01:41:46,975 epoch 7 - iter 9/32 - loss 0.02118812 - samples/sec: 15.15 - lr: 0.020000\n",
-      "2021-09-08 01:41:47,155 epoch 7 - iter 12/32 - loss 0.01611065 - samples/sec: 16.71 - lr: 0.020000\n",
-      "2021-09-08 01:41:47,355 epoch 7 - iter 15/32 - loss 0.01886420 - samples/sec: 15.06 - lr: 0.020000\n",
-      "2021-09-08 01:41:47,591 epoch 7 - iter 18/32 - loss 0.01611516 - samples/sec: 12.73 - lr: 0.020000\n",
-      "2021-09-08 01:41:47,818 epoch 7 - iter 21/32 - loss 0.01426083 - samples/sec: 13.25 - lr: 0.020000\n",
-      "2021-09-08 01:41:48,004 epoch 7 - iter 24/32 - loss 0.01646181 - samples/sec: 16.25 - lr: 0.020000\n",
-      "2021-09-08 01:41:48,192 epoch 7 - iter 27/32 - loss 0.01484350 - samples/sec: 16.03 - lr: 0.020000\n",
-      "2021-09-08 01:41:48,342 epoch 7 - iter 30/32 - loss 0.01354820 - samples/sec: 20.03 - lr: 0.020000\n",
-      "2021-09-08 01:41:48,484 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:48,485 EPOCH 7 done: loss 0.0412 - lr 0.0200000\n",
-      "2021-09-08 01:41:48,583 DEV : loss 0.5383607745170593 - score 0.75\n",
-      "2021-09-08 01:41:48,584 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:41:48,587 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:48,810 epoch 8 - iter 3/32 - loss 0.27928426 - samples/sec: 14.55 - lr: 0.020000\n",
-      "2021-09-08 01:41:48,968 epoch 8 - iter 6/32 - loss 0.14027246 - samples/sec: 19.10 - lr: 0.020000\n",
-      "2021-09-08 01:41:49,153 epoch 8 - iter 9/32 - loss 0.09395733 - samples/sec: 16.29 - lr: 0.020000\n",
-      "2021-09-08 01:41:49,317 epoch 8 - iter 12/32 - loss 0.07078815 - samples/sec: 18.33 - lr: 0.020000\n",
-      "2021-09-08 01:41:49,489 epoch 8 - iter 15/32 - loss 0.05699649 - samples/sec: 17.58 - lr: 0.020000\n",
-      "2021-09-08 01:41:49,650 epoch 8 - iter 18/32 - loss 0.04882603 - samples/sec: 18.68 - lr: 0.020000\n",
-      "2021-09-08 01:41:49,798 epoch 8 - iter 21/32 - loss 0.04462313 - samples/sec: 20.48 - lr: 0.020000\n",
-      "2021-09-08 01:41:50,004 epoch 8 - iter 24/32 - loss 0.04455438 - samples/sec: 14.58 - lr: 0.020000\n",
-      "2021-09-08 01:41:50,189 epoch 8 - iter 27/32 - loss 0.03984155 - samples/sec: 19.07 - lr: 0.020000\n",
-      "2021-09-08 01:41:50,356 epoch 8 - iter 30/32 - loss 0.03635229 - samples/sec: 18.01 - lr: 0.020000\n",
-      "2021-09-08 01:41:50,450 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:50,451 EPOCH 8 done: loss 0.0348 - lr 0.0200000\n",
-      "2021-09-08 01:41:50,648 DEV : loss 0.9646499752998352 - score 0.5\n",
-      "2021-09-08 01:41:50,649 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:41:50,734 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:50,908 epoch 9 - iter 3/32 - loss 0.00159821 - samples/sec: 19.92 - lr: 0.020000\n",
-      "2021-09-08 01:41:51,049 epoch 9 - iter 6/32 - loss 0.00115075 - samples/sec: 21.43 - lr: 0.020000\n",
-      "2021-09-08 01:41:51,254 epoch 9 - iter 9/32 - loss 0.00153024 - samples/sec: 14.70 - lr: 0.020000\n",
-      "2021-09-08 01:41:51,389 epoch 9 - iter 12/32 - loss 0.00180451 - samples/sec: 22.27 - lr: 0.020000\n",
-      "2021-09-08 01:41:51,558 epoch 9 - iter 15/32 - loss 0.00153277 - samples/sec: 17.82 - lr: 0.020000\n",
-      "2021-09-08 01:41:51,722 epoch 9 - iter 18/32 - loss 0.01591998 - samples/sec: 18.36 - lr: 0.020000\n",
-      "2021-09-08 01:41:51,865 epoch 9 - iter 21/32 - loss 0.01383967 - samples/sec: 21.01 - lr: 0.020000\n",
-      "2021-09-08 01:41:52,043 epoch 9 - iter 24/32 - loss 0.01217301 - samples/sec: 16.93 - lr: 0.020000\n",
-      "2021-09-08 01:41:52,181 epoch 9 - iter 27/32 - loss 0.01094174 - samples/sec: 21.80 - lr: 0.020000\n",
-      "2021-09-08 01:41:52,361 epoch 9 - iter 30/32 - loss 0.00993956 - samples/sec: 16.72 - lr: 0.020000\n",
-      "2021-09-08 01:41:52,452 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:52,452 EPOCH 9 done: loss 0.0093 - lr 0.0200000\n",
-      "2021-09-08 01:41:52,523 DEV : loss 1.4367717504501343 - score 0.25\n",
-      "2021-09-08 01:41:52,524 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:41:52,526 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:52,761 epoch 10 - iter 3/32 - loss 0.00266028 - samples/sec: 16.14 - lr: 0.020000\n",
-      "2021-09-08 01:41:53,001 epoch 10 - iter 6/32 - loss 0.00287018 - samples/sec: 12.52 - lr: 0.020000\n",
-      "2021-09-08 01:41:53,173 epoch 10 - iter 9/32 - loss 0.00215113 - samples/sec: 17.48 - lr: 0.020000\n",
-      "2021-09-08 01:41:53,362 epoch 10 - iter 12/32 - loss 0.04218474 - samples/sec: 15.93 - lr: 0.020000\n",
-      "2021-09-08 01:41:53,549 epoch 10 - iter 15/32 - loss 0.03383825 - samples/sec: 16.04 - lr: 0.020000\n",
-      "2021-09-08 01:41:53,724 epoch 10 - iter 18/32 - loss 0.02865243 - samples/sec: 17.18 - lr: 0.020000\n",
-      "2021-09-08 01:41:53,931 epoch 10 - iter 21/32 - loss 0.02460917 - samples/sec: 14.57 - lr: 0.020000\n",
-      "2021-09-08 01:41:54,136 epoch 10 - iter 24/32 - loss 0.02160224 - samples/sec: 14.65 - lr: 0.020000\n",
-      "2021-09-08 01:41:54,376 epoch 10 - iter 27/32 - loss 0.01947019 - samples/sec: 12.54 - lr: 0.020000\n",
-      "2021-09-08 01:41:54,555 epoch 10 - iter 30/32 - loss 0.01764841 - samples/sec: 16.77 - lr: 0.020000\n",
-      "2021-09-08 01:41:54,684 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:54,685 EPOCH 10 done: loss 0.0166 - lr 0.0200000\n",
-      "2021-09-08 01:41:54,786 DEV : loss 1.2823927402496338 - score 0.25\n",
-      "Epoch    10: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:41:54,786 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:42:00,505 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:00,506 Testing using best model ...\n",
-      "2021-09-08 01:42:00,532 loading file temp/best-model.pt\n",
+      "2021-09-21 21:19:48,112 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:48,390 epoch 6 - iter 3/32 - loss 0.04447123 - samples/sec: 12.83 - lr: 0.020000\n",
+      "2021-09-21 21:19:48,596 epoch 6 - iter 6/32 - loss 0.02680033 - samples/sec: 14.62 - lr: 0.020000\n",
+      "2021-09-21 21:19:48,821 epoch 6 - iter 9/32 - loss 0.01849799 - samples/sec: 13.38 - lr: 0.020000\n",
+      "2021-09-21 21:19:49,059 epoch 6 - iter 12/32 - loss 0.01400215 - samples/sec: 12.62 - lr: 0.020000\n",
+      "2021-09-21 21:19:49,250 epoch 6 - iter 15/32 - loss 0.01328786 - samples/sec: 15.77 - lr: 0.020000\n",
+      "2021-09-21 21:19:49,483 epoch 6 - iter 18/32 - loss 0.02106162 - samples/sec: 12.91 - lr: 0.020000\n",
+      "2021-09-21 21:19:49,706 epoch 6 - iter 21/32 - loss 0.01933954 - samples/sec: 13.50 - lr: 0.020000\n",
+      "2021-09-21 21:19:49,923 epoch 6 - iter 24/32 - loss 0.01730345 - samples/sec: 13.86 - lr: 0.020000\n",
+      "2021-09-21 21:19:50,128 epoch 6 - iter 27/32 - loss 0.03991545 - samples/sec: 14.64 - lr: 0.020000\n",
+      "2021-09-21 21:19:50,385 epoch 6 - iter 30/32 - loss 0.09406882 - samples/sec: 11.71 - lr: 0.020000\n",
+      "2021-09-21 21:19:50,530 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:50,531 EPOCH 6 done: loss 0.0883 - lr 0.0200000\n",
+      "2021-09-21 21:19:50,670 DEV : loss 0.23316755890846252 - score 1.0\n",
+      "2021-09-21 21:19:50,671 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:19:50,673 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:50,922 epoch 7 - iter 3/32 - loss 0.02141735 - samples/sec: 14.37 - lr: 0.020000\n",
+      "2021-09-21 21:19:51,149 epoch 7 - iter 6/32 - loss 0.04217076 - samples/sec: 13.27 - lr: 0.020000\n",
+      "2021-09-21 21:19:51,348 epoch 7 - iter 9/32 - loss 0.02882168 - samples/sec: 15.12 - lr: 0.020000\n",
+      "2021-09-21 21:19:51,534 epoch 7 - iter 12/32 - loss 0.02182465 - samples/sec: 16.20 - lr: 0.020000\n",
+      "2021-09-21 21:19:51,795 epoch 7 - iter 15/32 - loss 0.01763553 - samples/sec: 11.51 - lr: 0.020000\n",
+      "2021-09-21 21:19:52,028 epoch 7 - iter 18/32 - loss 0.04403527 - samples/sec: 12.94 - lr: 0.020000\n",
+      "2021-09-21 21:19:52,259 epoch 7 - iter 21/32 - loss 0.03791605 - samples/sec: 13.01 - lr: 0.020000\n",
+      "2021-09-21 21:19:52,500 epoch 7 - iter 24/32 - loss 0.03363682 - samples/sec: 12.45 - lr: 0.020000\n",
+      "2021-09-21 21:19:52,776 epoch 7 - iter 27/32 - loss 0.03127557 - samples/sec: 10.89 - lr: 0.020000\n",
+      "2021-09-21 21:19:53,005 epoch 7 - iter 30/32 - loss 0.07935192 - samples/sec: 13.19 - lr: 0.020000\n",
+      "2021-09-21 21:19:53,168 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:53,169 EPOCH 7 done: loss 0.0745 - lr 0.0200000\n",
+      "2021-09-21 21:19:53,314 DEV : loss 0.3341691792011261 - score 0.75\n",
+      "2021-09-21 21:19:53,315 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:19:53,318 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:53,580 epoch 8 - iter 3/32 - loss 0.00256053 - samples/sec: 12.52 - lr: 0.020000\n",
+      "2021-09-21 21:19:53,811 epoch 8 - iter 6/32 - loss 0.00380299 - samples/sec: 12.98 - lr: 0.020000\n",
+      "2021-09-21 21:19:54,071 epoch 8 - iter 9/32 - loss 0.00276689 - samples/sec: 11.59 - lr: 0.020000\n",
+      "2021-09-21 21:19:54,312 epoch 8 - iter 12/32 - loss 0.19989364 - samples/sec: 12.47 - lr: 0.020000\n",
+      "2021-09-21 21:19:54,548 epoch 8 - iter 15/32 - loss 0.16113187 - samples/sec: 12.76 - lr: 0.020000\n",
+      "2021-09-21 21:19:54,763 epoch 8 - iter 18/32 - loss 0.13447823 - samples/sec: 13.99 - lr: 0.020000\n",
+      "2021-09-21 21:19:54,986 epoch 8 - iter 21/32 - loss 0.11553529 - samples/sec: 13.48 - lr: 0.020000\n",
+      "2021-09-21 21:19:55,216 epoch 8 - iter 24/32 - loss 0.10123910 - samples/sec: 13.07 - lr: 0.020000\n",
+      "2021-09-21 21:19:55,465 epoch 8 - iter 27/32 - loss 0.09083652 - samples/sec: 12.07 - lr: 0.020000\n",
+      "2021-09-21 21:19:55,698 epoch 8 - iter 30/32 - loss 0.08187866 - samples/sec: 12.93 - lr: 0.020000\n",
+      "2021-09-21 21:19:55,859 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:55,860 EPOCH 8 done: loss 0.0768 - lr 0.0200000\n",
+      "2021-09-21 21:19:56,009 DEV : loss 0.009588990360498428 - score 1.0\n",
+      "2021-09-21 21:19:56,013 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:19:56,015 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:56,293 epoch 9 - iter 3/32 - loss 0.00124537 - samples/sec: 11.54 - lr: 0.020000\n",
+      "2021-09-21 21:19:56,559 epoch 9 - iter 6/32 - loss 0.00105572 - samples/sec: 11.30 - lr: 0.020000\n",
+      "2021-09-21 21:19:56,781 epoch 9 - iter 9/32 - loss 0.00085980 - samples/sec: 13.57 - lr: 0.020000\n",
+      "2021-09-21 21:19:57,036 epoch 9 - iter 12/32 - loss 0.00080388 - samples/sec: 11.76 - lr: 0.020000\n",
+      "2021-09-21 21:19:57,260 epoch 9 - iter 15/32 - loss 0.00109635 - samples/sec: 13.47 - lr: 0.020000\n",
+      "2021-09-21 21:19:57,472 epoch 9 - iter 18/32 - loss 0.00094645 - samples/sec: 14.19 - lr: 0.020000\n",
+      "2021-09-21 21:19:57,681 epoch 9 - iter 21/32 - loss 0.00213329 - samples/sec: 14.34 - lr: 0.020000\n",
+      "2021-09-21 21:19:57,889 epoch 9 - iter 24/32 - loss 0.00201135 - samples/sec: 14.45 - lr: 0.020000\n",
+      "2021-09-21 21:19:58,083 epoch 9 - iter 27/32 - loss 0.00201783 - samples/sec: 15.53 - lr: 0.020000\n",
+      "2021-09-21 21:19:58,287 epoch 9 - iter 30/32 - loss 0.00188094 - samples/sec: 14.79 - lr: 0.020000\n",
+      "2021-09-21 21:19:58,422 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:58,423 EPOCH 9 done: loss 0.0443 - lr 0.0200000\n",
+      "2021-09-21 21:19:58,640 DEV : loss 0.009770085103809834 - score 1.0\n",
+      "Epoch     9: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:19:58,643 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:19:58,721 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:58,956 epoch 10 - iter 3/32 - loss 0.00038460 - samples/sec: 15.65 - lr: 0.010000\n",
+      "2021-09-21 21:19:59,151 epoch 10 - iter 6/32 - loss 0.00093143 - samples/sec: 15.38 - lr: 0.010000\n",
+      "2021-09-21 21:19:59,331 epoch 10 - iter 9/32 - loss 0.00080649 - samples/sec: 16.82 - lr: 0.010000\n",
+      "2021-09-21 21:19:59,513 epoch 10 - iter 12/32 - loss 0.00142599 - samples/sec: 16.49 - lr: 0.010000\n",
+      "2021-09-21 21:19:59,711 epoch 10 - iter 15/32 - loss 0.00134155 - samples/sec: 15.20 - lr: 0.010000\n",
+      "2021-09-21 21:19:59,900 epoch 10 - iter 18/32 - loss 0.00129433 - samples/sec: 15.91 - lr: 0.010000\n",
+      "2021-09-21 21:20:00,094 epoch 10 - iter 21/32 - loss 0.00125096 - samples/sec: 15.58 - lr: 0.010000\n",
+      "2021-09-21 21:20:00,272 epoch 10 - iter 24/32 - loss 0.00138313 - samples/sec: 16.92 - lr: 0.010000\n",
+      "2021-09-21 21:20:00,450 epoch 10 - iter 27/32 - loss 0.00125791 - samples/sec: 16.90 - lr: 0.010000\n",
+      "2021-09-21 21:20:00,647 epoch 10 - iter 30/32 - loss 0.00143428 - samples/sec: 15.28 - lr: 0.010000\n",
+      "2021-09-21 21:20:00,776 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:00,777 EPOCH 10 done: loss 0.0017 - lr 0.0100000\n",
+      "2021-09-21 21:20:00,988 DEV : loss 0.10781832039356232 - score 1.0\n",
+      "2021-09-21 21:20:00,989 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:20:14,446 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:14,446 Testing using best model ...\n",
+      "2021-09-21 21:20:14,448 loading file temp/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:42:06,330 \t0.5\n",
-      "2021-09-08 01:42:06,331 \n",
+      "2021-09-21 21:20:19,253 \t0.5\n",
+      "2021-09-21 21:20:19,253 \n",
       "Results:\n",
       "- F-score (micro) 0.5\n",
       "- F-score (macro) 0.3333\n",
@@ -7922,36 +7946,42 @@
       "By class:\n",
       "                                                           precision    recall  f1-score   support\n",
       "\n",
-      "   emotions experienced when not in a state of well-being     0.5000    1.0000    0.6667         1\n",
+      "   emotions experienced when not in a state of well-being     1.0000    0.5000    0.6667         2\n",
       "the optimistic feeling that all is going to turn out well     0.0000    0.0000    0.0000         1\n",
-      "                                 the state of being angry     0.0000    0.0000    0.0000         1\n",
-      "                           the emotion of great happiness     0.5000    1.0000    0.6667         1\n",
+      "                                 the state of being angry     0.5000    1.0000    0.6667         1\n",
+      "                           the emotion of great happiness     0.0000    0.0000    0.0000         0\n",
       "\n",
       "                                                micro avg     0.5000    0.5000    0.5000         4\n",
-      "                                                macro avg     0.2500    0.5000    0.3333         4\n",
-      "                                             weighted avg     0.2500    0.5000    0.3333         4\n",
+      "                                                macro avg     0.3750    0.3750    0.3333         4\n",
+      "                                             weighted avg     0.6250    0.5000    0.5000         4\n",
       "                                              samples avg     0.5000    0.5000    0.5000         4\n",
-      "\n",
-      "2021-09-08 01:42:06,331 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:21,042 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 21:20:19,254 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:31,637 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:42:25,208 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:20:35,983 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 36/36 [00:00<00:00, 45700.65it/s]"
+      "100%|██████████| 36/36 [00:00<00:00, 35270.95it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:42:25,210 [b'emotions experienced when not in a state of well-being', b'the optimistic feeling that all is going to turn out well', b'the state of being angry', b'the emotion of great happiness']\n",
-      "2021-09-08 01:42:25,221 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:25,223 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:20:35,986 [b'emotions experienced when not in a state of well-being', b'the optimistic feeling that all is going to turn out well', b'the state of being angry', b'the emotion of great happiness']\n",
+      "2021-09-21 21:20:36,122 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:36,124 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -8264,25 +8294,24 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:42:25,223 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:25,224 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
-      "2021-09-08 01:42:25,224 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:25,224 Parameters:\n",
-      "2021-09-08 01:42:25,224  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:42:25,225  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:42:25,225  - patience: \"3\"\n",
-      "2021-09-08 01:42:25,225  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:42:25,226  - max_epochs: \"10\"\n",
-      "2021-09-08 01:42:25,226  - shuffle: \"True\"\n",
-      "2021-09-08 01:42:25,226  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:42:25,226  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:42:25,227 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:25,227 Model training base path: \"temp\"\n",
-      "2021-09-08 01:42:25,227 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:25,228 Device: cuda:0\n",
-      "2021-09-08 01:42:25,228 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:25,228 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:42:25,235 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:20:36,124 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:36,125 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
+      "2021-09-21 21:20:36,125 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:36,125 Parameters:\n",
+      "2021-09-21 21:20:36,126  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:20:36,126  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:20:36,126  - patience: \"3\"\n",
+      "2021-09-21 21:20:36,127  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:20:36,127  - max_epochs: \"10\"\n",
+      "2021-09-21 21:20:36,127  - shuffle: \"True\"\n",
+      "2021-09-21 21:20:36,128  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:20:36,128  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:20:36,128 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:36,129 Model training base path: \"temp\"\n",
+      "2021-09-21 21:20:36,129 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:36,130 Device: cuda:0\n",
+      "2021-09-21 21:20:36,130 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:36,130 Embeddings storage mode: cpu\n"
      ]
     },
     {
@@ -8296,172 +8325,173 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:42:25,495 epoch 1 - iter 3/32 - loss 0.16044668 - samples/sec: 12.65 - lr: 0.020000\n",
-      "2021-09-08 01:42:25,660 epoch 1 - iter 6/32 - loss 0.14499961 - samples/sec: 18.25 - lr: 0.020000\n",
-      "2021-09-08 01:42:25,837 epoch 1 - iter 9/32 - loss 0.38545580 - samples/sec: 16.96 - lr: 0.020000\n",
-      "2021-09-08 01:42:26,029 epoch 1 - iter 12/32 - loss 0.65300549 - samples/sec: 15.69 - lr: 0.020000\n",
-      "2021-09-08 01:42:26,225 epoch 1 - iter 15/32 - loss 0.63095370 - samples/sec: 15.38 - lr: 0.020000\n",
-      "2021-09-08 01:42:26,455 epoch 1 - iter 18/32 - loss 0.74342215 - samples/sec: 13.03 - lr: 0.020000\n",
-      "2021-09-08 01:42:26,714 epoch 1 - iter 21/32 - loss 0.67308049 - samples/sec: 11.61 - lr: 0.020000\n",
-      "2021-09-08 01:42:26,893 epoch 1 - iter 24/32 - loss 0.70127798 - samples/sec: 16.81 - lr: 0.020000\n",
-      "2021-09-08 01:42:27,096 epoch 1 - iter 27/32 - loss 0.75530387 - samples/sec: 14.84 - lr: 0.020000\n",
-      "2021-09-08 01:42:27,291 epoch 1 - iter 30/32 - loss 0.72628456 - samples/sec: 15.43 - lr: 0.020000\n",
-      "2021-09-08 01:42:27,418 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:27,419 EPOCH 1 done: loss 0.6980 - lr 0.0200000\n",
-      "2021-09-08 01:42:27,547 DEV : loss 1.1512081623077393 - score 0.25\n",
-      "2021-09-08 01:42:27,547 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:20:36,300 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:36,528 epoch 1 - iter 3/32 - loss 0.92070419 - samples/sec: 15.83 - lr: 0.020000\n",
+      "2021-09-21 21:20:36,712 epoch 1 - iter 6/32 - loss 0.53713731 - samples/sec: 16.40 - lr: 0.020000\n",
+      "2021-09-21 21:20:36,905 epoch 1 - iter 9/32 - loss 0.80102564 - samples/sec: 15.57 - lr: 0.020000\n",
+      "2021-09-21 21:20:37,104 epoch 1 - iter 12/32 - loss 0.73162211 - samples/sec: 15.11 - lr: 0.020000\n",
+      "2021-09-21 21:20:37,411 epoch 1 - iter 15/32 - loss 0.63223438 - samples/sec: 9.81 - lr: 0.020000\n",
+      "2021-09-21 21:20:37,712 epoch 1 - iter 18/32 - loss 0.73982319 - samples/sec: 9.98 - lr: 0.020000\n",
+      "2021-09-21 21:20:38,000 epoch 1 - iter 21/32 - loss 0.64484014 - samples/sec: 10.46 - lr: 0.020000\n",
+      "2021-09-21 21:20:38,291 epoch 1 - iter 24/32 - loss 0.56769123 - samples/sec: 10.31 - lr: 0.020000\n",
+      "2021-09-21 21:20:38,604 epoch 1 - iter 27/32 - loss 0.82678856 - samples/sec: 9.62 - lr: 0.020000\n",
+      "2021-09-21 21:20:38,902 epoch 1 - iter 30/32 - loss 0.83476480 - samples/sec: 10.08 - lr: 0.020000\n",
+      "2021-09-21 21:20:39,089 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:39,090 EPOCH 1 done: loss 0.8125 - lr 0.0200000\n",
+      "2021-09-21 21:20:39,260 DEV : loss 0.4346325397491455 - score 0.5\n",
+      "2021-09-21 21:20:39,261 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:42:33,332 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:33,503 epoch 2 - iter 3/32 - loss 1.27969330 - samples/sec: 19.78 - lr: 0.020000\n",
-      "2021-09-08 01:42:33,701 epoch 2 - iter 6/32 - loss 0.84439598 - samples/sec: 15.18 - lr: 0.020000\n",
-      "2021-09-08 01:42:33,872 epoch 2 - iter 9/32 - loss 0.64624744 - samples/sec: 17.60 - lr: 0.020000\n",
-      "2021-09-08 01:42:34,016 epoch 2 - iter 12/32 - loss 0.62489253 - samples/sec: 20.92 - lr: 0.020000\n",
-      "2021-09-08 01:42:34,230 epoch 2 - iter 15/32 - loss 0.69781386 - samples/sec: 14.10 - lr: 0.020000\n",
-      "2021-09-08 01:42:34,378 epoch 2 - iter 18/32 - loss 0.65125087 - samples/sec: 20.37 - lr: 0.020000\n",
-      "2021-09-08 01:42:34,561 epoch 2 - iter 21/32 - loss 0.66778910 - samples/sec: 16.40 - lr: 0.020000\n",
-      "2021-09-08 01:42:34,717 epoch 2 - iter 24/32 - loss 0.67635295 - samples/sec: 19.31 - lr: 0.020000\n",
-      "2021-09-08 01:42:34,895 epoch 2 - iter 27/32 - loss 0.67586565 - samples/sec: 16.92 - lr: 0.020000\n",
-      "2021-09-08 01:42:35,075 epoch 2 - iter 30/32 - loss 0.74266568 - samples/sec: 16.75 - lr: 0.020000\n",
-      "2021-09-08 01:42:35,172 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:35,173 EPOCH 2 done: loss 0.7124 - lr 0.0200000\n",
-      "2021-09-08 01:42:35,387 DEV : loss 0.6610767841339111 - score 0.5\n",
-      "2021-09-08 01:42:35,388 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:20:44,609 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:44,834 epoch 2 - iter 3/32 - loss 0.78098626 - samples/sec: 14.56 - lr: 0.020000\n",
+      "2021-09-21 21:20:45,083 epoch 2 - iter 6/32 - loss 0.47693399 - samples/sec: 12.07 - lr: 0.020000\n",
+      "2021-09-21 21:20:45,321 epoch 2 - iter 9/32 - loss 0.51903166 - samples/sec: 12.68 - lr: 0.020000\n",
+      "2021-09-21 21:20:45,538 epoch 2 - iter 12/32 - loss 0.42005670 - samples/sec: 13.89 - lr: 0.020000\n",
+      "2021-09-21 21:20:45,780 epoch 2 - iter 15/32 - loss 0.44058804 - samples/sec: 12.43 - lr: 0.020000\n",
+      "2021-09-21 21:20:46,012 epoch 2 - iter 18/32 - loss 0.46659219 - samples/sec: 13.01 - lr: 0.020000\n",
+      "2021-09-21 21:20:46,283 epoch 2 - iter 21/32 - loss 0.46530348 - samples/sec: 11.10 - lr: 0.020000\n",
+      "2021-09-21 21:20:46,527 epoch 2 - iter 24/32 - loss 0.50427100 - samples/sec: 12.31 - lr: 0.020000\n",
+      "2021-09-21 21:20:46,766 epoch 2 - iter 27/32 - loss 0.52269381 - samples/sec: 12.58 - lr: 0.020000\n",
+      "2021-09-21 21:20:46,987 epoch 2 - iter 30/32 - loss 0.49390034 - samples/sec: 13.67 - lr: 0.020000\n",
+      "2021-09-21 21:20:47,158 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:47,159 EPOCH 2 done: loss 0.5220 - lr 0.0200000\n",
+      "2021-09-21 21:20:47,411 DEV : loss 0.5891913175582886 - score 0.5\n",
+      "2021-09-21 21:20:47,415 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:20:47,497 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:47,792 epoch 3 - iter 3/32 - loss 0.34748930 - samples/sec: 12.67 - lr: 0.020000\n",
+      "2021-09-21 21:20:48,031 epoch 3 - iter 6/32 - loss 0.26435554 - samples/sec: 12.62 - lr: 0.020000\n",
+      "2021-09-21 21:20:48,271 epoch 3 - iter 9/32 - loss 0.50563958 - samples/sec: 12.55 - lr: 0.020000\n",
+      "2021-09-21 21:20:48,496 epoch 3 - iter 12/32 - loss 0.48037746 - samples/sec: 13.38 - lr: 0.020000\n",
+      "2021-09-21 21:20:48,734 epoch 3 - iter 15/32 - loss 0.43916369 - samples/sec: 12.63 - lr: 0.020000\n",
+      "2021-09-21 21:20:48,968 epoch 3 - iter 18/32 - loss 0.42263004 - samples/sec: 12.87 - lr: 0.020000\n",
+      "2021-09-21 21:20:49,174 epoch 3 - iter 21/32 - loss 0.41371160 - samples/sec: 14.62 - lr: 0.020000\n",
+      "2021-09-21 21:20:49,394 epoch 3 - iter 24/32 - loss 0.42706723 - samples/sec: 13.71 - lr: 0.020000\n",
+      "2021-09-21 21:20:49,605 epoch 3 - iter 27/32 - loss 0.40549772 - samples/sec: 14.28 - lr: 0.020000\n",
+      "2021-09-21 21:20:49,829 epoch 3 - iter 30/32 - loss 0.36805301 - samples/sec: 13.40 - lr: 0.020000\n",
+      "2021-09-21 21:20:49,955 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:49,956 EPOCH 3 done: loss 0.3465 - lr 0.0200000\n",
+      "2021-09-21 21:20:50,552 DEV : loss 0.523633599281311 - score 1.0\n",
+      "2021-09-21 21:20:50,552 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:42:41,583 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:41,746 epoch 3 - iter 3/32 - loss 0.18681300 - samples/sec: 20.65 - lr: 0.020000\n",
-      "2021-09-08 01:42:41,934 epoch 3 - iter 6/32 - loss 0.41416004 - samples/sec: 15.95 - lr: 0.020000\n",
-      "2021-09-08 01:42:42,090 epoch 3 - iter 9/32 - loss 0.42899625 - samples/sec: 19.37 - lr: 0.020000\n",
-      "2021-09-08 01:42:42,262 epoch 3 - iter 12/32 - loss 0.33146459 - samples/sec: 17.52 - lr: 0.020000\n",
-      "2021-09-08 01:42:42,433 epoch 3 - iter 15/32 - loss 0.45598425 - samples/sec: 17.55 - lr: 0.020000\n",
-      "2021-09-08 01:42:42,581 epoch 3 - iter 18/32 - loss 0.52631778 - samples/sec: 20.43 - lr: 0.020000\n",
-      "2021-09-08 01:42:42,779 epoch 3 - iter 21/32 - loss 0.52767805 - samples/sec: 15.24 - lr: 0.020000\n",
-      "2021-09-08 01:42:42,928 epoch 3 - iter 24/32 - loss 0.52099348 - samples/sec: 20.18 - lr: 0.020000\n",
-      "2021-09-08 01:42:43,109 epoch 3 - iter 27/32 - loss 0.47791339 - samples/sec: 16.70 - lr: 0.020000\n",
-      "2021-09-08 01:42:43,280 epoch 3 - iter 30/32 - loss 0.43512572 - samples/sec: 17.59 - lr: 0.020000\n",
-      "2021-09-08 01:42:43,401 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:43,402 EPOCH 3 done: loss 0.4634 - lr 0.0200000\n",
-      "2021-09-08 01:42:43,495 DEV : loss 1.0212266445159912 - score 0.5\n",
-      "2021-09-08 01:42:43,495 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:42:43,498 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:43,661 epoch 4 - iter 3/32 - loss 0.29039867 - samples/sec: 21.21 - lr: 0.020000\n",
-      "2021-09-08 01:42:43,824 epoch 4 - iter 6/32 - loss 0.21148504 - samples/sec: 18.51 - lr: 0.020000\n",
-      "2021-09-08 01:42:43,998 epoch 4 - iter 9/32 - loss 0.38839414 - samples/sec: 17.27 - lr: 0.020000\n",
-      "2021-09-08 01:42:44,137 epoch 4 - iter 12/32 - loss 0.29865251 - samples/sec: 21.74 - lr: 0.020000\n",
-      "2021-09-08 01:42:44,338 epoch 4 - iter 15/32 - loss 0.37263864 - samples/sec: 14.94 - lr: 0.020000\n",
-      "2021-09-08 01:42:44,479 epoch 4 - iter 18/32 - loss 0.31396422 - samples/sec: 21.32 - lr: 0.020000\n",
-      "2021-09-08 01:42:44,673 epoch 4 - iter 21/32 - loss 0.31166168 - samples/sec: 15.55 - lr: 0.020000\n",
-      "2021-09-08 01:42:44,831 epoch 4 - iter 24/32 - loss 0.35328800 - samples/sec: 19.02 - lr: 0.020000\n",
-      "2021-09-08 01:42:45,012 epoch 4 - iter 27/32 - loss 0.36329323 - samples/sec: 16.61 - lr: 0.020000\n",
-      "2021-09-08 01:42:45,182 epoch 4 - iter 30/32 - loss 0.35561652 - samples/sec: 17.72 - lr: 0.020000\n",
-      "2021-09-08 01:42:45,277 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:45,278 EPOCH 4 done: loss 0.3629 - lr 0.0200000\n",
-      "2021-09-08 01:42:45,357 DEV : loss 0.6310514807701111 - score 0.75\n",
-      "2021-09-08 01:42:45,357 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:21:00,162 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:00,453 epoch 4 - iter 3/32 - loss 0.83378015 - samples/sec: 11.90 - lr: 0.020000\n",
+      "2021-09-21 21:21:00,697 epoch 4 - iter 6/32 - loss 0.42115517 - samples/sec: 12.32 - lr: 0.020000\n",
+      "2021-09-21 21:21:00,930 epoch 4 - iter 9/32 - loss 0.28429860 - samples/sec: 12.91 - lr: 0.020000\n",
+      "2021-09-21 21:21:01,156 epoch 4 - iter 12/32 - loss 0.21400940 - samples/sec: 13.29 - lr: 0.020000\n",
+      "2021-09-21 21:21:01,387 epoch 4 - iter 15/32 - loss 0.17670974 - samples/sec: 13.03 - lr: 0.020000\n",
+      "2021-09-21 21:21:01,629 epoch 4 - iter 18/32 - loss 0.15562297 - samples/sec: 12.42 - lr: 0.020000\n",
+      "2021-09-21 21:21:01,873 epoch 4 - iter 21/32 - loss 0.17396088 - samples/sec: 12.34 - lr: 0.020000\n",
+      "2021-09-21 21:21:02,109 epoch 4 - iter 24/32 - loss 0.22910445 - samples/sec: 12.76 - lr: 0.020000\n",
+      "2021-09-21 21:21:02,345 epoch 4 - iter 27/32 - loss 0.23904353 - samples/sec: 12.74 - lr: 0.020000\n",
+      "2021-09-21 21:21:02,590 epoch 4 - iter 30/32 - loss 0.21629815 - samples/sec: 12.28 - lr: 0.020000\n",
+      "2021-09-21 21:21:02,758 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:02,758 EPOCH 4 done: loss 0.2307 - lr 0.0200000\n",
+      "2021-09-21 21:21:02,937 DEV : loss 0.3726644814014435 - score 1.0\n",
+      "2021-09-21 21:21:02,941 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:42:49,441 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:49,706 epoch 5 - iter 3/32 - loss 0.11229993 - samples/sec: 13.17 - lr: 0.020000\n",
-      "2021-09-08 01:42:49,872 epoch 5 - iter 6/32 - loss 0.50579556 - samples/sec: 18.19 - lr: 0.020000\n",
-      "2021-09-08 01:42:50,076 epoch 5 - iter 9/32 - loss 0.40411663 - samples/sec: 14.76 - lr: 0.020000\n",
-      "2021-09-08 01:42:50,275 epoch 5 - iter 12/32 - loss 0.35191339 - samples/sec: 15.09 - lr: 0.020000\n",
-      "2021-09-08 01:42:50,451 epoch 5 - iter 15/32 - loss 0.28869560 - samples/sec: 17.09 - lr: 0.020000\n",
-      "2021-09-08 01:42:50,686 epoch 5 - iter 18/32 - loss 0.24478836 - samples/sec: 12.85 - lr: 0.020000\n",
-      "2021-09-08 01:42:50,861 epoch 5 - iter 21/32 - loss 0.21406741 - samples/sec: 17.21 - lr: 0.020000\n",
-      "2021-09-08 01:42:51,032 epoch 5 - iter 24/32 - loss 0.19202038 - samples/sec: 17.52 - lr: 0.020000\n",
-      "2021-09-08 01:42:51,208 epoch 5 - iter 27/32 - loss 0.23716078 - samples/sec: 17.18 - lr: 0.020000\n",
-      "2021-09-08 01:42:51,411 epoch 5 - iter 30/32 - loss 0.23649662 - samples/sec: 14.80 - lr: 0.020000\n",
-      "2021-09-08 01:42:51,576 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:51,576 EPOCH 5 done: loss 0.2220 - lr 0.0200000\n",
-      "2021-09-08 01:42:51,710 DEV : loss 0.4493107497692108 - score 0.75\n",
-      "2021-09-08 01:42:51,713 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:43:02,022 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:02,233 epoch 6 - iter 3/32 - loss 0.00765784 - samples/sec: 16.40 - lr: 0.020000\n",
-      "2021-09-08 01:43:02,456 epoch 6 - iter 6/32 - loss 0.00772858 - samples/sec: 13.49 - lr: 0.020000\n",
-      "2021-09-08 01:43:02,637 epoch 6 - iter 9/32 - loss 0.00725193 - samples/sec: 16.61 - lr: 0.020000\n",
-      "2021-09-08 01:43:02,812 epoch 6 - iter 12/32 - loss 0.00648229 - samples/sec: 17.15 - lr: 0.020000\n",
-      "2021-09-08 01:43:03,016 epoch 6 - iter 15/32 - loss 0.01918585 - samples/sec: 14.82 - lr: 0.020000\n",
-      "2021-09-08 01:43:03,238 epoch 6 - iter 18/32 - loss 0.04128725 - samples/sec: 13.54 - lr: 0.020000\n",
-      "2021-09-08 01:43:03,412 epoch 6 - iter 21/32 - loss 0.03920396 - samples/sec: 17.29 - lr: 0.020000\n",
-      "2021-09-08 01:43:03,587 epoch 6 - iter 24/32 - loss 0.03924686 - samples/sec: 17.19 - lr: 0.020000\n",
-      "2021-09-08 01:43:03,755 epoch 6 - iter 27/32 - loss 0.03853324 - samples/sec: 18.01 - lr: 0.020000\n",
-      "2021-09-08 01:43:03,953 epoch 6 - iter 30/32 - loss 0.03537006 - samples/sec: 15.15 - lr: 0.020000\n",
-      "2021-09-08 01:43:04,084 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:21:06,822 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:07,104 epoch 5 - iter 3/32 - loss 0.04336572 - samples/sec: 12.47 - lr: 0.020000\n",
+      "2021-09-21 21:21:07,342 epoch 5 - iter 6/32 - loss 0.09039612 - samples/sec: 12.68 - lr: 0.020000\n",
+      "2021-09-21 21:21:07,570 epoch 5 - iter 9/32 - loss 0.06352230 - samples/sec: 13.19 - lr: 0.020000\n",
+      "2021-09-21 21:21:07,829 epoch 5 - iter 12/32 - loss 0.06756294 - samples/sec: 11.63 - lr: 0.020000\n",
+      "2021-09-21 21:21:08,061 epoch 5 - iter 15/32 - loss 0.05437076 - samples/sec: 12.96 - lr: 0.020000\n",
+      "2021-09-21 21:21:08,267 epoch 5 - iter 18/32 - loss 0.08005907 - samples/sec: 14.61 - lr: 0.020000\n",
+      "2021-09-21 21:21:08,511 epoch 5 - iter 21/32 - loss 0.14118777 - samples/sec: 12.30 - lr: 0.020000\n",
+      "2021-09-21 21:21:08,750 epoch 5 - iter 24/32 - loss 0.12409514 - samples/sec: 12.60 - lr: 0.020000\n",
+      "2021-09-21 21:21:09,006 epoch 5 - iter 27/32 - loss 0.11042501 - samples/sec: 11.74 - lr: 0.020000\n",
+      "2021-09-21 21:21:09,268 epoch 5 - iter 30/32 - loss 0.10348325 - samples/sec: 11.47 - lr: 0.020000\n",
+      "2021-09-21 21:21:09,419 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:09,419 EPOCH 5 done: loss 0.1557 - lr 0.0200000\n",
+      "2021-09-21 21:21:09,587 DEV : loss 0.33442869782447815 - score 0.75\n",
+      "2021-09-21 21:21:09,588 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:21:09,590 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:09,855 epoch 6 - iter 3/32 - loss 0.04459483 - samples/sec: 13.12 - lr: 0.020000\n",
+      "2021-09-21 21:21:10,082 epoch 6 - iter 6/32 - loss 0.02432561 - samples/sec: 13.28 - lr: 0.020000\n",
+      "2021-09-21 21:21:10,305 epoch 6 - iter 9/32 - loss 0.01658907 - samples/sec: 13.49 - lr: 0.020000\n",
+      "2021-09-21 21:21:10,519 epoch 6 - iter 12/32 - loss 0.01314641 - samples/sec: 14.05 - lr: 0.020000\n",
+      "2021-09-21 21:21:10,792 epoch 6 - iter 15/32 - loss 0.07186284 - samples/sec: 11.01 - lr: 0.020000\n",
+      "2021-09-21 21:21:11,016 epoch 6 - iter 18/32 - loss 0.06169073 - samples/sec: 13.40 - lr: 0.020000\n",
+      "2021-09-21 21:21:11,250 epoch 6 - iter 21/32 - loss 0.05349317 - samples/sec: 12.85 - lr: 0.020000\n",
+      "2021-09-21 21:21:11,609 epoch 6 - iter 24/32 - loss 0.04955323 - samples/sec: 8.39 - lr: 0.020000\n",
+      "2021-09-21 21:21:11,848 epoch 6 - iter 27/32 - loss 0.06009157 - samples/sec: 12.59 - lr: 0.020000\n",
+      "2021-09-21 21:21:12,099 epoch 6 - iter 30/32 - loss 0.05422683 - samples/sec: 11.95 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:43:04,085 EPOCH 6 done: loss 0.0333 - lr 0.0200000\n",
-      "2021-09-08 01:43:04,241 DEV : loss 0.7274999618530273 - score 0.75\n",
-      "2021-09-08 01:43:04,243 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:43:04,244 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:04,452 epoch 7 - iter 3/32 - loss 0.25449937 - samples/sec: 15.94 - lr: 0.020000\n",
-      "2021-09-08 01:43:04,618 epoch 7 - iter 6/32 - loss 0.13087208 - samples/sec: 18.17 - lr: 0.020000\n",
-      "2021-09-08 01:43:04,799 epoch 7 - iter 9/32 - loss 0.08813624 - samples/sec: 16.61 - lr: 0.020000\n",
-      "2021-09-08 01:43:04,996 epoch 7 - iter 12/32 - loss 0.06717893 - samples/sec: 15.30 - lr: 0.020000\n",
-      "2021-09-08 01:43:05,246 epoch 7 - iter 15/32 - loss 0.05399810 - samples/sec: 12.00 - lr: 0.020000\n",
-      "2021-09-08 01:43:05,428 epoch 7 - iter 18/32 - loss 0.04594115 - samples/sec: 16.58 - lr: 0.020000\n",
-      "2021-09-08 01:43:05,616 epoch 7 - iter 21/32 - loss 0.03965716 - samples/sec: 16.01 - lr: 0.020000\n",
-      "2021-09-08 01:43:05,773 epoch 7 - iter 24/32 - loss 0.03550288 - samples/sec: 19.13 - lr: 0.020000\n",
-      "2021-09-08 01:43:05,966 epoch 7 - iter 27/32 - loss 0.03179173 - samples/sec: 15.57 - lr: 0.020000\n",
-      "2021-09-08 01:43:06,143 epoch 7 - iter 30/32 - loss 0.02870252 - samples/sec: 16.97 - lr: 0.020000\n",
-      "2021-09-08 01:43:06,302 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:06,302 EPOCH 7 done: loss 0.0271 - lr 0.0200000\n",
-      "2021-09-08 01:43:06,415 DEV : loss 0.8284226655960083 - score 0.75\n",
-      "2021-09-08 01:43:06,415 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:43:06,417 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:06,614 epoch 8 - iter 3/32 - loss 0.00092583 - samples/sec: 17.18 - lr: 0.020000\n",
-      "2021-09-08 01:43:06,826 epoch 8 - iter 6/32 - loss 0.00200585 - samples/sec: 14.21 - lr: 0.020000\n",
-      "2021-09-08 01:43:07,004 epoch 8 - iter 9/32 - loss 0.00154059 - samples/sec: 16.94 - lr: 0.020000\n",
-      "2021-09-08 01:43:07,191 epoch 8 - iter 12/32 - loss 0.00147282 - samples/sec: 16.03 - lr: 0.020000\n",
-      "2021-09-08 01:43:07,373 epoch 8 - iter 15/32 - loss 0.00242494 - samples/sec: 16.53 - lr: 0.020000\n",
-      "2021-09-08 01:43:07,557 epoch 8 - iter 18/32 - loss 0.00266406 - samples/sec: 16.36 - lr: 0.020000\n",
-      "2021-09-08 01:43:07,775 epoch 8 - iter 21/32 - loss 0.00880955 - samples/sec: 13.85 - lr: 0.020000\n",
-      "2021-09-08 01:43:07,975 epoch 8 - iter 24/32 - loss 0.01876813 - samples/sec: 15.06 - lr: 0.020000\n",
-      "2021-09-08 01:43:08,161 epoch 8 - iter 27/32 - loss 0.01685902 - samples/sec: 16.15 - lr: 0.020000\n",
-      "2021-09-08 01:43:08,361 epoch 8 - iter 30/32 - loss 0.01544952 - samples/sec: 15.04 - lr: 0.020000\n",
-      "2021-09-08 01:43:08,514 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:08,514 EPOCH 8 done: loss 0.0145 - lr 0.0200000\n",
-      "2021-09-08 01:43:08,630 DEV : loss 0.8485546112060547 - score 0.75\n",
-      "2021-09-08 01:43:08,631 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:43:08,633 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:08,832 epoch 9 - iter 3/32 - loss 0.23461427 - samples/sec: 16.93 - lr: 0.020000\n",
-      "2021-09-08 01:43:09,037 epoch 9 - iter 6/32 - loss 0.11753907 - samples/sec: 14.68 - lr: 0.020000\n",
-      "2021-09-08 01:43:09,273 epoch 9 - iter 9/32 - loss 0.07851171 - samples/sec: 12.76 - lr: 0.020000\n",
-      "2021-09-08 01:43:09,450 epoch 9 - iter 12/32 - loss 0.06003880 - samples/sec: 17.03 - lr: 0.020000\n",
-      "2021-09-08 01:43:09,632 epoch 9 - iter 15/32 - loss 0.04858840 - samples/sec: 16.48 - lr: 0.020000\n",
-      "2021-09-08 01:43:09,803 epoch 9 - iter 18/32 - loss 0.04105302 - samples/sec: 17.63 - lr: 0.020000\n",
-      "2021-09-08 01:43:09,988 epoch 9 - iter 21/32 - loss 0.03530037 - samples/sec: 16.26 - lr: 0.020000\n",
-      "2021-09-08 01:43:10,184 epoch 9 - iter 24/32 - loss 0.03141698 - samples/sec: 15.36 - lr: 0.020000\n",
-      "2021-09-08 01:43:10,381 epoch 9 - iter 27/32 - loss 0.02804052 - samples/sec: 15.23 - lr: 0.020000\n",
-      "2021-09-08 01:43:10,577 epoch 9 - iter 30/32 - loss 0.02529741 - samples/sec: 15.38 - lr: 0.020000\n",
-      "2021-09-08 01:43:10,687 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:10,688 EPOCH 9 done: loss 0.0237 - lr 0.0200000\n",
-      "2021-09-08 01:43:10,903 DEV : loss 0.7882184982299805 - score 0.75\n",
-      "Epoch     9: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:43:10,907 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:43:11,006 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:11,269 epoch 10 - iter 3/32 - loss 0.00016063 - samples/sec: 14.83 - lr: 0.010000\n",
-      "2021-09-08 01:43:11,451 epoch 10 - iter 6/32 - loss 0.00022429 - samples/sec: 16.56 - lr: 0.010000\n",
-      "2021-09-08 01:43:11,647 epoch 10 - iter 9/32 - loss 0.00059116 - samples/sec: 15.37 - lr: 0.010000\n",
-      "2021-09-08 01:43:11,845 epoch 10 - iter 12/32 - loss 0.00102515 - samples/sec: 15.17 - lr: 0.010000\n",
-      "2021-09-08 01:43:12,076 epoch 10 - iter 15/32 - loss 0.00099383 - samples/sec: 13.05 - lr: 0.010000\n",
-      "2021-09-08 01:43:12,259 epoch 10 - iter 18/32 - loss 0.00087004 - samples/sec: 16.39 - lr: 0.010000\n",
-      "2021-09-08 01:43:12,446 epoch 10 - iter 21/32 - loss 0.00082749 - samples/sec: 16.15 - lr: 0.010000\n",
-      "2021-09-08 01:43:12,616 epoch 10 - iter 24/32 - loss 0.00081412 - samples/sec: 17.68 - lr: 0.010000\n",
-      "2021-09-08 01:43:12,851 epoch 10 - iter 27/32 - loss 0.00083128 - samples/sec: 12.80 - lr: 0.010000\n",
-      "2021-09-08 01:43:13,042 epoch 10 - iter 30/32 - loss 0.00078681 - samples/sec: 15.78 - lr: 0.010000\n",
-      "2021-09-08 01:43:13,151 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:13,151 EPOCH 10 done: loss 0.0008 - lr 0.0100000\n",
-      "2021-09-08 01:43:13,395 DEV : loss 0.7972137928009033 - score 0.75\n",
-      "2021-09-08 01:43:13,397 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:43:21,807 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:21,807 Testing using best model ...\n",
-      "2021-09-08 01:43:21,809 loading file temp/best-model.pt\n",
+      "2021-09-21 21:21:12,243 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:12,244 EPOCH 6 done: loss 0.0511 - lr 0.0200000\n",
+      "2021-09-21 21:21:12,392 DEV : loss 0.1009187400341034 - score 1.0\n",
+      "2021-09-21 21:21:12,394 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:21:16,532 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:16,783 epoch 7 - iter 3/32 - loss 0.00045227 - samples/sec: 13.24 - lr: 0.020000\n",
+      "2021-09-21 21:21:17,033 epoch 7 - iter 6/32 - loss 0.00216339 - samples/sec: 12.04 - lr: 0.020000\n",
+      "2021-09-21 21:21:17,242 epoch 7 - iter 9/32 - loss 0.00236644 - samples/sec: 14.43 - lr: 0.020000\n",
+      "2021-09-21 21:21:17,482 epoch 7 - iter 12/32 - loss 0.00682820 - samples/sec: 12.52 - lr: 0.020000\n",
+      "2021-09-21 21:21:17,708 epoch 7 - iter 15/32 - loss 0.00822227 - samples/sec: 13.31 - lr: 0.020000\n",
+      "2021-09-21 21:21:17,952 epoch 7 - iter 18/32 - loss 0.00724018 - samples/sec: 12.29 - lr: 0.020000\n",
+      "2021-09-21 21:21:18,208 epoch 7 - iter 21/32 - loss 0.00676188 - samples/sec: 11.74 - lr: 0.020000\n",
+      "2021-09-21 21:21:18,464 epoch 7 - iter 24/32 - loss 0.02755140 - samples/sec: 11.75 - lr: 0.020000\n",
+      "2021-09-21 21:21:18,718 epoch 7 - iter 27/32 - loss 0.02881951 - samples/sec: 11.88 - lr: 0.020000\n",
+      "2021-09-21 21:21:18,958 epoch 7 - iter 30/32 - loss 0.02603229 - samples/sec: 12.50 - lr: 0.020000\n",
+      "2021-09-21 21:21:19,097 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:19,097 EPOCH 7 done: loss 0.0484 - lr 0.0200000\n",
+      "2021-09-21 21:21:19,239 DEV : loss 0.04434935003519058 - score 1.0\n",
+      "2021-09-21 21:21:19,243 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:21:23,474 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:23,663 epoch 8 - iter 3/32 - loss 0.00159381 - samples/sec: 17.95 - lr: 0.020000\n",
+      "2021-09-21 21:21:23,835 epoch 8 - iter 6/32 - loss 0.07582511 - samples/sec: 17.53 - lr: 0.020000\n",
+      "2021-09-21 21:21:24,001 epoch 8 - iter 9/32 - loss 0.05142507 - samples/sec: 18.15 - lr: 0.020000\n",
+      "2021-09-21 21:21:24,173 epoch 8 - iter 12/32 - loss 0.03868132 - samples/sec: 17.51 - lr: 0.020000\n",
+      "2021-09-21 21:21:24,364 epoch 8 - iter 15/32 - loss 0.03158149 - samples/sec: 15.77 - lr: 0.020000\n",
+      "2021-09-21 21:21:24,534 epoch 8 - iter 18/32 - loss 0.02668415 - samples/sec: 17.78 - lr: 0.020000\n",
+      "2021-09-21 21:21:24,708 epoch 8 - iter 21/32 - loss 0.02329107 - samples/sec: 17.28 - lr: 0.020000\n",
+      "2021-09-21 21:21:24,883 epoch 8 - iter 24/32 - loss 0.02098680 - samples/sec: 17.18 - lr: 0.020000\n",
+      "2021-09-21 21:21:25,050 epoch 8 - iter 27/32 - loss 0.01867457 - samples/sec: 18.10 - lr: 0.020000\n",
+      "2021-09-21 21:21:25,223 epoch 8 - iter 30/32 - loss 0.01748495 - samples/sec: 17.41 - lr: 0.020000\n",
+      "2021-09-21 21:21:25,337 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:25,337 EPOCH 8 done: loss 0.0165 - lr 0.0200000\n",
+      "2021-09-21 21:21:25,817 DEV : loss 0.5672041773796082 - score 1.0\n",
+      "2021-09-21 21:21:25,818 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:21:25,836 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:26,100 epoch 9 - iter 3/32 - loss 0.00049305 - samples/sec: 13.12 - lr: 0.020000\n",
+      "2021-09-21 21:21:26,351 epoch 9 - iter 6/32 - loss 0.00060062 - samples/sec: 11.95 - lr: 0.020000\n",
+      "2021-09-21 21:21:26,576 epoch 9 - iter 9/32 - loss 0.00057328 - samples/sec: 13.41 - lr: 0.020000\n",
+      "2021-09-21 21:21:26,785 epoch 9 - iter 12/32 - loss 0.00050605 - samples/sec: 14.40 - lr: 0.020000\n",
+      "2021-09-21 21:21:26,992 epoch 9 - iter 15/32 - loss 0.00064597 - samples/sec: 14.50 - lr: 0.020000\n",
+      "2021-09-21 21:21:27,234 epoch 9 - iter 18/32 - loss 0.00059750 - samples/sec: 12.45 - lr: 0.020000\n",
+      "2021-09-21 21:21:27,468 epoch 9 - iter 21/32 - loss 0.00053530 - samples/sec: 12.84 - lr: 0.020000\n",
+      "2021-09-21 21:21:27,678 epoch 9 - iter 24/32 - loss 0.00054858 - samples/sec: 14.30 - lr: 0.020000\n",
+      "2021-09-21 21:21:27,921 epoch 9 - iter 27/32 - loss 0.00049558 - samples/sec: 12.39 - lr: 0.020000\n",
+      "2021-09-21 21:21:28,146 epoch 9 - iter 30/32 - loss 0.00054677 - samples/sec: 13.36 - lr: 0.020000\n",
+      "2021-09-21 21:21:28,290 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:28,290 EPOCH 9 done: loss 0.0005 - lr 0.0200000\n",
+      "2021-09-21 21:21:28,465 DEV : loss 0.5938103795051575 - score 1.0\n",
+      "2021-09-21 21:21:28,467 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:21:28,469 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:28,756 epoch 10 - iter 3/32 - loss 0.00010954 - samples/sec: 12.71 - lr: 0.020000\n",
+      "2021-09-21 21:21:29,002 epoch 10 - iter 6/32 - loss 0.00107817 - samples/sec: 12.20 - lr: 0.020000\n",
+      "2021-09-21 21:21:29,238 epoch 10 - iter 9/32 - loss 0.00094344 - samples/sec: 12.76 - lr: 0.020000\n",
+      "2021-09-21 21:21:29,464 epoch 10 - iter 12/32 - loss 0.00075889 - samples/sec: 13.32 - lr: 0.020000\n",
+      "2021-09-21 21:21:29,707 epoch 10 - iter 15/32 - loss 0.00127260 - samples/sec: 12.39 - lr: 0.020000\n",
+      "2021-09-21 21:21:29,928 epoch 10 - iter 18/32 - loss 0.00116840 - samples/sec: 13.61 - lr: 0.020000\n",
+      "2021-09-21 21:21:30,127 epoch 10 - iter 21/32 - loss 0.00109104 - samples/sec: 15.07 - lr: 0.020000\n",
+      "2021-09-21 21:21:30,308 epoch 10 - iter 24/32 - loss 0.00107506 - samples/sec: 16.67 - lr: 0.020000\n",
+      "2021-09-21 21:21:30,463 epoch 10 - iter 27/32 - loss 0.00100442 - samples/sec: 19.40 - lr: 0.020000\n",
+      "2021-09-21 21:21:30,620 epoch 10 - iter 30/32 - loss 0.00093121 - samples/sec: 19.14 - lr: 0.020000\n",
+      "2021-09-21 21:21:30,718 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:30,718 EPOCH 10 done: loss 0.0009 - lr 0.0200000\n",
+      "2021-09-21 21:21:30,806 DEV : loss 0.6275250315666199 - score 1.0\n",
+      "2021-09-21 21:21:30,807 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:21:37,592 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:37,592 Testing using best model ...\n",
+      "2021-09-21 21:21:37,594 loading file temp/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:43:26,802 \t0.75\n",
-      "2021-09-08 01:43:26,802 \n",
+      "2021-09-21 21:21:43,354 \t0.75\n",
+      "2021-09-21 21:21:43,355 \n",
       "Results:\n",
       "- F-score (micro) 0.75\n",
       "- F-score (macro) 0.6667\n",
@@ -8471,17 +8501,17 @@
       "                                                           precision    recall  f1-score   support\n",
       "\n",
       "   emotions experienced when not in a state of well-being     1.0000    1.0000    1.0000         1\n",
-      "the optimistic feeling that all is going to turn out well     0.5000    1.0000    0.6667         1\n",
-      "                                 the state of being angry     1.0000    1.0000    1.0000         1\n",
-      "                           the emotion of great happiness     0.0000    0.0000    0.0000         1\n",
+      "the optimistic feeling that all is going to turn out well     0.0000    0.0000    0.0000         1\n",
+      "                                 the state of being angry     0.5000    1.0000    0.6667         1\n",
+      "                           the emotion of great happiness     1.0000    1.0000    1.0000         1\n",
       "\n",
       "                                                micro avg     0.7500    0.7500    0.7500         4\n",
       "                                                macro avg     0.6250    0.7500    0.6667         4\n",
       "                                             weighted avg     0.6250    0.7500    0.6667         4\n",
       "                                              samples avg     0.7500    0.7500    0.7500         4\n",
       "\n",
-      "2021-09-08 01:43:26,803 ----------------------------------------------------------------------------------------------------\n",
-      "Accuracy Durchschnitt: 0.6690140845070423\n"
+      "2021-09-21 21:21:43,355 ----------------------------------------------------------------------------------------------------\n",
+      "Accuracy Durchschnitt: 0.6647887323943662\n"
      ]
     }
    ],
@@ -8555,11 +8585,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
    "id": "de9de0d9",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.6672535211267606, 0.6813380281690141, 0.6390845070422535, 0.6936619718309859, 0.6426056338028169]\n",
+      "0.0212904457152549\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(avg_acc_list)\n",
+    "print(statistics.pstdev(avg_acc_list))"
+   ]
   }
  ],
  "metadata": {
diff --git a/fewshot/sentiment_amazon_fewshot.ipynb b/fewshot/sentiment_amazon_fewshot.ipynb
index 49ce581..ed2ca1e 100644
--- a/fewshot/sentiment_amazon_fewshot.ipynb
+++ b/fewshot/sentiment_amazon_fewshot.ipynb
@@ -37,7 +37,7 @@
    "source": [
     "# GRAKA auswählen\n",
     "import flair, torch\n",
-    "flair.device = torch.device('cuda:1') "
+    "flair.device = torch.device('cuda:0') "
    ]
   },
   {
@@ -70,25 +70,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:33:02,142 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
+      "2021-09-21 21:02:46,293 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:33:14,052 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:02:53,453 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 27/27 [00:00<00:00, 48189.88it/s]"
+      "100%|██████████| 27/27 [00:00<00:00, 42831.39it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:33:14,054 [b'positive', b'neutral', b'negative']\n",
-      "2021-09-08 01:33:14,058 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:14,060 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:02:53,457 [b'positive', b'neutral', b'negative']\n",
+      "2021-09-21 21:02:53,639 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:53,641 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -401,26 +401,24 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:33:14,060 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:14,060 Corpus: \"Corpus: 24 train + 3 dev + 3 test sentences\"\n",
-      "2021-09-08 01:33:14,061 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:14,061 Parameters:\n",
-      "2021-09-08 01:33:14,061  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:33:14,061  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:33:14,062  - patience: \"3\"\n",
-      "2021-09-08 01:33:14,062  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:33:14,062  - max_epochs: \"10\"\n",
-      "2021-09-08 01:33:14,063  - shuffle: \"True\"\n",
-      "2021-09-08 01:33:14,063  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:33:14,063  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:33:14,063 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:14,064 Model training base path: \"None1\"\n",
-      "2021-09-08 01:33:14,064 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:14,064 Device: cuda:1\n",
-      "2021-09-08 01:33:14,065 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:14,065 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:33:14,081 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:14,203 epoch 1 - iter 2/24 - loss 0.63072237 - samples/sec: 19.19 - lr: 0.020000\n"
+      "2021-09-21 21:02:53,642 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:53,642 Corpus: \"Corpus: 24 train + 3 dev + 3 test sentences\"\n",
+      "2021-09-21 21:02:53,643 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:53,643 Parameters:\n",
+      "2021-09-21 21:02:53,643  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:02:53,643  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:02:53,644  - patience: \"3\"\n",
+      "2021-09-21 21:02:53,644  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:02:53,644  - max_epochs: \"10\"\n",
+      "2021-09-21 21:02:53,644  - shuffle: \"True\"\n",
+      "2021-09-21 21:02:53,645  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:02:53,645  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:02:53,645 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:53,646 Model training base path: \"None1\"\n",
+      "2021-09-21 21:02:53,646 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:53,646 Device: cuda:0\n",
+      "2021-09-21 21:02:53,646 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:53,647 Embeddings storage mode: cpu\n"
      ]
     },
     {
@@ -434,247 +432,235 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:33:14,304 epoch 1 - iter 4/24 - loss 0.62915154 - samples/sec: 19.87 - lr: 0.020000\n",
-      "2021-09-08 01:33:14,408 epoch 1 - iter 6/24 - loss 0.63702586 - samples/sec: 19.26 - lr: 0.020000\n",
-      "2021-09-08 01:33:14,511 epoch 1 - iter 8/24 - loss 0.63858473 - samples/sec: 19.59 - lr: 0.020000\n",
-      "2021-09-08 01:33:14,616 epoch 1 - iter 10/24 - loss 0.63758439 - samples/sec: 19.06 - lr: 0.020000\n",
-      "2021-09-08 01:33:14,720 epoch 1 - iter 12/24 - loss 0.63682501 - samples/sec: 19.33 - lr: 0.020000\n",
-      "2021-09-08 01:33:14,828 epoch 1 - iter 14/24 - loss 0.63933844 - samples/sec: 18.60 - lr: 0.020000\n",
-      "2021-09-08 01:33:14,926 epoch 1 - iter 16/24 - loss 0.63908418 - samples/sec: 20.47 - lr: 0.020000\n",
-      "2021-09-08 01:33:15,033 epoch 1 - iter 18/24 - loss 0.63736843 - samples/sec: 18.75 - lr: 0.020000\n",
-      "2021-09-08 01:33:15,139 epoch 1 - iter 20/24 - loss 0.63952090 - samples/sec: 19.05 - lr: 0.020000\n",
-      "2021-09-08 01:33:15,236 epoch 1 - iter 22/24 - loss 0.63885525 - samples/sec: 20.60 - lr: 0.020000\n",
-      "2021-09-08 01:33:15,339 epoch 1 - iter 24/24 - loss 0.63840431 - samples/sec: 19.65 - lr: 0.020000\n",
-      "2021-09-08 01:33:15,340 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:15,340 EPOCH 1 done: loss 0.6384 - lr 0.0200000\n",
-      "2021-09-08 01:33:15,402 DEV : loss 0.6366134285926819 - score 0.6667\n",
-      "2021-09-08 01:33:15,402 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:02:53,809 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:53,921 epoch 1 - iter 2/24 - loss 0.63023683 - samples/sec: 21.32 - lr: 0.020000\n",
+      "2021-09-21 21:02:54,010 epoch 1 - iter 4/24 - loss 0.63358353 - samples/sec: 22.63 - lr: 0.020000\n",
+      "2021-09-21 21:02:54,097 epoch 1 - iter 6/24 - loss 0.63387586 - samples/sec: 23.11 - lr: 0.020000\n",
+      "2021-09-21 21:02:54,190 epoch 1 - iter 8/24 - loss 0.63390695 - samples/sec: 21.67 - lr: 0.020000\n",
+      "2021-09-21 21:02:54,279 epoch 1 - iter 10/24 - loss 0.63053013 - samples/sec: 22.63 - lr: 0.020000\n",
+      "2021-09-21 21:02:54,367 epoch 1 - iter 12/24 - loss 0.63239721 - samples/sec: 22.89 - lr: 0.020000\n",
+      "2021-09-21 21:02:54,454 epoch 1 - iter 14/24 - loss 0.63461318 - samples/sec: 23.11 - lr: 0.020000\n",
+      "2021-09-21 21:02:54,542 epoch 1 - iter 16/24 - loss 0.63389702 - samples/sec: 22.62 - lr: 0.020000\n",
+      "2021-09-21 21:02:54,631 epoch 1 - iter 18/24 - loss 0.63327662 - samples/sec: 22.84 - lr: 0.020000\n",
+      "2021-09-21 21:02:54,719 epoch 1 - iter 20/24 - loss 0.63275951 - samples/sec: 22.88 - lr: 0.020000\n",
+      "2021-09-21 21:02:54,807 epoch 1 - iter 22/24 - loss 0.63356390 - samples/sec: 22.60 - lr: 0.020000\n",
+      "2021-09-21 21:02:54,895 epoch 1 - iter 24/24 - loss 0.63450030 - samples/sec: 22.85 - lr: 0.020000\n",
+      "2021-09-21 21:02:54,896 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:02:54,897 EPOCH 1 done: loss 0.6345 - lr 0.0200000\n",
+      "2021-09-21 21:02:55,047 DEV : loss 0.6365159749984741 - score 0.0\n",
+      "2021-09-21 21:02:55,048 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:33:20,012 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:20,114 epoch 2 - iter 2/24 - loss 0.63694125 - samples/sec: 22.72 - lr: 0.020000\n",
-      "2021-09-08 01:33:20,204 epoch 2 - iter 4/24 - loss 0.63814789 - samples/sec: 22.55 - lr: 0.020000\n",
-      "2021-09-08 01:33:20,292 epoch 2 - iter 6/24 - loss 0.63952736 - samples/sec: 22.81 - lr: 0.020000\n",
-      "2021-09-08 01:33:20,378 epoch 2 - iter 8/24 - loss 0.63708392 - samples/sec: 23.30 - lr: 0.020000\n",
-      "2021-09-08 01:33:20,465 epoch 2 - iter 10/24 - loss 0.63818955 - samples/sec: 23.25 - lr: 0.020000\n",
-      "2021-09-08 01:33:20,551 epoch 2 - iter 12/24 - loss 0.63753654 - samples/sec: 23.28 - lr: 0.020000\n",
-      "2021-09-08 01:33:20,637 epoch 2 - iter 14/24 - loss 0.63659355 - samples/sec: 23.32 - lr: 0.020000\n",
-      "2021-09-08 01:33:20,724 epoch 2 - iter 16/24 - loss 0.63492760 - samples/sec: 23.29 - lr: 0.020000\n",
-      "2021-09-08 01:33:20,811 epoch 2 - iter 18/24 - loss 0.63530782 - samples/sec: 23.15 - lr: 0.020000\n",
-      "2021-09-08 01:33:20,897 epoch 2 - iter 20/24 - loss 0.63571248 - samples/sec: 23.24 - lr: 0.020000\n",
-      "2021-09-08 01:33:20,987 epoch 2 - iter 22/24 - loss 0.63462693 - samples/sec: 22.49 - lr: 0.020000\n",
-      "2021-09-08 01:33:21,073 epoch 2 - iter 24/24 - loss 0.63632794 - samples/sec: 23.24 - lr: 0.020000\n",
-      "2021-09-08 01:33:21,074 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:21,075 EPOCH 2 done: loss 0.6363 - lr 0.0200000\n",
-      "2021-09-08 01:33:21,226 DEV : loss 0.6368245482444763 - score 0.3333\n",
-      "2021-09-08 01:33:21,227 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:33:21,304 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:21,404 epoch 3 - iter 2/24 - loss 0.64739212 - samples/sec: 23.00 - lr: 0.020000\n",
-      "2021-09-08 01:33:21,491 epoch 3 - iter 4/24 - loss 0.63794404 - samples/sec: 23.13 - lr: 0.020000\n",
-      "2021-09-08 01:33:21,578 epoch 3 - iter 6/24 - loss 0.63483612 - samples/sec: 23.24 - lr: 0.020000\n",
-      "2021-09-08 01:33:21,665 epoch 3 - iter 8/24 - loss 0.64175703 - samples/sec: 23.20 - lr: 0.020000\n",
-      "2021-09-08 01:33:21,752 epoch 3 - iter 10/24 - loss 0.63790710 - samples/sec: 23.04 - lr: 0.020000\n",
-      "2021-09-08 01:33:21,839 epoch 3 - iter 12/24 - loss 0.63774526 - samples/sec: 23.14 - lr: 0.020000\n",
-      "2021-09-08 01:33:21,925 epoch 3 - iter 14/24 - loss 0.63925823 - samples/sec: 23.34 - lr: 0.020000\n",
-      "2021-09-08 01:33:22,013 epoch 3 - iter 16/24 - loss 0.64043570 - samples/sec: 22.86 - lr: 0.020000\n",
-      "2021-09-08 01:33:22,102 epoch 3 - iter 18/24 - loss 0.63956753 - samples/sec: 22.63 - lr: 0.020000\n",
-      "2021-09-08 01:33:22,188 epoch 3 - iter 20/24 - loss 0.63864848 - samples/sec: 23.40 - lr: 0.020000\n",
-      "2021-09-08 01:33:22,275 epoch 3 - iter 22/24 - loss 0.63788105 - samples/sec: 23.23 - lr: 0.020000\n",
-      "2021-09-08 01:33:22,360 epoch 3 - iter 24/24 - loss 0.63740246 - samples/sec: 23.42 - lr: 0.020000\n",
-      "2021-09-08 01:33:22,361 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:22,362 EPOCH 3 done: loss 0.6374 - lr 0.0200000\n",
-      "2021-09-08 01:33:22,516 DEV : loss 0.6365916132926941 - score 0.6667\n",
-      "2021-09-08 01:33:22,517 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:33:27,299 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:27,401 epoch 4 - iter 2/24 - loss 0.64037380 - samples/sec: 22.84 - lr: 0.020000\n",
-      "2021-09-08 01:33:27,487 epoch 4 - iter 4/24 - loss 0.64258353 - samples/sec: 23.31 - lr: 0.020000\n",
-      "2021-09-08 01:33:27,575 epoch 4 - iter 6/24 - loss 0.63722467 - samples/sec: 22.71 - lr: 0.020000\n",
-      "2021-09-08 01:33:27,664 epoch 4 - iter 8/24 - loss 0.63574511 - samples/sec: 22.76 - lr: 0.020000\n",
-      "2021-09-08 01:33:27,751 epoch 4 - iter 10/24 - loss 0.63821163 - samples/sec: 23.04 - lr: 0.020000\n",
-      "2021-09-08 01:33:27,838 epoch 4 - iter 12/24 - loss 0.63770412 - samples/sec: 23.24 - lr: 0.020000\n",
-      "2021-09-08 01:33:27,924 epoch 4 - iter 14/24 - loss 0.63961327 - samples/sec: 23.23 - lr: 0.020000\n",
-      "2021-09-08 01:33:28,011 epoch 4 - iter 16/24 - loss 0.63769460 - samples/sec: 23.25 - lr: 0.020000\n",
-      "2021-09-08 01:33:28,097 epoch 4 - iter 18/24 - loss 0.63752698 - samples/sec: 23.13 - lr: 0.020000\n",
-      "2021-09-08 01:33:28,185 epoch 4 - iter 20/24 - loss 0.63794736 - samples/sec: 23.10 - lr: 0.020000\n",
-      "2021-09-08 01:33:28,274 epoch 4 - iter 22/24 - loss 0.63837029 - samples/sec: 22.57 - lr: 0.020000\n",
-      "2021-09-08 01:33:28,361 epoch 4 - iter 24/24 - loss 0.63868386 - samples/sec: 23.04 - lr: 0.020000\n",
-      "2021-09-08 01:33:28,362 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:28,363 EPOCH 4 done: loss 0.6387 - lr 0.0200000\n",
-      "2021-09-08 01:33:28,413 DEV : loss 0.6365222930908203 - score 0.6667\n",
-      "2021-09-08 01:33:28,414 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:03:07,758 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:07,884 epoch 2 - iter 2/24 - loss 0.62976339 - samples/sec: 18.00 - lr: 0.020000\n",
+      "2021-09-21 21:03:07,990 epoch 2 - iter 4/24 - loss 0.62667668 - samples/sec: 19.15 - lr: 0.020000\n",
+      "2021-09-21 21:03:08,099 epoch 2 - iter 6/24 - loss 0.63243572 - samples/sec: 18.40 - lr: 0.020000\n",
+      "2021-09-21 21:03:08,210 epoch 2 - iter 8/24 - loss 0.63623089 - samples/sec: 18.16 - lr: 0.020000\n",
+      "2021-09-21 21:03:08,318 epoch 2 - iter 10/24 - loss 0.63723546 - samples/sec: 18.58 - lr: 0.020000\n",
+      "2021-09-21 21:03:08,424 epoch 2 - iter 12/24 - loss 0.64148920 - samples/sec: 19.12 - lr: 0.020000\n",
+      "2021-09-21 21:03:08,546 epoch 2 - iter 14/24 - loss 0.64018517 - samples/sec: 16.48 - lr: 0.020000\n",
+      "2021-09-21 21:03:08,664 epoch 2 - iter 16/24 - loss 0.63919608 - samples/sec: 17.08 - lr: 0.020000\n",
+      "2021-09-21 21:03:08,771 epoch 2 - iter 18/24 - loss 0.63956272 - samples/sec: 18.86 - lr: 0.020000\n",
+      "2021-09-21 21:03:08,876 epoch 2 - iter 20/24 - loss 0.63708956 - samples/sec: 19.08 - lr: 0.020000\n",
+      "2021-09-21 21:03:08,982 epoch 2 - iter 22/24 - loss 0.63801558 - samples/sec: 19.03 - lr: 0.020000\n",
+      "2021-09-21 21:03:09,090 epoch 2 - iter 24/24 - loss 0.63745594 - samples/sec: 18.46 - lr: 0.020000\n",
+      "2021-09-21 21:03:09,091 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:09,092 EPOCH 2 done: loss 0.6375 - lr 0.0200000\n",
+      "2021-09-21 21:03:09,293 DEV : loss 0.6368047595024109 - score 0.0\n",
+      "2021-09-21 21:03:09,294 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:03:09,410 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:09,518 epoch 3 - iter 2/24 - loss 0.62473804 - samples/sec: 21.64 - lr: 0.020000\n",
+      "2021-09-21 21:03:09,616 epoch 3 - iter 4/24 - loss 0.62863581 - samples/sec: 20.68 - lr: 0.020000\n",
+      "2021-09-21 21:03:09,704 epoch 3 - iter 6/24 - loss 0.62663293 - samples/sec: 22.81 - lr: 0.020000\n",
+      "2021-09-21 21:03:09,800 epoch 3 - iter 8/24 - loss 0.62682524 - samples/sec: 20.94 - lr: 0.020000\n",
+      "2021-09-21 21:03:09,905 epoch 3 - iter 10/24 - loss 0.63266144 - samples/sec: 19.12 - lr: 0.020000\n",
+      "2021-09-21 21:03:10,013 epoch 3 - iter 12/24 - loss 0.63495143 - samples/sec: 18.75 - lr: 0.020000\n",
+      "2021-09-21 21:03:10,115 epoch 3 - iter 14/24 - loss 0.63591784 - samples/sec: 19.66 - lr: 0.020000\n",
+      "2021-09-21 21:03:10,216 epoch 3 - iter 16/24 - loss 0.63604524 - samples/sec: 19.91 - lr: 0.020000\n",
+      "2021-09-21 21:03:10,320 epoch 3 - iter 18/24 - loss 0.63466565 - samples/sec: 19.36 - lr: 0.020000\n",
+      "2021-09-21 21:03:10,422 epoch 3 - iter 20/24 - loss 0.63569235 - samples/sec: 19.64 - lr: 0.020000\n",
+      "2021-09-21 21:03:10,528 epoch 3 - iter 22/24 - loss 0.63490905 - samples/sec: 18.92 - lr: 0.020000\n",
+      "2021-09-21 21:03:10,628 epoch 3 - iter 24/24 - loss 0.63459532 - samples/sec: 20.27 - lr: 0.020000\n",
+      "2021-09-21 21:03:10,629 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:10,629 EPOCH 3 done: loss 0.6346 - lr 0.0200000\n",
+      "2021-09-21 21:03:10,871 DEV : loss 0.6365682482719421 - score 0.0\n",
+      "2021-09-21 21:03:10,872 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:03:11,010 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:11,136 epoch 4 - iter 2/24 - loss 0.63327861 - samples/sec: 18.01 - lr: 0.020000\n",
+      "2021-09-21 21:03:11,243 epoch 4 - iter 4/24 - loss 0.62613297 - samples/sec: 18.86 - lr: 0.020000\n",
+      "2021-09-21 21:03:11,350 epoch 4 - iter 6/24 - loss 0.62840412 - samples/sec: 18.87 - lr: 0.020000\n",
+      "2021-09-21 21:03:11,446 epoch 4 - iter 8/24 - loss 0.63342286 - samples/sec: 21.18 - lr: 0.020000\n",
+      "2021-09-21 21:03:11,533 epoch 4 - iter 10/24 - loss 0.63359001 - samples/sec: 23.01 - lr: 0.020000\n",
+      "2021-09-21 21:03:11,633 epoch 4 - iter 12/24 - loss 0.63536980 - samples/sec: 20.04 - lr: 0.020000\n",
+      "2021-09-21 21:03:11,739 epoch 4 - iter 14/24 - loss 0.63621064 - samples/sec: 19.07 - lr: 0.020000\n",
+      "2021-09-21 21:03:11,845 epoch 4 - iter 16/24 - loss 0.63788027 - samples/sec: 18.85 - lr: 0.020000\n",
+      "2021-09-21 21:03:11,950 epoch 4 - iter 18/24 - loss 0.63742050 - samples/sec: 19.20 - lr: 0.020000\n",
+      "2021-09-21 21:03:12,056 epoch 4 - iter 20/24 - loss 0.63770234 - samples/sec: 19.03 - lr: 0.020000\n",
+      "2021-09-21 21:03:12,157 epoch 4 - iter 22/24 - loss 0.63833032 - samples/sec: 19.75 - lr: 0.020000\n",
+      "2021-09-21 21:03:12,265 epoch 4 - iter 24/24 - loss 0.63813659 - samples/sec: 18.68 - lr: 0.020000\n",
+      "2021-09-21 21:03:12,266 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:12,267 EPOCH 4 done: loss 0.6381 - lr 0.0200000\n",
+      "2021-09-21 21:03:14,694 DEV : loss 0.6365405917167664 - score 0.0\n",
+      "2021-09-21 21:03:14,695 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:03:14,781 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:14,897 epoch 5 - iter 2/24 - loss 0.63504538 - samples/sec: 19.54 - lr: 0.020000\n",
+      "2021-09-21 21:03:15,007 epoch 5 - iter 4/24 - loss 0.63146472 - samples/sec: 18.14 - lr: 0.020000\n",
+      "2021-09-21 21:03:15,119 epoch 5 - iter 6/24 - loss 0.62914765 - samples/sec: 18.08 - lr: 0.020000\n",
+      "2021-09-21 21:03:15,222 epoch 5 - iter 8/24 - loss 0.63455805 - samples/sec: 19.51 - lr: 0.020000\n",
+      "2021-09-21 21:03:15,330 epoch 5 - iter 10/24 - loss 0.63622191 - samples/sec: 18.56 - lr: 0.020000\n",
+      "2021-09-21 21:03:15,443 epoch 5 - iter 12/24 - loss 0.63419649 - samples/sec: 17.83 - lr: 0.020000\n",
+      "2021-09-21 21:03:15,555 epoch 5 - iter 14/24 - loss 0.63539371 - samples/sec: 18.01 - lr: 0.020000\n",
+      "2021-09-21 21:03:15,663 epoch 5 - iter 16/24 - loss 0.63577992 - samples/sec: 18.58 - lr: 0.020000\n",
+      "2021-09-21 21:03:15,773 epoch 5 - iter 18/24 - loss 0.63754267 - samples/sec: 18.37 - lr: 0.020000\n",
+      "2021-09-21 21:03:15,878 epoch 5 - iter 20/24 - loss 0.63721880 - samples/sec: 19.13 - lr: 0.020000\n",
+      "2021-09-21 21:03:16,001 epoch 5 - iter 22/24 - loss 0.63642628 - samples/sec: 16.35 - lr: 0.020000\n",
+      "2021-09-21 21:03:16,108 epoch 5 - iter 24/24 - loss 0.63632725 - samples/sec: 18.82 - lr: 0.020000\n",
+      "2021-09-21 21:03:16,109 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:16,109 EPOCH 5 done: loss 0.6363 - lr 0.0200000\n",
+      "2021-09-21 21:03:16,173 DEV : loss 0.6366275548934937 - score 0.0\n",
+      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:03:16,174 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:03:16,176 ----------------------------------------------------------------------------------------------------\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 21:03:16,302 epoch 6 - iter 2/24 - loss 0.66777784 - samples/sec: 18.90 - lr: 0.010000\n",
+      "2021-09-21 21:03:16,404 epoch 6 - iter 4/24 - loss 0.65210155 - samples/sec: 19.58 - lr: 0.010000\n",
+      "2021-09-21 21:03:16,508 epoch 6 - iter 6/24 - loss 0.64879556 - samples/sec: 19.46 - lr: 0.010000\n",
+      "2021-09-21 21:03:16,607 epoch 6 - iter 8/24 - loss 0.64313738 - samples/sec: 20.21 - lr: 0.010000\n",
+      "2021-09-21 21:03:16,695 epoch 6 - iter 10/24 - loss 0.64633252 - samples/sec: 22.80 - lr: 0.010000\n",
+      "2021-09-21 21:03:16,809 epoch 6 - iter 12/24 - loss 0.64707173 - samples/sec: 17.70 - lr: 0.010000\n",
+      "2021-09-21 21:03:16,915 epoch 6 - iter 14/24 - loss 0.64570640 - samples/sec: 19.51 - lr: 0.010000\n",
+      "2021-09-21 21:03:17,009 epoch 6 - iter 16/24 - loss 0.64495924 - samples/sec: 21.45 - lr: 0.010000\n",
+      "2021-09-21 21:03:17,104 epoch 6 - iter 18/24 - loss 0.64369418 - samples/sec: 21.23 - lr: 0.010000\n",
+      "2021-09-21 21:03:17,197 epoch 6 - iter 20/24 - loss 0.64346866 - samples/sec: 21.56 - lr: 0.010000\n",
+      "2021-09-21 21:03:17,298 epoch 6 - iter 22/24 - loss 0.64251187 - samples/sec: 19.82 - lr: 0.010000\n",
+      "2021-09-21 21:03:17,391 epoch 6 - iter 24/24 - loss 0.64264792 - samples/sec: 21.77 - lr: 0.010000\n",
+      "2021-09-21 21:03:17,392 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:17,392 EPOCH 6 done: loss 0.6426 - lr 0.0100000\n",
+      "2021-09-21 21:03:17,572 DEV : loss 0.6365600824356079 - score 0.0\n",
+      "2021-09-21 21:03:17,574 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:03:17,686 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:17,795 epoch 7 - iter 2/24 - loss 0.63898143 - samples/sec: 20.93 - lr: 0.010000\n",
+      "2021-09-21 21:03:17,888 epoch 7 - iter 4/24 - loss 0.63862777 - samples/sec: 21.65 - lr: 0.010000\n",
+      "2021-09-21 21:03:17,981 epoch 7 - iter 6/24 - loss 0.64375120 - samples/sec: 21.61 - lr: 0.010000\n",
+      "2021-09-21 21:03:18,073 epoch 7 - iter 8/24 - loss 0.63838166 - samples/sec: 21.90 - lr: 0.010000\n",
+      "2021-09-21 21:03:18,165 epoch 7 - iter 10/24 - loss 0.63931587 - samples/sec: 21.78 - lr: 0.010000\n",
+      "2021-09-21 21:03:18,257 epoch 7 - iter 12/24 - loss 0.64063451 - samples/sec: 21.97 - lr: 0.010000\n",
+      "2021-09-21 21:03:18,350 epoch 7 - iter 14/24 - loss 0.64297834 - samples/sec: 21.59 - lr: 0.010000\n",
+      "2021-09-21 21:03:18,444 epoch 7 - iter 16/24 - loss 0.63965945 - samples/sec: 21.26 - lr: 0.010000\n",
+      "2021-09-21 21:03:18,536 epoch 7 - iter 18/24 - loss 0.64218660 - samples/sec: 21.78 - lr: 0.010000\n",
+      "2021-09-21 21:03:18,629 epoch 7 - iter 20/24 - loss 0.64390398 - samples/sec: 21.66 - lr: 0.010000\n",
+      "2021-09-21 21:03:18,722 epoch 7 - iter 22/24 - loss 0.64287643 - samples/sec: 21.71 - lr: 0.010000\n",
+      "2021-09-21 21:03:18,815 epoch 7 - iter 24/24 - loss 0.64251639 - samples/sec: 21.53 - lr: 0.010000\n",
+      "2021-09-21 21:03:18,816 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:18,817 EPOCH 7 done: loss 0.6425 - lr 0.0100000\n",
+      "2021-09-21 21:03:18,988 DEV : loss 0.6365141868591309 - score 0.0\n",
+      "2021-09-21 21:03:18,988 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:33:32,270 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:32,385 epoch 5 - iter 2/24 - loss 0.62749010 - samples/sec: 19.97 - lr: 0.020000\n",
-      "2021-09-08 01:33:32,486 epoch 5 - iter 4/24 - loss 0.64468105 - samples/sec: 20.09 - lr: 0.020000\n",
-      "2021-09-08 01:33:32,587 epoch 5 - iter 6/24 - loss 0.64384915 - samples/sec: 19.82 - lr: 0.020000\n",
-      "2021-09-08 01:33:32,689 epoch 5 - iter 8/24 - loss 0.64161631 - samples/sec: 19.73 - lr: 0.020000\n",
-      "2021-09-08 01:33:32,795 epoch 5 - iter 10/24 - loss 0.64111645 - samples/sec: 18.95 - lr: 0.020000\n",
-      "2021-09-08 01:33:32,895 epoch 5 - iter 12/24 - loss 0.63971109 - samples/sec: 20.17 - lr: 0.020000\n",
-      "2021-09-08 01:33:32,996 epoch 5 - iter 14/24 - loss 0.63946312 - samples/sec: 19.92 - lr: 0.020000\n",
-      "2021-09-08 01:33:33,114 epoch 5 - iter 16/24 - loss 0.64021632 - samples/sec: 16.96 - lr: 0.020000\n",
-      "2021-09-08 01:33:33,222 epoch 5 - iter 18/24 - loss 0.63920016 - samples/sec: 18.80 - lr: 0.020000\n",
-      "2021-09-08 01:33:33,315 epoch 5 - iter 20/24 - loss 0.63913358 - samples/sec: 21.47 - lr: 0.020000\n",
-      "2021-09-08 01:33:33,415 epoch 5 - iter 22/24 - loss 0.63744837 - samples/sec: 20.27 - lr: 0.020000\n",
-      "2021-09-08 01:33:33,515 epoch 5 - iter 24/24 - loss 0.63684161 - samples/sec: 20.14 - lr: 0.020000\n",
-      "2021-09-08 01:33:33,516 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:33,516 EPOCH 5 done: loss 0.6368 - lr 0.0200000\n",
-      "2021-09-08 01:33:33,583 DEV : loss 0.6365141272544861 - score 0.6667\n",
-      "2021-09-08 01:33:33,584 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:03:29,425 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:29,547 epoch 8 - iter 2/24 - loss 0.63771302 - samples/sec: 20.21 - lr: 0.010000\n",
+      "2021-09-21 21:03:29,658 epoch 8 - iter 4/24 - loss 0.62837492 - samples/sec: 18.16 - lr: 0.010000\n",
+      "2021-09-21 21:03:29,765 epoch 8 - iter 6/24 - loss 0.63061365 - samples/sec: 18.77 - lr: 0.010000\n",
+      "2021-09-21 21:03:29,880 epoch 8 - iter 8/24 - loss 0.63451485 - samples/sec: 17.47 - lr: 0.010000\n",
+      "2021-09-21 21:03:29,996 epoch 8 - iter 10/24 - loss 0.63238292 - samples/sec: 17.34 - lr: 0.010000\n",
+      "2021-09-21 21:03:30,094 epoch 8 - iter 12/24 - loss 0.63104760 - samples/sec: 20.51 - lr: 0.010000\n",
+      "2021-09-21 21:03:30,196 epoch 8 - iter 14/24 - loss 0.63441867 - samples/sec: 19.53 - lr: 0.010000\n",
+      "2021-09-21 21:03:30,298 epoch 8 - iter 16/24 - loss 0.63517731 - samples/sec: 19.83 - lr: 0.010000\n",
+      "2021-09-21 21:03:30,400 epoch 8 - iter 18/24 - loss 0.63439982 - samples/sec: 19.61 - lr: 0.010000\n",
+      "2021-09-21 21:03:30,497 epoch 8 - iter 20/24 - loss 0.63324169 - samples/sec: 20.87 - lr: 0.010000\n",
+      "2021-09-21 21:03:30,600 epoch 8 - iter 22/24 - loss 0.63201100 - samples/sec: 19.44 - lr: 0.010000\n",
+      "2021-09-21 21:03:30,696 epoch 8 - iter 24/24 - loss 0.63115078 - samples/sec: 20.90 - lr: 0.010000\n",
+      "2021-09-21 21:03:30,697 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:30,697 EPOCH 8 done: loss 0.6312 - lr 0.0100000\n",
+      "2021-09-21 21:03:30,754 DEV : loss 0.6365143060684204 - score 0.0\n",
+      "2021-09-21 21:03:30,755 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:03:30,757 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:30,876 epoch 9 - iter 2/24 - loss 0.63309881 - samples/sec: 19.03 - lr: 0.010000\n",
+      "2021-09-21 21:03:30,994 epoch 9 - iter 4/24 - loss 0.63115394 - samples/sec: 17.11 - lr: 0.010000\n",
+      "2021-09-21 21:03:31,099 epoch 9 - iter 6/24 - loss 0.63868872 - samples/sec: 19.00 - lr: 0.010000\n",
+      "2021-09-21 21:03:31,200 epoch 9 - iter 8/24 - loss 0.63462084 - samples/sec: 20.02 - lr: 0.010000\n",
+      "2021-09-21 21:03:31,312 epoch 9 - iter 10/24 - loss 0.63199363 - samples/sec: 17.93 - lr: 0.010000\n",
+      "2021-09-21 21:03:31,410 epoch 9 - iter 12/24 - loss 0.63491597 - samples/sec: 20.63 - lr: 0.010000\n",
+      "2021-09-21 21:03:31,508 epoch 9 - iter 14/24 - loss 0.63436555 - samples/sec: 20.34 - lr: 0.010000\n",
+      "2021-09-21 21:03:31,614 epoch 9 - iter 16/24 - loss 0.63317959 - samples/sec: 19.11 - lr: 0.010000\n",
+      "2021-09-21 21:03:31,718 epoch 9 - iter 18/24 - loss 0.63210118 - samples/sec: 19.36 - lr: 0.010000\n",
+      "2021-09-21 21:03:31,819 epoch 9 - iter 20/24 - loss 0.63027721 - samples/sec: 19.84 - lr: 0.010000\n",
+      "2021-09-21 21:03:31,930 epoch 9 - iter 22/24 - loss 0.62888471 - samples/sec: 17.98 - lr: 0.010000\n",
+      "2021-09-21 21:03:32,032 epoch 9 - iter 24/24 - loss 0.62842460 - samples/sec: 19.78 - lr: 0.010000\n",
+      "2021-09-21 21:03:32,033 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:32,034 EPOCH 9 done: loss 0.6284 - lr 0.0100000\n",
+      "2021-09-21 21:03:32,092 DEV : loss 0.6365141868591309 - score 0.0\n",
+      "2021-09-21 21:03:32,092 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:03:32,094 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:32,215 epoch 10 - iter 2/24 - loss 0.63774413 - samples/sec: 19.08 - lr: 0.010000\n",
+      "2021-09-21 21:03:32,315 epoch 10 - iter 4/24 - loss 0.63485931 - samples/sec: 20.05 - lr: 0.010000\n",
+      "2021-09-21 21:03:32,418 epoch 10 - iter 6/24 - loss 0.63392197 - samples/sec: 19.60 - lr: 0.010000\n",
+      "2021-09-21 21:03:32,512 epoch 10 - iter 8/24 - loss 0.62728997 - samples/sec: 21.22 - lr: 0.010000\n",
+      "2021-09-21 21:03:32,616 epoch 10 - iter 10/24 - loss 0.62685686 - samples/sec: 19.35 - lr: 0.010000\n",
+      "2021-09-21 21:03:32,724 epoch 10 - iter 12/24 - loss 0.62593843 - samples/sec: 18.71 - lr: 0.010000\n",
+      "2021-09-21 21:03:32,827 epoch 10 - iter 14/24 - loss 0.62977594 - samples/sec: 19.40 - lr: 0.010000\n",
+      "2021-09-21 21:03:32,942 epoch 10 - iter 16/24 - loss 0.62935232 - samples/sec: 17.46 - lr: 0.010000\n",
+      "2021-09-21 21:03:33,050 epoch 10 - iter 18/24 - loss 0.62837670 - samples/sec: 18.67 - lr: 0.010000\n",
+      "2021-09-21 21:03:33,160 epoch 10 - iter 20/24 - loss 0.62918448 - samples/sec: 18.25 - lr: 0.010000\n",
+      "2021-09-21 21:03:33,274 epoch 10 - iter 22/24 - loss 0.63132668 - samples/sec: 17.59 - lr: 0.010000\n",
+      "2021-09-21 21:03:33,385 epoch 10 - iter 24/24 - loss 0.63328433 - samples/sec: 18.02 - lr: 0.010000\n",
+      "2021-09-21 21:03:33,386 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:33,387 EPOCH 10 done: loss 0.6333 - lr 0.0100000\n",
+      "2021-09-21 21:03:33,451 DEV : loss 0.6365751028060913 - score 0.3333\n",
+      "2021-09-21 21:03:33,451 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:33:40,560 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:40,681 epoch 6 - iter 2/24 - loss 0.64377382 - samples/sec: 19.88 - lr: 0.020000\n",
-      "2021-09-08 01:33:40,786 epoch 6 - iter 4/24 - loss 0.63693018 - samples/sec: 19.13 - lr: 0.020000\n"
+      "2021-09-21 21:03:45,370 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:45,372 Testing using best model ...\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:33:40,881 epoch 6 - iter 6/24 - loss 0.63368832 - samples/sec: 21.29 - lr: 0.020000\n",
-      "2021-09-08 01:33:40,982 epoch 6 - iter 8/24 - loss 0.63598980 - samples/sec: 19.74 - lr: 0.020000\n",
-      "2021-09-08 01:33:41,077 epoch 6 - iter 10/24 - loss 0.63385727 - samples/sec: 21.41 - lr: 0.020000\n",
-      "2021-09-08 01:33:41,183 epoch 6 - iter 12/24 - loss 0.63454806 - samples/sec: 18.81 - lr: 0.020000\n",
-      "2021-09-08 01:33:41,290 epoch 6 - iter 14/24 - loss 0.63467964 - samples/sec: 18.94 - lr: 0.020000\n",
-      "2021-09-08 01:33:41,381 epoch 6 - iter 16/24 - loss 0.63673941 - samples/sec: 22.16 - lr: 0.020000\n",
-      "2021-09-08 01:33:41,483 epoch 6 - iter 18/24 - loss 0.63718763 - samples/sec: 19.61 - lr: 0.020000\n",
-      "2021-09-08 01:33:41,575 epoch 6 - iter 20/24 - loss 0.63810663 - samples/sec: 21.88 - lr: 0.020000\n",
-      "2021-09-08 01:33:41,671 epoch 6 - iter 22/24 - loss 0.63989964 - samples/sec: 20.92 - lr: 0.020000\n",
-      "2021-09-08 01:33:41,781 epoch 6 - iter 24/24 - loss 0.64075420 - samples/sec: 18.32 - lr: 0.020000\n",
-      "2021-09-08 01:33:41,782 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:41,782 EPOCH 6 done: loss 0.6408 - lr 0.0200000\n",
-      "2021-09-08 01:33:42,074 DEV : loss 0.6369720697402954 - score 0.6667\n",
-      "2021-09-08 01:33:42,077 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:33:42,156 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:42,285 epoch 7 - iter 2/24 - loss 0.64254943 - samples/sec: 19.03 - lr: 0.020000\n",
-      "2021-09-08 01:33:42,377 epoch 7 - iter 4/24 - loss 0.64729856 - samples/sec: 21.84 - lr: 0.020000\n",
-      "2021-09-08 01:33:42,476 epoch 7 - iter 6/24 - loss 0.64285939 - samples/sec: 20.38 - lr: 0.020000\n",
-      "2021-09-08 01:33:42,572 epoch 7 - iter 8/24 - loss 0.64332153 - samples/sec: 20.90 - lr: 0.020000\n",
-      "2021-09-08 01:33:42,669 epoch 7 - iter 10/24 - loss 0.64224757 - samples/sec: 20.85 - lr: 0.020000\n",
-      "2021-09-08 01:33:42,766 epoch 7 - iter 12/24 - loss 0.64064167 - samples/sec: 20.58 - lr: 0.020000\n",
-      "2021-09-08 01:33:42,872 epoch 7 - iter 14/24 - loss 0.64197274 - samples/sec: 19.14 - lr: 0.020000\n",
-      "2021-09-08 01:33:42,971 epoch 7 - iter 16/24 - loss 0.64174669 - samples/sec: 20.22 - lr: 0.020000\n",
-      "2021-09-08 01:33:43,082 epoch 7 - iter 18/24 - loss 0.64170734 - samples/sec: 18.11 - lr: 0.020000\n",
-      "2021-09-08 01:33:43,187 epoch 7 - iter 20/24 - loss 0.64220025 - samples/sec: 19.20 - lr: 0.020000\n",
-      "2021-09-08 01:33:43,292 epoch 7 - iter 22/24 - loss 0.64191853 - samples/sec: 19.17 - lr: 0.020000\n",
-      "2021-09-08 01:33:43,399 epoch 7 - iter 24/24 - loss 0.64188773 - samples/sec: 18.75 - lr: 0.020000\n",
-      "2021-09-08 01:33:43,400 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:43,401 EPOCH 7 done: loss 0.6419 - lr 0.0200000\n",
-      "2021-09-08 01:33:43,924 DEV : loss 0.636518657207489 - score 0.3333\n",
-      "2021-09-08 01:33:43,925 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:33:44,385 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:44,510 epoch 8 - iter 2/24 - loss 0.63698405 - samples/sec: 18.83 - lr: 0.020000\n",
-      "2021-09-08 01:33:44,626 epoch 8 - iter 4/24 - loss 0.64303756 - samples/sec: 17.28 - lr: 0.020000\n",
-      "2021-09-08 01:33:44,737 epoch 8 - iter 6/24 - loss 0.64473039 - samples/sec: 18.18 - lr: 0.020000\n",
-      "2021-09-08 01:33:44,847 epoch 8 - iter 8/24 - loss 0.64237239 - samples/sec: 18.41 - lr: 0.020000\n",
-      "2021-09-08 01:33:44,948 epoch 8 - iter 10/24 - loss 0.64091779 - samples/sec: 19.91 - lr: 0.020000\n",
-      "2021-09-08 01:33:45,054 epoch 8 - iter 12/24 - loss 0.63903712 - samples/sec: 18.85 - lr: 0.020000\n",
-      "2021-09-08 01:33:45,160 epoch 8 - iter 14/24 - loss 0.63877021 - samples/sec: 19.08 - lr: 0.020000\n",
-      "2021-09-08 01:33:45,264 epoch 8 - iter 16/24 - loss 0.63865405 - samples/sec: 19.35 - lr: 0.020000\n",
-      "2021-09-08 01:33:45,382 epoch 8 - iter 18/24 - loss 0.63930434 - samples/sec: 17.08 - lr: 0.020000\n",
-      "2021-09-08 01:33:45,486 epoch 8 - iter 20/24 - loss 0.63941602 - samples/sec: 19.26 - lr: 0.020000\n",
-      "2021-09-08 01:33:45,587 epoch 8 - iter 22/24 - loss 0.63987408 - samples/sec: 20.08 - lr: 0.020000\n",
-      "2021-09-08 01:33:45,686 epoch 8 - iter 24/24 - loss 0.63901239 - samples/sec: 20.25 - lr: 0.020000\n",
-      "2021-09-08 01:33:45,687 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:45,687 EPOCH 8 done: loss 0.6390 - lr 0.0200000\n",
-      "2021-09-08 01:33:46,334 DEV : loss 0.6365704536437988 - score 1.0\n",
-      "2021-09-08 01:33:46,501 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:33:51,808 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:51,926 epoch 9 - iter 2/24 - loss 0.64188460 - samples/sec: 20.31 - lr: 0.020000\n",
-      "2021-09-08 01:33:52,031 epoch 9 - iter 4/24 - loss 0.63441782 - samples/sec: 19.14 - lr: 0.020000\n",
-      "2021-09-08 01:33:52,134 epoch 9 - iter 6/24 - loss 0.63623755 - samples/sec: 19.50 - lr: 0.020000\n",
-      "2021-09-08 01:33:52,244 epoch 9 - iter 8/24 - loss 0.63644535 - samples/sec: 18.34 - lr: 0.020000\n",
-      "2021-09-08 01:33:52,340 epoch 9 - iter 10/24 - loss 0.63733432 - samples/sec: 20.99 - lr: 0.020000\n",
-      "2021-09-08 01:33:52,438 epoch 9 - iter 12/24 - loss 0.63894999 - samples/sec: 20.44 - lr: 0.020000\n",
-      "2021-09-08 01:33:52,529 epoch 9 - iter 14/24 - loss 0.63802706 - samples/sec: 22.05 - lr: 0.020000\n",
-      "2021-09-08 01:33:52,633 epoch 9 - iter 16/24 - loss 0.63918756 - samples/sec: 19.33 - lr: 0.020000\n",
-      "2021-09-08 01:33:52,739 epoch 9 - iter 18/24 - loss 0.63924346 - samples/sec: 19.07 - lr: 0.020000\n",
-      "2021-09-08 01:33:52,846 epoch 9 - iter 20/24 - loss 0.63848296 - samples/sec: 18.75 - lr: 0.020000\n",
-      "2021-09-08 01:33:52,941 epoch 9 - iter 22/24 - loss 0.63895298 - samples/sec: 21.27 - lr: 0.020000\n",
-      "2021-09-08 01:33:53,035 epoch 9 - iter 24/24 - loss 0.63931920 - samples/sec: 21.41 - lr: 0.020000\n",
-      "2021-09-08 01:33:53,036 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:53,036 EPOCH 9 done: loss 0.6393 - lr 0.0200000\n",
-      "2021-09-08 01:33:53,111 DEV : loss 0.6365419626235962 - score 0.6667\n",
-      "2021-09-08 01:33:53,111 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:33:53,113 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:53,238 epoch 10 - iter 2/24 - loss 0.65765998 - samples/sec: 18.13 - lr: 0.020000\n",
-      "2021-09-08 01:33:53,347 epoch 10 - iter 4/24 - loss 0.64697383 - samples/sec: 18.52 - lr: 0.020000\n",
-      "2021-09-08 01:33:53,466 epoch 10 - iter 6/24 - loss 0.64241296 - samples/sec: 16.87 - lr: 0.020000\n",
-      "2021-09-08 01:33:53,571 epoch 10 - iter 8/24 - loss 0.64197397 - samples/sec: 19.20 - lr: 0.020000\n",
-      "2021-09-08 01:33:53,676 epoch 10 - iter 10/24 - loss 0.64055095 - samples/sec: 19.17 - lr: 0.020000\n",
-      "2021-09-08 01:33:53,777 epoch 10 - iter 12/24 - loss 0.63808142 - samples/sec: 20.03 - lr: 0.020000\n",
-      "2021-09-08 01:33:53,872 epoch 10 - iter 14/24 - loss 0.63617341 - samples/sec: 21.04 - lr: 0.020000\n",
-      "2021-09-08 01:33:53,977 epoch 10 - iter 16/24 - loss 0.63743599 - samples/sec: 19.24 - lr: 0.020000\n",
-      "2021-09-08 01:33:54,075 epoch 10 - iter 18/24 - loss 0.63740112 - samples/sec: 20.52 - lr: 0.020000\n",
-      "2021-09-08 01:33:54,194 epoch 10 - iter 20/24 - loss 0.63724738 - samples/sec: 16.88 - lr: 0.020000\n",
-      "2021-09-08 01:33:54,296 epoch 10 - iter 22/24 - loss 0.63751512 - samples/sec: 19.92 - lr: 0.020000\n",
-      "2021-09-08 01:33:54,400 epoch 10 - iter 24/24 - loss 0.63809553 - samples/sec: 19.39 - lr: 0.020000\n",
-      "2021-09-08 01:33:54,401 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:54,401 EPOCH 10 done: loss 0.6381 - lr 0.0200000\n",
-      "2021-09-08 01:33:54,556 DEV : loss 0.6365206837654114 - score 0.0\n",
-      "2021-09-08 01:33:54,557 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:34:04,725 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:04,726 Testing using best model ...\n",
-      "2021-09-08 01:34:04,727 loading file None1/best-model.pt\n",
+      "2021-09-21 21:03:45,373 loading file None1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:34:14,360 \t0.3333\n",
-      "2021-09-08 01:34:14,361 \n",
+      "2021-09-21 21:03:51,668 \t0.3333\n",
+      "2021-09-21 21:03:51,669 \n",
       "Results:\n",
       "- F-score (micro) 0.3333\n",
-      "- F-score (macro) 0.1667\n",
+      "- F-score (macro) 0.2222\n",
       "- Accuracy 0.3333\n",
       "\n",
       "By class:\n",
       "              precision    recall  f1-score   support\n",
       "\n",
-      "    positive     0.0000    0.0000    0.0000         1\n",
-      "     neutral     0.3333    1.0000    0.5000         1\n",
+      "    positive     1.0000    0.5000    0.6667         2\n",
+      "     neutral     0.0000    0.0000    0.0000         0\n",
       "    negative     0.0000    0.0000    0.0000         1\n",
       "\n",
       "   micro avg     0.3333    0.3333    0.3333         3\n",
-      "   macro avg     0.1111    0.3333    0.1667         3\n",
-      "weighted avg     0.1111    0.3333    0.1667         3\n",
+      "   macro avg     0.3333    0.1667    0.2222         3\n",
+      "weighted avg     0.6667    0.3333    0.4444         3\n",
       " samples avg     0.3333    0.3333    0.3333         3\n",
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 01:34:14,361 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:27,847 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
+      "\n",
+      "2021-09-21 21:03:51,669 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:12,834 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:34:32,711 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:04:17,529 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 27/27 [00:00<00:00, 43372.73it/s]"
+      "100%|██████████| 27/27 [00:00<00:00, 23296.90it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:34:32,714 [b'positive', b'neutral', b'negative']\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 01:34:34,398 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:34,400 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:04:17,532 [b'positive', b'neutral', b'negative']\n",
+      "2021-09-21 21:04:17,660 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:17,662 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -987,253 +973,265 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:34:34,401 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:34,401 Corpus: \"Corpus: 24 train + 3 dev + 3 test sentences\"\n",
-      "2021-09-08 01:34:34,402 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:34,402 Parameters:\n",
-      "2021-09-08 01:34:34,403  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:34:34,403  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:34:34,403  - patience: \"3\"\n",
-      "2021-09-08 01:34:34,404  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:34:34,404  - max_epochs: \"10\"\n",
-      "2021-09-08 01:34:34,404  - shuffle: \"True\"\n",
-      "2021-09-08 01:34:34,404  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:34:34,405  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:34:34,405 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:34,405 Model training base path: \"None1\"\n",
-      "2021-09-08 01:34:34,405 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:34,406 Device: cuda:1\n",
-      "2021-09-08 01:34:34,406 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:34,406 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:34:34,413 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:34,525 epoch 1 - iter 2/24 - loss 0.62558201 - samples/sec: 20.16 - lr: 0.020000\n",
-      "2021-09-08 01:34:34,631 epoch 1 - iter 4/24 - loss 0.62173401 - samples/sec: 18.91 - lr: 0.020000\n",
-      "2021-09-08 01:34:34,736 epoch 1 - iter 6/24 - loss 0.62701678 - samples/sec: 19.28 - lr: 0.020000\n",
-      "2021-09-08 01:34:34,842 epoch 1 - iter 8/24 - loss 0.62647901 - samples/sec: 18.83 - lr: 0.020000\n",
-      "2021-09-08 01:34:34,955 epoch 1 - iter 10/24 - loss 0.63073800 - samples/sec: 17.81 - lr: 0.020000\n",
-      "2021-09-08 01:34:35,064 epoch 1 - iter 12/24 - loss 0.63011977 - samples/sec: 18.52 - lr: 0.020000\n",
-      "2021-09-08 01:34:35,167 epoch 1 - iter 14/24 - loss 0.63041128 - samples/sec: 19.53 - lr: 0.020000\n",
-      "2021-09-08 01:34:35,273 epoch 1 - iter 16/24 - loss 0.62946261 - samples/sec: 18.86 - lr: 0.020000\n",
-      "2021-09-08 01:34:35,378 epoch 1 - iter 18/24 - loss 0.63337755 - samples/sec: 19.21 - lr: 0.020000\n",
-      "2021-09-08 01:34:35,488 epoch 1 - iter 20/24 - loss 0.63287374 - samples/sec: 18.16 - lr: 0.020000\n",
-      "2021-09-08 01:34:35,605 epoch 1 - iter 22/24 - loss 0.63327114 - samples/sec: 17.29 - lr: 0.020000\n",
-      "2021-09-08 01:34:35,715 epoch 1 - iter 24/24 - loss 0.63295015 - samples/sec: 18.24 - lr: 0.020000\n",
-      "2021-09-08 01:34:35,716 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:35,716 EPOCH 1 done: loss 0.6330 - lr 0.0200000\n",
-      "2021-09-08 01:34:35,782 DEV : loss 0.6369431614875793 - score 0.3333\n",
-      "2021-09-08 01:34:35,783 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:34:42,855 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:42,969 epoch 2 - iter 2/24 - loss 0.62482595 - samples/sec: 19.97 - lr: 0.020000\n",
-      "2021-09-08 01:34:43,068 epoch 2 - iter 4/24 - loss 0.64664438 - samples/sec: 20.21 - lr: 0.020000\n",
-      "2021-09-08 01:34:43,172 epoch 2 - iter 6/24 - loss 0.64266172 - samples/sec: 19.44 - lr: 0.020000\n",
-      "2021-09-08 01:34:43,274 epoch 2 - iter 8/24 - loss 0.64584956 - samples/sec: 19.60 - lr: 0.020000\n",
-      "2021-09-08 01:34:43,373 epoch 2 - iter 10/24 - loss 0.64073264 - samples/sec: 20.50 - lr: 0.020000\n",
-      "2021-09-08 01:34:43,470 epoch 2 - iter 12/24 - loss 0.63961270 - samples/sec: 20.72 - lr: 0.020000\n",
-      "2021-09-08 01:34:43,566 epoch 2 - iter 14/24 - loss 0.64009500 - samples/sec: 20.92 - lr: 0.020000\n",
-      "2021-09-08 01:34:43,671 epoch 2 - iter 16/24 - loss 0.64027918 - samples/sec: 19.05 - lr: 0.020000\n",
-      "2021-09-08 01:34:43,765 epoch 2 - iter 18/24 - loss 0.63946818 - samples/sec: 21.63 - lr: 0.020000\n",
-      "2021-09-08 01:34:43,859 epoch 2 - iter 20/24 - loss 0.63851055 - samples/sec: 21.23 - lr: 0.020000\n",
-      "2021-09-08 01:34:43,947 epoch 2 - iter 22/24 - loss 0.63819902 - samples/sec: 22.88 - lr: 0.020000\n",
-      "2021-09-08 01:34:44,035 epoch 2 - iter 24/24 - loss 0.63763330 - samples/sec: 23.02 - lr: 0.020000\n",
-      "2021-09-08 01:34:44,036 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:44,036 EPOCH 2 done: loss 0.6376 - lr 0.0200000\n",
-      "2021-09-08 01:34:44,682 DEV : loss 0.6367451548576355 - score 0.6667\n",
-      "2021-09-08 01:34:44,683 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:04:17,662 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:17,663 Corpus: \"Corpus: 24 train + 3 dev + 3 test sentences\"\n",
+      "2021-09-21 21:04:17,663 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:17,663 Parameters:\n",
+      "2021-09-21 21:04:17,663  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:04:17,664  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:04:17,664  - patience: \"3\"\n",
+      "2021-09-21 21:04:17,664  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:04:17,664  - max_epochs: \"10\"\n",
+      "2021-09-21 21:04:17,665  - shuffle: \"True\"\n",
+      "2021-09-21 21:04:17,665  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:04:17,665  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:04:17,666 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:17,666 Model training base path: \"None1\"\n",
+      "2021-09-21 21:04:17,666 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:17,666 Device: cuda:0\n",
+      "2021-09-21 21:04:17,667 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:17,667 Embeddings storage mode: cpu\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 21:04:17,944 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:18,054 epoch 1 - iter 2/24 - loss 0.63928887 - samples/sec: 20.89 - lr: 0.020000\n",
+      "2021-09-21 21:04:18,163 epoch 1 - iter 4/24 - loss 0.64010155 - samples/sec: 18.44 - lr: 0.020000\n",
+      "2021-09-21 21:04:18,259 epoch 1 - iter 6/24 - loss 0.64071258 - samples/sec: 20.80 - lr: 0.020000\n",
+      "2021-09-21 21:04:18,351 epoch 1 - iter 8/24 - loss 0.63922341 - samples/sec: 21.98 - lr: 0.020000\n",
+      "2021-09-21 21:04:18,442 epoch 1 - iter 10/24 - loss 0.63829015 - samples/sec: 22.06 - lr: 0.020000\n",
+      "2021-09-21 21:04:18,532 epoch 1 - iter 12/24 - loss 0.63846519 - samples/sec: 22.27 - lr: 0.020000\n",
+      "2021-09-21 21:04:18,623 epoch 1 - iter 14/24 - loss 0.63861174 - samples/sec: 22.03 - lr: 0.020000\n",
+      "2021-09-21 21:04:18,715 epoch 1 - iter 16/24 - loss 0.63760978 - samples/sec: 22.00 - lr: 0.020000\n",
+      "2021-09-21 21:04:18,805 epoch 1 - iter 18/24 - loss 0.63877304 - samples/sec: 22.16 - lr: 0.020000\n",
+      "2021-09-21 21:04:18,897 epoch 1 - iter 20/24 - loss 0.63947219 - samples/sec: 21.98 - lr: 0.020000\n",
+      "2021-09-21 21:04:18,987 epoch 1 - iter 22/24 - loss 0.63934261 - samples/sec: 22.14 - lr: 0.020000\n",
+      "2021-09-21 21:04:19,079 epoch 1 - iter 24/24 - loss 0.63871983 - samples/sec: 22.03 - lr: 0.020000\n",
+      "2021-09-21 21:04:19,080 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:19,080 EPOCH 1 done: loss 0.6387 - lr 0.0200000\n",
+      "2021-09-21 21:04:28,029 DEV : loss 0.6365630626678467 - score 0.3333\n",
+      "2021-09-21 21:04:28,029 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:34:56,708 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:56,812 epoch 3 - iter 2/24 - loss 0.65447631 - samples/sec: 22.33 - lr: 0.020000\n",
-      "2021-09-08 01:34:56,901 epoch 3 - iter 4/24 - loss 0.64631802 - samples/sec: 22.57 - lr: 0.020000\n",
-      "2021-09-08 01:34:56,989 epoch 3 - iter 6/24 - loss 0.64539837 - samples/sec: 22.77 - lr: 0.020000\n",
-      "2021-09-08 01:34:57,077 epoch 3 - iter 8/24 - loss 0.63927406 - samples/sec: 22.94 - lr: 0.020000\n",
-      "2021-09-08 01:34:57,165 epoch 3 - iter 10/24 - loss 0.63696665 - samples/sec: 22.92 - lr: 0.020000\n",
-      "2021-09-08 01:34:57,254 epoch 3 - iter 12/24 - loss 0.63427264 - samples/sec: 22.61 - lr: 0.020000\n",
-      "2021-09-08 01:34:57,343 epoch 3 - iter 14/24 - loss 0.63318551 - samples/sec: 22.52 - lr: 0.020000\n",
-      "2021-09-08 01:34:57,432 epoch 3 - iter 16/24 - loss 0.63452300 - samples/sec: 22.61 - lr: 0.020000\n",
-      "2021-09-08 01:34:57,521 epoch 3 - iter 18/24 - loss 0.63249403 - samples/sec: 22.64 - lr: 0.020000\n",
-      "2021-09-08 01:34:57,609 epoch 3 - iter 20/24 - loss 0.63168181 - samples/sec: 22.84 - lr: 0.020000\n",
-      "2021-09-08 01:34:57,697 epoch 3 - iter 22/24 - loss 0.63208673 - samples/sec: 23.06 - lr: 0.020000\n",
-      "2021-09-08 01:34:57,788 epoch 3 - iter 24/24 - loss 0.63139779 - samples/sec: 22.05 - lr: 0.020000\n",
-      "2021-09-08 01:34:57,789 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:57,789 EPOCH 3 done: loss 0.6314 - lr 0.0200000\n",
-      "2021-09-08 01:35:00,117 DEV : loss 0.6365965604782104 - score 1.0\n",
-      "2021-09-08 01:35:00,118 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:04:32,296 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:32,481 epoch 2 - iter 2/24 - loss 0.65555012 - samples/sec: 13.67 - lr: 0.020000\n",
+      "2021-09-21 21:04:32,670 epoch 2 - iter 4/24 - loss 0.64646848 - samples/sec: 10.67 - lr: 0.020000\n",
+      "2021-09-21 21:04:32,824 epoch 2 - iter 6/24 - loss 0.64306520 - samples/sec: 13.09 - lr: 0.020000\n",
+      "2021-09-21 21:04:32,975 epoch 2 - iter 8/24 - loss 0.64122685 - samples/sec: 13.31 - lr: 0.020000\n",
+      "2021-09-21 21:04:33,136 epoch 2 - iter 10/24 - loss 0.63952838 - samples/sec: 12.51 - lr: 0.020000\n",
+      "2021-09-21 21:04:33,280 epoch 2 - iter 12/24 - loss 0.63753051 - samples/sec: 13.99 - lr: 0.020000\n",
+      "2021-09-21 21:04:33,448 epoch 2 - iter 14/24 - loss 0.63863787 - samples/sec: 11.93 - lr: 0.020000\n",
+      "2021-09-21 21:04:33,600 epoch 2 - iter 16/24 - loss 0.63834807 - samples/sec: 13.22 - lr: 0.020000\n",
+      "2021-09-21 21:04:33,758 epoch 2 - iter 18/24 - loss 0.63979443 - samples/sec: 12.76 - lr: 0.020000\n",
+      "2021-09-21 21:04:33,898 epoch 2 - iter 20/24 - loss 0.63925845 - samples/sec: 14.36 - lr: 0.020000\n",
+      "2021-09-21 21:04:34,038 epoch 2 - iter 22/24 - loss 0.63821484 - samples/sec: 14.36 - lr: 0.020000\n",
+      "2021-09-21 21:04:34,203 epoch 2 - iter 24/24 - loss 0.63686472 - samples/sec: 12.17 - lr: 0.020000\n",
+      "2021-09-21 21:04:34,204 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:34,205 EPOCH 2 done: loss 0.6369 - lr 0.0200000\n",
+      "2021-09-21 21:04:34,401 DEV : loss 0.6366991400718689 - score 0.3333\n",
+      "2021-09-21 21:04:34,403 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:04:34,478 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:34,655 epoch 3 - iter 2/24 - loss 0.65496078 - samples/sec: 12.62 - lr: 0.020000\n",
+      "2021-09-21 21:04:34,800 epoch 3 - iter 4/24 - loss 0.64384528 - samples/sec: 13.88 - lr: 0.020000\n",
+      "2021-09-21 21:04:34,957 epoch 3 - iter 6/24 - loss 0.64353633 - samples/sec: 12.76 - lr: 0.020000\n",
+      "2021-09-21 21:04:35,117 epoch 3 - iter 8/24 - loss 0.64563178 - samples/sec: 12.53 - lr: 0.020000\n",
+      "2021-09-21 21:04:35,262 epoch 3 - iter 10/24 - loss 0.64442639 - samples/sec: 13.89 - lr: 0.020000\n",
+      "2021-09-21 21:04:35,410 epoch 3 - iter 12/24 - loss 0.64286428 - samples/sec: 13.54 - lr: 0.020000\n",
+      "2021-09-21 21:04:35,572 epoch 3 - iter 14/24 - loss 0.64060676 - samples/sec: 12.42 - lr: 0.020000\n",
+      "2021-09-21 21:04:35,717 epoch 3 - iter 16/24 - loss 0.63901321 - samples/sec: 13.91 - lr: 0.020000\n",
+      "2021-09-21 21:04:35,852 epoch 3 - iter 18/24 - loss 0.64100407 - samples/sec: 14.84 - lr: 0.020000\n",
+      "2021-09-21 21:04:36,016 epoch 3 - iter 20/24 - loss 0.63934891 - samples/sec: 12.23 - lr: 0.020000\n",
+      "2021-09-21 21:04:36,163 epoch 3 - iter 22/24 - loss 0.63829147 - samples/sec: 13.65 - lr: 0.020000\n",
+      "2021-09-21 21:04:36,298 epoch 3 - iter 24/24 - loss 0.63874525 - samples/sec: 14.98 - lr: 0.020000\n",
+      "2021-09-21 21:04:36,299 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:36,299 EPOCH 3 done: loss 0.6387 - lr 0.0200000\n",
+      "2021-09-21 21:04:36,695 DEV : loss 0.637126624584198 - score 0.3333\n",
+      "2021-09-21 21:04:36,696 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:04:36,698 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:36,818 epoch 4 - iter 2/24 - loss 0.66196728 - samples/sec: 18.58 - lr: 0.020000\n",
+      "2021-09-21 21:04:36,933 epoch 4 - iter 4/24 - loss 0.64613053 - samples/sec: 17.50 - lr: 0.020000\n",
+      "2021-09-21 21:04:37,079 epoch 4 - iter 6/24 - loss 0.64482994 - samples/sec: 13.86 - lr: 0.020000\n",
+      "2021-09-21 21:04:37,186 epoch 4 - iter 8/24 - loss 0.64305372 - samples/sec: 18.82 - lr: 0.020000\n",
+      "2021-09-21 21:04:37,299 epoch 4 - iter 10/24 - loss 0.64433351 - samples/sec: 17.68 - lr: 0.020000\n",
+      "2021-09-21 21:04:37,438 epoch 4 - iter 12/24 - loss 0.64186384 - samples/sec: 14.52 - lr: 0.020000\n",
+      "2021-09-21 21:04:37,561 epoch 4 - iter 14/24 - loss 0.64059070 - samples/sec: 16.57 - lr: 0.020000\n",
+      "2021-09-21 21:04:37,670 epoch 4 - iter 16/24 - loss 0.63950310 - samples/sec: 18.37 - lr: 0.020000\n",
+      "2021-09-21 21:04:37,781 epoch 4 - iter 18/24 - loss 0.63861245 - samples/sec: 18.18 - lr: 0.020000\n",
+      "2021-09-21 21:04:37,907 epoch 4 - iter 20/24 - loss 0.63797282 - samples/sec: 15.92 - lr: 0.020000\n",
+      "2021-09-21 21:04:38,032 epoch 4 - iter 22/24 - loss 0.63669447 - samples/sec: 16.13 - lr: 0.020000\n",
+      "2021-09-21 21:04:38,148 epoch 4 - iter 24/24 - loss 0.63605179 - samples/sec: 17.34 - lr: 0.020000\n",
+      "2021-09-21 21:04:38,150 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:38,150 EPOCH 4 done: loss 0.6361 - lr 0.0200000\n",
+      "2021-09-21 21:04:38,595 DEV : loss 0.6366278529167175 - score 0.0\n",
+      "2021-09-21 21:04:38,596 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:04:38,842 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:38,982 epoch 5 - iter 2/24 - loss 0.64495924 - samples/sec: 16.95 - lr: 0.020000\n",
+      "2021-09-21 21:04:39,115 epoch 5 - iter 4/24 - loss 0.63623683 - samples/sec: 15.09 - lr: 0.020000\n",
+      "2021-09-21 21:04:39,239 epoch 5 - iter 6/24 - loss 0.63885671 - samples/sec: 16.31 - lr: 0.020000\n",
+      "2021-09-21 21:04:39,362 epoch 5 - iter 8/24 - loss 0.63632559 - samples/sec: 16.40 - lr: 0.020000\n",
+      "2021-09-21 21:04:39,486 epoch 5 - iter 10/24 - loss 0.63938727 - samples/sec: 16.12 - lr: 0.020000\n",
+      "2021-09-21 21:04:39,615 epoch 5 - iter 12/24 - loss 0.63949733 - samples/sec: 15.69 - lr: 0.020000\n",
+      "2021-09-21 21:04:39,715 epoch 5 - iter 14/24 - loss 0.64319581 - samples/sec: 19.99 - lr: 0.020000\n",
+      "2021-09-21 21:04:39,803 epoch 5 - iter 16/24 - loss 0.64294565 - samples/sec: 23.10 - lr: 0.020000\n",
+      "2021-09-21 21:04:39,890 epoch 5 - iter 18/24 - loss 0.64291031 - samples/sec: 23.17 - lr: 0.020000\n",
+      "2021-09-21 21:04:39,987 epoch 5 - iter 20/24 - loss 0.64263420 - samples/sec: 20.56 - lr: 0.020000\n",
+      "2021-09-21 21:04:40,104 epoch 5 - iter 22/24 - loss 0.64203230 - samples/sec: 17.28 - lr: 0.020000\n",
+      "2021-09-21 21:04:40,229 epoch 5 - iter 24/24 - loss 0.64195594 - samples/sec: 16.02 - lr: 0.020000\n",
+      "2021-09-21 21:04:40,231 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:40,231 EPOCH 5 done: loss 0.6420 - lr 0.0200000\n",
+      "2021-09-21 21:04:40,396 DEV : loss 0.6365146636962891 - score 0.3333\n",
+      "2021-09-21 21:04:40,398 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:35:08,659 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:08,787 epoch 4 - iter 2/24 - loss 0.62349972 - samples/sec: 17.83 - lr: 0.020000\n",
-      "2021-09-08 01:35:08,903 epoch 4 - iter 4/24 - loss 0.61007212 - samples/sec: 17.30 - lr: 0.020000\n",
-      "2021-09-08 01:35:09,009 epoch 4 - iter 6/24 - loss 0.60417883 - samples/sec: 18.97 - lr: 0.020000\n",
-      "2021-09-08 01:35:09,119 epoch 4 - iter 8/24 - loss 0.61624806 - samples/sec: 18.36 - lr: 0.020000\n",
-      "2021-09-08 01:35:09,242 epoch 4 - iter 10/24 - loss 0.62639045 - samples/sec: 16.34 - lr: 0.020000\n",
-      "2021-09-08 01:35:09,368 epoch 4 - iter 12/24 - loss 0.63377975 - samples/sec: 15.93 - lr: 0.020000\n",
-      "2021-09-08 01:35:09,486 epoch 4 - iter 14/24 - loss 0.63244739 - samples/sec: 16.97 - lr: 0.020000\n",
-      "2021-09-08 01:35:09,610 epoch 4 - iter 16/24 - loss 0.62869426 - samples/sec: 16.23 - lr: 0.020000\n",
-      "2021-09-08 01:35:09,741 epoch 4 - iter 18/24 - loss 0.62633991 - samples/sec: 15.48 - lr: 0.020000\n",
-      "2021-09-08 01:35:09,867 epoch 4 - iter 20/24 - loss 0.62782027 - samples/sec: 15.95 - lr: 0.020000\n",
-      "2021-09-08 01:35:09,993 epoch 4 - iter 22/24 - loss 0.63344334 - samples/sec: 15.88 - lr: 0.020000\n",
-      "2021-09-08 01:35:10,110 epoch 4 - iter 24/24 - loss 0.63465036 - samples/sec: 17.27 - lr: 0.020000\n",
-      "2021-09-08 01:35:10,111 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:10,111 EPOCH 4 done: loss 0.6347 - lr 0.0200000\n",
-      "2021-09-08 01:35:10,174 DEV : loss 0.6365715861320496 - score 0.0\n",
-      "2021-09-08 01:35:10,175 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:35:10,177 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:10,309 epoch 5 - iter 2/24 - loss 0.62680784 - samples/sec: 16.91 - lr: 0.020000\n",
-      "2021-09-08 01:35:10,418 epoch 5 - iter 4/24 - loss 0.63370420 - samples/sec: 18.52 - lr: 0.020000\n",
-      "2021-09-08 01:35:10,531 epoch 5 - iter 6/24 - loss 0.64282271 - samples/sec: 17.80 - lr: 0.020000\n"
+      "2021-09-21 21:04:50,329 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:35:10,641 epoch 5 - iter 8/24 - loss 0.64019176 - samples/sec: 18.28 - lr: 0.020000\n",
-      "2021-09-08 01:35:10,748 epoch 5 - iter 10/24 - loss 0.63319466 - samples/sec: 18.73 - lr: 0.020000\n",
-      "2021-09-08 01:35:10,862 epoch 5 - iter 12/24 - loss 0.63806360 - samples/sec: 17.66 - lr: 0.020000\n",
-      "2021-09-08 01:35:10,962 epoch 5 - iter 14/24 - loss 0.64119209 - samples/sec: 20.16 - lr: 0.020000\n",
-      "2021-09-08 01:35:11,064 epoch 5 - iter 16/24 - loss 0.64058860 - samples/sec: 19.70 - lr: 0.020000\n",
-      "2021-09-08 01:35:11,177 epoch 5 - iter 18/24 - loss 0.64178562 - samples/sec: 17.78 - lr: 0.020000\n",
-      "2021-09-08 01:35:11,289 epoch 5 - iter 20/24 - loss 0.64282156 - samples/sec: 18.02 - lr: 0.020000\n",
-      "2021-09-08 01:35:11,394 epoch 5 - iter 22/24 - loss 0.64293965 - samples/sec: 19.18 - lr: 0.020000\n",
-      "2021-09-08 01:35:11,502 epoch 5 - iter 24/24 - loss 0.63880621 - samples/sec: 18.50 - lr: 0.020000\n",
-      "2021-09-08 01:35:11,504 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:11,504 EPOCH 5 done: loss 0.6388 - lr 0.0200000\n",
-      "2021-09-08 01:35:11,576 DEV : loss 0.6365534067153931 - score 0.6667\n",
-      "2021-09-08 01:35:11,577 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:35:11,579 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:11,713 epoch 6 - iter 2/24 - loss 0.61863807 - samples/sec: 18.14 - lr: 0.020000\n",
-      "2021-09-08 01:35:11,815 epoch 6 - iter 4/24 - loss 0.62875500 - samples/sec: 19.69 - lr: 0.020000\n",
-      "2021-09-08 01:35:11,919 epoch 6 - iter 6/24 - loss 0.62477823 - samples/sec: 19.30 - lr: 0.020000\n",
-      "2021-09-08 01:35:12,022 epoch 6 - iter 8/24 - loss 0.63615355 - samples/sec: 19.54 - lr: 0.020000\n",
-      "2021-09-08 01:35:12,125 epoch 6 - iter 10/24 - loss 0.63638088 - samples/sec: 19.56 - lr: 0.020000\n",
-      "2021-09-08 01:35:12,226 epoch 6 - iter 12/24 - loss 0.63739531 - samples/sec: 19.83 - lr: 0.020000\n",
-      "2021-09-08 01:35:12,328 epoch 6 - iter 14/24 - loss 0.63667849 - samples/sec: 19.84 - lr: 0.020000\n",
-      "2021-09-08 01:35:12,433 epoch 6 - iter 16/24 - loss 0.63651655 - samples/sec: 19.11 - lr: 0.020000\n",
-      "2021-09-08 01:35:12,549 epoch 6 - iter 18/24 - loss 0.63728597 - samples/sec: 17.33 - lr: 0.020000\n",
-      "2021-09-08 01:35:12,658 epoch 6 - iter 20/24 - loss 0.63880106 - samples/sec: 18.44 - lr: 0.020000\n",
-      "2021-09-08 01:35:12,776 epoch 6 - iter 22/24 - loss 0.63773468 - samples/sec: 17.15 - lr: 0.020000\n",
-      "2021-09-08 01:35:12,891 epoch 6 - iter 24/24 - loss 0.63715277 - samples/sec: 17.49 - lr: 0.020000\n",
-      "2021-09-08 01:35:12,893 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:12,893 EPOCH 6 done: loss 0.6372 - lr 0.0200000\n",
-      "2021-09-08 01:35:12,954 DEV : loss 0.6365154385566711 - score 0.6667\n",
-      "2021-09-08 01:35:12,955 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:35:12,957 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:13,082 epoch 7 - iter 2/24 - loss 0.63261530 - samples/sec: 18.30 - lr: 0.020000\n",
-      "2021-09-08 01:35:13,199 epoch 7 - iter 4/24 - loss 0.64373907 - samples/sec: 17.26 - lr: 0.020000\n",
-      "2021-09-08 01:35:13,313 epoch 7 - iter 6/24 - loss 0.64510053 - samples/sec: 17.57 - lr: 0.020000\n",
-      "2021-09-08 01:35:13,419 epoch 7 - iter 8/24 - loss 0.64103311 - samples/sec: 19.00 - lr: 0.020000\n",
-      "2021-09-08 01:35:13,525 epoch 7 - iter 10/24 - loss 0.63689789 - samples/sec: 18.92 - lr: 0.020000\n",
-      "2021-09-08 01:35:13,634 epoch 7 - iter 12/24 - loss 0.63794434 - samples/sec: 18.50 - lr: 0.020000\n",
-      "2021-09-08 01:35:13,741 epoch 7 - iter 14/24 - loss 0.63878493 - samples/sec: 18.87 - lr: 0.020000\n",
-      "2021-09-08 01:35:13,849 epoch 7 - iter 16/24 - loss 0.63905478 - samples/sec: 18.54 - lr: 0.020000\n",
-      "2021-09-08 01:35:13,959 epoch 7 - iter 18/24 - loss 0.64037875 - samples/sec: 18.38 - lr: 0.020000\n",
-      "2021-09-08 01:35:14,062 epoch 7 - iter 20/24 - loss 0.64192330 - samples/sec: 19.55 - lr: 0.020000\n",
-      "2021-09-08 01:35:14,170 epoch 7 - iter 22/24 - loss 0.64449445 - samples/sec: 18.58 - lr: 0.020000\n",
-      "2021-09-08 01:35:14,292 epoch 7 - iter 24/24 - loss 0.64400335 - samples/sec: 16.53 - lr: 0.020000\n",
-      "2021-09-08 01:35:14,293 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:14,293 EPOCH 7 done: loss 0.6440 - lr 0.0200000\n",
-      "2021-09-08 01:35:14,370 DEV : loss 0.6366357207298279 - score 1.0\n",
-      "Epoch     7: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:35:14,372 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:35:14,374 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:14,519 epoch 8 - iter 2/24 - loss 0.64378116 - samples/sec: 16.10 - lr: 0.010000\n",
-      "2021-09-08 01:35:14,649 epoch 8 - iter 4/24 - loss 0.63362572 - samples/sec: 15.45 - lr: 0.010000\n",
-      "2021-09-08 01:35:14,771 epoch 8 - iter 6/24 - loss 0.63525221 - samples/sec: 16.54 - lr: 0.010000\n",
-      "2021-09-08 01:35:14,885 epoch 8 - iter 8/24 - loss 0.63965724 - samples/sec: 17.54 - lr: 0.010000\n",
-      "2021-09-08 01:35:15,003 epoch 8 - iter 10/24 - loss 0.64139131 - samples/sec: 17.16 - lr: 0.010000\n",
-      "2021-09-08 01:35:15,133 epoch 8 - iter 12/24 - loss 0.64280509 - samples/sec: 15.39 - lr: 0.010000\n",
-      "2021-09-08 01:35:15,246 epoch 8 - iter 14/24 - loss 0.64208037 - samples/sec: 17.83 - lr: 0.010000\n",
-      "2021-09-08 01:35:15,365 epoch 8 - iter 16/24 - loss 0.63970304 - samples/sec: 16.88 - lr: 0.010000\n",
-      "2021-09-08 01:35:15,489 epoch 8 - iter 18/24 - loss 0.63950238 - samples/sec: 16.19 - lr: 0.010000\n",
-      "2021-09-08 01:35:15,611 epoch 8 - iter 20/24 - loss 0.64029753 - samples/sec: 16.51 - lr: 0.010000\n",
-      "2021-09-08 01:35:15,731 epoch 8 - iter 22/24 - loss 0.64208885 - samples/sec: 16.69 - lr: 0.010000\n",
-      "2021-09-08 01:35:15,850 epoch 8 - iter 24/24 - loss 0.64332761 - samples/sec: 16.94 - lr: 0.010000\n",
-      "2021-09-08 01:35:15,852 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:15,852 EPOCH 8 done: loss 0.6433 - lr 0.0100000\n",
-      "2021-09-08 01:35:15,918 DEV : loss 0.6370370388031006 - score 0.3333\n",
-      "2021-09-08 01:35:15,919 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:35:15,921 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:16,050 epoch 9 - iter 2/24 - loss 0.63798648 - samples/sec: 17.53 - lr: 0.010000\n",
-      "2021-09-08 01:35:16,155 epoch 9 - iter 4/24 - loss 0.63919665 - samples/sec: 19.23 - lr: 0.010000\n",
-      "2021-09-08 01:35:16,262 epoch 9 - iter 6/24 - loss 0.63488864 - samples/sec: 18.76 - lr: 0.010000\n",
-      "2021-09-08 01:35:16,380 epoch 9 - iter 8/24 - loss 0.63465902 - samples/sec: 17.06 - lr: 0.010000\n",
-      "2021-09-08 01:35:16,485 epoch 9 - iter 10/24 - loss 0.63499026 - samples/sec: 19.25 - lr: 0.010000\n",
-      "2021-09-08 01:35:16,592 epoch 9 - iter 12/24 - loss 0.63333738 - samples/sec: 18.90 - lr: 0.010000\n",
-      "2021-09-08 01:35:16,688 epoch 9 - iter 14/24 - loss 0.63478133 - samples/sec: 20.90 - lr: 0.010000\n",
-      "2021-09-08 01:35:16,800 epoch 9 - iter 16/24 - loss 0.63547956 - samples/sec: 17.97 - lr: 0.010000\n",
-      "2021-09-08 01:35:16,918 epoch 9 - iter 18/24 - loss 0.63521350 - samples/sec: 17.11 - lr: 0.010000\n",
-      "2021-09-08 01:35:17,021 epoch 9 - iter 20/24 - loss 0.63407854 - samples/sec: 19.38 - lr: 0.010000\n",
-      "2021-09-08 01:35:17,126 epoch 9 - iter 22/24 - loss 0.63477099 - samples/sec: 19.28 - lr: 0.010000\n",
-      "2021-09-08 01:35:17,239 epoch 9 - iter 24/24 - loss 0.63560302 - samples/sec: 17.75 - lr: 0.010000\n",
-      "2021-09-08 01:35:17,240 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:17,241 EPOCH 9 done: loss 0.6356 - lr 0.0100000\n",
-      "2021-09-08 01:35:17,301 DEV : loss 0.6365611553192139 - score 0.3333\n",
-      "2021-09-08 01:35:17,302 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:35:17,303 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:17,428 epoch 10 - iter 2/24 - loss 0.63470677 - samples/sec: 19.11 - lr: 0.010000\n",
-      "2021-09-08 01:35:17,540 epoch 10 - iter 4/24 - loss 0.63220006 - samples/sec: 17.93 - lr: 0.010000\n",
-      "2021-09-08 01:35:17,652 epoch 10 - iter 6/24 - loss 0.63182330 - samples/sec: 17.94 - lr: 0.010000\n",
-      "2021-09-08 01:35:17,759 epoch 10 - iter 8/24 - loss 0.63907973 - samples/sec: 18.77 - lr: 0.010000\n"
+      "2021-09-21 21:04:50,502 epoch 6 - iter 2/24 - loss 0.64563271 - samples/sec: 14.05 - lr: 0.020000\n",
+      "2021-09-21 21:04:50,651 epoch 6 - iter 4/24 - loss 0.65237726 - samples/sec: 13.50 - lr: 0.020000\n",
+      "2021-09-21 21:04:50,799 epoch 6 - iter 6/24 - loss 0.64735706 - samples/sec: 13.62 - lr: 0.020000\n",
+      "2021-09-21 21:04:50,934 epoch 6 - iter 8/24 - loss 0.64537966 - samples/sec: 14.89 - lr: 0.020000\n",
+      "2021-09-21 21:04:51,064 epoch 6 - iter 10/24 - loss 0.64428652 - samples/sec: 15.43 - lr: 0.020000\n",
+      "2021-09-21 21:04:51,200 epoch 6 - iter 12/24 - loss 0.64440468 - samples/sec: 14.87 - lr: 0.020000\n",
+      "2021-09-21 21:04:51,333 epoch 6 - iter 14/24 - loss 0.64283511 - samples/sec: 15.08 - lr: 0.020000\n",
+      "2021-09-21 21:04:51,459 epoch 6 - iter 16/24 - loss 0.64246171 - samples/sec: 15.97 - lr: 0.020000\n",
+      "2021-09-21 21:04:51,584 epoch 6 - iter 18/24 - loss 0.64272494 - samples/sec: 16.09 - lr: 0.020000\n",
+      "2021-09-21 21:04:51,694 epoch 6 - iter 20/24 - loss 0.64114939 - samples/sec: 18.37 - lr: 0.020000\n",
+      "2021-09-21 21:04:51,784 epoch 6 - iter 22/24 - loss 0.64071081 - samples/sec: 22.30 - lr: 0.020000\n",
+      "2021-09-21 21:04:51,880 epoch 6 - iter 24/24 - loss 0.64121964 - samples/sec: 20.90 - lr: 0.020000\n",
+      "2021-09-21 21:04:51,882 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:51,882 EPOCH 6 done: loss 0.6412 - lr 0.0200000\n",
+      "2021-09-21 21:04:52,068 DEV : loss 0.6365252733230591 - score 0.0\n",
+      "2021-09-21 21:04:52,069 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:04:52,147 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:52,268 epoch 7 - iter 2/24 - loss 0.65297940 - samples/sec: 19.45 - lr: 0.020000\n",
+      "2021-09-21 21:04:52,368 epoch 7 - iter 4/24 - loss 0.64898141 - samples/sec: 20.23 - lr: 0.020000\n",
+      "2021-09-21 21:04:52,470 epoch 7 - iter 6/24 - loss 0.64891315 - samples/sec: 19.68 - lr: 0.020000\n",
+      "2021-09-21 21:04:52,567 epoch 7 - iter 8/24 - loss 0.64729804 - samples/sec: 20.82 - lr: 0.020000\n",
+      "2021-09-21 21:04:52,668 epoch 7 - iter 10/24 - loss 0.64533585 - samples/sec: 19.94 - lr: 0.020000\n",
+      "2021-09-21 21:04:52,766 epoch 7 - iter 12/24 - loss 0.64555275 - samples/sec: 20.64 - lr: 0.020000\n",
+      "2021-09-21 21:04:52,865 epoch 7 - iter 14/24 - loss 0.64688760 - samples/sec: 20.30 - lr: 0.020000\n",
+      "2021-09-21 21:04:52,964 epoch 7 - iter 16/24 - loss 0.64609692 - samples/sec: 20.42 - lr: 0.020000\n",
+      "2021-09-21 21:04:53,061 epoch 7 - iter 18/24 - loss 0.64550632 - samples/sec: 20.63 - lr: 0.020000\n",
+      "2021-09-21 21:04:53,158 epoch 7 - iter 20/24 - loss 0.64442452 - samples/sec: 20.81 - lr: 0.020000\n",
+      "2021-09-21 21:04:53,255 epoch 7 - iter 22/24 - loss 0.64553033 - samples/sec: 20.79 - lr: 0.020000\n",
+      "2021-09-21 21:04:53,355 epoch 7 - iter 24/24 - loss 0.64417842 - samples/sec: 20.14 - lr: 0.020000\n",
+      "2021-09-21 21:04:53,356 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:53,356 EPOCH 7 done: loss 0.6442 - lr 0.0200000\n",
+      "2021-09-21 21:04:53,534 DEV : loss 0.6367654800415039 - score 0.3333\n",
+      "2021-09-21 21:04:53,536 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:04:53,619 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:53,736 epoch 8 - iter 2/24 - loss 0.65905014 - samples/sec: 19.95 - lr: 0.020000\n",
+      "2021-09-21 21:04:53,831 epoch 8 - iter 4/24 - loss 0.64796148 - samples/sec: 21.16 - lr: 0.020000\n",
+      "2021-09-21 21:04:53,928 epoch 8 - iter 6/24 - loss 0.64476260 - samples/sec: 20.89 - lr: 0.020000\n",
+      "2021-09-21 21:04:54,026 epoch 8 - iter 8/24 - loss 0.64504689 - samples/sec: 20.45 - lr: 0.020000\n",
+      "2021-09-21 21:04:54,120 epoch 8 - iter 10/24 - loss 0.64355084 - samples/sec: 21.34 - lr: 0.020000\n",
+      "2021-09-21 21:04:54,225 epoch 8 - iter 12/24 - loss 0.64304770 - samples/sec: 19.29 - lr: 0.020000\n",
+      "2021-09-21 21:04:54,323 epoch 8 - iter 14/24 - loss 0.64249020 - samples/sec: 20.46 - lr: 0.020000\n",
+      "2021-09-21 21:04:54,419 epoch 8 - iter 16/24 - loss 0.64212861 - samples/sec: 21.01 - lr: 0.020000\n",
+      "2021-09-21 21:04:54,511 epoch 8 - iter 18/24 - loss 0.64148038 - samples/sec: 21.81 - lr: 0.020000\n",
+      "2021-09-21 21:04:54,602 epoch 8 - iter 20/24 - loss 0.64079922 - samples/sec: 22.16 - lr: 0.020000\n",
+      "2021-09-21 21:04:54,705 epoch 8 - iter 22/24 - loss 0.64119225 - samples/sec: 19.59 - lr: 0.020000\n",
+      "2021-09-21 21:04:54,798 epoch 8 - iter 24/24 - loss 0.64026354 - samples/sec: 21.69 - lr: 0.020000\n",
+      "2021-09-21 21:04:54,799 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:54,800 EPOCH 8 done: loss 0.6403 - lr 0.0200000\n",
+      "2021-09-21 21:04:55,601 DEV : loss 0.6365992426872253 - score 0.3333\n",
+      "2021-09-21 21:04:55,605 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:04:55,676 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:55,812 epoch 9 - iter 2/24 - loss 0.63300624 - samples/sec: 16.59 - lr: 0.020000\n",
+      "2021-09-21 21:04:55,925 epoch 9 - iter 4/24 - loss 0.63352403 - samples/sec: 17.75 - lr: 0.020000\n",
+      "2021-09-21 21:04:56,033 epoch 9 - iter 6/24 - loss 0.63316844 - samples/sec: 18.69 - lr: 0.020000\n",
+      "2021-09-21 21:04:56,150 epoch 9 - iter 8/24 - loss 0.63488851 - samples/sec: 17.17 - lr: 0.020000\n",
+      "2021-09-21 21:04:56,274 epoch 9 - iter 10/24 - loss 0.63351163 - samples/sec: 16.24 - lr: 0.020000\n",
+      "2021-09-21 21:04:56,404 epoch 9 - iter 12/24 - loss 0.63467524 - samples/sec: 15.51 - lr: 0.020000\n",
+      "2021-09-21 21:04:56,532 epoch 9 - iter 14/24 - loss 0.63402626 - samples/sec: 15.69 - lr: 0.020000\n",
+      "2021-09-21 21:04:56,664 epoch 9 - iter 16/24 - loss 0.63469296 - samples/sec: 15.20 - lr: 0.020000\n",
+      "2021-09-21 21:04:56,797 epoch 9 - iter 18/24 - loss 0.63415472 - samples/sec: 15.17 - lr: 0.020000\n",
+      "2021-09-21 21:04:56,916 epoch 9 - iter 20/24 - loss 0.63464523 - samples/sec: 16.84 - lr: 0.020000\n",
+      "2021-09-21 21:04:57,032 epoch 9 - iter 22/24 - loss 0.63537502 - samples/sec: 17.26 - lr: 0.020000\n",
+      "2021-09-21 21:04:57,149 epoch 9 - iter 24/24 - loss 0.63656239 - samples/sec: 17.14 - lr: 0.020000\n",
+      "2021-09-21 21:04:57,151 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:57,151 EPOCH 9 done: loss 0.6366 - lr 0.0200000\n",
+      "2021-09-21 21:05:02,198 DEV : loss 0.6365945339202881 - score 0.3333\n",
+      "Epoch     9: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:05:02,200 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:05:02,305 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:02,420 epoch 10 - iter 2/24 - loss 0.63386792 - samples/sec: 19.93 - lr: 0.010000\n",
+      "2021-09-21 21:05:02,519 epoch 10 - iter 4/24 - loss 0.63825414 - samples/sec: 20.35 - lr: 0.010000\n",
+      "2021-09-21 21:05:02,635 epoch 10 - iter 6/24 - loss 0.63785008 - samples/sec: 17.32 - lr: 0.010000\n",
+      "2021-09-21 21:05:02,750 epoch 10 - iter 8/24 - loss 0.63992288 - samples/sec: 17.52 - lr: 0.010000\n",
+      "2021-09-21 21:05:02,861 epoch 10 - iter 10/24 - loss 0.63824083 - samples/sec: 18.07 - lr: 0.010000\n",
+      "2021-09-21 21:05:02,976 epoch 10 - iter 12/24 - loss 0.63775784 - samples/sec: 17.62 - lr: 0.010000\n",
+      "2021-09-21 21:05:03,095 epoch 10 - iter 14/24 - loss 0.63691119 - samples/sec: 16.83 - lr: 0.010000\n",
+      "2021-09-21 21:05:03,211 epoch 10 - iter 16/24 - loss 0.63660154 - samples/sec: 17.31 - lr: 0.010000\n",
+      "2021-09-21 21:05:03,322 epoch 10 - iter 18/24 - loss 0.63627929 - samples/sec: 18.11 - lr: 0.010000\n",
+      "2021-09-21 21:05:03,428 epoch 10 - iter 20/24 - loss 0.63661980 - samples/sec: 19.01 - lr: 0.010000\n",
+      "2021-09-21 21:05:03,537 epoch 10 - iter 22/24 - loss 0.63761795 - samples/sec: 18.55 - lr: 0.010000\n",
+      "2021-09-21 21:05:03,646 epoch 10 - iter 24/24 - loss 0.63665611 - samples/sec: 18.38 - lr: 0.010000\n",
+      "2021-09-21 21:05:03,647 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:03,647 EPOCH 10 done: loss 0.6367 - lr 0.0100000\n",
+      "2021-09-21 21:05:03,703 DEV : loss 0.63651442527771 - score 0.0\n",
+      "2021-09-21 21:05:03,703 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:05:07,774 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:07,774 Testing using best model ...\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:35:17,868 epoch 10 - iter 10/24 - loss 0.63865131 - samples/sec: 18.46 - lr: 0.010000\n",
-      "2021-09-08 01:35:17,976 epoch 10 - iter 12/24 - loss 0.63705192 - samples/sec: 18.61 - lr: 0.010000\n",
-      "2021-09-08 01:35:18,078 epoch 10 - iter 14/24 - loss 0.63884231 - samples/sec: 19.74 - lr: 0.010000\n",
-      "2021-09-08 01:35:18,179 epoch 10 - iter 16/24 - loss 0.63787351 - samples/sec: 19.91 - lr: 0.010000\n",
-      "2021-09-08 01:35:18,282 epoch 10 - iter 18/24 - loss 0.63963528 - samples/sec: 19.59 - lr: 0.010000\n",
-      "2021-09-08 01:35:18,384 epoch 10 - iter 20/24 - loss 0.64038046 - samples/sec: 19.78 - lr: 0.010000\n",
-      "2021-09-08 01:35:18,489 epoch 10 - iter 22/24 - loss 0.63925983 - samples/sec: 19.09 - lr: 0.010000\n",
-      "2021-09-08 01:35:18,597 epoch 10 - iter 24/24 - loss 0.63909212 - samples/sec: 18.60 - lr: 0.010000\n",
-      "2021-09-08 01:35:18,598 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:18,599 EPOCH 10 done: loss 0.6391 - lr 0.0100000\n",
-      "2021-09-08 01:35:18,666 DEV : loss 0.6366121172904968 - score 0.3333\n",
-      "2021-09-08 01:35:18,667 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:35:24,904 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:24,905 Testing using best model ...\n",
-      "2021-09-08 01:35:24,907 loading file None1/best-model.pt\n",
+      "2021-09-21 21:05:07,776 loading file None1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:35:30,086 \t0.0\n",
-      "2021-09-08 01:35:30,087 \n",
+      "2021-09-21 21:05:13,224 \t0.3333\n",
+      "2021-09-21 21:05:13,225 \n",
       "Results:\n",
-      "- F-score (micro) 0.0\n",
-      "- F-score (macro) 0.0\n",
-      "- Accuracy 0.0\n",
+      "- F-score (micro) 0.3333\n",
+      "- F-score (macro) 0.1667\n",
+      "- Accuracy 0.3333\n",
       "\n",
       "By class:\n",
       "              precision    recall  f1-score   support\n",
       "\n",
-      "    positive     0.0000    0.0000    0.0000         1\n",
+      "    positive     0.5000    0.5000    0.5000         2\n",
       "     neutral     0.0000    0.0000    0.0000         0\n",
-      "    negative     0.0000    0.0000    0.0000         2\n",
+      "    negative     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "   micro avg     0.0000    0.0000    0.0000         3\n",
-      "   macro avg     0.0000    0.0000    0.0000         3\n",
-      "weighted avg     0.0000    0.0000    0.0000         3\n",
-      " samples avg     0.0000    0.0000    0.0000         3\n",
+      "   micro avg     0.3333    0.3333    0.3333         3\n",
+      "   macro avg     0.1667    0.1667    0.1667         3\n",
+      "weighted avg     0.3333    0.3333    0.3333         3\n",
+      " samples avg     0.3333    0.3333    0.3333         3\n",
       "\n",
-      "2021-09-08 01:35:30,087 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:42,757 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
+      "2021-09-21 21:05:13,225 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:44,664 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:35:47,465 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:05:49,168 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 27/27 [00:00<00:00, 33396.11it/s]"
+      "100%|██████████| 27/27 [00:00<00:00, 32430.19it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:35:47,468 [b'positive', b'neutral', b'negative']\n",
-      "2021-09-08 01:35:47,494 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:47,496 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:05:49,171 [b'positive', b'neutral', b'negative']\n",
+      "2021-09-21 21:05:49,330 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:49,332 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -1546,26 +1544,24 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:35:47,497 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:47,497 Corpus: \"Corpus: 24 train + 3 dev + 3 test sentences\"\n",
-      "2021-09-08 01:35:47,497 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:47,498 Parameters:\n",
-      "2021-09-08 01:35:47,498  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:35:47,498  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:35:47,499  - patience: \"3\"\n",
-      "2021-09-08 01:35:47,499  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:35:47,499  - max_epochs: \"10\"\n",
-      "2021-09-08 01:35:47,500  - shuffle: \"True\"\n",
-      "2021-09-08 01:35:47,500  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:35:47,500  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:35:47,501 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:47,501 Model training base path: \"None1\"\n",
-      "2021-09-08 01:35:47,501 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:47,502 Device: cuda:1\n",
-      "2021-09-08 01:35:47,502 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:47,502 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:35:47,509 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:47,628 epoch 1 - iter 2/24 - loss 0.62598291 - samples/sec: 20.40 - lr: 0.020000\n"
+      "2021-09-21 21:05:49,333 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:49,333 Corpus: \"Corpus: 24 train + 3 dev + 3 test sentences\"\n",
+      "2021-09-21 21:05:49,334 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:49,334 Parameters:\n",
+      "2021-09-21 21:05:49,334  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:05:49,335  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:05:49,335  - patience: \"3\"\n",
+      "2021-09-21 21:05:49,335  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:05:49,336  - max_epochs: \"10\"\n",
+      "2021-09-21 21:05:49,336  - shuffle: \"True\"\n",
+      "2021-09-21 21:05:49,336  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:05:49,337  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:05:49,337 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:49,337 Model training base path: \"None1\"\n",
+      "2021-09-21 21:05:49,338 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:49,338 Device: cuda:0\n",
+      "2021-09-21 21:05:49,338 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:49,339 Embeddings storage mode: cpu\n"
      ]
     },
     {
@@ -1579,232 +1575,249 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:35:47,738 epoch 1 - iter 4/24 - loss 0.63000216 - samples/sec: 18.23 - lr: 0.020000\n",
-      "2021-09-08 01:35:47,843 epoch 1 - iter 6/24 - loss 0.63313050 - samples/sec: 19.17 - lr: 0.020000\n",
-      "2021-09-08 01:35:47,948 epoch 1 - iter 8/24 - loss 0.62859953 - samples/sec: 19.25 - lr: 0.020000\n",
-      "2021-09-08 01:35:48,058 epoch 1 - iter 10/24 - loss 0.63042394 - samples/sec: 18.31 - lr: 0.020000\n",
-      "2021-09-08 01:35:48,163 epoch 1 - iter 12/24 - loss 0.63245761 - samples/sec: 19.05 - lr: 0.020000\n",
-      "2021-09-08 01:35:48,259 epoch 1 - iter 14/24 - loss 0.63184924 - samples/sec: 21.01 - lr: 0.020000\n",
-      "2021-09-08 01:35:48,363 epoch 1 - iter 16/24 - loss 0.63236218 - samples/sec: 19.30 - lr: 0.020000\n",
-      "2021-09-08 01:35:48,460 epoch 1 - iter 18/24 - loss 0.63194844 - samples/sec: 20.81 - lr: 0.020000\n",
-      "2021-09-08 01:35:48,563 epoch 1 - iter 20/24 - loss 0.63385586 - samples/sec: 19.53 - lr: 0.020000\n",
-      "2021-09-08 01:35:48,657 epoch 1 - iter 22/24 - loss 0.63308667 - samples/sec: 21.29 - lr: 0.020000\n",
-      "2021-09-08 01:35:48,757 epoch 1 - iter 24/24 - loss 0.63438412 - samples/sec: 20.23 - lr: 0.020000\n",
-      "2021-09-08 01:35:48,758 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:48,759 EPOCH 1 done: loss 0.6344 - lr 0.0200000\n",
-      "2021-09-08 01:35:48,825 DEV : loss 0.6365306377410889 - score 0.6667\n",
-      "2021-09-08 01:35:48,826 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:05:49,521 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:49,645 epoch 1 - iter 2/24 - loss 0.62520158 - samples/sec: 18.76 - lr: 0.020000\n",
+      "2021-09-21 21:05:49,772 epoch 1 - iter 4/24 - loss 0.63680795 - samples/sec: 15.86 - lr: 0.020000\n",
+      "2021-09-21 21:05:49,885 epoch 1 - iter 6/24 - loss 0.63533167 - samples/sec: 17.82 - lr: 0.020000\n",
+      "2021-09-21 21:05:49,995 epoch 1 - iter 8/24 - loss 0.63455832 - samples/sec: 18.24 - lr: 0.020000\n",
+      "2021-09-21 21:05:50,121 epoch 1 - iter 10/24 - loss 0.63232227 - samples/sec: 15.97 - lr: 0.020000\n",
+      "2021-09-21 21:05:50,234 epoch 1 - iter 12/24 - loss 0.63323057 - samples/sec: 17.77 - lr: 0.020000\n",
+      "2021-09-21 21:05:50,340 epoch 1 - iter 14/24 - loss 0.63295348 - samples/sec: 19.04 - lr: 0.020000\n",
+      "2021-09-21 21:05:50,465 epoch 1 - iter 16/24 - loss 0.63553853 - samples/sec: 16.02 - lr: 0.020000\n",
+      "2021-09-21 21:05:50,592 epoch 1 - iter 18/24 - loss 0.63381477 - samples/sec: 15.82 - lr: 0.020000\n",
+      "2021-09-21 21:05:50,691 epoch 1 - iter 20/24 - loss 0.63467388 - samples/sec: 20.52 - lr: 0.020000\n",
+      "2021-09-21 21:05:50,809 epoch 1 - iter 22/24 - loss 0.63386354 - samples/sec: 16.99 - lr: 0.020000\n",
+      "2021-09-21 21:05:50,922 epoch 1 - iter 24/24 - loss 0.63364911 - samples/sec: 17.80 - lr: 0.020000\n",
+      "2021-09-21 21:05:50,924 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:50,924 EPOCH 1 done: loss 0.6336 - lr 0.0200000\n",
+      "2021-09-21 21:05:55,219 DEV : loss 0.6365706324577332 - score 0.0\n",
+      "2021-09-21 21:05:55,220 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:05:59,037 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:59,161 epoch 2 - iter 2/24 - loss 0.62715515 - samples/sec: 19.67 - lr: 0.020000\n",
+      "2021-09-21 21:05:59,262 epoch 2 - iter 4/24 - loss 0.63463674 - samples/sec: 19.97 - lr: 0.020000\n",
+      "2021-09-21 21:05:59,359 epoch 2 - iter 6/24 - loss 0.63887431 - samples/sec: 20.67 - lr: 0.020000\n",
+      "2021-09-21 21:05:59,453 epoch 2 - iter 8/24 - loss 0.63888261 - samples/sec: 21.42 - lr: 0.020000\n",
+      "2021-09-21 21:05:59,570 epoch 2 - iter 10/24 - loss 0.63575723 - samples/sec: 17.20 - lr: 0.020000\n",
+      "2021-09-21 21:05:59,688 epoch 2 - iter 12/24 - loss 0.63550141 - samples/sec: 17.04 - lr: 0.020000\n",
+      "2021-09-21 21:05:59,801 epoch 2 - iter 14/24 - loss 0.63506171 - samples/sec: 17.71 - lr: 0.020000\n",
+      "2021-09-21 21:05:59,912 epoch 2 - iter 16/24 - loss 0.63614328 - samples/sec: 18.18 - lr: 0.020000\n",
+      "2021-09-21 21:06:00,040 epoch 2 - iter 18/24 - loss 0.63718193 - samples/sec: 15.70 - lr: 0.020000\n",
+      "2021-09-21 21:06:00,163 epoch 2 - iter 20/24 - loss 0.63571472 - samples/sec: 16.40 - lr: 0.020000\n",
+      "2021-09-21 21:06:00,286 epoch 2 - iter 22/24 - loss 0.63611425 - samples/sec: 16.27 - lr: 0.020000\n",
+      "2021-09-21 21:06:00,416 epoch 2 - iter 24/24 - loss 0.63658603 - samples/sec: 15.49 - lr: 0.020000\n",
+      "2021-09-21 21:06:00,418 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:00,418 EPOCH 2 done: loss 0.6366 - lr 0.0200000\n",
+      "2021-09-21 21:06:00,494 DEV : loss 0.6365208029747009 - score 0.0\n",
+      "2021-09-21 21:06:00,495 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:06:04,639 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:04,798 epoch 3 - iter 2/24 - loss 0.62938726 - samples/sec: 16.01 - lr: 0.020000\n",
+      "2021-09-21 21:06:04,943 epoch 3 - iter 4/24 - loss 0.63380584 - samples/sec: 13.81 - lr: 0.020000\n",
+      "2021-09-21 21:06:05,050 epoch 3 - iter 6/24 - loss 0.63762105 - samples/sec: 18.86 - lr: 0.020000\n",
+      "2021-09-21 21:06:05,163 epoch 3 - iter 8/24 - loss 0.63281472 - samples/sec: 17.84 - lr: 0.020000\n",
+      "2021-09-21 21:06:05,269 epoch 3 - iter 10/24 - loss 0.63609703 - samples/sec: 18.98 - lr: 0.020000\n",
+      "2021-09-21 21:06:05,374 epoch 3 - iter 12/24 - loss 0.63220844 - samples/sec: 19.21 - lr: 0.020000\n",
+      "2021-09-21 21:06:05,486 epoch 3 - iter 14/24 - loss 0.63699468 - samples/sec: 17.89 - lr: 0.020000\n",
+      "2021-09-21 21:06:05,591 epoch 3 - iter 16/24 - loss 0.63662473 - samples/sec: 19.20 - lr: 0.020000\n",
+      "2021-09-21 21:06:05,701 epoch 3 - iter 18/24 - loss 0.63341754 - samples/sec: 18.23 - lr: 0.020000\n",
+      "2021-09-21 21:06:05,801 epoch 3 - iter 20/24 - loss 0.63307632 - samples/sec: 20.13 - lr: 0.020000\n",
+      "2021-09-21 21:06:05,915 epoch 3 - iter 22/24 - loss 0.63196025 - samples/sec: 17.64 - lr: 0.020000\n",
+      "2021-09-21 21:06:06,020 epoch 3 - iter 24/24 - loss 0.63245230 - samples/sec: 19.18 - lr: 0.020000\n",
+      "2021-09-21 21:06:06,022 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:06,022 EPOCH 3 done: loss 0.6325 - lr 0.0200000\n",
+      "2021-09-21 21:06:06,190 DEV : loss 0.636520504951477 - score 0.0\n",
+      "2021-09-21 21:06:06,191 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:35:53,202 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:53,334 epoch 2 - iter 2/24 - loss 0.64902571 - samples/sec: 18.20 - lr: 0.020000\n",
-      "2021-09-08 01:35:53,430 epoch 2 - iter 4/24 - loss 0.63625565 - samples/sec: 20.94 - lr: 0.020000\n",
-      "2021-09-08 01:35:53,533 epoch 2 - iter 6/24 - loss 0.63356825 - samples/sec: 19.56 - lr: 0.020000\n",
-      "2021-09-08 01:35:53,634 epoch 2 - iter 8/24 - loss 0.63613152 - samples/sec: 19.87 - lr: 0.020000\n",
-      "2021-09-08 01:35:53,728 epoch 2 - iter 10/24 - loss 0.63474869 - samples/sec: 21.49 - lr: 0.020000\n",
-      "2021-09-08 01:35:53,840 epoch 2 - iter 12/24 - loss 0.63410237 - samples/sec: 17.97 - lr: 0.020000\n",
-      "2021-09-08 01:35:53,936 epoch 2 - iter 14/24 - loss 0.63493896 - samples/sec: 20.78 - lr: 0.020000\n",
-      "2021-09-08 01:35:54,033 epoch 2 - iter 16/24 - loss 0.63403073 - samples/sec: 20.93 - lr: 0.020000\n",
-      "2021-09-08 01:35:54,139 epoch 2 - iter 18/24 - loss 0.63450310 - samples/sec: 18.86 - lr: 0.020000\n",
-      "2021-09-08 01:35:54,239 epoch 2 - iter 20/24 - loss 0.63365538 - samples/sec: 20.09 - lr: 0.020000\n",
-      "2021-09-08 01:35:54,344 epoch 2 - iter 22/24 - loss 0.63297960 - samples/sec: 19.26 - lr: 0.020000\n",
-      "2021-09-08 01:35:54,451 epoch 2 - iter 24/24 - loss 0.63318855 - samples/sec: 18.74 - lr: 0.020000\n",
-      "2021-09-08 01:35:54,452 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:54,453 EPOCH 2 done: loss 0.6332 - lr 0.0200000\n",
-      "2021-09-08 01:35:54,557 DEV : loss 0.636518120765686 - score 0.3333\n",
-      "2021-09-08 01:35:54,558 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:35:55,529 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:55,650 epoch 3 - iter 2/24 - loss 0.63429365 - samples/sec: 20.20 - lr: 0.020000\n",
-      "2021-09-08 01:35:55,743 epoch 3 - iter 4/24 - loss 0.64641801 - samples/sec: 21.58 - lr: 0.020000\n",
-      "2021-09-08 01:35:55,841 epoch 3 - iter 6/24 - loss 0.63964473 - samples/sec: 20.47 - lr: 0.020000\n",
-      "2021-09-08 01:35:55,931 epoch 3 - iter 8/24 - loss 0.63897636 - samples/sec: 22.44 - lr: 0.020000\n",
-      "2021-09-08 01:35:56,023 epoch 3 - iter 10/24 - loss 0.63750381 - samples/sec: 21.87 - lr: 0.020000\n",
-      "2021-09-08 01:35:56,118 epoch 3 - iter 12/24 - loss 0.63841683 - samples/sec: 21.08 - lr: 0.020000\n",
-      "2021-09-08 01:35:56,214 epoch 3 - iter 14/24 - loss 0.64258959 - samples/sec: 21.12 - lr: 0.020000\n",
-      "2021-09-08 01:35:56,308 epoch 3 - iter 16/24 - loss 0.64207349 - samples/sec: 21.25 - lr: 0.020000\n",
-      "2021-09-08 01:35:56,413 epoch 3 - iter 18/24 - loss 0.64213186 - samples/sec: 19.29 - lr: 0.020000\n",
-      "2021-09-08 01:35:56,510 epoch 3 - iter 20/24 - loss 0.64168663 - samples/sec: 20.62 - lr: 0.020000\n",
-      "2021-09-08 01:35:56,620 epoch 3 - iter 22/24 - loss 0.64098231 - samples/sec: 18.28 - lr: 0.020000\n",
-      "2021-09-08 01:35:56,740 epoch 3 - iter 24/24 - loss 0.64075078 - samples/sec: 16.75 - lr: 0.020000\n",
-      "2021-09-08 01:35:56,742 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:56,742 EPOCH 3 done: loss 0.6408 - lr 0.0200000\n",
-      "2021-09-08 01:35:56,809 DEV : loss 0.6365474462509155 - score 1.0\n",
-      "2021-09-08 01:35:56,809 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:06:17,376 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:17,521 epoch 4 - iter 2/24 - loss 0.63626057 - samples/sec: 15.69 - lr: 0.020000\n",
+      "2021-09-21 21:06:17,652 epoch 4 - iter 4/24 - loss 0.63864124 - samples/sec: 15.37 - lr: 0.020000\n",
+      "2021-09-21 21:06:17,789 epoch 4 - iter 6/24 - loss 0.63365848 - samples/sec: 14.68 - lr: 0.020000\n",
+      "2021-09-21 21:06:17,902 epoch 4 - iter 8/24 - loss 0.63377019 - samples/sec: 17.70 - lr: 0.020000\n",
+      "2021-09-21 21:06:18,037 epoch 4 - iter 10/24 - loss 0.63685277 - samples/sec: 14.90 - lr: 0.020000\n",
+      "2021-09-21 21:06:18,155 epoch 4 - iter 12/24 - loss 0.63573204 - samples/sec: 17.07 - lr: 0.020000\n",
+      "2021-09-21 21:06:18,312 epoch 4 - iter 14/24 - loss 0.63894414 - samples/sec: 12.78 - lr: 0.020000\n",
+      "2021-09-21 21:06:18,428 epoch 4 - iter 16/24 - loss 0.63895974 - samples/sec: 17.42 - lr: 0.020000\n",
+      "2021-09-21 21:06:18,552 epoch 4 - iter 18/24 - loss 0.63873935 - samples/sec: 16.20 - lr: 0.020000\n",
+      "2021-09-21 21:06:18,676 epoch 4 - iter 20/24 - loss 0.63815666 - samples/sec: 16.28 - lr: 0.020000\n",
+      "2021-09-21 21:06:18,816 epoch 4 - iter 22/24 - loss 0.63775914 - samples/sec: 14.28 - lr: 0.020000\n",
+      "2021-09-21 21:06:18,947 epoch 4 - iter 24/24 - loss 0.63841609 - samples/sec: 15.36 - lr: 0.020000\n",
+      "2021-09-21 21:06:18,948 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:18,949 EPOCH 4 done: loss 0.6384 - lr 0.0200000\n",
+      "2021-09-21 21:06:19,044 DEV : loss 0.6365430355072021 - score 0.3333\n",
+      "2021-09-21 21:06:19,045 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:36:00,957 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:01,093 epoch 4 - iter 2/24 - loss 0.64397743 - samples/sec: 17.61 - lr: 0.020000\n",
-      "2021-09-08 01:36:01,215 epoch 4 - iter 4/24 - loss 0.64245823 - samples/sec: 16.43 - lr: 0.020000\n",
-      "2021-09-08 01:36:01,331 epoch 4 - iter 6/24 - loss 0.64093116 - samples/sec: 17.38 - lr: 0.020000\n",
-      "2021-09-08 01:36:01,452 epoch 4 - iter 8/24 - loss 0.64289808 - samples/sec: 16.60 - lr: 0.020000\n",
-      "2021-09-08 01:36:01,569 epoch 4 - iter 10/24 - loss 0.63962592 - samples/sec: 17.17 - lr: 0.020000\n",
-      "2021-09-08 01:36:01,682 epoch 4 - iter 12/24 - loss 0.63981397 - samples/sec: 17.85 - lr: 0.020000\n",
-      "2021-09-08 01:36:01,811 epoch 4 - iter 14/24 - loss 0.63915707 - samples/sec: 15.60 - lr: 0.020000\n",
-      "2021-09-08 01:36:01,921 epoch 4 - iter 16/24 - loss 0.63781169 - samples/sec: 18.27 - lr: 0.020000\n",
-      "2021-09-08 01:36:02,035 epoch 4 - iter 18/24 - loss 0.63771958 - samples/sec: 17.71 - lr: 0.020000\n",
-      "2021-09-08 01:36:02,162 epoch 4 - iter 20/24 - loss 0.63705909 - samples/sec: 15.84 - lr: 0.020000\n",
-      "2021-09-08 01:36:02,280 epoch 4 - iter 22/24 - loss 0.63666879 - samples/sec: 16.93 - lr: 0.020000\n",
-      "2021-09-08 01:36:02,395 epoch 4 - iter 24/24 - loss 0.63697503 - samples/sec: 17.50 - lr: 0.020000\n",
-      "2021-09-08 01:36:02,397 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:02,397 EPOCH 4 done: loss 0.6370 - lr 0.0200000\n",
-      "2021-09-08 01:36:02,505 DEV : loss 0.6366802453994751 - score 0.3333\n",
-      "2021-09-08 01:36:02,505 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:36:02,852 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:03,000 epoch 5 - iter 2/24 - loss 0.62610850 - samples/sec: 15.98 - lr: 0.020000\n",
-      "2021-09-08 01:36:03,111 epoch 5 - iter 4/24 - loss 0.63060799 - samples/sec: 18.14 - lr: 0.020000\n",
-      "2021-09-08 01:36:03,223 epoch 5 - iter 6/24 - loss 0.63348732 - samples/sec: 17.89 - lr: 0.020000\n",
-      "2021-09-08 01:36:03,335 epoch 5 - iter 8/24 - loss 0.63232525 - samples/sec: 17.97 - lr: 0.020000\n",
-      "2021-09-08 01:36:03,455 epoch 5 - iter 10/24 - loss 0.63425770 - samples/sec: 16.71 - lr: 0.020000\n",
-      "2021-09-08 01:36:03,577 epoch 5 - iter 12/24 - loss 0.63155461 - samples/sec: 16.57 - lr: 0.020000\n",
-      "2021-09-08 01:36:03,694 epoch 5 - iter 14/24 - loss 0.63407887 - samples/sec: 17.09 - lr: 0.020000\n",
-      "2021-09-08 01:36:03,817 epoch 5 - iter 16/24 - loss 0.63436480 - samples/sec: 16.38 - lr: 0.020000\n",
-      "2021-09-08 01:36:03,938 epoch 5 - iter 18/24 - loss 0.63292278 - samples/sec: 16.60 - lr: 0.020000\n",
-      "2021-09-08 01:36:04,053 epoch 5 - iter 20/24 - loss 0.63266196 - samples/sec: 17.51 - lr: 0.020000\n",
-      "2021-09-08 01:36:04,176 epoch 5 - iter 22/24 - loss 0.63328963 - samples/sec: 16.31 - lr: 0.020000\n",
-      "2021-09-08 01:36:04,301 epoch 5 - iter 24/24 - loss 0.63271170 - samples/sec: 16.17 - lr: 0.020000\n",
-      "2021-09-08 01:36:04,302 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:04,303 EPOCH 5 done: loss 0.6327 - lr 0.0200000\n",
-      "2021-09-08 01:36:04,372 DEV : loss 0.6365149021148682 - score 0.6667\n",
-      "2021-09-08 01:36:04,373 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:36:04,376 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:04,507 epoch 6 - iter 2/24 - loss 0.64785719 - samples/sec: 17.68 - lr: 0.020000\n",
-      "2021-09-08 01:36:04,639 epoch 6 - iter 4/24 - loss 0.64238392 - samples/sec: 15.15 - lr: 0.020000\n"
+      "2021-09-21 21:06:33,479 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:33,594 epoch 5 - iter 2/24 - loss 0.63055515 - samples/sec: 20.28 - lr: 0.020000\n",
+      "2021-09-21 21:06:33,701 epoch 5 - iter 4/24 - loss 0.63878807 - samples/sec: 18.91 - lr: 0.020000\n",
+      "2021-09-21 21:06:33,797 epoch 5 - iter 6/24 - loss 0.63662810 - samples/sec: 20.94 - lr: 0.020000\n",
+      "2021-09-21 21:06:33,902 epoch 5 - iter 8/24 - loss 0.63531947 - samples/sec: 19.13 - lr: 0.020000\n",
+      "2021-09-21 21:06:34,000 epoch 5 - iter 10/24 - loss 0.63812903 - samples/sec: 20.60 - lr: 0.020000\n",
+      "2021-09-21 21:06:34,108 epoch 5 - iter 12/24 - loss 0.63769991 - samples/sec: 18.54 - lr: 0.020000\n",
+      "2021-09-21 21:06:34,208 epoch 5 - iter 14/24 - loss 0.63955973 - samples/sec: 20.20 - lr: 0.020000\n",
+      "2021-09-21 21:06:34,310 epoch 5 - iter 16/24 - loss 0.64075723 - samples/sec: 19.64 - lr: 0.020000\n",
+      "2021-09-21 21:06:34,411 epoch 5 - iter 18/24 - loss 0.63876909 - samples/sec: 20.02 - lr: 0.020000\n",
+      "2021-09-21 21:06:34,512 epoch 5 - iter 20/24 - loss 0.63858748 - samples/sec: 19.84 - lr: 0.020000\n",
+      "2021-09-21 21:06:34,609 epoch 5 - iter 22/24 - loss 0.63912285 - samples/sec: 20.81 - lr: 0.020000\n",
+      "2021-09-21 21:06:34,708 epoch 5 - iter 24/24 - loss 0.63924219 - samples/sec: 20.31 - lr: 0.020000\n",
+      "2021-09-21 21:06:34,709 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:34,710 EPOCH 5 done: loss 0.6392 - lr 0.0200000\n",
+      "2021-09-21 21:06:37,793 DEV : loss 0.6365214586257935 - score 0.0\n",
+      "2021-09-21 21:06:37,794 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:06:37,816 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:36:04,764 epoch 6 - iter 6/24 - loss 0.63309149 - samples/sec: 16.10 - lr: 0.020000\n",
-      "2021-09-08 01:36:04,893 epoch 6 - iter 8/24 - loss 0.63087989 - samples/sec: 15.67 - lr: 0.020000\n",
-      "2021-09-08 01:36:05,018 epoch 6 - iter 10/24 - loss 0.63131651 - samples/sec: 16.10 - lr: 0.020000\n",
-      "2021-09-08 01:36:05,125 epoch 6 - iter 12/24 - loss 0.62802125 - samples/sec: 18.89 - lr: 0.020000\n",
-      "2021-09-08 01:36:05,227 epoch 6 - iter 14/24 - loss 0.62873272 - samples/sec: 19.67 - lr: 0.020000\n",
-      "2021-09-08 01:36:05,338 epoch 6 - iter 16/24 - loss 0.63097350 - samples/sec: 18.13 - lr: 0.020000\n",
-      "2021-09-08 01:36:05,434 epoch 6 - iter 18/24 - loss 0.63019694 - samples/sec: 20.90 - lr: 0.020000\n",
-      "2021-09-08 01:36:05,532 epoch 6 - iter 20/24 - loss 0.63007199 - samples/sec: 20.54 - lr: 0.020000\n",
-      "2021-09-08 01:36:05,644 epoch 6 - iter 22/24 - loss 0.63096472 - samples/sec: 17.83 - lr: 0.020000\n",
-      "2021-09-08 01:36:05,741 epoch 6 - iter 24/24 - loss 0.63167793 - samples/sec: 20.79 - lr: 0.020000\n",
-      "2021-09-08 01:36:05,742 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:05,743 EPOCH 6 done: loss 0.6317 - lr 0.0200000\n",
-      "2021-09-08 01:36:05,907 DEV : loss 0.6365160942077637 - score 0.3333\n",
-      "2021-09-08 01:36:05,907 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:36:05,988 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:06,118 epoch 7 - iter 2/24 - loss 0.64213130 - samples/sec: 18.87 - lr: 0.020000\n",
-      "2021-09-08 01:36:06,229 epoch 7 - iter 4/24 - loss 0.63835208 - samples/sec: 18.16 - lr: 0.020000\n",
-      "2021-09-08 01:36:06,332 epoch 7 - iter 6/24 - loss 0.64115999 - samples/sec: 19.41 - lr: 0.020000\n",
-      "2021-09-08 01:36:06,429 epoch 7 - iter 8/24 - loss 0.64076301 - samples/sec: 20.84 - lr: 0.020000\n",
-      "2021-09-08 01:36:06,543 epoch 7 - iter 10/24 - loss 0.63920210 - samples/sec: 17.61 - lr: 0.020000\n",
-      "2021-09-08 01:36:06,656 epoch 7 - iter 12/24 - loss 0.63864408 - samples/sec: 17.84 - lr: 0.020000\n",
-      "2021-09-08 01:36:06,757 epoch 7 - iter 14/24 - loss 0.63620105 - samples/sec: 19.85 - lr: 0.020000\n",
-      "2021-09-08 01:36:06,861 epoch 7 - iter 16/24 - loss 0.63698006 - samples/sec: 19.24 - lr: 0.020000\n",
-      "2021-09-08 01:36:06,967 epoch 7 - iter 18/24 - loss 0.63754963 - samples/sec: 19.01 - lr: 0.020000\n",
-      "2021-09-08 01:36:07,063 epoch 7 - iter 20/24 - loss 0.63655786 - samples/sec: 21.03 - lr: 0.020000\n",
-      "2021-09-08 01:36:07,165 epoch 7 - iter 22/24 - loss 0.63859730 - samples/sec: 19.60 - lr: 0.020000\n",
-      "2021-09-08 01:36:07,267 epoch 7 - iter 24/24 - loss 0.63784757 - samples/sec: 19.74 - lr: 0.020000\n",
-      "2021-09-08 01:36:07,268 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:07,269 EPOCH 7 done: loss 0.6378 - lr 0.0200000\n",
-      "2021-09-08 01:36:08,317 DEV : loss 0.6365305185317993 - score 0.6667\n",
-      "Epoch     7: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:36:08,317 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:36:08,426 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:08,559 epoch 8 - iter 2/24 - loss 0.63342312 - samples/sec: 17.14 - lr: 0.010000\n",
-      "2021-09-08 01:36:08,662 epoch 8 - iter 4/24 - loss 0.64501968 - samples/sec: 19.72 - lr: 0.010000\n",
-      "2021-09-08 01:36:08,770 epoch 8 - iter 6/24 - loss 0.64820282 - samples/sec: 18.60 - lr: 0.010000\n",
-      "2021-09-08 01:36:08,884 epoch 8 - iter 8/24 - loss 0.65134829 - samples/sec: 17.68 - lr: 0.010000\n",
-      "2021-09-08 01:36:09,005 epoch 8 - iter 10/24 - loss 0.64826393 - samples/sec: 16.56 - lr: 0.010000\n",
-      "2021-09-08 01:36:09,107 epoch 8 - iter 12/24 - loss 0.64709599 - samples/sec: 19.70 - lr: 0.010000\n",
-      "2021-09-08 01:36:09,209 epoch 8 - iter 14/24 - loss 0.64438416 - samples/sec: 19.89 - lr: 0.010000\n",
-      "2021-09-08 01:36:09,310 epoch 8 - iter 16/24 - loss 0.64286064 - samples/sec: 19.81 - lr: 0.010000\n",
-      "2021-09-08 01:36:09,406 epoch 8 - iter 18/24 - loss 0.64423996 - samples/sec: 21.06 - lr: 0.010000\n",
-      "2021-09-08 01:36:09,517 epoch 8 - iter 20/24 - loss 0.64587198 - samples/sec: 17.99 - lr: 0.010000\n",
-      "2021-09-08 01:36:09,618 epoch 8 - iter 22/24 - loss 0.64682190 - samples/sec: 19.96 - lr: 0.010000\n",
-      "2021-09-08 01:36:09,717 epoch 8 - iter 24/24 - loss 0.64515341 - samples/sec: 20.42 - lr: 0.010000\n",
-      "2021-09-08 01:36:09,718 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:09,718 EPOCH 8 done: loss 0.6452 - lr 0.0100000\n",
-      "2021-09-08 01:36:09,913 DEV : loss 0.6365370750427246 - score 0.6667\n",
-      "2021-09-08 01:36:09,914 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:36:10,007 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:10,129 epoch 9 - iter 2/24 - loss 0.63003778 - samples/sec: 19.58 - lr: 0.010000\n",
-      "2021-09-08 01:36:10,237 epoch 9 - iter 4/24 - loss 0.63051987 - samples/sec: 18.56 - lr: 0.010000\n",
-      "2021-09-08 01:36:10,341 epoch 9 - iter 6/24 - loss 0.62707143 - samples/sec: 19.29 - lr: 0.010000\n",
-      "2021-09-08 01:36:10,441 epoch 9 - iter 8/24 - loss 0.62534428 - samples/sec: 20.23 - lr: 0.010000\n",
-      "2021-09-08 01:36:10,542 epoch 9 - iter 10/24 - loss 0.63559019 - samples/sec: 19.83 - lr: 0.010000\n",
-      "2021-09-08 01:36:10,642 epoch 9 - iter 12/24 - loss 0.63675243 - samples/sec: 20.20 - lr: 0.010000\n",
-      "2021-09-08 01:36:10,739 epoch 9 - iter 14/24 - loss 0.63829548 - samples/sec: 20.61 - lr: 0.010000\n",
-      "2021-09-08 01:36:10,843 epoch 9 - iter 16/24 - loss 0.63792331 - samples/sec: 19.42 - lr: 0.010000\n",
-      "2021-09-08 01:36:10,942 epoch 9 - iter 18/24 - loss 0.63832191 - samples/sec: 20.29 - lr: 0.010000\n",
-      "2021-09-08 01:36:11,043 epoch 9 - iter 20/24 - loss 0.63921087 - samples/sec: 19.97 - lr: 0.010000\n",
-      "2021-09-08 01:36:11,150 epoch 9 - iter 22/24 - loss 0.63835075 - samples/sec: 18.79 - lr: 0.010000\n",
-      "2021-09-08 01:36:11,256 epoch 9 - iter 24/24 - loss 0.63884259 - samples/sec: 19.00 - lr: 0.010000\n",
-      "2021-09-08 01:36:11,257 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:11,257 EPOCH 9 done: loss 0.6388 - lr 0.0100000\n",
-      "2021-09-08 01:36:12,584 DEV : loss 0.6365201473236084 - score 0.3333\n",
-      "2021-09-08 01:36:12,585 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:36:12,662 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:12,790 epoch 10 - iter 2/24 - loss 0.63857895 - samples/sec: 18.90 - lr: 0.010000\n",
-      "2021-09-08 01:36:12,887 epoch 10 - iter 4/24 - loss 0.63253787 - samples/sec: 20.78 - lr: 0.010000\n",
-      "2021-09-08 01:36:12,982 epoch 10 - iter 6/24 - loss 0.63525401 - samples/sec: 21.23 - lr: 0.010000\n",
-      "2021-09-08 01:36:13,080 epoch 10 - iter 8/24 - loss 0.63777865 - samples/sec: 20.52 - lr: 0.010000\n",
-      "2021-09-08 01:36:13,181 epoch 10 - iter 10/24 - loss 0.63823041 - samples/sec: 19.82 - lr: 0.010000\n",
-      "2021-09-08 01:36:13,282 epoch 10 - iter 12/24 - loss 0.64092120 - samples/sec: 20.00 - lr: 0.010000\n",
-      "2021-09-08 01:36:13,399 epoch 10 - iter 14/24 - loss 0.64029985 - samples/sec: 17.05 - lr: 0.010000\n",
-      "2021-09-08 01:36:13,504 epoch 10 - iter 16/24 - loss 0.64057319 - samples/sec: 19.24 - lr: 0.010000\n",
-      "2021-09-08 01:36:13,610 epoch 10 - iter 18/24 - loss 0.64014257 - samples/sec: 18.85 - lr: 0.010000\n",
-      "2021-09-08 01:36:13,708 epoch 10 - iter 20/24 - loss 0.64067556 - samples/sec: 20.50 - lr: 0.010000\n",
-      "2021-09-08 01:36:13,810 epoch 10 - iter 22/24 - loss 0.64032966 - samples/sec: 19.83 - lr: 0.010000\n",
-      "2021-09-08 01:36:13,908 epoch 10 - iter 24/24 - loss 0.64084024 - samples/sec: 20.54 - lr: 0.010000\n",
-      "2021-09-08 01:36:13,909 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:13,909 EPOCH 10 done: loss 0.6408 - lr 0.0100000\n",
-      "2021-09-08 01:36:14,430 DEV : loss 0.6366056799888611 - score 0.3333\n",
-      "2021-09-08 01:36:14,431 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:36:19,319 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:19,320 Testing using best model ...\n",
-      "2021-09-08 01:36:19,321 loading file None1/best-model.pt\n",
+      "2021-09-21 21:06:37,969 epoch 6 - iter 2/24 - loss 0.64004055 - samples/sec: 15.39 - lr: 0.020000\n",
+      "2021-09-21 21:06:38,099 epoch 6 - iter 4/24 - loss 0.63286747 - samples/sec: 15.46 - lr: 0.020000\n",
+      "2021-09-21 21:06:38,228 epoch 6 - iter 6/24 - loss 0.63340523 - samples/sec: 15.57 - lr: 0.020000\n",
+      "2021-09-21 21:06:38,354 epoch 6 - iter 8/24 - loss 0.63343402 - samples/sec: 16.03 - lr: 0.020000\n",
+      "2021-09-21 21:06:38,480 epoch 6 - iter 10/24 - loss 0.63773847 - samples/sec: 16.01 - lr: 0.020000\n",
+      "2021-09-21 21:06:38,594 epoch 6 - iter 12/24 - loss 0.63649091 - samples/sec: 17.59 - lr: 0.020000\n",
+      "2021-09-21 21:06:38,726 epoch 6 - iter 14/24 - loss 0.63703338 - samples/sec: 15.26 - lr: 0.020000\n",
+      "2021-09-21 21:06:38,861 epoch 6 - iter 16/24 - loss 0.63811563 - samples/sec: 14.88 - lr: 0.020000\n",
+      "2021-09-21 21:06:38,975 epoch 6 - iter 18/24 - loss 0.63735606 - samples/sec: 17.62 - lr: 0.020000\n",
+      "2021-09-21 21:06:39,111 epoch 6 - iter 20/24 - loss 0.63953047 - samples/sec: 14.80 - lr: 0.020000\n",
+      "2021-09-21 21:06:39,243 epoch 6 - iter 22/24 - loss 0.64034070 - samples/sec: 15.14 - lr: 0.020000\n",
+      "2021-09-21 21:06:39,371 epoch 6 - iter 24/24 - loss 0.63997566 - samples/sec: 15.78 - lr: 0.020000\n",
+      "2021-09-21 21:06:39,372 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:39,372 EPOCH 6 done: loss 0.6400 - lr 0.0200000\n",
+      "2021-09-21 21:06:39,564 DEV : loss 0.6365815997123718 - score 0.0\n",
+      "2021-09-21 21:06:39,565 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:06:39,647 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:39,807 epoch 7 - iter 2/24 - loss 0.64618975 - samples/sec: 15.82 - lr: 0.020000\n",
+      "2021-09-21 21:06:39,944 epoch 7 - iter 4/24 - loss 0.64124124 - samples/sec: 14.67 - lr: 0.020000\n",
+      "2021-09-21 21:06:40,063 epoch 7 - iter 6/24 - loss 0.63616993 - samples/sec: 16.89 - lr: 0.020000\n",
+      "2021-09-21 21:06:40,201 epoch 7 - iter 8/24 - loss 0.63422820 - samples/sec: 14.61 - lr: 0.020000\n",
+      "2021-09-21 21:06:40,328 epoch 7 - iter 10/24 - loss 0.63548301 - samples/sec: 15.89 - lr: 0.020000\n",
+      "2021-09-21 21:06:40,487 epoch 7 - iter 12/24 - loss 0.63787147 - samples/sec: 12.63 - lr: 0.020000\n",
+      "2021-09-21 21:06:40,607 epoch 7 - iter 14/24 - loss 0.63841580 - samples/sec: 16.69 - lr: 0.020000\n",
+      "2021-09-21 21:06:40,737 epoch 7 - iter 16/24 - loss 0.63777236 - samples/sec: 15.40 - lr: 0.020000\n",
+      "2021-09-21 21:06:40,859 epoch 7 - iter 18/24 - loss 0.63713291 - samples/sec: 16.48 - lr: 0.020000\n",
+      "2021-09-21 21:06:40,988 epoch 7 - iter 20/24 - loss 0.63820678 - samples/sec: 15.64 - lr: 0.020000\n",
+      "2021-09-21 21:06:41,129 epoch 7 - iter 22/24 - loss 0.63817121 - samples/sec: 14.27 - lr: 0.020000\n",
+      "2021-09-21 21:06:41,260 epoch 7 - iter 24/24 - loss 0.63923212 - samples/sec: 15.38 - lr: 0.020000\n",
+      "2021-09-21 21:06:41,261 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:41,261 EPOCH 7 done: loss 0.6392 - lr 0.0200000\n",
+      "2021-09-21 21:06:43,532 DEV : loss 0.6366555690765381 - score 0.3333\n",
+      "2021-09-21 21:06:43,533 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:06:43,553 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:43,720 epoch 8 - iter 2/24 - loss 0.63805002 - samples/sec: 13.19 - lr: 0.020000\n",
+      "2021-09-21 21:06:43,879 epoch 8 - iter 4/24 - loss 0.63784669 - samples/sec: 12.64 - lr: 0.020000\n",
+      "2021-09-21 21:06:44,032 epoch 8 - iter 6/24 - loss 0.63614742 - samples/sec: 13.14 - lr: 0.020000\n",
+      "2021-09-21 21:06:44,183 epoch 8 - iter 8/24 - loss 0.63565250 - samples/sec: 13.32 - lr: 0.020000\n",
+      "2021-09-21 21:06:44,329 epoch 8 - iter 10/24 - loss 0.63597779 - samples/sec: 13.78 - lr: 0.020000\n",
+      "2021-09-21 21:06:44,478 epoch 8 - iter 12/24 - loss 0.63611744 - samples/sec: 13.41 - lr: 0.020000\n",
+      "2021-09-21 21:06:44,627 epoch 8 - iter 14/24 - loss 0.63731722 - samples/sec: 13.50 - lr: 0.020000\n",
+      "2021-09-21 21:06:44,802 epoch 8 - iter 16/24 - loss 0.63855677 - samples/sec: 11.47 - lr: 0.020000\n",
+      "2021-09-21 21:06:44,954 epoch 8 - iter 18/24 - loss 0.63812886 - samples/sec: 13.19 - lr: 0.020000\n",
+      "2021-09-21 21:06:45,089 epoch 8 - iter 20/24 - loss 0.63913293 - samples/sec: 14.94 - lr: 0.020000\n",
+      "2021-09-21 21:06:45,251 epoch 8 - iter 22/24 - loss 0.63970469 - samples/sec: 12.35 - lr: 0.020000\n",
+      "2021-09-21 21:06:45,412 epoch 8 - iter 24/24 - loss 0.63973931 - samples/sec: 12.51 - lr: 0.020000\n",
+      "2021-09-21 21:06:45,413 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:45,414 EPOCH 8 done: loss 0.6397 - lr 0.0200000\n",
+      "2021-09-21 21:06:45,511 DEV : loss 0.6368883848190308 - score 0.0\n",
+      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:06:45,512 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:06:45,515 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:45,701 epoch 9 - iter 2/24 - loss 0.64104977 - samples/sec: 13.53 - lr: 0.010000\n",
+      "2021-09-21 21:06:45,860 epoch 9 - iter 4/24 - loss 0.63395122 - samples/sec: 12.69 - lr: 0.010000\n",
+      "2021-09-21 21:06:46,012 epoch 9 - iter 6/24 - loss 0.63224504 - samples/sec: 13.20 - lr: 0.010000\n",
+      "2021-09-21 21:06:46,154 epoch 9 - iter 8/24 - loss 0.63221893 - samples/sec: 14.08 - lr: 0.010000\n",
+      "2021-09-21 21:06:46,299 epoch 9 - iter 10/24 - loss 0.63360256 - samples/sec: 13.89 - lr: 0.010000\n",
+      "2021-09-21 21:06:46,446 epoch 9 - iter 12/24 - loss 0.63395727 - samples/sec: 13.63 - lr: 0.010000\n",
+      "2021-09-21 21:06:46,583 epoch 9 - iter 14/24 - loss 0.63544077 - samples/sec: 14.72 - lr: 0.010000\n",
+      "2021-09-21 21:06:46,731 epoch 9 - iter 16/24 - loss 0.63442300 - samples/sec: 13.60 - lr: 0.010000\n",
+      "2021-09-21 21:06:46,884 epoch 9 - iter 18/24 - loss 0.63346653 - samples/sec: 13.08 - lr: 0.010000\n",
+      "2021-09-21 21:06:47,035 epoch 9 - iter 20/24 - loss 0.63345973 - samples/sec: 13.33 - lr: 0.010000\n",
+      "2021-09-21 21:06:47,206 epoch 9 - iter 22/24 - loss 0.63403261 - samples/sec: 11.78 - lr: 0.010000\n",
+      "2021-09-21 21:06:47,367 epoch 9 - iter 24/24 - loss 0.63471184 - samples/sec: 12.46 - lr: 0.010000\n",
+      "2021-09-21 21:06:47,368 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:47,369 EPOCH 9 done: loss 0.6347 - lr 0.0100000\n",
+      "2021-09-21 21:06:47,518 DEV : loss 0.6365146040916443 - score 0.0\n",
+      "2021-09-21 21:06:47,519 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:06:47,534 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:47,732 epoch 10 - iter 2/24 - loss 0.63633591 - samples/sec: 12.06 - lr: 0.010000\n",
+      "2021-09-21 21:06:47,869 epoch 10 - iter 4/24 - loss 0.64134055 - samples/sec: 14.68 - lr: 0.010000\n",
+      "2021-09-21 21:06:48,006 epoch 10 - iter 6/24 - loss 0.63362134 - samples/sec: 14.63 - lr: 0.010000\n",
+      "2021-09-21 21:06:48,167 epoch 10 - iter 8/24 - loss 0.63664371 - samples/sec: 12.48 - lr: 0.010000\n",
+      "2021-09-21 21:06:48,322 epoch 10 - iter 10/24 - loss 0.63548577 - samples/sec: 13.00 - lr: 0.010000\n",
+      "2021-09-21 21:06:48,485 epoch 10 - iter 12/24 - loss 0.63688664 - samples/sec: 12.30 - lr: 0.010000\n",
+      "2021-09-21 21:06:48,643 epoch 10 - iter 14/24 - loss 0.63637052 - samples/sec: 12.66 - lr: 0.010000\n",
+      "2021-09-21 21:06:48,797 epoch 10 - iter 16/24 - loss 0.63766314 - samples/sec: 13.08 - lr: 0.010000\n",
+      "2021-09-21 21:06:48,953 epoch 10 - iter 18/24 - loss 0.63768751 - samples/sec: 12.79 - lr: 0.010000\n",
+      "2021-09-21 21:06:49,156 epoch 10 - iter 20/24 - loss 0.63710471 - samples/sec: 9.89 - lr: 0.010000\n",
+      "2021-09-21 21:06:49,348 epoch 10 - iter 22/24 - loss 0.63705131 - samples/sec: 10.43 - lr: 0.010000\n",
+      "2021-09-21 21:06:49,552 epoch 10 - iter 24/24 - loss 0.63673210 - samples/sec: 9.87 - lr: 0.010000\n",
+      "2021-09-21 21:06:49,554 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:49,554 EPOCH 10 done: loss 0.6367 - lr 0.0100000\n",
+      "2021-09-21 21:06:49,706 DEV : loss 0.6365911364555359 - score 0.3333\n",
+      "2021-09-21 21:06:49,710 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:07:09,095 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:09,096 Testing using best model ...\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 21:07:09,098 loading file None1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:36:24,240 \t0.0\n",
-      "2021-09-08 01:36:24,241 \n",
+      "2021-09-21 21:07:21,394 \t0.6667\n",
+      "2021-09-21 21:07:21,395 \n",
       "Results:\n",
-      "- F-score (micro) 0.0\n",
-      "- F-score (macro) 0.0\n",
-      "- Accuracy 0.0\n",
+      "- F-score (micro) 0.6667\n",
+      "- F-score (macro) 0.5556\n",
+      "- Accuracy 0.6667\n",
       "\n",
       "By class:\n",
       "              precision    recall  f1-score   support\n",
       "\n",
-      "    positive     0.0000    0.0000    0.0000         0\n",
-      "     neutral     0.0000    0.0000    0.0000         0\n",
-      "    negative     0.0000    0.0000    0.0000         3\n",
+      "    positive     1.0000    1.0000    1.0000         1\n",
+      "     neutral     0.5000    1.0000    0.6667         1\n",
+      "    negative     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "   micro avg     0.0000    0.0000    0.0000         3\n",
-      "   macro avg     0.0000    0.0000    0.0000         3\n",
-      "weighted avg     0.0000    0.0000    0.0000         3\n",
-      " samples avg     0.0000    0.0000    0.0000         3\n",
-      "\n"
+      "   micro avg     0.6667    0.6667    0.6667         3\n",
+      "   macro avg     0.5000    0.6667    0.5556         3\n",
+      "weighted avg     0.5000    0.6667    0.5556         3\n",
+      " samples avg     0.6667    0.6667    0.6667         3\n",
+      "\n",
+      "2021-09-21 21:07:21,396 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:41,644 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
+      "init TARS\n",
+      "2021-09-21 21:07:46,790 Computing label dictionary. Progress:\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 27/27 [00:00<00:00, 22496.27it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:36:24,241 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:39,249 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
-      "init TARS\n",
-      "2021-09-08 01:36:43,612 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:07:46,793 [b'positive', b'neutral', b'negative']\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 27/27 [00:00<00:00, 37142.08it/s]"
+      "\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:36:43,615 [b'positive', b'neutral', b'negative']\n",
-      "2021-09-08 01:36:43,765 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:43,767 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:07:47,661 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:47,663 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -2117,266 +2130,253 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:36:43,768 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:43,768 Corpus: \"Corpus: 24 train + 3 dev + 3 test sentences\"\n",
-      "2021-09-08 01:36:43,768 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:43,769 Parameters:\n",
-      "2021-09-08 01:36:43,769  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:36:43,769  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:36:43,770  - patience: \"3\"\n",
-      "2021-09-08 01:36:43,770  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:36:43,770  - max_epochs: \"10\"\n",
-      "2021-09-08 01:36:43,770  - shuffle: \"True\"\n",
-      "2021-09-08 01:36:43,771  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:36:43,771  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:36:43,771 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:43,772 Model training base path: \"None1\"\n",
-      "2021-09-08 01:36:43,772 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:43,772 Device: cuda:1\n",
-      "2021-09-08 01:36:43,772 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:43,773 Embeddings storage mode: cpu\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 01:36:44,017 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:44,113 epoch 1 - iter 2/24 - loss 0.65604362 - samples/sec: 24.35 - lr: 0.020000\n",
-      "2021-09-08 01:36:44,202 epoch 1 - iter 4/24 - loss 0.65146431 - samples/sec: 22.60 - lr: 0.020000\n",
-      "2021-09-08 01:36:44,291 epoch 1 - iter 6/24 - loss 0.64339660 - samples/sec: 22.66 - lr: 0.020000\n",
-      "2021-09-08 01:36:44,380 epoch 1 - iter 8/24 - loss 0.64492343 - samples/sec: 22.62 - lr: 0.020000\n",
-      "2021-09-08 01:36:44,469 epoch 1 - iter 10/24 - loss 0.64511735 - samples/sec: 22.76 - lr: 0.020000\n",
-      "2021-09-08 01:36:44,557 epoch 1 - iter 12/24 - loss 0.64635860 - samples/sec: 22.72 - lr: 0.020000\n",
-      "2021-09-08 01:36:44,646 epoch 1 - iter 14/24 - loss 0.64579637 - samples/sec: 22.77 - lr: 0.020000\n",
-      "2021-09-08 01:36:44,734 epoch 1 - iter 16/24 - loss 0.64449340 - samples/sec: 22.79 - lr: 0.020000\n",
-      "2021-09-08 01:36:44,822 epoch 1 - iter 18/24 - loss 0.64379197 - samples/sec: 22.78 - lr: 0.020000\n",
-      "2021-09-08 01:36:44,911 epoch 1 - iter 20/24 - loss 0.64335288 - samples/sec: 22.77 - lr: 0.020000\n",
-      "2021-09-08 01:36:44,999 epoch 1 - iter 22/24 - loss 0.64190485 - samples/sec: 22.80 - lr: 0.020000\n",
-      "2021-09-08 01:36:45,095 epoch 1 - iter 24/24 - loss 0.64055458 - samples/sec: 21.01 - lr: 0.020000\n",
-      "2021-09-08 01:36:45,096 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:45,096 EPOCH 1 done: loss 0.6406 - lr 0.0200000\n",
-      "2021-09-08 01:36:45,698 DEV : loss 0.636623740196228 - score 0.6667\n",
-      "2021-09-08 01:36:45,699 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:07:47,663 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:47,664 Corpus: \"Corpus: 24 train + 3 dev + 3 test sentences\"\n",
+      "2021-09-21 21:07:47,664 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:47,664 Parameters:\n",
+      "2021-09-21 21:07:47,665  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:07:47,665  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:07:47,665  - patience: \"3\"\n",
+      "2021-09-21 21:07:47,665  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:07:47,666  - max_epochs: \"10\"\n",
+      "2021-09-21 21:07:47,666  - shuffle: \"True\"\n",
+      "2021-09-21 21:07:47,666  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:07:47,667  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:07:47,667 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:47,667 Model training base path: \"None1\"\n",
+      "2021-09-21 21:07:47,667 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:47,668 Device: cuda:0\n",
+      "2021-09-21 21:07:47,668 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:47,668 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:07:47,680 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:47,793 epoch 1 - iter 2/24 - loss 0.63549191 - samples/sec: 20.31 - lr: 0.020000\n",
+      "2021-09-21 21:07:47,899 epoch 1 - iter 4/24 - loss 0.63359614 - samples/sec: 19.10 - lr: 0.020000\n",
+      "2021-09-21 21:07:48,007 epoch 1 - iter 6/24 - loss 0.63675740 - samples/sec: 18.50 - lr: 0.020000\n",
+      "2021-09-21 21:07:48,128 epoch 1 - iter 8/24 - loss 0.63491710 - samples/sec: 16.66 - lr: 0.020000\n",
+      "2021-09-21 21:07:48,294 epoch 1 - iter 10/24 - loss 0.63856445 - samples/sec: 12.06 - lr: 0.020000\n",
+      "2021-09-21 21:07:48,463 epoch 1 - iter 12/24 - loss 0.63565160 - samples/sec: 11.90 - lr: 0.020000\n",
+      "2021-09-21 21:07:48,601 epoch 1 - iter 14/24 - loss 0.63525577 - samples/sec: 14.53 - lr: 0.020000\n",
+      "2021-09-21 21:07:48,729 epoch 1 - iter 16/24 - loss 0.63505663 - samples/sec: 15.66 - lr: 0.020000\n",
+      "2021-09-21 21:07:48,887 epoch 1 - iter 18/24 - loss 0.63472667 - samples/sec: 12.73 - lr: 0.020000\n",
+      "2021-09-21 21:07:49,029 epoch 1 - iter 20/24 - loss 0.63606160 - samples/sec: 14.11 - lr: 0.020000\n",
+      "2021-09-21 21:07:49,157 epoch 1 - iter 22/24 - loss 0.63631512 - samples/sec: 15.76 - lr: 0.020000\n",
+      "2021-09-21 21:07:49,292 epoch 1 - iter 24/24 - loss 0.63735981 - samples/sec: 14.88 - lr: 0.020000\n",
+      "2021-09-21 21:07:49,293 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:49,293 EPOCH 1 done: loss 0.6374 - lr 0.0200000\n",
+      "2021-09-21 21:07:49,408 DEV : loss 0.6366848349571228 - score 0.3333\n",
+      "2021-09-21 21:07:49,408 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:36:53,769 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:53,905 epoch 2 - iter 2/24 - loss 0.63973841 - samples/sec: 18.00 - lr: 0.020000\n",
-      "2021-09-08 01:36:54,017 epoch 2 - iter 4/24 - loss 0.63945720 - samples/sec: 18.04 - lr: 0.020000\n",
-      "2021-09-08 01:36:54,122 epoch 2 - iter 6/24 - loss 0.63641250 - samples/sec: 19.23 - lr: 0.020000\n",
-      "2021-09-08 01:36:54,227 epoch 2 - iter 8/24 - loss 0.63961930 - samples/sec: 19.05 - lr: 0.020000\n",
-      "2021-09-08 01:36:54,333 epoch 2 - iter 10/24 - loss 0.63913246 - samples/sec: 19.01 - lr: 0.020000\n",
-      "2021-09-08 01:36:54,441 epoch 2 - iter 12/24 - loss 0.63884366 - samples/sec: 18.54 - lr: 0.020000\n",
-      "2021-09-08 01:36:54,547 epoch 2 - iter 14/24 - loss 0.63880787 - samples/sec: 19.07 - lr: 0.020000\n",
-      "2021-09-08 01:36:54,653 epoch 2 - iter 16/24 - loss 0.63790257 - samples/sec: 18.87 - lr: 0.020000\n",
-      "2021-09-08 01:36:54,761 epoch 2 - iter 18/24 - loss 0.63816914 - samples/sec: 18.66 - lr: 0.020000\n",
-      "2021-09-08 01:36:54,870 epoch 2 - iter 20/24 - loss 0.63661283 - samples/sec: 18.48 - lr: 0.020000\n",
-      "2021-09-08 01:36:54,980 epoch 2 - iter 22/24 - loss 0.63759835 - samples/sec: 18.29 - lr: 0.020000\n",
-      "2021-09-08 01:36:55,087 epoch 2 - iter 24/24 - loss 0.63694204 - samples/sec: 18.82 - lr: 0.020000\n",
-      "2021-09-08 01:36:55,088 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:55,088 EPOCH 2 done: loss 0.6369 - lr 0.0200000\n",
-      "2021-09-08 01:36:55,481 DEV : loss 0.6365294456481934 - score 1.0\n",
-      "2021-09-08 01:36:55,481 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:07:53,744 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:53,940 epoch 2 - iter 2/24 - loss 0.62866682 - samples/sec: 11.92 - lr: 0.020000\n",
+      "2021-09-21 21:07:54,094 epoch 2 - iter 4/24 - loss 0.63802831 - samples/sec: 13.06 - lr: 0.020000\n",
+      "2021-09-21 21:07:54,243 epoch 2 - iter 6/24 - loss 0.63340338 - samples/sec: 13.47 - lr: 0.020000\n",
+      "2021-09-21 21:07:54,399 epoch 2 - iter 8/24 - loss 0.63325693 - samples/sec: 12.84 - lr: 0.020000\n",
+      "2021-09-21 21:07:54,551 epoch 2 - iter 10/24 - loss 0.63282951 - samples/sec: 13.24 - lr: 0.020000\n",
+      "2021-09-21 21:07:54,705 epoch 2 - iter 12/24 - loss 0.63471644 - samples/sec: 13.04 - lr: 0.020000\n",
+      "2021-09-21 21:07:54,854 epoch 2 - iter 14/24 - loss 0.63742433 - samples/sec: 13.44 - lr: 0.020000\n",
+      "2021-09-21 21:07:54,981 epoch 2 - iter 16/24 - loss 0.63675472 - samples/sec: 15.79 - lr: 0.020000\n",
+      "2021-09-21 21:07:55,106 epoch 2 - iter 18/24 - loss 0.63704564 - samples/sec: 16.05 - lr: 0.020000\n",
+      "2021-09-21 21:07:55,246 epoch 2 - iter 20/24 - loss 0.63751992 - samples/sec: 14.33 - lr: 0.020000\n",
+      "2021-09-21 21:07:55,378 epoch 2 - iter 22/24 - loss 0.63718841 - samples/sec: 15.32 - lr: 0.020000\n",
+      "2021-09-21 21:07:55,495 epoch 2 - iter 24/24 - loss 0.63875880 - samples/sec: 17.15 - lr: 0.020000\n",
+      "2021-09-21 21:07:55,496 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:55,496 EPOCH 2 done: loss 0.6388 - lr 0.0200000\n",
+      "2021-09-21 21:07:55,601 DEV : loss 0.6365146040916443 - score 0.6667\n",
+      "2021-09-21 21:07:55,601 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:37:02,210 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:02,332 epoch 3 - iter 2/24 - loss 0.64447975 - samples/sec: 18.89 - lr: 0.020000\n",
-      "2021-09-08 01:37:02,441 epoch 3 - iter 4/24 - loss 0.63635428 - samples/sec: 18.52 - lr: 0.020000\n",
-      "2021-09-08 01:37:02,546 epoch 3 - iter 6/24 - loss 0.63910123 - samples/sec: 19.28 - lr: 0.020000\n",
-      "2021-09-08 01:37:02,653 epoch 3 - iter 8/24 - loss 0.64085244 - samples/sec: 18.77 - lr: 0.020000\n",
-      "2021-09-08 01:37:02,761 epoch 3 - iter 10/24 - loss 0.63981737 - samples/sec: 18.64 - lr: 0.020000\n",
-      "2021-09-08 01:37:02,869 epoch 3 - iter 12/24 - loss 0.63968040 - samples/sec: 18.63 - lr: 0.020000\n",
-      "2021-09-08 01:37:02,977 epoch 3 - iter 14/24 - loss 0.64002753 - samples/sec: 18.55 - lr: 0.020000\n",
-      "2021-09-08 01:37:03,086 epoch 3 - iter 16/24 - loss 0.63950819 - samples/sec: 18.48 - lr: 0.020000\n",
-      "2021-09-08 01:37:03,193 epoch 3 - iter 18/24 - loss 0.63915947 - samples/sec: 18.92 - lr: 0.020000\n",
-      "2021-09-08 01:37:03,298 epoch 3 - iter 20/24 - loss 0.63868121 - samples/sec: 19.10 - lr: 0.020000\n",
-      "2021-09-08 01:37:03,402 epoch 3 - iter 22/24 - loss 0.63930202 - samples/sec: 19.33 - lr: 0.020000\n",
-      "2021-09-08 01:37:03,510 epoch 3 - iter 24/24 - loss 0.63964386 - samples/sec: 18.68 - lr: 0.020000\n",
-      "2021-09-08 01:37:03,511 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:03,511 EPOCH 3 done: loss 0.6396 - lr 0.0200000\n",
-      "2021-09-08 01:37:04,800 DEV : loss 0.6374433040618896 - score 0.6667\n",
-      "2021-09-08 01:37:04,801 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:37:04,806 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:04,944 epoch 4 - iter 2/24 - loss 0.64830846 - samples/sec: 16.39 - lr: 0.020000\n",
-      "2021-09-08 01:37:05,058 epoch 4 - iter 4/24 - loss 0.63912490 - samples/sec: 17.54 - lr: 0.020000\n",
-      "2021-09-08 01:37:05,180 epoch 4 - iter 6/24 - loss 0.63651255 - samples/sec: 16.52 - lr: 0.020000\n",
-      "2021-09-08 01:37:05,289 epoch 4 - iter 8/24 - loss 0.63530295 - samples/sec: 18.35 - lr: 0.020000\n",
-      "2021-09-08 01:37:05,398 epoch 4 - iter 10/24 - loss 0.63549631 - samples/sec: 18.37 - lr: 0.020000\n",
-      "2021-09-08 01:37:05,507 epoch 4 - iter 12/24 - loss 0.63758868 - samples/sec: 18.44 - lr: 0.020000\n",
-      "2021-09-08 01:37:05,618 epoch 4 - iter 14/24 - loss 0.63838028 - samples/sec: 18.11 - lr: 0.020000\n",
-      "2021-09-08 01:37:05,727 epoch 4 - iter 16/24 - loss 0.63738187 - samples/sec: 18.47 - lr: 0.020000\n",
-      "2021-09-08 01:37:05,839 epoch 4 - iter 18/24 - loss 0.63790728 - samples/sec: 17.94 - lr: 0.020000\n",
-      "2021-09-08 01:37:05,957 epoch 4 - iter 20/24 - loss 0.63667341 - samples/sec: 17.06 - lr: 0.020000\n",
-      "2021-09-08 01:37:06,076 epoch 4 - iter 22/24 - loss 0.63614676 - samples/sec: 16.81 - lr: 0.020000\n",
-      "2021-09-08 01:37:06,198 epoch 4 - iter 24/24 - loss 0.63673471 - samples/sec: 16.45 - lr: 0.020000\n",
-      "2021-09-08 01:37:06,199 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:06,200 EPOCH 4 done: loss 0.6367 - lr 0.0200000\n",
-      "2021-09-08 01:37:06,272 DEV : loss 0.637225866317749 - score 0.3333\n",
-      "2021-09-08 01:37:06,272 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:37:06,276 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:06,411 epoch 5 - iter 2/24 - loss 0.65691733 - samples/sec: 17.17 - lr: 0.020000\n",
-      "2021-09-08 01:37:06,520 epoch 5 - iter 4/24 - loss 0.64453344 - samples/sec: 18.46 - lr: 0.020000\n",
-      "2021-09-08 01:37:06,630 epoch 5 - iter 6/24 - loss 0.64452319 - samples/sec: 18.15 - lr: 0.020000\n",
-      "2021-09-08 01:37:06,739 epoch 5 - iter 8/24 - loss 0.64270251 - samples/sec: 18.50 - lr: 0.020000\n",
-      "2021-09-08 01:37:06,865 epoch 5 - iter 10/24 - loss 0.64089840 - samples/sec: 15.85 - lr: 0.020000\n",
-      "2021-09-08 01:37:06,983 epoch 5 - iter 12/24 - loss 0.64245905 - samples/sec: 17.13 - lr: 0.020000\n",
-      "2021-09-08 01:37:07,096 epoch 5 - iter 14/24 - loss 0.64258093 - samples/sec: 17.66 - lr: 0.020000\n",
-      "2021-09-08 01:37:07,222 epoch 5 - iter 16/24 - loss 0.64194452 - samples/sec: 15.95 - lr: 0.020000\n",
-      "2021-09-08 01:37:07,335 epoch 5 - iter 18/24 - loss 0.64280292 - samples/sec: 17.91 - lr: 0.020000\n",
-      "2021-09-08 01:37:07,460 epoch 5 - iter 20/24 - loss 0.64163064 - samples/sec: 16.03 - lr: 0.020000\n",
-      "2021-09-08 01:37:07,581 epoch 5 - iter 22/24 - loss 0.64058246 - samples/sec: 16.61 - lr: 0.020000\n",
-      "2021-09-08 01:37:07,698 epoch 5 - iter 24/24 - loss 0.64041436 - samples/sec: 17.11 - lr: 0.020000\n",
-      "2021-09-08 01:37:07,699 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:07,700 EPOCH 5 done: loss 0.6404 - lr 0.0200000\n",
-      "2021-09-08 01:37:07,764 DEV : loss 0.636614203453064 - score 0.6667\n",
-      "2021-09-08 01:37:07,765 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:37:07,767 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:08:01,477 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:01,617 epoch 3 - iter 2/24 - loss 0.64204624 - samples/sec: 16.99 - lr: 0.020000\n",
+      "2021-09-21 21:08:01,730 epoch 3 - iter 4/24 - loss 0.63950880 - samples/sec: 17.88 - lr: 0.020000\n",
+      "2021-09-21 21:08:01,849 epoch 3 - iter 6/24 - loss 0.63668325 - samples/sec: 16.83 - lr: 0.020000\n",
+      "2021-09-21 21:08:01,965 epoch 3 - iter 8/24 - loss 0.63602467 - samples/sec: 17.32 - lr: 0.020000\n",
+      "2021-09-21 21:08:02,073 epoch 3 - iter 10/24 - loss 0.63851542 - samples/sec: 18.63 - lr: 0.020000\n",
+      "2021-09-21 21:08:02,193 epoch 3 - iter 12/24 - loss 0.63866925 - samples/sec: 16.81 - lr: 0.020000\n",
+      "2021-09-21 21:08:02,319 epoch 3 - iter 14/24 - loss 0.63968402 - samples/sec: 15.97 - lr: 0.020000\n",
+      "2021-09-21 21:08:02,432 epoch 3 - iter 16/24 - loss 0.63868076 - samples/sec: 17.76 - lr: 0.020000\n",
+      "2021-09-21 21:08:02,541 epoch 3 - iter 18/24 - loss 0.63876519 - samples/sec: 18.51 - lr: 0.020000\n",
+      "2021-09-21 21:08:02,652 epoch 3 - iter 20/24 - loss 0.63893094 - samples/sec: 18.11 - lr: 0.020000\n",
+      "2021-09-21 21:08:02,781 epoch 3 - iter 22/24 - loss 0.63919635 - samples/sec: 15.53 - lr: 0.020000\n",
+      "2021-09-21 21:08:02,882 epoch 3 - iter 24/24 - loss 0.63929730 - samples/sec: 19.83 - lr: 0.020000\n",
+      "2021-09-21 21:08:02,883 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:02,884 EPOCH 3 done: loss 0.6393 - lr 0.0200000\n",
+      "2021-09-21 21:08:04,022 DEV : loss 0.63673996925354 - score 0.3333\n",
+      "2021-09-21 21:08:04,023 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:08:04,088 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:04,243 epoch 4 - iter 2/24 - loss 0.61843190 - samples/sec: 15.36 - lr: 0.020000\n",
+      "2021-09-21 21:08:04,376 epoch 4 - iter 4/24 - loss 0.62199745 - samples/sec: 15.17 - lr: 0.020000\n",
+      "2021-09-21 21:08:04,534 epoch 4 - iter 6/24 - loss 0.62609779 - samples/sec: 12.70 - lr: 0.020000\n",
+      "2021-09-21 21:08:04,665 epoch 4 - iter 8/24 - loss 0.63279270 - samples/sec: 15.32 - lr: 0.020000\n",
+      "2021-09-21 21:08:04,809 epoch 4 - iter 10/24 - loss 0.63691300 - samples/sec: 13.94 - lr: 0.020000\n",
+      "2021-09-21 21:08:04,970 epoch 4 - iter 12/24 - loss 0.63757523 - samples/sec: 12.49 - lr: 0.020000\n",
+      "2021-09-21 21:08:05,126 epoch 4 - iter 14/24 - loss 0.63515065 - samples/sec: 12.84 - lr: 0.020000\n",
+      "2021-09-21 21:08:05,292 epoch 4 - iter 16/24 - loss 0.63730877 - samples/sec: 12.15 - lr: 0.020000\n",
+      "2021-09-21 21:08:05,455 epoch 4 - iter 18/24 - loss 0.63701174 - samples/sec: 12.28 - lr: 0.020000\n",
+      "2021-09-21 21:08:05,604 epoch 4 - iter 20/24 - loss 0.63518544 - samples/sec: 13.50 - lr: 0.020000\n",
+      "2021-09-21 21:08:05,776 epoch 4 - iter 22/24 - loss 0.63576899 - samples/sec: 11.63 - lr: 0.020000\n",
+      "2021-09-21 21:08:05,941 epoch 4 - iter 24/24 - loss 0.63734161 - samples/sec: 12.24 - lr: 0.020000\n",
+      "2021-09-21 21:08:05,942 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:05,942 EPOCH 4 done: loss 0.6373 - lr 0.0200000\n",
+      "2021-09-21 21:08:06,069 DEV : loss 0.6365463733673096 - score 0.0\n",
+      "2021-09-21 21:08:06,069 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:08:06,071 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:06,286 epoch 5 - iter 2/24 - loss 0.63517442 - samples/sec: 11.81 - lr: 0.020000\n",
+      "2021-09-21 21:08:06,465 epoch 5 - iter 4/24 - loss 0.63815497 - samples/sec: 11.24 - lr: 0.020000\n",
+      "2021-09-21 21:08:06,673 epoch 5 - iter 6/24 - loss 0.64054277 - samples/sec: 9.62 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:37:07,905 epoch 6 - iter 2/24 - loss 0.62683198 - samples/sec: 17.05 - lr: 0.020000\n",
-      "2021-09-08 01:37:08,024 epoch 6 - iter 4/24 - loss 0.62786032 - samples/sec: 16.96 - lr: 0.020000\n",
-      "2021-09-08 01:37:08,146 epoch 6 - iter 6/24 - loss 0.62690178 - samples/sec: 16.51 - lr: 0.020000\n",
-      "2021-09-08 01:37:08,259 epoch 6 - iter 8/24 - loss 0.62810379 - samples/sec: 17.79 - lr: 0.020000\n",
-      "2021-09-08 01:37:08,373 epoch 6 - iter 10/24 - loss 0.62564275 - samples/sec: 17.57 - lr: 0.020000\n",
-      "2021-09-08 01:37:08,491 epoch 6 - iter 12/24 - loss 0.62584326 - samples/sec: 17.06 - lr: 0.020000\n",
-      "2021-09-08 01:37:08,608 epoch 6 - iter 14/24 - loss 0.62738560 - samples/sec: 17.16 - lr: 0.020000\n",
-      "2021-09-08 01:37:08,724 epoch 6 - iter 16/24 - loss 0.62983794 - samples/sec: 17.27 - lr: 0.020000\n",
-      "2021-09-08 01:37:08,833 epoch 6 - iter 18/24 - loss 0.63016089 - samples/sec: 18.42 - lr: 0.020000\n",
-      "2021-09-08 01:37:08,921 epoch 6 - iter 20/24 - loss 0.63167831 - samples/sec: 22.83 - lr: 0.020000\n",
-      "2021-09-08 01:37:09,009 epoch 6 - iter 22/24 - loss 0.63247184 - samples/sec: 22.90 - lr: 0.020000\n",
-      "2021-09-08 01:37:09,098 epoch 6 - iter 24/24 - loss 0.63288429 - samples/sec: 22.69 - lr: 0.020000\n",
-      "2021-09-08 01:37:09,099 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:09,099 EPOCH 6 done: loss 0.6329 - lr 0.0200000\n",
-      "2021-09-08 01:37:09,148 DEV : loss 0.6365156173706055 - score 0.6667\n",
-      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:37:09,149 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:37:09,150 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:09,253 epoch 7 - iter 2/24 - loss 0.62163284 - samples/sec: 22.08 - lr: 0.010000\n",
-      "2021-09-08 01:37:09,342 epoch 7 - iter 4/24 - loss 0.62701333 - samples/sec: 22.76 - lr: 0.010000\n",
-      "2021-09-08 01:37:09,430 epoch 7 - iter 6/24 - loss 0.62734156 - samples/sec: 22.88 - lr: 0.010000\n",
-      "2021-09-08 01:37:09,518 epoch 7 - iter 8/24 - loss 0.62766325 - samples/sec: 22.84 - lr: 0.010000\n",
-      "2021-09-08 01:37:09,606 epoch 7 - iter 10/24 - loss 0.62849877 - samples/sec: 22.88 - lr: 0.010000\n",
-      "2021-09-08 01:37:09,694 epoch 7 - iter 12/24 - loss 0.62983898 - samples/sec: 22.65 - lr: 0.010000\n",
-      "2021-09-08 01:37:09,784 epoch 7 - iter 14/24 - loss 0.63001962 - samples/sec: 22.44 - lr: 0.010000\n",
-      "2021-09-08 01:37:09,873 epoch 7 - iter 16/24 - loss 0.63073868 - samples/sec: 22.57 - lr: 0.010000\n",
-      "2021-09-08 01:37:09,962 epoch 7 - iter 18/24 - loss 0.63232572 - samples/sec: 22.73 - lr: 0.010000\n",
-      "2021-09-08 01:37:10,051 epoch 7 - iter 20/24 - loss 0.63339405 - samples/sec: 22.61 - lr: 0.010000\n",
-      "2021-09-08 01:37:10,144 epoch 7 - iter 22/24 - loss 0.63278324 - samples/sec: 21.69 - lr: 0.010000\n",
-      "2021-09-08 01:37:10,232 epoch 7 - iter 24/24 - loss 0.63341031 - samples/sec: 22.80 - lr: 0.010000\n",
-      "2021-09-08 01:37:10,233 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:10,233 EPOCH 7 done: loss 0.6334 - lr 0.0100000\n",
-      "2021-09-08 01:37:10,384 DEV : loss 0.6365147829055786 - score 0.3333\n",
-      "2021-09-08 01:37:10,384 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:37:10,461 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:10,561 epoch 8 - iter 2/24 - loss 0.63983920 - samples/sec: 22.88 - lr: 0.010000\n",
-      "2021-09-08 01:37:10,649 epoch 8 - iter 4/24 - loss 0.63731840 - samples/sec: 22.79 - lr: 0.010000\n",
-      "2021-09-08 01:37:10,738 epoch 8 - iter 6/24 - loss 0.63580308 - samples/sec: 22.48 - lr: 0.010000\n",
-      "2021-09-08 01:37:10,828 epoch 8 - iter 8/24 - loss 0.63265171 - samples/sec: 22.31 - lr: 0.010000\n",
-      "2021-09-08 01:37:10,916 epoch 8 - iter 10/24 - loss 0.63172765 - samples/sec: 22.92 - lr: 0.010000\n",
-      "2021-09-08 01:37:11,004 epoch 8 - iter 12/24 - loss 0.63403727 - samples/sec: 22.83 - lr: 0.010000\n",
-      "2021-09-08 01:37:11,095 epoch 8 - iter 14/24 - loss 0.63482392 - samples/sec: 22.14 - lr: 0.010000\n",
-      "2021-09-08 01:37:11,187 epoch 8 - iter 16/24 - loss 0.63414619 - samples/sec: 22.00 - lr: 0.010000\n",
-      "2021-09-08 01:37:11,275 epoch 8 - iter 18/24 - loss 0.63344529 - samples/sec: 22.77 - lr: 0.010000\n",
-      "2021-09-08 01:37:11,363 epoch 8 - iter 20/24 - loss 0.63239258 - samples/sec: 22.84 - lr: 0.010000\n",
-      "2021-09-08 01:37:11,451 epoch 8 - iter 22/24 - loss 0.63305966 - samples/sec: 22.81 - lr: 0.010000\n",
-      "2021-09-08 01:37:11,540 epoch 8 - iter 24/24 - loss 0.63249338 - samples/sec: 22.80 - lr: 0.010000\n",
-      "2021-09-08 01:37:11,541 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:11,541 EPOCH 8 done: loss 0.6325 - lr 0.0100000\n",
-      "2021-09-08 01:37:11,694 DEV : loss 0.6366555690765381 - score 0.3333\n",
-      "2021-09-08 01:37:11,695 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:37:11,777 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:11,878 epoch 9 - iter 2/24 - loss 0.64968351 - samples/sec: 22.72 - lr: 0.010000\n",
-      "2021-09-08 01:37:11,966 epoch 9 - iter 4/24 - loss 0.64152728 - samples/sec: 22.85 - lr: 0.010000\n",
-      "2021-09-08 01:37:12,056 epoch 9 - iter 6/24 - loss 0.63912538 - samples/sec: 22.42 - lr: 0.010000\n",
-      "2021-09-08 01:37:12,144 epoch 9 - iter 8/24 - loss 0.63548259 - samples/sec: 22.71 - lr: 0.010000\n",
-      "2021-09-08 01:37:12,235 epoch 9 - iter 10/24 - loss 0.63409025 - samples/sec: 22.09 - lr: 0.010000\n",
-      "2021-09-08 01:37:12,324 epoch 9 - iter 12/24 - loss 0.63528063 - samples/sec: 22.80 - lr: 0.010000\n",
-      "2021-09-08 01:37:12,412 epoch 9 - iter 14/24 - loss 0.63422581 - samples/sec: 22.84 - lr: 0.010000\n",
-      "2021-09-08 01:37:12,500 epoch 9 - iter 16/24 - loss 0.63262358 - samples/sec: 22.85 - lr: 0.010000\n",
-      "2021-09-08 01:37:12,588 epoch 9 - iter 18/24 - loss 0.63320631 - samples/sec: 22.88 - lr: 0.010000\n",
-      "2021-09-08 01:37:12,676 epoch 9 - iter 20/24 - loss 0.63285405 - samples/sec: 22.85 - lr: 0.010000\n",
-      "2021-09-08 01:37:12,763 epoch 9 - iter 22/24 - loss 0.63436544 - samples/sec: 22.97 - lr: 0.010000\n",
-      "2021-09-08 01:37:12,851 epoch 9 - iter 24/24 - loss 0.63455561 - samples/sec: 22.90 - lr: 0.010000\n",
-      "2021-09-08 01:37:12,852 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:12,852 EPOCH 9 done: loss 0.6346 - lr 0.0100000\n",
-      "2021-09-08 01:37:12,903 DEV : loss 0.6365675330162048 - score 0.6667\n",
-      "2021-09-08 01:37:12,903 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:37:12,905 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:13,026 epoch 10 - iter 2/24 - loss 0.64265347 - samples/sec: 18.68 - lr: 0.010000\n",
-      "2021-09-08 01:37:13,126 epoch 10 - iter 4/24 - loss 0.64585996 - samples/sec: 20.10 - lr: 0.010000\n",
-      "2021-09-08 01:37:13,230 epoch 10 - iter 6/24 - loss 0.64001102 - samples/sec: 19.23 - lr: 0.010000\n",
-      "2021-09-08 01:37:13,349 epoch 10 - iter 8/24 - loss 0.64239956 - samples/sec: 16.99 - lr: 0.010000\n",
-      "2021-09-08 01:37:13,455 epoch 10 - iter 10/24 - loss 0.64302420 - samples/sec: 18.91 - lr: 0.010000\n",
-      "2021-09-08 01:37:13,562 epoch 10 - iter 12/24 - loss 0.64119124 - samples/sec: 18.76 - lr: 0.010000\n",
-      "2021-09-08 01:37:13,669 epoch 10 - iter 14/24 - loss 0.64064834 - samples/sec: 18.68 - lr: 0.010000\n",
-      "2021-09-08 01:37:13,778 epoch 10 - iter 16/24 - loss 0.64054632 - samples/sec: 18.52 - lr: 0.010000\n",
-      "2021-09-08 01:37:13,887 epoch 10 - iter 18/24 - loss 0.63935311 - samples/sec: 18.35 - lr: 0.010000\n",
-      "2021-09-08 01:37:13,994 epoch 10 - iter 20/24 - loss 0.64015092 - samples/sec: 18.89 - lr: 0.010000\n",
-      "2021-09-08 01:37:14,104 epoch 10 - iter 22/24 - loss 0.64071503 - samples/sec: 18.24 - lr: 0.010000\n",
-      "2021-09-08 01:37:14,208 epoch 10 - iter 24/24 - loss 0.64026772 - samples/sec: 19.22 - lr: 0.010000\n",
-      "2021-09-08 01:37:14,209 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:14,210 EPOCH 10 done: loss 0.6403 - lr 0.0100000\n",
-      "2021-09-08 01:37:14,265 DEV : loss 0.6365260481834412 - score 0.6667\n",
-      "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 01:37:14,265 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:37:19,676 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:08:06,849 epoch 5 - iter 8/24 - loss 0.64027829 - samples/sec: 11.45 - lr: 0.020000\n",
+      "2021-09-21 21:08:07,032 epoch 5 - iter 10/24 - loss 0.64348131 - samples/sec: 10.95 - lr: 0.020000\n",
+      "2021-09-21 21:08:07,186 epoch 5 - iter 12/24 - loss 0.64359029 - samples/sec: 13.00 - lr: 0.020000\n",
+      "2021-09-21 21:08:07,376 epoch 5 - iter 14/24 - loss 0.64227655 - samples/sec: 10.56 - lr: 0.020000\n",
+      "2021-09-21 21:08:07,567 epoch 5 - iter 16/24 - loss 0.64257239 - samples/sec: 10.52 - lr: 0.020000\n",
+      "2021-09-21 21:08:07,739 epoch 5 - iter 18/24 - loss 0.64633201 - samples/sec: 11.64 - lr: 0.020000\n",
+      "2021-09-21 21:08:07,932 epoch 5 - iter 20/24 - loss 0.64642430 - samples/sec: 10.38 - lr: 0.020000\n",
+      "2021-09-21 21:08:08,082 epoch 5 - iter 22/24 - loss 0.64613781 - samples/sec: 13.43 - lr: 0.020000\n",
+      "2021-09-21 21:08:08,217 epoch 5 - iter 24/24 - loss 0.64462537 - samples/sec: 14.85 - lr: 0.020000\n",
+      "2021-09-21 21:08:08,219 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:08,219 EPOCH 5 done: loss 0.6446 - lr 0.0200000\n",
+      "2021-09-21 21:08:08,301 DEV : loss 0.6365445852279663 - score 1.0\n",
+      "2021-09-21 21:08:08,302 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:08:17,028 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:17,216 epoch 6 - iter 2/24 - loss 0.63389212 - samples/sec: 13.20 - lr: 0.020000\n",
+      "2021-09-21 21:08:17,373 epoch 6 - iter 4/24 - loss 0.63441657 - samples/sec: 12.78 - lr: 0.020000\n",
+      "2021-09-21 21:08:17,551 epoch 6 - iter 6/24 - loss 0.63682757 - samples/sec: 11.33 - lr: 0.020000\n",
+      "2021-09-21 21:08:17,686 epoch 6 - iter 8/24 - loss 0.63696944 - samples/sec: 14.81 - lr: 0.020000\n",
+      "2021-09-21 21:08:17,838 epoch 6 - iter 10/24 - loss 0.63662562 - samples/sec: 13.24 - lr: 0.020000\n",
+      "2021-09-21 21:08:18,007 epoch 6 - iter 12/24 - loss 0.63624046 - samples/sec: 11.88 - lr: 0.020000\n",
+      "2021-09-21 21:08:18,130 epoch 6 - iter 14/24 - loss 0.63725605 - samples/sec: 16.50 - lr: 0.020000\n",
+      "2021-09-21 21:08:18,275 epoch 6 - iter 16/24 - loss 0.63702860 - samples/sec: 13.81 - lr: 0.020000\n",
+      "2021-09-21 21:08:18,420 epoch 6 - iter 18/24 - loss 0.63631826 - samples/sec: 13.84 - lr: 0.020000\n",
+      "2021-09-21 21:08:18,543 epoch 6 - iter 20/24 - loss 0.63664506 - samples/sec: 16.35 - lr: 0.020000\n",
+      "2021-09-21 21:08:18,693 epoch 6 - iter 22/24 - loss 0.63731296 - samples/sec: 13.42 - lr: 0.020000\n",
+      "2021-09-21 21:08:18,817 epoch 6 - iter 24/24 - loss 0.63702488 - samples/sec: 16.21 - lr: 0.020000\n",
+      "2021-09-21 21:08:18,818 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:18,818 EPOCH 6 done: loss 0.6370 - lr 0.0200000\n",
+      "2021-09-21 21:08:19,015 DEV : loss 0.636569619178772 - score 0.3333\n",
+      "2021-09-21 21:08:19,016 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:08:19,111 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:19,267 epoch 7 - iter 2/24 - loss 0.62383801 - samples/sec: 14.53 - lr: 0.020000\n",
+      "2021-09-21 21:08:19,413 epoch 7 - iter 4/24 - loss 0.62522751 - samples/sec: 13.81 - lr: 0.020000\n",
+      "2021-09-21 21:08:19,570 epoch 7 - iter 6/24 - loss 0.63042548 - samples/sec: 12.77 - lr: 0.020000\n",
+      "2021-09-21 21:08:19,737 epoch 7 - iter 8/24 - loss 0.63374103 - samples/sec: 12.01 - lr: 0.020000\n",
+      "2021-09-21 21:08:19,880 epoch 7 - iter 10/24 - loss 0.63515449 - samples/sec: 14.04 - lr: 0.020000\n",
+      "2021-09-21 21:08:20,022 epoch 7 - iter 12/24 - loss 0.63601104 - samples/sec: 14.16 - lr: 0.020000\n",
+      "2021-09-21 21:08:20,169 epoch 7 - iter 14/24 - loss 0.63545120 - samples/sec: 13.67 - lr: 0.020000\n",
+      "2021-09-21 21:08:20,300 epoch 7 - iter 16/24 - loss 0.63596298 - samples/sec: 15.37 - lr: 0.020000\n",
+      "2021-09-21 21:08:20,442 epoch 7 - iter 18/24 - loss 0.63719718 - samples/sec: 14.13 - lr: 0.020000\n",
+      "2021-09-21 21:08:20,592 epoch 7 - iter 20/24 - loss 0.63676120 - samples/sec: 13.39 - lr: 0.020000\n",
+      "2021-09-21 21:08:20,724 epoch 7 - iter 22/24 - loss 0.63590114 - samples/sec: 15.19 - lr: 0.020000\n",
+      "2021-09-21 21:08:20,865 epoch 7 - iter 24/24 - loss 0.63675779 - samples/sec: 14.20 - lr: 0.020000\n",
+      "2021-09-21 21:08:20,866 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:20,867 EPOCH 7 done: loss 0.6368 - lr 0.0200000\n",
+      "2021-09-21 21:08:21,745 DEV : loss 0.6366881132125854 - score 0.6667\n",
+      "2021-09-21 21:08:21,746 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:08:21,754 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:21,939 epoch 8 - iter 2/24 - loss 0.62985554 - samples/sec: 12.16 - lr: 0.020000\n",
+      "2021-09-21 21:08:22,104 epoch 8 - iter 4/24 - loss 0.63091566 - samples/sec: 12.17 - lr: 0.020000\n",
+      "2021-09-21 21:08:22,247 epoch 8 - iter 6/24 - loss 0.63239131 - samples/sec: 14.10 - lr: 0.020000\n",
+      "2021-09-21 21:08:22,401 epoch 8 - iter 8/24 - loss 0.63396396 - samples/sec: 13.05 - lr: 0.020000\n",
+      "2021-09-21 21:08:22,536 epoch 8 - iter 10/24 - loss 0.63879665 - samples/sec: 15.01 - lr: 0.020000\n",
+      "2021-09-21 21:08:22,676 epoch 8 - iter 12/24 - loss 0.63886671 - samples/sec: 14.31 - lr: 0.020000\n",
+      "2021-09-21 21:08:22,818 epoch 8 - iter 14/24 - loss 0.63904183 - samples/sec: 14.20 - lr: 0.020000\n",
+      "2021-09-21 21:08:22,959 epoch 8 - iter 16/24 - loss 0.63941149 - samples/sec: 14.30 - lr: 0.020000\n",
+      "2021-09-21 21:08:23,082 epoch 8 - iter 18/24 - loss 0.63922285 - samples/sec: 16.24 - lr: 0.020000\n",
+      "2021-09-21 21:08:23,224 epoch 8 - iter 20/24 - loss 0.63894715 - samples/sec: 14.22 - lr: 0.020000\n",
+      "2021-09-21 21:08:23,361 epoch 8 - iter 22/24 - loss 0.63970517 - samples/sec: 14.63 - lr: 0.020000\n",
+      "2021-09-21 21:08:23,509 epoch 8 - iter 24/24 - loss 0.63937081 - samples/sec: 13.60 - lr: 0.020000\n",
+      "2021-09-21 21:08:23,510 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:23,510 EPOCH 8 done: loss 0.6394 - lr 0.0200000\n",
+      "2021-09-21 21:08:23,711 DEV : loss 0.6365808248519897 - score 0.6667\n",
+      "2021-09-21 21:08:23,712 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:08:23,786 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:23,953 epoch 9 - iter 2/24 - loss 0.61979982 - samples/sec: 13.98 - lr: 0.020000\n",
+      "2021-09-21 21:08:24,101 epoch 9 - iter 4/24 - loss 0.62886985 - samples/sec: 13.58 - lr: 0.020000\n",
+      "2021-09-21 21:08:24,233 epoch 9 - iter 6/24 - loss 0.63337613 - samples/sec: 15.27 - lr: 0.020000\n",
+      "2021-09-21 21:08:24,338 epoch 9 - iter 8/24 - loss 0.63404194 - samples/sec: 19.12 - lr: 0.020000\n",
+      "2021-09-21 21:08:24,459 epoch 9 - iter 10/24 - loss 0.63496085 - samples/sec: 16.68 - lr: 0.020000\n",
+      "2021-09-21 21:08:24,604 epoch 9 - iter 12/24 - loss 0.63587096 - samples/sec: 13.85 - lr: 0.020000\n",
+      "2021-09-21 21:08:24,755 epoch 9 - iter 14/24 - loss 0.63707065 - samples/sec: 13.29 - lr: 0.020000\n",
+      "2021-09-21 21:08:24,896 epoch 9 - iter 16/24 - loss 0.63831207 - samples/sec: 14.24 - lr: 0.020000\n",
+      "2021-09-21 21:08:25,035 epoch 9 - iter 18/24 - loss 0.64129595 - samples/sec: 14.48 - lr: 0.020000\n",
+      "2021-09-21 21:08:25,174 epoch 9 - iter 20/24 - loss 0.64118766 - samples/sec: 14.37 - lr: 0.020000\n",
+      "2021-09-21 21:08:25,312 epoch 9 - iter 22/24 - loss 0.64112009 - samples/sec: 14.62 - lr: 0.020000\n",
+      "2021-09-21 21:08:25,470 epoch 9 - iter 24/24 - loss 0.64177865 - samples/sec: 12.66 - lr: 0.020000\n",
+      "2021-09-21 21:08:25,471 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:25,472 EPOCH 9 done: loss 0.6418 - lr 0.0200000\n",
+      "2021-09-21 21:08:25,593 DEV : loss 0.6367859244346619 - score 0.6667\n",
+      "Epoch     9: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:08:25,594 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:08:26,421 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:26,553 epoch 10 - iter 2/24 - loss 0.62302044 - samples/sec: 17.34 - lr: 0.010000\n",
+      "2021-09-21 21:08:26,649 epoch 10 - iter 4/24 - loss 0.62954307 - samples/sec: 21.04 - lr: 0.010000\n",
+      "2021-09-21 21:08:26,765 epoch 10 - iter 6/24 - loss 0.63200818 - samples/sec: 17.39 - lr: 0.010000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:37:19,677 Testing using best model ...\n",
-      "2021-09-08 01:37:19,708 loading file None1/best-model.pt\n",
+      "2021-09-21 21:08:26,905 epoch 10 - iter 8/24 - loss 0.63510600 - samples/sec: 14.29 - lr: 0.010000\n",
+      "2021-09-21 21:08:27,036 epoch 10 - iter 10/24 - loss 0.63415999 - samples/sec: 15.41 - lr: 0.010000\n",
+      "2021-09-21 21:08:27,186 epoch 10 - iter 12/24 - loss 0.63431285 - samples/sec: 13.38 - lr: 0.010000\n",
+      "2021-09-21 21:08:27,309 epoch 10 - iter 14/24 - loss 0.63326628 - samples/sec: 16.41 - lr: 0.010000\n",
+      "2021-09-21 21:08:27,439 epoch 10 - iter 16/24 - loss 0.63421437 - samples/sec: 15.52 - lr: 0.010000\n",
+      "2021-09-21 21:08:27,574 epoch 10 - iter 18/24 - loss 0.63310492 - samples/sec: 14.82 - lr: 0.010000\n",
+      "2021-09-21 21:08:27,718 epoch 10 - iter 20/24 - loss 0.63376463 - samples/sec: 13.93 - lr: 0.010000\n",
+      "2021-09-21 21:08:27,860 epoch 10 - iter 22/24 - loss 0.63427389 - samples/sec: 14.18 - lr: 0.010000\n",
+      "2021-09-21 21:08:28,028 epoch 10 - iter 24/24 - loss 0.63478242 - samples/sec: 11.94 - lr: 0.010000\n",
+      "2021-09-21 21:08:28,029 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:28,030 EPOCH 10 done: loss 0.6348 - lr 0.0100000\n",
+      "2021-09-21 21:08:28,138 DEV : loss 0.6365283727645874 - score 0.6667\n",
+      "2021-09-21 21:08:28,138 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:08:34,202 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:34,202 Testing using best model ...\n",
+      "2021-09-21 21:08:34,204 loading file None1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:37:28,926 \t0.0\n",
-      "2021-09-08 01:37:28,927 \n",
+      "2021-09-21 21:08:39,177 \t0.3333\n",
+      "2021-09-21 21:08:39,178 \n",
       "Results:\n",
-      "- F-score (micro) 0.0\n",
-      "- F-score (macro) 0.0\n",
-      "- Accuracy 0.0\n",
+      "- F-score (micro) 0.3333\n",
+      "- F-score (macro) 0.1667\n",
+      "- Accuracy 0.3333\n",
       "\n",
       "By class:\n",
       "              precision    recall  f1-score   support\n",
       "\n",
       "    positive     0.0000    0.0000    0.0000         0\n",
-      "     neutral     0.0000    0.0000    0.0000         2\n",
-      "    negative     0.0000    0.0000    0.0000         1\n",
+      "     neutral     0.3333    1.0000    0.5000         1\n",
+      "    negative     0.0000    0.0000    0.0000         2\n",
       "\n",
-      "   micro avg     0.0000    0.0000    0.0000         3\n",
-      "   macro avg     0.0000    0.0000    0.0000         3\n",
-      "weighted avg     0.0000    0.0000    0.0000         3\n",
-      " samples avg     0.0000    0.0000    0.0000         3\n",
+      "   micro avg     0.3333    0.3333    0.3333         3\n",
+      "   macro avg     0.1111    0.3333    0.1667         3\n",
+      "weighted avg     0.1111    0.3333    0.1667         3\n",
+      " samples avg     0.3333    0.3333    0.3333         3\n",
       "\n",
-      "2021-09-08 01:37:28,927 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:44,131 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
+      "2021-09-21 21:08:39,178 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:59,048 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:37:48,127 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:09:03,306 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 27/27 [00:00<00:00, 39903.53it/s]"
+      "100%|██████████| 27/27 [00:00<00:00, 30823.68it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:37:48,130 [b'positive', b'neutral', b'negative']\n",
-      "2021-09-08 01:37:48,260 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:48,262 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:09:03,309 [b'positive', b'neutral', b'negative']\n",
+      "2021-09-21 21:09:03,321 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:03,322 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -2689,24 +2689,26 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:37:48,263 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:48,263 Corpus: \"Corpus: 24 train + 3 dev + 3 test sentences\"\n",
-      "2021-09-08 01:37:48,263 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:48,264 Parameters:\n",
-      "2021-09-08 01:37:48,264  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:37:48,265  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:37:48,265  - patience: \"3\"\n",
-      "2021-09-08 01:37:48,265  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:37:48,266  - max_epochs: \"10\"\n",
-      "2021-09-08 01:37:48,266  - shuffle: \"True\"\n",
-      "2021-09-08 01:37:48,266  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:37:48,267  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:37:48,267 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:48,267 Model training base path: \"None1\"\n",
-      "2021-09-08 01:37:48,268 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:48,268 Device: cuda:1\n",
-      "2021-09-08 01:37:48,269 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:48,269 Embeddings storage mode: cpu\n"
+      "2021-09-21 21:09:03,323 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:03,323 Corpus: \"Corpus: 24 train + 3 dev + 3 test sentences\"\n",
+      "2021-09-21 21:09:03,324 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:03,324 Parameters:\n",
+      "2021-09-21 21:09:03,324  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:09:03,325  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:09:03,325  - patience: \"3\"\n",
+      "2021-09-21 21:09:03,325  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:09:03,325  - max_epochs: \"10\"\n",
+      "2021-09-21 21:09:03,326  - shuffle: \"True\"\n",
+      "2021-09-21 21:09:03,326  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:09:03,326  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:09:03,327 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:03,327 Model training base path: \"None1\"\n",
+      "2021-09-21 21:09:03,327 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:03,327 Device: cuda:0\n",
+      "2021-09-21 21:09:03,328 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:03,328 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:09:03,334 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:03,489 epoch 1 - iter 2/24 - loss 0.63387552 - samples/sec: 17.67 - lr: 0.020000\n"
      ]
     },
     {
@@ -2720,218 +2722,215 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:37:48,470 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:48,574 epoch 1 - iter 2/24 - loss 0.63042396 - samples/sec: 22.29 - lr: 0.020000\n",
-      "2021-09-08 01:37:48,674 epoch 1 - iter 4/24 - loss 0.63421266 - samples/sec: 20.24 - lr: 0.020000\n",
-      "2021-09-08 01:37:48,775 epoch 1 - iter 6/24 - loss 0.63987004 - samples/sec: 19.84 - lr: 0.020000\n",
-      "2021-09-08 01:37:48,873 epoch 1 - iter 8/24 - loss 0.64210436 - samples/sec: 20.76 - lr: 0.020000\n",
-      "2021-09-08 01:37:48,968 epoch 1 - iter 10/24 - loss 0.64009595 - samples/sec: 21.04 - lr: 0.020000\n",
-      "2021-09-08 01:37:49,063 epoch 1 - iter 12/24 - loss 0.64135134 - samples/sec: 21.24 - lr: 0.020000\n",
-      "2021-09-08 01:37:49,158 epoch 1 - iter 14/24 - loss 0.64028844 - samples/sec: 21.29 - lr: 0.020000\n",
-      "2021-09-08 01:37:49,254 epoch 1 - iter 16/24 - loss 0.63779757 - samples/sec: 21.00 - lr: 0.020000\n",
-      "2021-09-08 01:37:49,350 epoch 1 - iter 18/24 - loss 0.63975569 - samples/sec: 20.91 - lr: 0.020000\n",
-      "2021-09-08 01:37:49,446 epoch 1 - iter 20/24 - loss 0.63938325 - samples/sec: 21.20 - lr: 0.020000\n",
-      "2021-09-08 01:37:49,547 epoch 1 - iter 22/24 - loss 0.63854064 - samples/sec: 19.98 - lr: 0.020000\n",
-      "2021-09-08 01:37:49,641 epoch 1 - iter 24/24 - loss 0.63802136 - samples/sec: 21.36 - lr: 0.020000\n",
-      "2021-09-08 01:37:49,643 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:49,643 EPOCH 1 done: loss 0.6380 - lr 0.0200000\n",
-      "2021-09-08 01:37:49,787 DEV : loss 0.6365383863449097 - score 0.0\n",
-      "2021-09-08 01:37:49,788 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:09:03,612 epoch 1 - iter 4/24 - loss 0.64105959 - samples/sec: 16.27 - lr: 0.020000\n",
+      "2021-09-21 21:09:03,746 epoch 1 - iter 6/24 - loss 0.63553402 - samples/sec: 15.01 - lr: 0.020000\n",
+      "2021-09-21 21:09:03,878 epoch 1 - iter 8/24 - loss 0.63648983 - samples/sec: 15.25 - lr: 0.020000\n",
+      "2021-09-21 21:09:04,012 epoch 1 - iter 10/24 - loss 0.63535922 - samples/sec: 14.89 - lr: 0.020000\n",
+      "2021-09-21 21:09:04,126 epoch 1 - iter 12/24 - loss 0.63252204 - samples/sec: 17.63 - lr: 0.020000\n",
+      "2021-09-21 21:09:04,256 epoch 1 - iter 14/24 - loss 0.63121200 - samples/sec: 15.48 - lr: 0.020000\n",
+      "2021-09-21 21:09:04,370 epoch 1 - iter 16/24 - loss 0.63274994 - samples/sec: 17.67 - lr: 0.020000\n",
+      "2021-09-21 21:09:04,467 epoch 1 - iter 18/24 - loss 0.63355920 - samples/sec: 20.66 - lr: 0.020000\n",
+      "2021-09-21 21:09:04,563 epoch 1 - iter 20/24 - loss 0.63210210 - samples/sec: 20.96 - lr: 0.020000\n",
+      "2021-09-21 21:09:04,676 epoch 1 - iter 22/24 - loss 0.63328202 - samples/sec: 17.76 - lr: 0.020000\n",
+      "2021-09-21 21:09:04,775 epoch 1 - iter 24/24 - loss 0.63407952 - samples/sec: 20.48 - lr: 0.020000\n",
+      "2021-09-21 21:09:04,776 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:04,776 EPOCH 1 done: loss 0.6341 - lr 0.0200000\n",
+      "2021-09-21 21:09:04,865 DEV : loss 0.6365486979484558 - score 0.3333\n",
+      "2021-09-21 21:09:04,866 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:37:57,430 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:57,541 epoch 2 - iter 2/24 - loss 0.63636914 - samples/sec: 20.80 - lr: 0.020000\n",
-      "2021-09-08 01:37:57,639 epoch 2 - iter 4/24 - loss 0.63226268 - samples/sec: 20.65 - lr: 0.020000\n",
-      "2021-09-08 01:37:57,729 epoch 2 - iter 6/24 - loss 0.63061903 - samples/sec: 22.52 - lr: 0.020000\n",
-      "2021-09-08 01:37:57,817 epoch 2 - iter 8/24 - loss 0.63201299 - samples/sec: 22.73 - lr: 0.020000\n",
-      "2021-09-08 01:37:57,907 epoch 2 - iter 10/24 - loss 0.63053273 - samples/sec: 22.52 - lr: 0.020000\n",
-      "2021-09-08 01:37:58,002 epoch 2 - iter 12/24 - loss 0.63404385 - samples/sec: 21.20 - lr: 0.020000\n",
-      "2021-09-08 01:37:58,093 epoch 2 - iter 14/24 - loss 0.63432452 - samples/sec: 22.16 - lr: 0.020000\n",
-      "2021-09-08 01:37:58,182 epoch 2 - iter 16/24 - loss 0.63292312 - samples/sec: 22.64 - lr: 0.020000\n",
-      "2021-09-08 01:37:58,270 epoch 2 - iter 18/24 - loss 0.63217129 - samples/sec: 22.71 - lr: 0.020000\n",
-      "2021-09-08 01:37:58,358 epoch 2 - iter 20/24 - loss 0.63201338 - samples/sec: 22.87 - lr: 0.020000\n",
-      "2021-09-08 01:37:58,451 epoch 2 - iter 22/24 - loss 0.63234569 - samples/sec: 21.68 - lr: 0.020000\n",
-      "2021-09-08 01:37:58,544 epoch 2 - iter 24/24 - loss 0.63371975 - samples/sec: 21.68 - lr: 0.020000\n",
-      "2021-09-08 01:37:58,545 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:58,546 EPOCH 2 done: loss 0.6337 - lr 0.0200000\n",
-      "2021-09-08 01:37:59,070 DEV : loss 0.6366221904754639 - score 0.3333\n",
-      "2021-09-08 01:37:59,071 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:09:11,932 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:12,095 epoch 2 - iter 2/24 - loss 0.62791121 - samples/sec: 14.23 - lr: 0.020000\n",
+      "2021-09-21 21:09:12,211 epoch 2 - iter 4/24 - loss 0.62829715 - samples/sec: 17.40 - lr: 0.020000\n",
+      "2021-09-21 21:09:12,338 epoch 2 - iter 6/24 - loss 0.63204771 - samples/sec: 15.83 - lr: 0.020000\n",
+      "2021-09-21 21:09:12,464 epoch 2 - iter 8/24 - loss 0.63891946 - samples/sec: 15.97 - lr: 0.020000\n",
+      "2021-09-21 21:09:12,579 epoch 2 - iter 10/24 - loss 0.63853946 - samples/sec: 17.42 - lr: 0.020000\n",
+      "2021-09-21 21:09:12,697 epoch 2 - iter 12/24 - loss 0.63800820 - samples/sec: 16.98 - lr: 0.020000\n",
+      "2021-09-21 21:09:12,834 epoch 2 - iter 14/24 - loss 0.63715405 - samples/sec: 14.72 - lr: 0.020000\n",
+      "2021-09-21 21:09:12,974 epoch 2 - iter 16/24 - loss 0.63732313 - samples/sec: 14.36 - lr: 0.020000\n",
+      "2021-09-21 21:09:13,099 epoch 2 - iter 18/24 - loss 0.64052794 - samples/sec: 16.08 - lr: 0.020000\n",
+      "2021-09-21 21:09:13,230 epoch 2 - iter 20/24 - loss 0.63937361 - samples/sec: 15.32 - lr: 0.020000\n",
+      "2021-09-21 21:09:13,370 epoch 2 - iter 22/24 - loss 0.63909856 - samples/sec: 14.37 - lr: 0.020000\n",
+      "2021-09-21 21:09:13,503 epoch 2 - iter 24/24 - loss 0.63776743 - samples/sec: 15.02 - lr: 0.020000\n",
+      "2021-09-21 21:09:13,504 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:13,505 EPOCH 2 done: loss 0.6378 - lr 0.0200000\n",
+      "2021-09-21 21:09:13,788 DEV : loss 0.6367404460906982 - score 0.3333\n",
+      "2021-09-21 21:09:13,791 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:09:13,793 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:13,963 epoch 3 - iter 2/24 - loss 0.64153793 - samples/sec: 15.18 - lr: 0.020000\n",
+      "2021-09-21 21:09:14,084 epoch 3 - iter 4/24 - loss 0.63351779 - samples/sec: 16.60 - lr: 0.020000\n",
+      "2021-09-21 21:09:14,254 epoch 3 - iter 6/24 - loss 0.63356566 - samples/sec: 11.82 - lr: 0.020000\n",
+      "2021-09-21 21:09:14,433 epoch 3 - iter 8/24 - loss 0.63874332 - samples/sec: 11.20 - lr: 0.020000\n",
+      "2021-09-21 21:09:14,599 epoch 3 - iter 10/24 - loss 0.64139057 - samples/sec: 12.12 - lr: 0.020000\n",
+      "2021-09-21 21:09:14,745 epoch 3 - iter 12/24 - loss 0.63985723 - samples/sec: 13.77 - lr: 0.020000\n",
+      "2021-09-21 21:09:14,918 epoch 3 - iter 14/24 - loss 0.63827746 - samples/sec: 11.62 - lr: 0.020000\n",
+      "2021-09-21 21:09:15,079 epoch 3 - iter 16/24 - loss 0.63802743 - samples/sec: 12.41 - lr: 0.020000\n",
+      "2021-09-21 21:09:15,249 epoch 3 - iter 18/24 - loss 0.63796159 - samples/sec: 11.81 - lr: 0.020000\n",
+      "2021-09-21 21:09:15,411 epoch 3 - iter 20/24 - loss 0.63687977 - samples/sec: 12.39 - lr: 0.020000\n",
+      "2021-09-21 21:09:15,582 epoch 3 - iter 22/24 - loss 0.63652735 - samples/sec: 11.75 - lr: 0.020000\n",
+      "2021-09-21 21:09:15,755 epoch 3 - iter 24/24 - loss 0.63658400 - samples/sec: 11.58 - lr: 0.020000\n",
+      "2021-09-21 21:09:15,756 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:15,757 EPOCH 3 done: loss 0.6366 - lr 0.0200000\n",
+      "2021-09-21 21:09:15,874 DEV : loss 0.6366502046585083 - score 0.3333\n",
+      "2021-09-21 21:09:15,876 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:09:15,878 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:16,073 epoch 4 - iter 2/24 - loss 0.63343456 - samples/sec: 12.57 - lr: 0.020000\n",
+      "2021-09-21 21:09:16,229 epoch 4 - iter 4/24 - loss 0.62771089 - samples/sec: 12.84 - lr: 0.020000\n",
+      "2021-09-21 21:09:16,409 epoch 4 - iter 6/24 - loss 0.62778780 - samples/sec: 11.15 - lr: 0.020000\n",
+      "2021-09-21 21:09:16,572 epoch 4 - iter 8/24 - loss 0.63058909 - samples/sec: 12.32 - lr: 0.020000\n",
+      "2021-09-21 21:09:16,719 epoch 4 - iter 10/24 - loss 0.63181599 - samples/sec: 13.65 - lr: 0.020000\n",
+      "2021-09-21 21:09:16,883 epoch 4 - iter 12/24 - loss 0.63243116 - samples/sec: 12.24 - lr: 0.020000\n",
+      "2021-09-21 21:09:17,057 epoch 4 - iter 14/24 - loss 0.63398330 - samples/sec: 11.57 - lr: 0.020000\n",
+      "2021-09-21 21:09:17,226 epoch 4 - iter 16/24 - loss 0.63389397 - samples/sec: 11.89 - lr: 0.020000\n",
+      "2021-09-21 21:09:17,387 epoch 4 - iter 18/24 - loss 0.63386448 - samples/sec: 12.41 - lr: 0.020000\n",
+      "2021-09-21 21:09:17,541 epoch 4 - iter 20/24 - loss 0.63284537 - samples/sec: 13.09 - lr: 0.020000\n",
+      "2021-09-21 21:09:17,684 epoch 4 - iter 22/24 - loss 0.63418903 - samples/sec: 14.03 - lr: 0.020000\n",
+      "2021-09-21 21:09:17,847 epoch 4 - iter 24/24 - loss 0.63341136 - samples/sec: 12.26 - lr: 0.020000\n",
+      "2021-09-21 21:09:17,849 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:17,849 EPOCH 4 done: loss 0.6334 - lr 0.0200000\n",
+      "2021-09-21 21:09:17,967 DEV : loss 0.6365419626235962 - score 0.3333\n",
+      "2021-09-21 21:09:17,968 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:38:05,585 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:05,724 epoch 3 - iter 2/24 - loss 0.65390295 - samples/sec: 17.04 - lr: 0.020000\n",
-      "2021-09-08 01:38:05,848 epoch 3 - iter 4/24 - loss 0.64892404 - samples/sec: 16.20 - lr: 0.020000\n",
-      "2021-09-08 01:38:05,963 epoch 3 - iter 6/24 - loss 0.64556663 - samples/sec: 17.52 - lr: 0.020000\n",
-      "2021-09-08 01:38:06,077 epoch 3 - iter 8/24 - loss 0.64465912 - samples/sec: 17.57 - lr: 0.020000\n",
-      "2021-09-08 01:38:06,204 epoch 3 - iter 10/24 - loss 0.63927824 - samples/sec: 15.87 - lr: 0.020000\n",
-      "2021-09-08 01:38:06,327 epoch 3 - iter 12/24 - loss 0.63933509 - samples/sec: 16.36 - lr: 0.020000\n",
-      "2021-09-08 01:38:06,435 epoch 3 - iter 14/24 - loss 0.63900013 - samples/sec: 18.71 - lr: 0.020000\n",
-      "2021-09-08 01:38:06,545 epoch 3 - iter 16/24 - loss 0.63661780 - samples/sec: 18.17 - lr: 0.020000\n",
-      "2021-09-08 01:38:06,660 epoch 3 - iter 18/24 - loss 0.63911115 - samples/sec: 17.55 - lr: 0.020000\n",
-      "2021-09-08 01:38:06,777 epoch 3 - iter 20/24 - loss 0.63910010 - samples/sec: 17.13 - lr: 0.020000\n",
-      "2021-09-08 01:38:06,890 epoch 3 - iter 22/24 - loss 0.63854198 - samples/sec: 17.86 - lr: 0.020000\n",
-      "2021-09-08 01:38:07,005 epoch 3 - iter 24/24 - loss 0.63740729 - samples/sec: 17.51 - lr: 0.020000\n",
-      "2021-09-08 01:38:07,006 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:07,007 EPOCH 3 done: loss 0.6374 - lr 0.0200000\n",
-      "2021-09-08 01:38:07,192 DEV : loss 0.6371937990188599 - score 0.3333\n",
-      "2021-09-08 01:38:07,194 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:38:07,293 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:07,429 epoch 4 - iter 2/24 - loss 0.63726351 - samples/sec: 17.10 - lr: 0.020000\n",
-      "2021-09-08 01:38:07,550 epoch 4 - iter 4/24 - loss 0.64464538 - samples/sec: 16.55 - lr: 0.020000\n",
-      "2021-09-08 01:38:07,666 epoch 4 - iter 6/24 - loss 0.64370871 - samples/sec: 17.42 - lr: 0.020000\n",
-      "2021-09-08 01:38:07,779 epoch 4 - iter 8/24 - loss 0.64226295 - samples/sec: 17.69 - lr: 0.020000\n",
-      "2021-09-08 01:38:07,888 epoch 4 - iter 10/24 - loss 0.64220923 - samples/sec: 18.59 - lr: 0.020000\n",
-      "2021-09-08 01:38:08,005 epoch 4 - iter 12/24 - loss 0.63989896 - samples/sec: 17.05 - lr: 0.020000\n",
-      "2021-09-08 01:38:08,126 epoch 4 - iter 14/24 - loss 0.64056402 - samples/sec: 16.72 - lr: 0.020000\n",
-      "2021-09-08 01:38:08,246 epoch 4 - iter 16/24 - loss 0.64144207 - samples/sec: 16.67 - lr: 0.020000\n",
-      "2021-09-08 01:38:08,373 epoch 4 - iter 18/24 - loss 0.64218131 - samples/sec: 15.90 - lr: 0.020000\n",
-      "2021-09-08 01:38:08,502 epoch 4 - iter 20/24 - loss 0.64269228 - samples/sec: 15.53 - lr: 0.020000\n",
-      "2021-09-08 01:38:08,622 epoch 4 - iter 22/24 - loss 0.64231788 - samples/sec: 16.72 - lr: 0.020000\n",
-      "2021-09-08 01:38:08,755 epoch 4 - iter 24/24 - loss 0.64148947 - samples/sec: 15.20 - lr: 0.020000\n",
-      "2021-09-08 01:38:08,756 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:08,756 EPOCH 4 done: loss 0.6415 - lr 0.0200000\n",
-      "2021-09-08 01:38:09,091 DEV : loss 0.6366130113601685 - score 0.3333\n",
-      "2021-09-08 01:38:09,092 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:38:15,918 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:16,041 epoch 5 - iter 2/24 - loss 0.65521005 - samples/sec: 19.60 - lr: 0.020000\n",
-      "2021-09-08 01:38:16,150 epoch 5 - iter 4/24 - loss 0.64696093 - samples/sec: 18.57 - lr: 0.020000\n",
-      "2021-09-08 01:38:16,252 epoch 5 - iter 6/24 - loss 0.64639644 - samples/sec: 19.63 - lr: 0.020000\n",
-      "2021-09-08 01:38:16,357 epoch 5 - iter 8/24 - loss 0.64443929 - samples/sec: 19.31 - lr: 0.020000\n",
-      "2021-09-08 01:38:16,459 epoch 5 - iter 10/24 - loss 0.64504390 - samples/sec: 19.66 - lr: 0.020000\n",
-      "2021-09-08 01:38:16,563 epoch 5 - iter 12/24 - loss 0.64402649 - samples/sec: 19.43 - lr: 0.020000\n",
-      "2021-09-08 01:38:16,673 epoch 5 - iter 14/24 - loss 0.64247386 - samples/sec: 18.31 - lr: 0.020000\n",
-      "2021-09-08 01:38:16,777 epoch 5 - iter 16/24 - loss 0.63949028 - samples/sec: 19.22 - lr: 0.020000\n",
-      "2021-09-08 01:38:16,894 epoch 5 - iter 18/24 - loss 0.63973227 - samples/sec: 17.21 - lr: 0.020000\n",
-      "2021-09-08 01:38:17,003 epoch 5 - iter 20/24 - loss 0.63990805 - samples/sec: 18.59 - lr: 0.020000\n",
-      "2021-09-08 01:38:17,104 epoch 5 - iter 22/24 - loss 0.64047894 - samples/sec: 19.93 - lr: 0.020000\n",
-      "2021-09-08 01:38:17,218 epoch 5 - iter 24/24 - loss 0.63927133 - samples/sec: 17.67 - lr: 0.020000\n",
-      "2021-09-08 01:38:17,219 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:17,219 EPOCH 5 done: loss 0.6393 - lr 0.0200000\n",
-      "2021-09-08 01:38:17,291 DEV : loss 0.6365690231323242 - score 0.3333\n",
-      "2021-09-08 01:38:17,292 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:38:21,878 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:09:26,757 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:26,898 epoch 5 - iter 2/24 - loss 0.63598517 - samples/sec: 16.19 - lr: 0.020000\n",
+      "2021-09-21 21:09:27,058 epoch 5 - iter 4/24 - loss 0.64234494 - samples/sec: 12.51 - lr: 0.020000\n",
+      "2021-09-21 21:09:27,223 epoch 5 - iter 6/24 - loss 0.63650750 - samples/sec: 12.15 - lr: 0.020000\n",
+      "2021-09-21 21:09:27,401 epoch 5 - iter 8/24 - loss 0.63504113 - samples/sec: 11.29 - lr: 0.020000\n",
+      "2021-09-21 21:09:27,562 epoch 5 - iter 10/24 - loss 0.63612626 - samples/sec: 12.46 - lr: 0.020000\n",
+      "2021-09-21 21:09:27,724 epoch 5 - iter 12/24 - loss 0.63416970 - samples/sec: 12.36 - lr: 0.020000\n",
+      "2021-09-21 21:09:27,893 epoch 5 - iter 14/24 - loss 0.63372541 - samples/sec: 11.90 - lr: 0.020000\n",
+      "2021-09-21 21:09:28,061 epoch 5 - iter 16/24 - loss 0.63390230 - samples/sec: 11.92 - lr: 0.020000\n",
+      "2021-09-21 21:09:28,216 epoch 5 - iter 18/24 - loss 0.63373647 - samples/sec: 12.95 - lr: 0.020000\n",
+      "2021-09-21 21:09:28,402 epoch 5 - iter 20/24 - loss 0.63415515 - samples/sec: 10.83 - lr: 0.020000\n",
+      "2021-09-21 21:09:28,575 epoch 5 - iter 22/24 - loss 0.63398149 - samples/sec: 11.62 - lr: 0.020000\n",
+      "2021-09-21 21:09:28,728 epoch 5 - iter 24/24 - loss 0.63522020 - samples/sec: 13.09 - lr: 0.020000\n",
+      "2021-09-21 21:09:28,729 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:28,729 EPOCH 5 done: loss 0.6352 - lr 0.0200000\n",
+      "2021-09-21 21:09:28,832 DEV : loss 0.6369094848632812 - score 0.0\n",
+      "2021-09-21 21:09:28,833 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:09:28,835 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:29,019 epoch 6 - iter 2/24 - loss 0.64006290 - samples/sec: 13.44 - lr: 0.020000\n",
+      "2021-09-21 21:09:29,194 epoch 6 - iter 4/24 - loss 0.63547288 - samples/sec: 11.47 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:38:22,009 epoch 6 - iter 2/24 - loss 0.64861989 - samples/sec: 18.40 - lr: 0.020000\n",
-      "2021-09-08 01:38:22,114 epoch 6 - iter 4/24 - loss 0.63547963 - samples/sec: 19.17 - lr: 0.020000\n",
-      "2021-09-08 01:38:22,211 epoch 6 - iter 6/24 - loss 0.63218367 - samples/sec: 20.66 - lr: 0.020000\n",
-      "2021-09-08 01:38:22,310 epoch 6 - iter 8/24 - loss 0.62941122 - samples/sec: 20.37 - lr: 0.020000\n",
-      "2021-09-08 01:38:22,419 epoch 6 - iter 10/24 - loss 0.63062500 - samples/sec: 18.54 - lr: 0.020000\n",
-      "2021-09-08 01:38:22,519 epoch 6 - iter 12/24 - loss 0.63288016 - samples/sec: 19.96 - lr: 0.020000\n",
-      "2021-09-08 01:38:22,627 epoch 6 - iter 14/24 - loss 0.63134455 - samples/sec: 18.70 - lr: 0.020000\n",
-      "2021-09-08 01:38:22,733 epoch 6 - iter 16/24 - loss 0.63043598 - samples/sec: 18.93 - lr: 0.020000\n",
-      "2021-09-08 01:38:22,866 epoch 6 - iter 18/24 - loss 0.63181857 - samples/sec: 18.69 - lr: 0.020000\n",
-      "2021-09-08 01:38:22,982 epoch 6 - iter 20/24 - loss 0.63330052 - samples/sec: 17.44 - lr: 0.020000\n",
-      "2021-09-08 01:38:23,084 epoch 6 - iter 22/24 - loss 0.63329744 - samples/sec: 19.60 - lr: 0.020000\n",
-      "2021-09-08 01:38:23,189 epoch 6 - iter 24/24 - loss 0.63352564 - samples/sec: 19.16 - lr: 0.020000\n",
-      "2021-09-08 01:38:23,191 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:23,191 EPOCH 6 done: loss 0.6335 - lr 0.0200000\n",
-      "2021-09-08 01:38:24,289 DEV : loss 0.636620044708252 - score 0.0\n",
-      "2021-09-08 01:38:24,289 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:38:24,306 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:24,430 epoch 7 - iter 2/24 - loss 0.63916862 - samples/sec: 19.10 - lr: 0.020000\n",
-      "2021-09-08 01:38:24,539 epoch 7 - iter 4/24 - loss 0.63982309 - samples/sec: 18.51 - lr: 0.020000\n",
-      "2021-09-08 01:38:24,651 epoch 7 - iter 6/24 - loss 0.63588976 - samples/sec: 17.91 - lr: 0.020000\n",
-      "2021-09-08 01:38:24,761 epoch 7 - iter 8/24 - loss 0.64145155 - samples/sec: 18.36 - lr: 0.020000\n",
-      "2021-09-08 01:38:24,865 epoch 7 - iter 10/24 - loss 0.63903200 - samples/sec: 19.27 - lr: 0.020000\n",
-      "2021-09-08 01:38:24,968 epoch 7 - iter 12/24 - loss 0.64030101 - samples/sec: 19.54 - lr: 0.020000\n",
-      "2021-09-08 01:38:25,082 epoch 7 - iter 14/24 - loss 0.63960322 - samples/sec: 17.62 - lr: 0.020000\n",
-      "2021-09-08 01:38:25,192 epoch 7 - iter 16/24 - loss 0.64001159 - samples/sec: 18.34 - lr: 0.020000\n",
-      "2021-09-08 01:38:25,294 epoch 7 - iter 18/24 - loss 0.63896523 - samples/sec: 19.69 - lr: 0.020000\n",
-      "2021-09-08 01:38:25,389 epoch 7 - iter 20/24 - loss 0.64001384 - samples/sec: 21.21 - lr: 0.020000\n",
-      "2021-09-08 01:38:25,492 epoch 7 - iter 22/24 - loss 0.63932152 - samples/sec: 19.49 - lr: 0.020000\n",
-      "2021-09-08 01:38:25,593 epoch 7 - iter 24/24 - loss 0.63985085 - samples/sec: 19.91 - lr: 0.020000\n",
-      "2021-09-08 01:38:25,594 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:25,594 EPOCH 7 done: loss 0.6399 - lr 0.0200000\n",
-      "2021-09-08 01:38:25,762 DEV : loss 0.6365191340446472 - score 0.3333\n",
-      "2021-09-08 01:38:25,763 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:09:29,354 epoch 6 - iter 6/24 - loss 0.63585560 - samples/sec: 12.51 - lr: 0.020000\n",
+      "2021-09-21 21:09:29,496 epoch 6 - iter 8/24 - loss 0.62983842 - samples/sec: 14.14 - lr: 0.020000\n",
+      "2021-09-21 21:09:29,633 epoch 6 - iter 10/24 - loss 0.63148930 - samples/sec: 14.68 - lr: 0.020000\n",
+      "2021-09-21 21:09:29,791 epoch 6 - iter 12/24 - loss 0.63213010 - samples/sec: 12.72 - lr: 0.020000\n",
+      "2021-09-21 21:09:29,964 epoch 6 - iter 14/24 - loss 0.63479871 - samples/sec: 11.62 - lr: 0.020000\n",
+      "2021-09-21 21:09:30,151 epoch 6 - iter 16/24 - loss 0.63512152 - samples/sec: 10.73 - lr: 0.020000\n",
+      "2021-09-21 21:09:30,323 epoch 6 - iter 18/24 - loss 0.63511552 - samples/sec: 11.72 - lr: 0.020000\n",
+      "2021-09-21 21:09:30,495 epoch 6 - iter 20/24 - loss 0.63470097 - samples/sec: 11.61 - lr: 0.020000\n",
+      "2021-09-21 21:09:30,652 epoch 6 - iter 22/24 - loss 0.63462568 - samples/sec: 12.80 - lr: 0.020000\n",
+      "2021-09-21 21:09:30,801 epoch 6 - iter 24/24 - loss 0.63514426 - samples/sec: 13.54 - lr: 0.020000\n",
+      "2021-09-21 21:09:30,802 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:30,802 EPOCH 6 done: loss 0.6351 - lr 0.0200000\n",
+      "2021-09-21 21:09:30,925 DEV : loss 0.6366745233535767 - score 0.3333\n",
+      "2021-09-21 21:09:30,931 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:09:30,933 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:31,143 epoch 7 - iter 2/24 - loss 0.63991213 - samples/sec: 11.80 - lr: 0.020000\n",
+      "2021-09-21 21:09:31,297 epoch 7 - iter 4/24 - loss 0.63238652 - samples/sec: 13.04 - lr: 0.020000\n",
+      "2021-09-21 21:09:31,450 epoch 7 - iter 6/24 - loss 0.63312949 - samples/sec: 13.19 - lr: 0.020000\n",
+      "2021-09-21 21:09:31,586 epoch 7 - iter 8/24 - loss 0.63271468 - samples/sec: 14.75 - lr: 0.020000\n",
+      "2021-09-21 21:09:31,717 epoch 7 - iter 10/24 - loss 0.63440125 - samples/sec: 15.28 - lr: 0.020000\n",
+      "2021-09-21 21:09:31,836 epoch 7 - iter 12/24 - loss 0.63479569 - samples/sec: 16.92 - lr: 0.020000\n",
+      "2021-09-21 21:09:31,958 epoch 7 - iter 14/24 - loss 0.63515989 - samples/sec: 16.46 - lr: 0.020000\n",
+      "2021-09-21 21:09:32,092 epoch 7 - iter 16/24 - loss 0.63645671 - samples/sec: 14.99 - lr: 0.020000\n",
+      "2021-09-21 21:09:32,223 epoch 7 - iter 18/24 - loss 0.63677481 - samples/sec: 15.39 - lr: 0.020000\n",
+      "2021-09-21 21:09:32,348 epoch 7 - iter 20/24 - loss 0.63699788 - samples/sec: 16.02 - lr: 0.020000\n",
+      "2021-09-21 21:09:32,460 epoch 7 - iter 22/24 - loss 0.63612919 - samples/sec: 17.88 - lr: 0.020000\n",
+      "2021-09-21 21:09:32,600 epoch 7 - iter 24/24 - loss 0.63286200 - samples/sec: 14.46 - lr: 0.020000\n",
+      "2021-09-21 21:09:32,601 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:32,601 EPOCH 7 done: loss 0.6329 - lr 0.0200000\n",
+      "2021-09-21 21:09:32,765 DEV : loss 0.6366475820541382 - score 0.3333\n",
+      "2021-09-21 21:09:32,768 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:09:32,843 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:32,988 epoch 8 - iter 2/24 - loss 0.64098915 - samples/sec: 16.74 - lr: 0.020000\n",
+      "2021-09-21 21:09:33,105 epoch 8 - iter 4/24 - loss 0.64764355 - samples/sec: 17.20 - lr: 0.020000\n",
+      "2021-09-21 21:09:33,237 epoch 8 - iter 6/24 - loss 0.64242049 - samples/sec: 15.29 - lr: 0.020000\n",
+      "2021-09-21 21:09:33,360 epoch 8 - iter 8/24 - loss 0.64403803 - samples/sec: 16.25 - lr: 0.020000\n",
+      "2021-09-21 21:09:33,476 epoch 8 - iter 10/24 - loss 0.64183762 - samples/sec: 17.32 - lr: 0.020000\n",
+      "2021-09-21 21:09:33,597 epoch 8 - iter 12/24 - loss 0.64078821 - samples/sec: 16.60 - lr: 0.020000\n",
+      "2021-09-21 21:09:33,721 epoch 8 - iter 14/24 - loss 0.64166693 - samples/sec: 16.23 - lr: 0.020000\n",
+      "2021-09-21 21:09:33,852 epoch 8 - iter 16/24 - loss 0.64110528 - samples/sec: 15.31 - lr: 0.020000\n",
+      "2021-09-21 21:09:33,989 epoch 8 - iter 18/24 - loss 0.64273051 - samples/sec: 14.70 - lr: 0.020000\n",
+      "2021-09-21 21:09:34,114 epoch 8 - iter 20/24 - loss 0.64285173 - samples/sec: 16.11 - lr: 0.020000\n",
+      "2021-09-21 21:09:34,248 epoch 8 - iter 22/24 - loss 0.64267991 - samples/sec: 14.92 - lr: 0.020000\n",
+      "2021-09-21 21:09:34,370 epoch 8 - iter 24/24 - loss 0.64260603 - samples/sec: 16.52 - lr: 0.020000\n",
+      "2021-09-21 21:09:34,371 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:34,372 EPOCH 8 done: loss 0.6426 - lr 0.0200000\n",
+      "2021-09-21 21:09:34,556 DEV : loss 0.6366510391235352 - score 0.3333\n",
+      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:09:34,558 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:09:34,641 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:34,799 epoch 9 - iter 2/24 - loss 0.63521558 - samples/sec: 15.99 - lr: 0.010000\n",
+      "2021-09-21 21:09:34,915 epoch 9 - iter 4/24 - loss 0.63572514 - samples/sec: 17.28 - lr: 0.010000\n",
+      "2021-09-21 21:09:35,053 epoch 9 - iter 6/24 - loss 0.64288696 - samples/sec: 14.57 - lr: 0.010000\n",
+      "2021-09-21 21:09:35,161 epoch 9 - iter 8/24 - loss 0.64495398 - samples/sec: 18.63 - lr: 0.010000\n",
+      "2021-09-21 21:09:35,298 epoch 9 - iter 10/24 - loss 0.64266186 - samples/sec: 14.67 - lr: 0.010000\n",
+      "2021-09-21 21:09:35,426 epoch 9 - iter 12/24 - loss 0.64262316 - samples/sec: 15.72 - lr: 0.010000\n",
+      "2021-09-21 21:09:35,559 epoch 9 - iter 14/24 - loss 0.64108491 - samples/sec: 15.11 - lr: 0.010000\n",
+      "2021-09-21 21:09:35,688 epoch 9 - iter 16/24 - loss 0.64211517 - samples/sec: 15.55 - lr: 0.010000\n",
+      "2021-09-21 21:09:35,814 epoch 9 - iter 18/24 - loss 0.64003335 - samples/sec: 15.87 - lr: 0.010000\n",
+      "2021-09-21 21:09:35,953 epoch 9 - iter 20/24 - loss 0.63943956 - samples/sec: 14.50 - lr: 0.010000\n",
+      "2021-09-21 21:09:36,078 epoch 9 - iter 22/24 - loss 0.63986786 - samples/sec: 16.06 - lr: 0.010000\n",
+      "2021-09-21 21:09:36,208 epoch 9 - iter 24/24 - loss 0.63991364 - samples/sec: 15.52 - lr: 0.010000\n",
+      "2021-09-21 21:09:36,210 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:36,210 EPOCH 9 done: loss 0.6399 - lr 0.0100000\n",
+      "2021-09-21 21:09:36,448 DEV : loss 0.6365293264389038 - score 0.3333\n",
+      "2021-09-21 21:09:36,449 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:38:34,604 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:34,736 epoch 8 - iter 2/24 - loss 0.65636131 - samples/sec: 19.06 - lr: 0.020000\n",
-      "2021-09-08 01:38:34,836 epoch 8 - iter 4/24 - loss 0.64228930 - samples/sec: 20.05 - lr: 0.020000\n",
-      "2021-09-08 01:38:34,943 epoch 8 - iter 6/24 - loss 0.63600331 - samples/sec: 18.90 - lr: 0.020000\n",
-      "2021-09-08 01:38:35,049 epoch 8 - iter 8/24 - loss 0.63623987 - samples/sec: 19.00 - lr: 0.020000\n",
-      "2021-09-08 01:38:35,154 epoch 8 - iter 10/24 - loss 0.63853400 - samples/sec: 19.11 - lr: 0.020000\n",
-      "2021-09-08 01:38:35,261 epoch 8 - iter 12/24 - loss 0.63573119 - samples/sec: 18.87 - lr: 0.020000\n",
-      "2021-09-08 01:38:35,364 epoch 8 - iter 14/24 - loss 0.63477727 - samples/sec: 19.53 - lr: 0.020000\n",
-      "2021-09-08 01:38:35,463 epoch 8 - iter 16/24 - loss 0.63533441 - samples/sec: 20.29 - lr: 0.020000\n",
-      "2021-09-08 01:38:35,566 epoch 8 - iter 18/24 - loss 0.63560507 - samples/sec: 19.63 - lr: 0.020000\n",
-      "2021-09-08 01:38:35,662 epoch 8 - iter 20/24 - loss 0.63615709 - samples/sec: 20.80 - lr: 0.020000\n",
-      "2021-09-08 01:38:35,758 epoch 8 - iter 22/24 - loss 0.63549720 - samples/sec: 21.07 - lr: 0.020000\n",
-      "2021-09-08 01:38:35,856 epoch 8 - iter 24/24 - loss 0.63661378 - samples/sec: 20.34 - lr: 0.020000\n",
-      "2021-09-08 01:38:35,858 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:35,858 EPOCH 8 done: loss 0.6366 - lr 0.0200000\n",
-      "2021-09-08 01:38:35,915 DEV : loss 0.6365338563919067 - score 0.0\n",
-      "2021-09-08 01:38:35,916 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:38:35,917 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:36,039 epoch 9 - iter 2/24 - loss 0.64773637 - samples/sec: 19.10 - lr: 0.020000\n",
-      "2021-09-08 01:38:36,137 epoch 9 - iter 4/24 - loss 0.64070812 - samples/sec: 20.65 - lr: 0.020000\n",
-      "2021-09-08 01:38:36,241 epoch 9 - iter 6/24 - loss 0.64177053 - samples/sec: 19.31 - lr: 0.020000\n",
-      "2021-09-08 01:38:36,339 epoch 9 - iter 8/24 - loss 0.63856722 - samples/sec: 20.59 - lr: 0.020000\n",
-      "2021-09-08 01:38:36,437 epoch 9 - iter 10/24 - loss 0.63981139 - samples/sec: 20.43 - lr: 0.020000\n",
-      "2021-09-08 01:38:36,534 epoch 9 - iter 12/24 - loss 0.64136430 - samples/sec: 20.67 - lr: 0.020000\n",
-      "2021-09-08 01:38:36,638 epoch 9 - iter 14/24 - loss 0.63890082 - samples/sec: 19.28 - lr: 0.020000\n",
-      "2021-09-08 01:38:36,738 epoch 9 - iter 16/24 - loss 0.63854126 - samples/sec: 20.19 - lr: 0.020000\n",
-      "2021-09-08 01:38:36,843 epoch 9 - iter 18/24 - loss 0.63860028 - samples/sec: 19.20 - lr: 0.020000\n",
-      "2021-09-08 01:38:36,951 epoch 9 - iter 20/24 - loss 0.63796861 - samples/sec: 18.58 - lr: 0.020000\n",
-      "2021-09-08 01:38:37,049 epoch 9 - iter 22/24 - loss 0.63832808 - samples/sec: 20.39 - lr: 0.020000\n",
-      "2021-09-08 01:38:37,145 epoch 9 - iter 24/24 - loss 0.63865451 - samples/sec: 20.97 - lr: 0.020000\n",
-      "2021-09-08 01:38:37,146 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:37,146 EPOCH 9 done: loss 0.6387 - lr 0.0200000\n",
-      "2021-09-08 01:38:37,212 DEV : loss 0.6365596652030945 - score 0.0\n",
-      "2021-09-08 01:38:37,213 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:38:37,215 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:37,336 epoch 10 - iter 2/24 - loss 0.64985186 - samples/sec: 19.57 - lr: 0.020000\n",
-      "2021-09-08 01:38:37,434 epoch 10 - iter 4/24 - loss 0.64602350 - samples/sec: 20.46 - lr: 0.020000\n",
-      "2021-09-08 01:38:37,529 epoch 10 - iter 6/24 - loss 0.64094781 - samples/sec: 21.24 - lr: 0.020000\n",
-      "2021-09-08 01:38:37,634 epoch 10 - iter 8/24 - loss 0.64085481 - samples/sec: 19.16 - lr: 0.020000\n",
-      "2021-09-08 01:38:37,740 epoch 10 - iter 10/24 - loss 0.64078225 - samples/sec: 18.96 - lr: 0.020000\n",
-      "2021-09-08 01:38:37,854 epoch 10 - iter 12/24 - loss 0.63840386 - samples/sec: 17.58 - lr: 0.020000\n",
-      "2021-09-08 01:38:37,962 epoch 10 - iter 14/24 - loss 0.63772740 - samples/sec: 18.61 - lr: 0.020000\n",
-      "2021-09-08 01:38:38,061 epoch 10 - iter 16/24 - loss 0.63767526 - samples/sec: 20.37 - lr: 0.020000\n",
-      "2021-09-08 01:38:38,161 epoch 10 - iter 18/24 - loss 0.63673338 - samples/sec: 20.12 - lr: 0.020000\n",
-      "2021-09-08 01:38:38,260 epoch 10 - iter 20/24 - loss 0.63623800 - samples/sec: 20.24 - lr: 0.020000\n",
-      "2021-09-08 01:38:38,362 epoch 10 - iter 22/24 - loss 0.63606984 - samples/sec: 19.79 - lr: 0.020000\n",
-      "2021-09-08 01:38:38,462 epoch 10 - iter 24/24 - loss 0.63469853 - samples/sec: 20.16 - lr: 0.020000\n",
-      "2021-09-08 01:38:38,463 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:38,463 EPOCH 10 done: loss 0.6347 - lr 0.0200000\n",
-      "2021-09-08 01:38:38,520 DEV : loss 0.6365233659744263 - score 0.0\n",
-      "2021-09-08 01:38:38,520 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:38:42,587 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:42,587 Testing using best model ...\n",
-      "2021-09-08 01:38:42,589 loading file None1/best-model.pt\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
+      "2021-09-21 21:09:40,652 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:40,809 epoch 10 - iter 2/24 - loss 0.63480300 - samples/sec: 15.04 - lr: 0.010000\n",
+      "2021-09-21 21:09:40,931 epoch 10 - iter 4/24 - loss 0.63116232 - samples/sec: 16.49 - lr: 0.010000\n",
+      "2021-09-21 21:09:41,042 epoch 10 - iter 6/24 - loss 0.63436302 - samples/sec: 17.99 - lr: 0.010000\n",
+      "2021-09-21 21:09:41,179 epoch 10 - iter 8/24 - loss 0.63267558 - samples/sec: 14.73 - lr: 0.010000\n",
+      "2021-09-21 21:09:41,321 epoch 10 - iter 10/24 - loss 0.63118494 - samples/sec: 14.09 - lr: 0.010000\n",
+      "2021-09-21 21:09:41,465 epoch 10 - iter 12/24 - loss 0.63147552 - samples/sec: 13.92 - lr: 0.010000\n",
+      "2021-09-21 21:09:41,624 epoch 10 - iter 14/24 - loss 0.63049636 - samples/sec: 12.59 - lr: 0.010000\n",
+      "2021-09-21 21:09:41,774 epoch 10 - iter 16/24 - loss 0.63023833 - samples/sec: 13.46 - lr: 0.010000\n",
+      "2021-09-21 21:09:41,921 epoch 10 - iter 18/24 - loss 0.63062635 - samples/sec: 13.61 - lr: 0.010000\n",
+      "2021-09-21 21:09:42,081 epoch 10 - iter 20/24 - loss 0.63077537 - samples/sec: 12.58 - lr: 0.010000\n",
+      "2021-09-21 21:09:42,223 epoch 10 - iter 22/24 - loss 0.63353800 - samples/sec: 14.16 - lr: 0.010000\n",
+      "2021-09-21 21:09:42,363 epoch 10 - iter 24/24 - loss 0.63253294 - samples/sec: 14.32 - lr: 0.010000\n",
+      "2021-09-21 21:09:42,364 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:42,365 EPOCH 10 done: loss 0.6325 - lr 0.0100000\n",
+      "2021-09-21 21:09:42,487 DEV : loss 0.6365551948547363 - score 0.0\n",
+      "2021-09-21 21:09:42,488 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:09:52,697 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:52,698 Testing using best model ...\n",
+      "2021-09-21 21:09:52,700 loading file None1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:38:47,449 \t0.6667\n",
-      "2021-09-08 01:38:47,450 \n",
+      "2021-09-21 21:09:57,994 \t0.3333\n",
+      "2021-09-21 21:09:57,994 \n",
       "Results:\n",
-      "- F-score (micro) 0.6667\n",
-      "- F-score (macro) 0.2667\n",
-      "- Accuracy 0.6667\n",
+      "- F-score (micro) 0.3333\n",
+      "- F-score (macro) 0.2222\n",
+      "- Accuracy 0.3333\n",
       "\n",
       "By class:\n",
       "              precision    recall  f1-score   support\n",
       "\n",
-      "    positive     0.0000    0.0000    0.0000         1\n",
-      "     neutral     0.6667    1.0000    0.8000         2\n",
-      "    negative     0.0000    0.0000    0.0000         0\n",
-      "\n",
-      "   micro avg     0.6667    0.6667    0.6667         3\n",
-      "   macro avg     0.2222    0.3333    0.2667         3\n",
-      "weighted avg     0.4444    0.6667    0.5333         3\n",
-      " samples avg     0.6667    0.6667    0.6667         3\n",
+      "    positive     1.0000    0.5000    0.6667         2\n",
+      "     neutral     0.0000    0.0000    0.0000         0\n",
+      "    negative     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "2021-09-08 01:38:47,450 ----------------------------------------------------------------------------------------------------\n",
-      "Accuracy Durchschnitt: 0.33728155339805826\n"
+      "   micro avg     0.3333    0.3333    0.3333         3\n",
+      "   macro avg     0.3333    0.1667    0.2222         3\n",
+      "weighted avg     0.6667    0.3333    0.4444         3\n",
+      " samples avg     0.3333    0.3333    0.3333         3\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 21:09:57,995 ----------------------------------------------------------------------------------------------------\n",
+      "Accuracy Durchschnitt: 0.34563106796116505\n"
      ]
     }
    ],
@@ -2997,6 +2996,26 @@
     "print(f'Accuracy Durchschnitt: {statistics.mean(avg_acc_list)}') "
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "aa582fa7",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.3504854368932039, 0.3271844660194175, 0.34368932038834954, 0.3650485436893204, 0.341747572815534]\n",
+      "0.01232665650338165\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(avg_acc_list)\n",
+    "print(statistics.pstdev(avg_acc_list))"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "440b1d3b",
@@ -3007,7 +3026,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 6,
    "id": "ef4da272",
    "metadata": {},
    "outputs": [
@@ -3015,25 +3034,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:42:03,201 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
+      "2021-09-21 21:10:23,274 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:42:07,269 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:10:27,949 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 27/27 [00:00<00:00, 39050.42it/s]"
+      "100%|██████████| 27/27 [00:00<00:00, 34941.75it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:42:07,272 [b'This text entails a positive sentiment', b'This text entails a neutral sentiment', b'This text entails a negative sentiment']\n",
-      "2021-09-08 01:42:07,282 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:07,283 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:10:27,951 [b'This text entails a positive sentiment', b'This text entails a neutral sentiment', b'This text entails a negative sentiment']\n",
+      "2021-09-21 21:10:28,014 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:28,016 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -3346,26 +3365,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:42:07,284 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:07,284 Corpus: \"Corpus: 24 train + 3 dev + 3 test sentences\"\n",
-      "2021-09-08 01:42:07,285 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:07,285 Parameters:\n",
-      "2021-09-08 01:42:07,285  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:42:07,286  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:42:07,286  - patience: \"3\"\n",
-      "2021-09-08 01:42:07,286  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:42:07,286  - max_epochs: \"10\"\n",
-      "2021-09-08 01:42:07,287  - shuffle: \"True\"\n",
-      "2021-09-08 01:42:07,287  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:42:07,287  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:42:07,287 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:07,288 Model training base path: \"None1\"\n",
-      "2021-09-08 01:42:07,288 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:07,288 Device: cuda:1\n",
-      "2021-09-08 01:42:07,289 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:07,289 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:42:07,296 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:07,418 epoch 1 - iter 2/24 - loss 0.63254553 - samples/sec: 21.76 - lr: 0.020000\n"
+      "2021-09-21 21:10:28,017 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:28,017 Corpus: \"Corpus: 24 train + 3 dev + 3 test sentences\"\n",
+      "2021-09-21 21:10:28,018 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:28,018 Parameters:\n",
+      "2021-09-21 21:10:28,018  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:10:28,018  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:10:28,019  - patience: \"3\"\n",
+      "2021-09-21 21:10:28,019  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:10:28,019  - max_epochs: \"10\"\n",
+      "2021-09-21 21:10:28,019  - shuffle: \"True\"\n",
+      "2021-09-21 21:10:28,020  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:10:28,020  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:10:28,020 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:28,021 Model training base path: \"None1\"\n",
+      "2021-09-21 21:10:28,021 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:28,021 Device: cuda:0\n",
+      "2021-09-21 21:10:28,021 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:28,022 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:10:28,028 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -3379,233 +3397,233 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:42:07,532 epoch 1 - iter 4/24 - loss 0.63471249 - samples/sec: 17.69 - lr: 0.020000\n",
-      "2021-09-08 01:42:07,630 epoch 1 - iter 6/24 - loss 0.63522120 - samples/sec: 20.48 - lr: 0.020000\n",
-      "2021-09-08 01:42:07,729 epoch 1 - iter 8/24 - loss 0.63594999 - samples/sec: 20.23 - lr: 0.020000\n",
-      "2021-09-08 01:42:07,831 epoch 1 - iter 10/24 - loss 0.63892201 - samples/sec: 19.81 - lr: 0.020000\n",
-      "2021-09-08 01:42:07,930 epoch 1 - iter 12/24 - loss 0.63712131 - samples/sec: 20.27 - lr: 0.020000\n",
-      "2021-09-08 01:42:08,032 epoch 1 - iter 14/24 - loss 0.63735239 - samples/sec: 19.68 - lr: 0.020000\n",
-      "2021-09-08 01:42:08,136 epoch 1 - iter 16/24 - loss 0.63738728 - samples/sec: 19.27 - lr: 0.020000\n",
-      "2021-09-08 01:42:08,242 epoch 1 - iter 18/24 - loss 0.63544128 - samples/sec: 18.96 - lr: 0.020000\n",
-      "2021-09-08 01:42:08,350 epoch 1 - iter 20/24 - loss 0.63621473 - samples/sec: 18.67 - lr: 0.020000\n",
-      "2021-09-08 01:42:08,456 epoch 1 - iter 22/24 - loss 0.63659460 - samples/sec: 18.94 - lr: 0.020000\n",
-      "2021-09-08 01:42:08,560 epoch 1 - iter 24/24 - loss 0.63636692 - samples/sec: 19.34 - lr: 0.020000\n",
-      "2021-09-08 01:42:08,561 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:08,561 EPOCH 1 done: loss 0.6364 - lr 0.0200000\n",
-      "2021-09-08 01:42:08,629 DEV : loss 0.6365151405334473 - score 0.0\n",
-      "2021-09-08 01:42:08,630 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:10:28,194 epoch 1 - iter 2/24 - loss 0.62651241 - samples/sec: 13.72 - lr: 0.020000\n",
+      "2021-09-21 21:10:28,341 epoch 1 - iter 4/24 - loss 0.62612525 - samples/sec: 13.75 - lr: 0.020000\n",
+      "2021-09-21 21:10:28,487 epoch 1 - iter 6/24 - loss 0.62938326 - samples/sec: 13.68 - lr: 0.020000\n",
+      "2021-09-21 21:10:28,631 epoch 1 - iter 8/24 - loss 0.63097177 - samples/sec: 13.98 - lr: 0.020000\n",
+      "2021-09-21 21:10:28,776 epoch 1 - iter 10/24 - loss 0.63659755 - samples/sec: 13.81 - lr: 0.020000\n",
+      "2021-09-21 21:10:28,917 epoch 1 - iter 12/24 - loss 0.63606327 - samples/sec: 14.29 - lr: 0.020000\n",
+      "2021-09-21 21:10:29,056 epoch 1 - iter 14/24 - loss 0.63662155 - samples/sec: 14.43 - lr: 0.020000\n",
+      "2021-09-21 21:10:29,204 epoch 1 - iter 16/24 - loss 0.63624172 - samples/sec: 13.54 - lr: 0.020000\n",
+      "2021-09-21 21:10:29,352 epoch 1 - iter 18/24 - loss 0.63476484 - samples/sec: 13.57 - lr: 0.020000\n",
+      "2021-09-21 21:10:29,509 epoch 1 - iter 20/24 - loss 0.63472037 - samples/sec: 12.79 - lr: 0.020000\n",
+      "2021-09-21 21:10:29,666 epoch 1 - iter 22/24 - loss 0.63544254 - samples/sec: 12.78 - lr: 0.020000\n",
+      "2021-09-21 21:10:29,813 epoch 1 - iter 24/24 - loss 0.63533229 - samples/sec: 13.70 - lr: 0.020000\n",
+      "2021-09-21 21:10:29,814 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:29,814 EPOCH 1 done: loss 0.6353 - lr 0.0200000\n",
+      "2021-09-21 21:10:29,910 DEV : loss 0.6365181803703308 - score 0.3333\n",
+      "2021-09-21 21:10:29,910 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:42:12,703 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:12,833 epoch 2 - iter 2/24 - loss 0.64137664 - samples/sec: 18.75 - lr: 0.020000\n",
-      "2021-09-08 01:42:12,946 epoch 2 - iter 4/24 - loss 0.63729653 - samples/sec: 17.67 - lr: 0.020000\n",
-      "2021-09-08 01:42:13,043 epoch 2 - iter 6/24 - loss 0.64113675 - samples/sec: 20.80 - lr: 0.020000\n",
-      "2021-09-08 01:42:13,149 epoch 2 - iter 8/24 - loss 0.64211380 - samples/sec: 19.02 - lr: 0.020000\n",
-      "2021-09-08 01:42:13,250 epoch 2 - iter 10/24 - loss 0.64663140 - samples/sec: 19.82 - lr: 0.020000\n",
-      "2021-09-08 01:42:13,349 epoch 2 - iter 12/24 - loss 0.64844990 - samples/sec: 20.41 - lr: 0.020000\n",
-      "2021-09-08 01:42:13,467 epoch 2 - iter 14/24 - loss 0.64462816 - samples/sec: 17.05 - lr: 0.020000\n",
-      "2021-09-08 01:42:13,573 epoch 2 - iter 16/24 - loss 0.64431964 - samples/sec: 18.90 - lr: 0.020000\n",
-      "2021-09-08 01:42:13,672 epoch 2 - iter 18/24 - loss 0.64363359 - samples/sec: 20.17 - lr: 0.020000\n",
-      "2021-09-08 01:42:13,777 epoch 2 - iter 20/24 - loss 0.64427348 - samples/sec: 19.19 - lr: 0.020000\n",
-      "2021-09-08 01:42:13,885 epoch 2 - iter 22/24 - loss 0.64335732 - samples/sec: 18.72 - lr: 0.020000\n",
-      "2021-09-08 01:42:13,981 epoch 2 - iter 24/24 - loss 0.64248256 - samples/sec: 20.88 - lr: 0.020000\n",
-      "2021-09-08 01:42:13,982 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:13,982 EPOCH 2 done: loss 0.6425 - lr 0.0200000\n",
-      "2021-09-08 01:42:14,061 DEV : loss 0.6365168690681458 - score 0.6667\n",
-      "2021-09-08 01:42:14,061 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:42:18,514 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:18,620 epoch 3 - iter 2/24 - loss 0.65822756 - samples/sec: 22.16 - lr: 0.020000\n",
-      "2021-09-08 01:42:18,723 epoch 3 - iter 4/24 - loss 0.64728814 - samples/sec: 19.50 - lr: 0.020000\n",
-      "2021-09-08 01:42:18,824 epoch 3 - iter 6/24 - loss 0.64219460 - samples/sec: 19.94 - lr: 0.020000\n",
-      "2021-09-08 01:42:18,937 epoch 3 - iter 8/24 - loss 0.64417134 - samples/sec: 17.85 - lr: 0.020000\n",
-      "2021-09-08 01:42:19,048 epoch 3 - iter 10/24 - loss 0.64306601 - samples/sec: 18.09 - lr: 0.020000\n",
-      "2021-09-08 01:42:19,157 epoch 3 - iter 12/24 - loss 0.64104859 - samples/sec: 18.43 - lr: 0.020000\n",
-      "2021-09-08 01:42:19,262 epoch 3 - iter 14/24 - loss 0.64054460 - samples/sec: 19.22 - lr: 0.020000\n",
-      "2021-09-08 01:42:19,361 epoch 3 - iter 16/24 - loss 0.63937889 - samples/sec: 20.28 - lr: 0.020000\n",
-      "2021-09-08 01:42:19,468 epoch 3 - iter 18/24 - loss 0.63865396 - samples/sec: 18.68 - lr: 0.020000\n",
-      "2021-09-08 01:42:19,573 epoch 3 - iter 20/24 - loss 0.63594750 - samples/sec: 19.21 - lr: 0.020000\n",
-      "2021-09-08 01:42:19,673 epoch 3 - iter 22/24 - loss 0.63641980 - samples/sec: 20.14 - lr: 0.020000\n",
-      "2021-09-08 01:42:19,783 epoch 3 - iter 24/24 - loss 0.63699509 - samples/sec: 18.31 - lr: 0.020000\n",
-      "2021-09-08 01:42:19,784 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:19,784 EPOCH 3 done: loss 0.6370 - lr 0.0200000\n",
-      "2021-09-08 01:42:19,858 DEV : loss 0.6365170478820801 - score 0.6667\n",
-      "2021-09-08 01:42:19,859 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:42:19,861 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:19,986 epoch 4 - iter 2/24 - loss 0.63831797 - samples/sec: 19.11 - lr: 0.020000\n",
-      "2021-09-08 01:42:20,086 epoch 4 - iter 4/24 - loss 0.63793108 - samples/sec: 20.05 - lr: 0.020000\n",
-      "2021-09-08 01:42:20,184 epoch 4 - iter 6/24 - loss 0.64103479 - samples/sec: 20.35 - lr: 0.020000\n",
-      "2021-09-08 01:42:20,287 epoch 4 - iter 8/24 - loss 0.63705783 - samples/sec: 19.64 - lr: 0.020000\n",
-      "2021-09-08 01:42:20,384 epoch 4 - iter 10/24 - loss 0.63817810 - samples/sec: 20.68 - lr: 0.020000\n",
-      "2021-09-08 01:42:20,483 epoch 4 - iter 12/24 - loss 0.63833425 - samples/sec: 20.38 - lr: 0.020000\n",
-      "2021-09-08 01:42:20,586 epoch 4 - iter 14/24 - loss 0.63582468 - samples/sec: 19.48 - lr: 0.020000\n",
-      "2021-09-08 01:42:20,694 epoch 4 - iter 16/24 - loss 0.63698792 - samples/sec: 18.50 - lr: 0.020000\n",
-      "2021-09-08 01:42:20,796 epoch 4 - iter 18/24 - loss 0.63674035 - samples/sec: 19.74 - lr: 0.020000\n",
-      "2021-09-08 01:42:20,894 epoch 4 - iter 20/24 - loss 0.63686402 - samples/sec: 20.63 - lr: 0.020000\n",
-      "2021-09-08 01:42:21,001 epoch 4 - iter 22/24 - loss 0.63751583 - samples/sec: 18.69 - lr: 0.020000\n",
-      "2021-09-08 01:42:21,106 epoch 4 - iter 24/24 - loss 0.63757004 - samples/sec: 19.22 - lr: 0.020000\n",
-      "2021-09-08 01:42:21,107 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:21,107 EPOCH 4 done: loss 0.6376 - lr 0.0200000\n",
-      "2021-09-08 01:42:21,185 DEV : loss 0.6368486285209656 - score 0.3333\n",
-      "2021-09-08 01:42:21,186 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:42:21,188 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:21,326 epoch 5 - iter 2/24 - loss 0.62478358 - samples/sec: 16.33 - lr: 0.020000\n",
-      "2021-09-08 01:42:21,440 epoch 5 - iter 4/24 - loss 0.63383947 - samples/sec: 17.74 - lr: 0.020000\n",
-      "2021-09-08 01:42:21,543 epoch 5 - iter 6/24 - loss 0.63379355 - samples/sec: 19.47 - lr: 0.020000\n",
-      "2021-09-08 01:42:21,649 epoch 5 - iter 8/24 - loss 0.63580435 - samples/sec: 18.96 - lr: 0.020000\n",
-      "2021-09-08 01:42:21,750 epoch 5 - iter 10/24 - loss 0.63523664 - samples/sec: 19.91 - lr: 0.020000\n",
-      "2021-09-08 01:42:21,856 epoch 5 - iter 12/24 - loss 0.63467524 - samples/sec: 19.01 - lr: 0.020000\n",
-      "2021-09-08 01:42:21,963 epoch 5 - iter 14/24 - loss 0.63589409 - samples/sec: 18.75 - lr: 0.020000\n",
-      "2021-09-08 01:42:22,072 epoch 5 - iter 16/24 - loss 0.63724846 - samples/sec: 18.40 - lr: 0.020000\n",
-      "2021-09-08 01:42:22,172 epoch 5 - iter 18/24 - loss 0.63834871 - samples/sec: 20.09 - lr: 0.020000\n",
-      "2021-09-08 01:42:22,274 epoch 5 - iter 20/24 - loss 0.63713696 - samples/sec: 19.82 - lr: 0.020000\n",
-      "2021-09-08 01:42:22,377 epoch 5 - iter 22/24 - loss 0.63563803 - samples/sec: 19.43 - lr: 0.020000\n",
-      "2021-09-08 01:42:22,490 epoch 5 - iter 24/24 - loss 0.63527066 - samples/sec: 17.82 - lr: 0.020000\n",
-      "2021-09-08 01:42:22,491 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:22,491 EPOCH 5 done: loss 0.6353 - lr 0.0200000\n",
-      "2021-09-08 01:42:22,569 DEV : loss 0.6365196704864502 - score 0.0\n",
-      "2021-09-08 01:42:22,570 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:42:22,572 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:22,705 epoch 6 - iter 2/24 - loss 0.63746995 - samples/sec: 19.21 - lr: 0.020000\n",
-      "2021-09-08 01:42:22,804 epoch 6 - iter 4/24 - loss 0.63427195 - samples/sec: 20.39 - lr: 0.020000\n"
+      "2021-09-21 21:10:38,261 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:38,388 epoch 2 - iter 2/24 - loss 0.62485841 - samples/sec: 17.70 - lr: 0.020000\n",
+      "2021-09-21 21:10:38,503 epoch 2 - iter 4/24 - loss 0.62897258 - samples/sec: 17.40 - lr: 0.020000\n",
+      "2021-09-21 21:10:38,618 epoch 2 - iter 6/24 - loss 0.63200967 - samples/sec: 17.56 - lr: 0.020000\n",
+      "2021-09-21 21:10:38,727 epoch 2 - iter 8/24 - loss 0.63752436 - samples/sec: 18.39 - lr: 0.020000\n",
+      "2021-09-21 21:10:38,839 epoch 2 - iter 10/24 - loss 0.63584303 - samples/sec: 17.95 - lr: 0.020000\n",
+      "2021-09-21 21:10:38,961 epoch 2 - iter 12/24 - loss 0.63675129 - samples/sec: 16.49 - lr: 0.020000\n",
+      "2021-09-21 21:10:39,107 epoch 2 - iter 14/24 - loss 0.63596816 - samples/sec: 13.77 - lr: 0.020000\n",
+      "2021-09-21 21:10:39,254 epoch 2 - iter 16/24 - loss 0.63575194 - samples/sec: 13.62 - lr: 0.020000\n",
+      "2021-09-21 21:10:39,411 epoch 2 - iter 18/24 - loss 0.63558986 - samples/sec: 12.81 - lr: 0.020000\n",
+      "2021-09-21 21:10:39,570 epoch 2 - iter 20/24 - loss 0.63659844 - samples/sec: 12.62 - lr: 0.020000\n",
+      "2021-09-21 21:10:39,716 epoch 2 - iter 22/24 - loss 0.63666905 - samples/sec: 13.83 - lr: 0.020000\n",
+      "2021-09-21 21:10:39,869 epoch 2 - iter 24/24 - loss 0.63524457 - samples/sec: 13.06 - lr: 0.020000\n",
+      "2021-09-21 21:10:39,871 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:39,871 EPOCH 2 done: loss 0.6352 - lr 0.0200000\n",
+      "2021-09-21 21:10:40,045 DEV : loss 0.6365517973899841 - score 0.0\n",
+      "2021-09-21 21:10:40,046 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:10:40,119 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:40,297 epoch 3 - iter 2/24 - loss 0.63990059 - samples/sec: 13.90 - lr: 0.020000\n",
+      "2021-09-21 21:10:40,419 epoch 3 - iter 4/24 - loss 0.63604109 - samples/sec: 16.56 - lr: 0.020000\n",
+      "2021-09-21 21:10:40,523 epoch 3 - iter 6/24 - loss 0.64039007 - samples/sec: 19.21 - lr: 0.020000\n",
+      "2021-09-21 21:10:40,634 epoch 3 - iter 8/24 - loss 0.64269743 - samples/sec: 18.11 - lr: 0.020000\n",
+      "2021-09-21 21:10:40,757 epoch 3 - iter 10/24 - loss 0.64145922 - samples/sec: 16.43 - lr: 0.020000\n",
+      "2021-09-21 21:10:40,867 epoch 3 - iter 12/24 - loss 0.63867949 - samples/sec: 18.42 - lr: 0.020000\n",
+      "2021-09-21 21:10:40,982 epoch 3 - iter 14/24 - loss 0.63777837 - samples/sec: 17.42 - lr: 0.020000\n",
+      "2021-09-21 21:10:41,097 epoch 3 - iter 16/24 - loss 0.63658578 - samples/sec: 17.48 - lr: 0.020000\n",
+      "2021-09-21 21:10:41,213 epoch 3 - iter 18/24 - loss 0.63739794 - samples/sec: 17.41 - lr: 0.020000\n",
+      "2021-09-21 21:10:41,323 epoch 3 - iter 20/24 - loss 0.63850563 - samples/sec: 18.38 - lr: 0.020000\n",
+      "2021-09-21 21:10:41,435 epoch 3 - iter 22/24 - loss 0.64108843 - samples/sec: 17.82 - lr: 0.020000\n",
+      "2021-09-21 21:10:41,542 epoch 3 - iter 24/24 - loss 0.63959437 - samples/sec: 18.85 - lr: 0.020000\n",
+      "2021-09-21 21:10:41,543 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:41,544 EPOCH 3 done: loss 0.6396 - lr 0.0200000\n",
+      "2021-09-21 21:10:41,726 DEV : loss 0.6365382075309753 - score 0.3333\n",
+      "2021-09-21 21:10:41,727 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:10:41,811 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:41,939 epoch 4 - iter 2/24 - loss 0.64197171 - samples/sec: 18.48 - lr: 0.020000\n",
+      "2021-09-21 21:10:42,056 epoch 4 - iter 4/24 - loss 0.63793966 - samples/sec: 17.20 - lr: 0.020000\n",
+      "2021-09-21 21:10:42,169 epoch 4 - iter 6/24 - loss 0.64190702 - samples/sec: 17.81 - lr: 0.020000\n",
+      "2021-09-21 21:10:42,277 epoch 4 - iter 8/24 - loss 0.63881804 - samples/sec: 18.50 - lr: 0.020000\n",
+      "2021-09-21 21:10:42,385 epoch 4 - iter 10/24 - loss 0.63645260 - samples/sec: 18.67 - lr: 0.020000\n",
+      "2021-09-21 21:10:42,492 epoch 4 - iter 12/24 - loss 0.63480682 - samples/sec: 18.82 - lr: 0.020000\n",
+      "2021-09-21 21:10:42,598 epoch 4 - iter 14/24 - loss 0.63323923 - samples/sec: 18.91 - lr: 0.020000\n",
+      "2021-09-21 21:10:42,704 epoch 4 - iter 16/24 - loss 0.63469630 - samples/sec: 19.03 - lr: 0.020000\n",
+      "2021-09-21 21:10:42,808 epoch 4 - iter 18/24 - loss 0.63529999 - samples/sec: 19.30 - lr: 0.020000\n",
+      "2021-09-21 21:10:42,912 epoch 4 - iter 20/24 - loss 0.63403701 - samples/sec: 19.38 - lr: 0.020000\n",
+      "2021-09-21 21:10:43,013 epoch 4 - iter 22/24 - loss 0.63366742 - samples/sec: 19.82 - lr: 0.020000\n",
+      "2021-09-21 21:10:43,113 epoch 4 - iter 24/24 - loss 0.63384144 - samples/sec: 20.01 - lr: 0.020000\n",
+      "2021-09-21 21:10:43,115 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:43,115 EPOCH 4 done: loss 0.6338 - lr 0.0200000\n",
+      "2021-09-21 21:10:43,987 DEV : loss 0.6367366313934326 - score 0.3333\n",
+      "2021-09-21 21:10:43,988 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:10:44,103 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:44,272 epoch 5 - iter 2/24 - loss 0.63443437 - samples/sec: 13.10 - lr: 0.020000\n",
+      "2021-09-21 21:10:44,406 epoch 5 - iter 4/24 - loss 0.63962203 - samples/sec: 14.99 - lr: 0.020000\n",
+      "2021-09-21 21:10:44,566 epoch 5 - iter 6/24 - loss 0.63949114 - samples/sec: 12.54 - lr: 0.020000\n",
+      "2021-09-21 21:10:44,732 epoch 5 - iter 8/24 - loss 0.63551150 - samples/sec: 12.05 - lr: 0.020000\n",
+      "2021-09-21 21:10:44,892 epoch 5 - iter 10/24 - loss 0.63904286 - samples/sec: 12.61 - lr: 0.020000\n",
+      "2021-09-21 21:10:45,040 epoch 5 - iter 12/24 - loss 0.64102450 - samples/sec: 13.54 - lr: 0.020000\n",
+      "2021-09-21 21:10:45,170 epoch 5 - iter 14/24 - loss 0.64154481 - samples/sec: 15.45 - lr: 0.020000\n",
+      "2021-09-21 21:10:45,314 epoch 5 - iter 16/24 - loss 0.64083655 - samples/sec: 13.97 - lr: 0.020000\n",
+      "2021-09-21 21:10:45,469 epoch 5 - iter 18/24 - loss 0.63987487 - samples/sec: 12.94 - lr: 0.020000\n",
+      "2021-09-21 21:10:45,616 epoch 5 - iter 20/24 - loss 0.63901967 - samples/sec: 13.64 - lr: 0.020000\n",
+      "2021-09-21 21:10:45,783 epoch 5 - iter 22/24 - loss 0.63988052 - samples/sec: 12.05 - lr: 0.020000\n",
+      "2021-09-21 21:10:45,932 epoch 5 - iter 24/24 - loss 0.64086342 - samples/sec: 13.45 - lr: 0.020000\n",
+      "2021-09-21 21:10:45,933 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:45,933 EPOCH 5 done: loss 0.6409 - lr 0.0200000\n",
+      "2021-09-21 21:10:46,671 DEV : loss 0.6365309953689575 - score 0.3333\n",
+      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:10:46,675 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:10:46,704 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:46,937 epoch 6 - iter 2/24 - loss 0.65055075 - samples/sec: 10.08 - lr: 0.010000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:42:22,910 epoch 6 - iter 6/24 - loss 0.63448671 - samples/sec: 18.96 - lr: 0.020000\n",
-      "2021-09-08 01:42:23,017 epoch 6 - iter 8/24 - loss 0.63713653 - samples/sec: 18.80 - lr: 0.020000\n",
-      "2021-09-08 01:42:23,113 epoch 6 - iter 10/24 - loss 0.63547569 - samples/sec: 20.92 - lr: 0.020000\n",
-      "2021-09-08 01:42:23,217 epoch 6 - iter 12/24 - loss 0.63410667 - samples/sec: 19.28 - lr: 0.020000\n",
-      "2021-09-08 01:42:23,319 epoch 6 - iter 14/24 - loss 0.63514361 - samples/sec: 19.71 - lr: 0.020000\n",
-      "2021-09-08 01:42:23,415 epoch 6 - iter 16/24 - loss 0.63438195 - samples/sec: 20.87 - lr: 0.020000\n",
-      "2021-09-08 01:42:23,519 epoch 6 - iter 18/24 - loss 0.63424140 - samples/sec: 19.27 - lr: 0.020000\n",
-      "2021-09-08 01:42:23,621 epoch 6 - iter 20/24 - loss 0.63509198 - samples/sec: 19.81 - lr: 0.020000\n",
-      "2021-09-08 01:42:23,722 epoch 6 - iter 22/24 - loss 0.63511169 - samples/sec: 19.96 - lr: 0.020000\n",
-      "2021-09-08 01:42:23,827 epoch 6 - iter 24/24 - loss 0.63536185 - samples/sec: 19.15 - lr: 0.020000\n",
-      "2021-09-08 01:42:23,828 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:23,828 EPOCH 6 done: loss 0.6354 - lr 0.0200000\n",
-      "2021-09-08 01:42:23,903 DEV : loss 0.6369288563728333 - score 0.0\n",
-      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:42:23,903 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:42:23,906 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:24,025 epoch 7 - iter 2/24 - loss 0.64681110 - samples/sec: 20.34 - lr: 0.010000\n",
-      "2021-09-08 01:42:24,135 epoch 7 - iter 4/24 - loss 0.63718319 - samples/sec: 18.24 - lr: 0.010000\n",
-      "2021-09-08 01:42:24,234 epoch 7 - iter 6/24 - loss 0.63373113 - samples/sec: 20.38 - lr: 0.010000\n",
-      "2021-09-08 01:42:24,337 epoch 7 - iter 8/24 - loss 0.63626724 - samples/sec: 19.47 - lr: 0.010000\n",
-      "2021-09-08 01:42:24,437 epoch 7 - iter 10/24 - loss 0.64352535 - samples/sec: 20.10 - lr: 0.010000\n",
-      "2021-09-08 01:42:24,535 epoch 7 - iter 12/24 - loss 0.64037869 - samples/sec: 20.57 - lr: 0.010000\n",
-      "2021-09-08 01:42:24,641 epoch 7 - iter 14/24 - loss 0.64432985 - samples/sec: 18.89 - lr: 0.010000\n",
-      "2021-09-08 01:42:24,754 epoch 7 - iter 16/24 - loss 0.64413752 - samples/sec: 17.82 - lr: 0.010000\n",
-      "2021-09-08 01:42:24,859 epoch 7 - iter 18/24 - loss 0.64047696 - samples/sec: 19.30 - lr: 0.010000\n",
-      "2021-09-08 01:42:24,967 epoch 7 - iter 20/24 - loss 0.64168523 - samples/sec: 18.63 - lr: 0.010000\n",
-      "2021-09-08 01:42:25,066 epoch 7 - iter 22/24 - loss 0.63958833 - samples/sec: 20.27 - lr: 0.010000\n",
-      "2021-09-08 01:42:25,168 epoch 7 - iter 24/24 - loss 0.63998229 - samples/sec: 19.75 - lr: 0.010000\n",
-      "2021-09-08 01:42:25,169 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:25,170 EPOCH 7 done: loss 0.6400 - lr 0.0100000\n",
-      "2021-09-08 01:42:25,254 DEV : loss 0.6366968154907227 - score 0.0\n",
-      "2021-09-08 01:42:25,255 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:42:25,261 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:25,382 epoch 8 - iter 2/24 - loss 0.61850840 - samples/sec: 19.57 - lr: 0.010000\n",
-      "2021-09-08 01:42:25,478 epoch 8 - iter 4/24 - loss 0.64195096 - samples/sec: 20.92 - lr: 0.010000\n",
-      "2021-09-08 01:42:25,583 epoch 8 - iter 6/24 - loss 0.64192548 - samples/sec: 19.23 - lr: 0.010000\n",
-      "2021-09-08 01:42:25,680 epoch 8 - iter 8/24 - loss 0.64326230 - samples/sec: 20.53 - lr: 0.010000\n",
-      "2021-09-08 01:42:25,789 epoch 8 - iter 10/24 - loss 0.64459569 - samples/sec: 18.50 - lr: 0.010000\n",
-      "2021-09-08 01:42:25,901 epoch 8 - iter 12/24 - loss 0.64522469 - samples/sec: 18.04 - lr: 0.010000\n",
-      "2021-09-08 01:42:26,003 epoch 8 - iter 14/24 - loss 0.64322940 - samples/sec: 19.54 - lr: 0.010000\n",
-      "2021-09-08 01:42:26,100 epoch 8 - iter 16/24 - loss 0.64286352 - samples/sec: 20.80 - lr: 0.010000\n",
-      "2021-09-08 01:42:26,194 epoch 8 - iter 18/24 - loss 0.64345324 - samples/sec: 21.40 - lr: 0.010000\n",
-      "2021-09-08 01:42:26,295 epoch 8 - iter 20/24 - loss 0.64250698 - samples/sec: 19.88 - lr: 0.010000\n",
-      "2021-09-08 01:42:26,413 epoch 8 - iter 22/24 - loss 0.64064092 - samples/sec: 16.99 - lr: 0.010000\n",
-      "2021-09-08 01:42:26,517 epoch 8 - iter 24/24 - loss 0.63946823 - samples/sec: 19.49 - lr: 0.010000\n",
-      "2021-09-08 01:42:26,518 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:26,518 EPOCH 8 done: loss 0.6395 - lr 0.0100000\n",
-      "2021-09-08 01:42:26,592 DEV : loss 0.6365182399749756 - score 0.6667\n",
-      "2021-09-08 01:42:26,593 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:42:26,596 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:26,722 epoch 9 - iter 2/24 - loss 0.63607138 - samples/sec: 18.62 - lr: 0.010000\n",
-      "2021-09-08 01:42:26,818 epoch 9 - iter 4/24 - loss 0.63682978 - samples/sec: 20.82 - lr: 0.010000\n",
-      "2021-09-08 01:42:26,925 epoch 9 - iter 6/24 - loss 0.63369700 - samples/sec: 18.75 - lr: 0.010000\n",
-      "2021-09-08 01:42:27,035 epoch 9 - iter 8/24 - loss 0.63347661 - samples/sec: 18.41 - lr: 0.010000\n",
-      "2021-09-08 01:42:27,137 epoch 9 - iter 10/24 - loss 0.63141208 - samples/sec: 19.56 - lr: 0.010000\n",
-      "2021-09-08 01:42:27,238 epoch 9 - iter 12/24 - loss 0.62893128 - samples/sec: 19.91 - lr: 0.010000\n",
-      "2021-09-08 01:42:27,350 epoch 9 - iter 14/24 - loss 0.62919268 - samples/sec: 17.99 - lr: 0.010000\n",
-      "2021-09-08 01:42:27,449 epoch 9 - iter 16/24 - loss 0.62882371 - samples/sec: 20.39 - lr: 0.010000\n",
-      "2021-09-08 01:42:27,554 epoch 9 - iter 18/24 - loss 0.62900491 - samples/sec: 19.13 - lr: 0.010000\n",
-      "2021-09-08 01:42:27,662 epoch 9 - iter 20/24 - loss 0.62645890 - samples/sec: 18.59 - lr: 0.010000\n",
-      "2021-09-08 01:42:27,772 epoch 9 - iter 22/24 - loss 0.62692388 - samples/sec: 18.28 - lr: 0.010000\n",
-      "2021-09-08 01:42:27,873 epoch 9 - iter 24/24 - loss 0.62543581 - samples/sec: 19.97 - lr: 0.010000\n",
-      "2021-09-08 01:42:27,874 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:27,875 EPOCH 9 done: loss 0.6254 - lr 0.0100000\n",
-      "2021-09-08 01:42:27,962 DEV : loss 0.6368680000305176 - score 0.3333\n",
-      "2021-09-08 01:42:27,963 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:42:27,965 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:28,094 epoch 10 - iter 2/24 - loss 0.65563843 - samples/sec: 18.06 - lr: 0.010000\n",
-      "2021-09-08 01:42:28,212 epoch 10 - iter 4/24 - loss 0.64794663 - samples/sec: 17.08 - lr: 0.010000\n",
-      "2021-09-08 01:42:28,323 epoch 10 - iter 6/24 - loss 0.65183474 - samples/sec: 18.19 - lr: 0.010000\n",
-      "2021-09-08 01:42:28,429 epoch 10 - iter 8/24 - loss 0.64598367 - samples/sec: 19.09 - lr: 0.010000\n",
-      "2021-09-08 01:42:28,526 epoch 10 - iter 10/24 - loss 0.64269907 - samples/sec: 20.52 - lr: 0.010000\n",
-      "2021-09-08 01:42:28,629 epoch 10 - iter 12/24 - loss 0.63937239 - samples/sec: 19.65 - lr: 0.010000\n",
-      "2021-09-08 01:42:28,726 epoch 10 - iter 14/24 - loss 0.63457845 - samples/sec: 20.59 - lr: 0.010000\n",
-      "2021-09-08 01:42:28,824 epoch 10 - iter 16/24 - loss 0.63113055 - samples/sec: 20.62 - lr: 0.010000\n",
-      "2021-09-08 01:42:28,923 epoch 10 - iter 18/24 - loss 0.63063957 - samples/sec: 20.28 - lr: 0.010000\n",
-      "2021-09-08 01:42:29,021 epoch 10 - iter 20/24 - loss 0.63256508 - samples/sec: 20.65 - lr: 0.010000\n",
-      "2021-09-08 01:42:29,132 epoch 10 - iter 22/24 - loss 0.63166924 - samples/sec: 18.02 - lr: 0.010000\n",
-      "2021-09-08 01:42:29,247 epoch 10 - iter 24/24 - loss 0.63101404 - samples/sec: 17.45 - lr: 0.010000\n",
-      "2021-09-08 01:42:29,249 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:29,249 EPOCH 10 done: loss 0.6310 - lr 0.0100000\n",
-      "2021-09-08 01:42:29,427 DEV : loss 0.636518657207489 - score 0.3333\n",
-      "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 01:42:29,428 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:42:41,582 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:41,583 Testing using best model ...\n",
-      "2021-09-08 01:42:41,584 loading file None1/best-model.pt\n",
-      "init TARS\n",
-      "2021-09-08 01:42:47,343 \t0.3333\n"
+      "2021-09-21 21:10:47,141 epoch 6 - iter 4/24 - loss 0.64495605 - samples/sec: 9.82 - lr: 0.010000\n",
+      "2021-09-21 21:10:47,345 epoch 6 - iter 6/24 - loss 0.64122822 - samples/sec: 9.82 - lr: 0.010000\n",
+      "2021-09-21 21:10:47,532 epoch 6 - iter 8/24 - loss 0.64046945 - samples/sec: 10.75 - lr: 0.010000\n",
+      "2021-09-21 21:10:47,749 epoch 6 - iter 10/24 - loss 0.63889698 - samples/sec: 9.23 - lr: 0.010000\n",
+      "2021-09-21 21:10:47,964 epoch 6 - iter 12/24 - loss 0.63951196 - samples/sec: 9.35 - lr: 0.010000\n",
+      "2021-09-21 21:10:48,177 epoch 6 - iter 14/24 - loss 0.63829451 - samples/sec: 9.42 - lr: 0.010000\n",
+      "2021-09-21 21:10:48,381 epoch 6 - iter 16/24 - loss 0.63914367 - samples/sec: 9.81 - lr: 0.010000\n",
+      "2021-09-21 21:10:48,573 epoch 6 - iter 18/24 - loss 0.63964234 - samples/sec: 10.47 - lr: 0.010000\n",
+      "2021-09-21 21:10:48,754 epoch 6 - iter 20/24 - loss 0.63942906 - samples/sec: 11.10 - lr: 0.010000\n",
+      "2021-09-21 21:10:48,926 epoch 6 - iter 22/24 - loss 0.64020734 - samples/sec: 11.65 - lr: 0.010000\n",
+      "2021-09-21 21:10:49,068 epoch 6 - iter 24/24 - loss 0.64009086 - samples/sec: 14.17 - lr: 0.010000\n",
+      "2021-09-21 21:10:49,069 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:49,069 EPOCH 6 done: loss 0.6401 - lr 0.0100000\n",
+      "2021-09-21 21:10:51,179 DEV : loss 0.6365222930908203 - score 0.3333\n",
+      "2021-09-21 21:10:51,180 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:10:51,221 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:51,351 epoch 7 - iter 2/24 - loss 0.62828130 - samples/sec: 17.22 - lr: 0.010000\n",
+      "2021-09-21 21:10:51,466 epoch 7 - iter 4/24 - loss 0.62188959 - samples/sec: 17.59 - lr: 0.010000\n",
+      "2021-09-21 21:10:51,579 epoch 7 - iter 6/24 - loss 0.62696986 - samples/sec: 17.81 - lr: 0.010000\n",
+      "2021-09-21 21:10:51,693 epoch 7 - iter 8/24 - loss 0.62753624 - samples/sec: 17.65 - lr: 0.010000\n",
+      "2021-09-21 21:10:51,804 epoch 7 - iter 10/24 - loss 0.63028767 - samples/sec: 18.09 - lr: 0.010000\n",
+      "2021-09-21 21:10:51,917 epoch 7 - iter 12/24 - loss 0.63269571 - samples/sec: 17.80 - lr: 0.010000\n",
+      "2021-09-21 21:10:52,029 epoch 7 - iter 14/24 - loss 0.63704329 - samples/sec: 18.06 - lr: 0.010000\n",
+      "2021-09-21 21:10:52,141 epoch 7 - iter 16/24 - loss 0.63577934 - samples/sec: 18.06 - lr: 0.010000\n",
+      "2021-09-21 21:10:52,252 epoch 7 - iter 18/24 - loss 0.63620706 - samples/sec: 18.09 - lr: 0.010000\n",
+      "2021-09-21 21:10:52,360 epoch 7 - iter 20/24 - loss 0.63539682 - samples/sec: 18.54 - lr: 0.010000\n",
+      "2021-09-21 21:10:52,472 epoch 7 - iter 22/24 - loss 0.63573097 - samples/sec: 18.00 - lr: 0.010000\n",
+      "2021-09-21 21:10:52,581 epoch 7 - iter 24/24 - loss 0.63606093 - samples/sec: 18.50 - lr: 0.010000\n",
+      "2021-09-21 21:10:52,582 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:52,582 EPOCH 7 done: loss 0.6361 - lr 0.0100000\n",
+      "2021-09-21 21:10:52,765 DEV : loss 0.6367160677909851 - score 0.0\n",
+      "2021-09-21 21:10:52,766 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:10:52,869 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:52,988 epoch 8 - iter 2/24 - loss 0.64899895 - samples/sec: 18.95 - lr: 0.010000\n",
+      "2021-09-21 21:10:53,084 epoch 8 - iter 4/24 - loss 0.64412335 - samples/sec: 21.10 - lr: 0.010000\n",
+      "2021-09-21 21:10:53,177 epoch 8 - iter 6/24 - loss 0.64304264 - samples/sec: 21.57 - lr: 0.010000\n",
+      "2021-09-21 21:10:53,270 epoch 8 - iter 8/24 - loss 0.64335170 - samples/sec: 21.53 - lr: 0.010000\n",
+      "2021-09-21 21:10:53,382 epoch 8 - iter 10/24 - loss 0.64037814 - samples/sec: 17.93 - lr: 0.010000\n",
+      "2021-09-21 21:10:53,492 epoch 8 - iter 12/24 - loss 0.63945269 - samples/sec: 18.34 - lr: 0.010000\n",
+      "2021-09-21 21:10:53,602 epoch 8 - iter 14/24 - loss 0.64166342 - samples/sec: 18.25 - lr: 0.010000\n",
+      "2021-09-21 21:10:53,715 epoch 8 - iter 16/24 - loss 0.64080074 - samples/sec: 17.83 - lr: 0.010000\n",
+      "2021-09-21 21:10:53,822 epoch 8 - iter 18/24 - loss 0.64131131 - samples/sec: 18.83 - lr: 0.010000\n",
+      "2021-09-21 21:10:53,928 epoch 8 - iter 20/24 - loss 0.64026740 - samples/sec: 18.94 - lr: 0.010000\n",
+      "2021-09-21 21:10:54,040 epoch 8 - iter 22/24 - loss 0.64044560 - samples/sec: 17.94 - lr: 0.010000\n",
+      "2021-09-21 21:10:54,151 epoch 8 - iter 24/24 - loss 0.64042149 - samples/sec: 18.14 - lr: 0.010000\n",
+      "2021-09-21 21:10:54,152 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:54,152 EPOCH 8 done: loss 0.6404 - lr 0.0100000\n",
+      "2021-09-21 21:10:54,336 DEV : loss 0.6365194320678711 - score 0.3333\n",
+      "2021-09-21 21:10:54,337 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:10:54,417 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:54,540 epoch 9 - iter 2/24 - loss 0.63389915 - samples/sec: 18.45 - lr: 0.010000\n",
+      "2021-09-21 21:10:54,647 epoch 9 - iter 4/24 - loss 0.64184520 - samples/sec: 18.81 - lr: 0.010000\n",
+      "2021-09-21 21:10:54,762 epoch 9 - iter 6/24 - loss 0.64330472 - samples/sec: 17.55 - lr: 0.010000\n",
+      "2021-09-21 21:10:54,865 epoch 9 - iter 8/24 - loss 0.64083966 - samples/sec: 19.37 - lr: 0.010000\n",
+      "2021-09-21 21:10:54,977 epoch 9 - iter 10/24 - loss 0.64070603 - samples/sec: 17.97 - lr: 0.010000\n",
+      "2021-09-21 21:10:55,072 epoch 9 - iter 12/24 - loss 0.63924512 - samples/sec: 21.24 - lr: 0.010000\n",
+      "2021-09-21 21:10:55,165 epoch 9 - iter 14/24 - loss 0.64006610 - samples/sec: 21.47 - lr: 0.010000\n",
+      "2021-09-21 21:10:55,260 epoch 9 - iter 16/24 - loss 0.64009986 - samples/sec: 21.20 - lr: 0.010000\n",
+      "2021-09-21 21:10:55,366 epoch 9 - iter 18/24 - loss 0.64071821 - samples/sec: 19.05 - lr: 0.010000\n",
+      "2021-09-21 21:10:55,471 epoch 9 - iter 20/24 - loss 0.63954753 - samples/sec: 19.03 - lr: 0.010000\n",
+      "2021-09-21 21:10:55,586 epoch 9 - iter 22/24 - loss 0.63918603 - samples/sec: 17.49 - lr: 0.010000\n",
+      "2021-09-21 21:10:55,699 epoch 9 - iter 24/24 - loss 0.63951440 - samples/sec: 17.92 - lr: 0.010000\n",
+      "2021-09-21 21:10:55,700 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:55,700 EPOCH 9 done: loss 0.6395 - lr 0.0100000\n",
+      "2021-09-21 21:10:55,873 DEV : loss 0.6365883350372314 - score 0.3333\n",
+      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 21:10:55,875 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:10:55,965 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:56,093 epoch 10 - iter 2/24 - loss 0.64120689 - samples/sec: 17.99 - lr: 0.005000\n",
+      "2021-09-21 21:10:56,199 epoch 10 - iter 4/24 - loss 0.64116618 - samples/sec: 18.94 - lr: 0.005000\n",
+      "2021-09-21 21:10:56,318 epoch 10 - iter 6/24 - loss 0.64280479 - samples/sec: 16.97 - lr: 0.005000\n",
+      "2021-09-21 21:10:56,428 epoch 10 - iter 8/24 - loss 0.64132196 - samples/sec: 18.28 - lr: 0.005000\n",
+      "2021-09-21 21:10:56,538 epoch 10 - iter 10/24 - loss 0.64170412 - samples/sec: 18.20 - lr: 0.005000\n",
+      "2021-09-21 21:10:56,656 epoch 10 - iter 12/24 - loss 0.64081931 - samples/sec: 17.05 - lr: 0.005000\n",
+      "2021-09-21 21:10:56,772 epoch 10 - iter 14/24 - loss 0.63914379 - samples/sec: 17.41 - lr: 0.005000\n",
+      "2021-09-21 21:10:56,882 epoch 10 - iter 16/24 - loss 0.63747775 - samples/sec: 18.16 - lr: 0.005000\n",
+      "2021-09-21 21:10:56,982 epoch 10 - iter 18/24 - loss 0.63674971 - samples/sec: 20.25 - lr: 0.005000\n",
+      "2021-09-21 21:10:57,074 epoch 10 - iter 20/24 - loss 0.63718921 - samples/sec: 21.69 - lr: 0.005000\n",
+      "2021-09-21 21:10:57,166 epoch 10 - iter 22/24 - loss 0.63815889 - samples/sec: 21.87 - lr: 0.005000\n",
+      "2021-09-21 21:10:57,258 epoch 10 - iter 24/24 - loss 0.63706886 - samples/sec: 22.00 - lr: 0.005000\n",
+      "2021-09-21 21:10:57,259 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:57,259 EPOCH 10 done: loss 0.6371 - lr 0.0050000\n",
+      "2021-09-21 21:11:01,966 DEV : loss 0.6365358829498291 - score 0.3333\n",
+      "2021-09-21 21:11:01,967 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:11:09,620 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:09,621 Testing using best model ...\n",
+      "2021-09-21 21:11:09,645 loading file None1/best-model.pt\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:42:47,344 \n",
+      "init TARS\n",
+      "2021-09-21 21:11:16,251 \t1.0\n",
+      "2021-09-21 21:11:16,252 \n",
       "Results:\n",
-      "- F-score (micro) 0.3333\n",
-      "- F-score (macro) 0.2222\n",
-      "- Accuracy 0.3333\n",
+      "- F-score (micro) 1.0\n",
+      "- F-score (macro) 0.3333\n",
+      "- Accuracy 1.0\n",
       "\n",
       "By class:\n",
       "                                        precision    recall  f1-score   support\n",
       "\n",
-      "This text entails a positive sentiment     1.0000    0.5000    0.6667         2\n",
+      "This text entails a positive sentiment     1.0000    1.0000    1.0000         3\n",
       " This text entails a neutral sentiment     0.0000    0.0000    0.0000         0\n",
-      "This text entails a negative sentiment     0.0000    0.0000    0.0000         1\n",
+      "This text entails a negative sentiment     0.0000    0.0000    0.0000         0\n",
       "\n",
-      "                             micro avg     0.3333    0.3333    0.3333         3\n",
-      "                             macro avg     0.3333    0.1667    0.2222         3\n",
-      "                          weighted avg     0.6667    0.3333    0.4444         3\n",
-      "                           samples avg     0.3333    0.3333    0.3333         3\n",
+      "                             micro avg     1.0000    1.0000    1.0000         3\n",
+      "                             macro avg     0.3333    0.3333    0.3333         3\n",
+      "                          weighted avg     1.0000    1.0000    1.0000         3\n",
+      "                           samples avg     1.0000    1.0000    1.0000         3\n",
       "\n",
-      "2021-09-08 01:42:47,344 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:04,105 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
+      "2021-09-21 21:11:16,252 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:43,377 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:43:08,159 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:11:47,399 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 27/27 [00:00<00:00, 14426.27it/s]"
+      "100%|██████████| 27/27 [00:00<00:00, 36948.19it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:43:08,163 [b'This text entails a positive sentiment', b'This text entails a neutral sentiment', b'This text entails a negative sentiment']\n",
-      "2021-09-08 01:43:08,175 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:08,177 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:11:47,401 [b'This text entails a positive sentiment', b'This text entails a neutral sentiment', b'This text entails a negative sentiment']\n",
+      "2021-09-21 21:11:47,551 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:47,553 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -3918,26 +3936,24 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:43:08,177 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:08,178 Corpus: \"Corpus: 24 train + 3 dev + 3 test sentences\"\n",
-      "2021-09-08 01:43:08,178 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:08,178 Parameters:\n",
-      "2021-09-08 01:43:08,179  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:43:08,179  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:43:08,179  - patience: \"3\"\n",
-      "2021-09-08 01:43:08,180  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:43:08,180  - max_epochs: \"10\"\n",
-      "2021-09-08 01:43:08,180  - shuffle: \"True\"\n",
-      "2021-09-08 01:43:08,181  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:43:08,181  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:43:08,181 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:08,182 Model training base path: \"None1\"\n",
-      "2021-09-08 01:43:08,182 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:08,182 Device: cuda:1\n",
-      "2021-09-08 01:43:08,183 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:08,183 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:43:08,190 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:08,312 epoch 1 - iter 2/24 - loss 0.63308150 - samples/sec: 19.42 - lr: 0.020000\n"
+      "2021-09-21 21:11:47,553 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:47,554 Corpus: \"Corpus: 24 train + 3 dev + 3 test sentences\"\n",
+      "2021-09-21 21:11:47,554 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:47,554 Parameters:\n",
+      "2021-09-21 21:11:47,555  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:11:47,555  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:11:47,555  - patience: \"3\"\n",
+      "2021-09-21 21:11:47,555  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:11:47,556  - max_epochs: \"10\"\n",
+      "2021-09-21 21:11:47,556  - shuffle: \"True\"\n",
+      "2021-09-21 21:11:47,556  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:11:47,557  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:11:47,557 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:47,557 Model training base path: \"None1\"\n",
+      "2021-09-21 21:11:47,557 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:47,558 Device: cuda:0\n",
+      "2021-09-21 21:11:47,558 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:47,558 Embeddings storage mode: cpu\n"
      ]
     },
     {
@@ -3951,233 +3967,235 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:43:08,424 epoch 1 - iter 4/24 - loss 0.62822872 - samples/sec: 17.93 - lr: 0.020000\n",
-      "2021-09-08 01:43:08,536 epoch 1 - iter 6/24 - loss 0.63221976 - samples/sec: 18.00 - lr: 0.020000\n",
-      "2021-09-08 01:43:08,650 epoch 1 - iter 8/24 - loss 0.63272517 - samples/sec: 17.58 - lr: 0.020000\n",
-      "2021-09-08 01:43:08,765 epoch 1 - iter 10/24 - loss 0.63435624 - samples/sec: 17.59 - lr: 0.020000\n",
-      "2021-09-08 01:43:08,876 epoch 1 - iter 12/24 - loss 0.63440595 - samples/sec: 17.99 - lr: 0.020000\n",
-      "2021-09-08 01:43:08,986 epoch 1 - iter 14/24 - loss 0.63410246 - samples/sec: 18.32 - lr: 0.020000\n",
-      "2021-09-08 01:43:09,097 epoch 1 - iter 16/24 - loss 0.63598235 - samples/sec: 18.23 - lr: 0.020000\n",
-      "2021-09-08 01:43:09,204 epoch 1 - iter 18/24 - loss 0.63497508 - samples/sec: 18.92 - lr: 0.020000\n",
-      "2021-09-08 01:43:09,320 epoch 1 - iter 20/24 - loss 0.63567767 - samples/sec: 17.23 - lr: 0.020000\n",
-      "2021-09-08 01:43:09,436 epoch 1 - iter 22/24 - loss 0.63429483 - samples/sec: 17.36 - lr: 0.020000\n",
-      "2021-09-08 01:43:09,546 epoch 1 - iter 24/24 - loss 0.63366930 - samples/sec: 18.36 - lr: 0.020000\n",
-      "2021-09-08 01:43:09,547 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:09,548 EPOCH 1 done: loss 0.6337 - lr 0.0200000\n",
-      "2021-09-08 01:43:09,612 DEV : loss 0.6365950107574463 - score 0.0\n",
-      "2021-09-08 01:43:09,613 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:11:47,760 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:47,884 epoch 1 - iter 2/24 - loss 0.63854551 - samples/sec: 18.97 - lr: 0.020000\n",
+      "2021-09-21 21:11:47,999 epoch 1 - iter 4/24 - loss 0.63288741 - samples/sec: 17.51 - lr: 0.020000\n",
+      "2021-09-21 21:11:48,102 epoch 1 - iter 6/24 - loss 0.63849908 - samples/sec: 19.55 - lr: 0.020000\n",
+      "2021-09-21 21:11:48,194 epoch 1 - iter 8/24 - loss 0.63922890 - samples/sec: 21.76 - lr: 0.020000\n",
+      "2021-09-21 21:11:48,296 epoch 1 - iter 10/24 - loss 0.64012569 - samples/sec: 19.76 - lr: 0.020000\n",
+      "2021-09-21 21:11:48,395 epoch 1 - iter 12/24 - loss 0.64083049 - samples/sec: 20.32 - lr: 0.020000\n",
+      "2021-09-21 21:11:48,496 epoch 1 - iter 14/24 - loss 0.64024234 - samples/sec: 19.91 - lr: 0.020000\n",
+      "2021-09-21 21:11:48,601 epoch 1 - iter 16/24 - loss 0.63858336 - samples/sec: 19.20 - lr: 0.020000\n",
+      "2021-09-21 21:11:48,697 epoch 1 - iter 18/24 - loss 0.63800624 - samples/sec: 20.97 - lr: 0.020000\n",
+      "2021-09-21 21:11:48,789 epoch 1 - iter 20/24 - loss 0.63825461 - samples/sec: 21.68 - lr: 0.020000\n",
+      "2021-09-21 21:11:48,886 epoch 1 - iter 22/24 - loss 0.63734606 - samples/sec: 20.73 - lr: 0.020000\n",
+      "2021-09-21 21:11:48,988 epoch 1 - iter 24/24 - loss 0.63631675 - samples/sec: 19.88 - lr: 0.020000\n",
+      "2021-09-21 21:11:48,989 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:48,989 EPOCH 1 done: loss 0.6363 - lr 0.0200000\n",
+      "2021-09-21 21:11:49,171 DEV : loss 0.63651442527771 - score 0.3333\n",
+      "2021-09-21 21:11:49,172 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:43:14,999 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:15,122 epoch 2 - iter 2/24 - loss 0.62832445 - samples/sec: 18.79 - lr: 0.020000\n",
-      "2021-09-08 01:43:15,227 epoch 2 - iter 4/24 - loss 0.62964527 - samples/sec: 19.16 - lr: 0.020000\n",
-      "2021-09-08 01:43:15,331 epoch 2 - iter 6/24 - loss 0.62702576 - samples/sec: 19.31 - lr: 0.020000\n",
-      "2021-09-08 01:43:15,436 epoch 2 - iter 8/24 - loss 0.62840161 - samples/sec: 19.16 - lr: 0.020000\n",
-      "2021-09-08 01:43:15,541 epoch 2 - iter 10/24 - loss 0.63037658 - samples/sec: 19.17 - lr: 0.020000\n",
-      "2021-09-08 01:43:15,646 epoch 2 - iter 12/24 - loss 0.63145167 - samples/sec: 19.15 - lr: 0.020000\n",
-      "2021-09-08 01:43:15,754 epoch 2 - iter 14/24 - loss 0.63512066 - samples/sec: 18.70 - lr: 0.020000\n",
-      "2021-09-08 01:43:15,858 epoch 2 - iter 16/24 - loss 0.63495526 - samples/sec: 19.33 - lr: 0.020000\n",
-      "2021-09-08 01:43:15,963 epoch 2 - iter 18/24 - loss 0.63476083 - samples/sec: 19.14 - lr: 0.020000\n",
-      "2021-09-08 01:43:16,068 epoch 2 - iter 20/24 - loss 0.63360619 - samples/sec: 19.10 - lr: 0.020000\n",
-      "2021-09-08 01:43:16,173 epoch 2 - iter 22/24 - loss 0.63428519 - samples/sec: 19.18 - lr: 0.020000\n",
-      "2021-09-08 01:43:16,278 epoch 2 - iter 24/24 - loss 0.63443330 - samples/sec: 19.11 - lr: 0.020000\n",
-      "2021-09-08 01:43:16,279 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:16,279 EPOCH 2 done: loss 0.6344 - lr 0.0200000\n",
-      "2021-09-08 01:43:16,458 DEV : loss 0.6365174055099487 - score 0.3333\n",
-      "2021-09-08 01:43:16,459 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:12:01,948 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:02,066 epoch 2 - iter 2/24 - loss 0.63969147 - samples/sec: 19.52 - lr: 0.020000\n",
+      "2021-09-21 21:12:02,185 epoch 2 - iter 4/24 - loss 0.63608798 - samples/sec: 16.97 - lr: 0.020000\n",
+      "2021-09-21 21:12:02,309 epoch 2 - iter 6/24 - loss 0.64045351 - samples/sec: 16.20 - lr: 0.020000\n",
+      "2021-09-21 21:12:02,442 epoch 2 - iter 8/24 - loss 0.63780978 - samples/sec: 15.09 - lr: 0.020000\n",
+      "2021-09-21 21:12:02,583 epoch 2 - iter 10/24 - loss 0.63416530 - samples/sec: 14.29 - lr: 0.020000\n",
+      "2021-09-21 21:12:02,727 epoch 2 - iter 12/24 - loss 0.63474008 - samples/sec: 13.98 - lr: 0.020000\n",
+      "2021-09-21 21:12:02,876 epoch 2 - iter 14/24 - loss 0.63702298 - samples/sec: 13.44 - lr: 0.020000\n",
+      "2021-09-21 21:12:03,011 epoch 2 - iter 16/24 - loss 0.63775300 - samples/sec: 14.98 - lr: 0.020000\n",
+      "2021-09-21 21:12:03,138 epoch 2 - iter 18/24 - loss 0.63989697 - samples/sec: 15.77 - lr: 0.020000\n",
+      "2021-09-21 21:12:03,262 epoch 2 - iter 20/24 - loss 0.63950879 - samples/sec: 16.22 - lr: 0.020000\n",
+      "2021-09-21 21:12:03,399 epoch 2 - iter 22/24 - loss 0.63613883 - samples/sec: 14.63 - lr: 0.020000\n",
+      "2021-09-21 21:12:03,512 epoch 2 - iter 24/24 - loss 0.63526123 - samples/sec: 17.91 - lr: 0.020000\n",
+      "2021-09-21 21:12:03,513 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:03,513 EPOCH 2 done: loss 0.6353 - lr 0.0200000\n",
+      "2021-09-21 21:12:03,590 DEV : loss 0.6371603012084961 - score 0.6667\n",
+      "2021-09-21 21:12:03,590 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:43:22,863 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:23,009 epoch 3 - iter 2/24 - loss 0.63002753 - samples/sec: 16.31 - lr: 0.020000\n",
-      "2021-09-08 01:43:23,128 epoch 3 - iter 4/24 - loss 0.62951964 - samples/sec: 16.86 - lr: 0.020000\n",
-      "2021-09-08 01:43:23,240 epoch 3 - iter 6/24 - loss 0.63365926 - samples/sec: 17.96 - lr: 0.020000\n",
-      "2021-09-08 01:43:23,357 epoch 3 - iter 8/24 - loss 0.63427671 - samples/sec: 17.18 - lr: 0.020000\n",
-      "2021-09-08 01:43:23,469 epoch 3 - iter 10/24 - loss 0.63566593 - samples/sec: 17.98 - lr: 0.020000\n",
-      "2021-09-08 01:43:23,589 epoch 3 - iter 12/24 - loss 0.63784884 - samples/sec: 16.81 - lr: 0.020000\n",
-      "2021-09-08 01:43:23,719 epoch 3 - iter 14/24 - loss 0.63867314 - samples/sec: 15.47 - lr: 0.020000\n",
-      "2021-09-08 01:43:23,838 epoch 3 - iter 16/24 - loss 0.63886724 - samples/sec: 16.93 - lr: 0.020000\n",
-      "2021-09-08 01:43:23,957 epoch 3 - iter 18/24 - loss 0.63863764 - samples/sec: 16.87 - lr: 0.020000\n",
-      "2021-09-08 01:43:24,080 epoch 3 - iter 20/24 - loss 0.63668086 - samples/sec: 16.37 - lr: 0.020000\n",
-      "2021-09-08 01:43:24,203 epoch 3 - iter 22/24 - loss 0.63654324 - samples/sec: 16.42 - lr: 0.020000\n",
-      "2021-09-08 01:43:24,327 epoch 3 - iter 24/24 - loss 0.63689852 - samples/sec: 16.20 - lr: 0.020000\n",
-      "2021-09-08 01:43:24,328 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:24,328 EPOCH 3 done: loss 0.6369 - lr 0.0200000\n",
-      "2021-09-08 01:43:24,398 DEV : loss 0.6365253925323486 - score 0.3333\n",
-      "2021-09-08 01:43:24,399 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:43:24,401 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:24,549 epoch 4 - iter 2/24 - loss 0.64487085 - samples/sec: 15.21 - lr: 0.020000\n",
-      "2021-09-08 01:43:24,674 epoch 4 - iter 4/24 - loss 0.63771357 - samples/sec: 16.07 - lr: 0.020000\n",
-      "2021-09-08 01:43:24,792 epoch 4 - iter 6/24 - loss 0.63920586 - samples/sec: 17.09 - lr: 0.020000\n",
-      "2021-09-08 01:43:24,920 epoch 4 - iter 8/24 - loss 0.63682580 - samples/sec: 15.65 - lr: 0.020000\n",
-      "2021-09-08 01:43:25,046 epoch 4 - iter 10/24 - loss 0.63724447 - samples/sec: 15.97 - lr: 0.020000\n",
-      "2021-09-08 01:43:25,156 epoch 4 - iter 12/24 - loss 0.63869653 - samples/sec: 18.27 - lr: 0.020000\n",
-      "2021-09-08 01:43:25,278 epoch 4 - iter 14/24 - loss 0.63805592 - samples/sec: 16.55 - lr: 0.020000\n",
-      "2021-09-08 01:43:25,407 epoch 4 - iter 16/24 - loss 0.63827846 - samples/sec: 15.51 - lr: 0.020000\n",
-      "2021-09-08 01:43:25,522 epoch 4 - iter 18/24 - loss 0.63800944 - samples/sec: 17.53 - lr: 0.020000\n",
-      "2021-09-08 01:43:25,640 epoch 4 - iter 20/24 - loss 0.63938938 - samples/sec: 16.99 - lr: 0.020000\n",
-      "2021-09-08 01:43:25,773 epoch 4 - iter 22/24 - loss 0.63782382 - samples/sec: 15.15 - lr: 0.020000\n",
-      "2021-09-08 01:43:25,900 epoch 4 - iter 24/24 - loss 0.63776057 - samples/sec: 15.77 - lr: 0.020000\n",
-      "2021-09-08 01:43:25,902 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:25,903 EPOCH 4 done: loss 0.6378 - lr 0.0200000\n",
-      "2021-09-08 01:43:25,974 DEV : loss 0.6371406316757202 - score 0.3333\n",
-      "2021-09-08 01:43:25,977 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:43:25,981 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:26,120 epoch 5 - iter 2/24 - loss 0.63803577 - samples/sec: 16.55 - lr: 0.020000\n",
-      "2021-09-08 01:43:26,244 epoch 5 - iter 4/24 - loss 0.63492639 - samples/sec: 16.18 - lr: 0.020000\n",
-      "2021-09-08 01:43:26,394 epoch 5 - iter 6/24 - loss 0.63636108 - samples/sec: 13.42 - lr: 0.020000\n",
-      "2021-09-08 01:43:26,522 epoch 5 - iter 8/24 - loss 0.63287168 - samples/sec: 15.71 - lr: 0.020000\n",
-      "2021-09-08 01:43:26,654 epoch 5 - iter 10/24 - loss 0.63189426 - samples/sec: 15.26 - lr: 0.020000\n",
-      "2021-09-08 01:43:26,771 epoch 5 - iter 12/24 - loss 0.62885120 - samples/sec: 17.24 - lr: 0.020000\n",
-      "2021-09-08 01:43:26,891 epoch 5 - iter 14/24 - loss 0.62948014 - samples/sec: 16.76 - lr: 0.020000\n",
-      "2021-09-08 01:43:26,997 epoch 5 - iter 16/24 - loss 0.62924501 - samples/sec: 18.93 - lr: 0.020000\n",
-      "2021-09-08 01:43:27,102 epoch 5 - iter 18/24 - loss 0.63079921 - samples/sec: 19.15 - lr: 0.020000\n",
-      "2021-09-08 01:43:27,207 epoch 5 - iter 20/24 - loss 0.63130875 - samples/sec: 19.26 - lr: 0.020000\n",
-      "2021-09-08 01:43:27,317 epoch 5 - iter 22/24 - loss 0.63255168 - samples/sec: 18.18 - lr: 0.020000\n",
-      "2021-09-08 01:43:27,422 epoch 5 - iter 24/24 - loss 0.63372167 - samples/sec: 19.16 - lr: 0.020000\n",
-      "2021-09-08 01:43:27,424 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:27,424 EPOCH 5 done: loss 0.6337 - lr 0.0200000\n",
-      "2021-09-08 01:43:27,485 DEV : loss 0.6365988254547119 - score 0.0\n",
-      "2021-09-08 01:43:27,486 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:43:27,487 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:27,607 epoch 6 - iter 2/24 - loss 0.64345765 - samples/sec: 19.13 - lr: 0.020000\n",
-      "2021-09-08 01:43:27,712 epoch 6 - iter 4/24 - loss 0.64021347 - samples/sec: 19.20 - lr: 0.020000\n"
+      "2021-09-21 21:12:07,882 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:08,079 epoch 3 - iter 2/24 - loss 0.64887878 - samples/sec: 12.38 - lr: 0.020000\n",
+      "2021-09-21 21:12:08,216 epoch 3 - iter 4/24 - loss 0.63737226 - samples/sec: 14.67 - lr: 0.020000\n",
+      "2021-09-21 21:12:08,345 epoch 3 - iter 6/24 - loss 0.63951776 - samples/sec: 15.54 - lr: 0.020000\n",
+      "2021-09-21 21:12:08,478 epoch 3 - iter 8/24 - loss 0.63723041 - samples/sec: 15.07 - lr: 0.020000\n",
+      "2021-09-21 21:12:08,608 epoch 3 - iter 10/24 - loss 0.64000319 - samples/sec: 15.51 - lr: 0.020000\n",
+      "2021-09-21 21:12:08,744 epoch 3 - iter 12/24 - loss 0.63982393 - samples/sec: 14.68 - lr: 0.020000\n",
+      "2021-09-21 21:12:08,935 epoch 3 - iter 14/24 - loss 0.64446045 - samples/sec: 10.54 - lr: 0.020000\n",
+      "2021-09-21 21:12:09,098 epoch 3 - iter 16/24 - loss 0.64410709 - samples/sec: 12.28 - lr: 0.020000\n",
+      "2021-09-21 21:12:09,248 epoch 3 - iter 18/24 - loss 0.64430731 - samples/sec: 13.40 - lr: 0.020000\n",
+      "2021-09-21 21:12:09,397 epoch 3 - iter 20/24 - loss 0.64426559 - samples/sec: 13.47 - lr: 0.020000\n",
+      "2021-09-21 21:12:09,534 epoch 3 - iter 22/24 - loss 0.64454454 - samples/sec: 14.64 - lr: 0.020000\n",
+      "2021-09-21 21:12:09,694 epoch 3 - iter 24/24 - loss 0.64401225 - samples/sec: 12.60 - lr: 0.020000\n",
+      "2021-09-21 21:12:09,695 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:09,695 EPOCH 3 done: loss 0.6440 - lr 0.0200000\n",
+      "2021-09-21 21:12:09,798 DEV : loss 0.6365320682525635 - score 0.3333\n",
+      "2021-09-21 21:12:09,799 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:12:09,801 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:09,993 epoch 4 - iter 2/24 - loss 0.65314850 - samples/sec: 12.04 - lr: 0.020000\n",
+      "2021-09-21 21:12:10,146 epoch 4 - iter 4/24 - loss 0.64643249 - samples/sec: 13.15 - lr: 0.020000\n",
+      "2021-09-21 21:12:10,278 epoch 4 - iter 6/24 - loss 0.64685698 - samples/sec: 15.25 - lr: 0.020000\n",
+      "2021-09-21 21:12:10,422 epoch 4 - iter 8/24 - loss 0.64381924 - samples/sec: 13.93 - lr: 0.020000\n",
+      "2021-09-21 21:12:10,565 epoch 4 - iter 10/24 - loss 0.64015943 - samples/sec: 14.06 - lr: 0.020000\n",
+      "2021-09-21 21:12:10,699 epoch 4 - iter 12/24 - loss 0.63960046 - samples/sec: 14.97 - lr: 0.020000\n",
+      "2021-09-21 21:12:10,835 epoch 4 - iter 14/24 - loss 0.63984424 - samples/sec: 14.72 - lr: 0.020000\n",
+      "2021-09-21 21:12:10,980 epoch 4 - iter 16/24 - loss 0.63906144 - samples/sec: 13.81 - lr: 0.020000\n",
+      "2021-09-21 21:12:11,099 epoch 4 - iter 18/24 - loss 0.63856055 - samples/sec: 17.00 - lr: 0.020000\n",
+      "2021-09-21 21:12:11,261 epoch 4 - iter 20/24 - loss 0.63922720 - samples/sec: 12.34 - lr: 0.020000\n",
+      "2021-09-21 21:12:11,406 epoch 4 - iter 22/24 - loss 0.63762283 - samples/sec: 13.83 - lr: 0.020000\n",
+      "2021-09-21 21:12:11,563 epoch 4 - iter 24/24 - loss 0.63825024 - samples/sec: 12.85 - lr: 0.020000\n",
+      "2021-09-21 21:12:11,564 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:11,564 EPOCH 4 done: loss 0.6383 - lr 0.0200000\n",
+      "2021-09-21 21:12:11,673 DEV : loss 0.6367926597595215 - score 0.3333\n",
+      "2021-09-21 21:12:11,674 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:12:11,676 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:11,865 epoch 5 - iter 2/24 - loss 0.63250163 - samples/sec: 13.38 - lr: 0.020000\n",
+      "2021-09-21 21:12:12,029 epoch 5 - iter 4/24 - loss 0.63946107 - samples/sec: 12.24 - lr: 0.020000\n",
+      "2021-09-21 21:12:12,161 epoch 5 - iter 6/24 - loss 0.63564003 - samples/sec: 15.21 - lr: 0.020000\n",
+      "2021-09-21 21:12:12,307 epoch 5 - iter 8/24 - loss 0.63772132 - samples/sec: 13.78 - lr: 0.020000\n",
+      "2021-09-21 21:12:12,450 epoch 5 - iter 10/24 - loss 0.64040368 - samples/sec: 13.95 - lr: 0.020000\n",
+      "2021-09-21 21:12:12,630 epoch 5 - iter 12/24 - loss 0.64209463 - samples/sec: 11.15 - lr: 0.020000\n",
+      "2021-09-21 21:12:12,781 epoch 5 - iter 14/24 - loss 0.63887745 - samples/sec: 13.32 - lr: 0.020000\n",
+      "2021-09-21 21:12:12,958 epoch 5 - iter 16/24 - loss 0.63621347 - samples/sec: 11.35 - lr: 0.020000\n",
+      "2021-09-21 21:12:13,132 epoch 5 - iter 18/24 - loss 0.63802396 - samples/sec: 11.52 - lr: 0.020000\n",
+      "2021-09-21 21:12:13,288 epoch 5 - iter 20/24 - loss 0.63731723 - samples/sec: 12.83 - lr: 0.020000\n",
+      "2021-09-21 21:12:13,436 epoch 5 - iter 22/24 - loss 0.63860637 - samples/sec: 13.63 - lr: 0.020000\n",
+      "2021-09-21 21:12:13,596 epoch 5 - iter 24/24 - loss 0.63802989 - samples/sec: 12.51 - lr: 0.020000\n",
+      "2021-09-21 21:12:13,597 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:13,597 EPOCH 5 done: loss 0.6380 - lr 0.0200000\n",
+      "2021-09-21 21:12:13,679 DEV : loss 0.6369102001190186 - score 0.6667\n",
+      "2021-09-21 21:12:13,682 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:12:17,284 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:43:27,817 epoch 6 - iter 6/24 - loss 0.63592299 - samples/sec: 19.13 - lr: 0.020000\n",
-      "2021-09-08 01:43:27,921 epoch 6 - iter 8/24 - loss 0.63682421 - samples/sec: 19.28 - lr: 0.020000\n",
-      "2021-09-08 01:43:28,026 epoch 6 - iter 10/24 - loss 0.63726522 - samples/sec: 19.15 - lr: 0.020000\n",
-      "2021-09-08 01:43:28,130 epoch 6 - iter 12/24 - loss 0.63754166 - samples/sec: 19.28 - lr: 0.020000\n",
-      "2021-09-08 01:43:28,234 epoch 6 - iter 14/24 - loss 0.63644360 - samples/sec: 19.34 - lr: 0.020000\n",
-      "2021-09-08 01:43:28,338 epoch 6 - iter 16/24 - loss 0.63678388 - samples/sec: 19.34 - lr: 0.020000\n",
-      "2021-09-08 01:43:28,447 epoch 6 - iter 18/24 - loss 0.63634651 - samples/sec: 18.52 - lr: 0.020000\n",
-      "2021-09-08 01:43:28,551 epoch 6 - iter 20/24 - loss 0.63799483 - samples/sec: 19.22 - lr: 0.020000\n",
-      "2021-09-08 01:43:28,656 epoch 6 - iter 22/24 - loss 0.63730770 - samples/sec: 19.19 - lr: 0.020000\n",
-      "2021-09-08 01:43:28,766 epoch 6 - iter 24/24 - loss 0.63656270 - samples/sec: 18.34 - lr: 0.020000\n",
-      "2021-09-08 01:43:28,767 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:28,767 EPOCH 6 done: loss 0.6366 - lr 0.0200000\n",
-      "2021-09-08 01:43:28,923 DEV : loss 0.6365501880645752 - score 0.0\n",
-      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:43:28,923 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:43:29,025 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:29,144 epoch 7 - iter 2/24 - loss 0.65361363 - samples/sec: 19.23 - lr: 0.010000\n",
-      "2021-09-08 01:43:29,249 epoch 7 - iter 4/24 - loss 0.64785680 - samples/sec: 19.18 - lr: 0.010000\n",
-      "2021-09-08 01:43:29,353 epoch 7 - iter 6/24 - loss 0.64163977 - samples/sec: 19.28 - lr: 0.010000\n",
-      "2021-09-08 01:43:29,458 epoch 7 - iter 8/24 - loss 0.63914054 - samples/sec: 19.27 - lr: 0.010000\n",
-      "2021-09-08 01:43:29,562 epoch 7 - iter 10/24 - loss 0.63834221 - samples/sec: 19.34 - lr: 0.010000\n",
-      "2021-09-08 01:43:29,670 epoch 7 - iter 12/24 - loss 0.63696726 - samples/sec: 18.60 - lr: 0.010000\n",
-      "2021-09-08 01:43:29,775 epoch 7 - iter 14/24 - loss 0.63603225 - samples/sec: 19.14 - lr: 0.010000\n",
-      "2021-09-08 01:43:29,885 epoch 7 - iter 16/24 - loss 0.63885741 - samples/sec: 18.26 - lr: 0.010000\n",
-      "2021-09-08 01:43:29,992 epoch 7 - iter 18/24 - loss 0.63862252 - samples/sec: 18.85 - lr: 0.010000\n",
-      "2021-09-08 01:43:30,096 epoch 7 - iter 20/24 - loss 0.63783053 - samples/sec: 19.30 - lr: 0.010000\n",
-      "2021-09-08 01:43:30,201 epoch 7 - iter 22/24 - loss 0.63928759 - samples/sec: 19.14 - lr: 0.010000\n",
-      "2021-09-08 01:43:30,307 epoch 7 - iter 24/24 - loss 0.64050401 - samples/sec: 18.93 - lr: 0.010000\n",
-      "2021-09-08 01:43:30,308 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:30,309 EPOCH 7 done: loss 0.6405 - lr 0.0100000\n",
-      "2021-09-08 01:43:31,236 DEV : loss 0.6365354061126709 - score 0.0\n",
-      "2021-09-08 01:43:31,237 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:43:31,239 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:31,374 epoch 8 - iter 2/24 - loss 0.63501781 - samples/sec: 16.99 - lr: 0.010000\n",
-      "2021-09-08 01:43:31,490 epoch 8 - iter 4/24 - loss 0.64600629 - samples/sec: 17.30 - lr: 0.010000\n",
-      "2021-09-08 01:43:31,606 epoch 8 - iter 6/24 - loss 0.64642032 - samples/sec: 17.33 - lr: 0.010000\n",
-      "2021-09-08 01:43:31,722 epoch 8 - iter 8/24 - loss 0.64619292 - samples/sec: 17.36 - lr: 0.010000\n",
-      "2021-09-08 01:43:31,851 epoch 8 - iter 10/24 - loss 0.64444977 - samples/sec: 15.61 - lr: 0.010000\n",
-      "2021-09-08 01:43:31,970 epoch 8 - iter 12/24 - loss 0.64339516 - samples/sec: 16.98 - lr: 0.010000\n",
-      "2021-09-08 01:43:32,101 epoch 8 - iter 14/24 - loss 0.64258201 - samples/sec: 15.33 - lr: 0.010000\n",
-      "2021-09-08 01:43:32,224 epoch 8 - iter 16/24 - loss 0.64061524 - samples/sec: 16.36 - lr: 0.010000\n",
-      "2021-09-08 01:43:32,349 epoch 8 - iter 18/24 - loss 0.63941040 - samples/sec: 16.11 - lr: 0.010000\n",
-      "2021-09-08 01:43:32,463 epoch 8 - iter 20/24 - loss 0.63897237 - samples/sec: 17.68 - lr: 0.010000\n",
-      "2021-09-08 01:43:32,594 epoch 8 - iter 22/24 - loss 0.63882173 - samples/sec: 15.35 - lr: 0.010000\n",
-      "2021-09-08 01:43:32,718 epoch 8 - iter 24/24 - loss 0.63788490 - samples/sec: 16.16 - lr: 0.010000\n",
-      "2021-09-08 01:43:32,719 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:32,720 EPOCH 8 done: loss 0.6379 - lr 0.0100000\n",
-      "2021-09-08 01:43:32,792 DEV : loss 0.6365883350372314 - score 0.3333\n",
-      "2021-09-08 01:43:32,793 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:43:32,795 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:32,939 epoch 9 - iter 2/24 - loss 0.62589183 - samples/sec: 15.65 - lr: 0.010000\n",
-      "2021-09-08 01:43:33,063 epoch 9 - iter 4/24 - loss 0.63045211 - samples/sec: 16.34 - lr: 0.010000\n",
-      "2021-09-08 01:43:33,182 epoch 9 - iter 6/24 - loss 0.63005015 - samples/sec: 16.82 - lr: 0.010000\n",
-      "2021-09-08 01:43:33,299 epoch 9 - iter 8/24 - loss 0.63205536 - samples/sec: 17.20 - lr: 0.010000\n",
-      "2021-09-08 01:43:33,414 epoch 9 - iter 10/24 - loss 0.63247905 - samples/sec: 17.47 - lr: 0.010000\n",
-      "2021-09-08 01:43:33,544 epoch 9 - iter 12/24 - loss 0.63554751 - samples/sec: 15.52 - lr: 0.010000\n",
-      "2021-09-08 01:43:33,663 epoch 9 - iter 14/24 - loss 0.63502788 - samples/sec: 16.92 - lr: 0.010000\n",
-      "2021-09-08 01:43:33,782 epoch 9 - iter 16/24 - loss 0.63478659 - samples/sec: 16.82 - lr: 0.010000\n",
-      "2021-09-08 01:43:33,892 epoch 9 - iter 18/24 - loss 0.63434898 - samples/sec: 18.20 - lr: 0.010000\n",
-      "2021-09-08 01:43:34,018 epoch 9 - iter 20/24 - loss 0.63310916 - samples/sec: 16.02 - lr: 0.010000\n",
-      "2021-09-08 01:43:34,143 epoch 9 - iter 22/24 - loss 0.63200060 - samples/sec: 16.03 - lr: 0.010000\n",
-      "2021-09-08 01:43:34,262 epoch 9 - iter 24/24 - loss 0.63225439 - samples/sec: 16.87 - lr: 0.010000\n",
-      "2021-09-08 01:43:34,263 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:34,264 EPOCH 9 done: loss 0.6323 - lr 0.0100000\n",
-      "2021-09-08 01:43:34,335 DEV : loss 0.6365259885787964 - score 0.3333\n",
-      "2021-09-08 01:43:34,336 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:43:34,339 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:34,480 epoch 10 - iter 2/24 - loss 0.63752133 - samples/sec: 16.45 - lr: 0.010000\n",
-      "2021-09-08 01:43:34,603 epoch 10 - iter 4/24 - loss 0.63595493 - samples/sec: 16.37 - lr: 0.010000\n",
-      "2021-09-08 01:43:34,726 epoch 10 - iter 6/24 - loss 0.63250418 - samples/sec: 16.25 - lr: 0.010000\n",
-      "2021-09-08 01:43:34,854 epoch 10 - iter 8/24 - loss 0.63902099 - samples/sec: 15.70 - lr: 0.010000\n",
-      "2021-09-08 01:43:34,977 epoch 10 - iter 10/24 - loss 0.63808714 - samples/sec: 16.41 - lr: 0.010000\n",
-      "2021-09-08 01:43:35,096 epoch 10 - iter 12/24 - loss 0.64105589 - samples/sec: 16.80 - lr: 0.010000\n",
-      "2021-09-08 01:43:35,219 epoch 10 - iter 14/24 - loss 0.64227799 - samples/sec: 16.42 - lr: 0.010000\n",
-      "2021-09-08 01:43:35,338 epoch 10 - iter 16/24 - loss 0.64063388 - samples/sec: 16.84 - lr: 0.010000\n",
-      "2021-09-08 01:43:35,448 epoch 10 - iter 18/24 - loss 0.63906562 - samples/sec: 18.24 - lr: 0.010000\n",
-      "2021-09-08 01:43:35,578 epoch 10 - iter 20/24 - loss 0.63974195 - samples/sec: 15.50 - lr: 0.010000\n",
-      "2021-09-08 01:43:35,698 epoch 10 - iter 22/24 - loss 0.63988552 - samples/sec: 16.75 - lr: 0.010000\n",
-      "2021-09-08 01:43:35,813 epoch 10 - iter 24/24 - loss 0.64073530 - samples/sec: 17.51 - lr: 0.010000\n",
-      "2021-09-08 01:43:35,814 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:35,814 EPOCH 10 done: loss 0.6407 - lr 0.0100000\n",
-      "2021-09-08 01:43:35,883 DEV : loss 0.636676549911499 - score 0.3333\n",
-      "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 01:43:35,883 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:43:39,604 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:39,605 Testing using best model ...\n",
-      "2021-09-08 01:43:39,606 loading file None1/best-model.pt\n",
-      "init TARS\n",
-      "2021-09-08 01:43:45,134 \t0.3333\n"
+      "2021-09-21 21:12:17,447 epoch 6 - iter 2/24 - loss 0.61745849 - samples/sec: 15.41 - lr: 0.020000\n",
+      "2021-09-21 21:12:17,605 epoch 6 - iter 4/24 - loss 0.63006422 - samples/sec: 12.70 - lr: 0.020000\n",
+      "2021-09-21 21:12:17,747 epoch 6 - iter 6/24 - loss 0.63836197 - samples/sec: 14.16 - lr: 0.020000\n",
+      "2021-09-21 21:12:17,889 epoch 6 - iter 8/24 - loss 0.63758851 - samples/sec: 14.13 - lr: 0.020000\n",
+      "2021-09-21 21:12:17,999 epoch 6 - iter 10/24 - loss 0.63862590 - samples/sec: 18.28 - lr: 0.020000\n",
+      "2021-09-21 21:12:18,105 epoch 6 - iter 12/24 - loss 0.63925752 - samples/sec: 18.90 - lr: 0.020000\n",
+      "2021-09-21 21:12:18,231 epoch 6 - iter 14/24 - loss 0.64037373 - samples/sec: 15.98 - lr: 0.020000\n",
+      "2021-09-21 21:12:18,372 epoch 6 - iter 16/24 - loss 0.64041195 - samples/sec: 14.21 - lr: 0.020000\n",
+      "2021-09-21 21:12:18,508 epoch 6 - iter 18/24 - loss 0.64033503 - samples/sec: 14.79 - lr: 0.020000\n",
+      "2021-09-21 21:12:18,645 epoch 6 - iter 20/24 - loss 0.63931781 - samples/sec: 14.60 - lr: 0.020000\n",
+      "2021-09-21 21:12:18,782 epoch 6 - iter 22/24 - loss 0.63967372 - samples/sec: 14.68 - lr: 0.020000\n",
+      "2021-09-21 21:12:18,933 epoch 6 - iter 24/24 - loss 0.63819120 - samples/sec: 13.30 - lr: 0.020000\n",
+      "2021-09-21 21:12:18,934 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:18,935 EPOCH 6 done: loss 0.6382 - lr 0.0200000\n",
+      "2021-09-21 21:12:19,029 DEV : loss 0.6365169286727905 - score 0.3333\n",
+      "2021-09-21 21:12:19,030 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:12:19,032 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:19,208 epoch 7 - iter 2/24 - loss 0.64010334 - samples/sec: 13.26 - lr: 0.020000\n",
+      "2021-09-21 21:12:19,347 epoch 7 - iter 4/24 - loss 0.63788772 - samples/sec: 14.49 - lr: 0.020000\n",
+      "2021-09-21 21:12:19,501 epoch 7 - iter 6/24 - loss 0.63827243 - samples/sec: 13.07 - lr: 0.020000\n",
+      "2021-09-21 21:12:19,635 epoch 7 - iter 8/24 - loss 0.63800819 - samples/sec: 14.91 - lr: 0.020000\n",
+      "2021-09-21 21:12:19,776 epoch 7 - iter 10/24 - loss 0.64013406 - samples/sec: 14.28 - lr: 0.020000\n",
+      "2021-09-21 21:12:19,932 epoch 7 - iter 12/24 - loss 0.63998004 - samples/sec: 12.85 - lr: 0.020000\n",
+      "2021-09-21 21:12:20,059 epoch 7 - iter 14/24 - loss 0.63847189 - samples/sec: 15.95 - lr: 0.020000\n",
+      "2021-09-21 21:12:20,209 epoch 7 - iter 16/24 - loss 0.63819049 - samples/sec: 13.35 - lr: 0.020000\n",
+      "2021-09-21 21:12:20,357 epoch 7 - iter 18/24 - loss 0.63825081 - samples/sec: 13.60 - lr: 0.020000\n",
+      "2021-09-21 21:12:20,537 epoch 7 - iter 20/24 - loss 0.63717729 - samples/sec: 11.13 - lr: 0.020000\n",
+      "2021-09-21 21:12:20,723 epoch 7 - iter 22/24 - loss 0.63562548 - samples/sec: 10.80 - lr: 0.020000\n",
+      "2021-09-21 21:12:20,911 epoch 7 - iter 24/24 - loss 0.63585291 - samples/sec: 10.68 - lr: 0.020000\n",
+      "2021-09-21 21:12:20,912 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:20,912 EPOCH 7 done: loss 0.6359 - lr 0.0200000\n",
+      "2021-09-21 21:12:21,058 DEV : loss 0.6365651488304138 - score 0.3333\n",
+      "2021-09-21 21:12:21,058 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:12:21,060 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:21,282 epoch 8 - iter 2/24 - loss 0.63371319 - samples/sec: 11.17 - lr: 0.020000\n",
+      "2021-09-21 21:12:21,463 epoch 8 - iter 4/24 - loss 0.63515322 - samples/sec: 11.07 - lr: 0.020000\n",
+      "2021-09-21 21:12:21,641 epoch 8 - iter 6/24 - loss 0.63869072 - samples/sec: 11.29 - lr: 0.020000\n",
+      "2021-09-21 21:12:21,802 epoch 8 - iter 8/24 - loss 0.63450849 - samples/sec: 12.52 - lr: 0.020000\n",
+      "2021-09-21 21:12:21,986 epoch 8 - iter 10/24 - loss 0.63620669 - samples/sec: 10.86 - lr: 0.020000\n",
+      "2021-09-21 21:12:22,176 epoch 8 - iter 12/24 - loss 0.63642205 - samples/sec: 10.58 - lr: 0.020000\n",
+      "2021-09-21 21:12:22,331 epoch 8 - iter 14/24 - loss 0.63707321 - samples/sec: 12.98 - lr: 0.020000\n",
+      "2021-09-21 21:12:22,472 epoch 8 - iter 16/24 - loss 0.63739183 - samples/sec: 14.23 - lr: 0.020000\n",
+      "2021-09-21 21:12:22,609 epoch 8 - iter 18/24 - loss 0.63841451 - samples/sec: 14.71 - lr: 0.020000\n",
+      "2021-09-21 21:12:22,774 epoch 8 - iter 20/24 - loss 0.63644490 - samples/sec: 12.18 - lr: 0.020000\n",
+      "2021-09-21 21:12:22,924 epoch 8 - iter 22/24 - loss 0.63690618 - samples/sec: 13.36 - lr: 0.020000\n",
+      "2021-09-21 21:12:23,084 epoch 8 - iter 24/24 - loss 0.63739749 - samples/sec: 12.59 - lr: 0.020000\n",
+      "2021-09-21 21:12:23,085 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:23,085 EPOCH 8 done: loss 0.6374 - lr 0.0200000\n",
+      "2021-09-21 21:12:23,266 DEV : loss 0.6365205645561218 - score 0.6667\n",
+      "2021-09-21 21:12:23,267 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:12:32,511 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:32,669 epoch 9 - iter 2/24 - loss 0.63053206 - samples/sec: 14.81 - lr: 0.020000\n",
+      "2021-09-21 21:12:32,774 epoch 9 - iter 4/24 - loss 0.62918296 - samples/sec: 19.16 - lr: 0.020000\n",
+      "2021-09-21 21:12:32,890 epoch 9 - iter 6/24 - loss 0.63154669 - samples/sec: 17.36 - lr: 0.020000\n",
+      "2021-09-21 21:12:33,012 epoch 9 - iter 8/24 - loss 0.63029849 - samples/sec: 16.43 - lr: 0.020000\n",
+      "2021-09-21 21:12:33,126 epoch 9 - iter 10/24 - loss 0.62790889 - samples/sec: 17.72 - lr: 0.020000\n",
+      "2021-09-21 21:12:33,252 epoch 9 - iter 12/24 - loss 0.62870184 - samples/sec: 15.91 - lr: 0.020000\n",
+      "2021-09-21 21:12:33,360 epoch 9 - iter 14/24 - loss 0.62923072 - samples/sec: 18.58 - lr: 0.020000\n",
+      "2021-09-21 21:12:33,465 epoch 9 - iter 16/24 - loss 0.63088056 - samples/sec: 19.19 - lr: 0.020000\n",
+      "2021-09-21 21:12:33,581 epoch 9 - iter 18/24 - loss 0.63274993 - samples/sec: 17.27 - lr: 0.020000\n",
+      "2021-09-21 21:12:33,704 epoch 9 - iter 20/24 - loss 0.63350497 - samples/sec: 16.27 - lr: 0.020000\n",
+      "2021-09-21 21:12:33,821 epoch 9 - iter 22/24 - loss 0.63223626 - samples/sec: 17.18 - lr: 0.020000\n",
+      "2021-09-21 21:12:33,948 epoch 9 - iter 24/24 - loss 0.63305300 - samples/sec: 15.84 - lr: 0.020000\n",
+      "2021-09-21 21:12:33,949 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:33,949 EPOCH 9 done: loss 0.6331 - lr 0.0200000\n",
+      "2021-09-21 21:12:35,852 DEV : loss 0.6367690563201904 - score 0.3333\n",
+      "2021-09-21 21:12:35,853 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:12:35,855 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:36,024 epoch 10 - iter 2/24 - loss 0.63516214 - samples/sec: 13.14 - lr: 0.020000\n",
+      "2021-09-21 21:12:36,173 epoch 10 - iter 4/24 - loss 0.64000614 - samples/sec: 13.45 - lr: 0.020000\n",
+      "2021-09-21 21:12:36,333 epoch 10 - iter 6/24 - loss 0.63447623 - samples/sec: 12.55 - lr: 0.020000\n",
+      "2021-09-21 21:12:36,481 epoch 10 - iter 8/24 - loss 0.63432381 - samples/sec: 13.57 - lr: 0.020000\n",
+      "2021-09-21 21:12:36,629 epoch 10 - iter 10/24 - loss 0.63574992 - samples/sec: 13.59 - lr: 0.020000\n",
+      "2021-09-21 21:12:36,780 epoch 10 - iter 12/24 - loss 0.63501025 - samples/sec: 13.28 - lr: 0.020000\n",
+      "2021-09-21 21:12:36,916 epoch 10 - iter 14/24 - loss 0.63322066 - samples/sec: 14.76 - lr: 0.020000\n",
+      "2021-09-21 21:12:37,059 epoch 10 - iter 16/24 - loss 0.63316594 - samples/sec: 14.09 - lr: 0.020000\n",
+      "2021-09-21 21:12:37,215 epoch 10 - iter 18/24 - loss 0.63493012 - samples/sec: 12.87 - lr: 0.020000\n",
+      "2021-09-21 21:12:37,359 epoch 10 - iter 20/24 - loss 0.63384239 - samples/sec: 13.89 - lr: 0.020000\n",
+      "2021-09-21 21:12:37,523 epoch 10 - iter 22/24 - loss 0.63420021 - samples/sec: 12.32 - lr: 0.020000\n",
+      "2021-09-21 21:12:37,670 epoch 10 - iter 24/24 - loss 0.63479556 - samples/sec: 13.64 - lr: 0.020000\n",
+      "2021-09-21 21:12:37,671 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:37,672 EPOCH 10 done: loss 0.6348 - lr 0.0200000\n",
+      "2021-09-21 21:12:37,820 DEV : loss 0.6371375322341919 - score 0.0\n",
+      "2021-09-21 21:12:37,821 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:12:41,879 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:41,881 Testing using best model ...\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:43:45,134 \n",
+      "2021-09-21 21:12:41,882 loading file None1/best-model.pt\n",
+      "init TARS\n",
+      "2021-09-21 21:12:47,112 \t0.3333\n",
+      "2021-09-21 21:12:47,113 \n",
       "Results:\n",
       "- F-score (micro) 0.3333\n",
-      "- F-score (macro) 0.3333\n",
+      "- F-score (macro) 0.1667\n",
       "- Accuracy 0.3333\n",
       "\n",
       "By class:\n",
       "                                        precision    recall  f1-score   support\n",
       "\n",
       "This text entails a positive sentiment     0.0000    0.0000    0.0000         1\n",
-      " This text entails a neutral sentiment     1.0000    1.0000    1.0000         1\n",
-      "This text entails a negative sentiment     0.0000    0.0000    0.0000         1\n",
+      " This text entails a neutral sentiment     0.5000    0.5000    0.5000         2\n",
+      "This text entails a negative sentiment     0.0000    0.0000    0.0000         0\n",
       "\n",
       "                             micro avg     0.3333    0.3333    0.3333         3\n",
-      "                             macro avg     0.3333    0.3333    0.3333         3\n",
+      "                             macro avg     0.1667    0.1667    0.1667         3\n",
       "                          weighted avg     0.3333    0.3333    0.3333         3\n",
       "                           samples avg     0.3333    0.3333    0.3333         3\n",
       "\n",
-      "2021-09-08 01:43:45,135 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:01,666 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
+      "2021-09-21 21:12:47,113 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:13,869 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:44:05,764 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:13:18,419 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 27/27 [00:00<00:00, 26873.80it/s]"
+      "100%|██████████| 27/27 [00:00<00:00, 40897.87it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:44:05,767 [b'This text entails a positive sentiment', b'This text entails a neutral sentiment', b'This text entails a negative sentiment']\n",
-      "2021-09-08 01:44:05,923 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:05,925 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:13:18,422 [b'This text entails a positive sentiment', b'This text entails a neutral sentiment', b'This text entails a negative sentiment']\n",
+      "2021-09-21 21:13:18,430 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:18,432 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -4490,24 +4508,26 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:44:05,926 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:05,926 Corpus: \"Corpus: 24 train + 3 dev + 3 test sentences\"\n",
-      "2021-09-08 01:44:05,927 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:05,927 Parameters:\n",
-      "2021-09-08 01:44:05,927  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:44:05,927  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:44:05,928  - patience: \"3\"\n",
-      "2021-09-08 01:44:05,928  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:44:05,928  - max_epochs: \"10\"\n",
-      "2021-09-08 01:44:05,928  - shuffle: \"True\"\n",
-      "2021-09-08 01:44:05,929  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:44:05,929  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:44:05,929 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:05,930 Model training base path: \"None1\"\n",
-      "2021-09-08 01:44:05,930 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:05,930 Device: cuda:1\n",
-      "2021-09-08 01:44:05,930 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:05,931 Embeddings storage mode: cpu\n"
+      "2021-09-21 21:13:18,433 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:18,433 Corpus: \"Corpus: 24 train + 3 dev + 3 test sentences\"\n",
+      "2021-09-21 21:13:18,433 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:18,434 Parameters:\n",
+      "2021-09-21 21:13:18,434  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:13:18,434  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:13:18,434  - patience: \"3\"\n",
+      "2021-09-21 21:13:18,435  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:13:18,435  - max_epochs: \"10\"\n",
+      "2021-09-21 21:13:18,435  - shuffle: \"True\"\n",
+      "2021-09-21 21:13:18,435  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:13:18,436  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:13:18,436 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:18,436 Model training base path: \"None1\"\n",
+      "2021-09-21 21:13:18,437 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:18,437 Device: cuda:0\n",
+      "2021-09-21 21:13:18,437 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:18,437 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:13:18,444 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:18,610 epoch 1 - iter 2/24 - loss 0.60926500 - samples/sec: 14.97 - lr: 0.020000\n"
      ]
     },
     {
@@ -4521,236 +4541,235 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:44:06,106 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:06,242 epoch 1 - iter 2/24 - loss 0.63519093 - samples/sec: 18.21 - lr: 0.020000\n",
-      "2021-09-08 01:44:06,362 epoch 1 - iter 4/24 - loss 0.63920321 - samples/sec: 16.67 - lr: 0.020000\n",
-      "2021-09-08 01:44:06,479 epoch 1 - iter 6/24 - loss 0.63649066 - samples/sec: 17.16 - lr: 0.020000\n",
-      "2021-09-08 01:44:06,602 epoch 1 - iter 8/24 - loss 0.63665318 - samples/sec: 16.39 - lr: 0.020000\n",
-      "2021-09-08 01:44:06,738 epoch 1 - iter 10/24 - loss 0.64039026 - samples/sec: 14.75 - lr: 0.020000\n",
-      "2021-09-08 01:44:06,869 epoch 1 - iter 12/24 - loss 0.64020487 - samples/sec: 15.34 - lr: 0.020000\n",
-      "2021-09-08 01:44:06,999 epoch 1 - iter 14/24 - loss 0.63667800 - samples/sec: 15.51 - lr: 0.020000\n",
-      "2021-09-08 01:44:07,121 epoch 1 - iter 16/24 - loss 0.63436997 - samples/sec: 16.46 - lr: 0.020000\n",
-      "2021-09-08 01:44:07,252 epoch 1 - iter 18/24 - loss 0.63322773 - samples/sec: 15.32 - lr: 0.020000\n",
-      "2021-09-08 01:44:07,389 epoch 1 - iter 20/24 - loss 0.63384084 - samples/sec: 14.76 - lr: 0.020000\n",
-      "2021-09-08 01:44:07,524 epoch 1 - iter 22/24 - loss 0.63300778 - samples/sec: 14.88 - lr: 0.020000\n",
-      "2021-09-08 01:44:07,653 epoch 1 - iter 24/24 - loss 0.63378512 - samples/sec: 15.55 - lr: 0.020000\n",
-      "2021-09-08 01:44:07,654 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:07,655 EPOCH 1 done: loss 0.6338 - lr 0.0200000\n",
-      "2021-09-08 01:44:07,827 DEV : loss 0.6373426914215088 - score 0.0\n",
-      "2021-09-08 01:44:07,827 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:44:13,370 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:13,641 epoch 2 - iter 2/24 - loss 0.63177511 - samples/sec: 8.02 - lr: 0.020000\n",
-      "2021-09-08 01:44:13,765 epoch 2 - iter 4/24 - loss 0.63903955 - samples/sec: 16.13 - lr: 0.020000\n",
-      "2021-09-08 01:44:13,880 epoch 2 - iter 6/24 - loss 0.64066152 - samples/sec: 17.55 - lr: 0.020000\n",
-      "2021-09-08 01:44:14,000 epoch 2 - iter 8/24 - loss 0.64055396 - samples/sec: 16.71 - lr: 0.020000\n",
-      "2021-09-08 01:44:14,131 epoch 2 - iter 10/24 - loss 0.63825353 - samples/sec: 15.36 - lr: 0.020000\n",
-      "2021-09-08 01:44:14,252 epoch 2 - iter 12/24 - loss 0.63825142 - samples/sec: 16.53 - lr: 0.020000\n",
-      "2021-09-08 01:44:14,379 epoch 2 - iter 14/24 - loss 0.63816860 - samples/sec: 15.88 - lr: 0.020000\n",
-      "2021-09-08 01:44:14,493 epoch 2 - iter 16/24 - loss 0.63683310 - samples/sec: 17.68 - lr: 0.020000\n",
-      "2021-09-08 01:44:14,606 epoch 2 - iter 18/24 - loss 0.63787510 - samples/sec: 17.71 - lr: 0.020000\n",
-      "2021-09-08 01:44:14,735 epoch 2 - iter 20/24 - loss 0.63629164 - samples/sec: 15.65 - lr: 0.020000\n",
-      "2021-09-08 01:44:14,871 epoch 2 - iter 22/24 - loss 0.63615030 - samples/sec: 14.70 - lr: 0.020000\n",
-      "2021-09-08 01:44:15,002 epoch 2 - iter 24/24 - loss 0.63566449 - samples/sec: 15.37 - lr: 0.020000\n",
-      "2021-09-08 01:44:15,003 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:15,003 EPOCH 2 done: loss 0.6357 - lr 0.0200000\n",
-      "2021-09-08 01:44:15,076 DEV : loss 0.6366764307022095 - score 0.0\n",
-      "2021-09-08 01:44:15,077 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:13:18,826 epoch 1 - iter 4/24 - loss 0.62857077 - samples/sec: 9.30 - lr: 0.020000\n",
+      "2021-09-21 21:13:18,957 epoch 1 - iter 6/24 - loss 0.63026896 - samples/sec: 15.31 - lr: 0.020000\n",
+      "2021-09-21 21:13:19,064 epoch 1 - iter 8/24 - loss 0.63210157 - samples/sec: 18.75 - lr: 0.020000\n",
+      "2021-09-21 21:13:19,170 epoch 1 - iter 10/24 - loss 0.63639687 - samples/sec: 19.09 - lr: 0.020000\n",
+      "2021-09-21 21:13:19,279 epoch 1 - iter 12/24 - loss 0.63851524 - samples/sec: 18.36 - lr: 0.020000\n",
+      "2021-09-21 21:13:19,417 epoch 1 - iter 14/24 - loss 0.63744688 - samples/sec: 14.55 - lr: 0.020000\n",
+      "2021-09-21 21:13:19,559 epoch 1 - iter 16/24 - loss 0.63679960 - samples/sec: 14.21 - lr: 0.020000\n",
+      "2021-09-21 21:13:19,702 epoch 1 - iter 18/24 - loss 0.63648297 - samples/sec: 13.99 - lr: 0.020000\n",
+      "2021-09-21 21:13:19,848 epoch 1 - iter 20/24 - loss 0.63456359 - samples/sec: 13.78 - lr: 0.020000\n",
+      "2021-09-21 21:13:20,007 epoch 1 - iter 22/24 - loss 0.63506769 - samples/sec: 12.65 - lr: 0.020000\n",
+      "2021-09-21 21:13:20,154 epoch 1 - iter 24/24 - loss 0.63443789 - samples/sec: 13.67 - lr: 0.020000\n",
+      "2021-09-21 21:13:20,155 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:20,156 EPOCH 1 done: loss 0.6344 - lr 0.0200000\n",
+      "2021-09-21 21:13:20,349 DEV : loss 0.6365970373153687 - score 0.6667\n",
+      "2021-09-21 21:13:20,350 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:44:19,559 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:19,719 epoch 3 - iter 2/24 - loss 0.63955402 - samples/sec: 15.17 - lr: 0.020000\n",
-      "2021-09-08 01:44:19,836 epoch 3 - iter 4/24 - loss 0.63329141 - samples/sec: 17.28 - lr: 0.020000\n",
-      "2021-09-08 01:44:19,948 epoch 3 - iter 6/24 - loss 0.64089505 - samples/sec: 17.90 - lr: 0.020000\n",
-      "2021-09-08 01:44:20,068 epoch 3 - iter 8/24 - loss 0.64263274 - samples/sec: 16.74 - lr: 0.020000\n",
-      "2021-09-08 01:44:20,188 epoch 3 - iter 10/24 - loss 0.64362962 - samples/sec: 16.79 - lr: 0.020000\n",
-      "2021-09-08 01:44:20,313 epoch 3 - iter 12/24 - loss 0.64372469 - samples/sec: 16.07 - lr: 0.020000\n",
-      "2021-09-08 01:44:20,437 epoch 3 - iter 14/24 - loss 0.64163741 - samples/sec: 16.11 - lr: 0.020000\n",
-      "2021-09-08 01:44:20,557 epoch 3 - iter 16/24 - loss 0.64245773 - samples/sec: 16.85 - lr: 0.020000\n",
-      "2021-09-08 01:44:20,682 epoch 3 - iter 18/24 - loss 0.64170982 - samples/sec: 15.99 - lr: 0.020000\n",
-      "2021-09-08 01:44:20,796 epoch 3 - iter 20/24 - loss 0.64209858 - samples/sec: 17.77 - lr: 0.020000\n",
-      "2021-09-08 01:44:20,919 epoch 3 - iter 22/24 - loss 0.64134380 - samples/sec: 16.28 - lr: 0.020000\n",
-      "2021-09-08 01:44:21,050 epoch 3 - iter 24/24 - loss 0.64118593 - samples/sec: 15.39 - lr: 0.020000\n",
-      "2021-09-08 01:44:21,051 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:21,051 EPOCH 3 done: loss 0.6412 - lr 0.0200000\n",
-      "2021-09-08 01:44:21,117 DEV : loss 0.6366477012634277 - score 0.0\n",
-      "2021-09-08 01:44:21,118 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:13:25,231 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:25,481 epoch 2 - iter 2/24 - loss 0.64239377 - samples/sec: 9.69 - lr: 0.020000\n",
+      "2021-09-21 21:13:25,683 epoch 2 - iter 4/24 - loss 0.65177901 - samples/sec: 9.94 - lr: 0.020000\n",
+      "2021-09-21 21:13:25,853 epoch 2 - iter 6/24 - loss 0.64129071 - samples/sec: 11.81 - lr: 0.020000\n",
+      "2021-09-21 21:13:26,035 epoch 2 - iter 8/24 - loss 0.63914562 - samples/sec: 11.02 - lr: 0.020000\n",
+      "2021-09-21 21:13:26,205 epoch 2 - iter 10/24 - loss 0.64252373 - samples/sec: 11.79 - lr: 0.020000\n",
+      "2021-09-21 21:13:26,378 epoch 2 - iter 12/24 - loss 0.64479199 - samples/sec: 11.59 - lr: 0.020000\n",
+      "2021-09-21 21:13:26,549 epoch 2 - iter 14/24 - loss 0.64160436 - samples/sec: 11.70 - lr: 0.020000\n",
+      "2021-09-21 21:13:26,688 epoch 2 - iter 16/24 - loss 0.63879372 - samples/sec: 14.52 - lr: 0.020000\n",
+      "2021-09-21 21:13:26,850 epoch 2 - iter 18/24 - loss 0.64008741 - samples/sec: 12.36 - lr: 0.020000\n",
+      "2021-09-21 21:13:27,001 epoch 2 - iter 20/24 - loss 0.63870215 - samples/sec: 13.30 - lr: 0.020000\n",
+      "2021-09-21 21:13:27,149 epoch 2 - iter 22/24 - loss 0.63953457 - samples/sec: 13.61 - lr: 0.020000\n",
+      "2021-09-21 21:13:27,291 epoch 2 - iter 24/24 - loss 0.63948249 - samples/sec: 14.20 - lr: 0.020000\n",
+      "2021-09-21 21:13:27,292 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:27,293 EPOCH 2 done: loss 0.6395 - lr 0.0200000\n",
+      "2021-09-21 21:13:27,512 DEV : loss 0.6365600824356079 - score 0.6667\n",
+      "2021-09-21 21:13:27,516 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:44:25,305 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:25,433 epoch 4 - iter 2/24 - loss 0.63770381 - samples/sec: 18.74 - lr: 0.020000\n",
-      "2021-09-08 01:44:25,537 epoch 4 - iter 4/24 - loss 0.64435649 - samples/sec: 19.35 - lr: 0.020000\n",
-      "2021-09-08 01:44:25,640 epoch 4 - iter 6/24 - loss 0.63942374 - samples/sec: 19.59 - lr: 0.020000\n",
-      "2021-09-08 01:44:25,744 epoch 4 - iter 8/24 - loss 0.63582800 - samples/sec: 19.32 - lr: 0.020000\n",
-      "2021-09-08 01:44:25,853 epoch 4 - iter 10/24 - loss 0.63678290 - samples/sec: 18.52 - lr: 0.020000\n",
-      "2021-09-08 01:44:25,974 epoch 4 - iter 12/24 - loss 0.64010126 - samples/sec: 16.60 - lr: 0.020000\n",
-      "2021-09-08 01:44:26,088 epoch 4 - iter 14/24 - loss 0.64044380 - samples/sec: 17.60 - lr: 0.020000\n",
-      "2021-09-08 01:44:26,198 epoch 4 - iter 16/24 - loss 0.63938911 - samples/sec: 18.26 - lr: 0.020000\n",
-      "2021-09-08 01:44:26,304 epoch 4 - iter 18/24 - loss 0.64042741 - samples/sec: 19.09 - lr: 0.020000\n",
-      "2021-09-08 01:44:26,414 epoch 4 - iter 20/24 - loss 0.64044793 - samples/sec: 18.20 - lr: 0.020000\n",
-      "2021-09-08 01:44:26,526 epoch 4 - iter 22/24 - loss 0.63984256 - samples/sec: 17.99 - lr: 0.020000\n",
-      "2021-09-08 01:44:26,633 epoch 4 - iter 24/24 - loss 0.63827784 - samples/sec: 18.81 - lr: 0.020000\n",
-      "2021-09-08 01:44:26,634 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:26,635 EPOCH 4 done: loss 0.6383 - lr 0.0200000\n",
-      "2021-09-08 01:44:26,701 DEV : loss 0.6365256309509277 - score 0.3333\n",
-      "2021-09-08 01:44:26,702 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:44:30,623 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:30,770 epoch 5 - iter 2/24 - loss 0.63373154 - samples/sec: 16.65 - lr: 0.020000\n",
-      "2021-09-08 01:44:30,888 epoch 5 - iter 4/24 - loss 0.62881938 - samples/sec: 17.06 - lr: 0.020000\n",
-      "2021-09-08 01:44:30,989 epoch 5 - iter 6/24 - loss 0.63386964 - samples/sec: 19.98 - lr: 0.020000\n",
-      "2021-09-08 01:44:31,090 epoch 5 - iter 8/24 - loss 0.63328034 - samples/sec: 19.88 - lr: 0.020000\n",
-      "2021-09-08 01:44:31,201 epoch 5 - iter 10/24 - loss 0.63292434 - samples/sec: 18.18 - lr: 0.020000\n",
-      "2021-09-08 01:44:31,306 epoch 5 - iter 12/24 - loss 0.63254265 - samples/sec: 19.08 - lr: 0.020000\n",
-      "2021-09-08 01:44:31,418 epoch 5 - iter 14/24 - loss 0.63278309 - samples/sec: 17.97 - lr: 0.020000\n",
-      "2021-09-08 01:44:31,522 epoch 5 - iter 16/24 - loss 0.63354184 - samples/sec: 19.29 - lr: 0.020000\n",
-      "2021-09-08 01:44:31,641 epoch 5 - iter 18/24 - loss 0.63268480 - samples/sec: 16.89 - lr: 0.020000\n",
-      "2021-09-08 01:44:31,739 epoch 5 - iter 20/24 - loss 0.63369580 - samples/sec: 20.46 - lr: 0.020000\n",
-      "2021-09-08 01:44:31,861 epoch 5 - iter 22/24 - loss 0.63216659 - samples/sec: 16.53 - lr: 0.020000\n",
-      "2021-09-08 01:44:31,967 epoch 5 - iter 24/24 - loss 0.63474289 - samples/sec: 18.89 - lr: 0.020000\n",
-      "2021-09-08 01:44:31,969 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:31,969 EPOCH 5 done: loss 0.6347 - lr 0.0200000\n",
-      "2021-09-08 01:44:32,154 DEV : loss 0.6366846561431885 - score 0.0\n",
-      "2021-09-08 01:44:32,155 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:44:32,228 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:13:38,722 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:38,849 epoch 3 - iter 2/24 - loss 0.63091624 - samples/sec: 18.33 - lr: 0.020000\n",
+      "2021-09-21 21:13:38,960 epoch 3 - iter 4/24 - loss 0.63437980 - samples/sec: 18.16 - lr: 0.020000\n",
+      "2021-09-21 21:13:39,069 epoch 3 - iter 6/24 - loss 0.63292538 - samples/sec: 18.34 - lr: 0.020000\n",
+      "2021-09-21 21:13:39,178 epoch 3 - iter 8/24 - loss 0.63154587 - samples/sec: 18.58 - lr: 0.020000\n",
+      "2021-09-21 21:13:39,277 epoch 3 - iter 10/24 - loss 0.63274370 - samples/sec: 20.33 - lr: 0.020000\n",
+      "2021-09-21 21:13:39,372 epoch 3 - iter 12/24 - loss 0.63218083 - samples/sec: 21.17 - lr: 0.020000\n",
+      "2021-09-21 21:13:39,463 epoch 3 - iter 14/24 - loss 0.63229106 - samples/sec: 22.14 - lr: 0.020000\n",
+      "2021-09-21 21:13:39,554 epoch 3 - iter 16/24 - loss 0.63203904 - samples/sec: 22.02 - lr: 0.020000\n",
+      "2021-09-21 21:13:39,647 epoch 3 - iter 18/24 - loss 0.63292665 - samples/sec: 21.65 - lr: 0.020000\n",
+      "2021-09-21 21:13:39,739 epoch 3 - iter 20/24 - loss 0.63408796 - samples/sec: 21.92 - lr: 0.020000\n",
+      "2021-09-21 21:13:39,830 epoch 3 - iter 22/24 - loss 0.63235233 - samples/sec: 22.11 - lr: 0.020000\n",
+      "2021-09-21 21:13:39,921 epoch 3 - iter 24/24 - loss 0.63143189 - samples/sec: 21.93 - lr: 0.020000\n",
+      "2021-09-21 21:13:39,922 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:39,923 EPOCH 3 done: loss 0.6314 - lr 0.0200000\n",
+      "2021-09-21 21:13:43,982 DEV : loss 0.6367173194885254 - score 0.6667\n",
+      "2021-09-21 21:13:43,983 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:13:44,061 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:44,231 epoch 4 - iter 2/24 - loss 0.63023454 - samples/sec: 14.68 - lr: 0.020000\n",
+      "2021-09-21 21:13:44,370 epoch 4 - iter 4/24 - loss 0.62504934 - samples/sec: 14.45 - lr: 0.020000\n",
+      "2021-09-21 21:13:44,496 epoch 4 - iter 6/24 - loss 0.63195874 - samples/sec: 16.07 - lr: 0.020000\n",
+      "2021-09-21 21:13:44,626 epoch 4 - iter 8/24 - loss 0.63070416 - samples/sec: 15.45 - lr: 0.020000\n",
+      "2021-09-21 21:13:44,767 epoch 4 - iter 10/24 - loss 0.62651547 - samples/sec: 14.25 - lr: 0.020000\n",
+      "2021-09-21 21:13:44,889 epoch 4 - iter 12/24 - loss 0.62818419 - samples/sec: 16.56 - lr: 0.020000\n",
+      "2021-09-21 21:13:45,006 epoch 4 - iter 14/24 - loss 0.62917539 - samples/sec: 17.15 - lr: 0.020000\n",
+      "2021-09-21 21:13:45,143 epoch 4 - iter 16/24 - loss 0.62895645 - samples/sec: 14.59 - lr: 0.020000\n",
+      "2021-09-21 21:13:45,263 epoch 4 - iter 18/24 - loss 0.63076257 - samples/sec: 16.75 - lr: 0.020000\n",
+      "2021-09-21 21:13:45,393 epoch 4 - iter 20/24 - loss 0.63159176 - samples/sec: 15.52 - lr: 0.020000\n",
+      "2021-09-21 21:13:45,513 epoch 4 - iter 22/24 - loss 0.63172642 - samples/sec: 16.68 - lr: 0.020000\n",
+      "2021-09-21 21:13:45,636 epoch 4 - iter 24/24 - loss 0.63247525 - samples/sec: 16.35 - lr: 0.020000\n",
+      "2021-09-21 21:13:45,637 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:45,637 EPOCH 4 done: loss 0.6325 - lr 0.0200000\n",
+      "2021-09-21 21:13:45,782 DEV : loss 0.6366326808929443 - score 0.6667\n",
+      "2021-09-21 21:13:45,783 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:13:45,928 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:46,078 epoch 5 - iter 2/24 - loss 0.61655611 - samples/sec: 15.78 - lr: 0.020000\n",
+      "2021-09-21 21:13:46,201 epoch 5 - iter 4/24 - loss 0.62771302 - samples/sec: 16.30 - lr: 0.020000\n",
+      "2021-09-21 21:13:46,327 epoch 5 - iter 6/24 - loss 0.63537470 - samples/sec: 15.87 - lr: 0.020000\n",
+      "2021-09-21 21:13:46,445 epoch 5 - iter 8/24 - loss 0.63146784 - samples/sec: 17.08 - lr: 0.020000\n",
+      "2021-09-21 21:13:46,590 epoch 5 - iter 10/24 - loss 0.63461292 - samples/sec: 13.97 - lr: 0.020000\n",
+      "2021-09-21 21:13:46,709 epoch 5 - iter 12/24 - loss 0.63367186 - samples/sec: 16.94 - lr: 0.020000\n",
+      "2021-09-21 21:13:46,826 epoch 5 - iter 14/24 - loss 0.63329874 - samples/sec: 17.15 - lr: 0.020000\n",
+      "2021-09-21 21:13:46,948 epoch 5 - iter 16/24 - loss 0.63362205 - samples/sec: 16.41 - lr: 0.020000\n",
+      "2021-09-21 21:13:47,066 epoch 5 - iter 18/24 - loss 0.63459704 - samples/sec: 17.03 - lr: 0.020000\n",
+      "2021-09-21 21:13:47,200 epoch 5 - iter 20/24 - loss 0.63781659 - samples/sec: 14.98 - lr: 0.020000\n",
+      "2021-09-21 21:13:47,317 epoch 5 - iter 22/24 - loss 0.63591473 - samples/sec: 17.25 - lr: 0.020000\n",
+      "2021-09-21 21:13:47,432 epoch 5 - iter 24/24 - loss 0.63590120 - samples/sec: 17.46 - lr: 0.020000\n",
+      "2021-09-21 21:13:47,433 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:47,433 EPOCH 5 done: loss 0.6359 - lr 0.0200000\n",
+      "2021-09-21 21:13:47,512 DEV : loss 0.6365262866020203 - score 0.3333\n",
+      "2021-09-21 21:13:47,513 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:13:47,533 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:47,648 epoch 6 - iter 2/24 - loss 0.62418735 - samples/sec: 19.78 - lr: 0.020000\n",
+      "2021-09-21 21:13:47,750 epoch 6 - iter 4/24 - loss 0.62298563 - samples/sec: 19.66 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:44:32,351 epoch 6 - iter 2/24 - loss 0.62862533 - samples/sec: 19.06 - lr: 0.020000\n",
-      "2021-09-08 01:44:32,455 epoch 6 - iter 4/24 - loss 0.62969095 - samples/sec: 19.33 - lr: 0.020000\n",
-      "2021-09-08 01:44:32,560 epoch 6 - iter 6/24 - loss 0.63139584 - samples/sec: 19.07 - lr: 0.020000\n",
-      "2021-09-08 01:44:32,656 epoch 6 - iter 8/24 - loss 0.63065921 - samples/sec: 21.07 - lr: 0.020000\n",
-      "2021-09-08 01:44:32,759 epoch 6 - iter 10/24 - loss 0.62571866 - samples/sec: 19.48 - lr: 0.020000\n",
-      "2021-09-08 01:44:32,859 epoch 6 - iter 12/24 - loss 0.62905105 - samples/sec: 20.02 - lr: 0.020000\n",
-      "2021-09-08 01:44:32,969 epoch 6 - iter 14/24 - loss 0.62621798 - samples/sec: 18.39 - lr: 0.020000\n",
-      "2021-09-08 01:44:33,077 epoch 6 - iter 16/24 - loss 0.62746528 - samples/sec: 18.58 - lr: 0.020000\n",
-      "2021-09-08 01:44:33,180 epoch 6 - iter 18/24 - loss 0.62956996 - samples/sec: 19.42 - lr: 0.020000\n",
-      "2021-09-08 01:44:33,273 epoch 6 - iter 20/24 - loss 0.62839147 - samples/sec: 21.68 - lr: 0.020000\n",
-      "2021-09-08 01:44:33,378 epoch 6 - iter 22/24 - loss 0.62926520 - samples/sec: 19.22 - lr: 0.020000\n",
-      "2021-09-08 01:44:33,483 epoch 6 - iter 24/24 - loss 0.62894599 - samples/sec: 19.07 - lr: 0.020000\n",
-      "2021-09-08 01:44:33,484 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:33,485 EPOCH 6 done: loss 0.6289 - lr 0.0200000\n",
-      "2021-09-08 01:44:33,732 DEV : loss 0.6367440223693848 - score 0.3333\n",
-      "2021-09-08 01:44:33,735 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:44:33,737 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:33,862 epoch 7 - iter 2/24 - loss 0.64949563 - samples/sec: 18.95 - lr: 0.020000\n",
-      "2021-09-08 01:44:33,972 epoch 7 - iter 4/24 - loss 0.64572710 - samples/sec: 18.18 - lr: 0.020000\n",
-      "2021-09-08 01:44:34,080 epoch 7 - iter 6/24 - loss 0.63920604 - samples/sec: 18.75 - lr: 0.020000\n",
-      "2021-09-08 01:44:34,180 epoch 7 - iter 8/24 - loss 0.63684544 - samples/sec: 20.11 - lr: 0.020000\n",
-      "2021-09-08 01:44:34,284 epoch 7 - iter 10/24 - loss 0.63799752 - samples/sec: 19.18 - lr: 0.020000\n",
-      "2021-09-08 01:44:34,392 epoch 7 - iter 12/24 - loss 0.63883497 - samples/sec: 18.70 - lr: 0.020000\n",
-      "2021-09-08 01:44:34,504 epoch 7 - iter 14/24 - loss 0.63681454 - samples/sec: 17.99 - lr: 0.020000\n",
-      "2021-09-08 01:44:34,601 epoch 7 - iter 16/24 - loss 0.63653612 - samples/sec: 20.87 - lr: 0.020000\n",
-      "2021-09-08 01:44:34,704 epoch 7 - iter 18/24 - loss 0.63588572 - samples/sec: 19.47 - lr: 0.020000\n",
-      "2021-09-08 01:44:34,806 epoch 7 - iter 20/24 - loss 0.63329434 - samples/sec: 19.79 - lr: 0.020000\n",
-      "2021-09-08 01:44:34,911 epoch 7 - iter 22/24 - loss 0.63268898 - samples/sec: 19.20 - lr: 0.020000\n",
-      "2021-09-08 01:44:35,026 epoch 7 - iter 24/24 - loss 0.63352431 - samples/sec: 17.59 - lr: 0.020000\n",
-      "2021-09-08 01:44:35,027 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:35,028 EPOCH 7 done: loss 0.6335 - lr 0.0200000\n",
-      "2021-09-08 01:44:35,098 DEV : loss 0.636554479598999 - score 0.0\n",
-      "2021-09-08 01:44:35,099 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:44:35,103 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:35,227 epoch 8 - iter 2/24 - loss 0.66730598 - samples/sec: 19.01 - lr: 0.020000\n",
-      "2021-09-08 01:44:35,331 epoch 8 - iter 4/24 - loss 0.64940295 - samples/sec: 19.41 - lr: 0.020000\n",
-      "2021-09-08 01:44:35,448 epoch 8 - iter 6/24 - loss 0.64772029 - samples/sec: 17.10 - lr: 0.020000\n",
-      "2021-09-08 01:44:35,567 epoch 8 - iter 8/24 - loss 0.64287223 - samples/sec: 17.06 - lr: 0.020000\n",
-      "2021-09-08 01:44:35,690 epoch 8 - iter 10/24 - loss 0.63955561 - samples/sec: 16.33 - lr: 0.020000\n",
-      "2021-09-08 01:44:35,798 epoch 8 - iter 12/24 - loss 0.63905858 - samples/sec: 18.70 - lr: 0.020000\n",
-      "2021-09-08 01:44:35,907 epoch 8 - iter 14/24 - loss 0.63841650 - samples/sec: 18.40 - lr: 0.020000\n",
-      "2021-09-08 01:44:36,010 epoch 8 - iter 16/24 - loss 0.63851465 - samples/sec: 19.65 - lr: 0.020000\n",
-      "2021-09-08 01:44:36,123 epoch 8 - iter 18/24 - loss 0.63890827 - samples/sec: 17.82 - lr: 0.020000\n",
-      "2021-09-08 01:44:36,223 epoch 8 - iter 20/24 - loss 0.63845141 - samples/sec: 20.15 - lr: 0.020000\n",
-      "2021-09-08 01:44:36,322 epoch 8 - iter 22/24 - loss 0.63887303 - samples/sec: 20.37 - lr: 0.020000\n",
-      "2021-09-08 01:44:36,424 epoch 8 - iter 24/24 - loss 0.63799461 - samples/sec: 19.76 - lr: 0.020000\n",
-      "2021-09-08 01:44:36,425 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:36,426 EPOCH 8 done: loss 0.6380 - lr 0.0200000\n",
-      "2021-09-08 01:44:36,502 DEV : loss 0.6365698575973511 - score 0.0\n",
-      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:44:36,503 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:44:36,507 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:36,625 epoch 9 - iter 2/24 - loss 0.64721170 - samples/sec: 19.87 - lr: 0.010000\n",
-      "2021-09-08 01:44:36,723 epoch 9 - iter 4/24 - loss 0.64332868 - samples/sec: 20.70 - lr: 0.010000\n",
-      "2021-09-08 01:44:36,828 epoch 9 - iter 6/24 - loss 0.63558804 - samples/sec: 19.11 - lr: 0.010000\n",
-      "2021-09-08 01:44:36,931 epoch 9 - iter 8/24 - loss 0.64241906 - samples/sec: 19.57 - lr: 0.010000\n",
-      "2021-09-08 01:44:37,033 epoch 9 - iter 10/24 - loss 0.63947213 - samples/sec: 19.85 - lr: 0.010000\n",
-      "2021-09-08 01:44:37,135 epoch 9 - iter 12/24 - loss 0.63831455 - samples/sec: 19.73 - lr: 0.010000\n",
-      "2021-09-08 01:44:37,236 epoch 9 - iter 14/24 - loss 0.64226097 - samples/sec: 20.00 - lr: 0.010000\n",
-      "2021-09-08 01:44:37,333 epoch 9 - iter 16/24 - loss 0.64202892 - samples/sec: 20.60 - lr: 0.010000\n",
-      "2021-09-08 01:44:37,428 epoch 9 - iter 18/24 - loss 0.64243032 - samples/sec: 21.39 - lr: 0.010000\n",
-      "2021-09-08 01:44:37,523 epoch 9 - iter 20/24 - loss 0.64348910 - samples/sec: 21.09 - lr: 0.010000\n",
-      "2021-09-08 01:44:37,625 epoch 9 - iter 22/24 - loss 0.64390482 - samples/sec: 19.68 - lr: 0.010000\n",
-      "2021-09-08 01:44:37,722 epoch 9 - iter 24/24 - loss 0.64306804 - samples/sec: 20.86 - lr: 0.010000\n",
-      "2021-09-08 01:44:37,724 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:37,724 EPOCH 9 done: loss 0.6431 - lr 0.0100000\n",
-      "2021-09-08 01:44:37,797 DEV : loss 0.6366032958030701 - score 0.3333\n",
-      "2021-09-08 01:44:37,798 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:44:37,800 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:37,928 epoch 10 - iter 2/24 - loss 0.62636766 - samples/sec: 18.18 - lr: 0.010000\n",
-      "2021-09-08 01:44:38,032 epoch 10 - iter 4/24 - loss 0.63207379 - samples/sec: 19.36 - lr: 0.010000\n",
-      "2021-09-08 01:44:38,134 epoch 10 - iter 6/24 - loss 0.63730939 - samples/sec: 19.82 - lr: 0.010000\n",
-      "2021-09-08 01:44:38,232 epoch 10 - iter 8/24 - loss 0.64005375 - samples/sec: 20.51 - lr: 0.010000\n",
-      "2021-09-08 01:44:38,336 epoch 10 - iter 10/24 - loss 0.63801755 - samples/sec: 19.55 - lr: 0.010000\n",
-      "2021-09-08 01:44:38,437 epoch 10 - iter 12/24 - loss 0.63824792 - samples/sec: 19.92 - lr: 0.010000\n",
-      "2021-09-08 01:44:38,541 epoch 10 - iter 14/24 - loss 0.63789132 - samples/sec: 19.47 - lr: 0.010000\n",
-      "2021-09-08 01:44:38,651 epoch 10 - iter 16/24 - loss 0.63742351 - samples/sec: 18.17 - lr: 0.010000\n",
-      "2021-09-08 01:44:38,750 epoch 10 - iter 18/24 - loss 0.63786284 - samples/sec: 20.29 - lr: 0.010000\n",
-      "2021-09-08 01:44:38,855 epoch 10 - iter 20/24 - loss 0.63906524 - samples/sec: 19.27 - lr: 0.010000\n",
-      "2021-09-08 01:44:38,959 epoch 10 - iter 22/24 - loss 0.63945796 - samples/sec: 19.18 - lr: 0.010000\n",
-      "2021-09-08 01:44:39,067 epoch 10 - iter 24/24 - loss 0.63936968 - samples/sec: 18.61 - lr: 0.010000\n",
-      "2021-09-08 01:44:39,068 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:39,069 EPOCH 10 done: loss 0.6394 - lr 0.0100000\n",
-      "2021-09-08 01:44:39,149 DEV : loss 0.6366710662841797 - score 0.0\n",
-      "2021-09-08 01:44:39,150 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:44:43,341 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:43,342 Testing using best model ...\n"
+      "2021-09-21 21:13:47,850 epoch 6 - iter 6/24 - loss 0.63107493 - samples/sec: 20.20 - lr: 0.020000\n",
+      "2021-09-21 21:13:47,950 epoch 6 - iter 8/24 - loss 0.63846882 - samples/sec: 20.04 - lr: 0.020000\n",
+      "2021-09-21 21:13:48,048 epoch 6 - iter 10/24 - loss 0.63843389 - samples/sec: 20.57 - lr: 0.020000\n",
+      "2021-09-21 21:13:48,149 epoch 6 - iter 12/24 - loss 0.63969716 - samples/sec: 20.03 - lr: 0.020000\n",
+      "2021-09-21 21:13:48,251 epoch 6 - iter 14/24 - loss 0.64040944 - samples/sec: 19.67 - lr: 0.020000\n",
+      "2021-09-21 21:13:48,361 epoch 6 - iter 16/24 - loss 0.64038200 - samples/sec: 18.32 - lr: 0.020000\n",
+      "2021-09-21 21:13:48,460 epoch 6 - iter 18/24 - loss 0.64106224 - samples/sec: 20.20 - lr: 0.020000\n",
+      "2021-09-21 21:13:48,558 epoch 6 - iter 20/24 - loss 0.64152884 - samples/sec: 20.59 - lr: 0.020000\n",
+      "2021-09-21 21:13:48,663 epoch 6 - iter 22/24 - loss 0.64099890 - samples/sec: 19.11 - lr: 0.020000\n",
+      "2021-09-21 21:13:48,763 epoch 6 - iter 24/24 - loss 0.64216842 - samples/sec: 20.13 - lr: 0.020000\n",
+      "2021-09-21 21:13:48,765 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:48,765 EPOCH 6 done: loss 0.6422 - lr 0.0200000\n",
+      "2021-09-21 21:13:48,920 DEV : loss 0.6365677714347839 - score 0.6667\n",
+      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:13:48,921 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:13:48,994 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:49,112 epoch 7 - iter 2/24 - loss 0.63841888 - samples/sec: 19.74 - lr: 0.010000\n",
+      "2021-09-21 21:13:49,223 epoch 7 - iter 4/24 - loss 0.64171691 - samples/sec: 18.16 - lr: 0.010000\n",
+      "2021-09-21 21:13:49,341 epoch 7 - iter 6/24 - loss 0.64161777 - samples/sec: 17.09 - lr: 0.010000\n",
+      "2021-09-21 21:13:49,455 epoch 7 - iter 8/24 - loss 0.63682474 - samples/sec: 17.63 - lr: 0.010000\n",
+      "2021-09-21 21:13:49,573 epoch 7 - iter 10/24 - loss 0.63629894 - samples/sec: 17.05 - lr: 0.010000\n",
+      "2021-09-21 21:13:49,704 epoch 7 - iter 12/24 - loss 0.63510905 - samples/sec: 15.28 - lr: 0.010000\n",
+      "2021-09-21 21:13:49,820 epoch 7 - iter 14/24 - loss 0.63486238 - samples/sec: 17.36 - lr: 0.010000\n",
+      "2021-09-21 21:13:49,942 epoch 7 - iter 16/24 - loss 0.63555731 - samples/sec: 16.43 - lr: 0.010000\n",
+      "2021-09-21 21:13:50,057 epoch 7 - iter 18/24 - loss 0.63603194 - samples/sec: 17.50 - lr: 0.010000\n",
+      "2021-09-21 21:13:50,183 epoch 7 - iter 20/24 - loss 0.63708464 - samples/sec: 15.90 - lr: 0.010000\n",
+      "2021-09-21 21:13:50,300 epoch 7 - iter 22/24 - loss 0.63481089 - samples/sec: 17.17 - lr: 0.010000\n",
+      "2021-09-21 21:13:50,419 epoch 7 - iter 24/24 - loss 0.63745893 - samples/sec: 17.01 - lr: 0.010000\n",
+      "2021-09-21 21:13:50,420 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:50,420 EPOCH 7 done: loss 0.6375 - lr 0.0100000\n",
+      "2021-09-21 21:13:50,579 DEV : loss 0.636551558971405 - score 0.6667\n",
+      "2021-09-21 21:13:50,580 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:13:54,399 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:54,601 epoch 8 - iter 2/24 - loss 0.62449938 - samples/sec: 12.55 - lr: 0.010000\n",
+      "2021-09-21 21:13:54,754 epoch 8 - iter 4/24 - loss 0.62506814 - samples/sec: 13.08 - lr: 0.010000\n",
+      "2021-09-21 21:13:54,880 epoch 8 - iter 6/24 - loss 0.63610059 - samples/sec: 15.94 - lr: 0.010000\n",
+      "2021-09-21 21:13:55,006 epoch 8 - iter 8/24 - loss 0.63489464 - samples/sec: 16.01 - lr: 0.010000\n",
+      "2021-09-21 21:13:55,157 epoch 8 - iter 10/24 - loss 0.63794904 - samples/sec: 13.25 - lr: 0.010000\n",
+      "2021-09-21 21:13:55,307 epoch 8 - iter 12/24 - loss 0.63699312 - samples/sec: 13.39 - lr: 0.010000\n",
+      "2021-09-21 21:13:55,436 epoch 8 - iter 14/24 - loss 0.63835316 - samples/sec: 15.65 - lr: 0.010000\n",
+      "2021-09-21 21:13:55,581 epoch 8 - iter 16/24 - loss 0.63826995 - samples/sec: 13.79 - lr: 0.010000\n",
+      "2021-09-21 21:13:55,742 epoch 8 - iter 18/24 - loss 0.63793815 - samples/sec: 12.43 - lr: 0.010000\n",
+      "2021-09-21 21:13:55,884 epoch 8 - iter 20/24 - loss 0.63807077 - samples/sec: 14.16 - lr: 0.010000\n",
+      "2021-09-21 21:13:56,032 epoch 8 - iter 22/24 - loss 0.63766750 - samples/sec: 13.62 - lr: 0.010000\n",
+      "2021-09-21 21:13:56,180 epoch 8 - iter 24/24 - loss 0.63847020 - samples/sec: 13.48 - lr: 0.010000\n",
+      "2021-09-21 21:13:56,182 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:56,182 EPOCH 8 done: loss 0.6385 - lr 0.0100000\n",
+      "2021-09-21 21:13:56,289 DEV : loss 0.6365305185317993 - score 0.6667\n",
+      "2021-09-21 21:13:56,291 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:14:00,319 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:00,504 epoch 9 - iter 2/24 - loss 0.64722782 - samples/sec: 12.65 - lr: 0.010000\n",
+      "2021-09-21 21:14:00,637 epoch 9 - iter 4/24 - loss 0.63691863 - samples/sec: 15.05 - lr: 0.010000\n",
+      "2021-09-21 21:14:00,781 epoch 9 - iter 6/24 - loss 0.63209270 - samples/sec: 13.95 - lr: 0.010000\n",
+      "2021-09-21 21:14:00,938 epoch 9 - iter 8/24 - loss 0.63333332 - samples/sec: 12.80 - lr: 0.010000\n",
+      "2021-09-21 21:14:01,086 epoch 9 - iter 10/24 - loss 0.63476210 - samples/sec: 13.55 - lr: 0.010000\n",
+      "2021-09-21 21:14:01,231 epoch 9 - iter 12/24 - loss 0.63314023 - samples/sec: 13.79 - lr: 0.010000\n",
+      "2021-09-21 21:14:01,376 epoch 9 - iter 14/24 - loss 0.63289978 - samples/sec: 13.93 - lr: 0.010000\n",
+      "2021-09-21 21:14:01,522 epoch 9 - iter 16/24 - loss 0.63159716 - samples/sec: 13.69 - lr: 0.010000\n",
+      "2021-09-21 21:14:01,668 epoch 9 - iter 18/24 - loss 0.63438393 - samples/sec: 13.74 - lr: 0.010000\n",
+      "2021-09-21 21:14:01,818 epoch 9 - iter 20/24 - loss 0.63429052 - samples/sec: 13.43 - lr: 0.010000\n",
+      "2021-09-21 21:14:01,958 epoch 9 - iter 22/24 - loss 0.63299762 - samples/sec: 14.32 - lr: 0.010000\n",
+      "2021-09-21 21:14:02,088 epoch 9 - iter 24/24 - loss 0.63310061 - samples/sec: 15.56 - lr: 0.010000\n",
+      "2021-09-21 21:14:02,089 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:02,089 EPOCH 9 done: loss 0.6331 - lr 0.0100000\n",
+      "2021-09-21 21:14:02,184 DEV : loss 0.6365377306938171 - score 0.3333\n",
+      "2021-09-21 21:14:02,185 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:14:02,187 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:02,346 epoch 10 - iter 2/24 - loss 0.63859332 - samples/sec: 14.90 - lr: 0.010000\n",
+      "2021-09-21 21:14:02,495 epoch 10 - iter 4/24 - loss 0.63364390 - samples/sec: 13.48 - lr: 0.010000\n",
+      "2021-09-21 21:14:02,615 epoch 10 - iter 6/24 - loss 0.63664794 - samples/sec: 16.84 - lr: 0.010000\n",
+      "2021-09-21 21:14:02,739 epoch 10 - iter 8/24 - loss 0.63366000 - samples/sec: 16.17 - lr: 0.010000\n",
+      "2021-09-21 21:14:02,904 epoch 10 - iter 10/24 - loss 0.63677911 - samples/sec: 12.15 - lr: 0.010000\n",
+      "2021-09-21 21:14:03,034 epoch 10 - iter 12/24 - loss 0.63616760 - samples/sec: 15.50 - lr: 0.010000\n",
+      "2021-09-21 21:14:03,149 epoch 10 - iter 14/24 - loss 0.63935332 - samples/sec: 17.39 - lr: 0.010000\n",
+      "2021-09-21 21:14:03,282 epoch 10 - iter 16/24 - loss 0.63853626 - samples/sec: 15.13 - lr: 0.010000\n",
+      "2021-09-21 21:14:03,422 epoch 10 - iter 18/24 - loss 0.63905559 - samples/sec: 14.35 - lr: 0.010000\n",
+      "2021-09-21 21:14:03,562 epoch 10 - iter 20/24 - loss 0.63906234 - samples/sec: 14.30 - lr: 0.010000\n",
+      "2021-09-21 21:14:03,678 epoch 10 - iter 22/24 - loss 0.63583708 - samples/sec: 17.39 - lr: 0.010000\n",
+      "2021-09-21 21:14:03,810 epoch 10 - iter 24/24 - loss 0.63572776 - samples/sec: 15.20 - lr: 0.010000\n",
+      "2021-09-21 21:14:03,811 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:03,811 EPOCH 10 done: loss 0.6357 - lr 0.0100000\n",
+      "2021-09-21 21:14:03,918 DEV : loss 0.6365141868591309 - score 0.6667\n",
+      "2021-09-21 21:14:03,919 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:14:15,057 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:15,058 Testing using best model ...\n",
+      "2021-09-21 21:14:15,104 loading file None1/best-model.pt\n",
+      "init TARS\n",
+      "2021-09-21 21:14:23,409 \t0.3333\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:44:43,343 loading file None1/best-model.pt\n",
-      "init TARS\n",
-      "2021-09-08 01:44:48,285 \t0.0\n",
-      "2021-09-08 01:44:48,286 \n",
+      "2021-09-21 21:14:23,409 \n",
       "Results:\n",
-      "- F-score (micro) 0.0\n",
-      "- F-score (macro) 0.0\n",
-      "- Accuracy 0.0\n",
+      "- F-score (micro) 0.3333\n",
+      "- F-score (macro) 0.2222\n",
+      "- Accuracy 0.3333\n",
       "\n",
       "By class:\n",
       "                                        precision    recall  f1-score   support\n",
       "\n",
-      "This text entails a positive sentiment     0.0000    0.0000    0.0000         2\n",
-      " This text entails a neutral sentiment     0.0000    0.0000    0.0000         0\n",
+      "This text entails a positive sentiment     0.0000    0.0000    0.0000         0\n",
+      " This text entails a neutral sentiment     1.0000    0.5000    0.6667         2\n",
       "This text entails a negative sentiment     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "                             micro avg     0.0000    0.0000    0.0000         3\n",
-      "                             macro avg     0.0000    0.0000    0.0000         3\n",
-      "                          weighted avg     0.0000    0.0000    0.0000         3\n",
-      "                           samples avg     0.0000    0.0000    0.0000         3\n",
+      "                             micro avg     0.3333    0.3333    0.3333         3\n",
+      "                             macro avg     0.3333    0.1667    0.2222         3\n",
+      "                          weighted avg     0.6667    0.3333    0.4444         3\n",
+      "                           samples avg     0.3333    0.3333    0.3333         3\n",
       "\n",
-      "2021-09-08 01:44:48,286 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:04,762 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
+      "2021-09-21 21:14:23,409 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:52,678 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:45:08,684 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:14:57,031 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 27/27 [00:00<00:00, 38916.22it/s]"
+      "100%|██████████| 27/27 [00:00<00:00, 38271.78it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:45:08,687 [b'This text entails a positive sentiment', b'This text entails a neutral sentiment', b'This text entails a negative sentiment']\n",
-      "2021-09-08 01:45:08,877 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:08,879 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:14:57,033 [b'This text entails a positive sentiment', b'This text entails a neutral sentiment', b'This text entails a negative sentiment']\n",
+      "2021-09-21 21:14:57,042 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:57,044 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -5063,24 +5082,26 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:45:08,880 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:08,880 Corpus: \"Corpus: 24 train + 3 dev + 3 test sentences\"\n",
-      "2021-09-08 01:45:08,880 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:08,881 Parameters:\n",
-      "2021-09-08 01:45:08,881  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:45:08,881  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:45:08,881  - patience: \"3\"\n",
-      "2021-09-08 01:45:08,882  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:45:08,882  - max_epochs: \"10\"\n",
-      "2021-09-08 01:45:08,882  - shuffle: \"True\"\n",
-      "2021-09-08 01:45:08,882  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:45:08,883  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:45:08,883 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:08,883 Model training base path: \"None1\"\n",
-      "2021-09-08 01:45:08,884 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:08,884 Device: cuda:1\n",
-      "2021-09-08 01:45:08,884 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:08,884 Embeddings storage mode: cpu\n"
+      "2021-09-21 21:14:57,044 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:57,045 Corpus: \"Corpus: 24 train + 3 dev + 3 test sentences\"\n",
+      "2021-09-21 21:14:57,045 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:57,045 Parameters:\n",
+      "2021-09-21 21:14:57,046  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:14:57,046  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:14:57,046  - patience: \"3\"\n",
+      "2021-09-21 21:14:57,046  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:14:57,047  - max_epochs: \"10\"\n",
+      "2021-09-21 21:14:57,047  - shuffle: \"True\"\n",
+      "2021-09-21 21:14:57,047  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:14:57,048  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:14:57,048 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:57,048 Model training base path: \"None1\"\n",
+      "2021-09-21 21:14:57,048 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:57,049 Device: cuda:0\n",
+      "2021-09-21 21:14:57,049 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:57,049 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:14:57,056 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:57,214 epoch 1 - iter 2/24 - loss 0.63480669 - samples/sec: 15.76 - lr: 0.020000\n"
      ]
     },
     {
@@ -5094,235 +5115,232 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:45:09,073 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:09,169 epoch 1 - iter 2/24 - loss 0.64286193 - samples/sec: 24.54 - lr: 0.020000\n",
-      "2021-09-08 01:45:09,258 epoch 1 - iter 4/24 - loss 0.63755585 - samples/sec: 22.60 - lr: 0.020000\n",
-      "2021-09-08 01:45:09,348 epoch 1 - iter 6/24 - loss 0.63613951 - samples/sec: 22.38 - lr: 0.020000\n",
-      "2021-09-08 01:45:09,435 epoch 1 - iter 8/24 - loss 0.64082691 - samples/sec: 22.95 - lr: 0.020000\n",
-      "2021-09-08 01:45:09,523 epoch 1 - iter 10/24 - loss 0.64007902 - samples/sec: 23.01 - lr: 0.020000\n",
-      "2021-09-08 01:45:09,611 epoch 1 - iter 12/24 - loss 0.63927948 - samples/sec: 22.81 - lr: 0.020000\n",
-      "2021-09-08 01:45:09,698 epoch 1 - iter 14/24 - loss 0.64060195 - samples/sec: 22.93 - lr: 0.020000\n",
-      "2021-09-08 01:45:09,787 epoch 1 - iter 16/24 - loss 0.64160337 - samples/sec: 22.69 - lr: 0.020000\n",
-      "2021-09-08 01:45:09,874 epoch 1 - iter 18/24 - loss 0.64155052 - samples/sec: 23.07 - lr: 0.020000\n",
-      "2021-09-08 01:45:09,962 epoch 1 - iter 20/24 - loss 0.64059866 - samples/sec: 22.92 - lr: 0.020000\n",
-      "2021-09-08 01:45:10,050 epoch 1 - iter 22/24 - loss 0.64008802 - samples/sec: 22.90 - lr: 0.020000\n",
-      "2021-09-08 01:45:10,141 epoch 1 - iter 24/24 - loss 0.64057062 - samples/sec: 22.14 - lr: 0.020000\n",
-      "2021-09-08 01:45:10,142 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:10,142 EPOCH 1 done: loss 0.6406 - lr 0.0200000\n",
-      "2021-09-08 01:45:10,292 DEV : loss 0.6366015672683716 - score 0.3333\n",
-      "2021-09-08 01:45:10,293 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:45:14,753 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:14,874 epoch 2 - iter 2/24 - loss 0.63432130 - samples/sec: 18.86 - lr: 0.020000\n",
-      "2021-09-08 01:45:14,976 epoch 2 - iter 4/24 - loss 0.62732856 - samples/sec: 19.71 - lr: 0.020000\n",
-      "2021-09-08 01:45:15,081 epoch 2 - iter 6/24 - loss 0.63132820 - samples/sec: 19.15 - lr: 0.020000\n",
-      "2021-09-08 01:45:15,187 epoch 2 - iter 8/24 - loss 0.62742820 - samples/sec: 18.86 - lr: 0.020000\n",
-      "2021-09-08 01:45:15,297 epoch 2 - iter 10/24 - loss 0.62751535 - samples/sec: 18.39 - lr: 0.020000\n",
-      "2021-09-08 01:45:15,410 epoch 2 - iter 12/24 - loss 0.62531859 - samples/sec: 17.68 - lr: 0.020000\n",
-      "2021-09-08 01:45:15,521 epoch 2 - iter 14/24 - loss 0.62651641 - samples/sec: 18.09 - lr: 0.020000\n",
-      "2021-09-08 01:45:15,628 epoch 2 - iter 16/24 - loss 0.62776620 - samples/sec: 18.75 - lr: 0.020000\n",
-      "2021-09-08 01:45:15,742 epoch 2 - iter 18/24 - loss 0.62800378 - samples/sec: 17.76 - lr: 0.020000\n",
-      "2021-09-08 01:45:15,856 epoch 2 - iter 20/24 - loss 0.62930851 - samples/sec: 17.54 - lr: 0.020000\n",
-      "2021-09-08 01:45:15,968 epoch 2 - iter 22/24 - loss 0.62917565 - samples/sec: 18.01 - lr: 0.020000\n",
-      "2021-09-08 01:45:16,079 epoch 2 - iter 24/24 - loss 0.63012932 - samples/sec: 17.97 - lr: 0.020000\n",
-      "2021-09-08 01:45:16,081 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:16,081 EPOCH 2 done: loss 0.6301 - lr 0.0200000\n",
-      "2021-09-08 01:45:16,149 DEV : loss 0.6365284323692322 - score 0.3333\n",
-      "2021-09-08 01:45:16,150 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:14:57,346 epoch 1 - iter 4/24 - loss 0.63613735 - samples/sec: 15.18 - lr: 0.020000\n",
+      "2021-09-21 21:14:57,481 epoch 1 - iter 6/24 - loss 0.63634797 - samples/sec: 14.87 - lr: 0.020000\n",
+      "2021-09-21 21:14:57,622 epoch 1 - iter 8/24 - loss 0.64110930 - samples/sec: 14.32 - lr: 0.020000\n",
+      "2021-09-21 21:14:57,762 epoch 1 - iter 10/24 - loss 0.63828494 - samples/sec: 14.25 - lr: 0.020000\n",
+      "2021-09-21 21:14:57,907 epoch 1 - iter 12/24 - loss 0.64058989 - samples/sec: 13.91 - lr: 0.020000\n",
+      "2021-09-21 21:14:58,045 epoch 1 - iter 14/24 - loss 0.63842808 - samples/sec: 14.49 - lr: 0.020000\n",
+      "2021-09-21 21:14:58,162 epoch 1 - iter 16/24 - loss 0.63892416 - samples/sec: 17.17 - lr: 0.020000\n",
+      "2021-09-21 21:14:58,277 epoch 1 - iter 18/24 - loss 0.63867141 - samples/sec: 17.51 - lr: 0.020000\n",
+      "2021-09-21 21:14:58,433 epoch 1 - iter 20/24 - loss 0.63895504 - samples/sec: 12.88 - lr: 0.020000\n",
+      "2021-09-21 21:14:58,585 epoch 1 - iter 22/24 - loss 0.63861923 - samples/sec: 13.18 - lr: 0.020000\n",
+      "2021-09-21 21:14:58,719 epoch 1 - iter 24/24 - loss 0.63829734 - samples/sec: 14.95 - lr: 0.020000\n",
+      "2021-09-21 21:14:58,720 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:58,721 EPOCH 1 done: loss 0.6383 - lr 0.0200000\n",
+      "2021-09-21 21:14:58,810 DEV : loss 0.6367031931877136 - score 0.6667\n",
+      "2021-09-21 21:14:58,811 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:45:20,298 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:20,420 epoch 3 - iter 2/24 - loss 0.64453933 - samples/sec: 18.70 - lr: 0.020000\n",
-      "2021-09-08 01:45:20,529 epoch 3 - iter 4/24 - loss 0.64475195 - samples/sec: 18.38 - lr: 0.020000\n",
-      "2021-09-08 01:45:20,641 epoch 3 - iter 6/24 - loss 0.64727528 - samples/sec: 18.07 - lr: 0.020000\n",
-      "2021-09-08 01:45:20,746 epoch 3 - iter 8/24 - loss 0.64319623 - samples/sec: 18.99 - lr: 0.020000\n",
-      "2021-09-08 01:45:20,856 epoch 3 - iter 10/24 - loss 0.64037852 - samples/sec: 18.31 - lr: 0.020000\n",
-      "2021-09-08 01:45:20,973 epoch 3 - iter 12/24 - loss 0.63837639 - samples/sec: 17.24 - lr: 0.020000\n",
-      "2021-09-08 01:45:21,072 epoch 3 - iter 14/24 - loss 0.63664797 - samples/sec: 20.24 - lr: 0.020000\n",
-      "2021-09-08 01:45:21,183 epoch 3 - iter 16/24 - loss 0.63670377 - samples/sec: 18.10 - lr: 0.020000\n",
-      "2021-09-08 01:45:21,292 epoch 3 - iter 18/24 - loss 0.63645936 - samples/sec: 18.49 - lr: 0.020000\n",
-      "2021-09-08 01:45:21,397 epoch 3 - iter 20/24 - loss 0.63561673 - samples/sec: 19.12 - lr: 0.020000\n",
-      "2021-09-08 01:45:21,506 epoch 3 - iter 22/24 - loss 0.63435965 - samples/sec: 18.52 - lr: 0.020000\n",
-      "2021-09-08 01:45:21,616 epoch 3 - iter 24/24 - loss 0.63514574 - samples/sec: 18.22 - lr: 0.020000\n",
-      "2021-09-08 01:45:21,617 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:21,617 EPOCH 3 done: loss 0.6351 - lr 0.0200000\n",
-      "2021-09-08 01:45:21,671 DEV : loss 0.6365157961845398 - score 0.0\n",
-      "2021-09-08 01:45:21,672 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:45:21,674 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:21,798 epoch 4 - iter 2/24 - loss 0.61938772 - samples/sec: 18.00 - lr: 0.020000\n",
-      "2021-09-08 01:45:21,908 epoch 4 - iter 4/24 - loss 0.62474227 - samples/sec: 18.35 - lr: 0.020000\n",
-      "2021-09-08 01:45:22,009 epoch 4 - iter 6/24 - loss 0.62382872 - samples/sec: 19.86 - lr: 0.020000\n",
-      "2021-09-08 01:45:22,121 epoch 4 - iter 8/24 - loss 0.62768892 - samples/sec: 17.99 - lr: 0.020000\n",
-      "2021-09-08 01:45:22,229 epoch 4 - iter 10/24 - loss 0.63024983 - samples/sec: 18.63 - lr: 0.020000\n",
-      "2021-09-08 01:45:22,332 epoch 4 - iter 12/24 - loss 0.63173774 - samples/sec: 19.63 - lr: 0.020000\n",
-      "2021-09-08 01:45:22,435 epoch 4 - iter 14/24 - loss 0.63438206 - samples/sec: 19.35 - lr: 0.020000\n",
-      "2021-09-08 01:45:22,546 epoch 4 - iter 16/24 - loss 0.63408755 - samples/sec: 18.09 - lr: 0.020000\n",
-      "2021-09-08 01:45:22,659 epoch 4 - iter 18/24 - loss 0.63423229 - samples/sec: 17.80 - lr: 0.020000\n",
-      "2021-09-08 01:45:22,761 epoch 4 - iter 20/24 - loss 0.63513057 - samples/sec: 19.72 - lr: 0.020000\n",
-      "2021-09-08 01:45:22,863 epoch 4 - iter 22/24 - loss 0.63279676 - samples/sec: 19.82 - lr: 0.020000\n",
-      "2021-09-08 01:45:22,973 epoch 4 - iter 24/24 - loss 0.63469213 - samples/sec: 18.31 - lr: 0.020000\n",
-      "2021-09-08 01:45:22,974 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:22,974 EPOCH 4 done: loss 0.6347 - lr 0.0200000\n",
-      "2021-09-08 01:45:23,042 DEV : loss 0.6370250582695007 - score 0.3333\n",
-      "2021-09-08 01:45:23,043 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:45:23,046 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:23,169 epoch 5 - iter 2/24 - loss 0.64527586 - samples/sec: 19.22 - lr: 0.020000\n",
-      "2021-09-08 01:45:23,273 epoch 5 - iter 4/24 - loss 0.64555041 - samples/sec: 19.37 - lr: 0.020000\n",
-      "2021-09-08 01:45:23,389 epoch 5 - iter 6/24 - loss 0.64243581 - samples/sec: 17.20 - lr: 0.020000\n",
-      "2021-09-08 01:45:23,494 epoch 5 - iter 8/24 - loss 0.63846152 - samples/sec: 19.26 - lr: 0.020000\n",
-      "2021-09-08 01:45:23,589 epoch 5 - iter 10/24 - loss 0.63898381 - samples/sec: 21.16 - lr: 0.020000\n",
-      "2021-09-08 01:45:23,683 epoch 5 - iter 12/24 - loss 0.63949126 - samples/sec: 21.31 - lr: 0.020000\n",
-      "2021-09-08 01:45:23,780 epoch 5 - iter 14/24 - loss 0.63764565 - samples/sec: 20.89 - lr: 0.020000\n",
-      "2021-09-08 01:45:23,875 epoch 5 - iter 16/24 - loss 0.63517031 - samples/sec: 21.24 - lr: 0.020000\n",
-      "2021-09-08 01:45:23,969 epoch 5 - iter 18/24 - loss 0.63550612 - samples/sec: 21.45 - lr: 0.020000\n",
-      "2021-09-08 01:45:24,066 epoch 5 - iter 20/24 - loss 0.63494388 - samples/sec: 20.80 - lr: 0.020000\n",
-      "2021-09-08 01:45:24,164 epoch 5 - iter 22/24 - loss 0.63556751 - samples/sec: 20.50 - lr: 0.020000\n",
-      "2021-09-08 01:45:24,260 epoch 5 - iter 24/24 - loss 0.63519439 - samples/sec: 20.89 - lr: 0.020000\n",
-      "2021-09-08 01:45:24,262 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:24,262 EPOCH 5 done: loss 0.6352 - lr 0.0200000\n",
-      "2021-09-08 01:45:24,415 DEV : loss 0.6365795135498047 - score 0.0\n",
-      "2021-09-08 01:45:24,415 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:45:24,488 ----------------------------------------------------------------------------------------------------\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 01:45:24,590 epoch 6 - iter 2/24 - loss 0.63986900 - samples/sec: 22.50 - lr: 0.020000\n",
-      "2021-09-08 01:45:24,678 epoch 6 - iter 4/24 - loss 0.63302360 - samples/sec: 22.85 - lr: 0.020000\n",
-      "2021-09-08 01:45:24,766 epoch 6 - iter 6/24 - loss 0.64001857 - samples/sec: 22.88 - lr: 0.020000\n",
-      "2021-09-08 01:45:24,858 epoch 6 - iter 8/24 - loss 0.63805117 - samples/sec: 21.76 - lr: 0.020000\n",
-      "2021-09-08 01:45:24,951 epoch 6 - iter 10/24 - loss 0.63616779 - samples/sec: 21.69 - lr: 0.020000\n",
-      "2021-09-08 01:45:25,039 epoch 6 - iter 12/24 - loss 0.63708104 - samples/sec: 22.88 - lr: 0.020000\n",
-      "2021-09-08 01:45:25,127 epoch 6 - iter 14/24 - loss 0.63746308 - samples/sec: 22.86 - lr: 0.020000\n",
-      "2021-09-08 01:45:25,214 epoch 6 - iter 16/24 - loss 0.63847822 - samples/sec: 23.03 - lr: 0.020000\n",
-      "2021-09-08 01:45:25,303 epoch 6 - iter 18/24 - loss 0.63813693 - samples/sec: 22.66 - lr: 0.020000\n",
-      "2021-09-08 01:45:25,391 epoch 6 - iter 20/24 - loss 0.63859886 - samples/sec: 22.91 - lr: 0.020000\n",
-      "2021-09-08 01:45:25,479 epoch 6 - iter 22/24 - loss 0.63846567 - samples/sec: 22.91 - lr: 0.020000\n",
-      "2021-09-08 01:45:25,566 epoch 6 - iter 24/24 - loss 0.63824827 - samples/sec: 23.00 - lr: 0.020000\n",
-      "2021-09-08 01:45:25,567 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:25,567 EPOCH 6 done: loss 0.6382 - lr 0.0200000\n",
-      "2021-09-08 01:45:25,712 DEV : loss 0.6365149021148682 - score 0.0\n",
-      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:45:25,712 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:45:25,786 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:25,891 epoch 7 - iter 2/24 - loss 0.62431288 - samples/sec: 22.65 - lr: 0.010000\n",
-      "2021-09-08 01:45:25,980 epoch 7 - iter 4/24 - loss 0.62251976 - samples/sec: 22.79 - lr: 0.010000\n",
-      "2021-09-08 01:45:26,067 epoch 7 - iter 6/24 - loss 0.62865153 - samples/sec: 22.99 - lr: 0.010000\n",
-      "2021-09-08 01:45:26,155 epoch 7 - iter 8/24 - loss 0.62768516 - samples/sec: 22.95 - lr: 0.010000\n",
-      "2021-09-08 01:45:26,244 epoch 7 - iter 10/24 - loss 0.63636011 - samples/sec: 22.58 - lr: 0.010000\n",
-      "2021-09-08 01:45:26,332 epoch 7 - iter 12/24 - loss 0.63564954 - samples/sec: 22.83 - lr: 0.010000\n",
-      "2021-09-08 01:45:26,420 epoch 7 - iter 14/24 - loss 0.63348908 - samples/sec: 22.92 - lr: 0.010000\n",
-      "2021-09-08 01:45:26,508 epoch 7 - iter 16/24 - loss 0.63394194 - samples/sec: 22.83 - lr: 0.010000\n",
-      "2021-09-08 01:45:26,598 epoch 7 - iter 18/24 - loss 0.63584667 - samples/sec: 22.37 - lr: 0.010000\n",
-      "2021-09-08 01:45:26,685 epoch 7 - iter 20/24 - loss 0.63778304 - samples/sec: 22.96 - lr: 0.010000\n",
-      "2021-09-08 01:45:26,774 epoch 7 - iter 22/24 - loss 0.63929244 - samples/sec: 22.80 - lr: 0.010000\n",
-      "2021-09-08 01:45:26,862 epoch 7 - iter 24/24 - loss 0.63822630 - samples/sec: 22.83 - lr: 0.010000\n",
-      "2021-09-08 01:45:26,863 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:26,863 EPOCH 7 done: loss 0.6382 - lr 0.0100000\n",
-      "2021-09-08 01:45:27,439 DEV : loss 0.6367820501327515 - score 0.3333\n",
-      "2021-09-08 01:45:27,440 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:45:27,445 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:27,566 epoch 8 - iter 2/24 - loss 0.63459185 - samples/sec: 18.56 - lr: 0.010000\n",
-      "2021-09-08 01:45:27,674 epoch 8 - iter 4/24 - loss 0.65107903 - samples/sec: 18.65 - lr: 0.010000\n",
-      "2021-09-08 01:45:27,790 epoch 8 - iter 6/24 - loss 0.65415683 - samples/sec: 17.38 - lr: 0.010000\n",
-      "2021-09-08 01:45:27,908 epoch 8 - iter 8/24 - loss 0.65038797 - samples/sec: 16.97 - lr: 0.010000\n",
-      "2021-09-08 01:45:28,022 epoch 8 - iter 10/24 - loss 0.64769532 - samples/sec: 17.72 - lr: 0.010000\n",
-      "2021-09-08 01:45:28,130 epoch 8 - iter 12/24 - loss 0.64725432 - samples/sec: 18.59 - lr: 0.010000\n",
-      "2021-09-08 01:45:28,237 epoch 8 - iter 14/24 - loss 0.64441746 - samples/sec: 18.70 - lr: 0.010000\n",
-      "2021-09-08 01:45:28,348 epoch 8 - iter 16/24 - loss 0.64410581 - samples/sec: 18.16 - lr: 0.010000\n",
-      "2021-09-08 01:45:28,458 epoch 8 - iter 18/24 - loss 0.64138788 - samples/sec: 18.34 - lr: 0.010000\n",
-      "2021-09-08 01:45:28,569 epoch 8 - iter 20/24 - loss 0.64121079 - samples/sec: 17.98 - lr: 0.010000\n",
-      "2021-09-08 01:45:28,676 epoch 8 - iter 22/24 - loss 0.64075347 - samples/sec: 18.94 - lr: 0.010000\n",
-      "2021-09-08 01:45:28,782 epoch 8 - iter 24/24 - loss 0.63960612 - samples/sec: 18.93 - lr: 0.010000\n",
-      "2021-09-08 01:45:28,783 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:28,783 EPOCH 8 done: loss 0.6396 - lr 0.0100000\n",
-      "2021-09-08 01:45:28,853 DEV : loss 0.6366152167320251 - score 0.0\n",
-      "2021-09-08 01:45:28,853 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:45:28,855 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:28,993 epoch 9 - iter 2/24 - loss 0.63870749 - samples/sec: 17.00 - lr: 0.010000\n",
-      "2021-09-08 01:45:29,108 epoch 9 - iter 4/24 - loss 0.63752651 - samples/sec: 17.48 - lr: 0.010000\n",
-      "2021-09-08 01:45:29,222 epoch 9 - iter 6/24 - loss 0.63711987 - samples/sec: 17.68 - lr: 0.010000\n",
-      "2021-09-08 01:45:29,340 epoch 9 - iter 8/24 - loss 0.63243527 - samples/sec: 16.98 - lr: 0.010000\n",
-      "2021-09-08 01:45:29,452 epoch 9 - iter 10/24 - loss 0.63921940 - samples/sec: 17.97 - lr: 0.010000\n",
-      "2021-09-08 01:45:29,558 epoch 9 - iter 12/24 - loss 0.64047694 - samples/sec: 18.90 - lr: 0.010000\n",
-      "2021-09-08 01:45:29,666 epoch 9 - iter 14/24 - loss 0.63926222 - samples/sec: 18.72 - lr: 0.010000\n",
-      "2021-09-08 01:45:29,775 epoch 9 - iter 16/24 - loss 0.63907193 - samples/sec: 18.40 - lr: 0.010000\n",
-      "2021-09-08 01:45:29,884 epoch 9 - iter 18/24 - loss 0.63623345 - samples/sec: 18.37 - lr: 0.010000\n",
-      "2021-09-08 01:45:29,997 epoch 9 - iter 20/24 - loss 0.63760092 - samples/sec: 17.80 - lr: 0.010000\n",
-      "2021-09-08 01:45:30,101 epoch 9 - iter 22/24 - loss 0.63763629 - samples/sec: 19.31 - lr: 0.010000\n",
-      "2021-09-08 01:45:30,218 epoch 9 - iter 24/24 - loss 0.63762459 - samples/sec: 17.30 - lr: 0.010000\n",
-      "2021-09-08 01:45:30,219 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:30,220 EPOCH 9 done: loss 0.6376 - lr 0.0100000\n",
-      "2021-09-08 01:45:30,282 DEV : loss 0.6365534663200378 - score 0.0\n",
-      "2021-09-08 01:45:30,283 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:45:30,302 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:30,428 epoch 10 - iter 2/24 - loss 0.63968047 - samples/sec: 17.84 - lr: 0.010000\n",
-      "2021-09-08 01:45:30,539 epoch 10 - iter 4/24 - loss 0.63306653 - samples/sec: 18.21 - lr: 0.010000\n",
-      "2021-09-08 01:45:30,650 epoch 10 - iter 6/24 - loss 0.63247577 - samples/sec: 18.02 - lr: 0.010000\n",
-      "2021-09-08 01:45:30,769 epoch 10 - iter 8/24 - loss 0.63025536 - samples/sec: 16.97 - lr: 0.010000\n",
-      "2021-09-08 01:45:30,882 epoch 10 - iter 10/24 - loss 0.63301840 - samples/sec: 17.74 - lr: 0.010000\n",
-      "2021-09-08 01:45:30,997 epoch 10 - iter 12/24 - loss 0.63569047 - samples/sec: 17.52 - lr: 0.010000\n",
-      "2021-09-08 01:45:31,113 epoch 10 - iter 14/24 - loss 0.64078891 - samples/sec: 17.23 - lr: 0.010000\n",
-      "2021-09-08 01:45:31,222 epoch 10 - iter 16/24 - loss 0.63994783 - samples/sec: 18.48 - lr: 0.010000\n",
-      "2021-09-08 01:45:31,335 epoch 10 - iter 18/24 - loss 0.64016933 - samples/sec: 17.88 - lr: 0.010000\n",
-      "2021-09-08 01:45:31,441 epoch 10 - iter 20/24 - loss 0.64068473 - samples/sec: 18.97 - lr: 0.010000\n",
-      "2021-09-08 01:45:31,554 epoch 10 - iter 22/24 - loss 0.64127696 - samples/sec: 17.81 - lr: 0.010000\n",
-      "2021-09-08 01:45:31,657 epoch 10 - iter 24/24 - loss 0.64166516 - samples/sec: 19.39 - lr: 0.010000\n",
-      "2021-09-08 01:45:31,658 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:31,659 EPOCH 10 done: loss 0.6417 - lr 0.0100000\n",
-      "2021-09-08 01:45:31,717 DEV : loss 0.63651442527771 - score 0.3333\n",
-      "2021-09-08 01:45:31,718 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:15:02,885 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:03,045 epoch 2 - iter 2/24 - loss 0.64394253 - samples/sec: 14.81 - lr: 0.020000\n",
+      "2021-09-21 21:15:03,190 epoch 2 - iter 4/24 - loss 0.63083622 - samples/sec: 13.85 - lr: 0.020000\n",
+      "2021-09-21 21:15:03,318 epoch 2 - iter 6/24 - loss 0.63270353 - samples/sec: 15.65 - lr: 0.020000\n",
+      "2021-09-21 21:15:03,447 epoch 2 - iter 8/24 - loss 0.63466573 - samples/sec: 15.60 - lr: 0.020000\n",
+      "2021-09-21 21:15:03,586 epoch 2 - iter 10/24 - loss 0.63542066 - samples/sec: 14.47 - lr: 0.020000\n",
+      "2021-09-21 21:15:03,713 epoch 2 - iter 12/24 - loss 0.63341101 - samples/sec: 15.78 - lr: 0.020000\n",
+      "2021-09-21 21:15:03,862 epoch 2 - iter 14/24 - loss 0.63395180 - samples/sec: 13.45 - lr: 0.020000\n",
+      "2021-09-21 21:15:03,998 epoch 2 - iter 16/24 - loss 0.63417792 - samples/sec: 14.76 - lr: 0.020000\n",
+      "2021-09-21 21:15:04,123 epoch 2 - iter 18/24 - loss 0.63397881 - samples/sec: 16.07 - lr: 0.020000\n",
+      "2021-09-21 21:15:04,262 epoch 2 - iter 20/24 - loss 0.63508994 - samples/sec: 14.39 - lr: 0.020000\n",
+      "2021-09-21 21:15:04,388 epoch 2 - iter 22/24 - loss 0.63680335 - samples/sec: 16.01 - lr: 0.020000\n",
+      "2021-09-21 21:15:04,528 epoch 2 - iter 24/24 - loss 0.63653798 - samples/sec: 14.32 - lr: 0.020000\n",
+      "2021-09-21 21:15:04,529 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:04,529 EPOCH 2 done: loss 0.6365 - lr 0.0200000\n",
+      "2021-09-21 21:15:04,620 DEV : loss 0.636620283126831 - score 0.3333\n",
+      "2021-09-21 21:15:04,621 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:15:04,623 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:04,768 epoch 3 - iter 2/24 - loss 0.63671815 - samples/sec: 15.86 - lr: 0.020000\n",
+      "2021-09-21 21:15:04,906 epoch 3 - iter 4/24 - loss 0.63249622 - samples/sec: 14.60 - lr: 0.020000\n",
+      "2021-09-21 21:15:05,026 epoch 3 - iter 6/24 - loss 0.63282848 - samples/sec: 16.69 - lr: 0.020000\n",
+      "2021-09-21 21:15:05,151 epoch 3 - iter 8/24 - loss 0.63375164 - samples/sec: 16.14 - lr: 0.020000\n",
+      "2021-09-21 21:15:05,285 epoch 3 - iter 10/24 - loss 0.63494847 - samples/sec: 14.96 - lr: 0.020000\n",
+      "2021-09-21 21:15:05,421 epoch 3 - iter 12/24 - loss 0.63443281 - samples/sec: 14.75 - lr: 0.020000\n",
+      "2021-09-21 21:15:05,560 epoch 3 - iter 14/24 - loss 0.63221278 - samples/sec: 14.42 - lr: 0.020000\n",
+      "2021-09-21 21:15:05,688 epoch 3 - iter 16/24 - loss 0.63383229 - samples/sec: 15.73 - lr: 0.020000\n",
+      "2021-09-21 21:15:05,835 epoch 3 - iter 18/24 - loss 0.63523077 - samples/sec: 13.60 - lr: 0.020000\n",
+      "2021-09-21 21:15:05,969 epoch 3 - iter 20/24 - loss 0.63589184 - samples/sec: 14.97 - lr: 0.020000\n",
+      "2021-09-21 21:15:06,125 epoch 3 - iter 22/24 - loss 0.63647884 - samples/sec: 12.90 - lr: 0.020000\n",
+      "2021-09-21 21:15:06,266 epoch 3 - iter 24/24 - loss 0.63701773 - samples/sec: 14.27 - lr: 0.020000\n",
+      "2021-09-21 21:15:06,267 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:06,267 EPOCH 3 done: loss 0.6370 - lr 0.0200000\n",
+      "2021-09-21 21:15:06,355 DEV : loss 0.6368786096572876 - score 0.3333\n",
+      "2021-09-21 21:15:06,356 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:15:06,357 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:06,520 epoch 4 - iter 2/24 - loss 0.62940657 - samples/sec: 14.23 - lr: 0.020000\n",
+      "2021-09-21 21:15:06,652 epoch 4 - iter 4/24 - loss 0.63702466 - samples/sec: 15.17 - lr: 0.020000\n",
+      "2021-09-21 21:15:06,798 epoch 4 - iter 6/24 - loss 0.63397562 - samples/sec: 13.84 - lr: 0.020000\n",
+      "2021-09-21 21:15:06,943 epoch 4 - iter 8/24 - loss 0.62998040 - samples/sec: 13.84 - lr: 0.020000\n",
+      "2021-09-21 21:15:07,069 epoch 4 - iter 10/24 - loss 0.62940003 - samples/sec: 15.84 - lr: 0.020000\n",
+      "2021-09-21 21:15:07,197 epoch 4 - iter 12/24 - loss 0.62993004 - samples/sec: 15.77 - lr: 0.020000\n",
+      "2021-09-21 21:15:07,325 epoch 4 - iter 14/24 - loss 0.63059374 - samples/sec: 15.65 - lr: 0.020000\n",
+      "2021-09-21 21:15:07,466 epoch 4 - iter 16/24 - loss 0.62940504 - samples/sec: 14.21 - lr: 0.020000\n",
+      "2021-09-21 21:15:07,587 epoch 4 - iter 18/24 - loss 0.63032730 - samples/sec: 16.60 - lr: 0.020000\n",
+      "2021-09-21 21:15:07,706 epoch 4 - iter 20/24 - loss 0.62988594 - samples/sec: 16.82 - lr: 0.020000\n",
+      "2021-09-21 21:15:07,854 epoch 4 - iter 22/24 - loss 0.62858203 - samples/sec: 13.64 - lr: 0.020000\n",
+      "2021-09-21 21:15:07,983 epoch 4 - iter 24/24 - loss 0.62934584 - samples/sec: 15.50 - lr: 0.020000\n",
+      "2021-09-21 21:15:07,984 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:07,984 EPOCH 4 done: loss 0.6293 - lr 0.0200000\n",
+      "2021-09-21 21:15:08,098 DEV : loss 0.6369802951812744 - score 0.3333\n",
+      "2021-09-21 21:15:08,098 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:15:08,100 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:08,277 epoch 5 - iter 2/24 - loss 0.63535869 - samples/sec: 14.29 - lr: 0.020000\n",
+      "2021-09-21 21:15:08,407 epoch 5 - iter 4/24 - loss 0.63272512 - samples/sec: 15.47 - lr: 0.020000\n",
+      "2021-09-21 21:15:08,532 epoch 5 - iter 6/24 - loss 0.63264761 - samples/sec: 16.10 - lr: 0.020000\n",
+      "2021-09-21 21:15:08,683 epoch 5 - iter 8/24 - loss 0.63121679 - samples/sec: 13.31 - lr: 0.020000\n",
+      "2021-09-21 21:15:08,814 epoch 5 - iter 10/24 - loss 0.63071997 - samples/sec: 15.30 - lr: 0.020000\n",
+      "2021-09-21 21:15:08,962 epoch 5 - iter 12/24 - loss 0.63342150 - samples/sec: 13.57 - lr: 0.020000\n",
+      "2021-09-21 21:15:09,076 epoch 5 - iter 14/24 - loss 0.63462140 - samples/sec: 17.69 - lr: 0.020000\n",
+      "2021-09-21 21:15:09,204 epoch 5 - iter 16/24 - loss 0.63241789 - samples/sec: 15.67 - lr: 0.020000\n",
+      "2021-09-21 21:15:09,342 epoch 5 - iter 18/24 - loss 0.63322429 - samples/sec: 14.56 - lr: 0.020000\n",
+      "2021-09-21 21:15:09,484 epoch 5 - iter 20/24 - loss 0.63400274 - samples/sec: 14.11 - lr: 0.020000\n",
+      "2021-09-21 21:15:09,631 epoch 5 - iter 22/24 - loss 0.63368244 - samples/sec: 13.73 - lr: 0.020000\n",
+      "2021-09-21 21:15:09,777 epoch 5 - iter 24/24 - loss 0.63304998 - samples/sec: 13.69 - lr: 0.020000\n",
+      "2021-09-21 21:15:09,779 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:09,779 EPOCH 5 done: loss 0.6330 - lr 0.0200000\n",
+      "2021-09-21 21:15:09,865 DEV : loss 0.6365150809288025 - score 0.6667\n",
+      "2021-09-21 21:15:09,867 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:45:46,526 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:15:14,214 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:14,340 epoch 6 - iter 2/24 - loss 0.64397171 - samples/sec: 19.17 - lr: 0.020000\n",
+      "2021-09-21 21:15:14,441 epoch 6 - iter 4/24 - loss 0.64484608 - samples/sec: 19.90 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:45:46,527 Testing using best model ...\n",
-      "2021-09-08 01:45:46,552 loading file None1/best-model.pt\n",
+      "2021-09-21 21:15:14,569 epoch 6 - iter 6/24 - loss 0.64573438 - samples/sec: 15.67 - lr: 0.020000\n",
+      "2021-09-21 21:15:14,691 epoch 6 - iter 8/24 - loss 0.64478888 - samples/sec: 16.57 - lr: 0.020000\n",
+      "2021-09-21 21:15:14,822 epoch 6 - iter 10/24 - loss 0.63921482 - samples/sec: 15.40 - lr: 0.020000\n",
+      "2021-09-21 21:15:14,931 epoch 6 - iter 12/24 - loss 0.63817501 - samples/sec: 18.47 - lr: 0.020000\n",
+      "2021-09-21 21:15:15,050 epoch 6 - iter 14/24 - loss 0.63915305 - samples/sec: 16.91 - lr: 0.020000\n",
+      "2021-09-21 21:15:15,181 epoch 6 - iter 16/24 - loss 0.63955677 - samples/sec: 15.37 - lr: 0.020000\n",
+      "2021-09-21 21:15:15,287 epoch 6 - iter 18/24 - loss 0.64124274 - samples/sec: 19.02 - lr: 0.020000\n",
+      "2021-09-21 21:15:15,404 epoch 6 - iter 20/24 - loss 0.64065557 - samples/sec: 17.12 - lr: 0.020000\n",
+      "2021-09-21 21:15:15,529 epoch 6 - iter 22/24 - loss 0.64057582 - samples/sec: 16.10 - lr: 0.020000\n",
+      "2021-09-21 21:15:15,649 epoch 6 - iter 24/24 - loss 0.64169157 - samples/sec: 16.76 - lr: 0.020000\n",
+      "2021-09-21 21:15:15,651 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:15,651 EPOCH 6 done: loss 0.6417 - lr 0.0200000\n",
+      "2021-09-21 21:15:15,722 DEV : loss 0.6365584135055542 - score 0.6667\n",
+      "2021-09-21 21:15:15,724 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:15:15,726 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:15,858 epoch 7 - iter 2/24 - loss 0.63008347 - samples/sec: 16.99 - lr: 0.020000\n",
+      "2021-09-21 21:15:15,966 epoch 7 - iter 4/24 - loss 0.63778174 - samples/sec: 18.71 - lr: 0.020000\n",
+      "2021-09-21 21:15:16,061 epoch 7 - iter 6/24 - loss 0.64269044 - samples/sec: 21.22 - lr: 0.020000\n",
+      "2021-09-21 21:15:16,164 epoch 7 - iter 8/24 - loss 0.63981411 - samples/sec: 19.62 - lr: 0.020000\n",
+      "2021-09-21 21:15:16,259 epoch 7 - iter 10/24 - loss 0.64001709 - samples/sec: 21.16 - lr: 0.020000\n",
+      "2021-09-21 21:15:16,354 epoch 7 - iter 12/24 - loss 0.64003845 - samples/sec: 21.26 - lr: 0.020000\n",
+      "2021-09-21 21:15:16,454 epoch 7 - iter 14/24 - loss 0.64030001 - samples/sec: 20.11 - lr: 0.020000\n",
+      "2021-09-21 21:15:16,562 epoch 7 - iter 16/24 - loss 0.64049758 - samples/sec: 18.66 - lr: 0.020000\n",
+      "2021-09-21 21:15:16,668 epoch 7 - iter 18/24 - loss 0.63987128 - samples/sec: 19.01 - lr: 0.020000\n",
+      "2021-09-21 21:15:16,788 epoch 7 - iter 20/24 - loss 0.64307082 - samples/sec: 16.79 - lr: 0.020000\n",
+      "2021-09-21 21:15:16,901 epoch 7 - iter 22/24 - loss 0.64155757 - samples/sec: 17.73 - lr: 0.020000\n",
+      "2021-09-21 21:15:17,013 epoch 7 - iter 24/24 - loss 0.64131527 - samples/sec: 18.02 - lr: 0.020000\n",
+      "2021-09-21 21:15:17,014 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:17,015 EPOCH 7 done: loss 0.6413 - lr 0.0200000\n",
+      "2021-09-21 21:15:17,075 DEV : loss 0.6365180015563965 - score 0.3333\n",
+      "2021-09-21 21:15:17,076 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:15:17,078 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:17,202 epoch 8 - iter 2/24 - loss 0.63005909 - samples/sec: 18.25 - lr: 0.020000\n",
+      "2021-09-21 21:15:17,316 epoch 8 - iter 4/24 - loss 0.63819058 - samples/sec: 17.65 - lr: 0.020000\n",
+      "2021-09-21 21:15:17,425 epoch 8 - iter 6/24 - loss 0.63609459 - samples/sec: 18.47 - lr: 0.020000\n",
+      "2021-09-21 21:15:17,532 epoch 8 - iter 8/24 - loss 0.63423371 - samples/sec: 18.74 - lr: 0.020000\n",
+      "2021-09-21 21:15:17,647 epoch 8 - iter 10/24 - loss 0.63485131 - samples/sec: 17.52 - lr: 0.020000\n",
+      "2021-09-21 21:15:17,763 epoch 8 - iter 12/24 - loss 0.63867226 - samples/sec: 17.44 - lr: 0.020000\n",
+      "2021-09-21 21:15:17,875 epoch 8 - iter 14/24 - loss 0.63853321 - samples/sec: 17.85 - lr: 0.020000\n",
+      "2021-09-21 21:15:17,988 epoch 8 - iter 16/24 - loss 0.63800081 - samples/sec: 17.86 - lr: 0.020000\n",
+      "2021-09-21 21:15:18,102 epoch 8 - iter 18/24 - loss 0.63714200 - samples/sec: 17.56 - lr: 0.020000\n",
+      "2021-09-21 21:15:18,218 epoch 8 - iter 20/24 - loss 0.63946325 - samples/sec: 17.44 - lr: 0.020000\n",
+      "2021-09-21 21:15:18,315 epoch 8 - iter 22/24 - loss 0.63998541 - samples/sec: 20.74 - lr: 0.020000\n",
+      "2021-09-21 21:15:18,404 epoch 8 - iter 24/24 - loss 0.63903115 - samples/sec: 22.61 - lr: 0.020000\n",
+      "2021-09-21 21:15:18,405 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:18,406 EPOCH 8 done: loss 0.6390 - lr 0.0200000\n",
+      "2021-09-21 21:15:18,455 DEV : loss 0.6367494463920593 - score 0.6667\n",
+      "2021-09-21 21:15:18,455 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:15:18,458 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:18,559 epoch 9 - iter 2/24 - loss 0.63270429 - samples/sec: 22.58 - lr: 0.020000\n",
+      "2021-09-21 21:15:18,653 epoch 9 - iter 4/24 - loss 0.63173100 - samples/sec: 21.47 - lr: 0.020000\n",
+      "2021-09-21 21:15:18,743 epoch 9 - iter 6/24 - loss 0.62953370 - samples/sec: 22.49 - lr: 0.020000\n",
+      "2021-09-21 21:15:18,832 epoch 9 - iter 8/24 - loss 0.63138258 - samples/sec: 22.66 - lr: 0.020000\n",
+      "2021-09-21 21:15:18,920 epoch 9 - iter 10/24 - loss 0.63144242 - samples/sec: 22.67 - lr: 0.020000\n",
+      "2021-09-21 21:15:19,009 epoch 9 - iter 12/24 - loss 0.63202107 - samples/sec: 22.60 - lr: 0.020000\n",
+      "2021-09-21 21:15:19,098 epoch 9 - iter 14/24 - loss 0.63412429 - samples/sec: 22.74 - lr: 0.020000\n",
+      "2021-09-21 21:15:19,189 epoch 9 - iter 16/24 - loss 0.63427254 - samples/sec: 22.14 - lr: 0.020000\n",
+      "2021-09-21 21:15:19,279 epoch 9 - iter 18/24 - loss 0.63422112 - samples/sec: 22.37 - lr: 0.020000\n",
+      "2021-09-21 21:15:19,369 epoch 9 - iter 20/24 - loss 0.63436484 - samples/sec: 22.56 - lr: 0.020000\n",
+      "2021-09-21 21:15:19,462 epoch 9 - iter 22/24 - loss 0.63588620 - samples/sec: 21.43 - lr: 0.020000\n",
+      "2021-09-21 21:15:19,551 epoch 9 - iter 24/24 - loss 0.63515005 - samples/sec: 22.65 - lr: 0.020000\n",
+      "2021-09-21 21:15:19,552 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:19,553 EPOCH 9 done: loss 0.6352 - lr 0.0200000\n",
+      "2021-09-21 21:15:19,722 DEV : loss 0.6367934346199036 - score 0.6667\n",
+      "Epoch     9: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:15:19,724 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:15:19,872 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:19,977 epoch 10 - iter 2/24 - loss 0.63517222 - samples/sec: 21.90 - lr: 0.010000\n",
+      "2021-09-21 21:15:20,081 epoch 10 - iter 4/24 - loss 0.63408966 - samples/sec: 19.24 - lr: 0.010000\n",
+      "2021-09-21 21:15:20,191 epoch 10 - iter 6/24 - loss 0.63676569 - samples/sec: 18.24 - lr: 0.010000\n",
+      "2021-09-21 21:15:20,309 epoch 10 - iter 8/24 - loss 0.63753308 - samples/sec: 17.17 - lr: 0.010000\n",
+      "2021-09-21 21:15:20,415 epoch 10 - iter 10/24 - loss 0.63926963 - samples/sec: 18.88 - lr: 0.010000\n",
+      "2021-09-21 21:15:20,528 epoch 10 - iter 12/24 - loss 0.64022329 - samples/sec: 17.86 - lr: 0.010000\n",
+      "2021-09-21 21:15:20,644 epoch 10 - iter 14/24 - loss 0.63905200 - samples/sec: 17.22 - lr: 0.010000\n",
+      "2021-09-21 21:15:20,752 epoch 10 - iter 16/24 - loss 0.63851088 - samples/sec: 18.62 - lr: 0.010000\n",
+      "2021-09-21 21:15:20,863 epoch 10 - iter 18/24 - loss 0.63804331 - samples/sec: 18.24 - lr: 0.010000\n",
+      "2021-09-21 21:15:20,972 epoch 10 - iter 20/24 - loss 0.63823169 - samples/sec: 18.34 - lr: 0.010000\n",
+      "2021-09-21 21:15:21,081 epoch 10 - iter 22/24 - loss 0.63729508 - samples/sec: 18.60 - lr: 0.010000\n",
+      "2021-09-21 21:15:21,185 epoch 10 - iter 24/24 - loss 0.63610691 - samples/sec: 19.26 - lr: 0.010000\n",
+      "2021-09-21 21:15:21,187 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:21,187 EPOCH 10 done: loss 0.6361 - lr 0.0100000\n",
+      "2021-09-21 21:15:21,348 DEV : loss 0.6365156173706055 - score 0.3333\n",
+      "2021-09-21 21:15:21,348 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:15:29,452 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:29,453 Testing using best model ...\n",
+      "2021-09-21 21:15:29,454 loading file None1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:45:51,729 \t1.0\n",
-      "2021-09-08 01:45:51,729 \n",
+      "2021-09-21 21:15:37,529 \t0.0\n",
+      "2021-09-21 21:15:37,530 \n",
       "Results:\n",
-      "- F-score (micro) 1.0\n",
-      "- F-score (macro) 0.6667\n",
-      "- Accuracy 1.0\n",
+      "- F-score (micro) 0.0\n",
+      "- F-score (macro) 0.0\n",
+      "- Accuracy 0.0\n",
       "\n",
       "By class:\n",
       "                                        precision    recall  f1-score   support\n",
       "\n",
-      "This text entails a positive sentiment     1.0000    1.0000    1.0000         2\n",
-      " This text entails a neutral sentiment     1.0000    1.0000    1.0000         1\n",
-      "This text entails a negative sentiment     0.0000    0.0000    0.0000         0\n",
-      "\n",
-      "                             micro avg     1.0000    1.0000    1.0000         3\n",
-      "                             macro avg     0.6667    0.6667    0.6667         3\n",
-      "                          weighted avg     1.0000    1.0000    1.0000         3\n",
-      "                           samples avg     1.0000    1.0000    1.0000         3\n",
+      "This text entails a positive sentiment     0.0000    0.0000    0.0000         1\n",
+      " This text entails a neutral sentiment     0.0000    0.0000    0.0000         0\n",
+      "This text entails a negative sentiment     0.0000    0.0000    0.0000         2\n",
       "\n",
-      "2021-09-08 01:45:51,730 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:08,050 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
+      "                             micro avg     0.0000    0.0000    0.0000         3\n",
+      "                             macro avg     0.0000    0.0000    0.0000         3\n",
+      "                          weighted avg     0.0000    0.0000    0.0000         3\n",
+      "                           samples avg     0.0000    0.0000    0.0000         3\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 21:15:37,531 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:11,919 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:46:12,097 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:16:16,201 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 27/27 [00:00<00:00, 38796.23it/s]"
+      "100%|██████████| 27/27 [00:00<00:00, 34213.36it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:46:12,100 [b'This text entails a positive sentiment', b'This text entails a neutral sentiment', b'This text entails a negative sentiment']\n",
-      "2021-09-08 01:46:12,109 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:12,111 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:16:16,204 [b'This text entails a positive sentiment', b'This text entails a neutral sentiment', b'This text entails a negative sentiment']\n",
+      "2021-09-21 21:16:16,360 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:16,362 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -5635,26 +5653,24 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:46:12,112 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:12,112 Corpus: \"Corpus: 24 train + 3 dev + 3 test sentences\"\n",
-      "2021-09-08 01:46:12,112 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:12,113 Parameters:\n",
-      "2021-09-08 01:46:12,113  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:46:12,113  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:46:12,113  - patience: \"3\"\n",
-      "2021-09-08 01:46:12,114  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:46:12,114  - max_epochs: \"10\"\n",
-      "2021-09-08 01:46:12,114  - shuffle: \"True\"\n",
-      "2021-09-08 01:46:12,115  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:46:12,115  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:46:12,115 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:12,115 Model training base path: \"None1\"\n",
-      "2021-09-08 01:46:12,116 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:12,116 Device: cuda:1\n",
-      "2021-09-08 01:46:12,116 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:12,116 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:46:12,124 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:12,244 epoch 1 - iter 2/24 - loss 0.63487551 - samples/sec: 19.73 - lr: 0.020000\n"
+      "2021-09-21 21:16:16,363 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:16,363 Corpus: \"Corpus: 24 train + 3 dev + 3 test sentences\"\n",
+      "2021-09-21 21:16:16,364 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:16,364 Parameters:\n",
+      "2021-09-21 21:16:16,364  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:16:16,364  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:16:16,365  - patience: \"3\"\n",
+      "2021-09-21 21:16:16,365  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:16:16,365  - max_epochs: \"10\"\n",
+      "2021-09-21 21:16:16,365  - shuffle: \"True\"\n",
+      "2021-09-21 21:16:16,366  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:16:16,366  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:16:16,366 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:16,367 Model training base path: \"None1\"\n",
+      "2021-09-21 21:16:16,367 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:16,367 Device: cuda:0\n",
+      "2021-09-21 21:16:16,367 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:16,368 Embeddings storage mode: cpu\n"
      ]
     },
     {
@@ -5668,214 +5684,217 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:46:12,340 epoch 1 - iter 4/24 - loss 0.63512152 - samples/sec: 20.97 - lr: 0.020000\n",
-      "2021-09-08 01:46:12,444 epoch 1 - iter 6/24 - loss 0.63558443 - samples/sec: 19.28 - lr: 0.020000\n",
-      "2021-09-08 01:46:12,539 epoch 1 - iter 8/24 - loss 0.64010697 - samples/sec: 21.17 - lr: 0.020000\n",
-      "2021-09-08 01:46:12,638 epoch 1 - iter 10/24 - loss 0.64010463 - samples/sec: 20.29 - lr: 0.020000\n",
-      "2021-09-08 01:46:12,741 epoch 1 - iter 12/24 - loss 0.63847345 - samples/sec: 19.54 - lr: 0.020000\n",
-      "2021-09-08 01:46:12,855 epoch 1 - iter 14/24 - loss 0.63755079 - samples/sec: 17.59 - lr: 0.020000\n",
-      "2021-09-08 01:46:12,950 epoch 1 - iter 16/24 - loss 0.63705941 - samples/sec: 21.24 - lr: 0.020000\n",
-      "2021-09-08 01:46:13,047 epoch 1 - iter 18/24 - loss 0.63718604 - samples/sec: 20.66 - lr: 0.020000\n",
-      "2021-09-08 01:46:13,149 epoch 1 - iter 20/24 - loss 0.63671316 - samples/sec: 19.59 - lr: 0.020000\n",
-      "2021-09-08 01:46:13,266 epoch 1 - iter 22/24 - loss 0.63724079 - samples/sec: 17.15 - lr: 0.020000\n",
-      "2021-09-08 01:46:13,367 epoch 1 - iter 24/24 - loss 0.63603937 - samples/sec: 19.95 - lr: 0.020000\n",
-      "2021-09-08 01:46:13,368 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:13,369 EPOCH 1 done: loss 0.6360 - lr 0.0200000\n",
-      "2021-09-08 01:46:13,445 DEV : loss 0.6368467807769775 - score 0.6667\n",
-      "2021-09-08 01:46:13,446 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:16:16,547 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:16,708 epoch 1 - iter 2/24 - loss 0.63372120 - samples/sec: 15.97 - lr: 0.020000\n",
+      "2021-09-21 21:16:16,838 epoch 1 - iter 4/24 - loss 0.63469954 - samples/sec: 15.50 - lr: 0.020000\n",
+      "2021-09-21 21:16:16,981 epoch 1 - iter 6/24 - loss 0.63883640 - samples/sec: 14.06 - lr: 0.020000\n",
+      "2021-09-21 21:16:17,118 epoch 1 - iter 8/24 - loss 0.63925643 - samples/sec: 14.60 - lr: 0.020000\n",
+      "2021-09-21 21:16:17,259 epoch 1 - iter 10/24 - loss 0.63607311 - samples/sec: 14.27 - lr: 0.020000\n",
+      "2021-09-21 21:16:17,386 epoch 1 - iter 12/24 - loss 0.63561341 - samples/sec: 15.83 - lr: 0.020000\n",
+      "2021-09-21 21:16:17,548 epoch 1 - iter 14/24 - loss 0.63518075 - samples/sec: 12.40 - lr: 0.020000\n",
+      "2021-09-21 21:16:17,677 epoch 1 - iter 16/24 - loss 0.63367365 - samples/sec: 15.57 - lr: 0.020000\n",
+      "2021-09-21 21:16:17,799 epoch 1 - iter 18/24 - loss 0.63397484 - samples/sec: 16.46 - lr: 0.020000\n",
+      "2021-09-21 21:16:17,939 epoch 1 - iter 20/24 - loss 0.63325711 - samples/sec: 14.30 - lr: 0.020000\n",
+      "2021-09-21 21:16:18,092 epoch 1 - iter 22/24 - loss 0.63277643 - samples/sec: 13.13 - lr: 0.020000\n",
+      "2021-09-21 21:16:18,217 epoch 1 - iter 24/24 - loss 0.63184000 - samples/sec: 16.13 - lr: 0.020000\n",
+      "2021-09-21 21:16:18,218 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:18,219 EPOCH 1 done: loss 0.6318 - lr 0.0200000\n",
+      "2021-09-21 21:16:18,396 DEV : loss 0.63651442527771 - score 0.0\n",
+      "2021-09-21 21:16:18,397 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:46:17,532 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:17,669 epoch 2 - iter 2/24 - loss 0.63281858 - samples/sec: 17.54 - lr: 0.020000\n",
-      "2021-09-08 01:46:17,778 epoch 2 - iter 4/24 - loss 0.63551551 - samples/sec: 18.43 - lr: 0.020000\n",
-      "2021-09-08 01:46:17,883 epoch 2 - iter 6/24 - loss 0.63679862 - samples/sec: 19.06 - lr: 0.020000\n",
-      "2021-09-08 01:46:17,995 epoch 2 - iter 8/24 - loss 0.64135438 - samples/sec: 18.00 - lr: 0.020000\n",
-      "2021-09-08 01:46:18,095 epoch 2 - iter 10/24 - loss 0.63863303 - samples/sec: 20.01 - lr: 0.020000\n",
-      "2021-09-08 01:46:18,204 epoch 2 - iter 12/24 - loss 0.63826071 - samples/sec: 18.53 - lr: 0.020000\n",
-      "2021-09-08 01:46:18,306 epoch 2 - iter 14/24 - loss 0.63648482 - samples/sec: 19.61 - lr: 0.020000\n",
-      "2021-09-08 01:46:18,412 epoch 2 - iter 16/24 - loss 0.63764254 - samples/sec: 18.99 - lr: 0.020000\n",
-      "2021-09-08 01:46:18,514 epoch 2 - iter 18/24 - loss 0.63719123 - samples/sec: 19.70 - lr: 0.020000\n",
-      "2021-09-08 01:46:18,620 epoch 2 - iter 20/24 - loss 0.63699818 - samples/sec: 18.94 - lr: 0.020000\n",
-      "2021-09-08 01:46:18,718 epoch 2 - iter 22/24 - loss 0.63675911 - samples/sec: 20.46 - lr: 0.020000\n",
-      "2021-09-08 01:46:18,825 epoch 2 - iter 24/24 - loss 0.63649370 - samples/sec: 18.90 - lr: 0.020000\n",
-      "2021-09-08 01:46:18,826 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:18,826 EPOCH 2 done: loss 0.6365 - lr 0.0200000\n",
-      "2021-09-08 01:46:18,907 DEV : loss 0.6367577314376831 - score 0.3333\n",
-      "2021-09-08 01:46:18,908 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:46:18,909 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:19,040 epoch 3 - iter 2/24 - loss 0.62996864 - samples/sec: 18.19 - lr: 0.020000\n",
-      "2021-09-08 01:46:19,144 epoch 3 - iter 4/24 - loss 0.62775272 - samples/sec: 19.26 - lr: 0.020000\n",
-      "2021-09-08 01:46:19,238 epoch 3 - iter 6/24 - loss 0.63100627 - samples/sec: 21.33 - lr: 0.020000\n",
-      "2021-09-08 01:46:19,340 epoch 3 - iter 8/24 - loss 0.63440567 - samples/sec: 19.73 - lr: 0.020000\n",
-      "2021-09-08 01:46:19,441 epoch 3 - iter 10/24 - loss 0.63558449 - samples/sec: 20.04 - lr: 0.020000\n",
-      "2021-09-08 01:46:19,548 epoch 3 - iter 12/24 - loss 0.63648249 - samples/sec: 18.67 - lr: 0.020000\n",
-      "2021-09-08 01:46:19,653 epoch 3 - iter 14/24 - loss 0.63834960 - samples/sec: 19.24 - lr: 0.020000\n",
-      "2021-09-08 01:46:19,759 epoch 3 - iter 16/24 - loss 0.63888638 - samples/sec: 18.96 - lr: 0.020000\n",
-      "2021-09-08 01:46:19,863 epoch 3 - iter 18/24 - loss 0.63854852 - samples/sec: 19.24 - lr: 0.020000\n",
-      "2021-09-08 01:46:19,959 epoch 3 - iter 20/24 - loss 0.63820062 - samples/sec: 21.13 - lr: 0.020000\n",
-      "2021-09-08 01:46:20,061 epoch 3 - iter 22/24 - loss 0.63712747 - samples/sec: 19.69 - lr: 0.020000\n",
-      "2021-09-08 01:46:20,165 epoch 3 - iter 24/24 - loss 0.63789884 - samples/sec: 19.44 - lr: 0.020000\n",
-      "2021-09-08 01:46:20,166 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:20,167 EPOCH 3 done: loss 0.6379 - lr 0.0200000\n",
-      "2021-09-08 01:46:20,237 DEV : loss 0.6372421979904175 - score 0.6667\n",
-      "2021-09-08 01:46:20,239 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:46:20,241 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:20,376 epoch 4 - iter 2/24 - loss 0.63991949 - samples/sec: 17.75 - lr: 0.020000\n",
-      "2021-09-08 01:46:20,478 epoch 4 - iter 4/24 - loss 0.63631000 - samples/sec: 19.79 - lr: 0.020000\n",
-      "2021-09-08 01:46:20,579 epoch 4 - iter 6/24 - loss 0.63526457 - samples/sec: 20.02 - lr: 0.020000\n",
-      "2021-09-08 01:46:20,676 epoch 4 - iter 8/24 - loss 0.63715364 - samples/sec: 20.81 - lr: 0.020000\n",
-      "2021-09-08 01:46:20,778 epoch 4 - iter 10/24 - loss 0.63699685 - samples/sec: 19.64 - lr: 0.020000\n",
-      "2021-09-08 01:46:20,887 epoch 4 - iter 12/24 - loss 0.63655414 - samples/sec: 18.47 - lr: 0.020000\n",
-      "2021-09-08 01:46:20,992 epoch 4 - iter 14/24 - loss 0.63635133 - samples/sec: 19.24 - lr: 0.020000\n",
-      "2021-09-08 01:46:21,085 epoch 4 - iter 16/24 - loss 0.63645766 - samples/sec: 21.79 - lr: 0.020000\n",
-      "2021-09-08 01:46:21,176 epoch 4 - iter 18/24 - loss 0.63611646 - samples/sec: 22.16 - lr: 0.020000\n",
-      "2021-09-08 01:46:21,273 epoch 4 - iter 20/24 - loss 0.63419707 - samples/sec: 20.83 - lr: 0.020000\n",
-      "2021-09-08 01:46:21,372 epoch 4 - iter 22/24 - loss 0.63465506 - samples/sec: 20.36 - lr: 0.020000\n",
-      "2021-09-08 01:46:21,470 epoch 4 - iter 24/24 - loss 0.63480224 - samples/sec: 20.61 - lr: 0.020000\n",
-      "2021-09-08 01:46:21,471 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:21,472 EPOCH 4 done: loss 0.6348 - lr 0.0200000\n",
-      "2021-09-08 01:46:21,654 DEV : loss 0.6365276575088501 - score 0.0\n",
-      "2021-09-08 01:46:21,655 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:46:21,739 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:21,871 epoch 5 - iter 2/24 - loss 0.63362280 - samples/sec: 18.28 - lr: 0.020000\n",
-      "2021-09-08 01:46:21,985 epoch 5 - iter 4/24 - loss 0.63978454 - samples/sec: 17.60 - lr: 0.020000\n",
-      "2021-09-08 01:46:22,100 epoch 5 - iter 6/24 - loss 0.63689480 - samples/sec: 17.65 - lr: 0.020000\n",
-      "2021-09-08 01:46:22,199 epoch 5 - iter 8/24 - loss 0.63786964 - samples/sec: 20.30 - lr: 0.020000\n",
-      "2021-09-08 01:46:22,304 epoch 5 - iter 10/24 - loss 0.63528817 - samples/sec: 19.21 - lr: 0.020000\n",
-      "2021-09-08 01:46:22,408 epoch 5 - iter 12/24 - loss 0.63839254 - samples/sec: 19.30 - lr: 0.020000\n",
-      "2021-09-08 01:46:22,512 epoch 5 - iter 14/24 - loss 0.63825168 - samples/sec: 19.40 - lr: 0.020000\n",
-      "2021-09-08 01:46:22,609 epoch 5 - iter 16/24 - loss 0.63965488 - samples/sec: 20.94 - lr: 0.020000\n",
-      "2021-09-08 01:46:22,716 epoch 5 - iter 18/24 - loss 0.63882498 - samples/sec: 18.71 - lr: 0.020000\n",
-      "2021-09-08 01:46:22,818 epoch 5 - iter 20/24 - loss 0.63726585 - samples/sec: 19.66 - lr: 0.020000\n",
-      "2021-09-08 01:46:22,917 epoch 5 - iter 22/24 - loss 0.63723487 - samples/sec: 20.40 - lr: 0.020000\n",
-      "2021-09-08 01:46:23,029 epoch 5 - iter 24/24 - loss 0.63718840 - samples/sec: 17.96 - lr: 0.020000\n",
-      "2021-09-08 01:46:23,030 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:23,031 EPOCH 5 done: loss 0.6372 - lr 0.0200000\n",
-      "2021-09-08 01:46:23,204 DEV : loss 0.6365406513214111 - score 0.6667\n",
-      "2021-09-08 01:46:23,206 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:16:22,312 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:22,511 epoch 2 - iter 2/24 - loss 0.63691175 - samples/sec: 12.48 - lr: 0.020000\n",
+      "2021-09-21 21:16:22,684 epoch 2 - iter 4/24 - loss 0.64043826 - samples/sec: 11.63 - lr: 0.020000\n",
+      "2021-09-21 21:16:22,847 epoch 2 - iter 6/24 - loss 0.63844846 - samples/sec: 12.33 - lr: 0.020000\n",
+      "2021-09-21 21:16:23,045 epoch 2 - iter 8/24 - loss 0.64016663 - samples/sec: 10.09 - lr: 0.020000\n",
+      "2021-09-21 21:16:23,218 epoch 2 - iter 10/24 - loss 0.63473180 - samples/sec: 11.63 - lr: 0.020000\n",
+      "2021-09-21 21:16:23,400 epoch 2 - iter 12/24 - loss 0.63552876 - samples/sec: 11.00 - lr: 0.020000\n",
+      "2021-09-21 21:16:23,584 epoch 2 - iter 14/24 - loss 0.63753361 - samples/sec: 10.90 - lr: 0.020000\n",
+      "2021-09-21 21:16:23,767 epoch 2 - iter 16/24 - loss 0.63592490 - samples/sec: 10.97 - lr: 0.020000\n",
+      "2021-09-21 21:16:23,946 epoch 2 - iter 18/24 - loss 0.63642496 - samples/sec: 11.24 - lr: 0.020000\n",
+      "2021-09-21 21:16:24,140 epoch 2 - iter 20/24 - loss 0.63621227 - samples/sec: 10.33 - lr: 0.020000\n",
+      "2021-09-21 21:16:24,302 epoch 2 - iter 22/24 - loss 0.63550758 - samples/sec: 12.40 - lr: 0.020000\n",
+      "2021-09-21 21:16:24,477 epoch 2 - iter 24/24 - loss 0.63552464 - samples/sec: 11.47 - lr: 0.020000\n",
+      "2021-09-21 21:16:24,478 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:24,478 EPOCH 2 done: loss 0.6355 - lr 0.0200000\n",
+      "2021-09-21 21:16:24,586 DEV : loss 0.6365176439285278 - score 0.3333\n",
+      "2021-09-21 21:16:24,587 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:46:29,947 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:30,091 epoch 6 - iter 2/24 - loss 0.64319605 - samples/sec: 16.35 - lr: 0.020000\n",
-      "2021-09-08 01:46:30,202 epoch 6 - iter 4/24 - loss 0.64658703 - samples/sec: 18.06 - lr: 0.020000\n"
+      "2021-09-21 21:16:28,734 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:28,904 epoch 3 - iter 2/24 - loss 0.64095941 - samples/sec: 15.51 - lr: 0.020000\n",
+      "2021-09-21 21:16:29,026 epoch 3 - iter 4/24 - loss 0.63604471 - samples/sec: 16.43 - lr: 0.020000\n",
+      "2021-09-21 21:16:29,155 epoch 3 - iter 6/24 - loss 0.63566961 - samples/sec: 15.54 - lr: 0.020000\n",
+      "2021-09-21 21:16:29,279 epoch 3 - iter 8/24 - loss 0.63587128 - samples/sec: 16.27 - lr: 0.020000\n",
+      "2021-09-21 21:16:29,405 epoch 3 - iter 10/24 - loss 0.63481833 - samples/sec: 15.95 - lr: 0.020000\n",
+      "2021-09-21 21:16:29,520 epoch 3 - iter 12/24 - loss 0.63446396 - samples/sec: 17.48 - lr: 0.020000\n",
+      "2021-09-21 21:16:29,636 epoch 3 - iter 14/24 - loss 0.63379160 - samples/sec: 17.37 - lr: 0.020000\n",
+      "2021-09-21 21:16:29,760 epoch 3 - iter 16/24 - loss 0.63348185 - samples/sec: 16.14 - lr: 0.020000\n",
+      "2021-09-21 21:16:29,884 epoch 3 - iter 18/24 - loss 0.63416497 - samples/sec: 16.21 - lr: 0.020000\n",
+      "2021-09-21 21:16:30,014 epoch 3 - iter 20/24 - loss 0.63441311 - samples/sec: 15.44 - lr: 0.020000\n",
+      "2021-09-21 21:16:30,155 epoch 3 - iter 22/24 - loss 0.63606930 - samples/sec: 14.27 - lr: 0.020000\n",
+      "2021-09-21 21:16:30,282 epoch 3 - iter 24/24 - loss 0.63630010 - samples/sec: 15.87 - lr: 0.020000\n",
+      "2021-09-21 21:16:30,283 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:30,283 EPOCH 3 done: loss 0.6363 - lr 0.0200000\n",
+      "2021-09-21 21:16:30,467 DEV : loss 0.6367182731628418 - score 0.3333\n",
+      "2021-09-21 21:16:30,470 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:16:30,556 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:30,703 epoch 4 - iter 2/24 - loss 0.61270756 - samples/sec: 16.40 - lr: 0.020000\n",
+      "2021-09-21 21:16:30,820 epoch 4 - iter 4/24 - loss 0.62639372 - samples/sec: 17.19 - lr: 0.020000\n",
+      "2021-09-21 21:16:30,933 epoch 4 - iter 6/24 - loss 0.62836706 - samples/sec: 17.85 - lr: 0.020000\n",
+      "2021-09-21 21:16:31,028 epoch 4 - iter 8/24 - loss 0.62598795 - samples/sec: 21.03 - lr: 0.020000\n",
+      "2021-09-21 21:16:31,119 epoch 4 - iter 10/24 - loss 0.63271835 - samples/sec: 22.16 - lr: 0.020000\n",
+      "2021-09-21 21:16:31,225 epoch 4 - iter 12/24 - loss 0.63262511 - samples/sec: 18.94 - lr: 0.020000\n",
+      "2021-09-21 21:16:31,347 epoch 4 - iter 14/24 - loss 0.63271120 - samples/sec: 16.50 - lr: 0.020000\n",
+      "2021-09-21 21:16:31,463 epoch 4 - iter 16/24 - loss 0.63236303 - samples/sec: 17.43 - lr: 0.020000\n",
+      "2021-09-21 21:16:31,576 epoch 4 - iter 18/24 - loss 0.63346407 - samples/sec: 17.80 - lr: 0.020000\n",
+      "2021-09-21 21:16:31,685 epoch 4 - iter 20/24 - loss 0.63416260 - samples/sec: 18.54 - lr: 0.020000\n",
+      "2021-09-21 21:16:31,804 epoch 4 - iter 22/24 - loss 0.63438236 - samples/sec: 16.86 - lr: 0.020000\n",
+      "2021-09-21 21:16:31,916 epoch 4 - iter 24/24 - loss 0.63488930 - samples/sec: 18.02 - lr: 0.020000\n",
+      "2021-09-21 21:16:31,917 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:31,918 EPOCH 4 done: loss 0.6349 - lr 0.0200000\n",
+      "2021-09-21 21:16:32,097 DEV : loss 0.6365840435028076 - score 0.0\n",
+      "2021-09-21 21:16:32,097 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:16:32,099 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:32,223 epoch 5 - iter 2/24 - loss 0.63730469 - samples/sec: 18.26 - lr: 0.020000\n",
+      "2021-09-21 21:16:32,336 epoch 5 - iter 4/24 - loss 0.63603671 - samples/sec: 17.74 - lr: 0.020000\n",
+      "2021-09-21 21:16:32,449 epoch 5 - iter 6/24 - loss 0.63606325 - samples/sec: 17.75 - lr: 0.020000\n",
+      "2021-09-21 21:16:32,557 epoch 5 - iter 8/24 - loss 0.63800253 - samples/sec: 18.60 - lr: 0.020000\n",
+      "2021-09-21 21:16:32,667 epoch 5 - iter 10/24 - loss 0.63593933 - samples/sec: 18.25 - lr: 0.020000\n",
+      "2021-09-21 21:16:32,784 epoch 5 - iter 12/24 - loss 0.64279904 - samples/sec: 17.18 - lr: 0.020000\n",
+      "2021-09-21 21:16:32,889 epoch 5 - iter 14/24 - loss 0.64554689 - samples/sec: 19.26 - lr: 0.020000\n",
+      "2021-09-21 21:16:32,998 epoch 5 - iter 16/24 - loss 0.64365692 - samples/sec: 18.31 - lr: 0.020000\n",
+      "2021-09-21 21:16:33,111 epoch 5 - iter 18/24 - loss 0.64596084 - samples/sec: 17.81 - lr: 0.020000\n",
+      "2021-09-21 21:16:33,221 epoch 5 - iter 20/24 - loss 0.64361030 - samples/sec: 18.23 - lr: 0.020000\n",
+      "2021-09-21 21:16:33,321 epoch 5 - iter 22/24 - loss 0.64266332 - samples/sec: 20.28 - lr: 0.020000\n",
+      "2021-09-21 21:16:33,433 epoch 5 - iter 24/24 - loss 0.64246952 - samples/sec: 17.85 - lr: 0.020000\n",
+      "2021-09-21 21:16:33,434 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:33,434 EPOCH 5 done: loss 0.6425 - lr 0.0200000\n",
+      "2021-09-21 21:16:35,171 DEV : loss 0.6365221738815308 - score 0.0\n",
+      "2021-09-21 21:16:35,172 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:16:35,174 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:46:30,307 epoch 6 - iter 6/24 - loss 0.64238652 - samples/sec: 19.28 - lr: 0.020000\n",
-      "2021-09-08 01:46:30,420 epoch 6 - iter 8/24 - loss 0.63944468 - samples/sec: 17.72 - lr: 0.020000\n",
-      "2021-09-08 01:46:30,537 epoch 6 - iter 10/24 - loss 0.63981252 - samples/sec: 17.15 - lr: 0.020000\n",
-      "2021-09-08 01:46:30,641 epoch 6 - iter 12/24 - loss 0.63723737 - samples/sec: 19.39 - lr: 0.020000\n",
-      "2021-09-08 01:46:30,748 epoch 6 - iter 14/24 - loss 0.63459348 - samples/sec: 18.93 - lr: 0.020000\n",
-      "2021-09-08 01:46:30,864 epoch 6 - iter 16/24 - loss 0.63459880 - samples/sec: 17.27 - lr: 0.020000\n",
-      "2021-09-08 01:46:30,978 epoch 6 - iter 18/24 - loss 0.63492271 - samples/sec: 17.62 - lr: 0.020000\n",
-      "2021-09-08 01:46:31,088 epoch 6 - iter 20/24 - loss 0.63378229 - samples/sec: 18.32 - lr: 0.020000\n",
-      "2021-09-08 01:46:31,201 epoch 6 - iter 22/24 - loss 0.63480659 - samples/sec: 17.83 - lr: 0.020000\n",
-      "2021-09-08 01:46:31,306 epoch 6 - iter 24/24 - loss 0.63476220 - samples/sec: 19.31 - lr: 0.020000\n",
-      "2021-09-08 01:46:31,307 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:31,307 EPOCH 6 done: loss 0.6348 - lr 0.0200000\n",
-      "2021-09-08 01:46:31,382 DEV : loss 0.6365647315979004 - score 0.3333\n",
-      "2021-09-08 01:46:31,383 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:46:31,385 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:31,516 epoch 7 - iter 2/24 - loss 0.60878706 - samples/sec: 17.76 - lr: 0.020000\n",
-      "2021-09-08 01:46:31,619 epoch 7 - iter 4/24 - loss 0.61750373 - samples/sec: 19.53 - lr: 0.020000\n",
-      "2021-09-08 01:46:31,724 epoch 7 - iter 6/24 - loss 0.62612867 - samples/sec: 19.05 - lr: 0.020000\n",
-      "2021-09-08 01:46:31,827 epoch 7 - iter 8/24 - loss 0.63267981 - samples/sec: 19.57 - lr: 0.020000\n",
-      "2021-09-08 01:46:31,940 epoch 7 - iter 10/24 - loss 0.63375964 - samples/sec: 17.85 - lr: 0.020000\n",
-      "2021-09-08 01:46:32,038 epoch 7 - iter 12/24 - loss 0.63215933 - samples/sec: 20.59 - lr: 0.020000\n",
-      "2021-09-08 01:46:32,157 epoch 7 - iter 14/24 - loss 0.63130681 - samples/sec: 16.85 - lr: 0.020000\n",
-      "2021-09-08 01:46:32,262 epoch 7 - iter 16/24 - loss 0.63624822 - samples/sec: 19.30 - lr: 0.020000\n",
-      "2021-09-08 01:46:32,372 epoch 7 - iter 18/24 - loss 0.63650642 - samples/sec: 18.32 - lr: 0.020000\n",
-      "2021-09-08 01:46:32,491 epoch 7 - iter 20/24 - loss 0.63595585 - samples/sec: 16.90 - lr: 0.020000\n",
-      "2021-09-08 01:46:32,610 epoch 7 - iter 22/24 - loss 0.63453399 - samples/sec: 16.95 - lr: 0.020000\n",
-      "2021-09-08 01:46:32,732 epoch 7 - iter 24/24 - loss 0.63328094 - samples/sec: 16.51 - lr: 0.020000\n",
-      "2021-09-08 01:46:32,733 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:32,734 EPOCH 7 done: loss 0.6333 - lr 0.0200000\n",
-      "2021-09-08 01:46:32,811 DEV : loss 0.6366124153137207 - score 0.3333\n",
-      "2021-09-08 01:46:32,812 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:46:32,814 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:32,942 epoch 8 - iter 2/24 - loss 0.63096687 - samples/sec: 17.72 - lr: 0.020000\n",
-      "2021-09-08 01:46:33,055 epoch 8 - iter 4/24 - loss 0.64510891 - samples/sec: 17.81 - lr: 0.020000\n",
-      "2021-09-08 01:46:33,157 epoch 8 - iter 6/24 - loss 0.63983301 - samples/sec: 19.74 - lr: 0.020000\n",
-      "2021-09-08 01:46:33,256 epoch 8 - iter 8/24 - loss 0.63841146 - samples/sec: 20.30 - lr: 0.020000\n",
-      "2021-09-08 01:46:33,354 epoch 8 - iter 10/24 - loss 0.63699864 - samples/sec: 20.50 - lr: 0.020000\n",
-      "2021-09-08 01:46:33,458 epoch 8 - iter 12/24 - loss 0.63509725 - samples/sec: 19.18 - lr: 0.020000\n",
-      "2021-09-08 01:46:33,556 epoch 8 - iter 14/24 - loss 0.63561199 - samples/sec: 20.63 - lr: 0.020000\n",
-      "2021-09-08 01:46:33,662 epoch 8 - iter 16/24 - loss 0.63527133 - samples/sec: 19.00 - lr: 0.020000\n",
-      "2021-09-08 01:46:33,766 epoch 8 - iter 18/24 - loss 0.63710623 - samples/sec: 19.28 - lr: 0.020000\n",
-      "2021-09-08 01:46:33,869 epoch 8 - iter 20/24 - loss 0.63856602 - samples/sec: 19.51 - lr: 0.020000\n",
-      "2021-09-08 01:46:33,970 epoch 8 - iter 22/24 - loss 0.63755828 - samples/sec: 19.95 - lr: 0.020000\n",
-      "2021-09-08 01:46:34,085 epoch 8 - iter 24/24 - loss 0.63859774 - samples/sec: 17.46 - lr: 0.020000\n",
-      "2021-09-08 01:46:34,087 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:34,087 EPOCH 8 done: loss 0.6386 - lr 0.0200000\n",
-      "2021-09-08 01:46:34,163 DEV : loss 0.6366041898727417 - score 0.3333\n",
-      "2021-09-08 01:46:34,164 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:46:34,166 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:34,292 epoch 9 - iter 2/24 - loss 0.62783113 - samples/sec: 18.58 - lr: 0.020000\n",
-      "2021-09-08 01:46:34,409 epoch 9 - iter 4/24 - loss 0.62730646 - samples/sec: 17.27 - lr: 0.020000\n",
-      "2021-09-08 01:46:34,523 epoch 9 - iter 6/24 - loss 0.62708227 - samples/sec: 17.61 - lr: 0.020000\n",
-      "2021-09-08 01:46:34,636 epoch 9 - iter 8/24 - loss 0.62920910 - samples/sec: 17.79 - lr: 0.020000\n",
-      "2021-09-08 01:46:34,752 epoch 9 - iter 10/24 - loss 0.62816207 - samples/sec: 17.48 - lr: 0.020000\n",
-      "2021-09-08 01:46:34,854 epoch 9 - iter 12/24 - loss 0.63243732 - samples/sec: 19.72 - lr: 0.020000\n",
-      "2021-09-08 01:46:34,955 epoch 9 - iter 14/24 - loss 0.63408347 - samples/sec: 19.92 - lr: 0.020000\n",
-      "2021-09-08 01:46:35,065 epoch 9 - iter 16/24 - loss 0.63316920 - samples/sec: 18.25 - lr: 0.020000\n",
-      "2021-09-08 01:46:35,165 epoch 9 - iter 18/24 - loss 0.63377255 - samples/sec: 20.15 - lr: 0.020000\n",
-      "2021-09-08 01:46:35,262 epoch 9 - iter 20/24 - loss 0.63600312 - samples/sec: 20.62 - lr: 0.020000\n",
-      "2021-09-08 01:46:35,372 epoch 9 - iter 22/24 - loss 0.63611460 - samples/sec: 18.31 - lr: 0.020000\n",
-      "2021-09-08 01:46:35,473 epoch 9 - iter 24/24 - loss 0.63637242 - samples/sec: 19.91 - lr: 0.020000\n",
-      "2021-09-08 01:46:35,474 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:35,474 EPOCH 9 done: loss 0.6364 - lr 0.0200000\n",
-      "2021-09-08 01:46:35,554 DEV : loss 0.6368011832237244 - score 0.3333\n",
-      "Epoch     9: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:46:35,555 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:46:35,557 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:35,686 epoch 10 - iter 2/24 - loss 0.62906215 - samples/sec: 18.20 - lr: 0.010000\n",
-      "2021-09-08 01:46:35,796 epoch 10 - iter 4/24 - loss 0.63712178 - samples/sec: 18.36 - lr: 0.010000\n",
-      "2021-09-08 01:46:35,900 epoch 10 - iter 6/24 - loss 0.63994923 - samples/sec: 19.30 - lr: 0.010000\n",
-      "2021-09-08 01:46:35,996 epoch 10 - iter 8/24 - loss 0.64405210 - samples/sec: 20.86 - lr: 0.010000\n",
-      "2021-09-08 01:46:36,102 epoch 10 - iter 10/24 - loss 0.64219437 - samples/sec: 18.98 - lr: 0.010000\n",
-      "2021-09-08 01:46:36,210 epoch 10 - iter 12/24 - loss 0.64143449 - samples/sec: 18.62 - lr: 0.010000\n",
-      "2021-09-08 01:46:36,310 epoch 10 - iter 14/24 - loss 0.64246170 - samples/sec: 20.02 - lr: 0.010000\n",
-      "2021-09-08 01:46:36,408 epoch 10 - iter 16/24 - loss 0.64030809 - samples/sec: 20.54 - lr: 0.010000\n",
-      "2021-09-08 01:46:36,507 epoch 10 - iter 18/24 - loss 0.64542159 - samples/sec: 20.37 - lr: 0.010000\n",
-      "2021-09-08 01:46:36,609 epoch 10 - iter 20/24 - loss 0.64529118 - samples/sec: 19.64 - lr: 0.010000\n",
-      "2021-09-08 01:46:36,711 epoch 10 - iter 22/24 - loss 0.64402487 - samples/sec: 19.64 - lr: 0.010000\n",
-      "2021-09-08 01:46:36,809 epoch 10 - iter 24/24 - loss 0.64246700 - samples/sec: 20.58 - lr: 0.010000\n",
-      "2021-09-08 01:46:36,810 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:36,810 EPOCH 10 done: loss 0.6425 - lr 0.0100000\n",
-      "2021-09-08 01:46:36,899 DEV : loss 0.6368597745895386 - score 0.6667\n",
-      "2021-09-08 01:46:36,900 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:46:41,179 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:41,180 Testing using best model ...\n",
-      "2021-09-08 01:46:41,181 loading file None1/best-model.pt\n",
+      "2021-09-21 21:16:35,281 epoch 6 - iter 2/24 - loss 0.64817113 - samples/sec: 21.42 - lr: 0.020000\n",
+      "2021-09-21 21:16:35,373 epoch 6 - iter 4/24 - loss 0.65216514 - samples/sec: 21.85 - lr: 0.020000\n",
+      "2021-09-21 21:16:35,480 epoch 6 - iter 6/24 - loss 0.64242278 - samples/sec: 18.90 - lr: 0.020000\n",
+      "2021-09-21 21:16:35,578 epoch 6 - iter 8/24 - loss 0.64097093 - samples/sec: 20.55 - lr: 0.020000\n",
+      "2021-09-21 21:16:35,677 epoch 6 - iter 10/24 - loss 0.64117136 - samples/sec: 20.45 - lr: 0.020000\n",
+      "2021-09-21 21:16:35,779 epoch 6 - iter 12/24 - loss 0.64226115 - samples/sec: 19.92 - lr: 0.020000\n",
+      "2021-09-21 21:16:35,874 epoch 6 - iter 14/24 - loss 0.63921490 - samples/sec: 21.05 - lr: 0.020000\n",
+      "2021-09-21 21:16:35,964 epoch 6 - iter 16/24 - loss 0.63851382 - samples/sec: 22.66 - lr: 0.020000\n",
+      "2021-09-21 21:16:36,052 epoch 6 - iter 18/24 - loss 0.64150807 - samples/sec: 22.78 - lr: 0.020000\n",
+      "2021-09-21 21:16:36,141 epoch 6 - iter 20/24 - loss 0.64219391 - samples/sec: 22.72 - lr: 0.020000\n",
+      "2021-09-21 21:16:36,230 epoch 6 - iter 22/24 - loss 0.64457726 - samples/sec: 22.64 - lr: 0.020000\n",
+      "2021-09-21 21:16:36,319 epoch 6 - iter 24/24 - loss 0.64534693 - samples/sec: 22.48 - lr: 0.020000\n",
+      "2021-09-21 21:16:36,321 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:36,321 EPOCH 6 done: loss 0.6453 - lr 0.0200000\n",
+      "2021-09-21 21:16:36,621 DEV : loss 0.6369831562042236 - score 0.3333\n",
+      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:16:36,622 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:16:36,908 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:37,039 epoch 7 - iter 2/24 - loss 0.63743365 - samples/sec: 18.14 - lr: 0.010000\n",
+      "2021-09-21 21:16:37,150 epoch 7 - iter 4/24 - loss 0.63662633 - samples/sec: 18.11 - lr: 0.010000\n",
+      "2021-09-21 21:16:37,268 epoch 7 - iter 6/24 - loss 0.63541211 - samples/sec: 17.13 - lr: 0.010000\n",
+      "2021-09-21 21:16:37,376 epoch 7 - iter 8/24 - loss 0.63547698 - samples/sec: 18.55 - lr: 0.010000\n",
+      "2021-09-21 21:16:37,487 epoch 7 - iter 10/24 - loss 0.63556658 - samples/sec: 18.19 - lr: 0.010000\n",
+      "2021-09-21 21:16:37,597 epoch 7 - iter 12/24 - loss 0.63407284 - samples/sec: 18.33 - lr: 0.010000\n",
+      "2021-09-21 21:16:37,698 epoch 7 - iter 14/24 - loss 0.63586064 - samples/sec: 19.84 - lr: 0.010000\n",
+      "2021-09-21 21:16:37,811 epoch 7 - iter 16/24 - loss 0.63819381 - samples/sec: 17.74 - lr: 0.010000\n",
+      "2021-09-21 21:16:37,923 epoch 7 - iter 18/24 - loss 0.63732923 - samples/sec: 18.07 - lr: 0.010000\n",
+      "2021-09-21 21:16:38,035 epoch 7 - iter 20/24 - loss 0.63783913 - samples/sec: 17.89 - lr: 0.010000\n",
+      "2021-09-21 21:16:38,154 epoch 7 - iter 22/24 - loss 0.63818532 - samples/sec: 16.89 - lr: 0.010000\n",
+      "2021-09-21 21:16:38,268 epoch 7 - iter 24/24 - loss 0.63777145 - samples/sec: 17.72 - lr: 0.010000\n",
+      "2021-09-21 21:16:38,270 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:38,270 EPOCH 7 done: loss 0.6378 - lr 0.0100000\n",
+      "2021-09-21 21:16:38,428 DEV : loss 0.6365172863006592 - score 0.3333\n",
+      "2021-09-21 21:16:38,429 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:16:47,997 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:48,159 epoch 8 - iter 2/24 - loss 0.63009265 - samples/sec: 14.53 - lr: 0.010000\n",
+      "2021-09-21 21:16:48,294 epoch 8 - iter 4/24 - loss 0.63172458 - samples/sec: 14.95 - lr: 0.010000\n",
+      "2021-09-21 21:16:48,433 epoch 8 - iter 6/24 - loss 0.62972418 - samples/sec: 14.38 - lr: 0.010000\n",
+      "2021-09-21 21:16:48,697 epoch 8 - iter 8/24 - loss 0.63206923 - samples/sec: 7.61 - lr: 0.010000\n",
+      "2021-09-21 21:16:48,839 epoch 8 - iter 10/24 - loss 0.63374078 - samples/sec: 14.21 - lr: 0.010000\n",
+      "2021-09-21 21:16:48,975 epoch 8 - iter 12/24 - loss 0.63442327 - samples/sec: 14.68 - lr: 0.010000\n",
+      "2021-09-21 21:16:49,112 epoch 8 - iter 14/24 - loss 0.63312768 - samples/sec: 14.65 - lr: 0.010000\n",
+      "2021-09-21 21:16:49,263 epoch 8 - iter 16/24 - loss 0.63373585 - samples/sec: 13.30 - lr: 0.010000\n",
+      "2021-09-21 21:16:49,425 epoch 8 - iter 18/24 - loss 0.63480356 - samples/sec: 12.42 - lr: 0.010000\n",
+      "2021-09-21 21:16:49,563 epoch 8 - iter 20/24 - loss 0.63459178 - samples/sec: 14.53 - lr: 0.010000\n",
+      "2021-09-21 21:16:49,714 epoch 8 - iter 22/24 - loss 0.63482270 - samples/sec: 13.35 - lr: 0.010000\n",
+      "2021-09-21 21:16:49,873 epoch 8 - iter 24/24 - loss 0.63514822 - samples/sec: 12.63 - lr: 0.010000\n",
+      "2021-09-21 21:16:49,874 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:49,874 EPOCH 8 done: loss 0.6351 - lr 0.0100000\n",
+      "2021-09-21 21:16:51,831 DEV : loss 0.6365262269973755 - score 0.3333\n",
+      "2021-09-21 21:16:51,832 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:16:51,904 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:52,118 epoch 9 - iter 2/24 - loss 0.64333099 - samples/sec: 10.27 - lr: 0.010000\n",
+      "2021-09-21 21:16:52,272 epoch 9 - iter 4/24 - loss 0.63768598 - samples/sec: 13.04 - lr: 0.010000\n",
+      "2021-09-21 21:16:52,422 epoch 9 - iter 6/24 - loss 0.63576359 - samples/sec: 13.33 - lr: 0.010000\n",
+      "2021-09-21 21:16:52,560 epoch 9 - iter 8/24 - loss 0.63845018 - samples/sec: 14.53 - lr: 0.010000\n",
+      "2021-09-21 21:16:52,721 epoch 9 - iter 10/24 - loss 0.63672230 - samples/sec: 12.48 - lr: 0.010000\n",
+      "2021-09-21 21:16:52,863 epoch 9 - iter 12/24 - loss 0.63926310 - samples/sec: 14.19 - lr: 0.010000\n",
+      "2021-09-21 21:16:53,005 epoch 9 - iter 14/24 - loss 0.63780020 - samples/sec: 14.08 - lr: 0.010000\n",
+      "2021-09-21 21:16:53,187 epoch 9 - iter 16/24 - loss 0.63614561 - samples/sec: 11.05 - lr: 0.010000\n",
+      "2021-09-21 21:16:53,349 epoch 9 - iter 18/24 - loss 0.63697130 - samples/sec: 12.44 - lr: 0.010000\n",
+      "2021-09-21 21:16:53,529 epoch 9 - iter 20/24 - loss 0.63620184 - samples/sec: 11.11 - lr: 0.010000\n",
+      "2021-09-21 21:16:53,699 epoch 9 - iter 22/24 - loss 0.63624385 - samples/sec: 11.83 - lr: 0.010000\n",
+      "2021-09-21 21:16:53,861 epoch 9 - iter 24/24 - loss 0.63706628 - samples/sec: 12.39 - lr: 0.010000\n",
+      "2021-09-21 21:16:53,862 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:53,862 EPOCH 9 done: loss 0.6371 - lr 0.0100000\n",
+      "2021-09-21 21:16:53,993 DEV : loss 0.6367186903953552 - score 0.0\n",
+      "2021-09-21 21:16:53,994 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:16:53,995 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:54,176 epoch 10 - iter 2/24 - loss 0.62535283 - samples/sec: 14.16 - lr: 0.010000\n",
+      "2021-09-21 21:16:54,352 epoch 10 - iter 4/24 - loss 0.63115211 - samples/sec: 11.42 - lr: 0.010000\n",
+      "2021-09-21 21:16:54,501 epoch 10 - iter 6/24 - loss 0.63692294 - samples/sec: 13.41 - lr: 0.010000\n",
+      "2021-09-21 21:16:54,648 epoch 10 - iter 8/24 - loss 0.63604110 - samples/sec: 13.72 - lr: 0.010000\n",
+      "2021-09-21 21:16:54,807 epoch 10 - iter 10/24 - loss 0.63357283 - samples/sec: 12.61 - lr: 0.010000\n",
+      "2021-09-21 21:16:54,914 epoch 10 - iter 12/24 - loss 0.63255819 - samples/sec: 18.68 - lr: 0.010000\n",
+      "2021-09-21 21:16:55,040 epoch 10 - iter 14/24 - loss 0.63327179 - samples/sec: 16.10 - lr: 0.010000\n",
+      "2021-09-21 21:16:55,167 epoch 10 - iter 16/24 - loss 0.63149781 - samples/sec: 15.77 - lr: 0.010000\n",
+      "2021-09-21 21:16:55,282 epoch 10 - iter 18/24 - loss 0.63330414 - samples/sec: 17.45 - lr: 0.010000\n",
+      "2021-09-21 21:16:55,381 epoch 10 - iter 20/24 - loss 0.63389129 - samples/sec: 20.50 - lr: 0.010000\n",
+      "2021-09-21 21:16:55,514 epoch 10 - iter 22/24 - loss 0.63567063 - samples/sec: 15.04 - lr: 0.010000\n",
+      "2021-09-21 21:16:55,615 epoch 10 - iter 24/24 - loss 0.63662905 - samples/sec: 20.04 - lr: 0.010000\n",
+      "2021-09-21 21:16:55,616 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:55,616 EPOCH 10 done: loss 0.6366 - lr 0.0100000\n",
+      "2021-09-21 21:16:55,792 DEV : loss 0.6365327835083008 - score 0.3333\n",
+      "2021-09-21 21:16:55,794 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:17:01,281 ----------------------------------------------------------------------------------------------------\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 21:17:01,282 Testing using best model ...\n",
+      "2021-09-21 21:17:01,283 loading file None1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:46:46,070 \t0.3333\n",
-      "2021-09-08 01:46:46,070 \n",
+      "2021-09-21 21:17:06,616 \t0.6667\n",
+      "2021-09-21 21:17:06,616 \n",
       "Results:\n",
-      "- F-score (micro) 0.3333\n",
-      "- F-score (macro) 0.1667\n",
-      "- Accuracy 0.3333\n",
+      "- F-score (micro) 0.6667\n",
+      "- F-score (macro) 0.2667\n",
+      "- Accuracy 0.6667\n",
       "\n",
       "By class:\n",
       "                                        precision    recall  f1-score   support\n",
       "\n",
-      "This text entails a positive sentiment     0.0000    0.0000    0.0000         0\n",
-      " This text entails a neutral sentiment     0.5000    0.5000    0.5000         2\n",
-      "This text entails a negative sentiment     0.0000    0.0000    0.0000         1\n",
+      "This text entails a positive sentiment     0.0000    0.0000    0.0000         1\n",
+      " This text entails a neutral sentiment     0.6667    1.0000    0.8000         2\n",
+      "This text entails a negative sentiment     0.0000    0.0000    0.0000         0\n",
       "\n",
-      "                             micro avg     0.3333    0.3333    0.3333         3\n",
-      "                             macro avg     0.1667    0.1667    0.1667         3\n",
-      "                          weighted avg     0.3333    0.3333    0.3333         3\n",
-      "                           samples avg     0.3333    0.3333    0.3333         3\n",
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 01:46:46,071 ----------------------------------------------------------------------------------------------------\n",
-      "Accuracy Durchschnitt: 0.338804780876494\n"
+      "                             micro avg     0.6667    0.6667    0.6667         3\n",
+      "                             macro avg     0.2222    0.3333    0.2667         3\n",
+      "                          weighted avg     0.4444    0.6667    0.5333         3\n",
+      "                           samples avg     0.6667    0.6667    0.6667         3\n",
+      "\n",
+      "2021-09-21 21:17:06,617 ----------------------------------------------------------------------------------------------------\n",
+      "Accuracy Durchschnitt: 0.3509163346613546\n"
      ]
     }
    ],
@@ -5944,11 +5963,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "id": "86988d74",
+   "execution_count": 7,
+   "id": "ee3fafc0",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.3322709163346614, 0.33306772908366533, 0.34820717131474105, 0.3816733067729084, 0.3593625498007968]\n",
+      "0.018389637273946338\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(avg_acc_list)\n",
+    "print(statistics.pstdev(avg_acc_list))"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -5968,25 +5999,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:47:07,072 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
+      "2021-09-21 21:17:35,168 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:47:11,016 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:17:39,261 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 27/27 [00:00<00:00, 38414.59it/s]"
+      "100%|██████████| 27/27 [00:00<00:00, 36876.00it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:47:11,018 [b'having the quality of something harmful or unpleasant', b'one who does not side with any party in a war or dispute', b'involving advantage or good']\n",
-      "2021-09-08 01:47:11,027 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:11,028 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:17:39,263 [b'having the quality of something harmful or unpleasant', b'one who does not side with any party in a war or dispute', b'involving advantage or good']\n",
+      "2021-09-21 21:17:39,272 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:39,274 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -6299,26 +6330,26 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:47:11,029 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:11,029 Corpus: \"Corpus: 24 train + 3 dev + 3 test sentences\"\n",
-      "2021-09-08 01:47:11,030 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:11,030 Parameters:\n",
-      "2021-09-08 01:47:11,030  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:47:11,030  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:47:11,031  - patience: \"3\"\n",
-      "2021-09-08 01:47:11,031  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:47:11,031  - max_epochs: \"10\"\n",
-      "2021-09-08 01:47:11,032  - shuffle: \"True\"\n",
-      "2021-09-08 01:47:11,032  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:47:11,032  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:47:11,032 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:11,033 Model training base path: \"None1\"\n",
-      "2021-09-08 01:47:11,033 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:11,033 Device: cuda:1\n",
-      "2021-09-08 01:47:11,033 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:11,034 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:47:11,041 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:11,167 epoch 1 - iter 2/24 - loss 0.63032022 - samples/sec: 17.99 - lr: 0.020000\n"
+      "2021-09-21 21:17:39,274 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:39,275 Corpus: \"Corpus: 24 train + 3 dev + 3 test sentences\"\n",
+      "2021-09-21 21:17:39,275 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:39,275 Parameters:\n",
+      "2021-09-21 21:17:39,276  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:17:39,276  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:17:39,276  - patience: \"3\"\n",
+      "2021-09-21 21:17:39,276  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:17:39,277  - max_epochs: \"10\"\n",
+      "2021-09-21 21:17:39,277  - shuffle: \"True\"\n",
+      "2021-09-21 21:17:39,277  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:17:39,278  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:17:39,278 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:39,278 Model training base path: \"None1\"\n",
+      "2021-09-21 21:17:39,278 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:39,279 Device: cuda:0\n",
+      "2021-09-21 21:17:39,279 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:39,279 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:17:39,285 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:39,407 epoch 1 - iter 2/24 - loss 0.63225573 - samples/sec: 18.90 - lr: 0.020000\n"
      ]
     },
     {
@@ -6332,246 +6363,233 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:47:11,282 epoch 1 - iter 4/24 - loss 0.62803896 - samples/sec: 17.46 - lr: 0.020000\n",
-      "2021-09-08 01:47:11,401 epoch 1 - iter 6/24 - loss 0.62868383 - samples/sec: 16.86 - lr: 0.020000\n",
-      "2021-09-08 01:47:11,506 epoch 1 - iter 8/24 - loss 0.63699736 - samples/sec: 19.06 - lr: 0.020000\n",
-      "2021-09-08 01:47:11,615 epoch 1 - iter 10/24 - loss 0.63702615 - samples/sec: 18.38 - lr: 0.020000\n",
-      "2021-09-08 01:47:11,729 epoch 1 - iter 12/24 - loss 0.63733753 - samples/sec: 17.71 - lr: 0.020000\n",
-      "2021-09-08 01:47:11,842 epoch 1 - iter 14/24 - loss 0.63841598 - samples/sec: 17.77 - lr: 0.020000\n",
-      "2021-09-08 01:47:11,931 epoch 1 - iter 16/24 - loss 0.63801572 - samples/sec: 22.67 - lr: 0.020000\n",
-      "2021-09-08 01:47:12,027 epoch 1 - iter 18/24 - loss 0.63709382 - samples/sec: 20.86 - lr: 0.020000\n",
-      "2021-09-08 01:47:12,125 epoch 1 - iter 20/24 - loss 0.63782848 - samples/sec: 20.56 - lr: 0.020000\n",
-      "2021-09-08 01:47:12,225 epoch 1 - iter 22/24 - loss 0.63874631 - samples/sec: 20.10 - lr: 0.020000\n",
-      "2021-09-08 01:47:12,316 epoch 1 - iter 24/24 - loss 0.63794315 - samples/sec: 22.16 - lr: 0.020000\n",
-      "2021-09-08 01:47:12,317 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:12,317 EPOCH 1 done: loss 0.6379 - lr 0.0200000\n",
-      "2021-09-08 01:47:12,371 DEV : loss 0.6365196704864502 - score 0.6667\n",
-      "2021-09-08 01:47:12,372 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:17:39,523 epoch 1 - iter 4/24 - loss 0.63404168 - samples/sec: 17.46 - lr: 0.020000\n",
+      "2021-09-21 21:17:39,631 epoch 1 - iter 6/24 - loss 0.63619886 - samples/sec: 18.47 - lr: 0.020000\n",
+      "2021-09-21 21:17:39,737 epoch 1 - iter 8/24 - loss 0.63574886 - samples/sec: 19.02 - lr: 0.020000\n",
+      "2021-09-21 21:17:39,835 epoch 1 - iter 10/24 - loss 0.63545286 - samples/sec: 20.42 - lr: 0.020000\n",
+      "2021-09-21 21:17:39,943 epoch 1 - iter 12/24 - loss 0.63561576 - samples/sec: 18.65 - lr: 0.020000\n",
+      "2021-09-21 21:17:40,052 epoch 1 - iter 14/24 - loss 0.63697612 - samples/sec: 18.42 - lr: 0.020000\n",
+      "2021-09-21 21:17:40,162 epoch 1 - iter 16/24 - loss 0.63634041 - samples/sec: 18.30 - lr: 0.020000\n",
+      "2021-09-21 21:17:40,272 epoch 1 - iter 18/24 - loss 0.63567456 - samples/sec: 18.34 - lr: 0.020000\n",
+      "2021-09-21 21:17:40,380 epoch 1 - iter 20/24 - loss 0.63521867 - samples/sec: 18.45 - lr: 0.020000\n",
+      "2021-09-21 21:17:40,495 epoch 1 - iter 22/24 - loss 0.63405988 - samples/sec: 17.60 - lr: 0.020000\n",
+      "2021-09-21 21:17:40,623 epoch 1 - iter 24/24 - loss 0.63382826 - samples/sec: 15.67 - lr: 0.020000\n",
+      "2021-09-21 21:17:40,624 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:40,624 EPOCH 1 done: loss 0.6338 - lr 0.0200000\n",
+      "2021-09-21 21:17:40,705 DEV : loss 0.6365157961845398 - score 0.3333\n",
+      "2021-09-21 21:17:40,705 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:47:19,969 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:20,106 epoch 2 - iter 2/24 - loss 0.63517794 - samples/sec: 16.74 - lr: 0.020000\n",
-      "2021-09-08 01:47:20,227 epoch 2 - iter 4/24 - loss 0.64263178 - samples/sec: 16.56 - lr: 0.020000\n",
-      "2021-09-08 01:47:20,342 epoch 2 - iter 6/24 - loss 0.64058725 - samples/sec: 17.49 - lr: 0.020000\n",
-      "2021-09-08 01:47:20,457 epoch 2 - iter 8/24 - loss 0.63639705 - samples/sec: 17.50 - lr: 0.020000\n",
-      "2021-09-08 01:47:20,577 epoch 2 - iter 10/24 - loss 0.63858743 - samples/sec: 16.83 - lr: 0.020000\n",
-      "2021-09-08 01:47:20,689 epoch 2 - iter 12/24 - loss 0.63843838 - samples/sec: 17.92 - lr: 0.020000\n",
-      "2021-09-08 01:47:20,804 epoch 2 - iter 14/24 - loss 0.63787668 - samples/sec: 17.44 - lr: 0.020000\n",
-      "2021-09-08 01:47:20,920 epoch 2 - iter 16/24 - loss 0.63928667 - samples/sec: 17.25 - lr: 0.020000\n",
-      "2021-09-08 01:47:21,031 epoch 2 - iter 18/24 - loss 0.63797337 - samples/sec: 18.13 - lr: 0.020000\n",
-      "2021-09-08 01:47:21,139 epoch 2 - iter 20/24 - loss 0.63757098 - samples/sec: 18.72 - lr: 0.020000\n",
-      "2021-09-08 01:47:21,253 epoch 2 - iter 22/24 - loss 0.63792653 - samples/sec: 17.54 - lr: 0.020000\n",
-      "2021-09-08 01:47:21,366 epoch 2 - iter 24/24 - loss 0.63833079 - samples/sec: 17.87 - lr: 0.020000\n",
-      "2021-09-08 01:47:21,367 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:21,367 EPOCH 2 done: loss 0.6383 - lr 0.0200000\n",
-      "2021-09-08 01:47:22,003 DEV : loss 0.6366268396377563 - score 1.0\n",
-      "2021-09-08 01:47:22,004 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:17:44,733 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:44,893 epoch 2 - iter 2/24 - loss 0.63397866 - samples/sec: 14.40 - lr: 0.020000\n",
+      "2021-09-21 21:17:45,047 epoch 2 - iter 4/24 - loss 0.63856897 - samples/sec: 12.97 - lr: 0.020000\n",
+      "2021-09-21 21:17:45,211 epoch 2 - iter 6/24 - loss 0.64144599 - samples/sec: 12.29 - lr: 0.020000\n",
+      "2021-09-21 21:17:45,345 epoch 2 - iter 8/24 - loss 0.64130971 - samples/sec: 14.98 - lr: 0.020000\n",
+      "2021-09-21 21:17:45,507 epoch 2 - iter 10/24 - loss 0.63913981 - samples/sec: 12.34 - lr: 0.020000\n",
+      "2021-09-21 21:17:45,666 epoch 2 - iter 12/24 - loss 0.64063791 - samples/sec: 12.68 - lr: 0.020000\n",
+      "2021-09-21 21:17:45,821 epoch 2 - iter 14/24 - loss 0.64144622 - samples/sec: 12.89 - lr: 0.020000\n",
+      "2021-09-21 21:17:45,987 epoch 2 - iter 16/24 - loss 0.64118615 - samples/sec: 12.11 - lr: 0.020000\n",
+      "2021-09-21 21:17:46,138 epoch 2 - iter 18/24 - loss 0.64097778 - samples/sec: 13.30 - lr: 0.020000\n",
+      "2021-09-21 21:17:46,291 epoch 2 - iter 20/24 - loss 0.64085428 - samples/sec: 13.15 - lr: 0.020000\n",
+      "2021-09-21 21:17:46,467 epoch 2 - iter 22/24 - loss 0.64044859 - samples/sec: 11.36 - lr: 0.020000\n",
+      "2021-09-21 21:17:46,614 epoch 2 - iter 24/24 - loss 0.64220693 - samples/sec: 13.69 - lr: 0.020000\n",
+      "2021-09-21 21:17:46,615 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:46,615 EPOCH 2 done: loss 0.6422 - lr 0.0200000\n",
+      "2021-09-21 21:17:46,731 DEV : loss 0.636523962020874 - score 0.6667\n",
+      "2021-09-21 21:17:46,731 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:47:26,529 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:26,634 epoch 3 - iter 2/24 - loss 0.62962717 - samples/sec: 22.21 - lr: 0.020000\n",
-      "2021-09-08 01:47:26,721 epoch 3 - iter 4/24 - loss 0.62975013 - samples/sec: 22.92 - lr: 0.020000\n",
-      "2021-09-08 01:47:26,809 epoch 3 - iter 6/24 - loss 0.63101807 - samples/sec: 22.96 - lr: 0.020000\n",
-      "2021-09-08 01:47:26,900 epoch 3 - iter 8/24 - loss 0.63342691 - samples/sec: 22.20 - lr: 0.020000\n",
-      "2021-09-08 01:47:26,988 epoch 3 - iter 10/24 - loss 0.63497328 - samples/sec: 22.88 - lr: 0.020000\n",
-      "2021-09-08 01:47:27,075 epoch 3 - iter 12/24 - loss 0.63226624 - samples/sec: 22.91 - lr: 0.020000\n",
-      "2021-09-08 01:47:27,163 epoch 3 - iter 14/24 - loss 0.63167050 - samples/sec: 22.96 - lr: 0.020000\n",
-      "2021-09-08 01:47:27,251 epoch 3 - iter 16/24 - loss 0.63145303 - samples/sec: 22.94 - lr: 0.020000\n",
-      "2021-09-08 01:47:27,339 epoch 3 - iter 18/24 - loss 0.63358012 - samples/sec: 22.80 - lr: 0.020000\n",
-      "2021-09-08 01:47:27,426 epoch 3 - iter 20/24 - loss 0.63307135 - samples/sec: 22.94 - lr: 0.020000\n",
-      "2021-09-08 01:47:27,516 epoch 3 - iter 22/24 - loss 0.63264938 - samples/sec: 22.53 - lr: 0.020000\n",
-      "2021-09-08 01:47:27,603 epoch 3 - iter 24/24 - loss 0.63297372 - samples/sec: 22.97 - lr: 0.020000\n",
-      "2021-09-08 01:47:27,604 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:27,604 EPOCH 3 done: loss 0.6330 - lr 0.0200000\n",
-      "2021-09-08 01:47:27,752 DEV : loss 0.636529803276062 - score 0.3333\n",
-      "2021-09-08 01:47:27,752 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:47:27,790 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:27,891 epoch 4 - iter 2/24 - loss 0.63789776 - samples/sec: 22.67 - lr: 0.020000\n",
-      "2021-09-08 01:47:27,979 epoch 4 - iter 4/24 - loss 0.64561710 - samples/sec: 22.88 - lr: 0.020000\n",
-      "2021-09-08 01:47:28,067 epoch 4 - iter 6/24 - loss 0.63754480 - samples/sec: 22.84 - lr: 0.020000\n",
-      "2021-09-08 01:47:28,156 epoch 4 - iter 8/24 - loss 0.63937675 - samples/sec: 22.68 - lr: 0.020000\n",
-      "2021-09-08 01:47:28,244 epoch 4 - iter 10/24 - loss 0.64010742 - samples/sec: 22.88 - lr: 0.020000\n",
-      "2021-09-08 01:47:28,335 epoch 4 - iter 12/24 - loss 0.63936293 - samples/sec: 22.09 - lr: 0.020000\n",
-      "2021-09-08 01:47:28,423 epoch 4 - iter 14/24 - loss 0.63850753 - samples/sec: 22.74 - lr: 0.020000\n",
-      "2021-09-08 01:47:28,510 epoch 4 - iter 16/24 - loss 0.63960859 - samples/sec: 23.10 - lr: 0.020000\n",
-      "2021-09-08 01:47:28,599 epoch 4 - iter 18/24 - loss 0.63997755 - samples/sec: 22.70 - lr: 0.020000\n",
-      "2021-09-08 01:47:28,687 epoch 4 - iter 20/24 - loss 0.63944341 - samples/sec: 22.80 - lr: 0.020000\n",
-      "2021-09-08 01:47:28,775 epoch 4 - iter 22/24 - loss 0.63863797 - samples/sec: 22.86 - lr: 0.020000\n",
-      "2021-09-08 01:47:28,863 epoch 4 - iter 24/24 - loss 0.63719691 - samples/sec: 22.90 - lr: 0.020000\n",
-      "2021-09-08 01:47:28,864 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:28,864 EPOCH 4 done: loss 0.6372 - lr 0.0200000\n",
-      "2021-09-08 01:47:29,168 DEV : loss 0.6365766525268555 - score 0.3333\n",
-      "2021-09-08 01:47:29,168 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:47:29,263 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:29,398 epoch 5 - iter 2/24 - loss 0.64626187 - samples/sec: 17.24 - lr: 0.020000\n",
-      "2021-09-08 01:47:29,518 epoch 5 - iter 4/24 - loss 0.64417003 - samples/sec: 16.84 - lr: 0.020000\n",
-      "2021-09-08 01:47:29,632 epoch 5 - iter 6/24 - loss 0.64039357 - samples/sec: 17.58 - lr: 0.020000\n",
-      "2021-09-08 01:47:29,747 epoch 5 - iter 8/24 - loss 0.63886445 - samples/sec: 17.46 - lr: 0.020000\n",
-      "2021-09-08 01:47:29,859 epoch 5 - iter 10/24 - loss 0.63564485 - samples/sec: 18.05 - lr: 0.020000\n",
-      "2021-09-08 01:47:29,970 epoch 5 - iter 12/24 - loss 0.63732708 - samples/sec: 17.97 - lr: 0.020000\n",
-      "2021-09-08 01:47:30,088 epoch 5 - iter 14/24 - loss 0.64004318 - samples/sec: 17.01 - lr: 0.020000\n",
-      "2021-09-08 01:47:30,207 epoch 5 - iter 16/24 - loss 0.64130479 - samples/sec: 16.97 - lr: 0.020000\n",
-      "2021-09-08 01:47:30,321 epoch 5 - iter 18/24 - loss 0.64137937 - samples/sec: 17.56 - lr: 0.020000\n",
-      "2021-09-08 01:47:30,433 epoch 5 - iter 20/24 - loss 0.64082024 - samples/sec: 17.96 - lr: 0.020000\n",
-      "2021-09-08 01:47:30,550 epoch 5 - iter 22/24 - loss 0.64159737 - samples/sec: 17.26 - lr: 0.020000\n",
-      "2021-09-08 01:47:30,667 epoch 5 - iter 24/24 - loss 0.64081877 - samples/sec: 17.15 - lr: 0.020000\n",
-      "2021-09-08 01:47:30,668 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:30,668 EPOCH 5 done: loss 0.6408 - lr 0.0200000\n",
-      "2021-09-08 01:47:37,867 DEV : loss 0.636725127696991 - score 0.3333\n",
-      "2021-09-08 01:47:37,868 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:47:37,892 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:38,047 epoch 6 - iter 2/24 - loss 0.61337680 - samples/sec: 14.64 - lr: 0.020000\n",
-      "2021-09-08 01:47:38,167 epoch 6 - iter 4/24 - loss 0.62196653 - samples/sec: 16.74 - lr: 0.020000\n"
+      "2021-09-21 21:17:50,663 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:50,779 epoch 3 - iter 2/24 - loss 0.65433481 - samples/sec: 20.30 - lr: 0.020000\n",
+      "2021-09-21 21:17:50,883 epoch 3 - iter 4/24 - loss 0.64958814 - samples/sec: 19.49 - lr: 0.020000\n",
+      "2021-09-21 21:17:50,985 epoch 3 - iter 6/24 - loss 0.64897370 - samples/sec: 19.70 - lr: 0.020000\n",
+      "2021-09-21 21:17:51,082 epoch 3 - iter 8/24 - loss 0.64380024 - samples/sec: 20.78 - lr: 0.020000\n",
+      "2021-09-21 21:17:51,183 epoch 3 - iter 10/24 - loss 0.64043427 - samples/sec: 20.02 - lr: 0.020000\n",
+      "2021-09-21 21:17:51,285 epoch 3 - iter 12/24 - loss 0.63984724 - samples/sec: 19.74 - lr: 0.020000\n",
+      "2021-09-21 21:17:51,387 epoch 3 - iter 14/24 - loss 0.63858172 - samples/sec: 19.66 - lr: 0.020000\n",
+      "2021-09-21 21:17:51,484 epoch 3 - iter 16/24 - loss 0.63946521 - samples/sec: 20.84 - lr: 0.020000\n",
+      "2021-09-21 21:17:51,593 epoch 3 - iter 18/24 - loss 0.63818205 - samples/sec: 18.48 - lr: 0.020000\n",
+      "2021-09-21 21:17:51,693 epoch 3 - iter 20/24 - loss 0.63697355 - samples/sec: 20.04 - lr: 0.020000\n",
+      "2021-09-21 21:17:51,802 epoch 3 - iter 22/24 - loss 0.63757832 - samples/sec: 18.44 - lr: 0.020000\n",
+      "2021-09-21 21:17:51,904 epoch 3 - iter 24/24 - loss 0.63723844 - samples/sec: 19.82 - lr: 0.020000\n",
+      "2021-09-21 21:17:51,905 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:51,905 EPOCH 3 done: loss 0.6372 - lr 0.0200000\n",
+      "2021-09-21 21:17:52,071 DEV : loss 0.6366634368896484 - score 0.0\n",
+      "2021-09-21 21:17:52,072 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:17:52,156 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:52,281 epoch 4 - iter 2/24 - loss 0.63704118 - samples/sec: 18.90 - lr: 0.020000\n",
+      "2021-09-21 21:17:52,384 epoch 4 - iter 4/24 - loss 0.63442512 - samples/sec: 19.55 - lr: 0.020000\n",
+      "2021-09-21 21:17:52,482 epoch 4 - iter 6/24 - loss 0.63407493 - samples/sec: 20.55 - lr: 0.020000\n",
+      "2021-09-21 21:17:52,582 epoch 4 - iter 8/24 - loss 0.63478914 - samples/sec: 20.09 - lr: 0.020000\n",
+      "2021-09-21 21:17:52,685 epoch 4 - iter 10/24 - loss 0.63267351 - samples/sec: 19.69 - lr: 0.020000\n",
+      "2021-09-21 21:17:52,791 epoch 4 - iter 12/24 - loss 0.63172563 - samples/sec: 18.93 - lr: 0.020000\n",
+      "2021-09-21 21:17:52,894 epoch 4 - iter 14/24 - loss 0.63151259 - samples/sec: 19.45 - lr: 0.020000\n",
+      "2021-09-21 21:17:52,998 epoch 4 - iter 16/24 - loss 0.63035658 - samples/sec: 19.40 - lr: 0.020000\n",
+      "2021-09-21 21:17:53,095 epoch 4 - iter 18/24 - loss 0.63402890 - samples/sec: 20.73 - lr: 0.020000\n",
+      "2021-09-21 21:17:53,190 epoch 4 - iter 20/24 - loss 0.63521999 - samples/sec: 20.98 - lr: 0.020000\n",
+      "2021-09-21 21:17:53,290 epoch 4 - iter 22/24 - loss 0.63484776 - samples/sec: 20.26 - lr: 0.020000\n",
+      "2021-09-21 21:17:53,390 epoch 4 - iter 24/24 - loss 0.63511620 - samples/sec: 20.11 - lr: 0.020000\n",
+      "2021-09-21 21:17:53,391 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:53,391 EPOCH 4 done: loss 0.6351 - lr 0.0200000\n",
+      "2021-09-21 21:17:53,489 DEV : loss 0.636628270149231 - score 0.3333\n",
+      "2021-09-21 21:17:53,489 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:17:53,491 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:53,666 epoch 5 - iter 2/24 - loss 0.63379407 - samples/sec: 14.57 - lr: 0.020000\n",
+      "2021-09-21 21:17:53,792 epoch 5 - iter 4/24 - loss 0.63305153 - samples/sec: 16.02 - lr: 0.020000\n",
+      "2021-09-21 21:17:53,932 epoch 5 - iter 6/24 - loss 0.63556412 - samples/sec: 14.31 - lr: 0.020000\n",
+      "2021-09-21 21:17:54,061 epoch 5 - iter 8/24 - loss 0.63591154 - samples/sec: 15.58 - lr: 0.020000\n",
+      "2021-09-21 21:17:54,202 epoch 5 - iter 10/24 - loss 0.63477673 - samples/sec: 14.24 - lr: 0.020000\n",
+      "2021-09-21 21:17:54,335 epoch 5 - iter 12/24 - loss 0.63642261 - samples/sec: 15.07 - lr: 0.020000\n",
+      "2021-09-21 21:17:54,487 epoch 5 - iter 14/24 - loss 0.63651159 - samples/sec: 13.21 - lr: 0.020000\n",
+      "2021-09-21 21:17:54,615 epoch 5 - iter 16/24 - loss 0.63776124 - samples/sec: 15.65 - lr: 0.020000\n",
+      "2021-09-21 21:17:54,766 epoch 5 - iter 18/24 - loss 0.63704418 - samples/sec: 13.34 - lr: 0.020000\n",
+      "2021-09-21 21:17:54,902 epoch 5 - iter 20/24 - loss 0.63658135 - samples/sec: 14.77 - lr: 0.020000\n",
+      "2021-09-21 21:17:55,056 epoch 5 - iter 22/24 - loss 0.63707477 - samples/sec: 12.99 - lr: 0.020000\n",
+      "2021-09-21 21:17:55,183 epoch 5 - iter 24/24 - loss 0.63699203 - samples/sec: 15.83 - lr: 0.020000\n",
+      "2021-09-21 21:17:55,185 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:55,185 EPOCH 5 done: loss 0.6370 - lr 0.0200000\n",
+      "2021-09-21 21:17:55,302 DEV : loss 0.636533260345459 - score 0.6667\n",
+      "2021-09-21 21:17:55,304 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:17:55,306 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:55,475 epoch 6 - iter 2/24 - loss 0.64886490 - samples/sec: 14.70 - lr: 0.020000\n",
+      "2021-09-21 21:17:55,590 epoch 6 - iter 4/24 - loss 0.64858963 - samples/sec: 17.47 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:47:38,282 epoch 6 - iter 6/24 - loss 0.61756964 - samples/sec: 17.41 - lr: 0.020000\n",
-      "2021-09-08 01:47:38,396 epoch 6 - iter 8/24 - loss 0.61797606 - samples/sec: 17.56 - lr: 0.020000\n",
-      "2021-09-08 01:47:38,510 epoch 6 - iter 10/24 - loss 0.61753912 - samples/sec: 17.77 - lr: 0.020000\n",
-      "2021-09-08 01:47:38,622 epoch 6 - iter 12/24 - loss 0.62105766 - samples/sec: 17.83 - lr: 0.020000\n",
-      "2021-09-08 01:47:38,738 epoch 6 - iter 14/24 - loss 0.62276425 - samples/sec: 17.40 - lr: 0.020000\n",
-      "2021-09-08 01:47:38,849 epoch 6 - iter 16/24 - loss 0.62292420 - samples/sec: 18.03 - lr: 0.020000\n",
-      "2021-09-08 01:47:38,961 epoch 6 - iter 18/24 - loss 0.62395148 - samples/sec: 18.01 - lr: 0.020000\n",
-      "2021-09-08 01:47:39,082 epoch 6 - iter 20/24 - loss 0.62734043 - samples/sec: 16.60 - lr: 0.020000\n",
-      "2021-09-08 01:47:39,198 epoch 6 - iter 22/24 - loss 0.62915825 - samples/sec: 17.31 - lr: 0.020000\n",
-      "2021-09-08 01:47:39,310 epoch 6 - iter 24/24 - loss 0.62983362 - samples/sec: 17.89 - lr: 0.020000\n",
-      "2021-09-08 01:47:39,311 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:39,311 EPOCH 6 done: loss 0.6298 - lr 0.0200000\n",
-      "2021-09-08 01:47:39,374 DEV : loss 0.6365346908569336 - score 0.3333\n",
+      "2021-09-21 21:17:55,731 epoch 6 - iter 6/24 - loss 0.64569437 - samples/sec: 14.17 - lr: 0.020000\n",
+      "2021-09-21 21:17:55,864 epoch 6 - iter 8/24 - loss 0.64379315 - samples/sec: 15.18 - lr: 0.020000\n",
+      "2021-09-21 21:17:56,046 epoch 6 - iter 10/24 - loss 0.64192349 - samples/sec: 11.00 - lr: 0.020000\n",
+      "2021-09-21 21:17:56,234 epoch 6 - iter 12/24 - loss 0.63990581 - samples/sec: 10.66 - lr: 0.020000\n",
+      "2021-09-21 21:17:56,395 epoch 6 - iter 14/24 - loss 0.64132657 - samples/sec: 12.52 - lr: 0.020000\n",
+      "2021-09-21 21:17:56,590 epoch 6 - iter 16/24 - loss 0.64172972 - samples/sec: 10.27 - lr: 0.020000\n",
+      "2021-09-21 21:17:56,790 epoch 6 - iter 18/24 - loss 0.64149122 - samples/sec: 10.05 - lr: 0.020000\n",
+      "2021-09-21 21:17:56,964 epoch 6 - iter 20/24 - loss 0.64016565 - samples/sec: 11.49 - lr: 0.020000\n",
+      "2021-09-21 21:17:57,157 epoch 6 - iter 22/24 - loss 0.64084908 - samples/sec: 10.42 - lr: 0.020000\n",
+      "2021-09-21 21:17:57,357 epoch 6 - iter 24/24 - loss 0.64027628 - samples/sec: 10.01 - lr: 0.020000\n",
+      "2021-09-21 21:17:57,358 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:57,359 EPOCH 6 done: loss 0.6403 - lr 0.0200000\n",
+      "2021-09-21 21:17:57,485 DEV : loss 0.636587381362915 - score 0.3333\n",
       "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:47:39,375 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:47:39,376 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:39,495 epoch 7 - iter 2/24 - loss 0.65281618 - samples/sec: 19.49 - lr: 0.010000\n",
-      "2021-09-08 01:47:39,602 epoch 7 - iter 4/24 - loss 0.64356005 - samples/sec: 18.75 - lr: 0.010000\n",
-      "2021-09-08 01:47:39,716 epoch 7 - iter 6/24 - loss 0.64448115 - samples/sec: 17.69 - lr: 0.010000\n",
-      "2021-09-08 01:47:39,822 epoch 7 - iter 8/24 - loss 0.64626230 - samples/sec: 18.98 - lr: 0.010000\n",
-      "2021-09-08 01:47:39,931 epoch 7 - iter 10/24 - loss 0.64713040 - samples/sec: 18.34 - lr: 0.010000\n",
-      "2021-09-08 01:47:40,045 epoch 7 - iter 12/24 - loss 0.64244237 - samples/sec: 17.72 - lr: 0.010000\n",
-      "2021-09-08 01:47:40,159 epoch 7 - iter 14/24 - loss 0.63825351 - samples/sec: 17.55 - lr: 0.010000\n",
-      "2021-09-08 01:47:40,265 epoch 7 - iter 16/24 - loss 0.63820424 - samples/sec: 18.88 - lr: 0.010000\n",
-      "2021-09-08 01:47:40,378 epoch 7 - iter 18/24 - loss 0.63731670 - samples/sec: 17.88 - lr: 0.010000\n",
-      "2021-09-08 01:47:40,491 epoch 7 - iter 20/24 - loss 0.63851544 - samples/sec: 17.69 - lr: 0.010000\n",
-      "2021-09-08 01:47:40,600 epoch 7 - iter 22/24 - loss 0.63835554 - samples/sec: 18.47 - lr: 0.010000\n",
-      "2021-09-08 01:47:40,709 epoch 7 - iter 24/24 - loss 0.63857432 - samples/sec: 18.52 - lr: 0.010000\n",
-      "2021-09-08 01:47:40,710 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:40,710 EPOCH 7 done: loss 0.6386 - lr 0.0100000\n",
-      "2021-09-08 01:47:40,774 DEV : loss 0.6365576982498169 - score 0.6667\n",
-      "2021-09-08 01:47:40,775 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:47:40,777 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:40,899 epoch 8 - iter 2/24 - loss 0.62705931 - samples/sec: 19.37 - lr: 0.010000\n",
-      "2021-09-08 01:47:41,017 epoch 8 - iter 4/24 - loss 0.63489310 - samples/sec: 17.09 - lr: 0.010000\n",
-      "2021-09-08 01:47:41,130 epoch 8 - iter 6/24 - loss 0.63930654 - samples/sec: 17.78 - lr: 0.010000\n",
-      "2021-09-08 01:47:41,235 epoch 8 - iter 8/24 - loss 0.64094350 - samples/sec: 19.10 - lr: 0.010000\n",
-      "2021-09-08 01:47:41,338 epoch 8 - iter 10/24 - loss 0.63887571 - samples/sec: 19.69 - lr: 0.010000\n",
-      "2021-09-08 01:47:41,449 epoch 8 - iter 12/24 - loss 0.64219420 - samples/sec: 18.06 - lr: 0.010000\n",
-      "2021-09-08 01:47:41,570 epoch 8 - iter 14/24 - loss 0.64483293 - samples/sec: 16.65 - lr: 0.010000\n",
-      "2021-09-08 01:47:41,680 epoch 8 - iter 16/24 - loss 0.64797301 - samples/sec: 18.16 - lr: 0.010000\n",
-      "2021-09-08 01:47:41,794 epoch 8 - iter 18/24 - loss 0.64867126 - samples/sec: 17.62 - lr: 0.010000\n",
-      "2021-09-08 01:47:41,910 epoch 8 - iter 20/24 - loss 0.64898488 - samples/sec: 17.31 - lr: 0.010000\n",
-      "2021-09-08 01:47:42,024 epoch 8 - iter 22/24 - loss 0.64793273 - samples/sec: 17.71 - lr: 0.010000\n",
-      "2021-09-08 01:47:42,140 epoch 8 - iter 24/24 - loss 0.64625668 - samples/sec: 17.26 - lr: 0.010000\n",
-      "2021-09-08 01:47:42,141 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:42,142 EPOCH 8 done: loss 0.6463 - lr 0.0100000\n",
-      "2021-09-08 01:47:42,310 DEV : loss 0.6365256309509277 - score 0.6667\n",
-      "2021-09-08 01:47:42,310 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:47:42,390 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:42,495 epoch 9 - iter 2/24 - loss 0.62921801 - samples/sec: 21.81 - lr: 0.010000\n",
-      "2021-09-08 01:47:42,594 epoch 9 - iter 4/24 - loss 0.63286830 - samples/sec: 20.23 - lr: 0.010000\n",
-      "2021-09-08 01:47:42,700 epoch 9 - iter 6/24 - loss 0.63486619 - samples/sec: 18.99 - lr: 0.010000\n",
-      "2021-09-08 01:47:42,809 epoch 9 - iter 8/24 - loss 0.63031796 - samples/sec: 18.50 - lr: 0.010000\n",
-      "2021-09-08 01:47:42,917 epoch 9 - iter 10/24 - loss 0.62924221 - samples/sec: 18.57 - lr: 0.010000\n",
-      "2021-09-08 01:47:43,030 epoch 9 - iter 12/24 - loss 0.62666584 - samples/sec: 17.88 - lr: 0.010000\n",
-      "2021-09-08 01:47:43,144 epoch 9 - iter 14/24 - loss 0.62861704 - samples/sec: 17.62 - lr: 0.010000\n",
-      "2021-09-08 01:47:43,266 epoch 9 - iter 16/24 - loss 0.62839650 - samples/sec: 16.36 - lr: 0.010000\n",
-      "2021-09-08 01:47:43,374 epoch 9 - iter 18/24 - loss 0.62653658 - samples/sec: 18.77 - lr: 0.010000\n",
-      "2021-09-08 01:47:43,487 epoch 9 - iter 20/24 - loss 0.62973326 - samples/sec: 17.77 - lr: 0.010000\n",
-      "2021-09-08 01:47:43,601 epoch 9 - iter 22/24 - loss 0.63020786 - samples/sec: 17.57 - lr: 0.010000\n",
-      "2021-09-08 01:47:43,716 epoch 9 - iter 24/24 - loss 0.63118468 - samples/sec: 17.44 - lr: 0.010000\n",
-      "2021-09-08 01:47:43,717 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:43,718 EPOCH 9 done: loss 0.6312 - lr 0.0100000\n",
-      "2021-09-08 01:47:43,880 DEV : loss 0.6365152597427368 - score 0.6667\n",
-      "2021-09-08 01:47:43,881 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:47:43,951 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:44,074 epoch 10 - iter 2/24 - loss 0.65541571 - samples/sec: 18.66 - lr: 0.010000\n",
-      "2021-09-08 01:47:44,189 epoch 10 - iter 4/24 - loss 0.65143007 - samples/sec: 17.48 - lr: 0.010000\n",
-      "2021-09-08 01:47:44,304 epoch 10 - iter 6/24 - loss 0.64836073 - samples/sec: 17.41 - lr: 0.010000\n",
-      "2021-09-08 01:47:44,420 epoch 10 - iter 8/24 - loss 0.64633102 - samples/sec: 17.42 - lr: 0.010000\n",
-      "2021-09-08 01:47:44,526 epoch 10 - iter 10/24 - loss 0.64449260 - samples/sec: 18.87 - lr: 0.010000\n",
-      "2021-09-08 01:47:44,638 epoch 10 - iter 12/24 - loss 0.64305416 - samples/sec: 18.00 - lr: 0.010000\n",
-      "2021-09-08 01:47:44,749 epoch 10 - iter 14/24 - loss 0.64373162 - samples/sec: 18.00 - lr: 0.010000\n",
-      "2021-09-08 01:47:44,861 epoch 10 - iter 16/24 - loss 0.64047589 - samples/sec: 18.08 - lr: 0.010000\n",
-      "2021-09-08 01:47:44,970 epoch 10 - iter 18/24 - loss 0.64139449 - samples/sec: 18.41 - lr: 0.010000\n",
-      "2021-09-08 01:47:45,081 epoch 10 - iter 20/24 - loss 0.64015943 - samples/sec: 18.02 - lr: 0.010000\n",
-      "2021-09-08 01:47:45,196 epoch 10 - iter 22/24 - loss 0.63572050 - samples/sec: 17.52 - lr: 0.010000\n",
-      "2021-09-08 01:47:45,314 epoch 10 - iter 24/24 - loss 0.63534806 - samples/sec: 16.94 - lr: 0.010000\n",
-      "2021-09-08 01:47:45,316 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:45,316 EPOCH 10 done: loss 0.6353 - lr 0.0100000\n",
-      "2021-09-08 01:47:45,376 DEV : loss 0.6365203857421875 - score 0.0\n",
+      "2021-09-21 21:17:57,486 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:17:57,488 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:57,717 epoch 7 - iter 2/24 - loss 0.64860678 - samples/sec: 10.82 - lr: 0.010000\n",
+      "2021-09-21 21:17:57,897 epoch 7 - iter 4/24 - loss 0.64091852 - samples/sec: 11.20 - lr: 0.010000\n",
+      "2021-09-21 21:17:58,075 epoch 7 - iter 6/24 - loss 0.64533213 - samples/sec: 11.23 - lr: 0.010000\n",
+      "2021-09-21 21:17:58,243 epoch 7 - iter 8/24 - loss 0.64184152 - samples/sec: 11.95 - lr: 0.010000\n",
+      "2021-09-21 21:17:58,376 epoch 7 - iter 10/24 - loss 0.64099825 - samples/sec: 15.11 - lr: 0.010000\n",
+      "2021-09-21 21:17:58,547 epoch 7 - iter 12/24 - loss 0.64053268 - samples/sec: 11.70 - lr: 0.010000\n",
+      "2021-09-21 21:17:58,710 epoch 7 - iter 14/24 - loss 0.63825873 - samples/sec: 12.31 - lr: 0.010000\n",
+      "2021-09-21 21:17:58,825 epoch 7 - iter 16/24 - loss 0.63726280 - samples/sec: 17.58 - lr: 0.010000\n",
+      "2021-09-21 21:17:58,923 epoch 7 - iter 18/24 - loss 0.63583506 - samples/sec: 20.50 - lr: 0.010000\n",
+      "2021-09-21 21:17:59,021 epoch 7 - iter 20/24 - loss 0.63550936 - samples/sec: 20.53 - lr: 0.010000\n",
+      "2021-09-21 21:17:59,126 epoch 7 - iter 22/24 - loss 0.63513194 - samples/sec: 19.13 - lr: 0.010000\n",
+      "2021-09-21 21:17:59,236 epoch 7 - iter 24/24 - loss 0.63652128 - samples/sec: 18.32 - lr: 0.010000\n",
+      "2021-09-21 21:17:59,237 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:59,238 EPOCH 7 done: loss 0.6365 - lr 0.0100000\n",
+      "2021-09-21 21:17:59,309 DEV : loss 0.6365916728973389 - score 0.3333\n",
+      "2021-09-21 21:17:59,310 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:17:59,313 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:59,437 epoch 8 - iter 2/24 - loss 0.63735524 - samples/sec: 18.47 - lr: 0.010000\n",
+      "2021-09-21 21:17:59,542 epoch 8 - iter 4/24 - loss 0.64008114 - samples/sec: 19.06 - lr: 0.010000\n",
+      "2021-09-21 21:17:59,650 epoch 8 - iter 6/24 - loss 0.63810257 - samples/sec: 18.68 - lr: 0.010000\n",
+      "2021-09-21 21:17:59,747 epoch 8 - iter 8/24 - loss 0.63764474 - samples/sec: 20.81 - lr: 0.010000\n",
+      "2021-09-21 21:17:59,850 epoch 8 - iter 10/24 - loss 0.63514498 - samples/sec: 19.42 - lr: 0.010000\n",
+      "2021-09-21 21:17:59,949 epoch 8 - iter 12/24 - loss 0.63449638 - samples/sec: 20.38 - lr: 0.010000\n",
+      "2021-09-21 21:18:00,042 epoch 8 - iter 14/24 - loss 0.63658422 - samples/sec: 21.51 - lr: 0.010000\n",
+      "2021-09-21 21:18:00,135 epoch 8 - iter 16/24 - loss 0.63698271 - samples/sec: 21.79 - lr: 0.010000\n",
+      "2021-09-21 21:18:00,231 epoch 8 - iter 18/24 - loss 0.63606221 - samples/sec: 20.92 - lr: 0.010000\n",
+      "2021-09-21 21:18:00,342 epoch 8 - iter 20/24 - loss 0.63535795 - samples/sec: 18.03 - lr: 0.010000\n",
+      "2021-09-21 21:18:00,438 epoch 8 - iter 22/24 - loss 0.63399490 - samples/sec: 20.94 - lr: 0.010000\n",
+      "2021-09-21 21:18:00,540 epoch 8 - iter 24/24 - loss 0.63390304 - samples/sec: 19.73 - lr: 0.010000\n",
+      "2021-09-21 21:18:00,541 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:00,542 EPOCH 8 done: loss 0.6339 - lr 0.0100000\n",
+      "2021-09-21 21:18:00,750 DEV : loss 0.6367752552032471 - score 0.3333\n",
+      "2021-09-21 21:18:00,751 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:18:00,824 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:00,956 epoch 9 - iter 2/24 - loss 0.63654685 - samples/sec: 18.70 - lr: 0.010000\n",
+      "2021-09-21 21:18:01,051 epoch 9 - iter 4/24 - loss 0.63982961 - samples/sec: 21.23 - lr: 0.010000\n",
+      "2021-09-21 21:18:01,146 epoch 9 - iter 6/24 - loss 0.63885132 - samples/sec: 21.27 - lr: 0.010000\n",
+      "2021-09-21 21:18:01,249 epoch 9 - iter 8/24 - loss 0.63940199 - samples/sec: 19.51 - lr: 0.010000\n",
+      "2021-09-21 21:18:01,353 epoch 9 - iter 10/24 - loss 0.63773285 - samples/sec: 19.28 - lr: 0.010000\n",
+      "2021-09-21 21:18:01,455 epoch 9 - iter 12/24 - loss 0.63870043 - samples/sec: 19.74 - lr: 0.010000\n",
+      "2021-09-21 21:18:01,558 epoch 9 - iter 14/24 - loss 0.63894179 - samples/sec: 19.46 - lr: 0.010000\n",
+      "2021-09-21 21:18:01,657 epoch 9 - iter 16/24 - loss 0.63883648 - samples/sec: 20.39 - lr: 0.010000\n",
+      "2021-09-21 21:18:01,764 epoch 9 - iter 18/24 - loss 0.63943740 - samples/sec: 18.71 - lr: 0.010000\n",
+      "2021-09-21 21:18:01,870 epoch 9 - iter 20/24 - loss 0.63934164 - samples/sec: 19.15 - lr: 0.010000\n",
+      "2021-09-21 21:18:01,974 epoch 9 - iter 22/24 - loss 0.63818421 - samples/sec: 19.27 - lr: 0.010000\n",
+      "2021-09-21 21:18:02,069 epoch 9 - iter 24/24 - loss 0.63764539 - samples/sec: 21.11 - lr: 0.010000\n",
+      "2021-09-21 21:18:02,071 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:02,071 EPOCH 9 done: loss 0.6376 - lr 0.0100000\n",
+      "2021-09-21 21:18:02,254 DEV : loss 0.6365166902542114 - score 0.3333\n",
+      "2021-09-21 21:18:02,254 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:18:02,335 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:02,465 epoch 10 - iter 2/24 - loss 0.63911659 - samples/sec: 19.06 - lr: 0.010000\n",
+      "2021-09-21 21:18:02,567 epoch 10 - iter 4/24 - loss 0.63462010 - samples/sec: 19.77 - lr: 0.010000\n",
+      "2021-09-21 21:18:02,661 epoch 10 - iter 6/24 - loss 0.62999594 - samples/sec: 21.59 - lr: 0.010000\n",
+      "2021-09-21 21:18:02,762 epoch 10 - iter 8/24 - loss 0.63479284 - samples/sec: 19.96 - lr: 0.010000\n",
+      "2021-09-21 21:18:02,859 epoch 10 - iter 10/24 - loss 0.63454387 - samples/sec: 20.64 - lr: 0.010000\n",
+      "2021-09-21 21:18:02,956 epoch 10 - iter 12/24 - loss 0.63821948 - samples/sec: 20.64 - lr: 0.010000\n",
+      "2021-09-21 21:18:03,051 epoch 10 - iter 14/24 - loss 0.63913287 - samples/sec: 21.21 - lr: 0.010000\n",
+      "2021-09-21 21:18:03,149 epoch 10 - iter 16/24 - loss 0.63853074 - samples/sec: 20.49 - lr: 0.010000\n",
+      "2021-09-21 21:18:03,264 epoch 10 - iter 18/24 - loss 0.63735069 - samples/sec: 17.52 - lr: 0.010000\n",
+      "2021-09-21 21:18:03,367 epoch 10 - iter 20/24 - loss 0.63664406 - samples/sec: 19.61 - lr: 0.010000\n",
+      "2021-09-21 21:18:03,471 epoch 10 - iter 22/24 - loss 0.63616327 - samples/sec: 19.23 - lr: 0.010000\n",
+      "2021-09-21 21:18:03,571 epoch 10 - iter 24/24 - loss 0.63693495 - samples/sec: 20.11 - lr: 0.010000\n",
+      "2021-09-21 21:18:03,572 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:03,573 EPOCH 10 done: loss 0.6369 - lr 0.0100000\n",
+      "2021-09-21 21:18:03,749 DEV : loss 0.6365296840667725 - score 0.3333\n",
       "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 01:47:45,377 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:47:49,333 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:49,334 Testing using best model ...\n",
-      "2021-09-08 01:47:49,336 loading file None1/best-model.pt\n",
+      "2021-09-21 21:18:03,749 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:18:13,283 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:13,283 Testing using best model ...\n",
+      "2021-09-21 21:18:13,285 loading file None1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:47:54,417 \t0.3333\n"
+      "2021-09-21 21:18:18,500 \t0.6667\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:47:54,418 \n",
+      "2021-09-21 21:18:18,500 \n",
       "Results:\n",
-      "- F-score (micro) 0.3333\n",
-      "- F-score (macro) 0.2222\n",
-      "- Accuracy 0.3333\n",
+      "- F-score (micro) 0.6667\n",
+      "- F-score (macro) 0.2667\n",
+      "- Accuracy 0.6667\n",
       "\n",
       "By class:\n",
       "                                                          precision    recall  f1-score   support\n",
       "\n",
-      "   having the quality of something harmful or unpleasant     0.0000    0.0000    0.0000         1\n",
-      "one who does not side with any party in a war or dispute     0.5000    1.0000    0.6667         1\n",
-      "                             involving advantage or good     0.0000    0.0000    0.0000         1\n",
+      "   having the quality of something harmful or unpleasant     0.6667    1.0000    0.8000         2\n",
+      "one who does not side with any party in a war or dispute     0.0000    0.0000    0.0000         1\n",
+      "                             involving advantage or good     0.0000    0.0000    0.0000         0\n",
       "\n",
-      "                                               micro avg     0.3333    0.3333    0.3333         3\n",
-      "                                               macro avg     0.1667    0.3333    0.2222         3\n",
-      "                                            weighted avg     0.1667    0.3333    0.2222         3\n",
-      "                                             samples avg     0.3333    0.3333    0.3333         3\n",
+      "                                               micro avg     0.6667    0.6667    0.6667         3\n",
+      "                                               macro avg     0.2222    0.3333    0.2667         3\n",
+      "                                            weighted avg     0.4444    0.6667    0.5333         3\n",
+      "                                             samples avg     0.6667    0.6667    0.6667         3\n",
       "\n",
-      "2021-09-08 01:47:54,419 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:11,860 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
+      "2021-09-21 21:18:18,501 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:49,828 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:48:16,611 Computing label dictionary. Progress:\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "100%|██████████| 27/27 [00:00<00:00, 38690.20it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 01:48:16,614 [b'having the quality of something harmful or unpleasant', b'one who does not side with any party in a war or dispute', b'involving advantage or good']\n"
+      "2021-09-21 21:18:54,532 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\n"
+      "100%|██████████| 27/27 [00:00<00:00, 38453.72it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:48:17,342 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:17,344 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:18:54,534 [b'having the quality of something harmful or unpleasant', b'one who does not side with any party in a war or dispute', b'involving advantage or good']\n",
+      "2021-09-21 21:18:54,544 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:54,546 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -6884,215 +6902,222 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:48:17,345 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:17,345 Corpus: \"Corpus: 24 train + 3 dev + 3 test sentences\"\n",
-      "2021-09-08 01:48:17,346 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:17,346 Parameters:\n",
-      "2021-09-08 01:48:17,346  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:48:17,346  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:48:17,347  - patience: \"3\"\n",
-      "2021-09-08 01:48:17,347  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:48:17,347  - max_epochs: \"10\"\n",
-      "2021-09-08 01:48:17,347  - shuffle: \"True\"\n",
-      "2021-09-08 01:48:17,348  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:48:17,348  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:48:17,348 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:17,349 Model training base path: \"None1\"\n",
-      "2021-09-08 01:48:17,349 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:17,349 Device: cuda:1\n",
-      "2021-09-08 01:48:17,349 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:17,350 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:48:17,356 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:17,504 epoch 1 - iter 2/24 - loss 0.63033643 - samples/sec: 17.32 - lr: 0.020000\n",
-      "2021-09-08 01:48:17,594 epoch 1 - iter 4/24 - loss 0.63293649 - samples/sec: 22.35 - lr: 0.020000\n",
-      "2021-09-08 01:48:17,683 epoch 1 - iter 6/24 - loss 0.63927542 - samples/sec: 22.61 - lr: 0.020000\n",
-      "2021-09-08 01:48:17,790 epoch 1 - iter 8/24 - loss 0.63976251 - samples/sec: 18.84 - lr: 0.020000\n",
-      "2021-09-08 01:48:17,895 epoch 1 - iter 10/24 - loss 0.63733894 - samples/sec: 19.13 - lr: 0.020000\n",
-      "2021-09-08 01:48:17,996 epoch 1 - iter 12/24 - loss 0.63756394 - samples/sec: 20.00 - lr: 0.020000\n",
-      "2021-09-08 01:48:18,094 epoch 1 - iter 14/24 - loss 0.63727184 - samples/sec: 20.41 - lr: 0.020000\n",
-      "2021-09-08 01:48:18,195 epoch 1 - iter 16/24 - loss 0.63664135 - samples/sec: 20.03 - lr: 0.020000\n",
-      "2021-09-08 01:48:18,297 epoch 1 - iter 18/24 - loss 0.63679755 - samples/sec: 19.68 - lr: 0.020000\n",
-      "2021-09-08 01:48:18,408 epoch 1 - iter 20/24 - loss 0.63616731 - samples/sec: 18.14 - lr: 0.020000\n",
-      "2021-09-08 01:48:18,509 epoch 1 - iter 22/24 - loss 0.63674651 - samples/sec: 19.74 - lr: 0.020000\n",
-      "2021-09-08 01:48:18,613 epoch 1 - iter 24/24 - loss 0.63714619 - samples/sec: 19.46 - lr: 0.020000\n",
-      "2021-09-08 01:48:18,614 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:18,614 EPOCH 1 done: loss 0.6371 - lr 0.0200000\n",
-      "2021-09-08 01:48:18,702 DEV : loss 0.6365897059440613 - score 0.6667\n",
-      "2021-09-08 01:48:18,703 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:48:22,853 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:22,978 epoch 2 - iter 2/24 - loss 0.64038506 - samples/sec: 18.98 - lr: 0.020000\n",
-      "2021-09-08 01:48:23,088 epoch 2 - iter 4/24 - loss 0.63580510 - samples/sec: 18.29 - lr: 0.020000\n",
-      "2021-09-08 01:48:23,187 epoch 2 - iter 6/24 - loss 0.63362877 - samples/sec: 20.21 - lr: 0.020000\n",
-      "2021-09-08 01:48:23,288 epoch 2 - iter 8/24 - loss 0.63809047 - samples/sec: 19.94 - lr: 0.020000\n",
-      "2021-09-08 01:48:23,400 epoch 2 - iter 10/24 - loss 0.63770294 - samples/sec: 17.99 - lr: 0.020000\n",
-      "2021-09-08 01:48:23,511 epoch 2 - iter 12/24 - loss 0.63769531 - samples/sec: 18.16 - lr: 0.020000\n",
-      "2021-09-08 01:48:23,624 epoch 2 - iter 14/24 - loss 0.63536845 - samples/sec: 17.70 - lr: 0.020000\n",
-      "2021-09-08 01:48:23,734 epoch 2 - iter 16/24 - loss 0.63578929 - samples/sec: 18.37 - lr: 0.020000\n",
-      "2021-09-08 01:48:23,836 epoch 2 - iter 18/24 - loss 0.63677529 - samples/sec: 19.71 - lr: 0.020000\n",
-      "2021-09-08 01:48:23,933 epoch 2 - iter 20/24 - loss 0.63623321 - samples/sec: 20.68 - lr: 0.020000\n",
-      "2021-09-08 01:48:24,031 epoch 2 - iter 22/24 - loss 0.63632428 - samples/sec: 20.40 - lr: 0.020000\n",
-      "2021-09-08 01:48:24,135 epoch 2 - iter 24/24 - loss 0.63595584 - samples/sec: 19.47 - lr: 0.020000\n",
-      "2021-09-08 01:48:24,136 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:24,136 EPOCH 2 done: loss 0.6360 - lr 0.0200000\n",
-      "2021-09-08 01:48:24,213 DEV : loss 0.6365184783935547 - score 0.3333\n",
-      "2021-09-08 01:48:24,214 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:48:24,216 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:24,345 epoch 3 - iter 2/24 - loss 0.65859523 - samples/sec: 18.22 - lr: 0.020000\n",
-      "2021-09-08 01:48:24,448 epoch 3 - iter 4/24 - loss 0.64456858 - samples/sec: 19.50 - lr: 0.020000\n",
-      "2021-09-08 01:48:24,559 epoch 3 - iter 6/24 - loss 0.64109802 - samples/sec: 18.06 - lr: 0.020000\n",
-      "2021-09-08 01:48:24,655 epoch 3 - iter 8/24 - loss 0.64049584 - samples/sec: 21.10 - lr: 0.020000\n",
-      "2021-09-08 01:48:24,749 epoch 3 - iter 10/24 - loss 0.63573695 - samples/sec: 21.26 - lr: 0.020000\n",
-      "2021-09-08 01:48:24,852 epoch 3 - iter 12/24 - loss 0.63377803 - samples/sec: 19.55 - lr: 0.020000\n",
-      "2021-09-08 01:48:24,951 epoch 3 - iter 14/24 - loss 0.63458143 - samples/sec: 20.24 - lr: 0.020000\n",
-      "2021-09-08 01:48:25,064 epoch 3 - iter 16/24 - loss 0.63527563 - samples/sec: 17.88 - lr: 0.020000\n",
-      "2021-09-08 01:48:25,175 epoch 3 - iter 18/24 - loss 0.63613513 - samples/sec: 18.07 - lr: 0.020000\n",
-      "2021-09-08 01:48:25,284 epoch 3 - iter 20/24 - loss 0.63668225 - samples/sec: 18.37 - lr: 0.020000\n",
-      "2021-09-08 01:48:25,393 epoch 3 - iter 22/24 - loss 0.63532949 - samples/sec: 18.52 - lr: 0.020000\n",
-      "2021-09-08 01:48:25,504 epoch 3 - iter 24/24 - loss 0.63537422 - samples/sec: 18.17 - lr: 0.020000\n",
-      "2021-09-08 01:48:25,505 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:25,505 EPOCH 3 done: loss 0.6354 - lr 0.0200000\n",
-      "2021-09-08 01:48:25,571 DEV : loss 0.6366625428199768 - score 0.0\n",
-      "2021-09-08 01:48:25,572 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:48:25,574 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:25,695 epoch 4 - iter 2/24 - loss 0.62858814 - samples/sec: 19.61 - lr: 0.020000\n",
-      "2021-09-08 01:48:25,794 epoch 4 - iter 4/24 - loss 0.63038366 - samples/sec: 20.25 - lr: 0.020000\n",
-      "2021-09-08 01:48:25,893 epoch 4 - iter 6/24 - loss 0.63526459 - samples/sec: 20.28 - lr: 0.020000\n",
-      "2021-09-08 01:48:25,996 epoch 4 - iter 8/24 - loss 0.64002132 - samples/sec: 19.58 - lr: 0.020000\n",
-      "2021-09-08 01:48:26,099 epoch 4 - iter 10/24 - loss 0.64029638 - samples/sec: 19.50 - lr: 0.020000\n",
-      "2021-09-08 01:48:26,205 epoch 4 - iter 12/24 - loss 0.64223346 - samples/sec: 18.97 - lr: 0.020000\n",
-      "2021-09-08 01:48:26,307 epoch 4 - iter 14/24 - loss 0.64279070 - samples/sec: 19.62 - lr: 0.020000\n",
-      "2021-09-08 01:48:26,401 epoch 4 - iter 16/24 - loss 0.64008804 - samples/sec: 21.39 - lr: 0.020000\n",
-      "2021-09-08 01:48:26,496 epoch 4 - iter 18/24 - loss 0.64045593 - samples/sec: 21.14 - lr: 0.020000\n",
-      "2021-09-08 01:48:26,595 epoch 4 - iter 20/24 - loss 0.64000372 - samples/sec: 20.38 - lr: 0.020000\n",
-      "2021-09-08 01:48:26,691 epoch 4 - iter 22/24 - loss 0.64088660 - samples/sec: 20.98 - lr: 0.020000\n",
-      "2021-09-08 01:48:26,801 epoch 4 - iter 24/24 - loss 0.64024866 - samples/sec: 18.18 - lr: 0.020000\n",
-      "2021-09-08 01:48:26,803 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:26,803 EPOCH 4 done: loss 0.6402 - lr 0.0200000\n",
-      "2021-09-08 01:48:26,870 DEV : loss 0.6369165182113647 - score 0.0\n",
-      "2021-09-08 01:48:26,871 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:48:26,873 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:26,993 epoch 5 - iter 2/24 - loss 0.64380071 - samples/sec: 19.64 - lr: 0.020000\n",
-      "2021-09-08 01:48:27,088 epoch 5 - iter 4/24 - loss 0.65236369 - samples/sec: 21.35 - lr: 0.020000\n",
-      "2021-09-08 01:48:27,194 epoch 5 - iter 6/24 - loss 0.64390336 - samples/sec: 18.88 - lr: 0.020000\n"
+      "2021-09-21 21:18:54,547 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:54,547 Corpus: \"Corpus: 24 train + 3 dev + 3 test sentences\"\n",
+      "2021-09-21 21:18:54,547 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:54,547 Parameters:\n",
+      "2021-09-21 21:18:54,548  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:18:54,548  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:18:54,548  - patience: \"3\"\n",
+      "2021-09-21 21:18:54,549  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:18:54,549  - max_epochs: \"10\"\n",
+      "2021-09-21 21:18:54,549  - shuffle: \"True\"\n",
+      "2021-09-21 21:18:54,549  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:18:54,550  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:18:54,550 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:54,550 Model training base path: \"None1\"\n",
+      "2021-09-21 21:18:54,550 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:54,551 Device: cuda:0\n",
+      "2021-09-21 21:18:54,551 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:54,551 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:18:54,558 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:54,668 epoch 1 - iter 2/24 - loss 0.63137120 - samples/sec: 20.80 - lr: 0.020000\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:48:27,304 epoch 5 - iter 8/24 - loss 0.63967665 - samples/sec: 18.30 - lr: 0.020000\n",
-      "2021-09-08 01:48:27,403 epoch 5 - iter 10/24 - loss 0.63926895 - samples/sec: 20.37 - lr: 0.020000\n",
-      "2021-09-08 01:48:27,511 epoch 5 - iter 12/24 - loss 0.64069883 - samples/sec: 18.52 - lr: 0.020000\n",
-      "2021-09-08 01:48:27,615 epoch 5 - iter 14/24 - loss 0.63981889 - samples/sec: 19.27 - lr: 0.020000\n",
-      "2021-09-08 01:48:27,719 epoch 5 - iter 16/24 - loss 0.63869084 - samples/sec: 19.32 - lr: 0.020000\n",
-      "2021-09-08 01:48:27,822 epoch 5 - iter 18/24 - loss 0.63688113 - samples/sec: 19.65 - lr: 0.020000\n",
-      "2021-09-08 01:48:27,921 epoch 5 - iter 20/24 - loss 0.63932985 - samples/sec: 20.28 - lr: 0.020000\n",
-      "2021-09-08 01:48:28,024 epoch 5 - iter 22/24 - loss 0.64079834 - samples/sec: 19.54 - lr: 0.020000\n",
-      "2021-09-08 01:48:28,120 epoch 5 - iter 24/24 - loss 0.63853999 - samples/sec: 20.91 - lr: 0.020000\n",
-      "2021-09-08 01:48:28,121 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:28,122 EPOCH 5 done: loss 0.6385 - lr 0.0200000\n",
-      "2021-09-08 01:48:28,198 DEV : loss 0.6365145444869995 - score 0.6667\n",
-      "2021-09-08 01:48:28,199 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:18:54,784 epoch 1 - iter 4/24 - loss 0.64840020 - samples/sec: 17.47 - lr: 0.020000\n",
+      "2021-09-21 21:18:54,890 epoch 1 - iter 6/24 - loss 0.64375981 - samples/sec: 18.86 - lr: 0.020000\n",
+      "2021-09-21 21:18:54,999 epoch 1 - iter 8/24 - loss 0.64179914 - samples/sec: 18.52 - lr: 0.020000\n",
+      "2021-09-21 21:18:55,100 epoch 1 - iter 10/24 - loss 0.64218359 - samples/sec: 19.77 - lr: 0.020000\n",
+      "2021-09-21 21:18:55,200 epoch 1 - iter 12/24 - loss 0.64447986 - samples/sec: 20.24 - lr: 0.020000\n",
+      "2021-09-21 21:18:55,292 epoch 1 - iter 14/24 - loss 0.64286056 - samples/sec: 21.82 - lr: 0.020000\n",
+      "2021-09-21 21:18:55,379 epoch 1 - iter 16/24 - loss 0.64147463 - samples/sec: 22.99 - lr: 0.020000\n",
+      "2021-09-21 21:18:55,470 epoch 1 - iter 18/24 - loss 0.63987672 - samples/sec: 22.10 - lr: 0.020000\n",
+      "2021-09-21 21:18:55,574 epoch 1 - iter 20/24 - loss 0.64230028 - samples/sec: 19.46 - lr: 0.020000\n",
+      "2021-09-21 21:18:55,683 epoch 1 - iter 22/24 - loss 0.64150285 - samples/sec: 18.40 - lr: 0.020000\n",
+      "2021-09-21 21:18:55,788 epoch 1 - iter 24/24 - loss 0.64156979 - samples/sec: 19.19 - lr: 0.020000\n",
+      "2021-09-21 21:18:55,789 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:55,789 EPOCH 1 done: loss 0.6416 - lr 0.0200000\n",
+      "2021-09-21 21:18:55,941 DEV : loss 0.6365493535995483 - score 0.3333\n",
+      "2021-09-21 21:18:55,941 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:48:32,324 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:32,458 epoch 6 - iter 2/24 - loss 0.65137762 - samples/sec: 19.12 - lr: 0.020000\n",
-      "2021-09-08 01:48:32,557 epoch 6 - iter 4/24 - loss 0.65046319 - samples/sec: 20.46 - lr: 0.020000\n",
-      "2021-09-08 01:48:32,658 epoch 6 - iter 6/24 - loss 0.64513208 - samples/sec: 19.87 - lr: 0.020000\n",
-      "2021-09-08 01:48:32,760 epoch 6 - iter 8/24 - loss 0.64195850 - samples/sec: 19.77 - lr: 0.020000\n",
-      "2021-09-08 01:48:32,860 epoch 6 - iter 10/24 - loss 0.63704401 - samples/sec: 19.94 - lr: 0.020000\n",
-      "2021-09-08 01:48:32,957 epoch 6 - iter 12/24 - loss 0.63721278 - samples/sec: 20.80 - lr: 0.020000\n",
-      "2021-09-08 01:48:33,061 epoch 6 - iter 14/24 - loss 0.63671096 - samples/sec: 19.43 - lr: 0.020000\n",
-      "2021-09-08 01:48:33,165 epoch 6 - iter 16/24 - loss 0.63646318 - samples/sec: 19.24 - lr: 0.020000\n",
-      "2021-09-08 01:48:33,269 epoch 6 - iter 18/24 - loss 0.63637498 - samples/sec: 19.42 - lr: 0.020000\n",
-      "2021-09-08 01:48:33,382 epoch 6 - iter 20/24 - loss 0.63669842 - samples/sec: 17.74 - lr: 0.020000\n",
-      "2021-09-08 01:48:33,487 epoch 6 - iter 22/24 - loss 0.63620866 - samples/sec: 19.13 - lr: 0.020000\n",
-      "2021-09-08 01:48:33,587 epoch 6 - iter 24/24 - loss 0.63576699 - samples/sec: 20.15 - lr: 0.020000\n",
-      "2021-09-08 01:48:33,588 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:33,588 EPOCH 6 done: loss 0.6358 - lr 0.0200000\n",
-      "2021-09-08 01:48:33,668 DEV : loss 0.6365668773651123 - score 0.6667\n",
-      "2021-09-08 01:48:33,670 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:48:33,672 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:33,788 epoch 7 - iter 2/24 - loss 0.62993836 - samples/sec: 20.48 - lr: 0.020000\n",
-      "2021-09-08 01:48:33,899 epoch 7 - iter 4/24 - loss 0.63949575 - samples/sec: 18.14 - lr: 0.020000\n",
-      "2021-09-08 01:48:33,998 epoch 7 - iter 6/24 - loss 0.63406978 - samples/sec: 20.23 - lr: 0.020000\n",
-      "2021-09-08 01:48:34,110 epoch 7 - iter 8/24 - loss 0.63063399 - samples/sec: 17.87 - lr: 0.020000\n",
-      "2021-09-08 01:48:34,221 epoch 7 - iter 10/24 - loss 0.62559234 - samples/sec: 18.22 - lr: 0.020000\n",
-      "2021-09-08 01:48:34,327 epoch 7 - iter 12/24 - loss 0.62829988 - samples/sec: 18.90 - lr: 0.020000\n",
-      "2021-09-08 01:48:34,426 epoch 7 - iter 14/24 - loss 0.63120105 - samples/sec: 20.27 - lr: 0.020000\n",
-      "2021-09-08 01:48:34,522 epoch 7 - iter 16/24 - loss 0.63269418 - samples/sec: 21.12 - lr: 0.020000\n",
-      "2021-09-08 01:48:34,626 epoch 7 - iter 18/24 - loss 0.63212094 - samples/sec: 19.31 - lr: 0.020000\n",
-      "2021-09-08 01:48:34,727 epoch 7 - iter 20/24 - loss 0.63259215 - samples/sec: 19.77 - lr: 0.020000\n",
-      "2021-09-08 01:48:34,829 epoch 7 - iter 22/24 - loss 0.63340638 - samples/sec: 19.79 - lr: 0.020000\n",
-      "2021-09-08 01:48:34,933 epoch 7 - iter 24/24 - loss 0.63682792 - samples/sec: 19.39 - lr: 0.020000\n",
-      "2021-09-08 01:48:34,934 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:34,934 EPOCH 7 done: loss 0.6368 - lr 0.0200000\n",
-      "2021-09-08 01:48:35,004 DEV : loss 0.6365524530410767 - score 0.6667\n",
-      "2021-09-08 01:48:35,005 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:48:35,007 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:35,133 epoch 8 - iter 2/24 - loss 0.63105285 - samples/sec: 18.54 - lr: 0.020000\n",
-      "2021-09-08 01:48:35,241 epoch 8 - iter 4/24 - loss 0.62746048 - samples/sec: 18.60 - lr: 0.020000\n",
-      "2021-09-08 01:48:35,339 epoch 8 - iter 6/24 - loss 0.62458971 - samples/sec: 20.63 - lr: 0.020000\n",
-      "2021-09-08 01:48:35,442 epoch 8 - iter 8/24 - loss 0.63220857 - samples/sec: 19.43 - lr: 0.020000\n",
-      "2021-09-08 01:48:35,546 epoch 8 - iter 10/24 - loss 0.63397714 - samples/sec: 19.47 - lr: 0.020000\n",
-      "2021-09-08 01:48:35,656 epoch 8 - iter 12/24 - loss 0.63489956 - samples/sec: 18.23 - lr: 0.020000\n",
-      "2021-09-08 01:48:35,762 epoch 8 - iter 14/24 - loss 0.63415041 - samples/sec: 18.96 - lr: 0.020000\n",
-      "2021-09-08 01:48:35,869 epoch 8 - iter 16/24 - loss 0.63473798 - samples/sec: 18.73 - lr: 0.020000\n",
-      "2021-09-08 01:48:35,973 epoch 8 - iter 18/24 - loss 0.63594077 - samples/sec: 19.44 - lr: 0.020000\n",
-      "2021-09-08 01:48:36,071 epoch 8 - iter 20/24 - loss 0.63581699 - samples/sec: 20.52 - lr: 0.020000\n",
-      "2021-09-08 01:48:36,171 epoch 8 - iter 22/24 - loss 0.63607299 - samples/sec: 20.06 - lr: 0.020000\n",
-      "2021-09-08 01:48:36,271 epoch 8 - iter 24/24 - loss 0.63549651 - samples/sec: 20.08 - lr: 0.020000\n",
-      "2021-09-08 01:48:36,272 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:36,273 EPOCH 8 done: loss 0.6355 - lr 0.0200000\n",
-      "2021-09-08 01:48:36,337 DEV : loss 0.6365155577659607 - score 0.6667\n",
-      "2021-09-08 01:48:36,339 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:48:36,341 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:36,470 epoch 9 - iter 2/24 - loss 0.64797249 - samples/sec: 18.35 - lr: 0.020000\n",
-      "2021-09-08 01:48:36,573 epoch 9 - iter 4/24 - loss 0.64376000 - samples/sec: 19.54 - lr: 0.020000\n",
-      "2021-09-08 01:48:36,676 epoch 9 - iter 6/24 - loss 0.64174790 - samples/sec: 19.52 - lr: 0.020000\n",
-      "2021-09-08 01:48:36,779 epoch 9 - iter 8/24 - loss 0.63495634 - samples/sec: 19.48 - lr: 0.020000\n",
-      "2021-09-08 01:48:36,888 epoch 9 - iter 10/24 - loss 0.63241468 - samples/sec: 18.37 - lr: 0.020000\n",
-      "2021-09-08 01:48:36,991 epoch 9 - iter 12/24 - loss 0.63034687 - samples/sec: 19.54 - lr: 0.020000\n",
-      "2021-09-08 01:48:37,100 epoch 9 - iter 14/24 - loss 0.63006511 - samples/sec: 18.42 - lr: 0.020000\n",
-      "2021-09-08 01:48:37,203 epoch 9 - iter 16/24 - loss 0.63160257 - samples/sec: 19.57 - lr: 0.020000\n",
-      "2021-09-08 01:48:37,298 epoch 9 - iter 18/24 - loss 0.63164965 - samples/sec: 21.25 - lr: 0.020000\n",
-      "2021-09-08 01:48:37,398 epoch 9 - iter 20/24 - loss 0.63163667 - samples/sec: 19.97 - lr: 0.020000\n",
-      "2021-09-08 01:48:37,501 epoch 9 - iter 22/24 - loss 0.63389331 - samples/sec: 19.64 - lr: 0.020000\n",
-      "2021-09-08 01:48:37,608 epoch 9 - iter 24/24 - loss 0.63435773 - samples/sec: 18.73 - lr: 0.020000\n",
-      "2021-09-08 01:48:37,609 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:37,609 EPOCH 9 done: loss 0.6344 - lr 0.0200000\n",
-      "2021-09-08 01:48:37,690 DEV : loss 0.636720597743988 - score 0.6667\n",
-      "Epoch     9: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:48:37,693 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:48:37,695 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:37,827 epoch 10 - iter 2/24 - loss 0.64038086 - samples/sec: 17.64 - lr: 0.010000\n",
-      "2021-09-08 01:48:37,931 epoch 10 - iter 4/24 - loss 0.64143994 - samples/sec: 19.26 - lr: 0.010000\n",
-      "2021-09-08 01:48:38,044 epoch 10 - iter 6/24 - loss 0.64777511 - samples/sec: 17.81 - lr: 0.010000\n"
+      "2021-09-21 21:19:12,241 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:12,344 epoch 2 - iter 2/24 - loss 0.61874056 - samples/sec: 22.56 - lr: 0.020000\n",
+      "2021-09-21 21:19:12,431 epoch 2 - iter 4/24 - loss 0.62240016 - samples/sec: 23.06 - lr: 0.020000\n",
+      "2021-09-21 21:19:12,520 epoch 2 - iter 6/24 - loss 0.62665250 - samples/sec: 22.52 - lr: 0.020000\n",
+      "2021-09-21 21:19:12,607 epoch 2 - iter 8/24 - loss 0.63011502 - samples/sec: 23.15 - lr: 0.020000\n",
+      "2021-09-21 21:19:12,694 epoch 2 - iter 10/24 - loss 0.63234674 - samples/sec: 23.13 - lr: 0.020000\n",
+      "2021-09-21 21:19:12,781 epoch 2 - iter 12/24 - loss 0.63406909 - samples/sec: 23.13 - lr: 0.020000\n",
+      "2021-09-21 21:19:12,868 epoch 2 - iter 14/24 - loss 0.63352349 - samples/sec: 23.01 - lr: 0.020000\n",
+      "2021-09-21 21:19:12,955 epoch 2 - iter 16/24 - loss 0.63420757 - samples/sec: 23.14 - lr: 0.020000\n",
+      "2021-09-21 21:19:13,042 epoch 2 - iter 18/24 - loss 0.63410994 - samples/sec: 23.00 - lr: 0.020000\n",
+      "2021-09-21 21:19:13,129 epoch 2 - iter 20/24 - loss 0.63392468 - samples/sec: 23.31 - lr: 0.020000\n",
+      "2021-09-21 21:19:13,216 epoch 2 - iter 22/24 - loss 0.63401264 - samples/sec: 23.06 - lr: 0.020000\n",
+      "2021-09-21 21:19:13,305 epoch 2 - iter 24/24 - loss 0.63490280 - samples/sec: 22.65 - lr: 0.020000\n",
+      "2021-09-21 21:19:13,306 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:13,306 EPOCH 2 done: loss 0.6349 - lr 0.0200000\n",
+      "2021-09-21 21:19:13,844 DEV : loss 0.6365208029747009 - score 0.0\n",
+      "2021-09-21 21:19:13,845 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:19:13,871 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:14,104 epoch 3 - iter 2/24 - loss 0.64816192 - samples/sec: 10.31 - lr: 0.020000\n",
+      "2021-09-21 21:19:14,294 epoch 3 - iter 4/24 - loss 0.63802136 - samples/sec: 10.58 - lr: 0.020000\n",
+      "2021-09-21 21:19:14,482 epoch 3 - iter 6/24 - loss 0.63903824 - samples/sec: 10.68 - lr: 0.020000\n",
+      "2021-09-21 21:19:14,655 epoch 3 - iter 8/24 - loss 0.63796622 - samples/sec: 11.59 - lr: 0.020000\n",
+      "2021-09-21 21:19:14,845 epoch 3 - iter 10/24 - loss 0.63681442 - samples/sec: 10.56 - lr: 0.020000\n",
+      "2021-09-21 21:19:15,009 epoch 3 - iter 12/24 - loss 0.63603919 - samples/sec: 12.24 - lr: 0.020000\n",
+      "2021-09-21 21:19:15,177 epoch 3 - iter 14/24 - loss 0.63546334 - samples/sec: 11.97 - lr: 0.020000\n",
+      "2021-09-21 21:19:15,380 epoch 3 - iter 16/24 - loss 0.63491471 - samples/sec: 9.90 - lr: 0.020000\n",
+      "2021-09-21 21:19:15,572 epoch 3 - iter 18/24 - loss 0.63739725 - samples/sec: 10.43 - lr: 0.020000\n",
+      "2021-09-21 21:19:15,750 epoch 3 - iter 20/24 - loss 0.63850682 - samples/sec: 11.29 - lr: 0.020000\n",
+      "2021-09-21 21:19:15,951 epoch 3 - iter 22/24 - loss 0.63887778 - samples/sec: 10.01 - lr: 0.020000\n",
+      "2021-09-21 21:19:16,133 epoch 3 - iter 24/24 - loss 0.63884351 - samples/sec: 11.00 - lr: 0.020000\n",
+      "2021-09-21 21:19:16,135 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:16,135 EPOCH 3 done: loss 0.6388 - lr 0.0200000\n",
+      "2021-09-21 21:19:16,269 DEV : loss 0.6371505260467529 - score 0.3333\n",
+      "2021-09-21 21:19:16,276 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:19:16,281 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:16,507 epoch 4 - iter 2/24 - loss 0.63953868 - samples/sec: 10.69 - lr: 0.020000\n",
+      "2021-09-21 21:19:16,688 epoch 4 - iter 4/24 - loss 0.64064021 - samples/sec: 11.13 - lr: 0.020000\n",
+      "2021-09-21 21:19:16,846 epoch 4 - iter 6/24 - loss 0.64088038 - samples/sec: 12.68 - lr: 0.020000\n",
+      "2021-09-21 21:19:16,986 epoch 4 - iter 8/24 - loss 0.63984174 - samples/sec: 14.34 - lr: 0.020000\n",
+      "2021-09-21 21:19:17,122 epoch 4 - iter 10/24 - loss 0.63762062 - samples/sec: 14.71 - lr: 0.020000\n",
+      "2021-09-21 21:19:17,271 epoch 4 - iter 12/24 - loss 0.64132332 - samples/sec: 13.47 - lr: 0.020000\n",
+      "2021-09-21 21:19:17,420 epoch 4 - iter 14/24 - loss 0.64273418 - samples/sec: 13.51 - lr: 0.020000\n",
+      "2021-09-21 21:19:17,564 epoch 4 - iter 16/24 - loss 0.64197284 - samples/sec: 13.92 - lr: 0.020000\n",
+      "2021-09-21 21:19:17,704 epoch 4 - iter 18/24 - loss 0.64142493 - samples/sec: 14.36 - lr: 0.020000\n",
+      "2021-09-21 21:19:17,863 epoch 4 - iter 20/24 - loss 0.64040992 - samples/sec: 12.60 - lr: 0.020000\n",
+      "2021-09-21 21:19:18,012 epoch 4 - iter 22/24 - loss 0.64058267 - samples/sec: 13.44 - lr: 0.020000\n",
+      "2021-09-21 21:19:18,163 epoch 4 - iter 24/24 - loss 0.64054515 - samples/sec: 13.37 - lr: 0.020000\n",
+      "2021-09-21 21:19:18,164 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:18,165 EPOCH 4 done: loss 0.6405 - lr 0.0200000\n",
+      "2021-09-21 21:19:18,769 DEV : loss 0.6370668411254883 - score 0.3333\n",
+      "2021-09-21 21:19:18,772 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:19:18,853 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:19,024 epoch 5 - iter 2/24 - loss 0.63011286 - samples/sec: 14.01 - lr: 0.020000\n",
+      "2021-09-21 21:19:19,184 epoch 5 - iter 4/24 - loss 0.62851469 - samples/sec: 12.51 - lr: 0.020000\n",
+      "2021-09-21 21:19:19,344 epoch 5 - iter 6/24 - loss 0.62702005 - samples/sec: 12.59 - lr: 0.020000\n",
+      "2021-09-21 21:19:19,479 epoch 5 - iter 8/24 - loss 0.62698085 - samples/sec: 14.85 - lr: 0.020000\n",
+      "2021-09-21 21:19:19,602 epoch 5 - iter 10/24 - loss 0.62553362 - samples/sec: 16.37 - lr: 0.020000\n",
+      "2021-09-21 21:19:19,715 epoch 5 - iter 12/24 - loss 0.62813135 - samples/sec: 17.79 - lr: 0.020000\n",
+      "2021-09-21 21:19:19,853 epoch 5 - iter 14/24 - loss 0.62804531 - samples/sec: 14.54 - lr: 0.020000\n",
+      "2021-09-21 21:19:19,967 epoch 5 - iter 16/24 - loss 0.62861123 - samples/sec: 17.56 - lr: 0.020000\n",
+      "2021-09-21 21:19:20,059 epoch 5 - iter 18/24 - loss 0.62847973 - samples/sec: 21.90 - lr: 0.020000\n",
+      "2021-09-21 21:19:20,170 epoch 5 - iter 20/24 - loss 0.62778266 - samples/sec: 18.19 - lr: 0.020000\n",
+      "2021-09-21 21:19:20,338 epoch 5 - iter 22/24 - loss 0.62924108 - samples/sec: 11.93 - lr: 0.020000\n",
+      "2021-09-21 21:19:20,493 epoch 5 - iter 24/24 - loss 0.63032959 - samples/sec: 12.94 - lr: 0.020000\n",
+      "2021-09-21 21:19:20,494 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:20,495 EPOCH 5 done: loss 0.6303 - lr 0.0200000\n",
+      "2021-09-21 21:19:20,700 DEV : loss 0.6366466879844666 - score 0.6667\n",
+      "2021-09-21 21:19:20,701 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:19:25,925 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:26,149 epoch 6 - iter 2/24 - loss 0.62879854 - samples/sec: 10.74 - lr: 0.020000\n",
+      "2021-09-21 21:19:26,312 epoch 6 - iter 4/24 - loss 0.63129716 - samples/sec: 12.30 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:48:38,155 epoch 10 - iter 8/24 - loss 0.64558606 - samples/sec: 18.15 - lr: 0.010000\n",
-      "2021-09-08 01:48:38,264 epoch 10 - iter 10/24 - loss 0.64070143 - samples/sec: 18.47 - lr: 0.010000\n",
-      "2021-09-08 01:48:38,371 epoch 10 - iter 12/24 - loss 0.64185542 - samples/sec: 18.84 - lr: 0.010000\n",
-      "2021-09-08 01:48:38,475 epoch 10 - iter 14/24 - loss 0.64176881 - samples/sec: 19.18 - lr: 0.010000\n",
-      "2021-09-08 01:48:38,591 epoch 10 - iter 16/24 - loss 0.63775179 - samples/sec: 17.40 - lr: 0.010000\n",
-      "2021-09-08 01:48:38,691 epoch 10 - iter 18/24 - loss 0.63987590 - samples/sec: 20.14 - lr: 0.010000\n",
-      "2021-09-08 01:48:38,799 epoch 10 - iter 20/24 - loss 0.63827216 - samples/sec: 18.55 - lr: 0.010000\n",
-      "2021-09-08 01:48:38,921 epoch 10 - iter 22/24 - loss 0.63601576 - samples/sec: 16.44 - lr: 0.010000\n",
-      "2021-09-08 01:48:39,034 epoch 10 - iter 24/24 - loss 0.63679092 - samples/sec: 17.82 - lr: 0.010000\n",
-      "2021-09-08 01:48:39,035 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:39,036 EPOCH 10 done: loss 0.6368 - lr 0.0100000\n",
-      "2021-09-08 01:48:39,107 DEV : loss 0.6365683078765869 - score 0.3333\n",
-      "2021-09-08 01:48:39,108 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:48:43,084 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:43,085 Testing using best model ...\n",
-      "2021-09-08 01:48:43,086 loading file None1/best-model.pt\n",
+      "2021-09-21 21:19:26,466 epoch 6 - iter 6/24 - loss 0.63433840 - samples/sec: 13.08 - lr: 0.020000\n",
+      "2021-09-21 21:19:26,590 epoch 6 - iter 8/24 - loss 0.63359479 - samples/sec: 16.27 - lr: 0.020000\n",
+      "2021-09-21 21:19:26,749 epoch 6 - iter 10/24 - loss 0.63532398 - samples/sec: 12.59 - lr: 0.020000\n",
+      "2021-09-21 21:19:26,903 epoch 6 - iter 12/24 - loss 0.63387435 - samples/sec: 13.08 - lr: 0.020000\n",
+      "2021-09-21 21:19:27,051 epoch 6 - iter 14/24 - loss 0.63440187 - samples/sec: 13.54 - lr: 0.020000\n",
+      "2021-09-21 21:19:27,177 epoch 6 - iter 16/24 - loss 0.63547523 - samples/sec: 15.94 - lr: 0.020000\n",
+      "2021-09-21 21:19:27,344 epoch 6 - iter 18/24 - loss 0.63328061 - samples/sec: 12.09 - lr: 0.020000\n",
+      "2021-09-21 21:19:27,496 epoch 6 - iter 20/24 - loss 0.63488467 - samples/sec: 13.18 - lr: 0.020000\n",
+      "2021-09-21 21:19:27,658 epoch 6 - iter 22/24 - loss 0.63519633 - samples/sec: 12.43 - lr: 0.020000\n",
+      "2021-09-21 21:19:27,827 epoch 6 - iter 24/24 - loss 0.63492775 - samples/sec: 11.89 - lr: 0.020000\n",
+      "2021-09-21 21:19:27,828 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:27,828 EPOCH 6 done: loss 0.6349 - lr 0.0200000\n",
+      "2021-09-21 21:19:28,035 DEV : loss 0.6365176439285278 - score 0.0\n",
+      "2021-09-21 21:19:28,036 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:19:28,109 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:28,305 epoch 7 - iter 2/24 - loss 0.64288014 - samples/sec: 11.83 - lr: 0.020000\n",
+      "2021-09-21 21:19:28,453 epoch 7 - iter 4/24 - loss 0.64574879 - samples/sec: 13.56 - lr: 0.020000\n",
+      "2021-09-21 21:19:28,601 epoch 7 - iter 6/24 - loss 0.64388494 - samples/sec: 13.58 - lr: 0.020000\n",
+      "2021-09-21 21:19:28,759 epoch 7 - iter 8/24 - loss 0.64322352 - samples/sec: 12.65 - lr: 0.020000\n",
+      "2021-09-21 21:19:28,911 epoch 7 - iter 10/24 - loss 0.64189187 - samples/sec: 13.18 - lr: 0.020000\n",
+      "2021-09-21 21:19:29,061 epoch 7 - iter 12/24 - loss 0.64249457 - samples/sec: 13.45 - lr: 0.020000\n",
+      "2021-09-21 21:19:29,212 epoch 7 - iter 14/24 - loss 0.63996960 - samples/sec: 13.30 - lr: 0.020000\n",
+      "2021-09-21 21:19:29,370 epoch 7 - iter 16/24 - loss 0.64119009 - samples/sec: 12.70 - lr: 0.020000\n",
+      "2021-09-21 21:19:29,528 epoch 7 - iter 18/24 - loss 0.64228933 - samples/sec: 12.72 - lr: 0.020000\n",
+      "2021-09-21 21:19:29,683 epoch 7 - iter 20/24 - loss 0.64172422 - samples/sec: 12.89 - lr: 0.020000\n",
+      "2021-09-21 21:19:29,832 epoch 7 - iter 22/24 - loss 0.64175110 - samples/sec: 13.48 - lr: 0.020000\n",
+      "2021-09-21 21:19:29,977 epoch 7 - iter 24/24 - loss 0.64070685 - samples/sec: 13.85 - lr: 0.020000\n",
+      "2021-09-21 21:19:29,978 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:29,979 EPOCH 7 done: loss 0.6407 - lr 0.0200000\n",
+      "2021-09-21 21:19:30,459 DEV : loss 0.6365577578544617 - score 0.3333\n",
+      "2021-09-21 21:19:30,460 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:19:30,463 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:30,643 epoch 8 - iter 2/24 - loss 0.61892343 - samples/sec: 12.91 - lr: 0.020000\n",
+      "2021-09-21 21:19:30,852 epoch 8 - iter 4/24 - loss 0.63144815 - samples/sec: 9.62 - lr: 0.020000\n",
+      "2021-09-21 21:19:31,038 epoch 8 - iter 6/24 - loss 0.63278179 - samples/sec: 10.79 - lr: 0.020000\n",
+      "2021-09-21 21:19:31,227 epoch 8 - iter 8/24 - loss 0.63858901 - samples/sec: 10.61 - lr: 0.020000\n",
+      "2021-09-21 21:19:31,436 epoch 8 - iter 10/24 - loss 0.63730382 - samples/sec: 9.62 - lr: 0.020000\n",
+      "2021-09-21 21:19:31,597 epoch 8 - iter 12/24 - loss 0.63572033 - samples/sec: 12.45 - lr: 0.020000\n",
+      "2021-09-21 21:19:31,762 epoch 8 - iter 14/24 - loss 0.63681321 - samples/sec: 12.15 - lr: 0.020000\n",
+      "2021-09-21 21:19:31,937 epoch 8 - iter 16/24 - loss 0.63877933 - samples/sec: 11.47 - lr: 0.020000\n",
+      "2021-09-21 21:19:32,099 epoch 8 - iter 18/24 - loss 0.63914817 - samples/sec: 12.43 - lr: 0.020000\n",
+      "2021-09-21 21:19:32,228 epoch 8 - iter 20/24 - loss 0.63963681 - samples/sec: 15.49 - lr: 0.020000\n",
+      "2021-09-21 21:19:32,401 epoch 8 - iter 22/24 - loss 0.64068984 - samples/sec: 11.61 - lr: 0.020000\n",
+      "2021-09-21 21:19:32,612 epoch 8 - iter 24/24 - loss 0.64054576 - samples/sec: 9.53 - lr: 0.020000\n",
+      "2021-09-21 21:19:32,613 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:32,613 EPOCH 8 done: loss 0.6405 - lr 0.0200000\n",
+      "2021-09-21 21:19:32,765 DEV : loss 0.636539876461029 - score 0.3333\n",
+      "2021-09-21 21:19:32,766 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:19:32,768 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:32,977 epoch 9 - iter 2/24 - loss 0.62045646 - samples/sec: 11.17 - lr: 0.020000\n",
+      "2021-09-21 21:19:33,173 epoch 9 - iter 4/24 - loss 0.63241588 - samples/sec: 10.28 - lr: 0.020000\n",
+      "2021-09-21 21:19:33,344 epoch 9 - iter 6/24 - loss 0.63323870 - samples/sec: 11.76 - lr: 0.020000\n",
+      "2021-09-21 21:19:33,515 epoch 9 - iter 8/24 - loss 0.63553946 - samples/sec: 11.75 - lr: 0.020000\n",
+      "2021-09-21 21:19:33,700 epoch 9 - iter 10/24 - loss 0.63433955 - samples/sec: 10.81 - lr: 0.020000\n",
+      "2021-09-21 21:19:33,883 epoch 9 - iter 12/24 - loss 0.63325813 - samples/sec: 10.99 - lr: 0.020000\n",
+      "2021-09-21 21:19:34,038 epoch 9 - iter 14/24 - loss 0.63463652 - samples/sec: 12.94 - lr: 0.020000\n",
+      "2021-09-21 21:19:34,192 epoch 9 - iter 16/24 - loss 0.63399870 - samples/sec: 13.14 - lr: 0.020000\n",
+      "2021-09-21 21:19:34,343 epoch 9 - iter 18/24 - loss 0.63301913 - samples/sec: 13.30 - lr: 0.020000\n",
+      "2021-09-21 21:19:34,495 epoch 9 - iter 20/24 - loss 0.63357032 - samples/sec: 13.16 - lr: 0.020000\n",
+      "2021-09-21 21:19:34,649 epoch 9 - iter 22/24 - loss 0.63189182 - samples/sec: 13.07 - lr: 0.020000\n",
+      "2021-09-21 21:19:34,800 epoch 9 - iter 24/24 - loss 0.63150169 - samples/sec: 13.26 - lr: 0.020000\n",
+      "2021-09-21 21:19:34,802 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:34,802 EPOCH 9 done: loss 0.6315 - lr 0.0200000\n",
+      "2021-09-21 21:19:34,985 DEV : loss 0.6368974447250366 - score 0.6667\n",
+      "Epoch     9: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:19:34,989 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:19:35,065 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:35,225 epoch 10 - iter 2/24 - loss 0.63784039 - samples/sec: 14.66 - lr: 0.010000\n",
+      "2021-09-21 21:19:35,360 epoch 10 - iter 4/24 - loss 0.62525776 - samples/sec: 14.81 - lr: 0.010000\n",
+      "2021-09-21 21:19:35,526 epoch 10 - iter 6/24 - loss 0.63470372 - samples/sec: 12.10 - lr: 0.010000\n",
+      "2021-09-21 21:19:35,670 epoch 10 - iter 8/24 - loss 0.63694066 - samples/sec: 14.00 - lr: 0.010000\n",
+      "2021-09-21 21:19:35,819 epoch 10 - iter 10/24 - loss 0.63863840 - samples/sec: 13.43 - lr: 0.010000\n",
+      "2021-09-21 21:19:35,964 epoch 10 - iter 12/24 - loss 0.64004052 - samples/sec: 13.89 - lr: 0.010000\n",
+      "2021-09-21 21:19:36,112 epoch 10 - iter 14/24 - loss 0.63898548 - samples/sec: 13.57 - lr: 0.010000\n",
+      "2021-09-21 21:19:36,243 epoch 10 - iter 16/24 - loss 0.64000172 - samples/sec: 15.31 - lr: 0.010000\n",
+      "2021-09-21 21:19:36,393 epoch 10 - iter 18/24 - loss 0.63925847 - samples/sec: 13.34 - lr: 0.010000\n",
+      "2021-09-21 21:19:36,565 epoch 10 - iter 20/24 - loss 0.63865269 - samples/sec: 11.70 - lr: 0.010000\n",
+      "2021-09-21 21:19:36,695 epoch 10 - iter 22/24 - loss 0.63998236 - samples/sec: 15.45 - lr: 0.010000\n",
+      "2021-09-21 21:19:36,872 epoch 10 - iter 24/24 - loss 0.64136077 - samples/sec: 11.30 - lr: 0.010000\n",
+      "2021-09-21 21:19:36,874 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:36,874 EPOCH 10 done: loss 0.6414 - lr 0.0100000\n",
+      "2021-09-21 21:19:37,623 DEV : loss 0.6365439295768738 - score 0.3333\n",
+      "2021-09-21 21:19:37,624 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:19:43,721 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:43,722 Testing using best model ...\n",
+      "2021-09-21 21:19:43,723 loading file None1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:48:47,971 \t0.0\n",
-      "2021-09-08 01:48:47,972 \n",
+      "2021-09-21 21:19:51,196 \t0.0\n",
+      "2021-09-21 21:19:51,197 \n",
       "Results:\n",
       "- F-score (micro) 0.0\n",
       "- F-score (macro) 0.0\n",
@@ -7101,35 +7126,41 @@
       "By class:\n",
       "                                                          precision    recall  f1-score   support\n",
       "\n",
-      "   having the quality of something harmful or unpleasant     0.0000    0.0000    0.0000         1\n",
-      "one who does not side with any party in a war or dispute     0.0000    0.0000    0.0000         2\n",
-      "                             involving advantage or good     0.0000    0.0000    0.0000         0\n",
+      "   having the quality of something harmful or unpleasant     0.0000    0.0000    0.0000         0\n",
+      "one who does not side with any party in a war or dispute     0.0000    0.0000    0.0000         1\n",
+      "                             involving advantage or good     0.0000    0.0000    0.0000         2\n",
       "\n",
       "                                               micro avg     0.0000    0.0000    0.0000         3\n",
       "                                               macro avg     0.0000    0.0000    0.0000         3\n",
       "                                            weighted avg     0.0000    0.0000    0.0000         3\n",
       "                                             samples avg     0.0000    0.0000    0.0000         3\n",
-      "\n",
-      "2021-09-08 01:48:47,972 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:10,046 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 21:19:51,197 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:16,655 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:49:14,062 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:20:20,855 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 27/27 [00:00<00:00, 37350.33it/s]"
+      "100%|██████████| 27/27 [00:00<00:00, 24955.09it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:49:14,065 [b'having the quality of something harmful or unpleasant', b'one who does not side with any party in a war or dispute', b'involving advantage or good']\n",
-      "2021-09-08 01:49:14,074 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:14,075 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:20:20,858 [b'having the quality of something harmful or unpleasant', b'one who does not side with any party in a war or dispute', b'involving advantage or good']\n",
+      "2021-09-21 21:20:20,868 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:20,870 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -7434,34 +7465,34 @@
       "  )\n",
       "  (beta): 1.0\n",
       "  (weights): None\n",
-      "  (weight_tensor) None\n"
+      "  (weight_tensor) None\n",
+      ")\"\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      ")\"\n",
-      "2021-09-08 01:49:14,076 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:14,076 Corpus: \"Corpus: 24 train + 3 dev + 3 test sentences\"\n",
-      "2021-09-08 01:49:14,077 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:14,077 Parameters:\n",
-      "2021-09-08 01:49:14,077  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:49:14,077  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:49:14,078  - patience: \"3\"\n",
-      "2021-09-08 01:49:14,078  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:49:14,078  - max_epochs: \"10\"\n",
-      "2021-09-08 01:49:14,078  - shuffle: \"True\"\n",
-      "2021-09-08 01:49:14,079  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:49:14,079  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:49:14,079 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:14,080 Model training base path: \"None1\"\n",
-      "2021-09-08 01:49:14,080 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:14,080 Device: cuda:1\n",
-      "2021-09-08 01:49:14,080 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:14,081 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:49:14,087 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:14,217 epoch 1 - iter 2/24 - loss 0.62427187 - samples/sec: 19.90 - lr: 0.020000\n"
+      "2021-09-21 21:20:20,870 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:20,871 Corpus: \"Corpus: 24 train + 3 dev + 3 test sentences\"\n",
+      "2021-09-21 21:20:20,871 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:20,871 Parameters:\n",
+      "2021-09-21 21:20:20,871  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:20:20,872  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:20:20,872  - patience: \"3\"\n",
+      "2021-09-21 21:20:20,872  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:20:20,873  - max_epochs: \"10\"\n",
+      "2021-09-21 21:20:20,873  - shuffle: \"True\"\n",
+      "2021-09-21 21:20:20,873  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:20:20,873  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:20:20,874 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:20,874 Model training base path: \"None1\"\n",
+      "2021-09-21 21:20:20,874 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:20,874 Device: cuda:0\n",
+      "2021-09-21 21:20:20,875 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:20,875 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:20:20,882 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:21,006 epoch 1 - iter 2/24 - loss 0.62440339 - samples/sec: 19.78 - lr: 0.020000\n"
      ]
     },
     {
@@ -7475,190 +7506,196 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:49:14,321 epoch 1 - iter 4/24 - loss 0.62772748 - samples/sec: 19.30 - lr: 0.020000\n",
-      "2021-09-08 01:49:14,427 epoch 1 - iter 6/24 - loss 0.62901084 - samples/sec: 19.06 - lr: 0.020000\n",
-      "2021-09-08 01:49:14,535 epoch 1 - iter 8/24 - loss 0.63205820 - samples/sec: 18.58 - lr: 0.020000\n",
-      "2021-09-08 01:49:14,631 epoch 1 - iter 10/24 - loss 0.63494267 - samples/sec: 20.94 - lr: 0.020000\n",
-      "2021-09-08 01:49:14,734 epoch 1 - iter 12/24 - loss 0.63433398 - samples/sec: 19.42 - lr: 0.020000\n",
-      "2021-09-08 01:49:14,843 epoch 1 - iter 14/24 - loss 0.63474404 - samples/sec: 18.46 - lr: 0.020000\n",
-      "2021-09-08 01:49:14,947 epoch 1 - iter 16/24 - loss 0.63902886 - samples/sec: 19.30 - lr: 0.020000\n",
-      "2021-09-08 01:49:15,054 epoch 1 - iter 18/24 - loss 0.63767619 - samples/sec: 18.86 - lr: 0.020000\n",
-      "2021-09-08 01:49:15,161 epoch 1 - iter 20/24 - loss 0.63813992 - samples/sec: 18.83 - lr: 0.020000\n",
-      "2021-09-08 01:49:15,258 epoch 1 - iter 22/24 - loss 0.63797660 - samples/sec: 20.63 - lr: 0.020000\n",
-      "2021-09-08 01:49:15,346 epoch 1 - iter 24/24 - loss 0.63711096 - samples/sec: 22.89 - lr: 0.020000\n",
-      "2021-09-08 01:49:15,347 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:15,347 EPOCH 1 done: loss 0.6371 - lr 0.0200000\n",
-      "2021-09-08 01:49:15,397 DEV : loss 0.6365179419517517 - score 0.6667\n",
-      "2021-09-08 01:49:15,397 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:20:21,128 epoch 1 - iter 4/24 - loss 0.62926872 - samples/sec: 16.45 - lr: 0.020000\n",
+      "2021-09-21 21:20:21,235 epoch 1 - iter 6/24 - loss 0.63052800 - samples/sec: 18.78 - lr: 0.020000\n",
+      "2021-09-21 21:20:21,363 epoch 1 - iter 8/24 - loss 0.62762851 - samples/sec: 15.69 - lr: 0.020000\n",
+      "2021-09-21 21:20:21,495 epoch 1 - iter 10/24 - loss 0.62834005 - samples/sec: 15.23 - lr: 0.020000\n",
+      "2021-09-21 21:20:21,660 epoch 1 - iter 12/24 - loss 0.62714253 - samples/sec: 12.16 - lr: 0.020000\n",
+      "2021-09-21 21:20:21,809 epoch 1 - iter 14/24 - loss 0.62843049 - samples/sec: 13.47 - lr: 0.020000\n",
+      "2021-09-21 21:20:21,951 epoch 1 - iter 16/24 - loss 0.62669186 - samples/sec: 14.14 - lr: 0.020000\n",
+      "2021-09-21 21:20:22,091 epoch 1 - iter 18/24 - loss 0.62743191 - samples/sec: 14.27 - lr: 0.020000\n",
+      "2021-09-21 21:20:22,249 epoch 1 - iter 20/24 - loss 0.62840675 - samples/sec: 12.74 - lr: 0.020000\n",
+      "2021-09-21 21:20:22,413 epoch 1 - iter 22/24 - loss 0.62956575 - samples/sec: 12.22 - lr: 0.020000\n",
+      "2021-09-21 21:20:22,571 epoch 1 - iter 24/24 - loss 0.63060857 - samples/sec: 12.69 - lr: 0.020000\n",
+      "2021-09-21 21:20:22,572 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:22,573 EPOCH 1 done: loss 0.6306 - lr 0.0200000\n",
+      "2021-09-21 21:20:22,685 DEV : loss 0.6365651488304138 - score 0.6667\n",
+      "2021-09-21 21:20:22,686 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:49:19,902 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:20,006 epoch 2 - iter 2/24 - loss 0.62318674 - samples/sec: 22.38 - lr: 0.020000\n",
-      "2021-09-08 01:49:20,097 epoch 2 - iter 4/24 - loss 0.63104016 - samples/sec: 22.10 - lr: 0.020000\n",
-      "2021-09-08 01:49:20,298 epoch 2 - iter 6/24 - loss 0.63988741 - samples/sec: 9.99 - lr: 0.020000\n",
-      "2021-09-08 01:49:20,387 epoch 2 - iter 8/24 - loss 0.64107989 - samples/sec: 22.61 - lr: 0.020000\n",
-      "2021-09-08 01:49:20,475 epoch 2 - iter 10/24 - loss 0.64197712 - samples/sec: 22.85 - lr: 0.020000\n",
-      "2021-09-08 01:49:20,563 epoch 2 - iter 12/24 - loss 0.64185607 - samples/sec: 22.75 - lr: 0.020000\n",
-      "2021-09-08 01:49:20,651 epoch 2 - iter 14/24 - loss 0.64104036 - samples/sec: 22.85 - lr: 0.020000\n",
-      "2021-09-08 01:49:20,740 epoch 2 - iter 16/24 - loss 0.64064829 - samples/sec: 22.60 - lr: 0.020000\n",
-      "2021-09-08 01:49:20,840 epoch 2 - iter 18/24 - loss 0.64096532 - samples/sec: 20.21 - lr: 0.020000\n",
-      "2021-09-08 01:49:20,930 epoch 2 - iter 20/24 - loss 0.63977432 - samples/sec: 22.44 - lr: 0.020000\n",
-      "2021-09-08 01:49:21,023 epoch 2 - iter 22/24 - loss 0.63926615 - samples/sec: 21.62 - lr: 0.020000\n",
-      "2021-09-08 01:49:21,114 epoch 2 - iter 24/24 - loss 0.63919565 - samples/sec: 22.10 - lr: 0.020000\n",
-      "2021-09-08 01:49:21,116 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:21,116 EPOCH 2 done: loss 0.6392 - lr 0.0200000\n",
-      "2021-09-08 01:49:21,336 DEV : loss 0.6365299224853516 - score 0.6667\n",
-      "2021-09-08 01:49:21,337 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:49:21,502 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:21,628 epoch 3 - iter 2/24 - loss 0.63028955 - samples/sec: 17.91 - lr: 0.020000\n",
-      "2021-09-08 01:49:21,741 epoch 3 - iter 4/24 - loss 0.63282967 - samples/sec: 17.69 - lr: 0.020000\n",
-      "2021-09-08 01:49:21,856 epoch 3 - iter 6/24 - loss 0.63520807 - samples/sec: 17.63 - lr: 0.020000\n",
-      "2021-09-08 01:49:21,946 epoch 3 - iter 8/24 - loss 0.63553347 - samples/sec: 22.37 - lr: 0.020000\n",
-      "2021-09-08 01:49:22,038 epoch 3 - iter 10/24 - loss 0.62965821 - samples/sec: 21.80 - lr: 0.020000\n",
-      "2021-09-08 01:49:22,151 epoch 3 - iter 12/24 - loss 0.63132249 - samples/sec: 17.79 - lr: 0.020000\n",
-      "2021-09-08 01:49:22,262 epoch 3 - iter 14/24 - loss 0.63472784 - samples/sec: 18.07 - lr: 0.020000\n",
-      "2021-09-08 01:49:22,373 epoch 3 - iter 16/24 - loss 0.63172944 - samples/sec: 18.14 - lr: 0.020000\n",
-      "2021-09-08 01:49:22,489 epoch 3 - iter 18/24 - loss 0.63253079 - samples/sec: 17.42 - lr: 0.020000\n",
-      "2021-09-08 01:49:22,602 epoch 3 - iter 20/24 - loss 0.63273108 - samples/sec: 17.79 - lr: 0.020000\n",
-      "2021-09-08 01:49:22,713 epoch 3 - iter 22/24 - loss 0.63446476 - samples/sec: 18.07 - lr: 0.020000\n",
-      "2021-09-08 01:49:22,827 epoch 3 - iter 24/24 - loss 0.63433194 - samples/sec: 17.55 - lr: 0.020000\n",
-      "2021-09-08 01:49:22,829 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:22,829 EPOCH 3 done: loss 0.6343 - lr 0.0200000\n",
-      "2021-09-08 01:49:23,081 DEV : loss 0.63652503490448 - score 0.6667\n",
-      "2021-09-08 01:49:23,082 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:49:23,093 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:23,229 epoch 4 - iter 2/24 - loss 0.63629508 - samples/sec: 16.88 - lr: 0.020000\n",
-      "2021-09-08 01:49:23,346 epoch 4 - iter 4/24 - loss 0.64221501 - samples/sec: 17.18 - lr: 0.020000\n",
-      "2021-09-08 01:49:23,454 epoch 4 - iter 6/24 - loss 0.64099077 - samples/sec: 18.58 - lr: 0.020000\n",
-      "2021-09-08 01:49:23,566 epoch 4 - iter 8/24 - loss 0.64170578 - samples/sec: 17.94 - lr: 0.020000\n",
-      "2021-09-08 01:49:23,679 epoch 4 - iter 10/24 - loss 0.64177167 - samples/sec: 17.72 - lr: 0.020000\n",
-      "2021-09-08 01:49:23,799 epoch 4 - iter 12/24 - loss 0.64121547 - samples/sec: 16.73 - lr: 0.020000\n",
-      "2021-09-08 01:49:23,919 epoch 4 - iter 14/24 - loss 0.64083347 - samples/sec: 16.87 - lr: 0.020000\n",
-      "2021-09-08 01:49:24,036 epoch 4 - iter 16/24 - loss 0.64208926 - samples/sec: 17.08 - lr: 0.020000\n",
-      "2021-09-08 01:49:24,145 epoch 4 - iter 18/24 - loss 0.64111925 - samples/sec: 18.46 - lr: 0.020000\n",
-      "2021-09-08 01:49:24,264 epoch 4 - iter 20/24 - loss 0.64152642 - samples/sec: 16.88 - lr: 0.020000\n",
-      "2021-09-08 01:49:24,381 epoch 4 - iter 22/24 - loss 0.64325373 - samples/sec: 17.14 - lr: 0.020000\n",
-      "2021-09-08 01:49:24,496 epoch 4 - iter 24/24 - loss 0.64253672 - samples/sec: 17.45 - lr: 0.020000\n",
-      "2021-09-08 01:49:24,498 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:24,498 EPOCH 4 done: loss 0.6425 - lr 0.0200000\n",
-      "2021-09-08 01:49:24,568 DEV : loss 0.6366003155708313 - score 0.6667\n",
-      "2021-09-08 01:49:24,569 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:49:24,571 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:24,691 epoch 5 - iter 2/24 - loss 0.64332700 - samples/sec: 18.89 - lr: 0.020000\n",
-      "2021-09-08 01:49:24,801 epoch 5 - iter 4/24 - loss 0.63742004 - samples/sec: 18.29 - lr: 0.020000\n",
-      "2021-09-08 01:49:24,915 epoch 5 - iter 6/24 - loss 0.63282847 - samples/sec: 17.66 - lr: 0.020000\n",
-      "2021-09-08 01:49:25,029 epoch 5 - iter 8/24 - loss 0.63305140 - samples/sec: 17.57 - lr: 0.020000\n",
-      "2021-09-08 01:49:25,137 epoch 5 - iter 10/24 - loss 0.63439951 - samples/sec: 18.57 - lr: 0.020000\n",
-      "2021-09-08 01:49:25,252 epoch 5 - iter 12/24 - loss 0.63618822 - samples/sec: 17.51 - lr: 0.020000\n",
-      "2021-09-08 01:49:25,367 epoch 5 - iter 14/24 - loss 0.63491929 - samples/sec: 17.38 - lr: 0.020000\n",
-      "2021-09-08 01:49:25,482 epoch 5 - iter 16/24 - loss 0.63584058 - samples/sec: 17.55 - lr: 0.020000\n",
-      "2021-09-08 01:49:25,595 epoch 5 - iter 18/24 - loss 0.63698689 - samples/sec: 17.73 - lr: 0.020000\n",
-      "2021-09-08 01:49:25,710 epoch 5 - iter 20/24 - loss 0.63664522 - samples/sec: 17.53 - lr: 0.020000\n",
-      "2021-09-08 01:49:25,818 epoch 5 - iter 22/24 - loss 0.63830608 - samples/sec: 18.55 - lr: 0.020000\n",
-      "2021-09-08 01:49:25,924 epoch 5 - iter 24/24 - loss 0.63788206 - samples/sec: 19.01 - lr: 0.020000\n",
-      "2021-09-08 01:49:25,925 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:25,925 EPOCH 5 done: loss 0.6379 - lr 0.0200000\n",
-      "2021-09-08 01:49:25,997 DEV : loss 0.6365160942077637 - score 0.6667\n",
-      "2021-09-08 01:49:25,999 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:20:26,428 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:26,559 epoch 2 - iter 2/24 - loss 0.62072566 - samples/sec: 18.09 - lr: 0.020000\n",
+      "2021-09-21 21:20:26,669 epoch 2 - iter 4/24 - loss 0.62201780 - samples/sec: 18.25 - lr: 0.020000\n",
+      "2021-09-21 21:20:26,787 epoch 2 - iter 6/24 - loss 0.63294272 - samples/sec: 17.18 - lr: 0.020000\n",
+      "2021-09-21 21:20:26,897 epoch 2 - iter 8/24 - loss 0.63426653 - samples/sec: 18.14 - lr: 0.020000\n",
+      "2021-09-21 21:20:27,003 epoch 2 - iter 10/24 - loss 0.63731128 - samples/sec: 18.98 - lr: 0.020000\n",
+      "2021-09-21 21:20:27,108 epoch 2 - iter 12/24 - loss 0.63951221 - samples/sec: 19.18 - lr: 0.020000\n",
+      "2021-09-21 21:20:27,221 epoch 2 - iter 14/24 - loss 0.63770412 - samples/sec: 17.74 - lr: 0.020000\n",
+      "2021-09-21 21:20:27,336 epoch 2 - iter 16/24 - loss 0.63763047 - samples/sec: 17.62 - lr: 0.020000\n",
+      "2021-09-21 21:20:27,440 epoch 2 - iter 18/24 - loss 0.63731771 - samples/sec: 19.34 - lr: 0.020000\n",
+      "2021-09-21 21:20:27,551 epoch 2 - iter 20/24 - loss 0.63745819 - samples/sec: 18.03 - lr: 0.020000\n",
+      "2021-09-21 21:20:27,664 epoch 2 - iter 22/24 - loss 0.63574178 - samples/sec: 17.82 - lr: 0.020000\n",
+      "2021-09-21 21:20:27,785 epoch 2 - iter 24/24 - loss 0.63717716 - samples/sec: 16.60 - lr: 0.020000\n",
+      "2021-09-21 21:20:27,786 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:27,787 EPOCH 2 done: loss 0.6372 - lr 0.0200000\n",
+      "2021-09-21 21:20:27,967 DEV : loss 0.6365141868591309 - score 0.6667\n",
+      "2021-09-21 21:20:27,969 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:49:35,243 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:35,348 epoch 6 - iter 2/24 - loss 0.63546064 - samples/sec: 22.25 - lr: 0.020000\n",
-      "2021-09-08 01:49:35,437 epoch 6 - iter 4/24 - loss 0.63226102 - samples/sec: 22.70 - lr: 0.020000\n"
+      "2021-09-21 21:20:32,048 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:32,205 epoch 3 - iter 2/24 - loss 0.63136345 - samples/sec: 14.66 - lr: 0.020000\n",
+      "2021-09-21 21:20:32,351 epoch 3 - iter 4/24 - loss 0.63756211 - samples/sec: 13.75 - lr: 0.020000\n",
+      "2021-09-21 21:20:32,489 epoch 3 - iter 6/24 - loss 0.63786814 - samples/sec: 14.56 - lr: 0.020000\n",
+      "2021-09-21 21:20:32,628 epoch 3 - iter 8/24 - loss 0.63812041 - samples/sec: 14.44 - lr: 0.020000\n",
+      "2021-09-21 21:20:32,778 epoch 3 - iter 10/24 - loss 0.63726504 - samples/sec: 13.39 - lr: 0.020000\n",
+      "2021-09-21 21:20:32,901 epoch 3 - iter 12/24 - loss 0.63703852 - samples/sec: 16.42 - lr: 0.020000\n",
+      "2021-09-21 21:20:33,040 epoch 3 - iter 14/24 - loss 0.63729153 - samples/sec: 14.36 - lr: 0.020000\n",
+      "2021-09-21 21:20:33,160 epoch 3 - iter 16/24 - loss 0.63727751 - samples/sec: 16.79 - lr: 0.020000\n",
+      "2021-09-21 21:20:33,263 epoch 3 - iter 18/24 - loss 0.63684670 - samples/sec: 19.51 - lr: 0.020000\n",
+      "2021-09-21 21:20:33,386 epoch 3 - iter 20/24 - loss 0.63635007 - samples/sec: 16.38 - lr: 0.020000\n",
+      "2021-09-21 21:20:33,489 epoch 3 - iter 22/24 - loss 0.63636683 - samples/sec: 19.51 - lr: 0.020000\n",
+      "2021-09-21 21:20:33,590 epoch 3 - iter 24/24 - loss 0.63447297 - samples/sec: 19.84 - lr: 0.020000\n",
+      "2021-09-21 21:20:33,591 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:33,591 EPOCH 3 done: loss 0.6345 - lr 0.0200000\n",
+      "2021-09-21 21:20:33,662 DEV : loss 0.6365441679954529 - score 0.0\n",
+      "2021-09-21 21:20:33,663 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:20:33,666 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:33,778 epoch 4 - iter 2/24 - loss 0.61532137 - samples/sec: 20.71 - lr: 0.020000\n",
+      "2021-09-21 21:20:33,874 epoch 4 - iter 4/24 - loss 0.64398880 - samples/sec: 21.10 - lr: 0.020000\n",
+      "2021-09-21 21:20:33,973 epoch 4 - iter 6/24 - loss 0.64576514 - samples/sec: 20.23 - lr: 0.020000\n",
+      "2021-09-21 21:20:34,074 epoch 4 - iter 8/24 - loss 0.64626368 - samples/sec: 19.91 - lr: 0.020000\n",
+      "2021-09-21 21:20:34,178 epoch 4 - iter 10/24 - loss 0.64692004 - samples/sec: 19.40 - lr: 0.020000\n",
+      "2021-09-21 21:20:34,283 epoch 4 - iter 12/24 - loss 0.64473397 - samples/sec: 19.13 - lr: 0.020000\n",
+      "2021-09-21 21:20:34,379 epoch 4 - iter 14/24 - loss 0.64097501 - samples/sec: 20.81 - lr: 0.020000\n",
+      "2021-09-21 21:20:34,475 epoch 4 - iter 16/24 - loss 0.64027392 - samples/sec: 20.95 - lr: 0.020000\n",
+      "2021-09-21 21:20:34,574 epoch 4 - iter 18/24 - loss 0.63980383 - samples/sec: 20.48 - lr: 0.020000\n",
+      "2021-09-21 21:20:34,674 epoch 4 - iter 20/24 - loss 0.63882343 - samples/sec: 20.08 - lr: 0.020000\n",
+      "2021-09-21 21:20:34,771 epoch 4 - iter 22/24 - loss 0.63700723 - samples/sec: 20.75 - lr: 0.020000\n",
+      "2021-09-21 21:20:34,871 epoch 4 - iter 24/24 - loss 0.63743088 - samples/sec: 20.10 - lr: 0.020000\n",
+      "2021-09-21 21:20:34,872 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:34,873 EPOCH 4 done: loss 0.6374 - lr 0.0200000\n",
+      "2021-09-21 21:20:35,041 DEV : loss 0.6365270614624023 - score 0.3333\n",
+      "2021-09-21 21:20:35,042 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:20:35,118 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:35,242 epoch 5 - iter 2/24 - loss 0.63088942 - samples/sec: 18.68 - lr: 0.020000\n",
+      "2021-09-21 21:20:35,342 epoch 5 - iter 4/24 - loss 0.63261864 - samples/sec: 20.04 - lr: 0.020000\n",
+      "2021-09-21 21:20:35,447 epoch 5 - iter 6/24 - loss 0.63438002 - samples/sec: 19.14 - lr: 0.020000\n",
+      "2021-09-21 21:20:35,557 epoch 5 - iter 8/24 - loss 0.63085929 - samples/sec: 18.52 - lr: 0.020000\n",
+      "2021-09-21 21:20:35,659 epoch 5 - iter 10/24 - loss 0.63812430 - samples/sec: 19.66 - lr: 0.020000\n",
+      "2021-09-21 21:20:35,774 epoch 5 - iter 12/24 - loss 0.63697104 - samples/sec: 17.55 - lr: 0.020000\n",
+      "2021-09-21 21:20:35,875 epoch 5 - iter 14/24 - loss 0.63676123 - samples/sec: 19.83 - lr: 0.020000\n",
+      "2021-09-21 21:20:35,978 epoch 5 - iter 16/24 - loss 0.63834490 - samples/sec: 19.64 - lr: 0.020000\n",
+      "2021-09-21 21:20:36,078 epoch 5 - iter 18/24 - loss 0.63903427 - samples/sec: 20.01 - lr: 0.020000\n",
+      "2021-09-21 21:20:36,176 epoch 5 - iter 20/24 - loss 0.63847415 - samples/sec: 20.58 - lr: 0.020000\n",
+      "2021-09-21 21:20:36,273 epoch 5 - iter 22/24 - loss 0.63677479 - samples/sec: 20.69 - lr: 0.020000\n",
+      "2021-09-21 21:20:36,383 epoch 5 - iter 24/24 - loss 0.63794939 - samples/sec: 18.35 - lr: 0.020000\n",
+      "2021-09-21 21:20:36,384 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:36,384 EPOCH 5 done: loss 0.6379 - lr 0.0200000\n",
+      "2021-09-21 21:20:36,999 DEV : loss 0.6365189552307129 - score 0.0\n",
+      "2021-09-21 21:20:37,000 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:20:37,002 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:37,212 epoch 6 - iter 2/24 - loss 0.62500131 - samples/sec: 10.87 - lr: 0.020000\n",
+      "2021-09-21 21:20:37,405 epoch 6 - iter 4/24 - loss 0.62897566 - samples/sec: 10.44 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:49:35,524 epoch 6 - iter 6/24 - loss 0.63549841 - samples/sec: 22.93 - lr: 0.020000\n",
-      "2021-09-08 01:49:35,613 epoch 6 - iter 8/24 - loss 0.63430762 - samples/sec: 22.77 - lr: 0.020000\n",
-      "2021-09-08 01:49:35,703 epoch 6 - iter 10/24 - loss 0.63552951 - samples/sec: 22.32 - lr: 0.020000\n",
-      "2021-09-08 01:49:35,791 epoch 6 - iter 12/24 - loss 0.63748009 - samples/sec: 22.82 - lr: 0.020000\n",
-      "2021-09-08 01:49:35,879 epoch 6 - iter 14/24 - loss 0.63448590 - samples/sec: 22.77 - lr: 0.020000\n",
-      "2021-09-08 01:49:35,967 epoch 6 - iter 16/24 - loss 0.63233199 - samples/sec: 22.78 - lr: 0.020000\n",
-      "2021-09-08 01:49:36,055 epoch 6 - iter 18/24 - loss 0.63187463 - samples/sec: 22.88 - lr: 0.020000\n",
-      "2021-09-08 01:49:36,143 epoch 6 - iter 20/24 - loss 0.63205808 - samples/sec: 22.83 - lr: 0.020000\n",
-      "2021-09-08 01:49:36,231 epoch 6 - iter 22/24 - loss 0.63206328 - samples/sec: 22.82 - lr: 0.020000\n",
-      "2021-09-08 01:49:36,320 epoch 6 - iter 24/24 - loss 0.63318269 - samples/sec: 22.76 - lr: 0.020000\n",
-      "2021-09-08 01:49:36,321 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:36,321 EPOCH 6 done: loss 0.6332 - lr 0.0200000\n",
-      "2021-09-08 01:49:40,289 DEV : loss 0.6365141868591309 - score 0.6667\n",
-      "2021-09-08 01:49:40,290 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:49:44,365 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:44,476 epoch 7 - iter 2/24 - loss 0.64245617 - samples/sec: 20.63 - lr: 0.020000\n",
-      "2021-09-08 01:49:44,574 epoch 7 - iter 4/24 - loss 0.64003824 - samples/sec: 20.57 - lr: 0.020000\n",
-      "2021-09-08 01:49:44,672 epoch 7 - iter 6/24 - loss 0.63938994 - samples/sec: 20.62 - lr: 0.020000\n",
-      "2021-09-08 01:49:44,764 epoch 7 - iter 8/24 - loss 0.63484453 - samples/sec: 21.86 - lr: 0.020000\n",
-      "2021-09-08 01:49:44,859 epoch 7 - iter 10/24 - loss 0.63894423 - samples/sec: 21.22 - lr: 0.020000\n",
-      "2021-09-08 01:49:44,957 epoch 7 - iter 12/24 - loss 0.63786403 - samples/sec: 20.69 - lr: 0.020000\n",
-      "2021-09-08 01:49:45,053 epoch 7 - iter 14/24 - loss 0.63791828 - samples/sec: 20.83 - lr: 0.020000\n",
-      "2021-09-08 01:49:45,142 epoch 7 - iter 16/24 - loss 0.63674029 - samples/sec: 22.64 - lr: 0.020000\n",
-      "2021-09-08 01:49:45,231 epoch 7 - iter 18/24 - loss 0.63664348 - samples/sec: 22.77 - lr: 0.020000\n",
-      "2021-09-08 01:49:45,319 epoch 7 - iter 20/24 - loss 0.63656116 - samples/sec: 22.69 - lr: 0.020000\n",
-      "2021-09-08 01:49:45,408 epoch 7 - iter 22/24 - loss 0.63569990 - samples/sec: 22.78 - lr: 0.020000\n",
-      "2021-09-08 01:49:45,496 epoch 7 - iter 24/24 - loss 0.63665325 - samples/sec: 22.80 - lr: 0.020000\n",
-      "2021-09-08 01:49:45,497 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:45,497 EPOCH 7 done: loss 0.6367 - lr 0.0200000\n",
-      "2021-09-08 01:49:45,666 DEV : loss 0.6366040706634521 - score 0.6667\n",
-      "2021-09-08 01:49:45,667 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:49:45,763 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:45,865 epoch 8 - iter 2/24 - loss 0.64680868 - samples/sec: 22.62 - lr: 0.020000\n",
-      "2021-09-08 01:49:45,955 epoch 8 - iter 4/24 - loss 0.63249013 - samples/sec: 22.16 - lr: 0.020000\n",
-      "2021-09-08 01:49:46,044 epoch 8 - iter 6/24 - loss 0.63532150 - samples/sec: 22.64 - lr: 0.020000\n",
-      "2021-09-08 01:49:46,133 epoch 8 - iter 8/24 - loss 0.63217894 - samples/sec: 22.64 - lr: 0.020000\n",
-      "2021-09-08 01:49:46,221 epoch 8 - iter 10/24 - loss 0.63329172 - samples/sec: 22.80 - lr: 0.020000\n",
-      "2021-09-08 01:49:46,310 epoch 8 - iter 12/24 - loss 0.63820647 - samples/sec: 22.56 - lr: 0.020000\n",
-      "2021-09-08 01:49:46,398 epoch 8 - iter 14/24 - loss 0.63617476 - samples/sec: 22.87 - lr: 0.020000\n",
-      "2021-09-08 01:49:46,486 epoch 8 - iter 16/24 - loss 0.63568832 - samples/sec: 22.85 - lr: 0.020000\n",
-      "2021-09-08 01:49:46,575 epoch 8 - iter 18/24 - loss 0.63441581 - samples/sec: 22.73 - lr: 0.020000\n",
-      "2021-09-08 01:49:46,662 epoch 8 - iter 20/24 - loss 0.63776464 - samples/sec: 22.98 - lr: 0.020000\n",
-      "2021-09-08 01:49:46,753 epoch 8 - iter 22/24 - loss 0.63808484 - samples/sec: 22.05 - lr: 0.020000\n",
-      "2021-09-08 01:49:46,842 epoch 8 - iter 24/24 - loss 0.63829056 - samples/sec: 22.73 - lr: 0.020000\n",
-      "2021-09-08 01:49:46,843 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:46,843 EPOCH 8 done: loss 0.6383 - lr 0.0200000\n",
-      "2021-09-08 01:49:47,013 DEV : loss 0.6365408897399902 - score 0.6667\n",
-      "2021-09-08 01:49:47,014 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:49:47,114 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:47,215 epoch 9 - iter 2/24 - loss 0.64538512 - samples/sec: 22.65 - lr: 0.020000\n",
-      "2021-09-08 01:49:47,304 epoch 9 - iter 4/24 - loss 0.64744408 - samples/sec: 22.65 - lr: 0.020000\n",
-      "2021-09-08 01:49:47,392 epoch 9 - iter 6/24 - loss 0.64995415 - samples/sec: 22.80 - lr: 0.020000\n",
-      "2021-09-08 01:49:47,480 epoch 9 - iter 8/24 - loss 0.64720774 - samples/sec: 22.88 - lr: 0.020000\n",
-      "2021-09-08 01:49:47,568 epoch 9 - iter 10/24 - loss 0.64770821 - samples/sec: 22.81 - lr: 0.020000\n",
-      "2021-09-08 01:49:47,660 epoch 9 - iter 12/24 - loss 0.64399270 - samples/sec: 21.89 - lr: 0.020000\n",
-      "2021-09-08 01:49:47,748 epoch 9 - iter 14/24 - loss 0.63963704 - samples/sec: 22.79 - lr: 0.020000\n",
-      "2021-09-08 01:49:47,837 epoch 9 - iter 16/24 - loss 0.63715246 - samples/sec: 22.76 - lr: 0.020000\n",
-      "2021-09-08 01:49:47,926 epoch 9 - iter 18/24 - loss 0.63891164 - samples/sec: 22.58 - lr: 0.020000\n",
-      "2021-09-08 01:49:48,014 epoch 9 - iter 20/24 - loss 0.63844762 - samples/sec: 22.74 - lr: 0.020000\n",
-      "2021-09-08 01:49:48,103 epoch 9 - iter 22/24 - loss 0.63893997 - samples/sec: 22.80 - lr: 0.020000\n",
-      "2021-09-08 01:49:48,191 epoch 9 - iter 24/24 - loss 0.63856668 - samples/sec: 22.86 - lr: 0.020000\n",
-      "2021-09-08 01:49:48,192 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:48,192 EPOCH 9 done: loss 0.6386 - lr 0.0200000\n",
-      "2021-09-08 01:49:48,387 DEV : loss 0.6365840435028076 - score 0.3333\n",
-      "2021-09-08 01:49:48,387 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:49:48,468 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:48,573 epoch 10 - iter 2/24 - loss 0.63033670 - samples/sec: 21.76 - lr: 0.020000\n",
-      "2021-09-08 01:49:48,661 epoch 10 - iter 4/24 - loss 0.63406865 - samples/sec: 22.77 - lr: 0.020000\n",
-      "2021-09-08 01:49:48,750 epoch 10 - iter 6/24 - loss 0.63290585 - samples/sec: 22.65 - lr: 0.020000\n",
-      "2021-09-08 01:49:48,838 epoch 10 - iter 8/24 - loss 0.63139293 - samples/sec: 22.74 - lr: 0.020000\n",
-      "2021-09-08 01:49:48,929 epoch 10 - iter 10/24 - loss 0.63069396 - samples/sec: 22.10 - lr: 0.020000\n",
-      "2021-09-08 01:49:49,019 epoch 10 - iter 12/24 - loss 0.62924241 - samples/sec: 22.55 - lr: 0.020000\n",
-      "2021-09-08 01:49:49,107 epoch 10 - iter 14/24 - loss 0.62855096 - samples/sec: 22.80 - lr: 0.020000\n",
-      "2021-09-08 01:49:49,196 epoch 10 - iter 16/24 - loss 0.63096882 - samples/sec: 22.72 - lr: 0.020000\n",
-      "2021-09-08 01:49:49,288 epoch 10 - iter 18/24 - loss 0.63109555 - samples/sec: 21.86 - lr: 0.020000\n",
-      "2021-09-08 01:49:49,376 epoch 10 - iter 20/24 - loss 0.63292887 - samples/sec: 22.78 - lr: 0.020000\n",
-      "2021-09-08 01:49:49,464 epoch 10 - iter 22/24 - loss 0.63360756 - samples/sec: 22.88 - lr: 0.020000\n",
-      "2021-09-08 01:49:49,553 epoch 10 - iter 24/24 - loss 0.63414464 - samples/sec: 22.65 - lr: 0.020000\n",
-      "2021-09-08 01:49:49,554 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:49,554 EPOCH 10 done: loss 0.6341 - lr 0.0200000\n",
-      "2021-09-08 01:49:49,714 DEV : loss 0.636612057685852 - score 0.6667\n",
-      "Epoch    10: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:49:49,715 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:49:55,512 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:55,513 Testing using best model ...\n",
-      "2021-09-08 01:49:55,514 loading file None1/best-model.pt\n",
+      "2021-09-21 21:20:37,595 epoch 6 - iter 6/24 - loss 0.62693226 - samples/sec: 10.54 - lr: 0.020000\n",
+      "2021-09-21 21:20:37,757 epoch 6 - iter 8/24 - loss 0.62880711 - samples/sec: 12.36 - lr: 0.020000\n",
+      "2021-09-21 21:20:37,933 epoch 6 - iter 10/24 - loss 0.63005853 - samples/sec: 11.42 - lr: 0.020000\n",
+      "2021-09-21 21:20:38,092 epoch 6 - iter 12/24 - loss 0.63140431 - samples/sec: 12.58 - lr: 0.020000\n",
+      "2021-09-21 21:20:38,276 epoch 6 - iter 14/24 - loss 0.63260605 - samples/sec: 10.88 - lr: 0.020000\n",
+      "2021-09-21 21:20:38,448 epoch 6 - iter 16/24 - loss 0.63346621 - samples/sec: 11.69 - lr: 0.020000\n",
+      "2021-09-21 21:20:38,636 epoch 6 - iter 18/24 - loss 0.63381737 - samples/sec: 10.65 - lr: 0.020000\n",
+      "2021-09-21 21:20:38,831 epoch 6 - iter 20/24 - loss 0.63322621 - samples/sec: 10.31 - lr: 0.020000\n",
+      "2021-09-21 21:20:39,021 epoch 6 - iter 22/24 - loss 0.63496942 - samples/sec: 10.57 - lr: 0.020000\n",
+      "2021-09-21 21:20:39,187 epoch 6 - iter 24/24 - loss 0.63560905 - samples/sec: 12.08 - lr: 0.020000\n",
+      "2021-09-21 21:20:39,188 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:39,188 EPOCH 6 done: loss 0.6356 - lr 0.0200000\n",
+      "2021-09-21 21:20:39,292 DEV : loss 0.6366633772850037 - score 0.0\n",
+      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:20:39,292 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:20:39,294 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:39,491 epoch 7 - iter 2/24 - loss 0.63181180 - samples/sec: 12.13 - lr: 0.010000\n",
+      "2021-09-21 21:20:39,628 epoch 7 - iter 4/24 - loss 0.62666945 - samples/sec: 14.67 - lr: 0.010000\n",
+      "2021-09-21 21:20:39,780 epoch 7 - iter 6/24 - loss 0.63195915 - samples/sec: 13.18 - lr: 0.010000\n",
+      "2021-09-21 21:20:39,917 epoch 7 - iter 8/24 - loss 0.63491411 - samples/sec: 14.69 - lr: 0.010000\n",
+      "2021-09-21 21:20:40,037 epoch 7 - iter 10/24 - loss 0.63219480 - samples/sec: 16.73 - lr: 0.010000\n",
+      "2021-09-21 21:20:40,175 epoch 7 - iter 12/24 - loss 0.63076485 - samples/sec: 14.60 - lr: 0.010000\n",
+      "2021-09-21 21:20:40,309 epoch 7 - iter 14/24 - loss 0.63115402 - samples/sec: 14.93 - lr: 0.010000\n",
+      "2021-09-21 21:20:40,436 epoch 7 - iter 16/24 - loss 0.63133842 - samples/sec: 15.85 - lr: 0.010000\n",
+      "2021-09-21 21:20:40,560 epoch 7 - iter 18/24 - loss 0.63312660 - samples/sec: 16.26 - lr: 0.010000\n",
+      "2021-09-21 21:20:40,697 epoch 7 - iter 20/24 - loss 0.63338221 - samples/sec: 14.65 - lr: 0.010000\n",
+      "2021-09-21 21:20:40,828 epoch 7 - iter 22/24 - loss 0.63499589 - samples/sec: 15.27 - lr: 0.010000\n",
+      "2021-09-21 21:20:40,989 epoch 7 - iter 24/24 - loss 0.63636962 - samples/sec: 12.47 - lr: 0.010000\n",
+      "2021-09-21 21:20:40,991 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:40,991 EPOCH 7 done: loss 0.6364 - lr 0.0100000\n",
+      "2021-09-21 21:20:41,169 DEV : loss 0.6365141868591309 - score 0.3333\n",
+      "2021-09-21 21:20:41,170 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:20:41,235 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:41,389 epoch 8 - iter 2/24 - loss 0.62856776 - samples/sec: 17.07 - lr: 0.010000\n",
+      "2021-09-21 21:20:41,497 epoch 8 - iter 4/24 - loss 0.63209671 - samples/sec: 18.68 - lr: 0.010000\n",
+      "2021-09-21 21:20:41,594 epoch 8 - iter 6/24 - loss 0.63603222 - samples/sec: 20.62 - lr: 0.010000\n",
+      "2021-09-21 21:20:41,690 epoch 8 - iter 8/24 - loss 0.63618043 - samples/sec: 21.01 - lr: 0.010000\n",
+      "2021-09-21 21:20:41,790 epoch 8 - iter 10/24 - loss 0.63440061 - samples/sec: 20.20 - lr: 0.010000\n",
+      "2021-09-21 21:20:41,888 epoch 8 - iter 12/24 - loss 0.63314711 - samples/sec: 20.50 - lr: 0.010000\n",
+      "2021-09-21 21:20:41,989 epoch 8 - iter 14/24 - loss 0.63546340 - samples/sec: 19.84 - lr: 0.010000\n",
+      "2021-09-21 21:20:42,091 epoch 8 - iter 16/24 - loss 0.63844399 - samples/sec: 19.72 - lr: 0.010000\n",
+      "2021-09-21 21:20:42,191 epoch 8 - iter 18/24 - loss 0.63702721 - samples/sec: 20.24 - lr: 0.010000\n",
+      "2021-09-21 21:20:42,295 epoch 8 - iter 20/24 - loss 0.63674630 - samples/sec: 19.21 - lr: 0.010000\n",
+      "2021-09-21 21:20:42,396 epoch 8 - iter 22/24 - loss 0.63733057 - samples/sec: 19.92 - lr: 0.010000\n",
+      "2021-09-21 21:20:42,497 epoch 8 - iter 24/24 - loss 0.63735697 - samples/sec: 19.96 - lr: 0.010000\n",
+      "2021-09-21 21:20:42,498 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:42,499 EPOCH 8 done: loss 0.6374 - lr 0.0100000\n",
+      "2021-09-21 21:20:42,665 DEV : loss 0.63652503490448 - score 0.0\n",
+      "2021-09-21 21:20:42,666 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:20:42,749 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:42,867 epoch 9 - iter 2/24 - loss 0.64136371 - samples/sec: 19.58 - lr: 0.010000\n",
+      "2021-09-21 21:20:42,961 epoch 9 - iter 4/24 - loss 0.63682269 - samples/sec: 21.39 - lr: 0.010000\n",
+      "2021-09-21 21:20:43,060 epoch 9 - iter 6/24 - loss 0.63402132 - samples/sec: 20.33 - lr: 0.010000\n",
+      "2021-09-21 21:20:43,161 epoch 9 - iter 8/24 - loss 0.63418584 - samples/sec: 19.90 - lr: 0.010000\n",
+      "2021-09-21 21:20:43,265 epoch 9 - iter 10/24 - loss 0.63531365 - samples/sec: 19.46 - lr: 0.010000\n",
+      "2021-09-21 21:20:43,364 epoch 9 - iter 12/24 - loss 0.63470113 - samples/sec: 20.29 - lr: 0.010000\n",
+      "2021-09-21 21:20:43,462 epoch 9 - iter 14/24 - loss 0.63434899 - samples/sec: 20.46 - lr: 0.010000\n",
+      "2021-09-21 21:20:43,561 epoch 9 - iter 16/24 - loss 0.63436456 - samples/sec: 20.46 - lr: 0.010000\n",
+      "2021-09-21 21:20:43,654 epoch 9 - iter 18/24 - loss 0.63281087 - samples/sec: 21.59 - lr: 0.010000\n",
+      "2021-09-21 21:20:43,762 epoch 9 - iter 20/24 - loss 0.63459406 - samples/sec: 18.58 - lr: 0.010000\n",
+      "2021-09-21 21:20:43,859 epoch 9 - iter 22/24 - loss 0.63504577 - samples/sec: 20.75 - lr: 0.010000\n",
+      "2021-09-21 21:20:43,972 epoch 9 - iter 24/24 - loss 0.63527121 - samples/sec: 17.90 - lr: 0.010000\n",
+      "2021-09-21 21:20:43,973 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:43,973 EPOCH 9 done: loss 0.6353 - lr 0.0100000\n",
+      "2021-09-21 21:20:44,645 DEV : loss 0.6365160942077637 - score 0.0\n",
+      "2021-09-21 21:20:44,646 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:20:44,718 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:44,874 epoch 10 - iter 2/24 - loss 0.63254094 - samples/sec: 15.21 - lr: 0.010000\n",
+      "2021-09-21 21:20:45,010 epoch 10 - iter 4/24 - loss 0.62648678 - samples/sec: 14.81 - lr: 0.010000\n",
+      "2021-09-21 21:20:45,124 epoch 10 - iter 6/24 - loss 0.62755618 - samples/sec: 17.66 - lr: 0.010000\n",
+      "2021-09-21 21:20:45,251 epoch 10 - iter 8/24 - loss 0.63017598 - samples/sec: 15.80 - lr: 0.010000\n",
+      "2021-09-21 21:20:45,384 epoch 10 - iter 10/24 - loss 0.63290786 - samples/sec: 15.07 - lr: 0.010000\n",
+      "2021-09-21 21:20:45,513 epoch 10 - iter 12/24 - loss 0.63838164 - samples/sec: 15.61 - lr: 0.010000\n",
+      "2021-09-21 21:20:45,627 epoch 10 - iter 14/24 - loss 0.63746810 - samples/sec: 17.64 - lr: 0.010000\n",
+      "2021-09-21 21:20:45,744 epoch 10 - iter 16/24 - loss 0.63708663 - samples/sec: 17.12 - lr: 0.010000\n",
+      "2021-09-21 21:20:45,860 epoch 10 - iter 18/24 - loss 0.63758844 - samples/sec: 17.35 - lr: 0.010000\n",
+      "2021-09-21 21:20:45,975 epoch 10 - iter 20/24 - loss 0.63852711 - samples/sec: 17.35 - lr: 0.010000\n",
+      "2021-09-21 21:20:46,101 epoch 10 - iter 22/24 - loss 0.63754459 - samples/sec: 16.01 - lr: 0.010000\n",
+      "2021-09-21 21:20:46,279 epoch 10 - iter 24/24 - loss 0.63732708 - samples/sec: 11.25 - lr: 0.010000\n",
+      "2021-09-21 21:20:46,280 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:46,281 EPOCH 10 done: loss 0.6373 - lr 0.0100000\n",
+      "2021-09-21 21:20:46,414 DEV : loss 0.6366074085235596 - score 0.0\n",
+      "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 21:20:46,415 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:20:50,559 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:50,560 Testing using best model ...\n",
+      "2021-09-21 21:20:50,562 loading file None1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:50:00,761 \t0.3333\n",
-      "2021-09-08 01:50:00,762 \n",
+      "2021-09-21 21:21:00,159 \t0.3333\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 21:21:00,159 \n",
       "Results:\n",
       "- F-score (micro) 0.3333\n",
       "- F-score (macro) 0.2222\n",
@@ -7675,33 +7712,40 @@
       "                                               macro avg     0.1667    0.3333    0.2222         3\n",
       "                                            weighted avg     0.1667    0.3333    0.2222         3\n",
       "                                             samples avg     0.3333    0.3333    0.3333         3\n",
-      "\n"
+      "\n",
+      "2021-09-21 21:21:00,159 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:30,620 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
+      "init TARS\n",
+      "2021-09-21 21:21:35,619 Computing label dictionary. Progress:\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 27/27 [00:00<00:00, 35803.42it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:50:00,762 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:17,923 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
-      "init TARS\n",
-      "2021-09-08 01:50:21,792 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:21:35,622 [b'having the quality of something harmful or unpleasant', b'one who does not side with any party in a war or dispute', b'involving advantage or good']\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 27/27 [00:00<00:00, 36320.14it/s]"
+      "\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:50:21,795 [b'having the quality of something harmful or unpleasant', b'one who does not side with any party in a war or dispute', b'involving advantage or good']\n",
-      "2021-09-08 01:50:21,897 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:21,899 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:21:36,424 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:36,426 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -8014,280 +8058,253 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:50:21,899 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:21,900 Corpus: \"Corpus: 24 train + 3 dev + 3 test sentences\"\n",
-      "2021-09-08 01:50:21,900 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:21,900 Parameters:\n",
-      "2021-09-08 01:50:21,901  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:50:21,901  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:50:21,901  - patience: \"3\"\n",
-      "2021-09-08 01:50:21,901  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:50:21,902  - max_epochs: \"10\"\n",
-      "2021-09-08 01:50:21,902  - shuffle: \"True\"\n",
-      "2021-09-08 01:50:21,902  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:50:21,903  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:50:21,903 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:21,903 Model training base path: \"None1\"\n",
-      "2021-09-08 01:50:21,903 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:21,904 Device: cuda:1\n",
-      "2021-09-08 01:50:21,904 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:21,904 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:50:21,938 ----------------------------------------------------------------------------------------------------\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 01:50:22,051 epoch 1 - iter 2/24 - loss 0.63713983 - samples/sec: 20.76 - lr: 0.020000\n",
-      "2021-09-08 01:50:22,163 epoch 1 - iter 4/24 - loss 0.63422991 - samples/sec: 17.82 - lr: 0.020000\n",
-      "2021-09-08 01:50:22,280 epoch 1 - iter 6/24 - loss 0.63824603 - samples/sec: 17.19 - lr: 0.020000\n",
-      "2021-09-08 01:50:22,390 epoch 1 - iter 8/24 - loss 0.63479862 - samples/sec: 18.36 - lr: 0.020000\n",
-      "2021-09-08 01:50:22,503 epoch 1 - iter 10/24 - loss 0.63561807 - samples/sec: 17.77 - lr: 0.020000\n",
-      "2021-09-08 01:50:22,619 epoch 1 - iter 12/24 - loss 0.63487530 - samples/sec: 17.33 - lr: 0.020000\n",
-      "2021-09-08 01:50:22,732 epoch 1 - iter 14/24 - loss 0.63412679 - samples/sec: 17.69 - lr: 0.020000\n",
-      "2021-09-08 01:50:22,840 epoch 1 - iter 16/24 - loss 0.63707089 - samples/sec: 18.58 - lr: 0.020000\n",
-      "2021-09-08 01:50:22,955 epoch 1 - iter 18/24 - loss 0.63787732 - samples/sec: 17.46 - lr: 0.020000\n",
-      "2021-09-08 01:50:23,073 epoch 1 - iter 20/24 - loss 0.63660796 - samples/sec: 17.01 - lr: 0.020000\n",
-      "2021-09-08 01:50:23,185 epoch 1 - iter 22/24 - loss 0.63501141 - samples/sec: 18.05 - lr: 0.020000\n",
-      "2021-09-08 01:50:23,289 epoch 1 - iter 24/24 - loss 0.63703852 - samples/sec: 19.36 - lr: 0.020000\n",
-      "2021-09-08 01:50:23,290 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:23,290 EPOCH 1 done: loss 0.6370 - lr 0.0200000\n",
-      "2021-09-08 01:50:23,354 DEV : loss 0.6365207433700562 - score 0.0\n",
-      "2021-09-08 01:50:23,354 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:50:27,430 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:27,552 epoch 2 - iter 2/24 - loss 0.64162731 - samples/sec: 18.79 - lr: 0.020000\n",
-      "2021-09-08 01:50:27,663 epoch 2 - iter 4/24 - loss 0.64379653 - samples/sec: 18.10 - lr: 0.020000\n",
-      "2021-09-08 01:50:27,775 epoch 2 - iter 6/24 - loss 0.64150109 - samples/sec: 17.87 - lr: 0.020000\n",
-      "2021-09-08 01:50:27,882 epoch 2 - iter 8/24 - loss 0.64265106 - samples/sec: 18.83 - lr: 0.020000\n",
-      "2021-09-08 01:50:27,997 epoch 2 - iter 10/24 - loss 0.64110011 - samples/sec: 17.50 - lr: 0.020000\n",
-      "2021-09-08 01:50:28,112 epoch 2 - iter 12/24 - loss 0.63966222 - samples/sec: 17.42 - lr: 0.020000\n",
-      "2021-09-08 01:50:28,222 epoch 2 - iter 14/24 - loss 0.64053197 - samples/sec: 18.30 - lr: 0.020000\n",
-      "2021-09-08 01:50:28,332 epoch 2 - iter 16/24 - loss 0.63975606 - samples/sec: 18.27 - lr: 0.020000\n",
-      "2021-09-08 01:50:28,447 epoch 2 - iter 18/24 - loss 0.63942120 - samples/sec: 17.61 - lr: 0.020000\n",
-      "2021-09-08 01:50:28,557 epoch 2 - iter 20/24 - loss 0.63902036 - samples/sec: 18.14 - lr: 0.020000\n",
-      "2021-09-08 01:50:28,670 epoch 2 - iter 22/24 - loss 0.63977473 - samples/sec: 17.89 - lr: 0.020000\n",
-      "2021-09-08 01:50:28,776 epoch 2 - iter 24/24 - loss 0.63972998 - samples/sec: 18.83 - lr: 0.020000\n",
-      "2021-09-08 01:50:28,777 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:28,778 EPOCH 2 done: loss 0.6397 - lr 0.0200000\n",
-      "2021-09-08 01:50:28,838 DEV : loss 0.6365184187889099 - score 0.0\n",
-      "2021-09-08 01:50:28,839 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:21:36,427 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:36,427 Corpus: \"Corpus: 24 train + 3 dev + 3 test sentences\"\n",
+      "2021-09-21 21:21:36,428 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:36,428 Parameters:\n",
+      "2021-09-21 21:21:36,428  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:21:36,429  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:21:36,429  - patience: \"3\"\n",
+      "2021-09-21 21:21:36,429  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:21:36,430  - max_epochs: \"10\"\n",
+      "2021-09-21 21:21:36,430  - shuffle: \"True\"\n",
+      "2021-09-21 21:21:36,430  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:21:36,431  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:21:36,431 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:36,431 Model training base path: \"None1\"\n",
+      "2021-09-21 21:21:36,432 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:36,432 Device: cuda:0\n",
+      "2021-09-21 21:21:36,432 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:36,433 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:21:37,570 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:37,705 epoch 1 - iter 2/24 - loss 0.64408386 - samples/sec: 18.11 - lr: 0.020000\n",
+      "2021-09-21 21:21:37,831 epoch 1 - iter 4/24 - loss 0.64632751 - samples/sec: 15.96 - lr: 0.020000\n",
+      "2021-09-21 21:21:37,950 epoch 1 - iter 6/24 - loss 0.64424232 - samples/sec: 16.99 - lr: 0.020000\n",
+      "2021-09-21 21:21:38,078 epoch 1 - iter 8/24 - loss 0.63967946 - samples/sec: 15.73 - lr: 0.020000\n",
+      "2021-09-21 21:21:38,198 epoch 1 - iter 10/24 - loss 0.64190160 - samples/sec: 16.68 - lr: 0.020000\n",
+      "2021-09-21 21:21:38,304 epoch 1 - iter 12/24 - loss 0.64056747 - samples/sec: 18.95 - lr: 0.020000\n",
+      "2021-09-21 21:21:38,423 epoch 1 - iter 14/24 - loss 0.64088243 - samples/sec: 16.98 - lr: 0.020000\n",
+      "2021-09-21 21:21:38,551 epoch 1 - iter 16/24 - loss 0.64168032 - samples/sec: 15.72 - lr: 0.020000\n",
+      "2021-09-21 21:21:38,667 epoch 1 - iter 18/24 - loss 0.64054021 - samples/sec: 17.30 - lr: 0.020000\n",
+      "2021-09-21 21:21:38,792 epoch 1 - iter 20/24 - loss 0.63951709 - samples/sec: 16.15 - lr: 0.020000\n",
+      "2021-09-21 21:21:38,906 epoch 1 - iter 22/24 - loss 0.63957032 - samples/sec: 17.55 - lr: 0.020000\n",
+      "2021-09-21 21:21:39,024 epoch 1 - iter 24/24 - loss 0.64037464 - samples/sec: 17.19 - lr: 0.020000\n",
+      "2021-09-21 21:21:39,025 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:39,026 EPOCH 1 done: loss 0.6404 - lr 0.0200000\n",
+      "2021-09-21 21:21:39,099 DEV : loss 0.6368801593780518 - score 0.0\n",
+      "2021-09-21 21:21:39,100 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:50:32,964 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:33,082 epoch 3 - iter 2/24 - loss 0.62752375 - samples/sec: 19.66 - lr: 0.020000\n",
-      "2021-09-08 01:50:33,195 epoch 3 - iter 4/24 - loss 0.63393141 - samples/sec: 17.78 - lr: 0.020000\n",
-      "2021-09-08 01:50:33,297 epoch 3 - iter 6/24 - loss 0.63414733 - samples/sec: 19.83 - lr: 0.020000\n",
-      "2021-09-08 01:50:33,406 epoch 3 - iter 8/24 - loss 0.63526250 - samples/sec: 18.35 - lr: 0.020000\n",
-      "2021-09-08 01:50:33,514 epoch 3 - iter 10/24 - loss 0.63493435 - samples/sec: 18.66 - lr: 0.020000\n",
-      "2021-09-08 01:50:33,624 epoch 3 - iter 12/24 - loss 0.63252686 - samples/sec: 18.22 - lr: 0.020000\n",
-      "2021-09-08 01:50:33,721 epoch 3 - iter 14/24 - loss 0.63216493 - samples/sec: 20.75 - lr: 0.020000\n",
-      "2021-09-08 01:50:33,814 epoch 3 - iter 16/24 - loss 0.63372660 - samples/sec: 21.69 - lr: 0.020000\n",
-      "2021-09-08 01:50:33,908 epoch 3 - iter 18/24 - loss 0.63396316 - samples/sec: 21.32 - lr: 0.020000\n",
-      "2021-09-08 01:50:34,004 epoch 3 - iter 20/24 - loss 0.63640867 - samples/sec: 20.97 - lr: 0.020000\n",
-      "2021-09-08 01:50:34,101 epoch 3 - iter 22/24 - loss 0.63482625 - samples/sec: 20.87 - lr: 0.020000\n",
-      "2021-09-08 01:50:34,199 epoch 3 - iter 24/24 - loss 0.63606662 - samples/sec: 20.56 - lr: 0.020000\n",
-      "2021-09-08 01:50:34,200 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:34,200 EPOCH 3 done: loss 0.6361 - lr 0.0200000\n",
-      "2021-09-08 01:50:34,250 DEV : loss 0.6365214586257935 - score 0.0\n",
-      "2021-09-08 01:50:34,251 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:50:34,253 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:34,362 epoch 4 - iter 2/24 - loss 0.64194831 - samples/sec: 21.02 - lr: 0.020000\n",
-      "2021-09-08 01:50:34,459 epoch 4 - iter 4/24 - loss 0.63764288 - samples/sec: 20.76 - lr: 0.020000\n",
-      "2021-09-08 01:50:34,550 epoch 4 - iter 6/24 - loss 0.64275246 - samples/sec: 22.06 - lr: 0.020000\n",
-      "2021-09-08 01:50:34,639 epoch 4 - iter 8/24 - loss 0.63964808 - samples/sec: 22.74 - lr: 0.020000\n",
-      "2021-09-08 01:50:34,727 epoch 4 - iter 10/24 - loss 0.64101383 - samples/sec: 22.75 - lr: 0.020000\n",
-      "2021-09-08 01:50:34,815 epoch 4 - iter 12/24 - loss 0.63973743 - samples/sec: 22.87 - lr: 0.020000\n",
-      "2021-09-08 01:50:34,907 epoch 4 - iter 14/24 - loss 0.63853663 - samples/sec: 21.96 - lr: 0.020000\n",
-      "2021-09-08 01:50:34,995 epoch 4 - iter 16/24 - loss 0.63943377 - samples/sec: 22.79 - lr: 0.020000\n",
-      "2021-09-08 01:50:35,083 epoch 4 - iter 18/24 - loss 0.63899601 - samples/sec: 22.84 - lr: 0.020000\n",
-      "2021-09-08 01:50:35,171 epoch 4 - iter 20/24 - loss 0.63818448 - samples/sec: 22.80 - lr: 0.020000\n",
-      "2021-09-08 01:50:35,259 epoch 4 - iter 22/24 - loss 0.63815289 - samples/sec: 22.82 - lr: 0.020000\n",
-      "2021-09-08 01:50:35,348 epoch 4 - iter 24/24 - loss 0.63666950 - samples/sec: 22.73 - lr: 0.020000\n",
-      "2021-09-08 01:50:35,349 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:35,349 EPOCH 4 done: loss 0.6367 - lr 0.0200000\n",
-      "2021-09-08 01:50:35,491 DEV : loss 0.6365827918052673 - score 0.0\n",
-      "2021-09-08 01:50:35,492 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:50:35,565 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:35,665 epoch 5 - iter 2/24 - loss 0.63664025 - samples/sec: 22.93 - lr: 0.020000\n",
-      "2021-09-08 01:50:35,757 epoch 5 - iter 4/24 - loss 0.64208995 - samples/sec: 21.73 - lr: 0.020000\n",
-      "2021-09-08 01:50:35,846 epoch 5 - iter 6/24 - loss 0.64142542 - samples/sec: 22.76 - lr: 0.020000\n",
-      "2021-09-08 01:50:35,934 epoch 5 - iter 8/24 - loss 0.63933969 - samples/sec: 22.82 - lr: 0.020000\n",
-      "2021-09-08 01:50:36,024 epoch 5 - iter 10/24 - loss 0.63614995 - samples/sec: 22.31 - lr: 0.020000\n",
-      "2021-09-08 01:50:36,112 epoch 5 - iter 12/24 - loss 0.63357639 - samples/sec: 22.84 - lr: 0.020000\n",
-      "2021-09-08 01:50:36,200 epoch 5 - iter 14/24 - loss 0.63568844 - samples/sec: 22.74 - lr: 0.020000\n",
-      "2021-09-08 01:50:36,289 epoch 5 - iter 16/24 - loss 0.63543076 - samples/sec: 22.72 - lr: 0.020000\n",
-      "2021-09-08 01:50:36,377 epoch 5 - iter 18/24 - loss 0.63313357 - samples/sec: 22.74 - lr: 0.020000\n",
-      "2021-09-08 01:50:36,465 epoch 5 - iter 20/24 - loss 0.63293735 - samples/sec: 22.80 - lr: 0.020000\n",
-      "2021-09-08 01:50:36,558 epoch 5 - iter 22/24 - loss 0.63237067 - samples/sec: 21.73 - lr: 0.020000\n",
-      "2021-09-08 01:50:36,647 epoch 5 - iter 24/24 - loss 0.63339939 - samples/sec: 22.69 - lr: 0.020000\n",
-      "2021-09-08 01:50:36,648 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:36,648 EPOCH 5 done: loss 0.6334 - lr 0.0200000\n",
-      "2021-09-08 01:50:36,799 DEV : loss 0.6369203925132751 - score 0.6667\n",
-      "2021-09-08 01:50:36,800 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:21:43,367 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:43,528 epoch 2 - iter 2/24 - loss 0.63353658 - samples/sec: 16.32 - lr: 0.020000\n",
+      "2021-09-21 21:21:43,634 epoch 2 - iter 4/24 - loss 0.63364375 - samples/sec: 18.96 - lr: 0.020000\n",
+      "2021-09-21 21:21:43,760 epoch 2 - iter 6/24 - loss 0.63408496 - samples/sec: 15.91 - lr: 0.020000\n",
+      "2021-09-21 21:21:43,874 epoch 2 - iter 8/24 - loss 0.63617545 - samples/sec: 17.61 - lr: 0.020000\n",
+      "2021-09-21 21:21:44,002 epoch 2 - iter 10/24 - loss 0.63580890 - samples/sec: 15.71 - lr: 0.020000\n",
+      "2021-09-21 21:21:44,167 epoch 2 - iter 12/24 - loss 0.63612367 - samples/sec: 12.12 - lr: 0.020000\n",
+      "2021-09-21 21:21:44,293 epoch 2 - iter 14/24 - loss 0.63726113 - samples/sec: 15.99 - lr: 0.020000\n",
+      "2021-09-21 21:21:44,419 epoch 2 - iter 16/24 - loss 0.63538659 - samples/sec: 15.94 - lr: 0.020000\n",
+      "2021-09-21 21:21:44,554 epoch 2 - iter 18/24 - loss 0.63488218 - samples/sec: 14.97 - lr: 0.020000\n",
+      "2021-09-21 21:21:44,678 epoch 2 - iter 20/24 - loss 0.63479450 - samples/sec: 16.22 - lr: 0.020000\n",
+      "2021-09-21 21:21:44,821 epoch 2 - iter 22/24 - loss 0.63613297 - samples/sec: 14.08 - lr: 0.020000\n",
+      "2021-09-21 21:21:44,935 epoch 2 - iter 24/24 - loss 0.63565279 - samples/sec: 17.61 - lr: 0.020000\n",
+      "2021-09-21 21:21:44,936 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:44,937 EPOCH 2 done: loss 0.6357 - lr 0.0200000\n",
+      "2021-09-21 21:21:45,017 DEV : loss 0.6365187168121338 - score 0.6667\n",
+      "2021-09-21 21:21:45,018 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:50:42,812 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:42,916 epoch 6 - iter 2/24 - loss 0.62200654 - samples/sec: 22.29 - lr: 0.020000\n"
+      "2021-09-21 21:21:49,130 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:49,325 epoch 3 - iter 2/24 - loss 0.65470621 - samples/sec: 12.17 - lr: 0.020000\n",
+      "2021-09-21 21:21:49,486 epoch 3 - iter 4/24 - loss 0.64602928 - samples/sec: 12.50 - lr: 0.020000\n",
+      "2021-09-21 21:21:49,638 epoch 3 - iter 6/24 - loss 0.63965463 - samples/sec: 13.22 - lr: 0.020000\n",
+      "2021-09-21 21:21:49,799 epoch 3 - iter 8/24 - loss 0.63786817 - samples/sec: 12.50 - lr: 0.020000\n",
+      "2021-09-21 21:21:49,944 epoch 3 - iter 10/24 - loss 0.63811307 - samples/sec: 13.84 - lr: 0.020000\n",
+      "2021-09-21 21:21:50,122 epoch 3 - iter 12/24 - loss 0.63796097 - samples/sec: 11.29 - lr: 0.020000\n",
+      "2021-09-21 21:21:50,284 epoch 3 - iter 14/24 - loss 0.63451511 - samples/sec: 12.38 - lr: 0.020000\n",
+      "2021-09-21 21:21:50,432 epoch 3 - iter 16/24 - loss 0.63533716 - samples/sec: 13.58 - lr: 0.020000\n",
+      "2021-09-21 21:21:50,579 epoch 3 - iter 18/24 - loss 0.63650647 - samples/sec: 13.65 - lr: 0.020000\n",
+      "2021-09-21 21:21:50,716 epoch 3 - iter 20/24 - loss 0.63655913 - samples/sec: 14.64 - lr: 0.020000\n",
+      "2021-09-21 21:21:50,857 epoch 3 - iter 22/24 - loss 0.63792290 - samples/sec: 14.31 - lr: 0.020000\n",
+      "2021-09-21 21:21:50,989 epoch 3 - iter 24/24 - loss 0.63831840 - samples/sec: 15.15 - lr: 0.020000\n",
+      "2021-09-21 21:21:50,991 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:50,991 EPOCH 3 done: loss 0.6383 - lr 0.0200000\n",
+      "2021-09-21 21:21:51,071 DEV : loss 0.6368775367736816 - score 0.3333\n",
+      "2021-09-21 21:21:51,072 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:21:51,074 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:51,219 epoch 4 - iter 2/24 - loss 0.63565314 - samples/sec: 16.01 - lr: 0.020000\n",
+      "2021-09-21 21:21:51,343 epoch 4 - iter 4/24 - loss 0.63673156 - samples/sec: 16.18 - lr: 0.020000\n",
+      "2021-09-21 21:21:51,462 epoch 4 - iter 6/24 - loss 0.63108894 - samples/sec: 16.97 - lr: 0.020000\n",
+      "2021-09-21 21:21:51,587 epoch 4 - iter 8/24 - loss 0.63136379 - samples/sec: 16.11 - lr: 0.020000\n",
+      "2021-09-21 21:21:51,712 epoch 4 - iter 10/24 - loss 0.63449597 - samples/sec: 16.08 - lr: 0.020000\n",
+      "2021-09-21 21:21:51,834 epoch 4 - iter 12/24 - loss 0.63426446 - samples/sec: 16.53 - lr: 0.020000\n",
+      "2021-09-21 21:21:51,945 epoch 4 - iter 14/24 - loss 0.63845194 - samples/sec: 18.13 - lr: 0.020000\n",
+      "2021-09-21 21:21:52,078 epoch 4 - iter 16/24 - loss 0.63693383 - samples/sec: 15.06 - lr: 0.020000\n",
+      "2021-09-21 21:21:52,187 epoch 4 - iter 18/24 - loss 0.63765070 - samples/sec: 18.43 - lr: 0.020000\n",
+      "2021-09-21 21:21:52,333 epoch 4 - iter 20/24 - loss 0.63767230 - samples/sec: 13.83 - lr: 0.020000\n",
+      "2021-09-21 21:21:52,463 epoch 4 - iter 22/24 - loss 0.63607015 - samples/sec: 15.43 - lr: 0.020000\n",
+      "2021-09-21 21:21:52,586 epoch 4 - iter 24/24 - loss 0.63682576 - samples/sec: 16.35 - lr: 0.020000\n",
+      "2021-09-21 21:21:52,588 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:52,588 EPOCH 4 done: loss 0.6368 - lr 0.0200000\n",
+      "2021-09-21 21:21:52,818 DEV : loss 0.6365677714347839 - score 0.3333\n",
+      "2021-09-21 21:21:52,819 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:21:52,912 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:53,069 epoch 5 - iter 2/24 - loss 0.64032930 - samples/sec: 15.72 - lr: 0.020000\n",
+      "2021-09-21 21:21:53,195 epoch 5 - iter 4/24 - loss 0.62887532 - samples/sec: 15.96 - lr: 0.020000\n",
+      "2021-09-21 21:21:53,336 epoch 5 - iter 6/24 - loss 0.63232573 - samples/sec: 14.28 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:50:43,005 epoch 6 - iter 4/24 - loss 0.63843736 - samples/sec: 22.67 - lr: 0.020000\n",
-      "2021-09-08 01:50:43,093 epoch 6 - iter 6/24 - loss 0.63737342 - samples/sec: 22.79 - lr: 0.020000\n",
-      "2021-09-08 01:50:43,183 epoch 6 - iter 8/24 - loss 0.63817699 - samples/sec: 22.36 - lr: 0.020000\n",
-      "2021-09-08 01:50:43,271 epoch 6 - iter 10/24 - loss 0.63319825 - samples/sec: 22.81 - lr: 0.020000\n",
-      "2021-09-08 01:50:43,360 epoch 6 - iter 12/24 - loss 0.63494308 - samples/sec: 22.69 - lr: 0.020000\n",
-      "2021-09-08 01:50:43,448 epoch 6 - iter 14/24 - loss 0.63286788 - samples/sec: 22.83 - lr: 0.020000\n",
-      "2021-09-08 01:50:43,538 epoch 6 - iter 16/24 - loss 0.63132897 - samples/sec: 22.37 - lr: 0.020000\n",
-      "2021-09-08 01:50:43,627 epoch 6 - iter 18/24 - loss 0.63124327 - samples/sec: 22.74 - lr: 0.020000\n",
-      "2021-09-08 01:50:43,714 epoch 6 - iter 20/24 - loss 0.63197204 - samples/sec: 22.92 - lr: 0.020000\n",
-      "2021-09-08 01:50:43,803 epoch 6 - iter 22/24 - loss 0.63261784 - samples/sec: 22.76 - lr: 0.020000\n",
-      "2021-09-08 01:50:43,894 epoch 6 - iter 24/24 - loss 0.63278447 - samples/sec: 21.93 - lr: 0.020000\n",
-      "2021-09-08 01:50:43,895 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:43,896 EPOCH 6 done: loss 0.6328 - lr 0.0200000\n",
-      "2021-09-08 01:50:43,946 DEV : loss 0.6365172863006592 - score 0.6667\n",
-      "2021-09-08 01:50:43,947 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:21:53,471 epoch 5 - iter 8/24 - loss 0.63252684 - samples/sec: 14.84 - lr: 0.020000\n",
+      "2021-09-21 21:21:53,614 epoch 5 - iter 10/24 - loss 0.63526916 - samples/sec: 14.09 - lr: 0.020000\n",
+      "2021-09-21 21:21:53,741 epoch 5 - iter 12/24 - loss 0.63740152 - samples/sec: 15.84 - lr: 0.020000\n",
+      "2021-09-21 21:21:53,872 epoch 5 - iter 14/24 - loss 0.63797419 - samples/sec: 15.34 - lr: 0.020000\n",
+      "2021-09-21 21:21:53,986 epoch 5 - iter 16/24 - loss 0.63669472 - samples/sec: 17.50 - lr: 0.020000\n",
+      "2021-09-21 21:21:54,110 epoch 5 - iter 18/24 - loss 0.63777796 - samples/sec: 16.24 - lr: 0.020000\n",
+      "2021-09-21 21:21:54,226 epoch 5 - iter 20/24 - loss 0.63715372 - samples/sec: 17.35 - lr: 0.020000\n",
+      "2021-09-21 21:21:54,352 epoch 5 - iter 22/24 - loss 0.63688384 - samples/sec: 15.95 - lr: 0.020000\n",
+      "2021-09-21 21:21:54,469 epoch 5 - iter 24/24 - loss 0.63690834 - samples/sec: 17.22 - lr: 0.020000\n",
+      "2021-09-21 21:21:54,470 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:54,470 EPOCH 5 done: loss 0.6369 - lr 0.0200000\n",
+      "2021-09-21 21:21:55,021 DEV : loss 0.6365227699279785 - score 0.3333\n",
+      "2021-09-21 21:21:55,022 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:21:55,024 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:55,179 epoch 6 - iter 2/24 - loss 0.63130024 - samples/sec: 15.29 - lr: 0.020000\n",
+      "2021-09-21 21:21:55,328 epoch 6 - iter 4/24 - loss 0.64231029 - samples/sec: 13.41 - lr: 0.020000\n",
+      "2021-09-21 21:21:55,486 epoch 6 - iter 6/24 - loss 0.63975655 - samples/sec: 12.78 - lr: 0.020000\n",
+      "2021-09-21 21:21:55,640 epoch 6 - iter 8/24 - loss 0.64074248 - samples/sec: 13.00 - lr: 0.020000\n",
+      "2021-09-21 21:21:55,809 epoch 6 - iter 10/24 - loss 0.64035215 - samples/sec: 11.83 - lr: 0.020000\n",
+      "2021-09-21 21:21:55,956 epoch 6 - iter 12/24 - loss 0.63817165 - samples/sec: 13.71 - lr: 0.020000\n",
+      "2021-09-21 21:21:56,127 epoch 6 - iter 14/24 - loss 0.63712967 - samples/sec: 11.73 - lr: 0.020000\n",
+      "2021-09-21 21:21:56,285 epoch 6 - iter 16/24 - loss 0.63758036 - samples/sec: 12.72 - lr: 0.020000\n",
+      "2021-09-21 21:21:56,447 epoch 6 - iter 18/24 - loss 0.63858338 - samples/sec: 12.41 - lr: 0.020000\n",
+      "2021-09-21 21:21:56,624 epoch 6 - iter 20/24 - loss 0.63824532 - samples/sec: 11.28 - lr: 0.020000\n",
+      "2021-09-21 21:21:56,779 epoch 6 - iter 22/24 - loss 0.63888824 - samples/sec: 12.94 - lr: 0.020000\n",
+      "2021-09-21 21:21:56,963 epoch 6 - iter 24/24 - loss 0.63853335 - samples/sec: 10.92 - lr: 0.020000\n",
+      "2021-09-21 21:21:56,964 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:56,965 EPOCH 6 done: loss 0.6385 - lr 0.0200000\n",
+      "2021-09-21 21:21:57,073 DEV : loss 0.6366457343101501 - score 0.0\n",
+      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:21:57,074 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:21:57,076 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:57,265 epoch 7 - iter 2/24 - loss 0.64128304 - samples/sec: 13.33 - lr: 0.010000\n",
+      "2021-09-21 21:21:57,446 epoch 7 - iter 4/24 - loss 0.63553284 - samples/sec: 11.11 - lr: 0.010000\n",
+      "2021-09-21 21:21:57,602 epoch 7 - iter 6/24 - loss 0.63790877 - samples/sec: 12.87 - lr: 0.010000\n",
+      "2021-09-21 21:21:57,762 epoch 7 - iter 8/24 - loss 0.63880792 - samples/sec: 12.51 - lr: 0.010000\n",
+      "2021-09-21 21:21:57,957 epoch 7 - iter 10/24 - loss 0.64009970 - samples/sec: 10.31 - lr: 0.010000\n",
+      "2021-09-21 21:21:58,119 epoch 7 - iter 12/24 - loss 0.63924776 - samples/sec: 12.40 - lr: 0.010000\n",
+      "2021-09-21 21:21:58,291 epoch 7 - iter 14/24 - loss 0.63982787 - samples/sec: 11.67 - lr: 0.010000\n",
+      "2021-09-21 21:21:58,458 epoch 7 - iter 16/24 - loss 0.64021722 - samples/sec: 12.03 - lr: 0.010000\n",
+      "2021-09-21 21:21:58,611 epoch 7 - iter 18/24 - loss 0.63936949 - samples/sec: 13.07 - lr: 0.010000\n",
+      "2021-09-21 21:21:58,784 epoch 7 - iter 20/24 - loss 0.63898001 - samples/sec: 11.57 - lr: 0.010000\n",
+      "2021-09-21 21:21:58,941 epoch 7 - iter 22/24 - loss 0.63793839 - samples/sec: 12.86 - lr: 0.010000\n",
+      "2021-09-21 21:21:59,071 epoch 7 - iter 24/24 - loss 0.63974950 - samples/sec: 15.34 - lr: 0.010000\n",
+      "2021-09-21 21:21:59,072 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:59,073 EPOCH 7 done: loss 0.6397 - lr 0.0100000\n",
+      "2021-09-21 21:21:59,191 DEV : loss 0.6366629600524902 - score 0.3333\n",
+      "2021-09-21 21:21:59,192 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:21:59,194 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:59,409 epoch 8 - iter 2/24 - loss 0.63990244 - samples/sec: 11.57 - lr: 0.010000\n",
+      "2021-09-21 21:21:59,572 epoch 8 - iter 4/24 - loss 0.63722251 - samples/sec: 12.28 - lr: 0.010000\n",
+      "2021-09-21 21:21:59,718 epoch 8 - iter 6/24 - loss 0.63181387 - samples/sec: 13.70 - lr: 0.010000\n",
+      "2021-09-21 21:21:59,901 epoch 8 - iter 8/24 - loss 0.63088606 - samples/sec: 11.02 - lr: 0.010000\n",
+      "2021-09-21 21:22:00,051 epoch 8 - iter 10/24 - loss 0.63014289 - samples/sec: 13.32 - lr: 0.010000\n",
+      "2021-09-21 21:22:00,203 epoch 8 - iter 12/24 - loss 0.63324156 - samples/sec: 13.22 - lr: 0.010000\n",
+      "2021-09-21 21:22:00,368 epoch 8 - iter 14/24 - loss 0.63355697 - samples/sec: 12.14 - lr: 0.010000\n",
+      "2021-09-21 21:22:00,550 epoch 8 - iter 16/24 - loss 0.63388657 - samples/sec: 11.04 - lr: 0.010000\n",
+      "2021-09-21 21:22:00,724 epoch 8 - iter 18/24 - loss 0.63413105 - samples/sec: 11.53 - lr: 0.010000\n",
+      "2021-09-21 21:22:00,884 epoch 8 - iter 20/24 - loss 0.63478021 - samples/sec: 12.58 - lr: 0.010000\n",
+      "2021-09-21 21:22:01,032 epoch 8 - iter 22/24 - loss 0.63424808 - samples/sec: 13.54 - lr: 0.010000\n",
+      "2021-09-21 21:22:01,146 epoch 8 - iter 24/24 - loss 0.63372198 - samples/sec: 17.63 - lr: 0.010000\n",
+      "2021-09-21 21:22:01,148 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:01,148 EPOCH 8 done: loss 0.6337 - lr 0.0100000\n",
+      "2021-09-21 21:22:01,239 DEV : loss 0.6365218162536621 - score 0.0\n",
+      "2021-09-21 21:22:01,240 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:22:01,242 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:01,402 epoch 9 - iter 2/24 - loss 0.63391334 - samples/sec: 16.09 - lr: 0.010000\n",
+      "2021-09-21 21:22:01,532 epoch 9 - iter 4/24 - loss 0.63364248 - samples/sec: 15.43 - lr: 0.010000\n",
+      "2021-09-21 21:22:01,660 epoch 9 - iter 6/24 - loss 0.63140372 - samples/sec: 15.71 - lr: 0.010000\n",
+      "2021-09-21 21:22:01,773 epoch 9 - iter 8/24 - loss 0.63467243 - samples/sec: 17.85 - lr: 0.010000\n",
+      "2021-09-21 21:22:01,909 epoch 9 - iter 10/24 - loss 0.63391308 - samples/sec: 14.86 - lr: 0.010000\n",
+      "2021-09-21 21:22:02,039 epoch 9 - iter 12/24 - loss 0.63081579 - samples/sec: 15.46 - lr: 0.010000\n",
+      "2021-09-21 21:22:02,148 epoch 9 - iter 14/24 - loss 0.63070766 - samples/sec: 18.46 - lr: 0.010000\n",
+      "2021-09-21 21:22:02,277 epoch 9 - iter 16/24 - loss 0.63016254 - samples/sec: 15.64 - lr: 0.010000\n",
+      "2021-09-21 21:22:02,385 epoch 9 - iter 18/24 - loss 0.63056328 - samples/sec: 18.61 - lr: 0.010000\n",
+      "2021-09-21 21:22:02,502 epoch 9 - iter 20/24 - loss 0.63139055 - samples/sec: 17.06 - lr: 0.010000\n",
+      "2021-09-21 21:22:02,597 epoch 9 - iter 22/24 - loss 0.63105123 - samples/sec: 21.29 - lr: 0.010000\n",
+      "2021-09-21 21:22:02,717 epoch 9 - iter 24/24 - loss 0.63132032 - samples/sec: 16.73 - lr: 0.010000\n",
+      "2021-09-21 21:22:02,718 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:02,719 EPOCH 9 done: loss 0.6313 - lr 0.0100000\n",
+      "2021-09-21 21:22:02,892 DEV : loss 0.6365155577659607 - score 0.6667\n",
+      "2021-09-21 21:22:02,895 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:50:47,980 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:48,099 epoch 7 - iter 2/24 - loss 0.64738935 - samples/sec: 20.13 - lr: 0.020000\n",
-      "2021-09-08 01:50:48,212 epoch 7 - iter 4/24 - loss 0.64879325 - samples/sec: 17.88 - lr: 0.020000\n",
-      "2021-09-08 01:50:48,315 epoch 7 - iter 6/24 - loss 0.64352179 - samples/sec: 19.43 - lr: 0.020000\n",
-      "2021-09-08 01:50:48,420 epoch 7 - iter 8/24 - loss 0.64026195 - samples/sec: 19.25 - lr: 0.020000\n",
-      "2021-09-08 01:50:48,521 epoch 7 - iter 10/24 - loss 0.63939614 - samples/sec: 19.81 - lr: 0.020000\n",
-      "2021-09-08 01:50:48,630 epoch 7 - iter 12/24 - loss 0.63999984 - samples/sec: 18.48 - lr: 0.020000\n",
-      "2021-09-08 01:50:48,727 epoch 7 - iter 14/24 - loss 0.64225554 - samples/sec: 20.68 - lr: 0.020000\n",
-      "2021-09-08 01:50:48,835 epoch 7 - iter 16/24 - loss 0.64042503 - samples/sec: 18.67 - lr: 0.020000\n",
-      "2021-09-08 01:50:48,944 epoch 7 - iter 18/24 - loss 0.64071665 - samples/sec: 18.36 - lr: 0.020000\n",
-      "2021-09-08 01:50:49,041 epoch 7 - iter 20/24 - loss 0.64012358 - samples/sec: 20.69 - lr: 0.020000\n",
-      "2021-09-08 01:50:49,140 epoch 7 - iter 22/24 - loss 0.63952913 - samples/sec: 20.36 - lr: 0.020000\n",
-      "2021-09-08 01:50:49,244 epoch 7 - iter 24/24 - loss 0.63986234 - samples/sec: 19.34 - lr: 0.020000\n",
-      "2021-09-08 01:50:49,245 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:49,245 EPOCH 7 done: loss 0.6399 - lr 0.0200000\n",
-      "2021-09-08 01:50:49,307 DEV : loss 0.6367270946502686 - score 0.6667\n",
-      "2021-09-08 01:50:49,308 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:50:49,309 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:49,443 epoch 8 - iter 2/24 - loss 0.64234716 - samples/sec: 19.11 - lr: 0.020000\n",
-      "2021-09-08 01:50:49,539 epoch 8 - iter 4/24 - loss 0.63381582 - samples/sec: 20.94 - lr: 0.020000\n",
-      "2021-09-08 01:50:49,654 epoch 8 - iter 6/24 - loss 0.63137986 - samples/sec: 17.46 - lr: 0.020000\n",
-      "2021-09-08 01:50:49,755 epoch 8 - iter 8/24 - loss 0.63739375 - samples/sec: 19.91 - lr: 0.020000\n",
-      "2021-09-08 01:50:49,855 epoch 8 - iter 10/24 - loss 0.63574987 - samples/sec: 20.10 - lr: 0.020000\n",
-      "2021-09-08 01:50:49,953 epoch 8 - iter 12/24 - loss 0.63614902 - samples/sec: 20.35 - lr: 0.020000\n",
-      "2021-09-08 01:50:50,057 epoch 8 - iter 14/24 - loss 0.63505709 - samples/sec: 19.32 - lr: 0.020000\n",
-      "2021-09-08 01:50:50,162 epoch 8 - iter 16/24 - loss 0.63728012 - samples/sec: 19.29 - lr: 0.020000\n",
-      "2021-09-08 01:50:50,266 epoch 8 - iter 18/24 - loss 0.63849487 - samples/sec: 19.29 - lr: 0.020000\n",
-      "2021-09-08 01:50:50,377 epoch 8 - iter 20/24 - loss 0.63716410 - samples/sec: 18.05 - lr: 0.020000\n",
-      "2021-09-08 01:50:50,486 epoch 8 - iter 22/24 - loss 0.63789664 - samples/sec: 18.39 - lr: 0.020000\n",
-      "2021-09-08 01:50:50,591 epoch 8 - iter 24/24 - loss 0.63933784 - samples/sec: 19.25 - lr: 0.020000\n",
-      "2021-09-08 01:50:50,592 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:50,592 EPOCH 8 done: loss 0.6393 - lr 0.0200000\n",
-      "2021-09-08 01:50:50,665 DEV : loss 0.6367247104644775 - score 0.0\n",
-      "2021-09-08 01:50:50,666 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:50:50,668 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:50,800 epoch 9 - iter 2/24 - loss 0.64251944 - samples/sec: 18.32 - lr: 0.020000\n",
-      "2021-09-08 01:50:50,908 epoch 9 - iter 4/24 - loss 0.63858020 - samples/sec: 18.72 - lr: 0.020000\n",
-      "2021-09-08 01:50:51,017 epoch 9 - iter 6/24 - loss 0.64144516 - samples/sec: 18.34 - lr: 0.020000\n",
-      "2021-09-08 01:50:51,120 epoch 9 - iter 8/24 - loss 0.64552777 - samples/sec: 19.59 - lr: 0.020000\n",
-      "2021-09-08 01:50:51,224 epoch 9 - iter 10/24 - loss 0.64254187 - samples/sec: 19.42 - lr: 0.020000\n",
-      "2021-09-08 01:50:51,320 epoch 9 - iter 12/24 - loss 0.64449764 - samples/sec: 20.81 - lr: 0.020000\n",
-      "2021-09-08 01:50:51,420 epoch 9 - iter 14/24 - loss 0.64648487 - samples/sec: 20.25 - lr: 0.020000\n",
-      "2021-09-08 01:50:51,521 epoch 9 - iter 16/24 - loss 0.64459194 - samples/sec: 19.84 - lr: 0.020000\n",
-      "2021-09-08 01:50:51,623 epoch 9 - iter 18/24 - loss 0.64503355 - samples/sec: 19.66 - lr: 0.020000\n",
-      "2021-09-08 01:50:51,723 epoch 9 - iter 20/24 - loss 0.64441441 - samples/sec: 20.14 - lr: 0.020000\n",
-      "2021-09-08 01:50:51,821 epoch 9 - iter 22/24 - loss 0.64565258 - samples/sec: 20.61 - lr: 0.020000\n",
-      "2021-09-08 01:50:51,926 epoch 9 - iter 24/24 - loss 0.64393145 - samples/sec: 19.11 - lr: 0.020000\n",
-      "2021-09-08 01:50:51,927 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:51,927 EPOCH 9 done: loss 0.6439 - lr 0.0200000\n",
-      "2021-09-08 01:50:52,006 DEV : loss 0.6366057395935059 - score 0.3333\n",
-      "2021-09-08 01:50:52,007 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:50:52,009 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:52,135 epoch 10 - iter 2/24 - loss 0.64335668 - samples/sec: 19.55 - lr: 0.020000\n",
-      "2021-09-08 01:50:52,241 epoch 10 - iter 4/24 - loss 0.63862027 - samples/sec: 19.04 - lr: 0.020000\n",
-      "2021-09-08 01:50:52,347 epoch 10 - iter 6/24 - loss 0.64264922 - samples/sec: 19.02 - lr: 0.020000\n",
-      "2021-09-08 01:50:52,451 epoch 10 - iter 8/24 - loss 0.64105514 - samples/sec: 19.26 - lr: 0.020000\n",
-      "2021-09-08 01:50:52,564 epoch 10 - iter 10/24 - loss 0.64183494 - samples/sec: 17.83 - lr: 0.020000\n",
-      "2021-09-08 01:50:52,665 epoch 10 - iter 12/24 - loss 0.64678635 - samples/sec: 19.83 - lr: 0.020000\n",
-      "2021-09-08 01:50:52,771 epoch 10 - iter 14/24 - loss 0.64170833 - samples/sec: 18.99 - lr: 0.020000\n",
-      "2021-09-08 01:50:52,878 epoch 10 - iter 16/24 - loss 0.64221876 - samples/sec: 18.84 - lr: 0.020000\n",
-      "2021-09-08 01:50:52,993 epoch 10 - iter 18/24 - loss 0.63973016 - samples/sec: 17.46 - lr: 0.020000\n",
-      "2021-09-08 01:50:53,098 epoch 10 - iter 20/24 - loss 0.64051710 - samples/sec: 19.03 - lr: 0.020000\n",
-      "2021-09-08 01:50:53,196 epoch 10 - iter 22/24 - loss 0.63898723 - samples/sec: 20.50 - lr: 0.020000\n",
-      "2021-09-08 01:50:53,301 epoch 10 - iter 24/24 - loss 0.63904432 - samples/sec: 19.26 - lr: 0.020000\n",
-      "2021-09-08 01:50:53,302 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:53,303 EPOCH 10 done: loss 0.6390 - lr 0.0200000\n",
-      "2021-09-08 01:50:53,366 DEV : loss 0.6365311741828918 - score 0.3333\n",
-      "Epoch    10: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:50:53,367 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:50:57,567 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:57,568 Testing using best model ...\n",
-      "2021-09-08 01:50:57,569 loading file None1/best-model.pt\n"
+      "2021-09-21 21:22:08,209 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:08,430 epoch 10 - iter 2/24 - loss 0.63111493 - samples/sec: 10.60 - lr: 0.010000\n",
+      "2021-09-21 21:22:08,569 epoch 10 - iter 4/24 - loss 0.63587701 - samples/sec: 14.49 - lr: 0.010000\n",
+      "2021-09-21 21:22:08,730 epoch 10 - iter 6/24 - loss 0.63593074 - samples/sec: 12.48 - lr: 0.010000\n",
+      "2021-09-21 21:22:08,868 epoch 10 - iter 8/24 - loss 0.63442355 - samples/sec: 14.59 - lr: 0.010000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "2021-09-21 21:22:09,016 epoch 10 - iter 10/24 - loss 0.63020383 - samples/sec: 13.54 - lr: 0.010000\n",
+      "2021-09-21 21:22:09,167 epoch 10 - iter 12/24 - loss 0.63099819 - samples/sec: 13.28 - lr: 0.010000\n",
+      "2021-09-21 21:22:09,322 epoch 10 - iter 14/24 - loss 0.63141949 - samples/sec: 12.98 - lr: 0.010000\n",
+      "2021-09-21 21:22:09,463 epoch 10 - iter 16/24 - loss 0.63404855 - samples/sec: 14.23 - lr: 0.010000\n",
+      "2021-09-21 21:22:09,595 epoch 10 - iter 18/24 - loss 0.63435079 - samples/sec: 15.13 - lr: 0.010000\n",
+      "2021-09-21 21:22:09,729 epoch 10 - iter 20/24 - loss 0.63260367 - samples/sec: 15.06 - lr: 0.010000\n",
+      "2021-09-21 21:22:09,902 epoch 10 - iter 22/24 - loss 0.63358051 - samples/sec: 11.56 - lr: 0.010000\n",
+      "2021-09-21 21:22:10,053 epoch 10 - iter 24/24 - loss 0.63522903 - samples/sec: 13.26 - lr: 0.010000\n",
+      "2021-09-21 21:22:10,054 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:10,055 EPOCH 10 done: loss 0.6352 - lr 0.0100000\n",
+      "2021-09-21 21:22:10,145 DEV : loss 0.6365206241607666 - score 0.6667\n",
+      "2021-09-21 21:22:10,148 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:22:13,880 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:13,881 Testing using best model ...\n",
+      "2021-09-21 21:22:13,882 loading file None1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:51:02,293 \t0.6667\n",
-      "2021-09-08 01:51:02,294 \n",
+      "2021-09-21 21:22:19,127 \t0.0\n",
+      "2021-09-21 21:22:19,127 \n",
       "Results:\n",
-      "- F-score (micro) 0.6667\n",
-      "- F-score (macro) 0.4444\n",
-      "- Accuracy 0.6667\n",
+      "- F-score (micro) 0.0\n",
+      "- F-score (macro) 0.0\n",
+      "- Accuracy 0.0\n",
       "\n",
       "By class:\n",
       "                                                          precision    recall  f1-score   support\n",
       "\n",
-      "   having the quality of something harmful or unpleasant     0.5000    1.0000    0.6667         1\n",
-      "one who does not side with any party in a war or dispute     0.0000    0.0000    0.0000         0\n",
-      "                             involving advantage or good     1.0000    0.5000    0.6667         2\n",
+      "   having the quality of something harmful or unpleasant     0.0000    0.0000    0.0000         0\n",
+      "one who does not side with any party in a war or dispute     0.0000    0.0000    0.0000         2\n",
+      "                             involving advantage or good     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "                                               micro avg     0.6667    0.6667    0.6667         3\n",
-      "                                               macro avg     0.5000    0.5000    0.4444         3\n",
-      "                                            weighted avg     0.8333    0.6667    0.6667         3\n",
-      "                                             samples avg     0.6667    0.6667    0.6667         3\n",
+      "                                               micro avg     0.0000    0.0000    0.0000         3\n",
+      "                                               macro avg     0.0000    0.0000    0.0000         3\n",
+      "                                            weighted avg     0.0000    0.0000    0.0000         3\n",
+      "                                             samples avg     0.0000    0.0000    0.0000         3\n",
       "\n",
-      "2021-09-08 01:51:02,294 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:24,573 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
+      "2021-09-21 21:22:19,128 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:44,478 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:51:28,547 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:22:48,861 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 27/27 [00:00<00:00, 38624.22it/s]"
+      "100%|██████████| 27/27 [00:00<00:00, 37008.56it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:51:28,550 [b'having the quality of something harmful or unpleasant', b'one who does not side with any party in a war or dispute', b'involving advantage or good']\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 01:51:28,980 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:28,982 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:22:48,863 [b'having the quality of something harmful or unpleasant', b'one who does not side with any party in a war or dispute', b'involving advantage or good']\n",
+      "2021-09-21 21:22:49,001 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:49,003 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -8600,235 +8617,249 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:51:28,982 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:28,983 Corpus: \"Corpus: 24 train + 3 dev + 3 test sentences\"\n",
-      "2021-09-08 01:51:28,983 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:28,983 Parameters:\n",
-      "2021-09-08 01:51:28,984  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:51:28,984  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:51:28,984  - patience: \"3\"\n",
-      "2021-09-08 01:51:28,984  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:51:28,985  - max_epochs: \"10\"\n",
-      "2021-09-08 01:51:28,985  - shuffle: \"True\"\n",
-      "2021-09-08 01:51:28,985  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:51:28,985  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:51:28,986 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:28,986 Model training base path: \"None1\"\n",
-      "2021-09-08 01:51:28,986 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:28,987 Device: cuda:1\n",
-      "2021-09-08 01:51:28,987 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:28,987 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:51:29,214 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:29,324 epoch 1 - iter 2/24 - loss 0.63063160 - samples/sec: 21.22 - lr: 0.020000\n",
-      "2021-09-08 01:51:29,437 epoch 1 - iter 4/24 - loss 0.63292955 - samples/sec: 17.77 - lr: 0.020000\n",
-      "2021-09-08 01:51:29,543 epoch 1 - iter 6/24 - loss 0.63488309 - samples/sec: 18.99 - lr: 0.020000\n",
-      "2021-09-08 01:51:29,653 epoch 1 - iter 8/24 - loss 0.63387723 - samples/sec: 18.34 - lr: 0.020000\n",
-      "2021-09-08 01:51:29,755 epoch 1 - iter 10/24 - loss 0.63547573 - samples/sec: 19.60 - lr: 0.020000\n",
-      "2021-09-08 01:51:29,867 epoch 1 - iter 12/24 - loss 0.63627562 - samples/sec: 17.94 - lr: 0.020000\n",
-      "2021-09-08 01:51:29,972 epoch 1 - iter 14/24 - loss 0.63574120 - samples/sec: 19.12 - lr: 0.020000\n",
-      "2021-09-08 01:51:30,084 epoch 1 - iter 16/24 - loss 0.63612652 - samples/sec: 17.99 - lr: 0.020000\n",
-      "2021-09-08 01:51:30,193 epoch 1 - iter 18/24 - loss 0.63640873 - samples/sec: 18.35 - lr: 0.020000\n",
-      "2021-09-08 01:51:30,300 epoch 1 - iter 20/24 - loss 0.63517277 - samples/sec: 18.82 - lr: 0.020000\n",
-      "2021-09-08 01:51:30,406 epoch 1 - iter 22/24 - loss 0.63506812 - samples/sec: 18.85 - lr: 0.020000\n",
-      "2021-09-08 01:51:30,504 epoch 1 - iter 24/24 - loss 0.63449793 - samples/sec: 20.54 - lr: 0.020000\n",
-      "2021-09-08 01:51:30,505 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:30,506 EPOCH 1 done: loss 0.6345 - lr 0.0200000\n",
-      "2021-09-08 01:51:30,678 DEV : loss 0.636521577835083 - score 0.3333\n",
-      "2021-09-08 01:51:30,678 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:22:49,003 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:49,004 Corpus: \"Corpus: 24 train + 3 dev + 3 test sentences\"\n",
+      "2021-09-21 21:22:49,004 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:49,004 Parameters:\n",
+      "2021-09-21 21:22:49,005  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:22:49,005  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:22:49,005  - patience: \"3\"\n",
+      "2021-09-21 21:22:49,005  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:22:49,006  - max_epochs: \"10\"\n",
+      "2021-09-21 21:22:49,006  - shuffle: \"True\"\n",
+      "2021-09-21 21:22:49,006  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:22:49,007  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:22:49,007 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:49,007 Model training base path: \"None1\"\n",
+      "2021-09-21 21:22:49,007 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:49,008 Device: cuda:0\n",
+      "2021-09-21 21:22:49,008 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:49,008 Embeddings storage mode: cpu\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 21:22:49,196 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:49,292 epoch 1 - iter 2/24 - loss 0.64054427 - samples/sec: 24.40 - lr: 0.020000\n",
+      "2021-09-21 21:22:49,379 epoch 1 - iter 4/24 - loss 0.63181551 - samples/sec: 22.96 - lr: 0.020000\n",
+      "2021-09-21 21:22:49,471 epoch 1 - iter 6/24 - loss 0.63108201 - samples/sec: 22.01 - lr: 0.020000\n",
+      "2021-09-21 21:22:49,560 epoch 1 - iter 8/24 - loss 0.63313607 - samples/sec: 22.51 - lr: 0.020000\n",
+      "2021-09-21 21:22:49,648 epoch 1 - iter 10/24 - loss 0.63124254 - samples/sec: 22.87 - lr: 0.020000\n",
+      "2021-09-21 21:22:49,736 epoch 1 - iter 12/24 - loss 0.63395854 - samples/sec: 23.00 - lr: 0.020000\n",
+      "2021-09-21 21:22:49,825 epoch 1 - iter 14/24 - loss 0.63668522 - samples/sec: 22.46 - lr: 0.020000\n",
+      "2021-09-21 21:22:49,914 epoch 1 - iter 16/24 - loss 0.63730428 - samples/sec: 22.58 - lr: 0.020000\n",
+      "2021-09-21 21:22:50,004 epoch 1 - iter 18/24 - loss 0.63699297 - samples/sec: 22.54 - lr: 0.020000\n",
+      "2021-09-21 21:22:50,091 epoch 1 - iter 20/24 - loss 0.63438338 - samples/sec: 23.09 - lr: 0.020000\n",
+      "2021-09-21 21:22:50,181 epoch 1 - iter 22/24 - loss 0.63568675 - samples/sec: 22.16 - lr: 0.020000\n",
+      "2021-09-21 21:22:50,270 epoch 1 - iter 24/24 - loss 0.63536078 - samples/sec: 22.65 - lr: 0.020000\n",
+      "2021-09-21 21:22:50,271 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:50,272 EPOCH 1 done: loss 0.6354 - lr 0.0200000\n",
+      "2021-09-21 21:22:50,435 DEV : loss 0.6367731094360352 - score 0.3333\n",
+      "2021-09-21 21:22:50,435 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:22:58,529 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:58,653 epoch 2 - iter 2/24 - loss 0.63855559 - samples/sec: 19.56 - lr: 0.020000\n",
+      "2021-09-21 21:22:58,768 epoch 2 - iter 4/24 - loss 0.63347891 - samples/sec: 17.53 - lr: 0.020000\n",
+      "2021-09-21 21:22:58,882 epoch 2 - iter 6/24 - loss 0.63984417 - samples/sec: 17.58 - lr: 0.020000\n",
+      "2021-09-21 21:22:58,991 epoch 2 - iter 8/24 - loss 0.63913859 - samples/sec: 18.45 - lr: 0.020000\n",
+      "2021-09-21 21:22:59,115 epoch 2 - iter 10/24 - loss 0.63987238 - samples/sec: 16.14 - lr: 0.020000\n",
+      "2021-09-21 21:22:59,234 epoch 2 - iter 12/24 - loss 0.64028455 - samples/sec: 16.91 - lr: 0.020000\n",
+      "2021-09-21 21:22:59,348 epoch 2 - iter 14/24 - loss 0.63937291 - samples/sec: 17.62 - lr: 0.020000\n",
+      "2021-09-21 21:22:59,468 epoch 2 - iter 16/24 - loss 0.63965929 - samples/sec: 16.77 - lr: 0.020000\n",
+      "2021-09-21 21:22:59,570 epoch 2 - iter 18/24 - loss 0.63906144 - samples/sec: 19.57 - lr: 0.020000\n",
+      "2021-09-21 21:22:59,682 epoch 2 - iter 20/24 - loss 0.63865302 - samples/sec: 18.04 - lr: 0.020000\n",
+      "2021-09-21 21:22:59,795 epoch 2 - iter 22/24 - loss 0.63945334 - samples/sec: 17.77 - lr: 0.020000\n",
+      "2021-09-21 21:22:59,918 epoch 2 - iter 24/24 - loss 0.64123179 - samples/sec: 16.27 - lr: 0.020000\n",
+      "2021-09-21 21:22:59,919 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:59,919 EPOCH 2 done: loss 0.6412 - lr 0.0200000\n",
+      "2021-09-21 21:23:00,003 DEV : loss 0.6367982625961304 - score 0.0\n",
+      "2021-09-21 21:23:00,004 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:23:00,006 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:23:00,138 epoch 3 - iter 2/24 - loss 0.64519328 - samples/sec: 17.90 - lr: 0.020000\n",
+      "2021-09-21 21:23:00,257 epoch 3 - iter 4/24 - loss 0.64123003 - samples/sec: 16.84 - lr: 0.020000\n",
+      "2021-09-21 21:23:00,369 epoch 3 - iter 6/24 - loss 0.64247241 - samples/sec: 17.97 - lr: 0.020000\n",
+      "2021-09-21 21:23:00,484 epoch 3 - iter 8/24 - loss 0.63712913 - samples/sec: 17.42 - lr: 0.020000\n",
+      "2021-09-21 21:23:00,602 epoch 3 - iter 10/24 - loss 0.63611864 - samples/sec: 16.99 - lr: 0.020000\n",
+      "2021-09-21 21:23:00,727 epoch 3 - iter 12/24 - loss 0.63561096 - samples/sec: 16.19 - lr: 0.020000\n",
+      "2021-09-21 21:23:00,851 epoch 3 - iter 14/24 - loss 0.63757158 - samples/sec: 16.18 - lr: 0.020000\n",
+      "2021-09-21 21:23:00,975 epoch 3 - iter 16/24 - loss 0.63925757 - samples/sec: 16.14 - lr: 0.020000\n",
+      "2021-09-21 21:23:01,102 epoch 3 - iter 18/24 - loss 0.64074417 - samples/sec: 15.83 - lr: 0.020000\n",
+      "2021-09-21 21:23:01,235 epoch 3 - iter 20/24 - loss 0.64138338 - samples/sec: 15.20 - lr: 0.020000\n",
+      "2021-09-21 21:23:01,364 epoch 3 - iter 22/24 - loss 0.64160136 - samples/sec: 15.64 - lr: 0.020000\n",
+      "2021-09-21 21:23:01,481 epoch 3 - iter 24/24 - loss 0.64109450 - samples/sec: 17.19 - lr: 0.020000\n",
+      "2021-09-21 21:23:01,482 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:23:01,483 EPOCH 3 done: loss 0.6411 - lr 0.0200000\n",
+      "2021-09-21 21:23:01,571 DEV : loss 0.6368796825408936 - score 0.3333\n",
+      "2021-09-21 21:23:01,574 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:23:01,576 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:23:01,727 epoch 4 - iter 2/24 - loss 0.65189749 - samples/sec: 16.71 - lr: 0.020000\n",
+      "2021-09-21 21:23:01,848 epoch 4 - iter 4/24 - loss 0.64338155 - samples/sec: 16.61 - lr: 0.020000\n",
+      "2021-09-21 21:23:01,981 epoch 4 - iter 6/24 - loss 0.64582178 - samples/sec: 15.20 - lr: 0.020000\n",
+      "2021-09-21 21:23:02,119 epoch 4 - iter 8/24 - loss 0.64636648 - samples/sec: 14.51 - lr: 0.020000\n",
+      "2021-09-21 21:23:02,252 epoch 4 - iter 10/24 - loss 0.64508243 - samples/sec: 15.13 - lr: 0.020000\n",
+      "2021-09-21 21:23:02,393 epoch 4 - iter 12/24 - loss 0.64262682 - samples/sec: 14.28 - lr: 0.020000\n",
+      "2021-09-21 21:23:02,533 epoch 4 - iter 14/24 - loss 0.64049972 - samples/sec: 14.35 - lr: 0.020000\n",
+      "2021-09-21 21:23:02,645 epoch 4 - iter 16/24 - loss 0.64113202 - samples/sec: 18.03 - lr: 0.020000\n",
+      "2021-09-21 21:23:02,767 epoch 4 - iter 18/24 - loss 0.64109685 - samples/sec: 16.46 - lr: 0.020000\n",
+      "2021-09-21 21:23:02,900 epoch 4 - iter 20/24 - loss 0.64056602 - samples/sec: 15.12 - lr: 0.020000\n",
+      "2021-09-21 21:23:03,043 epoch 4 - iter 22/24 - loss 0.63912728 - samples/sec: 14.08 - lr: 0.020000\n",
+      "2021-09-21 21:23:03,178 epoch 4 - iter 24/24 - loss 0.63774319 - samples/sec: 14.88 - lr: 0.020000\n",
+      "2021-09-21 21:23:03,179 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:23:03,179 EPOCH 4 done: loss 0.6377 - lr 0.0200000\n",
+      "2021-09-21 21:23:03,273 DEV : loss 0.6366347074508667 - score 0.3333\n",
+      "2021-09-21 21:23:03,276 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:23:07,368 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:23:07,542 epoch 5 - iter 2/24 - loss 0.62843519 - samples/sec: 13.98 - lr: 0.020000\n",
+      "2021-09-21 21:23:07,708 epoch 5 - iter 4/24 - loss 0.63643755 - samples/sec: 12.10 - lr: 0.020000\n",
+      "2021-09-21 21:23:07,853 epoch 5 - iter 6/24 - loss 0.63459424 - samples/sec: 13.80 - lr: 0.020000\n",
+      "2021-09-21 21:23:07,985 epoch 5 - iter 8/24 - loss 0.63341050 - samples/sec: 15.20 - lr: 0.020000\n",
+      "2021-09-21 21:23:08,116 epoch 5 - iter 10/24 - loss 0.63487499 - samples/sec: 15.38 - lr: 0.020000\n",
+      "2021-09-21 21:23:08,258 epoch 5 - iter 12/24 - loss 0.63655397 - samples/sec: 14.14 - lr: 0.020000\n",
+      "2021-09-21 21:23:08,400 epoch 5 - iter 14/24 - loss 0.63623191 - samples/sec: 14.14 - lr: 0.020000\n",
+      "2021-09-21 21:23:08,526 epoch 5 - iter 16/24 - loss 0.63773406 - samples/sec: 15.95 - lr: 0.020000\n",
+      "2021-09-21 21:23:08,677 epoch 5 - iter 18/24 - loss 0.63848145 - samples/sec: 13.34 - lr: 0.020000\n",
+      "2021-09-21 21:23:08,785 epoch 5 - iter 20/24 - loss 0.63911882 - samples/sec: 18.47 - lr: 0.020000\n",
+      "2021-09-21 21:23:08,930 epoch 5 - iter 22/24 - loss 0.63881186 - samples/sec: 13.86 - lr: 0.020000\n",
+      "2021-09-21 21:23:09,053 epoch 5 - iter 24/24 - loss 0.63917940 - samples/sec: 16.37 - lr: 0.020000\n",
+      "2021-09-21 21:23:09,054 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:23:09,054 EPOCH 5 done: loss 0.6392 - lr 0.0200000\n",
+      "2021-09-21 21:23:09,140 DEV : loss 0.6365158557891846 - score 0.3333\n",
+      "2021-09-21 21:23:09,141 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:51:36,176 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:36,310 epoch 2 - iter 2/24 - loss 0.63601786 - samples/sec: 18.72 - lr: 0.020000\n",
-      "2021-09-08 01:51:36,417 epoch 2 - iter 4/24 - loss 0.63138872 - samples/sec: 18.89 - lr: 0.020000\n",
-      "2021-09-08 01:51:36,518 epoch 2 - iter 6/24 - loss 0.63377167 - samples/sec: 19.80 - lr: 0.020000\n",
-      "2021-09-08 01:51:36,625 epoch 2 - iter 8/24 - loss 0.63232836 - samples/sec: 18.77 - lr: 0.020000\n",
-      "2021-09-08 01:51:36,726 epoch 2 - iter 10/24 - loss 0.63313230 - samples/sec: 20.03 - lr: 0.020000\n",
-      "2021-09-08 01:51:36,825 epoch 2 - iter 12/24 - loss 0.63304608 - samples/sec: 20.23 - lr: 0.020000\n",
-      "2021-09-08 01:51:36,928 epoch 2 - iter 14/24 - loss 0.63013162 - samples/sec: 19.42 - lr: 0.020000\n",
-      "2021-09-08 01:51:37,032 epoch 2 - iter 16/24 - loss 0.62944178 - samples/sec: 19.47 - lr: 0.020000\n",
-      "2021-09-08 01:51:37,143 epoch 2 - iter 18/24 - loss 0.63081964 - samples/sec: 18.05 - lr: 0.020000\n",
-      "2021-09-08 01:51:37,246 epoch 2 - iter 20/24 - loss 0.62960921 - samples/sec: 19.43 - lr: 0.020000\n",
-      "2021-09-08 01:51:37,350 epoch 2 - iter 22/24 - loss 0.62832082 - samples/sec: 19.39 - lr: 0.020000\n",
-      "2021-09-08 01:51:37,458 epoch 2 - iter 24/24 - loss 0.62818716 - samples/sec: 18.69 - lr: 0.020000\n",
-      "2021-09-08 01:51:37,459 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:37,459 EPOCH 2 done: loss 0.6282 - lr 0.0200000\n",
-      "2021-09-08 01:51:37,525 DEV : loss 0.6366195678710938 - score 0.3333\n",
-      "2021-09-08 01:51:37,527 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:51:37,529 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:37,656 epoch 3 - iter 2/24 - loss 0.68134424 - samples/sec: 18.44 - lr: 0.020000\n",
-      "2021-09-08 01:51:37,759 epoch 3 - iter 4/24 - loss 0.66066206 - samples/sec: 19.45 - lr: 0.020000\n",
-      "2021-09-08 01:51:37,868 epoch 3 - iter 6/24 - loss 0.65197990 - samples/sec: 18.55 - lr: 0.020000\n",
-      "2021-09-08 01:51:37,978 epoch 3 - iter 8/24 - loss 0.65427605 - samples/sec: 18.24 - lr: 0.020000\n",
-      "2021-09-08 01:51:38,087 epoch 3 - iter 10/24 - loss 0.65041947 - samples/sec: 18.44 - lr: 0.020000\n",
-      "2021-09-08 01:51:38,181 epoch 3 - iter 12/24 - loss 0.65153820 - samples/sec: 21.41 - lr: 0.020000\n",
-      "2021-09-08 01:51:38,281 epoch 3 - iter 14/24 - loss 0.65114690 - samples/sec: 20.08 - lr: 0.020000\n",
-      "2021-09-08 01:51:38,389 epoch 3 - iter 16/24 - loss 0.65068111 - samples/sec: 18.57 - lr: 0.020000\n",
-      "2021-09-08 01:51:38,496 epoch 3 - iter 18/24 - loss 0.64742923 - samples/sec: 18.87 - lr: 0.020000\n",
-      "2021-09-08 01:51:38,594 epoch 3 - iter 20/24 - loss 0.64696462 - samples/sec: 20.40 - lr: 0.020000\n",
-      "2021-09-08 01:51:38,698 epoch 3 - iter 22/24 - loss 0.64507200 - samples/sec: 19.30 - lr: 0.020000\n",
-      "2021-09-08 01:51:38,795 epoch 3 - iter 24/24 - loss 0.64391480 - samples/sec: 20.74 - lr: 0.020000\n",
-      "2021-09-08 01:51:38,796 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:38,797 EPOCH 3 done: loss 0.6439 - lr 0.0200000\n",
-      "2021-09-08 01:51:38,941 DEV : loss 0.6365997195243835 - score 0.3333\n",
-      "2021-09-08 01:51:38,942 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:51:39,015 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:39,151 epoch 4 - iter 2/24 - loss 0.64392859 - samples/sec: 17.35 - lr: 0.020000\n",
-      "2021-09-08 01:51:39,249 epoch 4 - iter 4/24 - loss 0.63519309 - samples/sec: 20.39 - lr: 0.020000\n",
-      "2021-09-08 01:51:39,353 epoch 4 - iter 6/24 - loss 0.62889617 - samples/sec: 19.34 - lr: 0.020000\n",
-      "2021-09-08 01:51:39,454 epoch 4 - iter 8/24 - loss 0.63374505 - samples/sec: 19.90 - lr: 0.020000\n",
-      "2021-09-08 01:51:39,556 epoch 4 - iter 10/24 - loss 0.63636326 - samples/sec: 19.80 - lr: 0.020000\n",
-      "2021-09-08 01:51:39,663 epoch 4 - iter 12/24 - loss 0.63978669 - samples/sec: 18.71 - lr: 0.020000\n",
-      "2021-09-08 01:51:39,769 epoch 4 - iter 14/24 - loss 0.63787514 - samples/sec: 18.98 - lr: 0.020000\n",
-      "2021-09-08 01:51:39,869 epoch 4 - iter 16/24 - loss 0.63766112 - samples/sec: 20.09 - lr: 0.020000\n",
-      "2021-09-08 01:51:39,971 epoch 4 - iter 18/24 - loss 0.64156278 - samples/sec: 19.75 - lr: 0.020000\n",
-      "2021-09-08 01:51:40,078 epoch 4 - iter 20/24 - loss 0.63946396 - samples/sec: 18.75 - lr: 0.020000\n",
-      "2021-09-08 01:51:40,180 epoch 4 - iter 22/24 - loss 0.63971741 - samples/sec: 19.76 - lr: 0.020000\n",
-      "2021-09-08 01:51:40,283 epoch 4 - iter 24/24 - loss 0.64008663 - samples/sec: 19.61 - lr: 0.020000\n",
-      "2021-09-08 01:51:40,284 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:40,284 EPOCH 4 done: loss 0.6401 - lr 0.0200000\n",
-      "2021-09-08 01:51:40,457 DEV : loss 0.6368191242218018 - score 0.3333\n",
-      "2021-09-08 01:51:40,459 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:51:40,537 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:40,667 epoch 5 - iter 2/24 - loss 0.62917003 - samples/sec: 19.56 - lr: 0.020000\n",
-      "2021-09-08 01:51:40,773 epoch 5 - iter 4/24 - loss 0.63732651 - samples/sec: 18.87 - lr: 0.020000\n",
-      "2021-09-08 01:51:40,881 epoch 5 - iter 6/24 - loss 0.63468718 - samples/sec: 18.67 - lr: 0.020000\n"
+      "2021-09-21 21:23:13,075 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:51:40,984 epoch 5 - iter 8/24 - loss 0.63487983 - samples/sec: 19.58 - lr: 0.020000\n",
-      "2021-09-08 01:51:41,090 epoch 5 - iter 10/24 - loss 0.63645594 - samples/sec: 18.84 - lr: 0.020000\n",
-      "2021-09-08 01:51:41,195 epoch 5 - iter 12/24 - loss 0.63725656 - samples/sec: 19.16 - lr: 0.020000\n",
-      "2021-09-08 01:51:41,293 epoch 5 - iter 14/24 - loss 0.63598366 - samples/sec: 20.57 - lr: 0.020000\n",
-      "2021-09-08 01:51:41,395 epoch 5 - iter 16/24 - loss 0.63433965 - samples/sec: 19.76 - lr: 0.020000\n",
-      "2021-09-08 01:51:41,502 epoch 5 - iter 18/24 - loss 0.63493690 - samples/sec: 18.73 - lr: 0.020000\n",
-      "2021-09-08 01:51:41,602 epoch 5 - iter 20/24 - loss 0.63507411 - samples/sec: 20.08 - lr: 0.020000\n",
-      "2021-09-08 01:51:41,705 epoch 5 - iter 22/24 - loss 0.63573548 - samples/sec: 19.54 - lr: 0.020000\n",
-      "2021-09-08 01:51:41,807 epoch 5 - iter 24/24 - loss 0.63638839 - samples/sec: 19.63 - lr: 0.020000\n",
-      "2021-09-08 01:51:41,808 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:41,809 EPOCH 5 done: loss 0.6364 - lr 0.0200000\n",
-      "2021-09-08 01:51:41,880 DEV : loss 0.6365981101989746 - score 0.0\n",
-      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:51:41,881 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:51:41,882 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:42,012 epoch 6 - iter 2/24 - loss 0.62767214 - samples/sec: 18.60 - lr: 0.010000\n",
-      "2021-09-08 01:51:42,117 epoch 6 - iter 4/24 - loss 0.63265899 - samples/sec: 19.08 - lr: 0.010000\n",
-      "2021-09-08 01:51:42,222 epoch 6 - iter 6/24 - loss 0.63941572 - samples/sec: 19.16 - lr: 0.010000\n",
-      "2021-09-08 01:51:42,316 epoch 6 - iter 8/24 - loss 0.64177462 - samples/sec: 21.27 - lr: 0.010000\n",
-      "2021-09-08 01:51:42,421 epoch 6 - iter 10/24 - loss 0.64336030 - samples/sec: 19.28 - lr: 0.010000\n",
-      "2021-09-08 01:51:42,531 epoch 6 - iter 12/24 - loss 0.64152908 - samples/sec: 18.25 - lr: 0.010000\n",
-      "2021-09-08 01:51:42,629 epoch 6 - iter 14/24 - loss 0.64086504 - samples/sec: 20.47 - lr: 0.010000\n",
-      "2021-09-08 01:51:42,737 epoch 6 - iter 16/24 - loss 0.63832769 - samples/sec: 18.53 - lr: 0.010000\n",
-      "2021-09-08 01:51:42,840 epoch 6 - iter 18/24 - loss 0.63854958 - samples/sec: 19.67 - lr: 0.010000\n",
-      "2021-09-08 01:51:42,952 epoch 6 - iter 20/24 - loss 0.63659986 - samples/sec: 17.85 - lr: 0.010000\n",
-      "2021-09-08 01:51:43,063 epoch 6 - iter 22/24 - loss 0.63641518 - samples/sec: 18.08 - lr: 0.010000\n",
-      "2021-09-08 01:51:43,166 epoch 6 - iter 24/24 - loss 0.63538992 - samples/sec: 19.53 - lr: 0.010000\n",
-      "2021-09-08 01:51:43,167 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:43,168 EPOCH 6 done: loss 0.6354 - lr 0.0100000\n",
-      "2021-09-08 01:51:43,238 DEV : loss 0.6365477442741394 - score 0.3333\n",
-      "2021-09-08 01:51:43,239 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:51:43,243 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:43,378 epoch 7 - iter 2/24 - loss 0.63525695 - samples/sec: 18.42 - lr: 0.010000\n",
-      "2021-09-08 01:51:43,483 epoch 7 - iter 4/24 - loss 0.63962708 - samples/sec: 19.00 - lr: 0.010000\n",
-      "2021-09-08 01:51:43,590 epoch 7 - iter 6/24 - loss 0.63903674 - samples/sec: 18.78 - lr: 0.010000\n",
-      "2021-09-08 01:51:43,687 epoch 7 - iter 8/24 - loss 0.63632372 - samples/sec: 20.70 - lr: 0.010000\n",
-      "2021-09-08 01:51:43,795 epoch 7 - iter 10/24 - loss 0.63898380 - samples/sec: 18.62 - lr: 0.010000\n",
-      "2021-09-08 01:51:43,905 epoch 7 - iter 12/24 - loss 0.64415391 - samples/sec: 18.29 - lr: 0.010000\n",
-      "2021-09-08 01:51:44,010 epoch 7 - iter 14/24 - loss 0.64221822 - samples/sec: 19.08 - lr: 0.010000\n",
-      "2021-09-08 01:51:44,116 epoch 7 - iter 16/24 - loss 0.64156465 - samples/sec: 18.91 - lr: 0.010000\n",
-      "2021-09-08 01:51:44,223 epoch 7 - iter 18/24 - loss 0.64179616 - samples/sec: 18.83 - lr: 0.010000\n",
-      "2021-09-08 01:51:44,332 epoch 7 - iter 20/24 - loss 0.64121390 - samples/sec: 18.45 - lr: 0.010000\n",
-      "2021-09-08 01:51:44,437 epoch 7 - iter 22/24 - loss 0.64216820 - samples/sec: 19.12 - lr: 0.010000\n",
-      "2021-09-08 01:51:44,525 epoch 7 - iter 24/24 - loss 0.64120484 - samples/sec: 22.79 - lr: 0.010000\n",
-      "2021-09-08 01:51:44,526 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:44,527 EPOCH 7 done: loss 0.6412 - lr 0.0100000\n",
-      "2021-09-08 01:51:44,576 DEV : loss 0.6367380023002625 - score 0.3333\n",
-      "2021-09-08 01:51:44,577 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:51:44,579 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:44,679 epoch 8 - iter 2/24 - loss 0.62876958 - samples/sec: 22.80 - lr: 0.010000\n",
-      "2021-09-08 01:51:44,768 epoch 8 - iter 4/24 - loss 0.62983511 - samples/sec: 22.76 - lr: 0.010000\n",
-      "2021-09-08 01:51:44,856 epoch 8 - iter 6/24 - loss 0.62993442 - samples/sec: 22.84 - lr: 0.010000\n",
-      "2021-09-08 01:51:44,944 epoch 8 - iter 8/24 - loss 0.63113503 - samples/sec: 22.91 - lr: 0.010000\n",
-      "2021-09-08 01:51:45,034 epoch 8 - iter 10/24 - loss 0.63198576 - samples/sec: 22.19 - lr: 0.010000\n",
-      "2021-09-08 01:51:45,122 epoch 8 - iter 12/24 - loss 0.63257496 - samples/sec: 22.91 - lr: 0.010000\n",
-      "2021-09-08 01:51:45,209 epoch 8 - iter 14/24 - loss 0.63180627 - samples/sec: 22.98 - lr: 0.010000\n",
-      "2021-09-08 01:51:45,298 epoch 8 - iter 16/24 - loss 0.63211837 - samples/sec: 22.57 - lr: 0.010000\n",
-      "2021-09-08 01:51:45,386 epoch 8 - iter 18/24 - loss 0.63331460 - samples/sec: 22.89 - lr: 0.010000\n",
-      "2021-09-08 01:51:45,475 epoch 8 - iter 20/24 - loss 0.63341598 - samples/sec: 22.72 - lr: 0.010000\n",
-      "2021-09-08 01:51:45,563 epoch 8 - iter 22/24 - loss 0.63684066 - samples/sec: 22.91 - lr: 0.010000\n",
-      "2021-09-08 01:51:45,650 epoch 8 - iter 24/24 - loss 0.63634276 - samples/sec: 23.01 - lr: 0.010000\n",
-      "2021-09-08 01:51:45,651 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:45,651 EPOCH 8 done: loss 0.6363 - lr 0.0100000\n",
-      "2021-09-08 01:51:45,706 DEV : loss 0.6365148425102234 - score 0.3333\n",
-      "2021-09-08 01:51:45,707 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:23:13,249 epoch 6 - iter 2/24 - loss 0.64004520 - samples/sec: 14.82 - lr: 0.020000\n",
+      "2021-09-21 21:23:13,393 epoch 6 - iter 4/24 - loss 0.63834549 - samples/sec: 13.94 - lr: 0.020000\n",
+      "2021-09-21 21:23:13,519 epoch 6 - iter 6/24 - loss 0.63448085 - samples/sec: 15.99 - lr: 0.020000\n",
+      "2021-09-21 21:23:13,641 epoch 6 - iter 8/24 - loss 0.63310432 - samples/sec: 16.42 - lr: 0.020000\n",
+      "2021-09-21 21:23:13,769 epoch 6 - iter 10/24 - loss 0.63582911 - samples/sec: 15.70 - lr: 0.020000\n",
+      "2021-09-21 21:23:13,899 epoch 6 - iter 12/24 - loss 0.63550408 - samples/sec: 15.49 - lr: 0.020000\n",
+      "2021-09-21 21:23:14,048 epoch 6 - iter 14/24 - loss 0.63696762 - samples/sec: 13.46 - lr: 0.020000\n",
+      "2021-09-21 21:23:14,191 epoch 6 - iter 16/24 - loss 0.63552404 - samples/sec: 14.00 - lr: 0.020000\n",
+      "2021-09-21 21:23:14,322 epoch 6 - iter 18/24 - loss 0.63587754 - samples/sec: 15.42 - lr: 0.020000\n",
+      "2021-09-21 21:23:14,437 epoch 6 - iter 20/24 - loss 0.63540181 - samples/sec: 17.49 - lr: 0.020000\n",
+      "2021-09-21 21:23:14,578 epoch 6 - iter 22/24 - loss 0.63649618 - samples/sec: 14.21 - lr: 0.020000\n",
+      "2021-09-21 21:23:14,720 epoch 6 - iter 24/24 - loss 0.63661847 - samples/sec: 14.11 - lr: 0.020000\n",
+      "2021-09-21 21:23:14,721 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:23:14,722 EPOCH 6 done: loss 0.6366 - lr 0.0200000\n",
+      "2021-09-21 21:23:14,795 DEV : loss 0.6365553140640259 - score 0.3333\n",
+      "2021-09-21 21:23:14,797 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:23:14,799 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:23:14,961 epoch 7 - iter 2/24 - loss 0.63100535 - samples/sec: 15.13 - lr: 0.020000\n",
+      "2021-09-21 21:23:15,097 epoch 7 - iter 4/24 - loss 0.63513573 - samples/sec: 14.76 - lr: 0.020000\n",
+      "2021-09-21 21:23:15,227 epoch 7 - iter 6/24 - loss 0.63953187 - samples/sec: 15.44 - lr: 0.020000\n",
+      "2021-09-21 21:23:15,352 epoch 7 - iter 8/24 - loss 0.64098834 - samples/sec: 16.01 - lr: 0.020000\n",
+      "2021-09-21 21:23:15,467 epoch 7 - iter 10/24 - loss 0.64342469 - samples/sec: 17.54 - lr: 0.020000\n",
+      "2021-09-21 21:23:15,599 epoch 7 - iter 12/24 - loss 0.64473237 - samples/sec: 15.22 - lr: 0.020000\n",
+      "2021-09-21 21:23:15,728 epoch 7 - iter 14/24 - loss 0.64132470 - samples/sec: 15.53 - lr: 0.020000\n",
+      "2021-09-21 21:23:15,856 epoch 7 - iter 16/24 - loss 0.64063701 - samples/sec: 15.63 - lr: 0.020000\n",
+      "2021-09-21 21:23:15,988 epoch 7 - iter 18/24 - loss 0.63895115 - samples/sec: 15.27 - lr: 0.020000\n",
+      "2021-09-21 21:23:16,145 epoch 7 - iter 20/24 - loss 0.63937768 - samples/sec: 12.76 - lr: 0.020000\n",
+      "2021-09-21 21:23:16,313 epoch 7 - iter 22/24 - loss 0.63839479 - samples/sec: 11.93 - lr: 0.020000\n",
+      "2021-09-21 21:23:16,433 epoch 7 - iter 24/24 - loss 0.63769828 - samples/sec: 16.77 - lr: 0.020000\n",
+      "2021-09-21 21:23:16,434 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:23:16,434 EPOCH 7 done: loss 0.6377 - lr 0.0200000\n",
+      "2021-09-21 21:23:16,520 DEV : loss 0.6365150213241577 - score 0.3333\n",
+      "2021-09-21 21:23:16,524 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:51:49,765 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:49,869 epoch 9 - iter 2/24 - loss 0.64300358 - samples/sec: 22.24 - lr: 0.010000\n",
-      "2021-09-08 01:51:49,957 epoch 9 - iter 4/24 - loss 0.64046375 - samples/sec: 22.81 - lr: 0.010000\n",
-      "2021-09-08 01:51:50,045 epoch 9 - iter 6/24 - loss 0.63508231 - samples/sec: 22.82 - lr: 0.010000\n",
-      "2021-09-08 01:51:50,133 epoch 9 - iter 8/24 - loss 0.63810557 - samples/sec: 22.89 - lr: 0.010000\n",
-      "2021-09-08 01:51:50,223 epoch 9 - iter 10/24 - loss 0.63786458 - samples/sec: 22.50 - lr: 0.010000\n",
-      "2021-09-08 01:51:50,312 epoch 9 - iter 12/24 - loss 0.63768217 - samples/sec: 22.47 - lr: 0.010000\n",
-      "2021-09-08 01:51:50,402 epoch 9 - iter 14/24 - loss 0.63606800 - samples/sec: 22.28 - lr: 0.010000\n",
-      "2021-09-08 01:51:50,490 epoch 9 - iter 16/24 - loss 0.63789970 - samples/sec: 22.94 - lr: 0.010000\n",
-      "2021-09-08 01:51:50,578 epoch 9 - iter 18/24 - loss 0.63746603 - samples/sec: 22.96 - lr: 0.010000\n",
-      "2021-09-08 01:51:50,665 epoch 9 - iter 20/24 - loss 0.63734330 - samples/sec: 22.89 - lr: 0.010000\n",
-      "2021-09-08 01:51:50,754 epoch 9 - iter 22/24 - loss 0.63942281 - samples/sec: 22.77 - lr: 0.010000\n",
-      "2021-09-08 01:51:50,842 epoch 9 - iter 24/24 - loss 0.63912924 - samples/sec: 22.85 - lr: 0.010000\n",
-      "2021-09-08 01:51:50,843 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:50,843 EPOCH 9 done: loss 0.6391 - lr 0.0100000\n",
-      "2021-09-08 01:51:50,893 DEV : loss 0.6366004347801208 - score 0.6667\n",
-      "2021-09-08 01:51:50,894 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:23:20,210 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:23:20,365 epoch 8 - iter 2/24 - loss 0.64252183 - samples/sec: 14.78 - lr: 0.020000\n",
+      "2021-09-21 21:23:20,499 epoch 8 - iter 4/24 - loss 0.64676733 - samples/sec: 14.90 - lr: 0.020000\n",
+      "2021-09-21 21:23:20,625 epoch 8 - iter 6/24 - loss 0.64406035 - samples/sec: 16.05 - lr: 0.020000\n",
+      "2021-09-21 21:23:20,757 epoch 8 - iter 8/24 - loss 0.63975015 - samples/sec: 15.18 - lr: 0.020000\n",
+      "2021-09-21 21:23:20,888 epoch 8 - iter 10/24 - loss 0.63862139 - samples/sec: 15.32 - lr: 0.020000\n",
+      "2021-09-21 21:23:21,041 epoch 8 - iter 12/24 - loss 0.64000869 - samples/sec: 13.10 - lr: 0.020000\n",
+      "2021-09-21 21:23:21,175 epoch 8 - iter 14/24 - loss 0.63812609 - samples/sec: 15.06 - lr: 0.020000\n",
+      "2021-09-21 21:23:21,310 epoch 8 - iter 16/24 - loss 0.63816023 - samples/sec: 14.88 - lr: 0.020000\n",
+      "2021-09-21 21:23:21,433 epoch 8 - iter 18/24 - loss 0.63831256 - samples/sec: 16.31 - lr: 0.020000\n",
+      "2021-09-21 21:23:21,555 epoch 8 - iter 20/24 - loss 0.63857872 - samples/sec: 16.40 - lr: 0.020000\n",
+      "2021-09-21 21:23:21,680 epoch 8 - iter 22/24 - loss 0.63849758 - samples/sec: 16.14 - lr: 0.020000\n",
+      "2021-09-21 21:23:21,816 epoch 8 - iter 24/24 - loss 0.63822179 - samples/sec: 14.70 - lr: 0.020000\n",
+      "2021-09-21 21:23:21,817 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:23:21,818 EPOCH 8 done: loss 0.6382 - lr 0.0200000\n",
+      "2021-09-21 21:23:21,912 DEV : loss 0.6366527080535889 - score 0.0\n",
+      "2021-09-21 21:23:21,912 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:23:21,914 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:23:22,065 epoch 9 - iter 2/24 - loss 0.64387301 - samples/sec: 15.04 - lr: 0.020000\n",
+      "2021-09-21 21:23:22,189 epoch 9 - iter 4/24 - loss 0.63569327 - samples/sec: 16.24 - lr: 0.020000\n",
+      "2021-09-21 21:23:22,326 epoch 9 - iter 6/24 - loss 0.64038961 - samples/sec: 14.64 - lr: 0.020000\n",
+      "2021-09-21 21:23:22,465 epoch 9 - iter 8/24 - loss 0.64030372 - samples/sec: 14.40 - lr: 0.020000\n",
+      "2021-09-21 21:23:22,593 epoch 9 - iter 10/24 - loss 0.64124108 - samples/sec: 15.69 - lr: 0.020000\n",
+      "2021-09-21 21:23:22,726 epoch 9 - iter 12/24 - loss 0.64125993 - samples/sec: 15.19 - lr: 0.020000\n",
+      "2021-09-21 21:23:22,854 epoch 9 - iter 14/24 - loss 0.64187942 - samples/sec: 15.69 - lr: 0.020000\n",
+      "2021-09-21 21:23:22,983 epoch 9 - iter 16/24 - loss 0.64230314 - samples/sec: 15.50 - lr: 0.020000\n",
+      "2021-09-21 21:23:23,111 epoch 9 - iter 18/24 - loss 0.64208939 - samples/sec: 15.72 - lr: 0.020000\n",
+      "2021-09-21 21:23:23,246 epoch 9 - iter 20/24 - loss 0.64249480 - samples/sec: 14.87 - lr: 0.020000\n",
+      "2021-09-21 21:23:23,377 epoch 9 - iter 22/24 - loss 0.64139317 - samples/sec: 15.24 - lr: 0.020000\n",
+      "2021-09-21 21:23:23,504 epoch 9 - iter 24/24 - loss 0.64168436 - samples/sec: 15.84 - lr: 0.020000\n",
+      "2021-09-21 21:23:23,505 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:23:23,506 EPOCH 9 done: loss 0.6417 - lr 0.0200000\n",
+      "2021-09-21 21:23:23,603 DEV : loss 0.6370216012001038 - score 0.3333\n",
+      "2021-09-21 21:23:23,604 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:23:23,606 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:23:23,749 epoch 10 - iter 2/24 - loss 0.64830703 - samples/sec: 16.89 - lr: 0.020000\n",
+      "2021-09-21 21:23:23,879 epoch 10 - iter 4/24 - loss 0.64386626 - samples/sec: 15.47 - lr: 0.020000\n",
+      "2021-09-21 21:23:24,038 epoch 10 - iter 6/24 - loss 0.64125839 - samples/sec: 12.61 - lr: 0.020000\n",
+      "2021-09-21 21:23:24,186 epoch 10 - iter 8/24 - loss 0.64027586 - samples/sec: 13.56 - lr: 0.020000\n",
+      "2021-09-21 21:23:24,328 epoch 10 - iter 10/24 - loss 0.63885983 - samples/sec: 14.23 - lr: 0.020000\n",
+      "2021-09-21 21:23:24,456 epoch 10 - iter 12/24 - loss 0.63902625 - samples/sec: 15.63 - lr: 0.020000\n",
+      "2021-09-21 21:23:24,596 epoch 10 - iter 14/24 - loss 0.63873708 - samples/sec: 14.38 - lr: 0.020000\n",
+      "2021-09-21 21:23:24,719 epoch 10 - iter 16/24 - loss 0.63792561 - samples/sec: 16.30 - lr: 0.020000\n",
+      "2021-09-21 21:23:24,854 epoch 10 - iter 18/24 - loss 0.63832017 - samples/sec: 14.88 - lr: 0.020000\n",
+      "2021-09-21 21:23:24,980 epoch 10 - iter 20/24 - loss 0.63817680 - samples/sec: 15.84 - lr: 0.020000\n",
+      "2021-09-21 21:23:25,108 epoch 10 - iter 22/24 - loss 0.63784397 - samples/sec: 15.74 - lr: 0.020000\n",
+      "2021-09-21 21:23:25,244 epoch 10 - iter 24/24 - loss 0.63728072 - samples/sec: 14.76 - lr: 0.020000\n",
+      "2021-09-21 21:23:25,245 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:23:25,245 EPOCH 10 done: loss 0.6373 - lr 0.0200000\n",
+      "2021-09-21 21:23:25,338 DEV : loss 0.6365148425102234 - score 0.3333\n",
+      "2021-09-21 21:23:25,339 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:51:55,127 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:55,320 epoch 10 - iter 2/24 - loss 0.62821457 - samples/sec: 11.22 - lr: 0.010000\n",
-      "2021-09-08 01:51:55,409 epoch 10 - iter 4/24 - loss 0.62777025 - samples/sec: 22.69 - lr: 0.010000\n",
-      "2021-09-08 01:51:55,498 epoch 10 - iter 6/24 - loss 0.63429959 - samples/sec: 22.69 - lr: 0.010000\n"
+      "2021-09-21 21:23:37,042 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:23:37,043 Testing using best model ...\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:51:55,586 epoch 10 - iter 8/24 - loss 0.63661721 - samples/sec: 22.70 - lr: 0.010000\n",
-      "2021-09-08 01:51:55,674 epoch 10 - iter 10/24 - loss 0.63628132 - samples/sec: 22.80 - lr: 0.010000\n",
-      "2021-09-08 01:51:55,762 epoch 10 - iter 12/24 - loss 0.63636609 - samples/sec: 22.83 - lr: 0.010000\n",
-      "2021-09-08 01:51:55,850 epoch 10 - iter 14/24 - loss 0.63807397 - samples/sec: 22.86 - lr: 0.010000\n",
-      "2021-09-08 01:51:55,938 epoch 10 - iter 16/24 - loss 0.63665088 - samples/sec: 22.86 - lr: 0.010000\n",
-      "2021-09-08 01:51:56,029 epoch 10 - iter 18/24 - loss 0.63570968 - samples/sec: 22.13 - lr: 0.010000\n",
-      "2021-09-08 01:51:56,117 epoch 10 - iter 20/24 - loss 0.63561949 - samples/sec: 22.91 - lr: 0.010000\n",
-      "2021-09-08 01:51:56,207 epoch 10 - iter 22/24 - loss 0.63527352 - samples/sec: 22.39 - lr: 0.010000\n",
-      "2021-09-08 01:51:56,296 epoch 10 - iter 24/24 - loss 0.63594973 - samples/sec: 22.63 - lr: 0.010000\n",
-      "2021-09-08 01:51:56,297 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:56,297 EPOCH 10 done: loss 0.6359 - lr 0.0100000\n",
-      "2021-09-08 01:51:56,347 DEV : loss 0.6366277933120728 - score 0.3333\n",
-      "2021-09-08 01:51:56,347 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:52:00,537 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:52:00,538 Testing using best model ...\n",
-      "2021-09-08 01:52:00,539 loading file None1/best-model.pt\n",
+      "2021-09-21 21:23:37,044 loading file None1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:52:05,536 \t0.0\n",
-      "2021-09-08 01:52:05,537 \n",
+      "2021-09-21 21:23:41,980 \t0.3333\n",
+      "2021-09-21 21:23:41,980 \n",
       "Results:\n",
-      "- F-score (micro) 0.0\n",
-      "- F-score (macro) 0.0\n",
-      "- Accuracy 0.0\n",
+      "- F-score (micro) 0.3333\n",
+      "- F-score (macro) 0.1667\n",
+      "- Accuracy 0.3333\n",
       "\n",
       "By class:\n",
       "                                                          precision    recall  f1-score   support\n",
       "\n",
-      "   having the quality of something harmful or unpleasant     0.0000    0.0000    0.0000         0\n",
-      "one who does not side with any party in a war or dispute     0.0000    0.0000    0.0000         1\n",
-      "                             involving advantage or good     0.0000    0.0000    0.0000         2\n",
+      "   having the quality of something harmful or unpleasant     0.5000    0.5000    0.5000         2\n",
+      "one who does not side with any party in a war or dispute     0.0000    0.0000    0.0000         0\n",
+      "                             involving advantage or good     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "                                               micro avg     0.0000    0.0000    0.0000         3\n",
-      "                                               macro avg     0.0000    0.0000    0.0000         3\n",
-      "                                            weighted avg     0.0000    0.0000    0.0000         3\n",
-      "                                             samples avg     0.0000    0.0000    0.0000         3\n",
+      "                                               micro avg     0.3333    0.3333    0.3333         3\n",
+      "                                               macro avg     0.1667    0.1667    0.1667         3\n",
+      "                                            weighted avg     0.3333    0.3333    0.3333         3\n",
+      "                                             samples avg     0.3333    0.3333    0.3333         3\n",
       "\n",
-      "2021-09-08 01:52:05,537 ----------------------------------------------------------------------------------------------------\n",
-      "Accuracy Durchschnitt: 0.3539353769676885\n"
+      "2021-09-21 21:23:41,981 ----------------------------------------------------------------------------------------------------\n",
+      "Accuracy Durchschnitt: 0.34266777133388565\n"
      ]
     }
    ],
@@ -8898,11 +8929,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
    "id": "a310936c",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.34465617232808615, 0.3471416735708368, 0.3628831814415907, 0.3380281690140845, 0.32062966031483014]\n",
+      "0.013710117377589134\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(avg_acc_list)\n",
+    "print(statistics.pstdev(avg_acc_list))"
+   ]
   }
  ],
  "metadata": {
diff --git a/fewshot/sentiment_twitter_fewshot.ipynb b/fewshot/sentiment_twitter_fewshot.ipynb
index e47de0c..7f55523 100644
--- a/fewshot/sentiment_twitter_fewshot.ipynb
+++ b/fewshot/sentiment_twitter_fewshot.ipynb
@@ -70,38 +70,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:33:42,832 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
+      "2021-09-21 21:03:31,548 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:33:57,331 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:03:39,648 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 46/46 [00:00<00:00, 13455.47it/s]"
+      "100%|██████████| 46/46 [00:00<00:00, 7958.17it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:33:57,337 [b'awful', b'bad', b'neutral', b'good', b'great']\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 01:33:57,573 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:57,575 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:03:39,657 [b'awful', b'bad', b'neutral', b'good', b'great']\n",
+      "2021-09-21 21:03:39,841 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:39,843 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -414,208 +401,27 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:33:57,576 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:57,576 Corpus: \"Corpus: 41 train + 4 dev + 5 test sentences\"\n",
-      "2021-09-08 01:33:57,577 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:57,577 Parameters:\n",
-      "2021-09-08 01:33:57,577  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:33:57,577  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:33:57,578  - patience: \"3\"\n",
-      "2021-09-08 01:33:57,578  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:33:57,578  - max_epochs: \"10\"\n",
-      "2021-09-08 01:33:57,578  - shuffle: \"True\"\n",
-      "2021-09-08 01:33:57,579  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:33:57,579  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:33:57,579 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:57,580 Model training base path: \"temp1\"\n",
-      "2021-09-08 01:33:57,580 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:57,580 Device: cuda:0\n",
-      "2021-09-08 01:33:57,580 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:57,581 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:33:57,844 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:33:58,158 epoch 1 - iter 4/41 - loss 0.38852019 - samples/sec: 13.73 - lr: 0.020000\n",
-      "2021-09-08 01:33:58,457 epoch 1 - iter 8/41 - loss 0.30755679 - samples/sec: 13.39 - lr: 0.020000\n",
-      "2021-09-08 01:33:58,750 epoch 1 - iter 12/41 - loss 0.39045769 - samples/sec: 13.71 - lr: 0.020000\n",
-      "2021-09-08 01:33:59,027 epoch 1 - iter 16/41 - loss 0.50835891 - samples/sec: 14.46 - lr: 0.020000\n",
-      "2021-09-08 01:33:59,318 epoch 1 - iter 20/41 - loss 0.60636973 - samples/sec: 13.81 - lr: 0.020000\n",
-      "2021-09-08 01:33:59,540 epoch 1 - iter 24/41 - loss 0.67033566 - samples/sec: 18.05 - lr: 0.020000\n",
-      "2021-09-08 01:34:00,000 epoch 1 - iter 28/41 - loss 0.71492016 - samples/sec: 8.72 - lr: 0.020000\n",
-      "2021-09-08 01:34:00,303 epoch 1 - iter 32/41 - loss 0.69059431 - samples/sec: 13.21 - lr: 0.020000\n",
-      "2021-09-08 01:34:00,524 epoch 1 - iter 36/41 - loss 0.67248883 - samples/sec: 18.12 - lr: 0.020000\n",
-      "2021-09-08 01:34:00,804 epoch 1 - iter 40/41 - loss 0.67697561 - samples/sec: 14.35 - lr: 0.020000\n",
-      "2021-09-08 01:34:00,858 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:00,859 EPOCH 1 done: loss 0.6697 - lr 0.0200000\n",
-      "2021-09-08 01:34:04,720 DEV : loss 0.7947067022323608 - score 0.25\n",
-      "2021-09-08 01:34:04,721 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:34:10,682 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:11,007 epoch 2 - iter 4/41 - loss 1.35813357 - samples/sec: 13.44 - lr: 0.020000\n",
-      "2021-09-08 01:34:11,350 epoch 2 - iter 8/41 - loss 1.26057055 - samples/sec: 11.69 - lr: 0.020000\n",
-      "2021-09-08 01:34:11,625 epoch 2 - iter 12/41 - loss 1.05498442 - samples/sec: 14.61 - lr: 0.020000\n",
-      "2021-09-08 01:34:11,890 epoch 2 - iter 16/41 - loss 0.92510674 - samples/sec: 15.13 - lr: 0.020000\n",
-      "2021-09-08 01:34:12,303 epoch 2 - iter 20/41 - loss 0.83947493 - samples/sec: 9.70 - lr: 0.020000\n",
-      "2021-09-08 01:34:12,531 epoch 2 - iter 24/41 - loss 0.86093377 - samples/sec: 17.59 - lr: 0.020000\n",
-      "2021-09-08 01:34:12,819 epoch 2 - iter 28/41 - loss 0.81347980 - samples/sec: 13.88 - lr: 0.020000\n",
-      "2021-09-08 01:34:13,051 epoch 2 - iter 32/41 - loss 0.78343250 - samples/sec: 17.33 - lr: 0.020000\n",
-      "2021-09-08 01:34:13,346 epoch 2 - iter 36/41 - loss 0.77804444 - samples/sec: 13.56 - lr: 0.020000\n",
-      "2021-09-08 01:34:13,613 epoch 2 - iter 40/41 - loss 0.77865194 - samples/sec: 15.02 - lr: 0.020000\n",
-      "2021-09-08 01:34:13,764 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:13,765 EPOCH 2 done: loss 0.7702 - lr 0.0200000\n",
-      "2021-09-08 01:34:14,363 DEV : loss 0.3001296818256378 - score 1.0\n",
-      "2021-09-08 01:34:14,364 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:34:19,221 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:19,610 epoch 3 - iter 4/41 - loss 0.75615149 - samples/sec: 11.11 - lr: 0.020000\n",
-      "2021-09-08 01:34:19,904 epoch 3 - iter 8/41 - loss 0.69869491 - samples/sec: 13.61 - lr: 0.020000\n",
-      "2021-09-08 01:34:20,131 epoch 3 - iter 12/41 - loss 0.62479660 - samples/sec: 17.71 - lr: 0.020000\n",
-      "2021-09-08 01:34:20,406 epoch 3 - iter 16/41 - loss 0.58513171 - samples/sec: 14.58 - lr: 0.020000\n",
-      "2021-09-08 01:34:20,787 epoch 3 - iter 20/41 - loss 0.58506265 - samples/sec: 10.51 - lr: 0.020000\n",
-      "2021-09-08 01:34:21,080 epoch 3 - iter 24/41 - loss 0.56231620 - samples/sec: 13.69 - lr: 0.020000\n",
-      "2021-09-08 01:34:21,410 epoch 3 - iter 28/41 - loss 0.51862638 - samples/sec: 12.14 - lr: 0.020000\n",
-      "2021-09-08 01:34:21,722 epoch 3 - iter 32/41 - loss 0.51269551 - samples/sec: 12.87 - lr: 0.020000\n",
-      "2021-09-08 01:34:21,949 epoch 3 - iter 36/41 - loss 0.51798684 - samples/sec: 17.68 - lr: 0.020000\n",
-      "2021-09-08 01:34:22,215 epoch 3 - iter 40/41 - loss 0.53588583 - samples/sec: 15.05 - lr: 0.020000\n",
-      "2021-09-08 01:34:22,308 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:22,309 EPOCH 3 done: loss 0.5354 - lr 0.0200000\n",
-      "2021-09-08 01:34:22,494 DEV : loss 0.38607916235923767 - score 0.5\n",
-      "2021-09-08 01:34:22,495 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:34:22,515 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:23,073 epoch 4 - iter 4/41 - loss 0.55992325 - samples/sec: 7.41 - lr: 0.020000\n",
-      "2021-09-08 01:34:23,359 epoch 4 - iter 8/41 - loss 0.47886033 - samples/sec: 14.03 - lr: 0.020000\n",
-      "2021-09-08 01:34:23,625 epoch 4 - iter 12/41 - loss 0.35851041 - samples/sec: 15.04 - lr: 0.020000\n",
-      "2021-09-08 01:34:24,069 epoch 4 - iter 16/41 - loss 0.44902766 - samples/sec: 9.02 - lr: 0.020000\n",
-      "2021-09-08 01:34:24,390 epoch 4 - iter 20/41 - loss 0.41233884 - samples/sec: 12.51 - lr: 0.020000\n",
-      "2021-09-08 01:34:24,777 epoch 4 - iter 24/41 - loss 0.41590507 - samples/sec: 10.35 - lr: 0.020000\n",
-      "2021-09-08 01:34:25,196 epoch 4 - iter 28/41 - loss 0.49202637 - samples/sec: 9.57 - lr: 0.020000\n",
-      "2021-09-08 01:34:25,624 epoch 4 - iter 32/41 - loss 0.45662427 - samples/sec: 9.35 - lr: 0.020000\n",
-      "2021-09-08 01:34:25,901 epoch 4 - iter 36/41 - loss 0.46174761 - samples/sec: 14.50 - lr: 0.020000\n",
-      "2021-09-08 01:34:26,185 epoch 4 - iter 40/41 - loss 0.47931552 - samples/sec: 14.14 - lr: 0.020000\n",
-      "2021-09-08 01:34:26,277 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:26,277 EPOCH 4 done: loss 0.4768 - lr 0.0200000\n",
-      "2021-09-08 01:34:26,556 DEV : loss 0.4155237376689911 - score 1.0\n",
-      "2021-09-08 01:34:26,558 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:34:26,562 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:26,991 epoch 5 - iter 4/41 - loss 0.13919594 - samples/sec: 9.77 - lr: 0.020000\n",
-      "2021-09-08 01:34:27,370 epoch 5 - iter 8/41 - loss 0.47590045 - samples/sec: 10.58 - lr: 0.020000\n",
-      "2021-09-08 01:34:27,605 epoch 5 - iter 12/41 - loss 0.33974604 - samples/sec: 17.05 - lr: 0.020000\n",
-      "2021-09-08 01:34:27,970 epoch 5 - iter 16/41 - loss 0.36953428 - samples/sec: 11.00 - lr: 0.020000\n",
-      "2021-09-08 01:34:28,276 epoch 5 - iter 20/41 - loss 0.33498070 - samples/sec: 13.08 - lr: 0.020000\n",
-      "2021-09-08 01:34:28,634 epoch 5 - iter 24/41 - loss 0.34546181 - samples/sec: 11.20 - lr: 0.020000\n",
-      "2021-09-08 01:34:28,868 epoch 5 - iter 28/41 - loss 0.46411750 - samples/sec: 17.14 - lr: 0.020000\n",
-      "2021-09-08 01:34:29,123 epoch 5 - iter 32/41 - loss 0.51308273 - samples/sec: 15.73 - lr: 0.020000\n",
-      "2021-09-08 01:34:29,355 epoch 5 - iter 36/41 - loss 0.50721338 - samples/sec: 17.35 - lr: 0.020000\n",
-      "2021-09-08 01:34:29,610 epoch 5 - iter 40/41 - loss 0.48396892 - samples/sec: 15.72 - lr: 0.020000\n",
-      "2021-09-08 01:34:29,661 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:03:39,843 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:39,844 Corpus: \"Corpus: 41 train + 4 dev + 5 test sentences\"\n",
+      "2021-09-21 21:03:39,845 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:39,845 Parameters:\n",
+      "2021-09-21 21:03:39,846  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:03:39,846  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:03:39,847  - patience: \"3\"\n",
+      "2021-09-21 21:03:39,847  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:03:39,848  - max_epochs: \"10\"\n",
+      "2021-09-21 21:03:39,848  - shuffle: \"True\"\n",
+      "2021-09-21 21:03:39,849  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:03:39,849  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:03:39,850 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:39,850 Model training base path: \"temp1\"\n",
+      "2021-09-21 21:03:39,851 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
-     "name": "stdout",
+     "name": "stderr",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:34:29,661 EPOCH 5 done: loss 0.4856 - lr 0.0200000\n",
-      "2021-09-08 01:34:29,909 DEV : loss 0.3631609380245209 - score 0.25\n",
-      "2021-09-08 01:34:29,910 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:34:29,993 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:30,286 epoch 6 - iter 4/41 - loss 0.26192132 - samples/sec: 16.03 - lr: 0.020000\n",
-      "2021-09-08 01:34:30,564 epoch 6 - iter 8/41 - loss 0.24332363 - samples/sec: 14.44 - lr: 0.020000\n",
-      "2021-09-08 01:34:30,862 epoch 6 - iter 12/41 - loss 0.25397058 - samples/sec: 13.47 - lr: 0.020000\n",
-      "2021-09-08 01:34:31,107 epoch 6 - iter 16/41 - loss 0.27194821 - samples/sec: 16.38 - lr: 0.020000\n",
-      "2021-09-08 01:34:31,434 epoch 6 - iter 20/41 - loss 0.36535139 - samples/sec: 12.26 - lr: 0.020000\n",
-      "2021-09-08 01:34:31,716 epoch 6 - iter 24/41 - loss 0.37631324 - samples/sec: 14.21 - lr: 0.020000\n",
-      "2021-09-08 01:34:32,111 epoch 6 - iter 28/41 - loss 0.36011784 - samples/sec: 10.15 - lr: 0.020000\n",
-      "2021-09-08 01:34:32,442 epoch 6 - iter 32/41 - loss 0.41838006 - samples/sec: 12.13 - lr: 0.020000\n",
-      "2021-09-08 01:34:33,767 epoch 6 - iter 36/41 - loss 0.40997362 - samples/sec: 7.55 - lr: 0.020000\n",
-      "2021-09-08 01:34:34,044 epoch 6 - iter 40/41 - loss 0.40067195 - samples/sec: 14.49 - lr: 0.020000\n",
-      "2021-09-08 01:34:34,098 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:34,098 EPOCH 6 done: loss 0.3912 - lr 0.0200000\n",
-      "2021-09-08 01:34:34,414 DEV : loss 0.46637868881225586 - score 0.75\n",
-      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:34:34,415 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:34:34,421 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:34,827 epoch 7 - iter 4/41 - loss 0.14956633 - samples/sec: 11.37 - lr: 0.010000\n",
-      "2021-09-08 01:34:35,122 epoch 7 - iter 8/41 - loss 0.13095206 - samples/sec: 13.58 - lr: 0.010000\n",
-      "2021-09-08 01:34:35,401 epoch 7 - iter 12/41 - loss 0.20871294 - samples/sec: 14.39 - lr: 0.010000\n",
-      "2021-09-08 01:34:35,714 epoch 7 - iter 16/41 - loss 0.18724425 - samples/sec: 12.84 - lr: 0.010000\n",
-      "2021-09-08 01:34:36,068 epoch 7 - iter 20/41 - loss 0.16435897 - samples/sec: 11.33 - lr: 0.010000\n",
-      "2021-09-08 01:34:36,348 epoch 7 - iter 24/41 - loss 0.13998969 - samples/sec: 14.30 - lr: 0.010000\n",
-      "2021-09-08 01:34:36,581 epoch 7 - iter 28/41 - loss 0.13666545 - samples/sec: 17.28 - lr: 0.010000\n",
-      "2021-09-08 01:34:36,900 epoch 7 - iter 32/41 - loss 0.17640005 - samples/sec: 12.55 - lr: 0.010000\n",
-      "2021-09-08 01:34:37,176 epoch 7 - iter 36/41 - loss 0.21959507 - samples/sec: 14.54 - lr: 0.010000\n",
-      "2021-09-08 01:34:37,397 epoch 7 - iter 40/41 - loss 0.23623307 - samples/sec: 18.14 - lr: 0.010000\n",
-      "2021-09-08 01:34:37,450 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:37,451 EPOCH 7 done: loss 0.2305 - lr 0.0100000\n",
-      "2021-09-08 01:34:37,884 DEV : loss 0.7288554310798645 - score 0.25\n",
-      "2021-09-08 01:34:37,885 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:34:37,968 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:38,250 epoch 8 - iter 4/41 - loss 0.24091679 - samples/sec: 15.05 - lr: 0.010000\n",
-      "2021-09-08 01:34:38,594 epoch 8 - iter 8/41 - loss 0.33011625 - samples/sec: 11.66 - lr: 0.010000\n",
-      "2021-09-08 01:34:38,946 epoch 8 - iter 12/41 - loss 0.31473440 - samples/sec: 11.40 - lr: 0.010000\n",
-      "2021-09-08 01:34:39,211 epoch 8 - iter 16/41 - loss 0.25888184 - samples/sec: 15.13 - lr: 0.010000\n",
-      "2021-09-08 01:34:39,521 epoch 8 - iter 20/41 - loss 0.22858509 - samples/sec: 12.91 - lr: 0.010000\n",
-      "2021-09-08 01:34:39,853 epoch 8 - iter 24/41 - loss 0.23413193 - samples/sec: 12.09 - lr: 0.010000\n",
-      "2021-09-08 01:34:40,063 epoch 8 - iter 28/41 - loss 0.21299819 - samples/sec: 19.13 - lr: 0.010000\n",
-      "2021-09-08 01:34:40,339 epoch 8 - iter 32/41 - loss 0.19325496 - samples/sec: 14.52 - lr: 0.010000\n",
-      "2021-09-08 01:34:40,574 epoch 8 - iter 36/41 - loss 0.19787395 - samples/sec: 17.04 - lr: 0.010000\n",
-      "2021-09-08 01:34:40,829 epoch 8 - iter 40/41 - loss 0.21344357 - samples/sec: 15.72 - lr: 0.010000\n",
-      "2021-09-08 01:34:40,888 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:40,888 EPOCH 8 done: loss 0.2083 - lr 0.0100000\n",
-      "2021-09-08 01:34:41,633 DEV : loss 0.7543817758560181 - score 0.25\n",
-      "2021-09-08 01:34:41,634 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:34:41,734 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:41,994 epoch 9 - iter 4/41 - loss 0.26702415 - samples/sec: 16.71 - lr: 0.010000\n",
-      "2021-09-08 01:34:42,305 epoch 9 - iter 8/41 - loss 0.13919417 - samples/sec: 12.88 - lr: 0.010000\n",
-      "2021-09-08 01:34:42,806 epoch 9 - iter 12/41 - loss 0.16421466 - samples/sec: 8.01 - lr: 0.010000\n",
-      "2021-09-08 01:34:43,089 epoch 9 - iter 16/41 - loss 0.24726628 - samples/sec: 14.17 - lr: 0.010000\n",
-      "2021-09-08 01:34:43,414 epoch 9 - iter 20/41 - loss 0.21791688 - samples/sec: 12.34 - lr: 0.010000\n",
-      "2021-09-08 01:34:43,665 epoch 9 - iter 24/41 - loss 0.22863579 - samples/sec: 15.97 - lr: 0.010000\n",
-      "2021-09-08 01:34:43,968 epoch 9 - iter 28/41 - loss 0.20142049 - samples/sec: 13.22 - lr: 0.010000\n",
-      "2021-09-08 01:34:44,297 epoch 9 - iter 32/41 - loss 0.21012251 - samples/sec: 12.19 - lr: 0.010000\n",
-      "2021-09-08 01:34:44,670 epoch 9 - iter 36/41 - loss 0.23147013 - samples/sec: 10.76 - lr: 0.010000\n",
-      "2021-09-08 01:34:44,944 epoch 9 - iter 40/41 - loss 0.21990814 - samples/sec: 14.63 - lr: 0.010000\n",
-      "2021-09-08 01:34:45,008 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:45,009 EPOCH 9 done: loss 0.2149 - lr 0.0100000\n",
-      "2021-09-08 01:34:45,132 DEV : loss 0.7398620247840881 - score 0.25\n",
-      "2021-09-08 01:34:45,133 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:34:45,140 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:45,501 epoch 10 - iter 4/41 - loss 0.18132747 - samples/sec: 11.63 - lr: 0.010000\n",
-      "2021-09-08 01:34:45,874 epoch 10 - iter 8/41 - loss 0.28005994 - samples/sec: 10.72 - lr: 0.010000\n",
-      "2021-09-08 01:34:46,174 epoch 10 - iter 12/41 - loss 0.25330960 - samples/sec: 13.40 - lr: 0.010000\n",
-      "2021-09-08 01:34:46,520 epoch 10 - iter 16/41 - loss 0.19536850 - samples/sec: 11.56 - lr: 0.010000\n",
-      "2021-09-08 01:34:46,799 epoch 10 - iter 20/41 - loss 0.17601124 - samples/sec: 14.40 - lr: 0.010000\n",
-      "2021-09-08 01:34:47,029 epoch 10 - iter 24/41 - loss 0.15365805 - samples/sec: 17.46 - lr: 0.010000\n",
-      "2021-09-08 01:34:47,291 epoch 10 - iter 28/41 - loss 0.13531479 - samples/sec: 15.30 - lr: 0.010000\n",
-      "2021-09-08 01:34:47,485 epoch 10 - iter 32/41 - loss 0.12228196 - samples/sec: 20.69 - lr: 0.010000\n",
-      "2021-09-08 01:34:47,741 epoch 10 - iter 36/41 - loss 0.11436948 - samples/sec: 15.69 - lr: 0.010000\n",
-      "2021-09-08 01:34:47,979 epoch 10 - iter 40/41 - loss 0.11297121 - samples/sec: 16.86 - lr: 0.010000\n",
-      "2021-09-08 01:34:48,042 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:34:48,043 EPOCH 10 done: loss 0.1102 - lr 0.0100000\n",
-      "2021-09-08 01:34:48,287 DEV : loss 0.5873263478279114 - score 0.5\n",
-      "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 01:34:48,288 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:35:00,105 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:00,106 Testing using best model ...\n",
-      "2021-09-08 01:35:00,107 loading file temp1/best-model.pt\n",
-      "init TARS\n",
-      "2021-09-08 01:35:08,722 \t0.2\n",
-      "2021-09-08 01:35:08,723 \n",
-      "Results:\n",
-      "- F-score (micro) 0.2\n",
-      "- F-score (macro) 0.1\n",
-      "- Accuracy 0.2\n",
-      "\n",
-      "By class:\n",
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       awful     0.5000    0.5000    0.5000         2\n",
-      "         bad     0.0000    0.0000    0.0000         0\n",
-      "     neutral     0.0000    0.0000    0.0000         1\n",
-      "        good     0.0000    0.0000    0.0000         2\n",
-      "       great     0.0000    0.0000    0.0000         0\n",
-      "\n",
-      "   micro avg     0.2000    0.2000    0.2000         5\n",
-      "   macro avg     0.1000    0.1000    0.1000         5\n",
-      "weighted avg     0.2000    0.2000    0.2000         5\n",
-      " samples avg     0.2000    0.2000    0.2000         5\n",
       "\n"
      ]
     },
@@ -623,39 +429,214 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:35:08,723 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:50,164 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
-      "init TARS\n",
-      "2021-09-08 01:35:55,039 Computing label dictionary. Progress:\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "100%|██████████| 46/46 [00:00<00:00, 18626.95it/s]"
+      "2021-09-21 21:03:39,851 Device: cuda:0\n",
+      "2021-09-21 21:03:39,852 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:39,852 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:03:40,093 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:40,431 epoch 1 - iter 4/41 - loss 0.36670058 - samples/sec: 12.99 - lr: 0.020000\n",
+      "2021-09-21 21:03:40,733 epoch 1 - iter 8/41 - loss 0.24232501 - samples/sec: 13.25 - lr: 0.020000\n",
+      "2021-09-21 21:03:41,016 epoch 1 - iter 12/41 - loss 0.71039608 - samples/sec: 14.16 - lr: 0.020000\n",
+      "2021-09-21 21:03:41,307 epoch 1 - iter 16/41 - loss 0.70771621 - samples/sec: 13.81 - lr: 0.020000\n",
+      "2021-09-21 21:03:41,746 epoch 1 - iter 20/41 - loss 0.66823174 - samples/sec: 9.13 - lr: 0.020000\n",
+      "2021-09-21 21:03:42,060 epoch 1 - iter 24/41 - loss 0.69165820 - samples/sec: 12.77 - lr: 0.020000\n",
+      "2021-09-21 21:03:42,335 epoch 1 - iter 28/41 - loss 0.65948566 - samples/sec: 14.55 - lr: 0.020000\n",
+      "2021-09-21 21:03:42,595 epoch 1 - iter 32/41 - loss 0.64730256 - samples/sec: 15.43 - lr: 0.020000\n",
+      "2021-09-21 21:03:42,846 epoch 1 - iter 36/41 - loss 0.68589366 - samples/sec: 16.00 - lr: 0.020000\n",
+      "2021-09-21 21:03:43,111 epoch 1 - iter 40/41 - loss 0.68768595 - samples/sec: 15.14 - lr: 0.020000\n",
+      "2021-09-21 21:03:43,184 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:43,184 EPOCH 1 done: loss 0.6851 - lr 0.0200000\n",
+      "2021-09-21 21:03:45,386 DEV : loss 0.6035637855529785 - score 0.25\n",
+      "2021-09-21 21:03:45,387 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:03:50,002 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:50,459 epoch 2 - iter 4/41 - loss 0.80960231 - samples/sec: 9.44 - lr: 0.020000\n",
+      "2021-09-21 21:03:50,768 epoch 2 - iter 8/41 - loss 0.67260289 - samples/sec: 12.97 - lr: 0.020000\n",
+      "2021-09-21 21:03:51,077 epoch 2 - iter 12/41 - loss 0.66209657 - samples/sec: 13.01 - lr: 0.020000\n",
+      "2021-09-21 21:03:51,329 epoch 2 - iter 16/41 - loss 0.60196319 - samples/sec: 15.92 - lr: 0.020000\n",
+      "2021-09-21 21:03:51,552 epoch 2 - iter 20/41 - loss 0.58586655 - samples/sec: 17.99 - lr: 0.020000\n",
+      "2021-09-21 21:03:51,880 epoch 2 - iter 24/41 - loss 0.61679915 - samples/sec: 12.22 - lr: 0.020000\n",
+      "2021-09-21 21:03:52,135 epoch 2 - iter 28/41 - loss 0.59184957 - samples/sec: 15.73 - lr: 0.020000\n",
+      "2021-09-21 21:03:52,389 epoch 2 - iter 32/41 - loss 0.61297165 - samples/sec: 15.78 - lr: 0.020000\n",
+      "2021-09-21 21:03:52,641 epoch 2 - iter 36/41 - loss 0.60894496 - samples/sec: 15.93 - lr: 0.020000\n",
+      "2021-09-21 21:03:52,996 epoch 2 - iter 40/41 - loss 0.61390886 - samples/sec: 11.30 - lr: 0.020000\n",
+      "2021-09-21 21:03:53,063 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:03:53,064 EPOCH 2 done: loss 0.6234 - lr 0.0200000\n",
+      "2021-09-21 21:03:53,294 DEV : loss 0.34430742263793945 - score 0.75\n",
+      "2021-09-21 21:03:53,295 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:04:02,730 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:03,253 epoch 3 - iter 4/41 - loss 0.53443232 - samples/sec: 8.19 - lr: 0.020000\n",
+      "2021-09-21 21:04:03,649 epoch 3 - iter 8/41 - loss 0.48206240 - samples/sec: 10.13 - lr: 0.020000\n",
+      "2021-09-21 21:04:03,990 epoch 3 - iter 12/41 - loss 0.49703345 - samples/sec: 11.76 - lr: 0.020000\n",
+      "2021-09-21 21:04:04,653 epoch 3 - iter 16/41 - loss 0.44734715 - samples/sec: 6.04 - lr: 0.020000\n",
+      "2021-09-21 21:04:05,037 epoch 3 - iter 20/41 - loss 0.46816917 - samples/sec: 10.44 - lr: 0.020000\n",
+      "2021-09-21 21:04:05,374 epoch 3 - iter 24/41 - loss 0.47786404 - samples/sec: 11.89 - lr: 0.020000\n",
+      "2021-09-21 21:04:05,706 epoch 3 - iter 28/41 - loss 0.52325743 - samples/sec: 12.09 - lr: 0.020000\n",
+      "2021-09-21 21:04:06,164 epoch 3 - iter 32/41 - loss 0.50046178 - samples/sec: 8.75 - lr: 0.020000\n",
+      "2021-09-21 21:04:06,491 epoch 3 - iter 36/41 - loss 0.49410166 - samples/sec: 12.23 - lr: 0.020000\n",
+      "2021-09-21 21:04:06,914 epoch 3 - iter 40/41 - loss 0.49084635 - samples/sec: 9.48 - lr: 0.020000\n",
+      "2021-09-21 21:04:07,115 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:07,116 EPOCH 3 done: loss 0.5016 - lr 0.0200000\n",
+      "2021-09-21 21:04:07,322 DEV : loss 0.37803584337234497 - score 0.75\n",
+      "2021-09-21 21:04:07,325 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:04:07,329 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:07,839 epoch 4 - iter 4/41 - loss 0.64787788 - samples/sec: 8.47 - lr: 0.020000\n",
+      "2021-09-21 21:04:08,179 epoch 4 - iter 8/41 - loss 0.54169642 - samples/sec: 11.77 - lr: 0.020000\n",
+      "2021-09-21 21:04:08,577 epoch 4 - iter 12/41 - loss 0.52939147 - samples/sec: 10.07 - lr: 0.020000\n",
+      "2021-09-21 21:04:09,088 epoch 4 - iter 16/41 - loss 0.54216731 - samples/sec: 7.84 - lr: 0.020000\n",
+      "2021-09-21 21:04:09,560 epoch 4 - iter 20/41 - loss 0.52235061 - samples/sec: 8.49 - lr: 0.020000\n",
+      "2021-09-21 21:04:10,047 epoch 4 - iter 24/41 - loss 0.46651164 - samples/sec: 8.22 - lr: 0.020000\n",
+      "2021-09-21 21:04:10,326 epoch 4 - iter 28/41 - loss 0.46045004 - samples/sec: 14.38 - lr: 0.020000\n",
+      "2021-09-21 21:04:10,689 epoch 4 - iter 32/41 - loss 0.42542277 - samples/sec: 11.07 - lr: 0.020000\n",
+      "2021-09-21 21:04:10,977 epoch 4 - iter 36/41 - loss 0.40475567 - samples/sec: 13.93 - lr: 0.020000\n",
+      "2021-09-21 21:04:11,242 epoch 4 - iter 40/41 - loss 0.40264813 - samples/sec: 15.10 - lr: 0.020000\n",
+      "2021-09-21 21:04:11,311 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:11,312 EPOCH 4 done: loss 0.3949 - lr 0.0200000\n",
+      "2021-09-21 21:04:11,546 DEV : loss 0.38551753759384155 - score 0.5\n",
+      "2021-09-21 21:04:11,547 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:04:11,626 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:11,903 epoch 5 - iter 4/41 - loss 0.39361719 - samples/sec: 15.98 - lr: 0.020000\n",
+      "2021-09-21 21:04:12,159 epoch 5 - iter 8/41 - loss 0.29380825 - samples/sec: 15.70 - lr: 0.020000\n",
+      "2021-09-21 21:04:12,467 epoch 5 - iter 12/41 - loss 0.34866090 - samples/sec: 13.03 - lr: 0.020000\n",
+      "2021-09-21 21:04:12,727 epoch 5 - iter 16/41 - loss 0.26526237 - samples/sec: 15.43 - lr: 0.020000\n",
+      "2021-09-21 21:04:12,978 epoch 5 - iter 20/41 - loss 0.31222834 - samples/sec: 16.00 - lr: 0.020000\n",
+      "2021-09-21 21:04:13,337 epoch 5 - iter 24/41 - loss 0.29650462 - samples/sec: 11.18 - lr: 0.020000\n",
+      "2021-09-21 21:04:13,648 epoch 5 - iter 28/41 - loss 0.34310051 - samples/sec: 12.91 - lr: 0.020000\n",
+      "2021-09-21 21:04:13,973 epoch 5 - iter 32/41 - loss 0.34337143 - samples/sec: 12.34 - lr: 0.020000\n",
+      "2021-09-21 21:04:14,279 epoch 5 - iter 36/41 - loss 0.35491843 - samples/sec: 13.11 - lr: 0.020000\n",
+      "2021-09-21 21:04:14,717 epoch 5 - iter 40/41 - loss 0.33211038 - samples/sec: 14.81 - lr: 0.020000\n",
+      "2021-09-21 21:04:14,795 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:14,796 EPOCH 5 done: loss 0.3390 - lr 0.0200000\n",
+      "2021-09-21 21:04:14,966 DEV : loss 0.3338642120361328 - score 0.75\n",
+      "2021-09-21 21:04:14,968 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:04:28,024 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:28,351 epoch 6 - iter 4/41 - loss 0.49381320 - samples/sec: 12.92 - lr: 0.020000\n",
+      "2021-09-21 21:04:28,639 epoch 6 - iter 8/41 - loss 0.52972163 - samples/sec: 13.91 - lr: 0.020000\n",
+      "2021-09-21 21:04:29,298 epoch 6 - iter 12/41 - loss 0.44846931 - samples/sec: 6.08 - lr: 0.020000\n",
+      "2021-09-21 21:04:29,674 epoch 6 - iter 16/41 - loss 0.47432321 - samples/sec: 10.68 - lr: 0.020000\n",
+      "2021-09-21 21:04:29,934 epoch 6 - iter 20/41 - loss 0.46601052 - samples/sec: 15.48 - lr: 0.020000\n",
+      "2021-09-21 21:04:30,240 epoch 6 - iter 24/41 - loss 0.42339037 - samples/sec: 13.09 - lr: 0.020000\n",
+      "2021-09-21 21:04:30,478 epoch 6 - iter 28/41 - loss 0.41452712 - samples/sec: 16.85 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:35:55,043 [b'awful', b'bad', b'neutral', b'good', b'great']\n"
+      "2021-09-21 21:04:30,878 epoch 6 - iter 32/41 - loss 0.43609254 - samples/sec: 10.01 - lr: 0.020000\n",
+      "2021-09-21 21:04:31,266 epoch 6 - iter 36/41 - loss 0.42052369 - samples/sec: 10.33 - lr: 0.020000\n",
+      "2021-09-21 21:04:31,595 epoch 6 - iter 40/41 - loss 0.42621260 - samples/sec: 12.18 - lr: 0.020000\n",
+      "2021-09-21 21:04:31,671 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:31,671 EPOCH 6 done: loss 0.4160 - lr 0.0200000\n",
+      "2021-09-21 21:04:32,287 DEV : loss 0.26112204790115356 - score 0.5\n",
+      "2021-09-21 21:04:32,288 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:04:32,290 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:32,794 epoch 7 - iter 4/41 - loss 0.41554037 - samples/sec: 8.55 - lr: 0.020000\n",
+      "2021-09-21 21:04:33,155 epoch 7 - iter 8/41 - loss 0.31564181 - samples/sec: 11.10 - lr: 0.020000\n",
+      "2021-09-21 21:04:33,615 epoch 7 - iter 12/41 - loss 0.45634037 - samples/sec: 8.71 - lr: 0.020000\n",
+      "2021-09-21 21:04:33,979 epoch 7 - iter 16/41 - loss 0.38314413 - samples/sec: 11.01 - lr: 0.020000\n",
+      "2021-09-21 21:04:34,412 epoch 7 - iter 20/41 - loss 0.38155562 - samples/sec: 9.26 - lr: 0.020000\n",
+      "2021-09-21 21:04:34,710 epoch 7 - iter 24/41 - loss 0.41134851 - samples/sec: 13.44 - lr: 0.020000\n",
+      "2021-09-21 21:04:35,263 epoch 7 - iter 28/41 - loss 0.36963496 - samples/sec: 7.24 - lr: 0.020000\n",
+      "2021-09-21 21:04:35,651 epoch 7 - iter 32/41 - loss 0.34285011 - samples/sec: 10.34 - lr: 0.020000\n",
+      "2021-09-21 21:04:36,038 epoch 7 - iter 36/41 - loss 0.31548193 - samples/sec: 10.34 - lr: 0.020000\n",
+      "2021-09-21 21:04:36,402 epoch 7 - iter 40/41 - loss 0.36973403 - samples/sec: 11.02 - lr: 0.020000\n",
+      "2021-09-21 21:04:36,461 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:36,461 EPOCH 7 done: loss 0.3719 - lr 0.0200000\n",
+      "2021-09-21 21:04:36,699 DEV : loss 0.3108387589454651 - score 0.5\n",
+      "2021-09-21 21:04:36,701 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:04:36,707 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:37,105 epoch 8 - iter 4/41 - loss 0.50521514 - samples/sec: 10.51 - lr: 0.020000\n",
+      "2021-09-21 21:04:37,327 epoch 8 - iter 8/41 - loss 0.41947115 - samples/sec: 18.06 - lr: 0.020000\n",
+      "2021-09-21 21:04:37,683 epoch 8 - iter 12/41 - loss 0.34705241 - samples/sec: 11.26 - lr: 0.020000\n",
+      "2021-09-21 21:04:37,977 epoch 8 - iter 16/41 - loss 0.30070886 - samples/sec: 13.63 - lr: 0.020000\n",
+      "2021-09-21 21:04:38,260 epoch 8 - iter 20/41 - loss 0.29066928 - samples/sec: 14.15 - lr: 0.020000\n",
+      "2021-09-21 21:04:38,455 epoch 8 - iter 24/41 - loss 0.31028592 - samples/sec: 20.62 - lr: 0.020000\n",
+      "2021-09-21 21:04:38,650 epoch 8 - iter 28/41 - loss 0.30010927 - samples/sec: 20.54 - lr: 0.020000\n",
+      "2021-09-21 21:04:38,835 epoch 8 - iter 32/41 - loss 0.30015879 - samples/sec: 21.65 - lr: 0.020000\n",
+      "2021-09-21 21:04:39,198 epoch 8 - iter 36/41 - loss 0.26888615 - samples/sec: 11.06 - lr: 0.020000\n",
+      "2021-09-21 21:04:39,541 epoch 8 - iter 40/41 - loss 0.27890033 - samples/sec: 11.69 - lr: 0.020000\n",
+      "2021-09-21 21:04:39,622 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:39,622 EPOCH 8 done: loss 0.2987 - lr 0.0200000\n",
+      "2021-09-21 21:04:39,836 DEV : loss 0.7036451697349548 - score 0.5\n",
+      "2021-09-21 21:04:39,836 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:04:39,927 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:40,259 epoch 9 - iter 4/41 - loss 0.20577079 - samples/sec: 12.71 - lr: 0.020000\n",
+      "2021-09-21 21:04:40,450 epoch 9 - iter 8/41 - loss 0.37729619 - samples/sec: 21.03 - lr: 0.020000\n",
+      "2021-09-21 21:04:40,776 epoch 9 - iter 12/41 - loss 0.26957555 - samples/sec: 12.31 - lr: 0.020000\n",
+      "2021-09-21 21:04:40,964 epoch 9 - iter 16/41 - loss 0.30153649 - samples/sec: 21.38 - lr: 0.020000\n",
+      "2021-09-21 21:04:41,152 epoch 9 - iter 20/41 - loss 0.31731258 - samples/sec: 21.31 - lr: 0.020000\n",
+      "2021-09-21 21:04:41,383 epoch 9 - iter 24/41 - loss 0.27788385 - samples/sec: 17.32 - lr: 0.020000\n",
+      "2021-09-21 21:04:41,664 epoch 9 - iter 28/41 - loss 0.29918061 - samples/sec: 14.28 - lr: 0.020000\n",
+      "2021-09-21 21:04:42,018 epoch 9 - iter 32/41 - loss 0.26763898 - samples/sec: 11.32 - lr: 0.020000\n",
+      "2021-09-21 21:04:42,324 epoch 9 - iter 36/41 - loss 0.25900356 - samples/sec: 13.12 - lr: 0.020000\n",
+      "2021-09-21 21:04:42,631 epoch 9 - iter 40/41 - loss 0.26398839 - samples/sec: 13.06 - lr: 0.020000\n",
+      "2021-09-21 21:04:42,697 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:42,697 EPOCH 9 done: loss 0.2666 - lr 0.0200000\n",
+      "2021-09-21 21:04:42,916 DEV : loss 0.383323609828949 - score 0.5\n",
+      "Epoch     9: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:04:42,916 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:04:42,993 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:43,307 epoch 10 - iter 4/41 - loss 0.13108416 - samples/sec: 13.54 - lr: 0.010000\n",
+      "2021-09-21 21:04:43,552 epoch 10 - iter 8/41 - loss 0.24995728 - samples/sec: 16.38 - lr: 0.010000\n",
+      "2021-09-21 21:04:43,789 epoch 10 - iter 12/41 - loss 0.20898543 - samples/sec: 16.92 - lr: 0.010000\n",
+      "2021-09-21 21:04:44,077 epoch 10 - iter 16/41 - loss 0.15953902 - samples/sec: 13.94 - lr: 0.010000\n",
+      "2021-09-21 21:04:44,356 epoch 10 - iter 20/41 - loss 0.15121789 - samples/sec: 14.35 - lr: 0.010000\n",
+      "2021-09-21 21:04:44,801 epoch 10 - iter 24/41 - loss 0.22993449 - samples/sec: 9.00 - lr: 0.010000\n",
+      "2021-09-21 21:04:45,045 epoch 10 - iter 28/41 - loss 0.19934649 - samples/sec: 16.44 - lr: 0.010000\n",
+      "2021-09-21 21:04:50,706 epoch 10 - iter 32/41 - loss 0.17530545 - samples/sec: 9.95 - lr: 0.010000\n",
+      "2021-09-21 21:04:51,027 epoch 10 - iter 36/41 - loss 0.16379697 - samples/sec: 12.50 - lr: 0.010000\n",
+      "2021-09-21 21:04:51,447 epoch 10 - iter 40/41 - loss 0.20364010 - samples/sec: 9.54 - lr: 0.010000\n",
+      "2021-09-21 21:04:51,521 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:04:51,522 EPOCH 10 done: loss 0.1991 - lr 0.0100000\n",
+      "2021-09-21 21:04:51,752 DEV : loss 0.4923034906387329 - score 0.5\n",
+      "2021-09-21 21:04:51,753 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:05:02,185 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:02,186 Testing using best model ...\n",
+      "2021-09-21 21:05:02,188 loading file temp1/best-model.pt\n",
+      "init TARS\n",
+      "2021-09-21 21:05:07,899 \t0.4\n",
+      "2021-09-21 21:05:07,900 \n",
+      "Results:\n",
+      "- F-score (micro) 0.4\n",
+      "- F-score (macro) 0.4\n",
+      "- Accuracy 0.4\n",
+      "\n",
+      "By class:\n",
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "       awful     1.0000    1.0000    1.0000         1\n",
+      "         bad     1.0000    1.0000    1.0000         1\n",
+      "     neutral     0.0000    0.0000    0.0000         0\n",
+      "        good     0.0000    0.0000    0.0000         3\n",
+      "       great     0.0000    0.0000    0.0000         0\n",
+      "\n",
+      "   micro avg     0.4000    0.4000    0.4000         5\n",
+      "   macro avg     0.4000    0.4000    0.4000         5\n",
+      "weighted avg     0.4000    0.4000    0.4000         5\n",
+      " samples avg     0.4000    0.4000    0.4000         5\n",
+      "\n",
+      "2021-09-21 21:05:07,900 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:05:56,657 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
+      "init TARS\n",
+      "2021-09-21 21:06:01,336 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\n"
+      "100%|██████████| 46/46 [00:00<00:00, 15840.56it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:35:55,529 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:55,531 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:06:01,341 [b'awful', b'bad', b'neutral', b'good', b'great']\n",
+      "2021-09-21 21:06:01,465 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:01,467 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -968,208 +949,30 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:35:55,532 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:55,532 Corpus: \"Corpus: 41 train + 4 dev + 5 test sentences\"\n",
-      "2021-09-08 01:35:55,532 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:55,532 Parameters:\n",
-      "2021-09-08 01:35:55,533  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:35:55,533  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:35:55,533  - patience: \"3\"\n",
-      "2021-09-08 01:35:55,534  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:35:55,534  - max_epochs: \"10\"\n",
-      "2021-09-08 01:35:55,534  - shuffle: \"True\"\n",
-      "2021-09-08 01:35:55,534  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:35:55,535  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:35:55,535 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:55,535 Model training base path: \"temp1\"\n",
-      "2021-09-08 01:35:55,536 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:55,536 Device: cuda:0\n",
-      "2021-09-08 01:35:55,536 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:55,536 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:35:55,543 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:55,840 epoch 1 - iter 4/41 - loss 0.48587575 - samples/sec: 14.49 - lr: 0.020000\n",
-      "2021-09-08 01:35:56,148 epoch 1 - iter 8/41 - loss 0.72700812 - samples/sec: 13.05 - lr: 0.020000\n",
-      "2021-09-08 01:35:56,443 epoch 1 - iter 12/41 - loss 0.68136527 - samples/sec: 13.56 - lr: 0.020000\n",
-      "2021-09-08 01:35:56,924 epoch 1 - iter 16/41 - loss 0.71534038 - samples/sec: 8.33 - lr: 0.020000\n",
-      "2021-09-08 01:35:57,177 epoch 1 - iter 20/41 - loss 0.68702886 - samples/sec: 15.84 - lr: 0.020000\n",
-      "2021-09-08 01:35:57,480 epoch 1 - iter 24/41 - loss 0.68353230 - samples/sec: 13.24 - lr: 0.020000\n",
-      "2021-09-08 01:35:57,744 epoch 1 - iter 28/41 - loss 0.70767374 - samples/sec: 15.18 - lr: 0.020000\n",
-      "2021-09-08 01:35:58,064 epoch 1 - iter 32/41 - loss 0.64429984 - samples/sec: 12.53 - lr: 0.020000\n",
-      "2021-09-08 01:35:58,335 epoch 1 - iter 36/41 - loss 0.67793818 - samples/sec: 14.82 - lr: 0.020000\n",
-      "2021-09-08 01:35:58,631 epoch 1 - iter 40/41 - loss 0.62867808 - samples/sec: 13.55 - lr: 0.020000\n",
-      "2021-09-08 01:35:58,722 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:35:58,722 EPOCH 1 done: loss 0.6198 - lr 0.0200000\n",
-      "2021-09-08 01:35:59,048 DEV : loss 0.6919914484024048 - score 0.25\n",
-      "2021-09-08 01:35:59,049 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:36:02,875 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:03,302 epoch 2 - iter 4/41 - loss 1.69003725 - samples/sec: 10.83 - lr: 0.020000\n",
-      "2021-09-08 01:36:03,615 epoch 2 - iter 8/41 - loss 1.23956323 - samples/sec: 12.82 - lr: 0.020000\n",
-      "2021-09-08 01:36:03,918 epoch 2 - iter 12/41 - loss 1.05758271 - samples/sec: 13.20 - lr: 0.020000\n",
-      "2021-09-08 01:36:04,276 epoch 2 - iter 16/41 - loss 0.95929985 - samples/sec: 11.21 - lr: 0.020000\n",
-      "2021-09-08 01:36:04,488 epoch 2 - iter 20/41 - loss 0.90487020 - samples/sec: 18.89 - lr: 0.020000\n",
-      "2021-09-08 01:36:04,740 epoch 2 - iter 24/41 - loss 0.84255917 - samples/sec: 15.94 - lr: 0.020000\n",
-      "2021-09-08 01:36:05,005 epoch 2 - iter 28/41 - loss 0.84050115 - samples/sec: 15.13 - lr: 0.020000\n",
-      "2021-09-08 01:36:05,218 epoch 2 - iter 32/41 - loss 0.81760945 - samples/sec: 18.79 - lr: 0.020000\n",
-      "2021-09-08 01:36:05,554 epoch 2 - iter 36/41 - loss 0.77906963 - samples/sec: 11.93 - lr: 0.020000\n",
-      "2021-09-08 01:36:05,789 epoch 2 - iter 40/41 - loss 0.74439067 - samples/sec: 17.11 - lr: 0.020000\n",
-      "2021-09-08 01:36:05,872 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:05,873 EPOCH 2 done: loss 0.7315 - lr 0.0200000\n",
-      "2021-09-08 01:36:06,297 DEV : loss 0.33316075801849365 - score 0.5\n",
-      "2021-09-08 01:36:06,298 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:36:12,530 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:12,820 epoch 3 - iter 4/41 - loss 0.51977962 - samples/sec: 15.85 - lr: 0.020000\n",
-      "2021-09-08 01:36:13,070 epoch 3 - iter 8/41 - loss 0.69679976 - samples/sec: 16.05 - lr: 0.020000\n",
-      "2021-09-08 01:36:13,327 epoch 3 - iter 12/41 - loss 0.54317109 - samples/sec: 15.62 - lr: 0.020000\n",
-      "2021-09-08 01:36:13,678 epoch 3 - iter 16/41 - loss 0.51312261 - samples/sec: 11.40 - lr: 0.020000\n",
-      "2021-09-08 01:36:13,889 epoch 3 - iter 20/41 - loss 0.54032200 - samples/sec: 19.06 - lr: 0.020000\n",
-      "2021-09-08 01:36:14,207 epoch 3 - iter 24/41 - loss 0.54480676 - samples/sec: 12.59 - lr: 0.020000\n",
-      "2021-09-08 01:36:14,578 epoch 3 - iter 28/41 - loss 0.52088477 - samples/sec: 10.82 - lr: 0.020000\n",
-      "2021-09-08 01:36:14,802 epoch 3 - iter 32/41 - loss 0.47928603 - samples/sec: 17.86 - lr: 0.020000\n",
-      "2021-09-08 01:36:15,074 epoch 3 - iter 36/41 - loss 0.52361609 - samples/sec: 14.76 - lr: 0.020000\n",
-      "2021-09-08 01:36:15,322 epoch 3 - iter 40/41 - loss 0.52935353 - samples/sec: 16.21 - lr: 0.020000\n",
-      "2021-09-08 01:36:15,410 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:15,411 EPOCH 3 done: loss 0.5314 - lr 0.0200000\n",
-      "2021-09-08 01:36:15,888 DEV : loss 0.5060239434242249 - score 0.5\n",
-      "2021-09-08 01:36:15,890 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:36:16,183 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:16,553 epoch 4 - iter 4/41 - loss 0.40424558 - samples/sec: 11.27 - lr: 0.020000\n",
-      "2021-09-08 01:36:16,786 epoch 4 - iter 8/41 - loss 0.48760065 - samples/sec: 17.24 - lr: 0.020000\n",
-      "2021-09-08 01:36:17,040 epoch 4 - iter 12/41 - loss 0.35430659 - samples/sec: 15.80 - lr: 0.020000\n",
-      "2021-09-08 01:36:17,288 epoch 4 - iter 16/41 - loss 0.28590289 - samples/sec: 16.14 - lr: 0.020000\n",
-      "2021-09-08 01:36:17,567 epoch 4 - iter 20/41 - loss 0.39921267 - samples/sec: 14.40 - lr: 0.020000\n",
-      "2021-09-08 01:36:17,869 epoch 4 - iter 24/41 - loss 0.46794793 - samples/sec: 13.27 - lr: 0.020000\n",
-      "2021-09-08 01:36:18,092 epoch 4 - iter 28/41 - loss 0.43368870 - samples/sec: 17.96 - lr: 0.020000\n",
-      "2021-09-08 01:36:18,388 epoch 4 - iter 32/41 - loss 0.44337887 - samples/sec: 13.58 - lr: 0.020000\n",
-      "2021-09-08 01:36:18,733 epoch 4 - iter 36/41 - loss 0.43477382 - samples/sec: 11.59 - lr: 0.020000\n",
-      "2021-09-08 01:36:18,926 epoch 4 - iter 40/41 - loss 0.44505870 - samples/sec: 20.81 - lr: 0.020000\n",
-      "2021-09-08 01:36:19,011 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:19,011 EPOCH 4 done: loss 0.4772 - lr 0.0200000\n",
-      "2021-09-08 01:36:19,300 DEV : loss 0.5447602272033691 - score 0.5\n",
-      "2021-09-08 01:36:19,301 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:36:19,303 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:19,531 epoch 5 - iter 4/41 - loss 0.11409357 - samples/sec: 19.28 - lr: 0.020000\n",
-      "2021-09-08 01:36:19,786 epoch 5 - iter 8/41 - loss 0.30806662 - samples/sec: 15.69 - lr: 0.020000\n",
-      "2021-09-08 01:36:19,984 epoch 5 - iter 12/41 - loss 0.32497599 - samples/sec: 20.32 - lr: 0.020000\n",
-      "2021-09-08 01:36:20,256 epoch 5 - iter 16/41 - loss 0.30664451 - samples/sec: 14.73 - lr: 0.020000\n",
-      "2021-09-08 01:36:20,484 epoch 5 - iter 20/41 - loss 0.29755341 - samples/sec: 17.60 - lr: 0.020000\n",
-      "2021-09-08 01:36:20,802 epoch 5 - iter 24/41 - loss 0.25238050 - samples/sec: 12.61 - lr: 0.020000\n",
-      "2021-09-08 01:36:21,120 epoch 5 - iter 28/41 - loss 0.35210428 - samples/sec: 12.62 - lr: 0.020000\n",
-      "2021-09-08 01:36:21,382 epoch 5 - iter 32/41 - loss 0.42480183 - samples/sec: 15.26 - lr: 0.020000\n",
-      "2021-09-08 01:36:21,675 epoch 5 - iter 36/41 - loss 0.41557097 - samples/sec: 13.68 - lr: 0.020000\n",
-      "2021-09-08 01:36:21,935 epoch 5 - iter 40/41 - loss 0.42628608 - samples/sec: 15.45 - lr: 0.020000\n",
-      "2021-09-08 01:36:22,008 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:06:01,468 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:01,468 Corpus: \"Corpus: 41 train + 4 dev + 5 test sentences\"\n",
+      "2021-09-21 21:06:01,469 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:01,469 Parameters:\n",
+      "2021-09-21 21:06:01,469  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:06:01,470  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:06:01,470  - patience: \"3\"\n",
+      "2021-09-21 21:06:01,470  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:06:01,471  - max_epochs: \"10\"\n",
+      "2021-09-21 21:06:01,471  - shuffle: \"True\"\n",
+      "2021-09-21 21:06:01,471  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:06:01,471  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:06:01,472 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:01,472 Model training base path: \"temp1\"\n",
+      "2021-09-21 21:06:01,472 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:01,473 Device: cuda:0\n",
+      "2021-09-21 21:06:01,473 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:01,473 Embeddings storage mode: cpu\n"
      ]
     },
     {
-     "name": "stdout",
+     "name": "stderr",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:36:22,008 EPOCH 5 done: loss 0.4160 - lr 0.0200000\n",
-      "2021-09-08 01:36:22,171 DEV : loss 0.7200701236724854 - score 0.25\n",
-      "2021-09-08 01:36:22,172 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:36:22,174 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:22,479 epoch 6 - iter 4/41 - loss 0.23607156 - samples/sec: 13.95 - lr: 0.020000\n",
-      "2021-09-08 01:36:22,772 epoch 6 - iter 8/41 - loss 0.37100006 - samples/sec: 13.69 - lr: 0.020000\n",
-      "2021-09-08 01:36:23,134 epoch 6 - iter 12/41 - loss 0.44604606 - samples/sec: 11.07 - lr: 0.020000\n",
-      "2021-09-08 01:36:23,359 epoch 6 - iter 16/41 - loss 0.39725770 - samples/sec: 17.84 - lr: 0.020000\n",
-      "2021-09-08 01:36:23,579 epoch 6 - iter 20/41 - loss 0.38491764 - samples/sec: 18.26 - lr: 0.020000\n",
-      "2021-09-08 01:36:23,825 epoch 6 - iter 24/41 - loss 0.36861933 - samples/sec: 16.28 - lr: 0.020000\n",
-      "2021-09-08 01:36:24,124 epoch 6 - iter 28/41 - loss 0.36424127 - samples/sec: 13.40 - lr: 0.020000\n",
-      "2021-09-08 01:36:24,463 epoch 6 - iter 32/41 - loss 0.34007125 - samples/sec: 11.82 - lr: 0.020000\n",
-      "2021-09-08 01:36:24,743 epoch 6 - iter 36/41 - loss 0.35652706 - samples/sec: 14.33 - lr: 0.020000\n",
-      "2021-09-08 01:36:25,050 epoch 6 - iter 40/41 - loss 0.38652624 - samples/sec: 13.08 - lr: 0.020000\n",
-      "2021-09-08 01:36:25,113 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:25,113 EPOCH 6 done: loss 0.3842 - lr 0.0200000\n",
-      "2021-09-08 01:36:25,302 DEV : loss 0.4239726960659027 - score 0.75\n",
-      "2021-09-08 01:36:25,303 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:36:30,350 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:30,656 epoch 7 - iter 4/41 - loss 0.58085623 - samples/sec: 15.07 - lr: 0.020000\n",
-      "2021-09-08 01:36:30,951 epoch 7 - iter 8/41 - loss 0.44087113 - samples/sec: 13.57 - lr: 0.020000\n",
-      "2021-09-08 01:36:31,174 epoch 7 - iter 12/41 - loss 0.34681610 - samples/sec: 18.03 - lr: 0.020000\n",
-      "2021-09-08 01:36:31,524 epoch 7 - iter 16/41 - loss 0.32589697 - samples/sec: 11.42 - lr: 0.020000\n",
-      "2021-09-08 01:36:31,794 epoch 7 - iter 20/41 - loss 0.27872642 - samples/sec: 14.89 - lr: 0.020000\n",
-      "2021-09-08 01:36:32,074 epoch 7 - iter 24/41 - loss 0.30255241 - samples/sec: 14.30 - lr: 0.020000\n",
-      "2021-09-08 01:36:32,345 epoch 7 - iter 28/41 - loss 0.33690748 - samples/sec: 14.77 - lr: 0.020000\n",
-      "2021-09-08 01:36:32,632 epoch 7 - iter 32/41 - loss 0.35324154 - samples/sec: 13.98 - lr: 0.020000\n",
-      "2021-09-08 01:36:32,906 epoch 7 - iter 36/41 - loss 0.36556604 - samples/sec: 14.63 - lr: 0.020000\n",
-      "2021-09-08 01:36:33,169 epoch 7 - iter 40/41 - loss 0.37360643 - samples/sec: 15.27 - lr: 0.020000\n",
-      "2021-09-08 01:36:33,265 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:33,266 EPOCH 7 done: loss 0.3652 - lr 0.0200000\n",
-      "2021-09-08 01:36:33,526 DEV : loss 0.6230975985527039 - score 0.75\n",
-      "2021-09-08 01:36:33,527 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:36:33,529 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:33,774 epoch 8 - iter 4/41 - loss 0.74657855 - samples/sec: 17.56 - lr: 0.020000\n",
-      "2021-09-08 01:36:34,052 epoch 8 - iter 8/41 - loss 0.38847134 - samples/sec: 14.46 - lr: 0.020000\n",
-      "2021-09-08 01:36:34,334 epoch 8 - iter 12/41 - loss 0.30284249 - samples/sec: 14.21 - lr: 0.020000\n",
-      "2021-09-08 01:36:34,616 epoch 8 - iter 16/41 - loss 0.30541185 - samples/sec: 14.19 - lr: 0.020000\n",
-      "2021-09-08 01:36:34,840 epoch 8 - iter 20/41 - loss 0.25960962 - samples/sec: 17.93 - lr: 0.020000\n",
-      "2021-09-08 01:36:35,076 epoch 8 - iter 24/41 - loss 0.27511596 - samples/sec: 16.98 - lr: 0.020000\n",
-      "2021-09-08 01:36:35,446 epoch 8 - iter 28/41 - loss 0.26833223 - samples/sec: 10.84 - lr: 0.020000\n",
-      "2021-09-08 01:36:35,649 epoch 8 - iter 32/41 - loss 0.30100857 - samples/sec: 19.81 - lr: 0.020000\n",
-      "2021-09-08 01:36:35,962 epoch 8 - iter 36/41 - loss 0.33929173 - samples/sec: 12.79 - lr: 0.020000\n",
-      "2021-09-08 01:36:36,196 epoch 8 - iter 40/41 - loss 0.34343327 - samples/sec: 17.17 - lr: 0.020000\n",
-      "2021-09-08 01:36:36,289 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:36,290 EPOCH 8 done: loss 0.3385 - lr 0.0200000\n",
-      "2021-09-08 01:36:36,476 DEV : loss 0.4191916584968567 - score 0.5\n",
-      "2021-09-08 01:36:36,477 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:36:36,479 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:36,751 epoch 9 - iter 4/41 - loss 0.14047923 - samples/sec: 15.54 - lr: 0.020000\n",
-      "2021-09-08 01:36:37,106 epoch 9 - iter 8/41 - loss 0.15599530 - samples/sec: 11.30 - lr: 0.020000\n",
-      "2021-09-08 01:36:37,319 epoch 9 - iter 12/41 - loss 0.26742530 - samples/sec: 18.80 - lr: 0.020000\n",
-      "2021-09-08 01:36:37,566 epoch 9 - iter 16/41 - loss 0.23392034 - samples/sec: 16.24 - lr: 0.020000\n",
-      "2021-09-08 01:36:37,814 epoch 9 - iter 20/41 - loss 0.29139295 - samples/sec: 16.16 - lr: 0.020000\n",
-      "2021-09-08 01:36:38,133 epoch 9 - iter 24/41 - loss 0.24449149 - samples/sec: 12.58 - lr: 0.020000\n",
-      "2021-09-08 01:36:38,325 epoch 9 - iter 28/41 - loss 0.22836301 - samples/sec: 20.86 - lr: 0.020000\n",
-      "2021-09-08 01:36:38,668 epoch 9 - iter 32/41 - loss 0.24012620 - samples/sec: 11.70 - lr: 0.020000\n",
-      "2021-09-08 01:36:38,975 epoch 9 - iter 36/41 - loss 0.21696121 - samples/sec: 13.06 - lr: 0.020000\n",
-      "2021-09-08 01:36:39,250 epoch 9 - iter 40/41 - loss 0.23661550 - samples/sec: 14.58 - lr: 0.020000\n",
-      "2021-09-08 01:36:39,319 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:39,319 EPOCH 9 done: loss 0.2516 - lr 0.0200000\n",
-      "2021-09-08 01:36:39,550 DEV : loss 0.6333110928535461 - score 0.5\n",
-      "2021-09-08 01:36:39,551 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:36:39,553 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:40,051 epoch 10 - iter 4/41 - loss 0.48314793 - samples/sec: 8.29 - lr: 0.020000\n",
-      "2021-09-08 01:36:40,328 epoch 10 - iter 8/41 - loss 0.26097990 - samples/sec: 14.51 - lr: 0.020000\n",
-      "2021-09-08 01:36:40,623 epoch 10 - iter 12/41 - loss 0.25448797 - samples/sec: 13.59 - lr: 0.020000\n",
-      "2021-09-08 01:36:40,888 epoch 10 - iter 16/41 - loss 0.21624266 - samples/sec: 15.13 - lr: 0.020000\n",
-      "2021-09-08 01:36:41,134 epoch 10 - iter 20/41 - loss 0.17676413 - samples/sec: 16.31 - lr: 0.020000\n",
-      "2021-09-08 01:36:41,330 epoch 10 - iter 24/41 - loss 0.29797471 - samples/sec: 20.44 - lr: 0.020000\n",
-      "2021-09-08 01:36:41,654 epoch 10 - iter 28/41 - loss 0.37153560 - samples/sec: 12.38 - lr: 0.020000\n",
-      "2021-09-08 01:36:41,900 epoch 10 - iter 32/41 - loss 0.36154487 - samples/sec: 16.32 - lr: 0.020000\n",
-      "2021-09-08 01:36:42,161 epoch 10 - iter 36/41 - loss 0.32256200 - samples/sec: 15.36 - lr: 0.020000\n",
-      "2021-09-08 01:36:42,442 epoch 10 - iter 40/41 - loss 0.29431779 - samples/sec: 14.28 - lr: 0.020000\n",
-      "2021-09-08 01:36:42,537 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:42,538 EPOCH 10 done: loss 0.2961 - lr 0.0200000\n",
-      "2021-09-08 01:36:42,801 DEV : loss 0.5914957523345947 - score 0.5\n",
-      "Epoch    10: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:36:42,802 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:36:49,688 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:36:49,688 Testing using best model ...\n",
-      "2021-09-08 01:36:49,717 loading file temp1/best-model.pt\n",
-      "init TARS\n",
-      "2021-09-08 01:37:04,798 \t0.0\n",
-      "2021-09-08 01:37:04,798 \n",
-      "Results:\n",
-      "- F-score (micro) 0.0\n",
-      "- F-score (macro) 0.0\n",
-      "- Accuracy 0.0\n",
-      "\n",
-      "By class:\n",
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       awful     0.0000    0.0000    0.0000         4\n",
-      "         bad     0.0000    0.0000    0.0000         0\n",
-      "     neutral     0.0000    0.0000    0.0000         0\n",
-      "        good     0.0000    0.0000    0.0000         1\n",
-      "       great     0.0000    0.0000    0.0000         0\n",
-      "\n",
-      "   micro avg     0.0000    0.0000    0.0000         5\n",
-      "   macro avg     0.0000    0.0000    0.0000         5\n",
-      "weighted avg     0.0000    0.0000    0.0000         5\n",
-      " samples avg     0.0000    0.0000    0.0000         5\n",
       "\n"
      ]
     },
@@ -1177,39 +980,211 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:37:04,799 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:50,073 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
-      "init TARS\n",
-      "2021-09-08 01:37:55,660 Computing label dictionary. Progress:\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "100%|██████████| 46/46 [00:00<00:00, 15081.53it/s]"
+      "2021-09-21 21:06:01,648 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:02,079 epoch 1 - iter 4/41 - loss 0.58866844 - samples/sec: 9.71 - lr: 0.020000\n",
+      "2021-09-21 21:06:02,336 epoch 1 - iter 8/41 - loss 0.33522321 - samples/sec: 15.60 - lr: 0.020000\n",
+      "2021-09-21 21:06:02,593 epoch 1 - iter 12/41 - loss 0.53351218 - samples/sec: 15.63 - lr: 0.020000\n",
+      "2021-09-21 21:06:02,864 epoch 1 - iter 16/41 - loss 0.55998640 - samples/sec: 14.74 - lr: 0.020000\n",
+      "2021-09-21 21:06:03,165 epoch 1 - iter 20/41 - loss 0.73707074 - samples/sec: 13.31 - lr: 0.020000\n",
+      "2021-09-21 21:06:03,420 epoch 1 - iter 24/41 - loss 0.82719192 - samples/sec: 15.73 - lr: 0.020000\n",
+      "2021-09-21 21:06:03,729 epoch 1 - iter 28/41 - loss 0.75763498 - samples/sec: 12.99 - lr: 0.020000\n",
+      "2021-09-21 21:06:03,980 epoch 1 - iter 32/41 - loss 0.69518372 - samples/sec: 15.94 - lr: 0.020000\n",
+      "2021-09-21 21:06:04,227 epoch 1 - iter 36/41 - loss 0.73300427 - samples/sec: 16.20 - lr: 0.020000\n",
+      "2021-09-21 21:06:04,657 epoch 1 - iter 40/41 - loss 0.71510175 - samples/sec: 9.33 - lr: 0.020000\n",
+      "2021-09-21 21:06:04,734 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:04,734 EPOCH 1 done: loss 0.7051 - lr 0.0200000\n",
+      "2021-09-21 21:06:04,919 DEV : loss 0.36735039949417114 - score 0.5\n",
+      "2021-09-21 21:06:04,919 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:06:17,366 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:17,695 epoch 2 - iter 4/41 - loss 0.47426384 - samples/sec: 12.89 - lr: 0.020000\n",
+      "2021-09-21 21:06:18,045 epoch 2 - iter 8/41 - loss 0.61375842 - samples/sec: 11.44 - lr: 0.020000\n",
+      "2021-09-21 21:06:18,485 epoch 2 - iter 12/41 - loss 0.53797089 - samples/sec: 9.10 - lr: 0.020000\n",
+      "2021-09-21 21:06:18,801 epoch 2 - iter 16/41 - loss 0.49906342 - samples/sec: 12.69 - lr: 0.020000\n",
+      "2021-09-21 21:06:19,080 epoch 2 - iter 20/41 - loss 0.48680882 - samples/sec: 14.35 - lr: 0.020000\n",
+      "2021-09-21 21:06:19,313 epoch 2 - iter 24/41 - loss 0.52503106 - samples/sec: 17.22 - lr: 0.020000\n",
+      "2021-09-21 21:06:19,561 epoch 2 - iter 28/41 - loss 0.63136252 - samples/sec: 16.16 - lr: 0.020000\n",
+      "2021-09-21 21:06:19,835 epoch 2 - iter 32/41 - loss 0.61230202 - samples/sec: 14.62 - lr: 0.020000\n",
+      "2021-09-21 21:06:20,171 epoch 2 - iter 36/41 - loss 0.61262422 - samples/sec: 11.92 - lr: 0.020000\n",
+      "2021-09-21 21:06:20,379 epoch 2 - iter 40/41 - loss 0.58289534 - samples/sec: 19.29 - lr: 0.020000\n",
+      "2021-09-21 21:06:20,450 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:20,450 EPOCH 2 done: loss 0.5886 - lr 0.0200000\n",
+      "2021-09-21 21:06:20,700 DEV : loss 0.47268301248550415 - score 0.75\n",
+      "2021-09-21 21:06:20,701 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:06:37,728 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:38,039 epoch 3 - iter 4/41 - loss 0.52561438 - samples/sec: 13.83 - lr: 0.020000\n",
+      "2021-09-21 21:06:38,430 epoch 3 - iter 8/41 - loss 0.45919477 - samples/sec: 10.23 - lr: 0.020000\n",
+      "2021-09-21 21:06:38,727 epoch 3 - iter 12/41 - loss 0.51175151 - samples/sec: 13.49 - lr: 0.020000\n",
+      "2021-09-21 21:06:39,013 epoch 3 - iter 16/41 - loss 0.53722316 - samples/sec: 14.01 - lr: 0.020000\n",
+      "2021-09-21 21:06:39,437 epoch 3 - iter 20/41 - loss 0.48412133 - samples/sec: 9.44 - lr: 0.020000\n",
+      "2021-09-21 21:06:39,678 epoch 3 - iter 24/41 - loss 0.47818822 - samples/sec: 16.65 - lr: 0.020000\n",
+      "2021-09-21 21:06:40,042 epoch 3 - iter 28/41 - loss 0.51022025 - samples/sec: 11.01 - lr: 0.020000\n",
+      "2021-09-21 21:06:40,529 epoch 3 - iter 32/41 - loss 0.48613334 - samples/sec: 8.23 - lr: 0.020000\n",
+      "2021-09-21 21:06:40,806 epoch 3 - iter 36/41 - loss 0.48336569 - samples/sec: 14.44 - lr: 0.020000\n",
+      "2021-09-21 21:06:41,141 epoch 3 - iter 40/41 - loss 0.47109029 - samples/sec: 11.96 - lr: 0.020000\n",
+      "2021-09-21 21:06:41,210 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:41,211 EPOCH 3 done: loss 0.4653 - lr 0.0200000\n",
+      "2021-09-21 21:06:43,546 DEV : loss 0.6174579858779907 - score 0.5\n",
+      "2021-09-21 21:06:43,547 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:06:43,561 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:43,959 epoch 4 - iter 4/41 - loss 0.81779839 - samples/sec: 10.53 - lr: 0.020000\n",
+      "2021-09-21 21:06:44,277 epoch 4 - iter 8/41 - loss 0.75120225 - samples/sec: 12.60 - lr: 0.020000\n",
+      "2021-09-21 21:06:44,601 epoch 4 - iter 12/41 - loss 0.58184000 - samples/sec: 12.34 - lr: 0.020000\n",
+      "2021-09-21 21:06:44,967 epoch 4 - iter 16/41 - loss 0.52704825 - samples/sec: 10.95 - lr: 0.020000\n",
+      "2021-09-21 21:06:45,495 epoch 4 - iter 20/41 - loss 0.55016359 - samples/sec: 7.59 - lr: 0.020000\n",
+      "2021-09-21 21:06:45,874 epoch 4 - iter 24/41 - loss 0.48505112 - samples/sec: 10.57 - lr: 0.020000\n",
+      "2021-09-21 21:06:46,273 epoch 4 - iter 28/41 - loss 0.48370038 - samples/sec: 10.05 - lr: 0.020000\n",
+      "2021-09-21 21:06:46,598 epoch 4 - iter 32/41 - loss 0.51653535 - samples/sec: 12.34 - lr: 0.020000\n",
+      "2021-09-21 21:06:47,005 epoch 4 - iter 36/41 - loss 0.54081796 - samples/sec: 9.83 - lr: 0.020000\n",
+      "2021-09-21 21:06:47,599 epoch 4 - iter 40/41 - loss 0.53339609 - samples/sec: 6.75 - lr: 0.020000\n",
+      "2021-09-21 21:06:47,669 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:47,670 EPOCH 4 done: loss 0.5216 - lr 0.0200000\n",
+      "2021-09-21 21:06:47,862 DEV : loss 0.4051457941532135 - score 0.5\n",
+      "2021-09-21 21:06:47,863 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:06:47,866 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:48,240 epoch 5 - iter 4/41 - loss 0.35863826 - samples/sec: 11.42 - lr: 0.020000\n",
+      "2021-09-21 21:06:48,792 epoch 5 - iter 8/41 - loss 0.35527979 - samples/sec: 7.27 - lr: 0.020000\n",
+      "2021-09-21 21:06:49,272 epoch 5 - iter 12/41 - loss 0.39304933 - samples/sec: 8.35 - lr: 0.020000\n",
+      "2021-09-21 21:06:49,802 epoch 5 - iter 16/41 - loss 0.40052935 - samples/sec: 7.56 - lr: 0.020000\n",
+      "2021-09-21 21:06:50,199 epoch 5 - iter 20/41 - loss 0.43462157 - samples/sec: 10.11 - lr: 0.020000\n",
+      "2021-09-21 21:06:50,546 epoch 5 - iter 24/41 - loss 0.43298371 - samples/sec: 11.54 - lr: 0.020000\n",
+      "2021-09-21 21:06:50,904 epoch 5 - iter 28/41 - loss 0.43138627 - samples/sec: 11.20 - lr: 0.020000\n",
+      "2021-09-21 21:06:51,271 epoch 5 - iter 32/41 - loss 0.45349190 - samples/sec: 10.90 - lr: 0.020000\n",
+      "2021-09-21 21:06:51,627 epoch 5 - iter 36/41 - loss 0.43985038 - samples/sec: 11.26 - lr: 0.020000\n",
+      "2021-09-21 21:06:52,080 epoch 5 - iter 40/41 - loss 0.40431486 - samples/sec: 8.84 - lr: 0.020000\n",
+      "2021-09-21 21:06:52,137 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:52,137 EPOCH 5 done: loss 0.3945 - lr 0.0200000\n",
+      "2021-09-21 21:06:52,358 DEV : loss 0.43499913811683655 - score 0.5\n",
+      "2021-09-21 21:06:52,359 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:06:52,432 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:52,718 epoch 6 - iter 4/41 - loss 0.37016428 - samples/sec: 14.82 - lr: 0.020000\n",
+      "2021-09-21 21:06:52,965 epoch 6 - iter 8/41 - loss 0.21200932 - samples/sec: 16.20 - lr: 0.020000\n",
+      "2021-09-21 21:06:53,213 epoch 6 - iter 12/41 - loss 0.25052462 - samples/sec: 16.18 - lr: 0.020000\n",
+      "2021-09-21 21:06:53,450 epoch 6 - iter 16/41 - loss 0.34520231 - samples/sec: 16.91 - lr: 0.020000\n",
+      "2021-09-21 21:06:53,727 epoch 6 - iter 20/41 - loss 0.31783071 - samples/sec: 14.46 - lr: 0.020000\n",
+      "2021-09-21 21:06:53,967 epoch 6 - iter 24/41 - loss 0.32737575 - samples/sec: 16.67 - lr: 0.020000\n",
+      "2021-09-21 21:06:54,176 epoch 6 - iter 28/41 - loss 0.30004961 - samples/sec: 19.22 - lr: 0.020000\n",
+      "2021-09-21 21:06:54,369 epoch 6 - iter 32/41 - loss 0.27138537 - samples/sec: 20.83 - lr: 0.020000\n",
+      "2021-09-21 21:06:54,678 epoch 6 - iter 36/41 - loss 0.29431191 - samples/sec: 12.94 - lr: 0.020000\n",
+      "2021-09-21 21:06:55,011 epoch 6 - iter 40/41 - loss 0.32062589 - samples/sec: 12.03 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:37:55,665 [b'awful', b'bad', b'neutral', b'good', b'great']\n"
+      "2021-09-21 21:06:55,065 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:06:55,066 EPOCH 6 done: loss 0.3131 - lr 0.0200000\n",
+      "2021-09-21 21:07:09,107 DEV : loss 0.5971083641052246 - score 0.5\n",
+      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:07:09,109 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:07:09,219 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:09,482 epoch 7 - iter 4/41 - loss 0.92939276 - samples/sec: 16.31 - lr: 0.010000\n",
+      "2021-09-21 21:07:09,706 epoch 7 - iter 8/41 - loss 0.52471014 - samples/sec: 17.96 - lr: 0.010000\n",
+      "2021-09-21 21:07:09,964 epoch 7 - iter 12/41 - loss 0.41078415 - samples/sec: 15.50 - lr: 0.010000\n",
+      "2021-09-21 21:07:10,224 epoch 7 - iter 16/41 - loss 0.36873783 - samples/sec: 15.49 - lr: 0.010000\n",
+      "2021-09-21 21:07:10,619 epoch 7 - iter 20/41 - loss 0.37584001 - samples/sec: 10.14 - lr: 0.010000\n",
+      "2021-09-21 21:07:11,051 epoch 7 - iter 24/41 - loss 0.43929738 - samples/sec: 9.26 - lr: 0.010000\n",
+      "2021-09-21 21:07:11,255 epoch 7 - iter 28/41 - loss 0.39934909 - samples/sec: 19.62 - lr: 0.010000\n",
+      "2021-09-21 21:07:11,464 epoch 7 - iter 32/41 - loss 0.39802138 - samples/sec: 19.23 - lr: 0.010000\n",
+      "2021-09-21 21:07:11,702 epoch 7 - iter 36/41 - loss 0.38155782 - samples/sec: 16.85 - lr: 0.010000\n",
+      "2021-09-21 21:07:11,897 epoch 7 - iter 40/41 - loss 0.41207827 - samples/sec: 20.55 - lr: 0.010000\n",
+      "2021-09-21 21:07:11,954 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:11,954 EPOCH 7 done: loss 0.4021 - lr 0.0100000\n",
+      "2021-09-21 21:07:12,160 DEV : loss 0.48859861493110657 - score 0.25\n",
+      "2021-09-21 21:07:12,161 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:07:12,281 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:12,492 epoch 8 - iter 4/41 - loss 0.18457625 - samples/sec: 20.29 - lr: 0.010000\n",
+      "2021-09-21 21:07:12,782 epoch 8 - iter 8/41 - loss 0.16871381 - samples/sec: 13.82 - lr: 0.010000\n",
+      "2021-09-21 21:07:13,020 epoch 8 - iter 12/41 - loss 0.11935141 - samples/sec: 16.82 - lr: 0.010000\n",
+      "2021-09-21 21:07:13,303 epoch 8 - iter 16/41 - loss 0.15574422 - samples/sec: 14.15 - lr: 0.010000\n",
+      "2021-09-21 21:07:13,515 epoch 8 - iter 20/41 - loss 0.20172587 - samples/sec: 18.90 - lr: 0.010000\n",
+      "2021-09-21 21:07:13,714 epoch 8 - iter 24/41 - loss 0.22973122 - samples/sec: 20.15 - lr: 0.010000\n",
+      "2021-09-21 21:07:13,924 epoch 8 - iter 28/41 - loss 0.25510219 - samples/sec: 19.13 - lr: 0.010000\n",
+      "2021-09-21 21:07:14,113 epoch 8 - iter 32/41 - loss 0.27115310 - samples/sec: 21.24 - lr: 0.010000\n",
+      "2021-09-21 21:07:14,303 epoch 8 - iter 36/41 - loss 0.26820522 - samples/sec: 21.18 - lr: 0.010000\n",
+      "2021-09-21 21:07:14,491 epoch 8 - iter 40/41 - loss 0.26640856 - samples/sec: 21.33 - lr: 0.010000\n",
+      "2021-09-21 21:07:14,543 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:14,544 EPOCH 8 done: loss 0.2614 - lr 0.0100000\n",
+      "2021-09-21 21:07:22,756 DEV : loss 0.4379042685031891 - score 0.5\n",
+      "2021-09-21 21:07:22,757 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:07:22,943 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:23,233 epoch 9 - iter 4/41 - loss 0.04131593 - samples/sec: 14.60 - lr: 0.010000\n",
+      "2021-09-21 21:07:23,655 epoch 9 - iter 8/41 - loss 0.12638153 - samples/sec: 9.49 - lr: 0.010000\n",
+      "2021-09-21 21:07:23,946 epoch 9 - iter 12/41 - loss 0.11127584 - samples/sec: 13.78 - lr: 0.010000\n",
+      "2021-09-21 21:07:24,214 epoch 9 - iter 16/41 - loss 0.17886103 - samples/sec: 14.93 - lr: 0.010000\n",
+      "2021-09-21 21:07:24,555 epoch 9 - iter 20/41 - loss 0.14605827 - samples/sec: 11.77 - lr: 0.010000\n",
+      "2021-09-21 21:07:25,011 epoch 9 - iter 24/41 - loss 0.17132504 - samples/sec: 8.78 - lr: 0.010000\n",
+      "2021-09-21 21:07:25,271 epoch 9 - iter 28/41 - loss 0.15409774 - samples/sec: 15.42 - lr: 0.010000\n",
+      "2021-09-21 21:07:25,489 epoch 9 - iter 32/41 - loss 0.15397578 - samples/sec: 18.40 - lr: 0.010000\n",
+      "2021-09-21 21:07:25,734 epoch 9 - iter 36/41 - loss 0.14366910 - samples/sec: 16.34 - lr: 0.010000\n",
+      "2021-09-21 21:07:26,297 epoch 9 - iter 40/41 - loss 0.14530529 - samples/sec: 7.11 - lr: 0.010000\n",
+      "2021-09-21 21:07:26,400 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:26,401 EPOCH 9 done: loss 0.1477 - lr 0.0100000\n",
+      "2021-09-21 21:07:27,282 DEV : loss 0.6659640669822693 - score 0.25\n",
+      "2021-09-21 21:07:27,283 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:07:27,286 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:27,751 epoch 10 - iter 4/41 - loss 0.09160387 - samples/sec: 9.18 - lr: 0.010000\n",
+      "2021-09-21 21:07:28,167 epoch 10 - iter 8/41 - loss 0.05296112 - samples/sec: 9.64 - lr: 0.010000\n",
+      "2021-09-21 21:07:28,510 epoch 10 - iter 12/41 - loss 0.12327007 - samples/sec: 11.68 - lr: 0.010000\n",
+      "2021-09-21 21:07:28,944 epoch 10 - iter 16/41 - loss 0.10352131 - samples/sec: 9.24 - lr: 0.010000\n",
+      "2021-09-21 21:07:29,789 epoch 10 - iter 20/41 - loss 0.08561318 - samples/sec: 4.74 - lr: 0.010000\n",
+      "2021-09-21 21:07:30,137 epoch 10 - iter 24/41 - loss 0.09826771 - samples/sec: 11.52 - lr: 0.010000\n",
+      "2021-09-21 21:07:30,526 epoch 10 - iter 28/41 - loss 0.08717199 - samples/sec: 10.31 - lr: 0.010000\n",
+      "2021-09-21 21:07:31,060 epoch 10 - iter 32/41 - loss 0.07869565 - samples/sec: 7.49 - lr: 0.010000\n",
+      "2021-09-21 21:07:31,439 epoch 10 - iter 36/41 - loss 0.07263221 - samples/sec: 10.57 - lr: 0.010000\n",
+      "2021-09-21 21:07:31,944 epoch 10 - iter 40/41 - loss 0.09096704 - samples/sec: 7.93 - lr: 0.010000\n",
+      "2021-09-21 21:07:32,023 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:32,023 EPOCH 10 done: loss 0.0888 - lr 0.0100000\n",
+      "2021-09-21 21:07:32,261 DEV : loss 0.5241574645042419 - score 0.5\n",
+      "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 21:07:32,262 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:07:38,370 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:07:38,371 Testing using best model ...\n",
+      "2021-09-21 21:07:38,391 loading file temp1/best-model.pt\n",
+      "init TARS\n",
+      "2021-09-21 21:07:48,507 \t0.4\n",
+      "2021-09-21 21:07:48,508 \n",
+      "Results:\n",
+      "- F-score (micro) 0.4\n",
+      "- F-score (macro) 0.2333\n",
+      "- Accuracy 0.4\n",
+      "\n",
+      "By class:\n",
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "       awful     0.0000    0.0000    0.0000         1\n",
+      "         bad     0.5000    1.0000    0.6667         1\n",
+      "     neutral     0.0000    0.0000    0.0000         0\n",
+      "        good     0.0000    0.0000    0.0000         2\n",
+      "       great     0.3333    1.0000    0.5000         1\n",
+      "\n",
+      "   micro avg     0.4000    0.4000    0.4000         5\n",
+      "   macro avg     0.1667    0.4000    0.2333         5\n",
+      "weighted avg     0.1667    0.4000    0.2333         5\n",
+      " samples avg     0.4000    0.4000    0.4000         5\n",
+      "\n",
+      "2021-09-21 21:07:48,508 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:37,787 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
+      "init TARS\n",
+      "2021-09-21 21:08:42,235 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\n"
+      "100%|██████████| 46/46 [00:00<00:00, 14759.64it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:37:55,989 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:55,991 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:08:42,240 [b'awful', b'bad', b'neutral', b'good', b'great']\n",
+      "2021-09-21 21:08:42,382 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:42,384 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -1522,236 +1497,241 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:37:55,992 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:55,992 Corpus: \"Corpus: 41 train + 4 dev + 5 test sentences\"\n",
-      "2021-09-08 01:37:55,993 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:55,993 Parameters:\n",
-      "2021-09-08 01:37:55,993  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:37:55,994  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:37:55,994  - patience: \"3\"\n",
-      "2021-09-08 01:37:55,994  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:37:55,994  - max_epochs: \"10\"\n",
-      "2021-09-08 01:37:55,995  - shuffle: \"True\"\n",
-      "2021-09-08 01:37:55,995  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:37:55,995  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:37:55,996 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:55,996 Model training base path: \"temp1\"\n",
-      "2021-09-08 01:37:55,996 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:55,996 Device: cuda:0\n",
-      "2021-09-08 01:37:55,997 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:55,997 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:37:56,461 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:56,693 epoch 1 - iter 4/41 - loss 0.44969234 - samples/sec: 18.68 - lr: 0.020000\n",
-      "2021-09-08 01:37:56,982 epoch 1 - iter 8/41 - loss 0.36910853 - samples/sec: 13.87 - lr: 0.020000\n",
-      "2021-09-08 01:37:57,263 epoch 1 - iter 12/41 - loss 0.62963768 - samples/sec: 14.27 - lr: 0.020000\n",
-      "2021-09-08 01:37:57,637 epoch 1 - iter 16/41 - loss 0.63853934 - samples/sec: 10.71 - lr: 0.020000\n",
-      "2021-09-08 01:37:57,886 epoch 1 - iter 20/41 - loss 0.62063955 - samples/sec: 16.18 - lr: 0.020000\n",
-      "2021-09-08 01:37:58,121 epoch 1 - iter 24/41 - loss 0.53882671 - samples/sec: 17.00 - lr: 0.020000\n",
-      "2021-09-08 01:37:58,411 epoch 1 - iter 28/41 - loss 0.62142847 - samples/sec: 13.85 - lr: 0.020000\n",
-      "2021-09-08 01:37:58,648 epoch 1 - iter 32/41 - loss 0.58515186 - samples/sec: 16.92 - lr: 0.020000\n",
-      "2021-09-08 01:37:58,989 epoch 1 - iter 36/41 - loss 0.61377956 - samples/sec: 11.75 - lr: 0.020000\n",
-      "2021-09-08 01:37:59,306 epoch 1 - iter 40/41 - loss 0.62862172 - samples/sec: 12.65 - lr: 0.020000\n",
-      "2021-09-08 01:37:59,364 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:37:59,364 EPOCH 1 done: loss 0.6150 - lr 0.0200000\n",
-      "2021-09-08 01:37:59,495 DEV : loss 1.3537875413894653 - score 0.5\n",
-      "2021-09-08 01:37:59,495 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:38:06,591 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:06,834 epoch 2 - iter 4/41 - loss 0.94955906 - samples/sec: 18.19 - lr: 0.020000\n",
-      "2021-09-08 01:38:07,150 epoch 2 - iter 8/41 - loss 0.84403664 - samples/sec: 12.71 - lr: 0.020000\n",
-      "2021-09-08 01:38:07,461 epoch 2 - iter 12/41 - loss 0.85324803 - samples/sec: 12.91 - lr: 0.020000\n",
-      "2021-09-08 01:38:07,680 epoch 2 - iter 16/41 - loss 0.84512829 - samples/sec: 18.29 - lr: 0.020000\n",
-      "2021-09-08 01:38:07,973 epoch 2 - iter 20/41 - loss 0.83439577 - samples/sec: 13.70 - lr: 0.020000\n",
-      "2021-09-08 01:38:08,332 epoch 2 - iter 24/41 - loss 0.82799289 - samples/sec: 11.17 - lr: 0.020000\n",
-      "2021-09-08 01:38:08,660 epoch 2 - iter 28/41 - loss 0.80072629 - samples/sec: 12.21 - lr: 0.020000\n",
-      "2021-09-08 01:38:08,885 epoch 2 - iter 32/41 - loss 0.79214423 - samples/sec: 17.83 - lr: 0.020000\n",
-      "2021-09-08 01:38:09,156 epoch 2 - iter 36/41 - loss 0.78457143 - samples/sec: 14.79 - lr: 0.020000\n",
-      "2021-09-08 01:38:09,418 epoch 2 - iter 40/41 - loss 0.77722706 - samples/sec: 15.31 - lr: 0.020000\n",
-      "2021-09-08 01:38:09,475 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:09,475 EPOCH 2 done: loss 0.7761 - lr 0.0200000\n",
-      "2021-09-08 01:38:09,586 DEV : loss 0.6888076066970825 - score 0.5\n",
-      "2021-09-08 01:38:09,587 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:38:15,945 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:16,239 epoch 3 - iter 4/41 - loss 0.74056290 - samples/sec: 14.35 - lr: 0.020000\n",
-      "2021-09-08 01:38:16,645 epoch 3 - iter 8/41 - loss 0.73401635 - samples/sec: 9.86 - lr: 0.020000\n",
-      "2021-09-08 01:38:17,043 epoch 3 - iter 12/41 - loss 0.73228404 - samples/sec: 10.06 - lr: 0.020000\n",
-      "2021-09-08 01:38:17,296 epoch 3 - iter 16/41 - loss 0.71145225 - samples/sec: 15.90 - lr: 0.020000\n",
-      "2021-09-08 01:38:17,700 epoch 3 - iter 20/41 - loss 0.69181657 - samples/sec: 9.91 - lr: 0.020000\n",
-      "2021-09-08 01:38:17,972 epoch 3 - iter 24/41 - loss 0.69728967 - samples/sec: 14.75 - lr: 0.020000\n",
-      "2021-09-08 01:38:18,228 epoch 3 - iter 28/41 - loss 0.69138872 - samples/sec: 15.70 - lr: 0.020000\n",
-      "2021-09-08 01:38:18,448 epoch 3 - iter 32/41 - loss 0.68804561 - samples/sec: 18.24 - lr: 0.020000\n",
-      "2021-09-08 01:38:18,697 epoch 3 - iter 36/41 - loss 0.68469288 - samples/sec: 16.12 - lr: 0.020000\n",
-      "2021-09-08 01:38:18,985 epoch 3 - iter 40/41 - loss 0.68745503 - samples/sec: 13.93 - lr: 0.020000\n",
-      "2021-09-08 01:38:19,057 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:19,057 EPOCH 3 done: loss 0.6875 - lr 0.0200000\n",
-      "2021-09-08 01:38:19,299 DEV : loss 0.6237187385559082 - score 0.5\n",
-      "2021-09-08 01:38:19,301 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:08:42,385 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:42,385 Corpus: \"Corpus: 41 train + 4 dev + 5 test sentences\"\n",
+      "2021-09-21 21:08:42,385 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:42,386 Parameters:\n",
+      "2021-09-21 21:08:42,386  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:08:42,386  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:08:42,387  - patience: \"3\"\n",
+      "2021-09-21 21:08:42,387  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:08:42,387  - max_epochs: \"10\"\n",
+      "2021-09-21 21:08:42,388  - shuffle: \"True\"\n",
+      "2021-09-21 21:08:42,388  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:08:42,388  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:08:42,389 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:42,389 Model training base path: \"temp1\"\n",
+      "2021-09-21 21:08:42,389 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:42,390 Device: cuda:0\n",
+      "2021-09-21 21:08:42,390 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:42,390 Embeddings storage mode: cpu\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 21:08:42,575 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:42,843 epoch 1 - iter 4/41 - loss 0.44333081 - samples/sec: 16.71 - lr: 0.020000\n",
+      "2021-09-21 21:08:43,121 epoch 1 - iter 8/41 - loss 0.35927231 - samples/sec: 14.41 - lr: 0.020000\n",
+      "2021-09-21 21:08:43,421 epoch 1 - iter 12/41 - loss 0.45082290 - samples/sec: 13.37 - lr: 0.020000\n",
+      "2021-09-21 21:08:43,758 epoch 1 - iter 16/41 - loss 0.48452192 - samples/sec: 11.92 - lr: 0.020000\n",
+      "2021-09-21 21:08:44,011 epoch 1 - iter 20/41 - loss 0.55940885 - samples/sec: 15.82 - lr: 0.020000\n",
+      "2021-09-21 21:08:44,308 epoch 1 - iter 24/41 - loss 0.55654765 - samples/sec: 13.50 - lr: 0.020000\n",
+      "2021-09-21 21:08:44,595 epoch 1 - iter 28/41 - loss 0.62889246 - samples/sec: 13.99 - lr: 0.020000\n",
+      "2021-09-21 21:08:44,940 epoch 1 - iter 32/41 - loss 0.61453790 - samples/sec: 11.62 - lr: 0.020000\n",
+      "2021-09-21 21:08:45,269 epoch 1 - iter 36/41 - loss 0.58469706 - samples/sec: 12.17 - lr: 0.020000\n",
+      "2021-09-21 21:08:45,569 epoch 1 - iter 40/41 - loss 0.56875389 - samples/sec: 13.39 - lr: 0.020000\n",
+      "2021-09-21 21:08:45,634 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:45,634 EPOCH 1 done: loss 0.5562 - lr 0.0200000\n",
+      "2021-09-21 21:08:45,854 DEV : loss 0.6265429258346558 - score 0.5\n",
+      "2021-09-21 21:08:45,855 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:38:24,298 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:24,556 epoch 4 - iter 4/41 - loss 0.66779336 - samples/sec: 16.82 - lr: 0.020000\n",
-      "2021-09-08 01:38:24,802 epoch 4 - iter 8/41 - loss 0.68873206 - samples/sec: 16.30 - lr: 0.020000\n",
-      "2021-09-08 01:38:25,065 epoch 4 - iter 12/41 - loss 0.67480624 - samples/sec: 15.25 - lr: 0.020000\n",
-      "2021-09-08 01:38:25,307 epoch 4 - iter 16/41 - loss 0.66784381 - samples/sec: 16.57 - lr: 0.020000\n",
-      "2021-09-08 01:38:25,633 epoch 4 - iter 20/41 - loss 0.66184862 - samples/sec: 12.31 - lr: 0.020000\n",
-      "2021-09-08 01:38:25,910 epoch 4 - iter 24/41 - loss 0.64820638 - samples/sec: 14.46 - lr: 0.020000\n",
-      "2021-09-08 01:38:26,220 epoch 4 - iter 28/41 - loss 0.66115391 - samples/sec: 12.95 - lr: 0.020000\n",
-      "2021-09-08 01:38:26,489 epoch 4 - iter 32/41 - loss 0.68334796 - samples/sec: 14.93 - lr: 0.020000\n",
-      "2021-09-08 01:38:26,854 epoch 4 - iter 36/41 - loss 0.69091972 - samples/sec: 10.98 - lr: 0.020000\n",
-      "2021-09-08 01:38:27,126 epoch 4 - iter 40/41 - loss 0.69075841 - samples/sec: 14.76 - lr: 0.020000\n",
-      "2021-09-08 01:38:27,192 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:27,193 EPOCH 4 done: loss 0.6889 - lr 0.0200000\n",
-      "2021-09-08 01:38:27,407 DEV : loss 0.4987359642982483 - score 0.5\n",
-      "2021-09-08 01:38:27,408 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:08:50,152 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:50,538 epoch 2 - iter 4/41 - loss 0.83234517 - samples/sec: 11.26 - lr: 0.020000\n",
+      "2021-09-21 21:08:50,888 epoch 2 - iter 8/41 - loss 0.73580414 - samples/sec: 11.42 - lr: 0.020000\n",
+      "2021-09-21 21:08:51,215 epoch 2 - iter 12/41 - loss 0.64500650 - samples/sec: 12.29 - lr: 0.020000\n",
+      "2021-09-21 21:08:51,545 epoch 2 - iter 16/41 - loss 0.59207328 - samples/sec: 12.13 - lr: 0.020000\n",
+      "2021-09-21 21:08:51,916 epoch 2 - iter 20/41 - loss 0.56755302 - samples/sec: 10.81 - lr: 0.020000\n",
+      "2021-09-21 21:08:52,257 epoch 2 - iter 24/41 - loss 0.64392192 - samples/sec: 11.74 - lr: 0.020000\n",
+      "2021-09-21 21:08:52,597 epoch 2 - iter 28/41 - loss 0.61803817 - samples/sec: 11.79 - lr: 0.020000\n",
+      "2021-09-21 21:08:52,986 epoch 2 - iter 32/41 - loss 0.60342169 - samples/sec: 10.31 - lr: 0.020000\n",
+      "2021-09-21 21:08:53,337 epoch 2 - iter 36/41 - loss 0.61564326 - samples/sec: 11.41 - lr: 0.020000\n",
+      "2021-09-21 21:08:53,751 epoch 2 - iter 40/41 - loss 0.64569338 - samples/sec: 9.69 - lr: 0.020000\n",
+      "2021-09-21 21:08:53,827 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:53,827 EPOCH 2 done: loss 0.6413 - lr 0.0200000\n",
+      "2021-09-21 21:08:54,057 DEV : loss 0.4487917721271515 - score 0.25\n",
+      "2021-09-21 21:08:54,058 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:08:54,060 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:54,471 epoch 3 - iter 4/41 - loss 0.62693936 - samples/sec: 10.23 - lr: 0.020000\n",
+      "2021-09-21 21:08:54,824 epoch 3 - iter 8/41 - loss 0.64183986 - samples/sec: 11.35 - lr: 0.020000\n",
+      "2021-09-21 21:08:55,200 epoch 3 - iter 12/41 - loss 0.59769114 - samples/sec: 10.66 - lr: 0.020000\n",
+      "2021-09-21 21:08:55,512 epoch 3 - iter 16/41 - loss 0.48193895 - samples/sec: 12.87 - lr: 0.020000\n",
+      "2021-09-21 21:08:55,878 epoch 3 - iter 20/41 - loss 0.49979554 - samples/sec: 10.93 - lr: 0.020000\n",
+      "2021-09-21 21:08:56,199 epoch 3 - iter 24/41 - loss 0.52930558 - samples/sec: 12.48 - lr: 0.020000\n",
+      "2021-09-21 21:08:56,531 epoch 3 - iter 28/41 - loss 0.49473733 - samples/sec: 12.08 - lr: 0.020000\n",
+      "2021-09-21 21:08:56,830 epoch 3 - iter 32/41 - loss 0.50263538 - samples/sec: 13.44 - lr: 0.020000\n",
+      "2021-09-21 21:08:57,235 epoch 3 - iter 36/41 - loss 0.52274380 - samples/sec: 9.90 - lr: 0.020000\n",
+      "2021-09-21 21:08:57,548 epoch 3 - iter 40/41 - loss 0.54484834 - samples/sec: 12.79 - lr: 0.020000\n",
+      "2021-09-21 21:08:57,624 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:57,625 EPOCH 3 done: loss 0.5355 - lr 0.0200000\n",
+      "2021-09-21 21:08:57,873 DEV : loss 0.6450220346450806 - score 0.25\n",
+      "2021-09-21 21:08:57,874 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:08:57,876 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:08:58,295 epoch 4 - iter 4/41 - loss 0.52628873 - samples/sec: 10.33 - lr: 0.020000\n",
+      "2021-09-21 21:08:58,612 epoch 4 - iter 8/41 - loss 0.34899425 - samples/sec: 12.66 - lr: 0.020000\n",
+      "2021-09-21 21:08:58,933 epoch 4 - iter 12/41 - loss 0.43775522 - samples/sec: 12.45 - lr: 0.020000\n",
+      "2021-09-21 21:08:59,235 epoch 4 - iter 16/41 - loss 0.49053693 - samples/sec: 13.30 - lr: 0.020000\n",
+      "2021-09-21 21:08:59,531 epoch 4 - iter 20/41 - loss 0.53424850 - samples/sec: 13.54 - lr: 0.020000\n",
+      "2021-09-21 21:08:59,819 epoch 4 - iter 24/41 - loss 0.51317777 - samples/sec: 13.90 - lr: 0.020000\n",
+      "2021-09-21 21:09:00,184 epoch 4 - iter 28/41 - loss 0.49017742 - samples/sec: 10.99 - lr: 0.020000\n",
+      "2021-09-21 21:09:00,437 epoch 4 - iter 32/41 - loss 0.48675548 - samples/sec: 15.86 - lr: 0.020000\n",
+      "2021-09-21 21:09:00,673 epoch 4 - iter 36/41 - loss 0.47060574 - samples/sec: 16.97 - lr: 0.020000\n",
+      "2021-09-21 21:09:00,954 epoch 4 - iter 40/41 - loss 0.46803434 - samples/sec: 14.24 - lr: 0.020000\n",
+      "2021-09-21 21:09:01,015 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:01,016 EPOCH 4 done: loss 0.4587 - lr 0.0200000\n",
+      "2021-09-21 21:09:01,234 DEV : loss 0.31006985902786255 - score 0.25\n",
+      "2021-09-21 21:09:01,235 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:09:01,237 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:01,587 epoch 5 - iter 4/41 - loss 0.63009023 - samples/sec: 12.39 - lr: 0.020000\n",
+      "2021-09-21 21:09:01,875 epoch 5 - iter 8/41 - loss 0.48419238 - samples/sec: 13.89 - lr: 0.020000\n",
+      "2021-09-21 21:09:02,222 epoch 5 - iter 12/41 - loss 0.50092920 - samples/sec: 11.55 - lr: 0.020000\n",
+      "2021-09-21 21:09:02,470 epoch 5 - iter 16/41 - loss 0.47023730 - samples/sec: 16.23 - lr: 0.020000\n",
+      "2021-09-21 21:09:02,669 epoch 5 - iter 20/41 - loss 0.38141750 - samples/sec: 20.14 - lr: 0.020000\n",
+      "2021-09-21 21:09:02,891 epoch 5 - iter 24/41 - loss 0.35308072 - samples/sec: 18.00 - lr: 0.020000\n",
+      "2021-09-21 21:09:03,119 epoch 5 - iter 28/41 - loss 0.40811570 - samples/sec: 17.61 - lr: 0.020000\n",
+      "2021-09-21 21:09:03,418 epoch 5 - iter 32/41 - loss 0.42316017 - samples/sec: 13.38 - lr: 0.020000\n",
+      "2021-09-21 21:09:03,716 epoch 5 - iter 36/41 - loss 0.39530513 - samples/sec: 13.45 - lr: 0.020000\n",
+      "2021-09-21 21:09:04,036 epoch 5 - iter 40/41 - loss 0.40334429 - samples/sec: 12.52 - lr: 0.020000\n",
+      "2021-09-21 21:09:04,113 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:04,114 EPOCH 5 done: loss 0.3969 - lr 0.0200000\n",
+      "2021-09-21 21:09:04,343 DEV : loss 0.302986204624176 - score 0.5\n",
+      "2021-09-21 21:09:04,346 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:38:35,115 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:35,427 epoch 5 - iter 4/41 - loss 0.70082654 - samples/sec: 13.50 - lr: 0.020000\n",
-      "2021-09-08 01:38:35,646 epoch 5 - iter 8/41 - loss 0.68075737 - samples/sec: 18.28 - lr: 0.020000\n",
-      "2021-09-08 01:38:35,880 epoch 5 - iter 12/41 - loss 0.65415075 - samples/sec: 17.15 - lr: 0.020000\n",
-      "2021-09-08 01:38:36,114 epoch 5 - iter 16/41 - loss 0.64742967 - samples/sec: 17.17 - lr: 0.020000\n",
-      "2021-09-08 01:38:36,498 epoch 5 - iter 20/41 - loss 0.64566288 - samples/sec: 10.41 - lr: 0.020000\n",
-      "2021-09-08 01:38:36,857 epoch 5 - iter 24/41 - loss 0.64811695 - samples/sec: 11.16 - lr: 0.020000\n",
-      "2021-09-08 01:38:37,095 epoch 5 - iter 28/41 - loss 0.64670187 - samples/sec: 16.89 - lr: 0.020000\n",
-      "2021-09-08 01:38:37,348 epoch 5 - iter 32/41 - loss 0.65332738 - samples/sec: 15.85 - lr: 0.020000\n",
-      "2021-09-08 01:38:37,604 epoch 5 - iter 36/41 - loss 0.65405956 - samples/sec: 15.62 - lr: 0.020000\n",
-      "2021-09-08 01:38:37,971 epoch 5 - iter 40/41 - loss 0.65402150 - samples/sec: 10.91 - lr: 0.020000\n",
-      "2021-09-08 01:38:38,017 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:09:10,805 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:11,050 epoch 6 - iter 4/41 - loss 0.04883319 - samples/sec: 17.44 - lr: 0.020000\n",
+      "2021-09-21 21:09:11,342 epoch 6 - iter 8/41 - loss 0.22426206 - samples/sec: 13.75 - lr: 0.020000\n",
+      "2021-09-21 21:09:11,642 epoch 6 - iter 12/41 - loss 0.30907383 - samples/sec: 13.35 - lr: 0.020000\n",
+      "2021-09-21 21:09:11,889 epoch 6 - iter 16/41 - loss 0.28328559 - samples/sec: 16.19 - lr: 0.020000\n",
+      "2021-09-21 21:09:12,178 epoch 6 - iter 20/41 - loss 0.29924679 - samples/sec: 13.90 - lr: 0.020000\n",
+      "2021-09-21 21:09:12,465 epoch 6 - iter 24/41 - loss 0.29330929 - samples/sec: 13.97 - lr: 0.020000\n",
+      "2021-09-21 21:09:12,749 epoch 6 - iter 28/41 - loss 0.31158285 - samples/sec: 14.09 - lr: 0.020000\n",
+      "2021-09-21 21:09:13,073 epoch 6 - iter 32/41 - loss 0.32755548 - samples/sec: 12.38 - lr: 0.020000\n",
+      "2021-09-21 21:09:13,441 epoch 6 - iter 36/41 - loss 0.33650063 - samples/sec: 10.87 - lr: 0.020000\n",
+      "2021-09-21 21:09:13,686 epoch 6 - iter 40/41 - loss 0.31779135 - samples/sec: 16.38 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:38:38,017 EPOCH 5 done: loss 0.6550 - lr 0.0200000\n",
-      "2021-09-08 01:38:38,122 DEV : loss 0.5634372234344482 - score 0.5\n",
-      "2021-09-08 01:38:38,125 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:38:38,127 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:38,444 epoch 6 - iter 4/41 - loss 0.62371609 - samples/sec: 13.63 - lr: 0.020000\n",
-      "2021-09-08 01:38:38,775 epoch 6 - iter 8/41 - loss 0.63541619 - samples/sec: 12.12 - lr: 0.020000\n",
-      "2021-09-08 01:38:39,007 epoch 6 - iter 12/41 - loss 0.63465020 - samples/sec: 17.29 - lr: 0.020000\n",
-      "2021-09-08 01:38:39,214 epoch 6 - iter 16/41 - loss 0.62844848 - samples/sec: 19.39 - lr: 0.020000\n",
-      "2021-09-08 01:38:39,638 epoch 6 - iter 20/41 - loss 0.63069515 - samples/sec: 9.46 - lr: 0.020000\n",
-      "2021-09-08 01:38:39,865 epoch 6 - iter 24/41 - loss 0.63149469 - samples/sec: 17.62 - lr: 0.020000\n",
-      "2021-09-08 01:38:40,115 epoch 6 - iter 28/41 - loss 0.62846759 - samples/sec: 16.07 - lr: 0.020000\n",
-      "2021-09-08 01:38:40,618 epoch 6 - iter 32/41 - loss 0.63051583 - samples/sec: 7.96 - lr: 0.020000\n",
-      "2021-09-08 01:38:40,819 epoch 6 - iter 36/41 - loss 0.63491879 - samples/sec: 19.94 - lr: 0.020000\n",
-      "2021-09-08 01:38:41,059 epoch 6 - iter 40/41 - loss 0.63802792 - samples/sec: 16.74 - lr: 0.020000\n",
-      "2021-09-08 01:38:41,120 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:41,120 EPOCH 6 done: loss 0.6414 - lr 0.0200000\n",
-      "2021-09-08 01:38:41,383 DEV : loss 0.4987775683403015 - score 0.5\n",
-      "2021-09-08 01:38:41,384 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:38:41,453 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:41,692 epoch 7 - iter 4/41 - loss 0.62590683 - samples/sec: 18.43 - lr: 0.020000\n",
-      "2021-09-08 01:38:42,087 epoch 7 - iter 8/41 - loss 0.69018535 - samples/sec: 10.16 - lr: 0.020000\n",
-      "2021-09-08 01:38:42,313 epoch 7 - iter 12/41 - loss 0.71446200 - samples/sec: 17.78 - lr: 0.020000\n",
-      "2021-09-08 01:38:42,527 epoch 7 - iter 16/41 - loss 0.70078681 - samples/sec: 18.68 - lr: 0.020000\n",
-      "2021-09-08 01:38:42,887 epoch 7 - iter 20/41 - loss 0.67758942 - samples/sec: 11.13 - lr: 0.020000\n",
-      "2021-09-08 01:38:43,246 epoch 7 - iter 24/41 - loss 0.68033126 - samples/sec: 11.17 - lr: 0.020000\n",
-      "2021-09-08 01:38:43,486 epoch 7 - iter 28/41 - loss 0.67923611 - samples/sec: 16.71 - lr: 0.020000\n",
-      "2021-09-08 01:38:43,916 epoch 7 - iter 32/41 - loss 0.68562675 - samples/sec: 9.32 - lr: 0.020000\n",
-      "2021-09-08 01:38:44,187 epoch 7 - iter 36/41 - loss 0.68795392 - samples/sec: 14.79 - lr: 0.020000\n",
-      "2021-09-08 01:38:44,559 epoch 7 - iter 40/41 - loss 0.68500457 - samples/sec: 10.79 - lr: 0.020000\n",
-      "2021-09-08 01:38:44,643 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:44,643 EPOCH 7 done: loss 0.6850 - lr 0.0200000\n",
-      "2021-09-08 01:38:44,871 DEV : loss 0.5309065580368042 - score 0.5\n",
-      "2021-09-08 01:38:44,873 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:38:44,879 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:45,274 epoch 8 - iter 4/41 - loss 0.67253979 - samples/sec: 10.58 - lr: 0.020000\n",
-      "2021-09-08 01:38:45,594 epoch 8 - iter 8/41 - loss 0.67139754 - samples/sec: 12.53 - lr: 0.020000\n",
-      "2021-09-08 01:38:46,003 epoch 8 - iter 12/41 - loss 0.66338026 - samples/sec: 9.78 - lr: 0.020000\n",
-      "2021-09-08 01:38:46,262 epoch 8 - iter 16/41 - loss 0.66582591 - samples/sec: 15.53 - lr: 0.020000\n",
-      "2021-09-08 01:38:46,593 epoch 8 - iter 20/41 - loss 0.65914617 - samples/sec: 12.09 - lr: 0.020000\n",
-      "2021-09-08 01:38:46,821 epoch 8 - iter 24/41 - loss 0.65455944 - samples/sec: 17.62 - lr: 0.020000\n",
-      "2021-09-08 01:38:47,120 epoch 8 - iter 28/41 - loss 0.65754718 - samples/sec: 13.42 - lr: 0.020000\n",
-      "2021-09-08 01:38:47,460 epoch 8 - iter 32/41 - loss 0.65721898 - samples/sec: 11.78 - lr: 0.020000\n",
-      "2021-09-08 01:38:47,670 epoch 8 - iter 36/41 - loss 0.65516099 - samples/sec: 19.08 - lr: 0.020000\n",
-      "2021-09-08 01:38:47,989 epoch 8 - iter 40/41 - loss 0.65377577 - samples/sec: 12.59 - lr: 0.020000\n",
-      "2021-09-08 01:38:48,051 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:48,051 EPOCH 8 done: loss 0.6541 - lr 0.0200000\n",
-      "2021-09-08 01:38:48,250 DEV : loss 0.5012527704238892 - score 0.5\n",
-      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:38:48,255 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:38:48,263 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:48,523 epoch 9 - iter 4/41 - loss 0.76813783 - samples/sec: 16.60 - lr: 0.010000\n",
-      "2021-09-08 01:38:48,731 epoch 9 - iter 8/41 - loss 0.70618981 - samples/sec: 19.27 - lr: 0.010000\n",
-      "2021-09-08 01:38:49,019 epoch 9 - iter 12/41 - loss 0.67351898 - samples/sec: 13.91 - lr: 0.010000\n",
-      "2021-09-08 01:38:49,249 epoch 9 - iter 16/41 - loss 0.66802711 - samples/sec: 17.45 - lr: 0.010000\n",
-      "2021-09-08 01:38:49,512 epoch 9 - iter 20/41 - loss 0.66650902 - samples/sec: 15.25 - lr: 0.010000\n",
-      "2021-09-08 01:38:49,771 epoch 9 - iter 24/41 - loss 0.67100507 - samples/sec: 15.47 - lr: 0.010000\n",
-      "2021-09-08 01:38:50,149 epoch 9 - iter 28/41 - loss 0.66530904 - samples/sec: 10.61 - lr: 0.010000\n",
-      "2021-09-08 01:38:50,609 epoch 9 - iter 32/41 - loss 0.66182741 - samples/sec: 8.70 - lr: 0.010000\n",
-      "2021-09-08 01:38:50,990 epoch 9 - iter 36/41 - loss 0.66073309 - samples/sec: 10.51 - lr: 0.010000\n",
-      "2021-09-08 01:38:51,213 epoch 9 - iter 40/41 - loss 0.65578881 - samples/sec: 17.99 - lr: 0.010000\n",
-      "2021-09-08 01:38:51,290 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:51,290 EPOCH 9 done: loss 0.6549 - lr 0.0100000\n",
-      "2021-09-08 01:38:51,400 DEV : loss 0.535936176776886 - score 0.5\n",
-      "2021-09-08 01:38:51,401 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:38:51,403 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:51,723 epoch 10 - iter 4/41 - loss 0.62100132 - samples/sec: 13.41 - lr: 0.010000\n",
-      "2021-09-08 01:38:52,075 epoch 10 - iter 8/41 - loss 0.62919036 - samples/sec: 11.39 - lr: 0.010000\n",
-      "2021-09-08 01:38:52,338 epoch 10 - iter 12/41 - loss 0.62884984 - samples/sec: 15.21 - lr: 0.010000\n",
-      "2021-09-08 01:38:52,568 epoch 10 - iter 16/41 - loss 0.62616628 - samples/sec: 17.49 - lr: 0.010000\n",
-      "2021-09-08 01:38:52,873 epoch 10 - iter 20/41 - loss 0.62668077 - samples/sec: 13.11 - lr: 0.010000\n",
-      "2021-09-08 01:38:53,284 epoch 10 - iter 24/41 - loss 0.62412070 - samples/sec: 9.75 - lr: 0.010000\n",
-      "2021-09-08 01:38:53,499 epoch 10 - iter 28/41 - loss 0.62162318 - samples/sec: 18.66 - lr: 0.010000\n",
-      "2021-09-08 01:38:53,793 epoch 10 - iter 32/41 - loss 0.62240134 - samples/sec: 13.63 - lr: 0.010000\n",
-      "2021-09-08 01:38:54,134 epoch 10 - iter 36/41 - loss 0.62297020 - samples/sec: 11.78 - lr: 0.010000\n",
-      "2021-09-08 01:38:54,350 epoch 10 - iter 40/41 - loss 0.62337838 - samples/sec: 18.54 - lr: 0.010000\n",
-      "2021-09-08 01:38:54,395 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:54,396 EPOCH 10 done: loss 0.6244 - lr 0.0100000\n",
-      "2021-09-08 01:38:54,539 DEV : loss 0.5192484855651855 - score 0.5\n",
-      "2021-09-08 01:38:54,540 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:38:58,761 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:38:58,762 Testing using best model ...\n",
-      "2021-09-08 01:38:58,763 loading file temp1/best-model.pt\n",
+      "2021-09-21 21:09:13,738 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:13,738 EPOCH 6 done: loss 0.3337 - lr 0.0200000\n",
+      "2021-09-21 21:09:13,980 DEV : loss 0.9398170709609985 - score 0.25\n",
+      "2021-09-21 21:09:13,981 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:09:13,984 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:14,483 epoch 7 - iter 4/41 - loss 0.04595522 - samples/sec: 8.86 - lr: 0.020000\n",
+      "2021-09-21 21:09:15,022 epoch 7 - iter 8/41 - loss 0.07084059 - samples/sec: 7.43 - lr: 0.020000\n",
+      "2021-09-21 21:09:15,418 epoch 7 - iter 12/41 - loss 0.22683473 - samples/sec: 10.11 - lr: 0.020000\n",
+      "2021-09-21 21:09:15,815 epoch 7 - iter 16/41 - loss 0.17582505 - samples/sec: 10.10 - lr: 0.020000\n",
+      "2021-09-21 21:09:16,198 epoch 7 - iter 20/41 - loss 0.24775325 - samples/sec: 10.45 - lr: 0.020000\n",
+      "2021-09-21 21:09:16,626 epoch 7 - iter 24/41 - loss 0.32441676 - samples/sec: 9.37 - lr: 0.020000\n",
+      "2021-09-21 21:09:17,045 epoch 7 - iter 28/41 - loss 0.32635135 - samples/sec: 9.56 - lr: 0.020000\n",
+      "2021-09-21 21:09:17,468 epoch 7 - iter 32/41 - loss 0.33822812 - samples/sec: 9.47 - lr: 0.020000\n",
+      "2021-09-21 21:09:17,832 epoch 7 - iter 36/41 - loss 0.35630057 - samples/sec: 11.00 - lr: 0.020000\n",
+      "2021-09-21 21:09:18,268 epoch 7 - iter 40/41 - loss 0.34496499 - samples/sec: 9.19 - lr: 0.020000\n",
+      "2021-09-21 21:09:18,336 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:18,337 EPOCH 7 done: loss 0.3369 - lr 0.0200000\n",
+      "2021-09-21 21:09:18,575 DEV : loss 0.5483263731002808 - score 0.25\n",
+      "2021-09-21 21:09:18,576 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:09:18,577 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:18,945 epoch 8 - iter 4/41 - loss 0.26342154 - samples/sec: 11.65 - lr: 0.020000\n",
+      "2021-09-21 21:09:19,273 epoch 8 - iter 8/41 - loss 0.17347763 - samples/sec: 12.23 - lr: 0.020000\n",
+      "2021-09-21 21:09:19,543 epoch 8 - iter 12/41 - loss 0.27334401 - samples/sec: 14.83 - lr: 0.020000\n",
+      "2021-09-21 21:09:19,842 epoch 8 - iter 16/41 - loss 0.23090703 - samples/sec: 13.40 - lr: 0.020000\n",
+      "2021-09-21 21:09:20,078 epoch 8 - iter 20/41 - loss 0.20026004 - samples/sec: 16.99 - lr: 0.020000\n",
+      "2021-09-21 21:09:20,329 epoch 8 - iter 24/41 - loss 0.25572673 - samples/sec: 15.96 - lr: 0.020000\n",
+      "2021-09-21 21:09:20,564 epoch 8 - iter 28/41 - loss 0.26247533 - samples/sec: 17.07 - lr: 0.020000\n",
+      "2021-09-21 21:09:20,775 epoch 8 - iter 32/41 - loss 0.23867287 - samples/sec: 19.03 - lr: 0.020000\n",
+      "2021-09-21 21:09:21,003 epoch 8 - iter 36/41 - loss 0.24721019 - samples/sec: 17.57 - lr: 0.020000\n",
+      "2021-09-21 21:09:21,219 epoch 8 - iter 40/41 - loss 0.25598805 - samples/sec: 18.52 - lr: 0.020000\n",
+      "2021-09-21 21:09:21,279 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:21,279 EPOCH 8 done: loss 0.2505 - lr 0.0200000\n",
+      "2021-09-21 21:09:21,570 DEV : loss 0.91800856590271 - score 0.0\n",
+      "2021-09-21 21:09:21,571 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:09:21,660 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:21,940 epoch 9 - iter 4/41 - loss 0.02778419 - samples/sec: 15.36 - lr: 0.020000\n",
+      "2021-09-21 21:09:22,165 epoch 9 - iter 8/41 - loss 0.23200482 - samples/sec: 17.79 - lr: 0.020000\n",
+      "2021-09-21 21:09:22,430 epoch 9 - iter 12/41 - loss 0.22091631 - samples/sec: 15.11 - lr: 0.020000\n",
+      "2021-09-21 21:09:22,632 epoch 9 - iter 16/41 - loss 0.17056623 - samples/sec: 19.83 - lr: 0.020000\n",
+      "2021-09-21 21:09:22,850 epoch 9 - iter 20/41 - loss 0.18602968 - samples/sec: 18.39 - lr: 0.020000\n",
+      "2021-09-21 21:09:23,076 epoch 9 - iter 24/41 - loss 0.17271503 - samples/sec: 17.79 - lr: 0.020000\n",
+      "2021-09-21 21:09:23,375 epoch 9 - iter 28/41 - loss 0.18710482 - samples/sec: 13.37 - lr: 0.020000\n",
+      "2021-09-21 21:09:23,600 epoch 9 - iter 32/41 - loss 0.20258351 - samples/sec: 17.81 - lr: 0.020000\n",
+      "2021-09-21 21:09:23,818 epoch 9 - iter 36/41 - loss 0.27065507 - samples/sec: 18.47 - lr: 0.020000\n",
+      "2021-09-21 21:09:24,076 epoch 9 - iter 40/41 - loss 0.27270162 - samples/sec: 15.53 - lr: 0.020000\n",
+      "2021-09-21 21:09:24,131 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:24,131 EPOCH 9 done: loss 0.2723 - lr 0.0200000\n",
+      "2021-09-21 21:09:26,806 DEV : loss 0.5252734422683716 - score 0.25\n",
+      "Epoch     9: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:09:26,807 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:09:26,846 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:27,259 epoch 10 - iter 4/41 - loss 0.00438769 - samples/sec: 10.88 - lr: 0.010000\n",
+      "2021-09-21 21:09:27,775 epoch 10 - iter 8/41 - loss 0.03682556 - samples/sec: 7.76 - lr: 0.010000\n",
+      "2021-09-21 21:09:28,189 epoch 10 - iter 12/41 - loss 0.34493968 - samples/sec: 9.66 - lr: 0.010000\n",
+      "2021-09-21 21:09:28,771 epoch 10 - iter 16/41 - loss 0.30486842 - samples/sec: 6.89 - lr: 0.010000\n",
+      "2021-09-21 21:09:29,092 epoch 10 - iter 20/41 - loss 0.25719138 - samples/sec: 12.48 - lr: 0.010000\n",
+      "2021-09-21 21:09:29,471 epoch 10 - iter 24/41 - loss 0.21901079 - samples/sec: 10.58 - lr: 0.010000\n",
+      "2021-09-21 21:09:29,876 epoch 10 - iter 28/41 - loss 0.25336917 - samples/sec: 9.89 - lr: 0.010000\n",
+      "2021-09-21 21:09:30,261 epoch 10 - iter 32/41 - loss 0.22798192 - samples/sec: 10.38 - lr: 0.010000\n",
+      "2021-09-21 21:09:30,659 epoch 10 - iter 36/41 - loss 0.20583655 - samples/sec: 10.08 - lr: 0.010000\n",
+      "2021-09-21 21:09:31,160 epoch 10 - iter 40/41 - loss 0.23841469 - samples/sec: 7.99 - lr: 0.010000\n",
+      "2021-09-21 21:09:31,251 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:31,251 EPOCH 10 done: loss 0.2327 - lr 0.0100000\n",
+      "2021-09-21 21:09:31,822 DEV : loss 1.4163213968276978 - score 0.25\n",
+      "2021-09-21 21:09:31,823 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:09:35,844 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:09:35,845 Testing using best model ...\n",
+      "2021-09-21 21:09:35,846 loading file temp1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:39:04,200 \t0.0\n",
-      "2021-09-08 01:39:04,201 \n",
+      "2021-09-21 21:09:41,507 \t0.6\n",
+      "2021-09-21 21:09:41,507 \n",
       "Results:\n",
-      "- F-score (micro) 0.0\n",
-      "- F-score (macro) 0.0\n",
-      "- Accuracy 0.0\n",
+      "- F-score (micro) 0.6\n",
+      "- F-score (macro) 0.4333\n",
+      "- Accuracy 0.6\n",
       "\n",
       "By class:\n",
       "              precision    recall  f1-score   support\n",
       "\n",
-      "       awful     0.0000    0.0000    0.0000         2\n",
+      "       awful     0.5000    0.5000    0.5000         2\n",
       "         bad     0.0000    0.0000    0.0000         1\n",
-      "     neutral     0.0000    0.0000    0.0000         2\n",
-      "        good     0.0000    0.0000    0.0000         0\n",
+      "     neutral     0.5000    1.0000    0.6667         1\n",
+      "        good     1.0000    1.0000    1.0000         1\n",
       "       great     0.0000    0.0000    0.0000         0\n",
       "\n",
-      "   micro avg     0.0000    0.0000    0.0000         5\n",
-      "   macro avg     0.0000    0.0000    0.0000         5\n",
-      "weighted avg     0.0000    0.0000    0.0000         5\n",
-      " samples avg     0.0000    0.0000    0.0000         5\n",
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 01:39:04,201 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:51,543 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
+      "   micro avg     0.6000    0.6000    0.6000         5\n",
+      "   macro avg     0.4000    0.5000    0.4333         5\n",
+      "weighted avg     0.5000    0.6000    0.5333         5\n",
+      " samples avg     0.6000    0.6000    0.6000         5\n",
+      "\n",
+      "2021-09-21 21:09:41,508 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:30,023 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:39:55,617 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:10:34,658 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 46/46 [00:00<00:00, 13289.57it/s]"
+      "100%|██████████| 46/46 [00:00<00:00, 15052.11it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:39:55,622 [b'awful', b'bad', b'neutral', b'good', b'great']\n",
-      "2021-09-08 01:39:55,633 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:55,635 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:10:34,663 [b'awful', b'bad', b'neutral', b'good', b'great']\n",
+      "2021-09-21 21:10:34,818 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:34,820 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -2064,25 +2044,24 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:39:55,635 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:55,636 Corpus: \"Corpus: 41 train + 4 dev + 5 test sentences\"\n",
-      "2021-09-08 01:39:55,636 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:55,636 Parameters:\n",
-      "2021-09-08 01:39:55,636  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:39:55,637  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:39:55,637  - patience: \"3\"\n",
-      "2021-09-08 01:39:55,637  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:39:55,638  - max_epochs: \"10\"\n",
-      "2021-09-08 01:39:55,638  - shuffle: \"True\"\n",
-      "2021-09-08 01:39:55,638  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:39:55,638  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:39:55,639 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:55,639 Model training base path: \"temp1\"\n",
-      "2021-09-08 01:39:55,639 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:55,640 Device: cuda:0\n",
-      "2021-09-08 01:39:55,640 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:55,640 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:39:55,647 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:10:34,820 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:34,821 Corpus: \"Corpus: 41 train + 4 dev + 5 test sentences\"\n",
+      "2021-09-21 21:10:34,821 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:34,822 Parameters:\n",
+      "2021-09-21 21:10:34,822  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:10:34,822  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:10:34,822  - patience: \"3\"\n",
+      "2021-09-21 21:10:34,823  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:10:34,823  - max_epochs: \"10\"\n",
+      "2021-09-21 21:10:34,823  - shuffle: \"True\"\n",
+      "2021-09-21 21:10:34,824  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:10:34,825  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:10:34,825 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:34,825 Model training base path: \"temp1\"\n",
+      "2021-09-21 21:10:34,826 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:34,826 Device: cuda:0\n",
+      "2021-09-21 21:10:34,827 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:34,827 Embeddings storage mode: cpu\n"
      ]
     },
     {
@@ -2096,222 +2075,214 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:39:55,912 epoch 1 - iter 4/41 - loss 0.41927256 - samples/sec: 15.96 - lr: 0.020000\n",
-      "2021-09-08 01:39:56,242 epoch 1 - iter 8/41 - loss 0.25085089 - samples/sec: 12.15 - lr: 0.020000\n",
-      "2021-09-08 01:39:56,502 epoch 1 - iter 12/41 - loss 0.62042087 - samples/sec: 15.42 - lr: 0.020000\n",
-      "2021-09-08 01:39:56,775 epoch 1 - iter 16/41 - loss 0.71660874 - samples/sec: 14.67 - lr: 0.020000\n",
-      "2021-09-08 01:39:57,210 epoch 1 - iter 20/41 - loss 0.66651021 - samples/sec: 9.20 - lr: 0.020000\n",
-      "2021-09-08 01:39:57,454 epoch 1 - iter 24/41 - loss 0.57357481 - samples/sec: 16.47 - lr: 0.020000\n",
-      "2021-09-08 01:39:57,934 epoch 1 - iter 28/41 - loss 0.70363871 - samples/sec: 8.34 - lr: 0.020000\n",
-      "2021-09-08 01:39:58,287 epoch 1 - iter 32/41 - loss 0.71364983 - samples/sec: 11.36 - lr: 0.020000\n",
-      "2021-09-08 01:39:58,568 epoch 1 - iter 36/41 - loss 0.71804081 - samples/sec: 14.28 - lr: 0.020000\n",
-      "2021-09-08 01:39:58,787 epoch 1 - iter 40/41 - loss 0.71155840 - samples/sec: 18.29 - lr: 0.020000\n",
-      "2021-09-08 01:39:58,837 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:39:58,837 EPOCH 1 done: loss 0.7081 - lr 0.0200000\n",
-      "2021-09-08 01:39:58,978 DEV : loss 0.6070660948753357 - score 0.5\n",
-      "2021-09-08 01:39:58,979 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:10:35,042 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:35,355 epoch 1 - iter 4/41 - loss 0.38984622 - samples/sec: 13.57 - lr: 0.020000\n",
+      "2021-09-21 21:10:35,751 epoch 1 - iter 8/41 - loss 0.43079359 - samples/sec: 10.11 - lr: 0.020000\n",
+      "2021-09-21 21:10:36,015 epoch 1 - iter 12/41 - loss 0.58579075 - samples/sec: 15.17 - lr: 0.020000\n",
+      "2021-09-21 21:10:36,308 epoch 1 - iter 16/41 - loss 0.52388551 - samples/sec: 13.68 - lr: 0.020000\n",
+      "2021-09-21 21:10:36,752 epoch 1 - iter 20/41 - loss 0.65538358 - samples/sec: 9.02 - lr: 0.020000\n",
+      "2021-09-21 21:10:37,063 epoch 1 - iter 24/41 - loss 0.56309211 - samples/sec: 12.87 - lr: 0.020000\n",
+      "2021-09-21 21:10:37,317 epoch 1 - iter 28/41 - loss 0.65418052 - samples/sec: 15.81 - lr: 0.020000\n",
+      "2021-09-21 21:10:37,629 epoch 1 - iter 32/41 - loss 0.62008736 - samples/sec: 12.82 - lr: 0.020000\n",
+      "2021-09-21 21:10:37,909 epoch 1 - iter 36/41 - loss 0.64187118 - samples/sec: 14.29 - lr: 0.020000\n",
+      "2021-09-21 21:10:38,103 epoch 1 - iter 40/41 - loss 0.60822798 - samples/sec: 20.72 - lr: 0.020000\n",
+      "2021-09-21 21:10:38,148 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:38,149 EPOCH 1 done: loss 0.5937 - lr 0.0200000\n",
+      "2021-09-21 21:10:38,856 DEV : loss 1.363743543624878 - score 0.0\n",
+      "2021-09-21 21:10:38,856 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:40:03,433 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:03,831 epoch 2 - iter 4/41 - loss 0.37453003 - samples/sec: 10.51 - lr: 0.020000\n",
-      "2021-09-08 01:40:04,094 epoch 2 - iter 8/41 - loss 0.30073929 - samples/sec: 15.25 - lr: 0.020000\n",
-      "2021-09-08 01:40:04,390 epoch 2 - iter 12/41 - loss 0.34411559 - samples/sec: 13.56 - lr: 0.020000\n",
-      "2021-09-08 01:40:04,776 epoch 2 - iter 16/41 - loss 0.38946697 - samples/sec: 10.40 - lr: 0.020000\n",
-      "2021-09-08 01:40:05,150 epoch 2 - iter 20/41 - loss 0.40755808 - samples/sec: 10.71 - lr: 0.020000\n",
-      "2021-09-08 01:40:05,376 epoch 2 - iter 24/41 - loss 0.54955224 - samples/sec: 17.71 - lr: 0.020000\n",
-      "2021-09-08 01:40:05,744 epoch 2 - iter 28/41 - loss 0.53975347 - samples/sec: 10.90 - lr: 0.020000\n",
-      "2021-09-08 01:40:06,081 epoch 2 - iter 32/41 - loss 0.55308454 - samples/sec: 11.89 - lr: 0.020000\n",
-      "2021-09-08 01:40:06,431 epoch 2 - iter 36/41 - loss 0.54719381 - samples/sec: 11.44 - lr: 0.020000\n",
-      "2021-09-08 01:40:06,734 epoch 2 - iter 40/41 - loss 0.53399752 - samples/sec: 13.24 - lr: 0.020000\n",
-      "2021-09-08 01:40:06,964 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:06,965 EPOCH 2 done: loss 0.5332 - lr 0.0200000\n",
-      "2021-09-08 01:40:07,145 DEV : loss 0.5769699215888977 - score 0.25\n",
-      "2021-09-08 01:40:07,146 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:40:07,148 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:07,763 epoch 3 - iter 4/41 - loss 0.37778380 - samples/sec: 7.09 - lr: 0.020000\n",
-      "2021-09-08 01:40:08,223 epoch 3 - iter 8/41 - loss 0.45869022 - samples/sec: 8.70 - lr: 0.020000\n",
-      "2021-09-08 01:40:08,552 epoch 3 - iter 12/41 - loss 0.43245331 - samples/sec: 12.20 - lr: 0.020000\n",
-      "2021-09-08 01:40:08,813 epoch 3 - iter 16/41 - loss 0.46119018 - samples/sec: 15.34 - lr: 0.020000\n",
-      "2021-09-08 01:40:09,029 epoch 3 - iter 20/41 - loss 0.59345980 - samples/sec: 18.61 - lr: 0.020000\n",
-      "2021-09-08 01:40:09,329 epoch 3 - iter 24/41 - loss 0.62389143 - samples/sec: 13.33 - lr: 0.020000\n",
-      "2021-09-08 01:40:09,817 epoch 3 - iter 28/41 - loss 0.58873656 - samples/sec: 8.21 - lr: 0.020000\n",
-      "2021-09-08 01:40:10,061 epoch 3 - iter 32/41 - loss 0.55395266 - samples/sec: 16.48 - lr: 0.020000\n",
-      "2021-09-08 01:40:10,409 epoch 3 - iter 36/41 - loss 0.55763406 - samples/sec: 11.51 - lr: 0.020000\n",
-      "2021-09-08 01:40:10,729 epoch 3 - iter 40/41 - loss 0.55259855 - samples/sec: 12.53 - lr: 0.020000\n",
-      "2021-09-08 01:40:10,796 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:10,796 EPOCH 3 done: loss 0.5461 - lr 0.0200000\n",
-      "2021-09-08 01:40:11,054 DEV : loss 0.9600083827972412 - score 0.25\n",
-      "2021-09-08 01:40:11,055 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:40:11,139 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:11,377 epoch 4 - iter 4/41 - loss 0.14588716 - samples/sec: 18.40 - lr: 0.020000\n",
-      "2021-09-08 01:40:11,634 epoch 4 - iter 8/41 - loss 0.36384823 - samples/sec: 15.61 - lr: 0.020000\n",
-      "2021-09-08 01:40:11,848 epoch 4 - iter 12/41 - loss 0.44715926 - samples/sec: 18.76 - lr: 0.020000\n",
-      "2021-09-08 01:40:12,157 epoch 4 - iter 16/41 - loss 0.43267613 - samples/sec: 12.94 - lr: 0.020000\n",
-      "2021-09-08 01:40:12,438 epoch 4 - iter 20/41 - loss 0.43945815 - samples/sec: 14.28 - lr: 0.020000\n",
-      "2021-09-08 01:40:12,857 epoch 4 - iter 24/41 - loss 0.39310952 - samples/sec: 9.55 - lr: 0.020000\n",
-      "2021-09-08 01:40:13,283 epoch 4 - iter 28/41 - loss 0.38662313 - samples/sec: 9.41 - lr: 0.020000\n",
-      "2021-09-08 01:40:13,676 epoch 4 - iter 32/41 - loss 0.36064424 - samples/sec: 10.19 - lr: 0.020000\n",
-      "2021-09-08 01:40:14,389 epoch 4 - iter 36/41 - loss 0.36014192 - samples/sec: 5.62 - lr: 0.020000\n",
-      "2021-09-08 01:40:14,695 epoch 4 - iter 40/41 - loss 0.40196361 - samples/sec: 13.10 - lr: 0.020000\n",
-      "2021-09-08 01:40:14,753 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:14,754 EPOCH 4 done: loss 0.4234 - lr 0.0200000\n",
-      "2021-09-08 01:40:14,922 DEV : loss 1.607583999633789 - score 0.0\n",
-      "2021-09-08 01:40:14,923 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:40:14,926 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:15,265 epoch 5 - iter 4/41 - loss 0.35252955 - samples/sec: 12.38 - lr: 0.020000\n",
-      "2021-09-08 01:40:15,482 epoch 5 - iter 8/41 - loss 0.28861463 - samples/sec: 18.48 - lr: 0.020000\n",
-      "2021-09-08 01:40:15,801 epoch 5 - iter 12/41 - loss 0.37933424 - samples/sec: 12.56 - lr: 0.020000\n",
-      "2021-09-08 01:40:16,240 epoch 5 - iter 16/41 - loss 0.34746044 - samples/sec: 9.13 - lr: 0.020000\n",
-      "2021-09-08 01:40:16,568 epoch 5 - iter 20/41 - loss 0.41091053 - samples/sec: 12.21 - lr: 0.020000\n",
-      "2021-09-08 01:40:16,800 epoch 5 - iter 24/41 - loss 0.41368421 - samples/sec: 17.28 - lr: 0.020000\n",
-      "2021-09-08 01:40:17,116 epoch 5 - iter 28/41 - loss 0.42347299 - samples/sec: 12.69 - lr: 0.020000\n",
-      "2021-09-08 01:40:17,347 epoch 5 - iter 32/41 - loss 0.40917578 - samples/sec: 17.42 - lr: 0.020000\n",
-      "2021-09-08 01:40:17,774 epoch 5 - iter 36/41 - loss 0.37809578 - samples/sec: 9.38 - lr: 0.020000\n",
-      "2021-09-08 01:40:18,081 epoch 5 - iter 40/41 - loss 0.36828566 - samples/sec: 13.08 - lr: 0.020000\n",
-      "2021-09-08 01:40:18,152 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:18,153 EPOCH 5 done: loss 0.3736 - lr 0.0200000\n",
-      "2021-09-08 01:40:18,651 DEV : loss 1.0219388008117676 - score 0.25\n",
-      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:40:18,652 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:40:18,664 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:18,943 epoch 6 - iter 4/41 - loss 0.45734189 - samples/sec: 15.20 - lr: 0.010000\n",
-      "2021-09-08 01:40:19,298 epoch 6 - iter 8/41 - loss 0.31833719 - samples/sec: 11.27 - lr: 0.010000\n",
-      "2021-09-08 01:40:19,733 epoch 6 - iter 12/41 - loss 0.36594662 - samples/sec: 9.21 - lr: 0.010000\n",
-      "2021-09-08 01:40:20,135 epoch 6 - iter 16/41 - loss 0.29310076 - samples/sec: 9.97 - lr: 0.010000\n",
-      "2021-09-08 01:40:20,605 epoch 6 - iter 20/41 - loss 0.26770599 - samples/sec: 8.52 - lr: 0.010000\n",
-      "2021-09-08 01:40:21,093 epoch 6 - iter 24/41 - loss 0.23316237 - samples/sec: 8.21 - lr: 0.010000\n",
-      "2021-09-08 01:40:21,469 epoch 6 - iter 28/41 - loss 0.24944932 - samples/sec: 10.65 - lr: 0.010000\n",
-      "2021-09-08 01:40:21,870 epoch 6 - iter 32/41 - loss 0.25024179 - samples/sec: 9.99 - lr: 0.010000\n",
-      "2021-09-08 01:40:22,171 epoch 6 - iter 36/41 - loss 0.26217239 - samples/sec: 13.30 - lr: 0.010000\n",
-      "2021-09-08 01:40:22,580 epoch 6 - iter 40/41 - loss 0.24132689 - samples/sec: 9.79 - lr: 0.010000\n",
-      "2021-09-08 01:40:22,654 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:10:44,023 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:44,367 epoch 2 - iter 4/41 - loss 0.85307211 - samples/sec: 12.15 - lr: 0.020000\n",
+      "2021-09-21 21:10:44,979 epoch 2 - iter 8/41 - loss 0.74785738 - samples/sec: 6.54 - lr: 0.020000\n",
+      "2021-09-21 21:10:45,395 epoch 2 - iter 12/41 - loss 0.63162080 - samples/sec: 9.63 - lr: 0.020000\n",
+      "2021-09-21 21:10:45,819 epoch 2 - iter 16/41 - loss 0.64089441 - samples/sec: 9.45 - lr: 0.020000\n",
+      "2021-09-21 21:10:46,146 epoch 2 - iter 20/41 - loss 0.62054954 - samples/sec: 12.25 - lr: 0.020000\n",
+      "2021-09-21 21:10:46,385 epoch 2 - iter 24/41 - loss 0.60463366 - samples/sec: 16.78 - lr: 0.020000\n",
+      "2021-09-21 21:10:46,662 epoch 2 - iter 28/41 - loss 0.61560587 - samples/sec: 14.45 - lr: 0.020000\n",
+      "2021-09-21 21:10:47,125 epoch 2 - iter 32/41 - loss 0.60974931 - samples/sec: 8.66 - lr: 0.020000\n",
+      "2021-09-21 21:10:47,665 epoch 2 - iter 36/41 - loss 0.62516262 - samples/sec: 7.41 - lr: 0.020000\n",
+      "2021-09-21 21:10:48,476 epoch 2 - iter 40/41 - loss 0.61057757 - samples/sec: 4.94 - lr: 0.020000\n",
+      "2021-09-21 21:10:48,570 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:48,570 EPOCH 2 done: loss 0.6031 - lr 0.0200000\n",
+      "2021-09-21 21:10:51,179 DEV : loss 0.5876691341400146 - score 0.0\n",
+      "2021-09-21 21:10:51,180 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:10:59,343 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:10:59,554 epoch 3 - iter 4/41 - loss 0.31097497 - samples/sec: 20.45 - lr: 0.020000\n",
+      "2021-09-21 21:10:59,889 epoch 3 - iter 8/41 - loss 0.36214759 - samples/sec: 11.97 - lr: 0.020000\n",
+      "2021-09-21 21:11:00,124 epoch 3 - iter 12/41 - loss 0.42692839 - samples/sec: 17.04 - lr: 0.020000\n",
+      "2021-09-21 21:11:00,313 epoch 3 - iter 16/41 - loss 0.41568641 - samples/sec: 21.29 - lr: 0.020000\n",
+      "2021-09-21 21:11:00,493 epoch 3 - iter 20/41 - loss 0.37692988 - samples/sec: 22.25 - lr: 0.020000\n",
+      "2021-09-21 21:11:00,686 epoch 3 - iter 24/41 - loss 0.41950209 - samples/sec: 20.76 - lr: 0.020000\n",
+      "2021-09-21 21:11:00,908 epoch 3 - iter 28/41 - loss 0.45071359 - samples/sec: 18.13 - lr: 0.020000\n",
+      "2021-09-21 21:11:01,122 epoch 3 - iter 32/41 - loss 0.45270548 - samples/sec: 18.68 - lr: 0.020000\n",
+      "2021-09-21 21:11:01,421 epoch 3 - iter 36/41 - loss 0.44134196 - samples/sec: 13.40 - lr: 0.020000\n",
+      "2021-09-21 21:11:01,649 epoch 3 - iter 40/41 - loss 0.46051849 - samples/sec: 17.60 - lr: 0.020000\n",
+      "2021-09-21 21:11:01,699 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:01,700 EPOCH 3 done: loss 0.4606 - lr 0.0200000\n",
+      "2021-09-21 21:11:01,964 DEV : loss 0.5474129915237427 - score 0.25\n",
+      "2021-09-21 21:11:01,965 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:11:09,645 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:09,930 epoch 4 - iter 4/41 - loss 0.69327688 - samples/sec: 15.33 - lr: 0.020000\n",
+      "2021-09-21 21:11:10,186 epoch 4 - iter 8/41 - loss 0.42670258 - samples/sec: 15.70 - lr: 0.020000\n",
+      "2021-09-21 21:11:10,455 epoch 4 - iter 12/41 - loss 0.46829305 - samples/sec: 14.85 - lr: 0.020000\n",
+      "2021-09-21 21:11:10,742 epoch 4 - iter 16/41 - loss 0.51842569 - samples/sec: 14.00 - lr: 0.020000\n",
+      "2021-09-21 21:11:11,102 epoch 4 - iter 20/41 - loss 0.47375322 - samples/sec: 11.11 - lr: 0.020000\n",
+      "2021-09-21 21:11:11,328 epoch 4 - iter 24/41 - loss 0.48065979 - samples/sec: 17.77 - lr: 0.020000\n",
+      "2021-09-21 21:11:11,562 epoch 4 - iter 28/41 - loss 0.47357276 - samples/sec: 17.16 - lr: 0.020000\n",
+      "2021-09-21 21:11:11,796 epoch 4 - iter 32/41 - loss 0.49128344 - samples/sec: 17.12 - lr: 0.020000\n",
+      "2021-09-21 21:11:12,006 epoch 4 - iter 36/41 - loss 0.46737933 - samples/sec: 19.08 - lr: 0.020000\n",
+      "2021-09-21 21:11:12,366 epoch 4 - iter 40/41 - loss 0.47591813 - samples/sec: 11.12 - lr: 0.020000\n",
+      "2021-09-21 21:11:12,431 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:12,431 EPOCH 4 done: loss 0.4813 - lr 0.0200000\n",
+      "2021-09-21 21:11:14,273 DEV : loss 0.48892855644226074 - score 0.25\n",
+      "2021-09-21 21:11:14,275 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:11:18,860 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:19,161 epoch 5 - iter 4/41 - loss 0.62119365 - samples/sec: 14.38 - lr: 0.020000\n",
+      "2021-09-21 21:11:19,438 epoch 5 - iter 8/41 - loss 0.45135803 - samples/sec: 14.51 - lr: 0.020000\n",
+      "2021-09-21 21:11:19,758 epoch 5 - iter 12/41 - loss 0.41040208 - samples/sec: 12.51 - lr: 0.020000\n",
+      "2021-09-21 21:11:20,057 epoch 5 - iter 16/41 - loss 0.37285242 - samples/sec: 13.39 - lr: 0.020000\n",
+      "2021-09-21 21:11:20,323 epoch 5 - iter 20/41 - loss 0.44775972 - samples/sec: 15.11 - lr: 0.020000\n",
+      "2021-09-21 21:11:20,580 epoch 5 - iter 24/41 - loss 0.41353142 - samples/sec: 15.63 - lr: 0.020000\n",
+      "2021-09-21 21:11:20,856 epoch 5 - iter 28/41 - loss 0.37294240 - samples/sec: 14.49 - lr: 0.020000\n",
+      "2021-09-21 21:11:21,433 epoch 5 - iter 32/41 - loss 0.35586072 - samples/sec: 6.94 - lr: 0.020000\n",
+      "2021-09-21 21:11:21,937 epoch 5 - iter 36/41 - loss 0.32731345 - samples/sec: 7.96 - lr: 0.020000\n",
+      "2021-09-21 21:11:22,211 epoch 5 - iter 40/41 - loss 0.33654580 - samples/sec: 14.61 - lr: 0.020000\n",
+      "2021-09-21 21:11:22,337 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:22,337 EPOCH 5 done: loss 0.3300 - lr 0.0200000\n",
+      "2021-09-21 21:11:22,512 DEV : loss 0.6776857972145081 - score 0.25\n",
+      "2021-09-21 21:11:22,516 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:11:22,520 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:22,839 epoch 6 - iter 4/41 - loss 0.65030228 - samples/sec: 13.96 - lr: 0.020000\n",
+      "2021-09-21 21:11:23,110 epoch 6 - iter 8/41 - loss 0.59385317 - samples/sec: 14.79 - lr: 0.020000\n",
+      "2021-09-21 21:11:23,472 epoch 6 - iter 12/41 - loss 0.54128269 - samples/sec: 11.07 - lr: 0.020000\n",
+      "2021-09-21 21:11:23,780 epoch 6 - iter 16/41 - loss 0.53205319 - samples/sec: 13.01 - lr: 0.020000\n",
+      "2021-09-21 21:11:24,057 epoch 6 - iter 20/41 - loss 0.51657641 - samples/sec: 14.47 - lr: 0.020000\n",
+      "2021-09-21 21:11:24,565 epoch 6 - iter 24/41 - loss 0.50933195 - samples/sec: 7.88 - lr: 0.020000\n",
+      "2021-09-21 21:11:24,934 epoch 6 - iter 28/41 - loss 0.50760008 - samples/sec: 10.86 - lr: 0.020000\n",
+      "2021-09-21 21:11:25,238 epoch 6 - iter 32/41 - loss 0.46115174 - samples/sec: 13.17 - lr: 0.020000\n",
+      "2021-09-21 21:11:25,719 epoch 6 - iter 36/41 - loss 0.43201168 - samples/sec: 8.33 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:40:22,654 EPOCH 6 done: loss 0.2651 - lr 0.0100000\n",
-      "2021-09-08 01:40:22,786 DEV : loss 0.9498051404953003 - score 0.0\n",
-      "2021-09-08 01:40:22,787 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:40:22,789 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:23,137 epoch 7 - iter 4/41 - loss 0.25545279 - samples/sec: 12.35 - lr: 0.010000\n",
-      "2021-09-08 01:40:23,498 epoch 7 - iter 8/41 - loss 0.30228368 - samples/sec: 11.11 - lr: 0.010000\n",
-      "2021-09-08 01:40:23,839 epoch 7 - iter 12/41 - loss 0.22479161 - samples/sec: 11.73 - lr: 0.010000\n",
-      "2021-09-08 01:40:24,128 epoch 7 - iter 16/41 - loss 0.20778516 - samples/sec: 13.89 - lr: 0.010000\n",
-      "2021-09-08 01:40:24,326 epoch 7 - iter 20/41 - loss 0.27050058 - samples/sec: 20.24 - lr: 0.010000\n",
-      "2021-09-08 01:40:24,603 epoch 7 - iter 24/41 - loss 0.22681276 - samples/sec: 14.53 - lr: 0.010000\n",
-      "2021-09-08 01:40:24,927 epoch 7 - iter 28/41 - loss 0.20525723 - samples/sec: 12.34 - lr: 0.010000\n",
-      "2021-09-08 01:40:25,267 epoch 7 - iter 32/41 - loss 0.24229161 - samples/sec: 11.81 - lr: 0.010000\n",
-      "2021-09-08 01:40:25,659 epoch 7 - iter 36/41 - loss 0.21910166 - samples/sec: 10.22 - lr: 0.010000\n",
-      "2021-09-08 01:40:25,948 epoch 7 - iter 40/41 - loss 0.22876505 - samples/sec: 13.89 - lr: 0.010000\n",
-      "2021-09-08 01:40:26,026 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:26,027 EPOCH 7 done: loss 0.2307 - lr 0.0100000\n",
-      "2021-09-08 01:40:26,272 DEV : loss 0.999095618724823 - score 0.25\n",
-      "2021-09-08 01:40:26,273 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:40:26,344 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:26,595 epoch 8 - iter 4/41 - loss 0.02152126 - samples/sec: 17.31 - lr: 0.010000\n",
-      "2021-09-08 01:40:27,084 epoch 8 - iter 8/41 - loss 0.08535735 - samples/sec: 8.19 - lr: 0.010000\n",
-      "2021-09-08 01:40:27,503 epoch 8 - iter 12/41 - loss 0.10191877 - samples/sec: 9.57 - lr: 0.010000\n",
-      "2021-09-08 01:40:27,758 epoch 8 - iter 16/41 - loss 0.10211017 - samples/sec: 15.69 - lr: 0.010000\n",
-      "2021-09-08 01:40:28,098 epoch 8 - iter 20/41 - loss 0.08721509 - samples/sec: 11.81 - lr: 0.010000\n",
-      "2021-09-08 01:40:28,473 epoch 8 - iter 24/41 - loss 0.11081539 - samples/sec: 10.67 - lr: 0.010000\n",
-      "2021-09-08 01:40:28,792 epoch 8 - iter 28/41 - loss 0.09707569 - samples/sec: 12.55 - lr: 0.010000\n",
-      "2021-09-08 01:40:29,291 epoch 8 - iter 32/41 - loss 0.08964611 - samples/sec: 8.03 - lr: 0.010000\n",
-      "2021-09-08 01:40:29,539 epoch 8 - iter 36/41 - loss 0.12086486 - samples/sec: 16.13 - lr: 0.010000\n",
-      "2021-09-08 01:40:29,843 epoch 8 - iter 40/41 - loss 0.11454520 - samples/sec: 13.18 - lr: 0.010000\n",
-      "2021-09-08 01:40:30,097 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:30,098 EPOCH 8 done: loss 0.1165 - lr 0.0100000\n",
-      "2021-09-08 01:40:30,240 DEV : loss 1.2182729244232178 - score 0.0\n",
-      "2021-09-08 01:40:30,240 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:40:30,243 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:30,764 epoch 9 - iter 4/41 - loss 0.14301720 - samples/sec: 7.93 - lr: 0.010000\n",
-      "2021-09-08 01:40:31,105 epoch 9 - iter 8/41 - loss 0.10606677 - samples/sec: 11.75 - lr: 0.010000\n",
-      "2021-09-08 01:40:31,518 epoch 9 - iter 12/41 - loss 0.07679499 - samples/sec: 9.69 - lr: 0.010000\n",
-      "2021-09-08 01:40:31,887 epoch 9 - iter 16/41 - loss 0.07535078 - samples/sec: 10.85 - lr: 0.010000\n",
-      "2021-09-08 01:40:32,150 epoch 9 - iter 20/41 - loss 0.06284501 - samples/sec: 15.23 - lr: 0.010000\n",
-      "2021-09-08 01:40:32,560 epoch 9 - iter 24/41 - loss 0.05839588 - samples/sec: 9.78 - lr: 0.010000\n",
-      "2021-09-08 01:40:32,891 epoch 9 - iter 28/41 - loss 0.07283357 - samples/sec: 12.12 - lr: 0.010000\n",
-      "2021-09-08 01:40:33,178 epoch 9 - iter 32/41 - loss 0.07300184 - samples/sec: 13.92 - lr: 0.010000\n",
-      "2021-09-08 01:40:33,667 epoch 9 - iter 36/41 - loss 0.06618397 - samples/sec: 8.20 - lr: 0.010000\n",
-      "2021-09-08 01:40:34,062 epoch 9 - iter 40/41 - loss 0.08202017 - samples/sec: 10.13 - lr: 0.010000\n",
-      "2021-09-08 01:40:34,124 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:34,125 EPOCH 9 done: loss 0.0803 - lr 0.0100000\n",
-      "2021-09-08 01:40:34,270 DEV : loss 0.9121147990226746 - score 0.25\n",
-      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 01:40:34,271 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:40:34,272 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:34,604 epoch 10 - iter 4/41 - loss 0.14554753 - samples/sec: 12.71 - lr: 0.005000\n",
-      "2021-09-08 01:40:34,958 epoch 10 - iter 8/41 - loss 0.19389082 - samples/sec: 11.33 - lr: 0.005000\n",
-      "2021-09-08 01:40:35,223 epoch 10 - iter 12/41 - loss 0.14416582 - samples/sec: 15.13 - lr: 0.005000\n",
-      "2021-09-08 01:40:35,656 epoch 10 - iter 16/41 - loss 0.12487281 - samples/sec: 9.25 - lr: 0.005000\n",
-      "2021-09-08 01:40:36,141 epoch 10 - iter 20/41 - loss 0.11822561 - samples/sec: 8.26 - lr: 0.005000\n",
-      "2021-09-08 01:40:36,689 epoch 10 - iter 24/41 - loss 0.10305981 - samples/sec: 7.30 - lr: 0.005000\n",
-      "2021-09-08 01:40:36,992 epoch 10 - iter 28/41 - loss 0.09214489 - samples/sec: 13.21 - lr: 0.005000\n",
-      "2021-09-08 01:40:37,328 epoch 10 - iter 32/41 - loss 0.08207528 - samples/sec: 11.95 - lr: 0.005000\n",
-      "2021-09-08 01:40:37,762 epoch 10 - iter 36/41 - loss 0.07334312 - samples/sec: 9.21 - lr: 0.005000\n",
-      "2021-09-08 01:40:38,061 epoch 10 - iter 40/41 - loss 0.09225554 - samples/sec: 13.41 - lr: 0.005000\n",
-      "2021-09-08 01:40:38,177 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:38,178 EPOCH 10 done: loss 0.0931 - lr 0.0050000\n",
-      "2021-09-08 01:40:38,388 DEV : loss 1.0805405378341675 - score 0.25\n",
-      "2021-09-08 01:40:38,389 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:40:46,262 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:40:46,263 Testing using best model ...\n",
-      "2021-09-08 01:40:46,287 loading file temp1/best-model.pt\n",
+      "2021-09-21 21:11:26,153 epoch 6 - iter 40/41 - loss 0.46626980 - samples/sec: 9.21 - lr: 0.020000\n",
+      "2021-09-21 21:11:26,213 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:26,214 EPOCH 6 done: loss 0.4591 - lr 0.0200000\n",
+      "2021-09-21 21:11:26,410 DEV : loss 0.6787286996841431 - score 0.25\n",
+      "2021-09-21 21:11:26,412 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:11:26,414 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:26,834 epoch 7 - iter 4/41 - loss 0.27750155 - samples/sec: 10.28 - lr: 0.020000\n",
+      "2021-09-21 21:11:27,236 epoch 7 - iter 8/41 - loss 0.39070649 - samples/sec: 9.94 - lr: 0.020000\n",
+      "2021-09-21 21:11:27,781 epoch 7 - iter 12/41 - loss 0.47877055 - samples/sec: 7.36 - lr: 0.020000\n",
+      "2021-09-21 21:11:28,092 epoch 7 - iter 16/41 - loss 0.44030682 - samples/sec: 12.88 - lr: 0.020000\n",
+      "2021-09-21 21:11:28,360 epoch 7 - iter 20/41 - loss 0.44749413 - samples/sec: 14.96 - lr: 0.020000\n",
+      "2021-09-21 21:11:28,669 epoch 7 - iter 24/41 - loss 0.44912772 - samples/sec: 12.96 - lr: 0.020000\n",
+      "2021-09-21 21:11:28,968 epoch 7 - iter 28/41 - loss 0.43483412 - samples/sec: 13.39 - lr: 0.020000\n",
+      "2021-09-21 21:11:29,307 epoch 7 - iter 32/41 - loss 0.41095643 - samples/sec: 11.82 - lr: 0.020000\n",
+      "2021-09-21 21:11:29,770 epoch 7 - iter 36/41 - loss 0.39018892 - samples/sec: 8.65 - lr: 0.020000\n",
+      "2021-09-21 21:11:30,078 epoch 7 - iter 40/41 - loss 0.35563497 - samples/sec: 12.99 - lr: 0.020000\n",
+      "2021-09-21 21:11:30,146 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:30,146 EPOCH 7 done: loss 0.3581 - lr 0.0200000\n",
+      "2021-09-21 21:11:30,327 DEV : loss 0.7172600626945496 - score 0.0\n",
+      "2021-09-21 21:11:30,328 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:11:30,330 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:30,754 epoch 8 - iter 4/41 - loss 0.25418811 - samples/sec: 10.30 - lr: 0.020000\n",
+      "2021-09-21 21:11:31,064 epoch 8 - iter 8/41 - loss 0.16891091 - samples/sec: 12.92 - lr: 0.020000\n",
+      "2021-09-21 21:11:31,401 epoch 8 - iter 12/41 - loss 0.31385741 - samples/sec: 11.88 - lr: 0.020000\n",
+      "2021-09-21 21:11:31,681 epoch 8 - iter 16/41 - loss 0.28059062 - samples/sec: 14.32 - lr: 0.020000\n",
+      "2021-09-21 21:11:32,471 epoch 8 - iter 20/41 - loss 0.30406676 - samples/sec: 5.07 - lr: 0.020000\n",
+      "2021-09-21 21:11:32,771 epoch 8 - iter 24/41 - loss 0.29680350 - samples/sec: 13.35 - lr: 0.020000\n",
+      "2021-09-21 21:11:33,051 epoch 8 - iter 28/41 - loss 0.34623904 - samples/sec: 14.35 - lr: 0.020000\n",
+      "2021-09-21 21:11:33,557 epoch 8 - iter 32/41 - loss 0.32313256 - samples/sec: 7.90 - lr: 0.020000\n",
+      "2021-09-21 21:11:33,817 epoch 8 - iter 36/41 - loss 0.29357741 - samples/sec: 15.41 - lr: 0.020000\n",
+      "2021-09-21 21:11:34,129 epoch 8 - iter 40/41 - loss 0.26662543 - samples/sec: 12.85 - lr: 0.020000\n",
+      "2021-09-21 21:11:34,254 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:34,255 EPOCH 8 done: loss 0.2612 - lr 0.0200000\n",
+      "2021-09-21 21:11:34,440 DEV : loss 0.8232121467590332 - score 0.25\n",
+      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:11:34,445 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:11:34,446 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:34,934 epoch 9 - iter 4/41 - loss 0.06160413 - samples/sec: 8.63 - lr: 0.010000\n",
+      "2021-09-21 21:11:35,219 epoch 9 - iter 8/41 - loss 0.41496543 - samples/sec: 14.06 - lr: 0.010000\n",
+      "2021-09-21 21:11:35,546 epoch 9 - iter 12/41 - loss 0.30157572 - samples/sec: 12.24 - lr: 0.010000\n",
+      "2021-09-21 21:11:35,852 epoch 9 - iter 16/41 - loss 0.33724834 - samples/sec: 13.11 - lr: 0.010000\n",
+      "2021-09-21 21:11:36,435 epoch 9 - iter 20/41 - loss 0.30949550 - samples/sec: 6.87 - lr: 0.010000\n",
+      "2021-09-21 21:11:36,771 epoch 9 - iter 24/41 - loss 0.27828685 - samples/sec: 11.93 - lr: 0.010000\n",
+      "2021-09-21 21:11:37,110 epoch 9 - iter 28/41 - loss 0.30913582 - samples/sec: 11.84 - lr: 0.010000\n",
+      "2021-09-21 21:11:37,499 epoch 9 - iter 32/41 - loss 0.34486198 - samples/sec: 10.29 - lr: 0.010000\n",
+      "2021-09-21 21:11:37,822 epoch 9 - iter 36/41 - loss 0.32758144 - samples/sec: 12.40 - lr: 0.010000\n",
+      "2021-09-21 21:11:38,133 epoch 9 - iter 40/41 - loss 0.32281772 - samples/sec: 12.90 - lr: 0.010000\n",
+      "2021-09-21 21:11:38,201 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:38,201 EPOCH 9 done: loss 0.3157 - lr 0.0100000\n",
+      "2021-09-21 21:11:38,374 DEV : loss 0.6774414777755737 - score 0.5\n",
+      "2021-09-21 21:11:38,375 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:11:42,436 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:42,710 epoch 10 - iter 4/41 - loss 0.01367150 - samples/sec: 16.13 - lr: 0.010000\n",
+      "2021-09-21 21:11:42,984 epoch 10 - iter 8/41 - loss 0.11026432 - samples/sec: 14.60 - lr: 0.010000\n",
+      "2021-09-21 21:11:43,190 epoch 10 - iter 12/41 - loss 0.08270087 - samples/sec: 19.53 - lr: 0.010000\n",
+      "2021-09-21 21:11:43,596 epoch 10 - iter 16/41 - loss 0.16617363 - samples/sec: 9.87 - lr: 0.010000\n",
+      "2021-09-21 21:11:43,860 epoch 10 - iter 20/41 - loss 0.16185555 - samples/sec: 15.16 - lr: 0.010000\n",
+      "2021-09-21 21:11:44,321 epoch 10 - iter 24/41 - loss 0.16480723 - samples/sec: 8.69 - lr: 0.010000\n",
+      "2021-09-21 21:11:44,740 epoch 10 - iter 28/41 - loss 0.18749716 - samples/sec: 9.55 - lr: 0.010000\n",
+      "2021-09-21 21:11:45,047 epoch 10 - iter 32/41 - loss 0.21482838 - samples/sec: 13.05 - lr: 0.010000\n",
+      "2021-09-21 21:11:45,383 epoch 10 - iter 36/41 - loss 0.22648291 - samples/sec: 11.95 - lr: 0.010000\n",
+      "2021-09-21 21:11:45,698 epoch 10 - iter 40/41 - loss 0.25211199 - samples/sec: 12.68 - lr: 0.010000\n",
+      "2021-09-21 21:11:45,790 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:11:45,791 EPOCH 10 done: loss 0.2461 - lr 0.0100000\n",
+      "2021-09-21 21:11:45,967 DEV : loss 0.533523678779602 - score 0.5\n",
+      "2021-09-21 21:11:45,968 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:12:02,049 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:02,049 Testing using best model ...\n",
+      "2021-09-21 21:12:02,051 loading file temp1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:40:56,341 \t0.2\n",
-      "2021-09-08 01:40:56,341 \n",
+      "2021-09-21 21:12:07,875 \t0.4\n",
+      "2021-09-21 21:12:07,875 \n",
       "Results:\n",
-      "- F-score (micro) 0.2\n",
-      "- F-score (macro) 0.2\n",
-      "- Accuracy 0.2\n",
+      "- F-score (micro) 0.4\n",
+      "- F-score (macro) 0.3333\n",
+      "- Accuracy 0.4\n",
       "\n",
       "By class:\n",
       "              precision    recall  f1-score   support\n",
       "\n",
-      "       awful     1.0000    1.0000    1.0000         1\n",
-      "         bad     0.0000    0.0000    0.0000         2\n",
-      "     neutral     0.0000    0.0000    0.0000         0\n",
-      "        good     0.0000    0.0000    0.0000         1\n",
-      "       great     0.0000    0.0000    0.0000         1\n",
+      "       awful     0.0000    0.0000    0.0000         1\n",
+      "         bad     0.0000    0.0000    0.0000         0\n",
+      "     neutral     0.0000    0.0000    0.0000         1\n",
+      "        good     1.0000    1.0000    1.0000         1\n",
+      "       great     1.0000    0.5000    0.6667         2\n",
       "\n",
-      "   micro avg     0.2000    0.2000    0.2000         5\n",
-      "   macro avg     0.2000    0.2000    0.2000         5\n",
-      "weighted avg     0.2000    0.2000    0.2000         5\n",
-      " samples avg     0.2000    0.2000    0.2000         5\n",
+      "   micro avg     0.4000    0.4000    0.4000         5\n",
+      "   macro avg     0.4000    0.3000    0.3333         5\n",
+      "weighted avg     0.6000    0.4000    0.4667         5\n",
+      " samples avg     0.4000    0.4000    0.4000         5\n",
       "\n",
-      "2021-09-08 01:40:56,342 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:39,131 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
+      "2021-09-21 21:12:07,875 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:54,522 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:41:43,891 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:12:58,864 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 46/46 [00:00<00:00, 14741.59it/s]"
+      "100%|██████████| 46/46 [00:00<00:00, 13109.87it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:41:43,896 [b'awful', b'bad', b'neutral', b'good', b'great']\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 01:41:44,319 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:44,321 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:12:58,869 [b'awful', b'bad', b'neutral', b'good', b'great']\n",
+      "2021-09-21 21:12:58,897 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:58,900 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -2624,217 +2595,225 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:41:44,321 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:44,322 Corpus: \"Corpus: 41 train + 4 dev + 5 test sentences\"\n",
-      "2021-09-08 01:41:44,322 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:44,322 Parameters:\n",
-      "2021-09-08 01:41:44,323  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:41:44,323  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:41:44,323  - patience: \"3\"\n",
-      "2021-09-08 01:41:44,323  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:41:44,324  - max_epochs: \"10\"\n",
-      "2021-09-08 01:41:44,324  - shuffle: \"True\"\n",
-      "2021-09-08 01:41:44,324  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:41:44,324  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:41:44,325 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:44,325 Model training base path: \"temp1\"\n",
-      "2021-09-08 01:41:44,325 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:44,326 Device: cuda:0\n",
-      "2021-09-08 01:41:44,326 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:44,326 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:41:44,864 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:45,131 epoch 1 - iter 4/41 - loss 0.08560709 - samples/sec: 15.97 - lr: 0.020000\n",
-      "2021-09-08 01:41:45,344 epoch 1 - iter 8/41 - loss 0.21407473 - samples/sec: 18.84 - lr: 0.020000\n",
-      "2021-09-08 01:41:45,629 epoch 1 - iter 12/41 - loss 0.23665146 - samples/sec: 14.04 - lr: 0.020000\n",
-      "2021-09-08 01:41:46,063 epoch 1 - iter 16/41 - loss 0.35281370 - samples/sec: 9.24 - lr: 0.020000\n",
-      "2021-09-08 01:41:46,352 epoch 1 - iter 20/41 - loss 0.62531365 - samples/sec: 13.87 - lr: 0.020000\n",
-      "2021-09-08 01:41:46,625 epoch 1 - iter 24/41 - loss 0.63602334 - samples/sec: 14.70 - lr: 0.020000\n",
-      "2021-09-08 01:41:46,936 epoch 1 - iter 28/41 - loss 0.63408638 - samples/sec: 12.88 - lr: 0.020000\n",
-      "2021-09-08 01:41:47,340 epoch 1 - iter 32/41 - loss 0.62878638 - samples/sec: 9.92 - lr: 0.020000\n",
-      "2021-09-08 01:41:47,746 epoch 1 - iter 36/41 - loss 0.62281513 - samples/sec: 9.87 - lr: 0.020000\n",
-      "2021-09-08 01:41:48,078 epoch 1 - iter 40/41 - loss 0.66593983 - samples/sec: 12.06 - lr: 0.020000\n",
-      "2021-09-08 01:41:48,155 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:48,155 EPOCH 1 done: loss 0.6671 - lr 0.0200000\n",
-      "2021-09-08 01:41:48,273 DEV : loss 0.7215302586555481 - score 0.0\n",
-      "2021-09-08 01:41:48,274 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:12:58,900 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:58,900 Corpus: \"Corpus: 41 train + 4 dev + 5 test sentences\"\n",
+      "2021-09-21 21:12:58,901 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:58,901 Parameters:\n",
+      "2021-09-21 21:12:58,901  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:12:58,901  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:12:58,902  - patience: \"3\"\n",
+      "2021-09-21 21:12:58,902  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:12:58,902  - max_epochs: \"10\"\n",
+      "2021-09-21 21:12:58,903  - shuffle: \"True\"\n",
+      "2021-09-21 21:12:58,903  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:12:58,903  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:12:58,903 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:58,904 Model training base path: \"temp1\"\n",
+      "2021-09-21 21:12:58,904 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:58,904 Device: cuda:0\n",
+      "2021-09-21 21:12:58,905 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:12:58,905 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:12:58,911 ----------------------------------------------------------------------------------------------------\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 21:12:59,230 epoch 1 - iter 4/41 - loss 0.76190443 - samples/sec: 13.35 - lr: 0.020000\n",
+      "2021-09-21 21:12:59,543 epoch 1 - iter 8/41 - loss 0.47850460 - samples/sec: 12.79 - lr: 0.020000\n",
+      "2021-09-21 21:12:59,858 epoch 1 - iter 12/41 - loss 0.60590922 - samples/sec: 12.71 - lr: 0.020000\n",
+      "2021-09-21 21:13:00,559 epoch 1 - iter 16/41 - loss 0.66834109 - samples/sec: 5.71 - lr: 0.020000\n",
+      "2021-09-21 21:13:00,980 epoch 1 - iter 20/41 - loss 0.72540479 - samples/sec: 9.52 - lr: 0.020000\n",
+      "2021-09-21 21:13:01,307 epoch 1 - iter 24/41 - loss 0.64153532 - samples/sec: 12.27 - lr: 0.020000\n",
+      "2021-09-21 21:13:01,771 epoch 1 - iter 28/41 - loss 0.63591913 - samples/sec: 8.62 - lr: 0.020000\n",
+      "2021-09-21 21:13:02,238 epoch 1 - iter 32/41 - loss 0.62301690 - samples/sec: 8.59 - lr: 0.020000\n",
+      "2021-09-21 21:13:02,789 epoch 1 - iter 36/41 - loss 0.64779448 - samples/sec: 7.27 - lr: 0.020000\n",
+      "2021-09-21 21:13:03,260 epoch 1 - iter 40/41 - loss 0.66523860 - samples/sec: 8.50 - lr: 0.020000\n",
+      "2021-09-21 21:13:03,349 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:03,349 EPOCH 1 done: loss 0.6598 - lr 0.0200000\n",
+      "2021-09-21 21:13:03,586 DEV : loss 0.5849138498306274 - score 0.5\n",
+      "2021-09-21 21:13:03,587 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:13:07,753 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:08,244 epoch 2 - iter 4/41 - loss 0.80466317 - samples/sec: 8.50 - lr: 0.020000\n",
+      "2021-09-21 21:13:08,908 epoch 2 - iter 8/41 - loss 0.76429330 - samples/sec: 6.04 - lr: 0.020000\n",
+      "2021-09-21 21:13:09,487 epoch 2 - iter 12/41 - loss 0.66210592 - samples/sec: 6.91 - lr: 0.020000\n",
+      "2021-09-21 21:13:10,074 epoch 2 - iter 16/41 - loss 0.61762713 - samples/sec: 6.83 - lr: 0.020000\n",
+      "2021-09-21 21:13:10,538 epoch 2 - iter 20/41 - loss 0.66122246 - samples/sec: 8.63 - lr: 0.020000\n",
+      "2021-09-21 21:13:10,908 epoch 2 - iter 24/41 - loss 0.64322416 - samples/sec: 10.81 - lr: 0.020000\n",
+      "2021-09-21 21:13:11,445 epoch 2 - iter 28/41 - loss 0.63370413 - samples/sec: 7.46 - lr: 0.020000\n",
+      "2021-09-21 21:13:12,120 epoch 2 - iter 32/41 - loss 0.60990167 - samples/sec: 5.93 - lr: 0.020000\n",
+      "2021-09-21 21:13:12,506 epoch 2 - iter 36/41 - loss 0.57603595 - samples/sec: 10.36 - lr: 0.020000\n",
+      "2021-09-21 21:13:13,115 epoch 2 - iter 40/41 - loss 0.59315524 - samples/sec: 6.58 - lr: 0.020000\n",
+      "2021-09-21 21:13:13,220 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:13,220 EPOCH 2 done: loss 0.5917 - lr 0.0200000\n",
+      "2021-09-21 21:13:13,447 DEV : loss 0.424838662147522 - score 0.25\n",
+      "2021-09-21 21:13:13,448 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:13:13,450 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:13,810 epoch 3 - iter 4/41 - loss 0.53826309 - samples/sec: 12.34 - lr: 0.020000\n",
+      "2021-09-21 21:13:14,294 epoch 3 - iter 8/41 - loss 0.47705432 - samples/sec: 8.27 - lr: 0.020000\n",
+      "2021-09-21 21:13:14,673 epoch 3 - iter 12/41 - loss 0.56092815 - samples/sec: 10.59 - lr: 0.020000\n",
+      "2021-09-21 21:13:15,104 epoch 3 - iter 16/41 - loss 0.56488096 - samples/sec: 9.29 - lr: 0.020000\n",
+      "2021-09-21 21:13:15,478 epoch 3 - iter 20/41 - loss 0.55161795 - samples/sec: 10.70 - lr: 0.020000\n",
+      "2021-09-21 21:13:15,931 epoch 3 - iter 24/41 - loss 0.50427589 - samples/sec: 8.84 - lr: 0.020000\n",
+      "2021-09-21 21:13:16,329 epoch 3 - iter 28/41 - loss 0.53639963 - samples/sec: 10.08 - lr: 0.020000\n",
+      "2021-09-21 21:13:16,855 epoch 3 - iter 32/41 - loss 0.53363653 - samples/sec: 7.61 - lr: 0.020000\n",
+      "2021-09-21 21:13:17,398 epoch 3 - iter 36/41 - loss 0.52053554 - samples/sec: 7.37 - lr: 0.020000\n",
+      "2021-09-21 21:13:18,020 epoch 3 - iter 40/41 - loss 0.55624823 - samples/sec: 6.44 - lr: 0.020000\n",
+      "2021-09-21 21:13:18,102 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:18,102 EPOCH 3 done: loss 0.5568 - lr 0.0200000\n",
+      "2021-09-21 21:13:18,273 DEV : loss 0.43495696783065796 - score 0.5\n",
+      "2021-09-21 21:13:18,276 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:13:23,114 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:23,892 epoch 4 - iter 4/41 - loss 0.25741147 - samples/sec: 5.38 - lr: 0.020000\n",
+      "2021-09-21 21:13:24,366 epoch 4 - iter 8/41 - loss 0.49946243 - samples/sec: 8.45 - lr: 0.020000\n",
+      "2021-09-21 21:13:24,832 epoch 4 - iter 12/41 - loss 0.40910411 - samples/sec: 8.60 - lr: 0.020000\n",
+      "2021-09-21 21:13:25,320 epoch 4 - iter 16/41 - loss 0.43320307 - samples/sec: 8.21 - lr: 0.020000\n",
+      "2021-09-21 21:13:25,810 epoch 4 - iter 20/41 - loss 0.41717300 - samples/sec: 8.16 - lr: 0.020000\n",
+      "2021-09-21 21:13:26,415 epoch 4 - iter 24/41 - loss 0.39490283 - samples/sec: 6.62 - lr: 0.020000\n",
+      "2021-09-21 21:13:26,809 epoch 4 - iter 28/41 - loss 0.39859944 - samples/sec: 10.15 - lr: 0.020000\n",
+      "2021-09-21 21:13:27,159 epoch 4 - iter 32/41 - loss 0.41612624 - samples/sec: 11.44 - lr: 0.020000\n",
+      "2021-09-21 21:13:27,460 epoch 4 - iter 36/41 - loss 0.44374935 - samples/sec: 13.33 - lr: 0.020000\n",
+      "2021-09-21 21:13:27,720 epoch 4 - iter 40/41 - loss 0.43470352 - samples/sec: 15.41 - lr: 0.020000\n",
+      "2021-09-21 21:13:27,830 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:27,831 EPOCH 4 done: loss 0.4271 - lr 0.0200000\n",
+      "2021-09-21 21:13:28,033 DEV : loss 0.3858884572982788 - score 0.5\n",
+      "2021-09-21 21:13:28,034 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:41:52,470 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:52,894 epoch 2 - iter 4/41 - loss 0.66576102 - samples/sec: 9.96 - lr: 0.020000\n",
-      "2021-09-08 01:41:53,150 epoch 2 - iter 8/41 - loss 0.66557936 - samples/sec: 15.67 - lr: 0.020000\n",
-      "2021-09-08 01:41:53,449 epoch 2 - iter 12/41 - loss 0.68087238 - samples/sec: 13.43 - lr: 0.020000\n",
-      "2021-09-08 01:41:53,880 epoch 2 - iter 16/41 - loss 0.70541214 - samples/sec: 9.30 - lr: 0.020000\n",
-      "2021-09-08 01:41:54,273 epoch 2 - iter 20/41 - loss 0.71393940 - samples/sec: 10.19 - lr: 0.020000\n",
-      "2021-09-08 01:41:54,596 epoch 2 - iter 24/41 - loss 0.71851672 - samples/sec: 12.39 - lr: 0.020000\n",
-      "2021-09-08 01:41:54,860 epoch 2 - iter 28/41 - loss 0.72043363 - samples/sec: 15.22 - lr: 0.020000\n",
-      "2021-09-08 01:41:55,151 epoch 2 - iter 32/41 - loss 0.72488813 - samples/sec: 13.79 - lr: 0.020000\n",
-      "2021-09-08 01:41:55,373 epoch 2 - iter 36/41 - loss 0.71569060 - samples/sec: 18.01 - lr: 0.020000\n",
-      "2021-09-08 01:41:55,750 epoch 2 - iter 40/41 - loss 0.70995436 - samples/sec: 10.64 - lr: 0.020000\n",
-      "2021-09-08 01:41:55,823 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:41:55,824 EPOCH 2 done: loss 0.7102 - lr 0.0200000\n",
-      "2021-09-08 01:41:56,015 DEV : loss 0.5016753077507019 - score 0.75\n",
-      "2021-09-08 01:41:56,016 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:13:43,971 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:44,321 epoch 5 - iter 4/41 - loss 0.26317176 - samples/sec: 11.93 - lr: 0.020000\n",
+      "2021-09-21 21:13:44,578 epoch 5 - iter 8/41 - loss 0.44427706 - samples/sec: 15.61 - lr: 0.020000\n",
+      "2021-09-21 21:13:44,905 epoch 5 - iter 12/41 - loss 0.38977070 - samples/sec: 12.28 - lr: 0.020000\n",
+      "2021-09-21 21:13:45,295 epoch 5 - iter 16/41 - loss 0.46069990 - samples/sec: 10.27 - lr: 0.020000\n",
+      "2021-09-21 21:13:45,606 epoch 5 - iter 20/41 - loss 0.42368435 - samples/sec: 12.87 - lr: 0.020000\n",
+      "2021-09-21 21:13:45,817 epoch 5 - iter 24/41 - loss 0.44721920 - samples/sec: 19.09 - lr: 0.020000\n",
+      "2021-09-21 21:13:46,237 epoch 5 - iter 28/41 - loss 0.46507396 - samples/sec: 9.52 - lr: 0.020000\n",
+      "2021-09-21 21:13:46,604 epoch 5 - iter 32/41 - loss 0.46795221 - samples/sec: 10.91 - lr: 0.020000\n",
+      "2021-09-21 21:13:46,876 epoch 5 - iter 36/41 - loss 0.47771961 - samples/sec: 14.72 - lr: 0.020000\n",
+      "2021-09-21 21:13:47,202 epoch 5 - iter 40/41 - loss 0.45801664 - samples/sec: 12.32 - lr: 0.020000\n",
+      "2021-09-21 21:13:47,280 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:47,280 EPOCH 5 done: loss 0.4472 - lr 0.0200000\n",
+      "2021-09-21 21:13:47,423 DEV : loss 0.373312383890152 - score 0.5\n",
+      "2021-09-21 21:13:47,426 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:42:02,618 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:02,855 epoch 3 - iter 4/41 - loss 0.71856162 - samples/sec: 18.57 - lr: 0.020000\n",
-      "2021-09-08 01:42:03,245 epoch 3 - iter 8/41 - loss 0.69309545 - samples/sec: 10.29 - lr: 0.020000\n",
-      "2021-09-08 01:42:03,447 epoch 3 - iter 12/41 - loss 0.68075311 - samples/sec: 19.79 - lr: 0.020000\n",
-      "2021-09-08 01:42:03,726 epoch 3 - iter 16/41 - loss 0.67100157 - samples/sec: 14.37 - lr: 0.020000\n",
-      "2021-09-08 01:42:03,985 epoch 3 - iter 20/41 - loss 0.67467445 - samples/sec: 15.53 - lr: 0.020000\n",
-      "2021-09-08 01:42:04,293 epoch 3 - iter 24/41 - loss 0.66918565 - samples/sec: 13.00 - lr: 0.020000\n",
-      "2021-09-08 01:42:04,541 epoch 3 - iter 28/41 - loss 0.67402640 - samples/sec: 16.19 - lr: 0.020000\n",
-      "2021-09-08 01:42:04,793 epoch 3 - iter 32/41 - loss 0.68848673 - samples/sec: 15.86 - lr: 0.020000\n",
-      "2021-09-08 01:42:05,073 epoch 3 - iter 36/41 - loss 0.68407887 - samples/sec: 14.31 - lr: 0.020000\n",
-      "2021-09-08 01:42:05,336 epoch 3 - iter 40/41 - loss 0.68038949 - samples/sec: 15.28 - lr: 0.020000\n",
-      "2021-09-08 01:42:05,416 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:05,416 EPOCH 3 done: loss 0.6859 - lr 0.0200000\n",
-      "2021-09-08 01:42:05,554 DEV : loss 0.7299845814704895 - score 0.5\n",
-      "2021-09-08 01:42:05,555 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:42:05,557 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:05,997 epoch 4 - iter 4/41 - loss 0.71748330 - samples/sec: 9.40 - lr: 0.020000\n",
-      "2021-09-08 01:42:06,239 epoch 4 - iter 8/41 - loss 0.67790224 - samples/sec: 16.58 - lr: 0.020000\n",
-      "2021-09-08 01:42:06,552 epoch 4 - iter 12/41 - loss 0.65672127 - samples/sec: 12.80 - lr: 0.020000\n",
-      "2021-09-08 01:42:06,786 epoch 4 - iter 16/41 - loss 0.69128922 - samples/sec: 17.14 - lr: 0.020000\n",
-      "2021-09-08 01:42:07,048 epoch 4 - iter 20/41 - loss 0.68523585 - samples/sec: 15.30 - lr: 0.020000\n",
-      "2021-09-08 01:42:07,442 epoch 4 - iter 24/41 - loss 0.68529422 - samples/sec: 10.16 - lr: 0.020000\n",
-      "2021-09-08 01:42:07,752 epoch 4 - iter 28/41 - loss 0.66957117 - samples/sec: 12.92 - lr: 0.020000\n",
-      "2021-09-08 01:42:08,008 epoch 4 - iter 32/41 - loss 0.68359153 - samples/sec: 15.66 - lr: 0.020000\n",
-      "2021-09-08 01:42:08,268 epoch 4 - iter 36/41 - loss 0.67912473 - samples/sec: 15.43 - lr: 0.020000\n",
-      "2021-09-08 01:42:08,549 epoch 4 - iter 40/41 - loss 0.67435738 - samples/sec: 14.27 - lr: 0.020000\n",
-      "2021-09-08 01:42:08,632 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:08,632 EPOCH 4 done: loss 0.6743 - lr 0.0200000\n",
-      "2021-09-08 01:42:08,725 DEV : loss 0.5140536427497864 - score 0.25\n",
-      "2021-09-08 01:42:08,726 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:42:08,728 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:08,983 epoch 5 - iter 4/41 - loss 0.72319062 - samples/sec: 16.84 - lr: 0.020000\n",
-      "2021-09-08 01:42:09,329 epoch 5 - iter 8/41 - loss 0.70947555 - samples/sec: 11.55 - lr: 0.020000\n",
-      "2021-09-08 01:42:09,549 epoch 5 - iter 12/41 - loss 0.71259966 - samples/sec: 18.31 - lr: 0.020000\n",
-      "2021-09-08 01:42:09,882 epoch 5 - iter 16/41 - loss 0.69628631 - samples/sec: 12.04 - lr: 0.020000\n",
-      "2021-09-08 01:42:10,185 epoch 5 - iter 20/41 - loss 0.69211223 - samples/sec: 13.21 - lr: 0.020000\n",
-      "2021-09-08 01:42:10,560 epoch 5 - iter 24/41 - loss 0.68803176 - samples/sec: 10.70 - lr: 0.020000\n",
-      "2021-09-08 01:42:10,881 epoch 5 - iter 28/41 - loss 0.68296345 - samples/sec: 12.47 - lr: 0.020000\n",
-      "2021-09-08 01:42:11,259 epoch 5 - iter 32/41 - loss 0.67985798 - samples/sec: 10.61 - lr: 0.020000\n",
-      "2021-09-08 01:42:11,503 epoch 5 - iter 36/41 - loss 0.67401781 - samples/sec: 16.42 - lr: 0.020000\n",
-      "2021-09-08 01:42:11,716 epoch 5 - iter 40/41 - loss 0.66927104 - samples/sec: 18.87 - lr: 0.020000\n",
-      "2021-09-08 01:42:11,784 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:13:51,247 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:51,633 epoch 6 - iter 4/41 - loss 0.12175419 - samples/sec: 10.84 - lr: 0.020000\n",
+      "2021-09-21 21:13:51,959 epoch 6 - iter 8/41 - loss 0.18235055 - samples/sec: 12.33 - lr: 0.020000\n",
+      "2021-09-21 21:13:52,331 epoch 6 - iter 12/41 - loss 0.33109266 - samples/sec: 10.78 - lr: 0.020000\n",
+      "2021-09-21 21:13:52,641 epoch 6 - iter 16/41 - loss 0.45221650 - samples/sec: 12.94 - lr: 0.020000\n",
+      "2021-09-21 21:13:52,874 epoch 6 - iter 20/41 - loss 0.40007665 - samples/sec: 17.15 - lr: 0.020000\n",
+      "2021-09-21 21:13:53,196 epoch 6 - iter 24/41 - loss 0.38392461 - samples/sec: 12.44 - lr: 0.020000\n",
+      "2021-09-21 21:13:53,560 epoch 6 - iter 28/41 - loss 0.37239835 - samples/sec: 11.01 - lr: 0.020000\n",
+      "2021-09-21 21:13:53,858 epoch 6 - iter 32/41 - loss 0.37933116 - samples/sec: 13.45 - lr: 0.020000\n",
+      "2021-09-21 21:13:54,247 epoch 6 - iter 36/41 - loss 0.41526937 - samples/sec: 10.30 - lr: 0.020000\n",
+      "2021-09-21 21:13:54,553 epoch 6 - iter 40/41 - loss 0.40736877 - samples/sec: 13.09 - lr: 0.020000\n",
+      "2021-09-21 21:13:54,685 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:42:11,785 EPOCH 5 done: loss 0.6697 - lr 0.0200000\n",
-      "2021-09-08 01:42:12,686 DEV : loss 0.567592978477478 - score 0.25\n",
-      "2021-09-08 01:42:12,687 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:42:12,697 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:12,982 epoch 6 - iter 4/41 - loss 0.63531411 - samples/sec: 15.07 - lr: 0.020000\n",
-      "2021-09-08 01:42:13,279 epoch 6 - iter 8/41 - loss 0.64239651 - samples/sec: 13.53 - lr: 0.020000\n",
-      "2021-09-08 01:42:13,568 epoch 6 - iter 12/41 - loss 0.64481566 - samples/sec: 13.86 - lr: 0.020000\n",
-      "2021-09-08 01:42:13,940 epoch 6 - iter 16/41 - loss 0.64645534 - samples/sec: 10.77 - lr: 0.020000\n",
-      "2021-09-08 01:42:14,185 epoch 6 - iter 20/41 - loss 0.65084834 - samples/sec: 16.34 - lr: 0.020000\n",
-      "2021-09-08 01:42:14,550 epoch 6 - iter 24/41 - loss 0.66363572 - samples/sec: 10.99 - lr: 0.020000\n",
-      "2021-09-08 01:42:14,928 epoch 6 - iter 28/41 - loss 0.67179550 - samples/sec: 10.58 - lr: 0.020000\n",
-      "2021-09-08 01:42:15,284 epoch 6 - iter 32/41 - loss 0.66571971 - samples/sec: 11.27 - lr: 0.020000\n",
-      "2021-09-08 01:42:15,502 epoch 6 - iter 36/41 - loss 0.66698223 - samples/sec: 18.46 - lr: 0.020000\n",
-      "2021-09-08 01:42:15,727 epoch 6 - iter 40/41 - loss 0.66912124 - samples/sec: 17.77 - lr: 0.020000\n",
-      "2021-09-08 01:42:15,783 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:15,784 EPOCH 6 done: loss 0.6708 - lr 0.0200000\n",
-      "2021-09-08 01:42:16,009 DEV : loss 0.5019926428794861 - score 0.25\n",
-      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:42:16,010 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:42:16,087 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:16,359 epoch 7 - iter 4/41 - loss 0.70147166 - samples/sec: 16.61 - lr: 0.010000\n",
-      "2021-09-08 01:42:16,596 epoch 7 - iter 8/41 - loss 0.67075630 - samples/sec: 16.94 - lr: 0.010000\n",
-      "2021-09-08 01:42:16,842 epoch 7 - iter 12/41 - loss 0.66174009 - samples/sec: 16.27 - lr: 0.010000\n",
-      "2021-09-08 01:42:17,119 epoch 7 - iter 16/41 - loss 0.66121786 - samples/sec: 14.50 - lr: 0.010000\n",
-      "2021-09-08 01:42:17,328 epoch 7 - iter 20/41 - loss 0.66794578 - samples/sec: 19.16 - lr: 0.010000\n",
-      "2021-09-08 01:42:17,662 epoch 7 - iter 24/41 - loss 0.66865682 - samples/sec: 11.99 - lr: 0.010000\n",
-      "2021-09-08 01:42:18,013 epoch 7 - iter 28/41 - loss 0.66765140 - samples/sec: 11.42 - lr: 0.010000\n",
-      "2021-09-08 01:42:18,229 epoch 7 - iter 32/41 - loss 0.65585557 - samples/sec: 18.62 - lr: 0.010000\n",
-      "2021-09-08 01:42:19,041 epoch 7 - iter 36/41 - loss 0.65093331 - samples/sec: 7.18 - lr: 0.010000\n",
-      "2021-09-08 01:42:19,384 epoch 7 - iter 40/41 - loss 0.65470454 - samples/sec: 11.71 - lr: 0.010000\n",
-      "2021-09-08 01:42:19,464 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:19,464 EPOCH 7 done: loss 0.6536 - lr 0.0100000\n",
-      "2021-09-08 01:42:19,580 DEV : loss 0.5622895956039429 - score 0.25\n",
-      "2021-09-08 01:42:19,581 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:42:19,585 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:20,068 epoch 8 - iter 4/41 - loss 0.59160411 - samples/sec: 8.95 - lr: 0.010000\n",
-      "2021-09-08 01:42:20,318 epoch 8 - iter 8/41 - loss 0.59541335 - samples/sec: 16.04 - lr: 0.010000\n",
-      "2021-09-08 01:42:20,676 epoch 8 - iter 12/41 - loss 0.61599593 - samples/sec: 11.19 - lr: 0.010000\n",
-      "2021-09-08 01:42:20,943 epoch 8 - iter 16/41 - loss 0.62416546 - samples/sec: 15.03 - lr: 0.010000\n",
-      "2021-09-08 01:42:21,205 epoch 8 - iter 20/41 - loss 0.64017282 - samples/sec: 15.29 - lr: 0.010000\n",
-      "2021-09-08 01:42:21,476 epoch 8 - iter 24/41 - loss 0.64114076 - samples/sec: 14.82 - lr: 0.010000\n",
-      "2021-09-08 01:42:21,706 epoch 8 - iter 28/41 - loss 0.64164583 - samples/sec: 17.45 - lr: 0.010000\n",
-      "2021-09-08 01:42:21,903 epoch 8 - iter 32/41 - loss 0.64426577 - samples/sec: 20.36 - lr: 0.010000\n",
-      "2021-09-08 01:42:22,198 epoch 8 - iter 36/41 - loss 0.64894210 - samples/sec: 13.60 - lr: 0.010000\n",
-      "2021-09-08 01:42:22,417 epoch 8 - iter 40/41 - loss 0.65035666 - samples/sec: 18.26 - lr: 0.010000\n",
-      "2021-09-08 01:42:22,524 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:22,524 EPOCH 8 done: loss 0.6487 - lr 0.0100000\n",
-      "2021-09-08 01:42:22,638 DEV : loss 0.5357226729393005 - score 0.5\n",
-      "2021-09-08 01:42:22,639 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:42:22,642 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:22,968 epoch 9 - iter 4/41 - loss 0.63987370 - samples/sec: 12.93 - lr: 0.010000\n",
-      "2021-09-08 01:42:23,334 epoch 9 - iter 8/41 - loss 0.63494664 - samples/sec: 10.93 - lr: 0.010000\n",
-      "2021-09-08 01:42:23,599 epoch 9 - iter 12/41 - loss 0.66697706 - samples/sec: 15.13 - lr: 0.010000\n",
-      "2021-09-08 01:42:23,876 epoch 9 - iter 16/41 - loss 0.65929379 - samples/sec: 14.49 - lr: 0.010000\n",
-      "2021-09-08 01:42:24,135 epoch 9 - iter 20/41 - loss 0.66597155 - samples/sec: 15.46 - lr: 0.010000\n",
-      "2021-09-08 01:42:24,388 epoch 9 - iter 24/41 - loss 0.66425847 - samples/sec: 15.91 - lr: 0.010000\n",
-      "2021-09-08 01:42:24,679 epoch 9 - iter 28/41 - loss 0.65999410 - samples/sec: 13.78 - lr: 0.010000\n",
-      "2021-09-08 01:42:24,976 epoch 9 - iter 32/41 - loss 0.65886030 - samples/sec: 13.50 - lr: 0.010000\n",
-      "2021-09-08 01:42:25,181 epoch 9 - iter 36/41 - loss 0.66259955 - samples/sec: 19.51 - lr: 0.010000\n",
-      "2021-09-08 01:42:25,496 epoch 9 - iter 40/41 - loss 0.66224325 - samples/sec: 12.75 - lr: 0.010000\n",
-      "2021-09-08 01:42:25,572 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:25,572 EPOCH 9 done: loss 0.6610 - lr 0.0100000\n",
-      "2021-09-08 01:42:25,689 DEV : loss 0.5427137613296509 - score 0.25\n",
-      "2021-09-08 01:42:25,690 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:42:25,692 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:26,001 epoch 10 - iter 4/41 - loss 0.67417829 - samples/sec: 14.05 - lr: 0.010000\n",
-      "2021-09-08 01:42:26,298 epoch 10 - iter 8/41 - loss 0.66426653 - samples/sec: 13.47 - lr: 0.010000\n",
-      "2021-09-08 01:42:26,727 epoch 10 - iter 12/41 - loss 0.66385032 - samples/sec: 9.35 - lr: 0.010000\n",
-      "2021-09-08 01:42:27,032 epoch 10 - iter 16/41 - loss 0.66112129 - samples/sec: 13.11 - lr: 0.010000\n",
-      "2021-09-08 01:42:27,306 epoch 10 - iter 20/41 - loss 0.65003130 - samples/sec: 14.66 - lr: 0.010000\n",
-      "2021-09-08 01:42:27,610 epoch 10 - iter 24/41 - loss 0.65204231 - samples/sec: 13.19 - lr: 0.010000\n",
-      "2021-09-08 01:42:27,906 epoch 10 - iter 28/41 - loss 0.64890975 - samples/sec: 13.53 - lr: 0.010000\n",
-      "2021-09-08 01:42:28,144 epoch 10 - iter 32/41 - loss 0.64340840 - samples/sec: 16.87 - lr: 0.010000\n",
-      "2021-09-08 01:42:28,497 epoch 10 - iter 36/41 - loss 0.64544309 - samples/sec: 11.36 - lr: 0.010000\n",
-      "2021-09-08 01:42:28,840 epoch 10 - iter 40/41 - loss 0.64541604 - samples/sec: 11.69 - lr: 0.010000\n",
-      "2021-09-08 01:42:28,907 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:28,907 EPOCH 10 done: loss 0.6456 - lr 0.0100000\n",
-      "2021-09-08 01:42:29,096 DEV : loss 0.5309613943099976 - score 0.25\n",
-      "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 01:42:29,097 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:42:41,551 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:42:41,552 Testing using best model ...\n",
-      "2021-09-08 01:42:41,554 loading file temp1/best-model.pt\n",
+      "2021-09-21 21:13:54,686 EPOCH 6 done: loss 0.3982 - lr 0.0200000\n",
+      "2021-09-21 21:13:54,843 DEV : loss 0.420066773891449 - score 0.25\n",
+      "2021-09-21 21:13:54,844 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:13:54,846 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:55,331 epoch 7 - iter 4/41 - loss 0.16708862 - samples/sec: 8.85 - lr: 0.020000\n",
+      "2021-09-21 21:13:55,654 epoch 7 - iter 8/41 - loss 0.38658724 - samples/sec: 12.42 - lr: 0.020000\n",
+      "2021-09-21 21:13:56,161 epoch 7 - iter 12/41 - loss 0.33177241 - samples/sec: 7.90 - lr: 0.020000\n",
+      "2021-09-21 21:13:56,415 epoch 7 - iter 16/41 - loss 0.27455357 - samples/sec: 15.79 - lr: 0.020000\n",
+      "2021-09-21 21:13:56,780 epoch 7 - iter 20/41 - loss 0.31228949 - samples/sec: 10.98 - lr: 0.020000\n",
+      "2021-09-21 21:13:57,076 epoch 7 - iter 24/41 - loss 0.34649051 - samples/sec: 13.52 - lr: 0.020000\n",
+      "2021-09-21 21:13:57,525 epoch 7 - iter 28/41 - loss 0.33828555 - samples/sec: 8.91 - lr: 0.020000\n",
+      "2021-09-21 21:13:57,852 epoch 7 - iter 32/41 - loss 0.31706712 - samples/sec: 12.27 - lr: 0.020000\n",
+      "2021-09-21 21:13:58,176 epoch 7 - iter 36/41 - loss 0.32420597 - samples/sec: 12.35 - lr: 0.020000\n",
+      "2021-09-21 21:13:58,431 epoch 7 - iter 40/41 - loss 0.31463461 - samples/sec: 15.75 - lr: 0.020000\n",
+      "2021-09-21 21:13:58,639 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:58,639 EPOCH 7 done: loss 0.3109 - lr 0.0200000\n",
+      "2021-09-21 21:13:58,877 DEV : loss 0.45094966888427734 - score 0.25\n",
+      "2021-09-21 21:13:58,878 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:13:58,959 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:13:59,399 epoch 8 - iter 4/41 - loss 0.16188259 - samples/sec: 9.55 - lr: 0.020000\n",
+      "2021-09-21 21:13:59,799 epoch 8 - iter 8/41 - loss 0.27573961 - samples/sec: 10.03 - lr: 0.020000\n",
+      "2021-09-21 21:14:00,168 epoch 8 - iter 12/41 - loss 0.25293449 - samples/sec: 10.86 - lr: 0.020000\n",
+      "2021-09-21 21:14:00,592 epoch 8 - iter 16/41 - loss 0.24498558 - samples/sec: 9.44 - lr: 0.020000\n",
+      "2021-09-21 21:14:00,966 epoch 8 - iter 20/41 - loss 0.20872707 - samples/sec: 10.70 - lr: 0.020000\n",
+      "2021-09-21 21:14:01,312 epoch 8 - iter 24/41 - loss 0.25163235 - samples/sec: 11.61 - lr: 0.020000\n",
+      "2021-09-21 21:14:01,705 epoch 8 - iter 28/41 - loss 0.28314967 - samples/sec: 10.19 - lr: 0.020000\n",
+      "2021-09-21 21:14:02,046 epoch 8 - iter 32/41 - loss 0.28372030 - samples/sec: 11.74 - lr: 0.020000\n",
+      "2021-09-21 21:14:02,434 epoch 8 - iter 36/41 - loss 0.34190546 - samples/sec: 10.31 - lr: 0.020000\n",
+      "2021-09-21 21:14:02,705 epoch 8 - iter 40/41 - loss 0.32631873 - samples/sec: 14.84 - lr: 0.020000\n",
+      "2021-09-21 21:14:02,929 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:02,929 EPOCH 8 done: loss 0.3640 - lr 0.0200000\n",
+      "2021-09-21 21:14:03,082 DEV : loss 0.4111178517341614 - score 0.5\n",
+      "2021-09-21 21:14:03,085 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:14:03,086 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:03,419 epoch 9 - iter 4/41 - loss 0.01675689 - samples/sec: 12.92 - lr: 0.020000\n",
+      "2021-09-21 21:14:03,772 epoch 9 - iter 8/41 - loss 0.09945246 - samples/sec: 11.35 - lr: 0.020000\n",
+      "2021-09-21 21:14:04,088 epoch 9 - iter 12/41 - loss 0.15088975 - samples/sec: 12.69 - lr: 0.020000\n",
+      "2021-09-21 21:14:04,443 epoch 9 - iter 16/41 - loss 0.18185570 - samples/sec: 11.28 - lr: 0.020000\n",
+      "2021-09-21 21:14:04,834 epoch 9 - iter 20/41 - loss 0.18843252 - samples/sec: 10.23 - lr: 0.020000\n",
+      "2021-09-21 21:14:05,155 epoch 9 - iter 24/41 - loss 0.20313868 - samples/sec: 12.48 - lr: 0.020000\n",
+      "2021-09-21 21:14:05,435 epoch 9 - iter 28/41 - loss 0.20997720 - samples/sec: 14.33 - lr: 0.020000\n",
+      "2021-09-21 21:14:05,706 epoch 9 - iter 32/41 - loss 0.25300480 - samples/sec: 14.76 - lr: 0.020000\n",
+      "2021-09-21 21:14:05,963 epoch 9 - iter 36/41 - loss 0.22563014 - samples/sec: 15.63 - lr: 0.020000\n",
+      "2021-09-21 21:14:06,186 epoch 9 - iter 40/41 - loss 0.20699117 - samples/sec: 17.95 - lr: 0.020000\n",
+      "2021-09-21 21:14:06,242 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:06,242 EPOCH 9 done: loss 0.2021 - lr 0.0200000\n",
+      "2021-09-21 21:14:06,434 DEV : loss 0.628145694732666 - score 0.25\n",
+      "Epoch     9: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:14:06,435 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:14:06,511 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:06,779 epoch 10 - iter 4/41 - loss 0.15558516 - samples/sec: 16.16 - lr: 0.010000\n",
+      "2021-09-21 21:14:07,011 epoch 10 - iter 8/41 - loss 0.08608644 - samples/sec: 17.29 - lr: 0.010000\n",
+      "2021-09-21 21:14:07,415 epoch 10 - iter 12/41 - loss 0.05987345 - samples/sec: 9.91 - lr: 0.010000\n",
+      "2021-09-21 21:14:07,789 epoch 10 - iter 16/41 - loss 0.07495071 - samples/sec: 10.71 - lr: 0.010000\n",
+      "2021-09-21 21:14:08,228 epoch 10 - iter 20/41 - loss 0.11754088 - samples/sec: 9.12 - lr: 0.010000\n",
+      "2021-09-21 21:14:08,740 epoch 10 - iter 24/41 - loss 0.09942952 - samples/sec: 7.82 - lr: 0.010000\n",
+      "2021-09-21 21:14:09,099 epoch 10 - iter 28/41 - loss 0.13677906 - samples/sec: 11.15 - lr: 0.010000\n",
+      "2021-09-21 21:14:09,417 epoch 10 - iter 32/41 - loss 0.14909065 - samples/sec: 12.62 - lr: 0.010000\n",
+      "2021-09-21 21:14:09,634 epoch 10 - iter 36/41 - loss 0.13714027 - samples/sec: 18.47 - lr: 0.010000\n",
+      "2021-09-21 21:14:09,984 epoch 10 - iter 40/41 - loss 0.12753907 - samples/sec: 11.43 - lr: 0.010000\n",
+      "2021-09-21 21:14:10,046 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:10,046 EPOCH 10 done: loss 0.1249 - lr 0.0100000\n",
+      "2021-09-21 21:14:10,286 DEV : loss 0.6183924078941345 - score 0.0\n",
+      "2021-09-21 21:14:10,287 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:14:18,947 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:14:18,948 Testing using best model ...\n",
+      "2021-09-21 21:14:18,949 loading file temp1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:42:47,567 \t0.4\n",
-      "2021-09-08 01:42:47,568 \n",
+      "2021-09-21 21:14:24,200 \t0.4\n",
+      "2021-09-21 21:14:24,200 \n",
       "Results:\n",
       "- F-score (micro) 0.4\n",
-      "- F-score (macro) 0.2\n",
+      "- F-score (macro) 0.16\n",
       "- Accuracy 0.4\n",
       "\n",
       "By class:\n",
       "              precision    recall  f1-score   support\n",
       "\n",
-      "       awful     0.0000    0.0000    0.0000         0\n",
-      "         bad     0.5000    0.5000    0.5000         2\n",
-      "     neutral     0.0000    0.0000    0.0000         0\n",
+      "       awful     0.0000    0.0000    0.0000         1\n",
+      "         bad     0.0000    0.0000    0.0000         0\n",
+      "     neutral     0.0000    0.0000    0.0000         1\n",
       "        good     0.0000    0.0000    0.0000         1\n",
-      "       great     0.5000    0.5000    0.5000         2\n",
+      "       great     0.6667    1.0000    0.8000         2\n",
       "\n",
       "   micro avg     0.4000    0.4000    0.4000         5\n",
-      "   macro avg     0.2000    0.2000    0.2000         5\n",
-      "weighted avg     0.4000    0.4000    0.4000         5\n",
+      "   macro avg     0.1333    0.2000    0.1600         5\n",
+      "weighted avg     0.2667    0.4000    0.3200         5\n",
       " samples avg     0.4000    0.4000    0.4000         5\n",
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 01:42:47,568 ----------------------------------------------------------------------------------------------------\n",
-      "Accuracy Durchschnitt: 0.32715447154471544\n"
+      "\n",
+      "2021-09-21 21:14:24,201 ----------------------------------------------------------------------------------------------------\n",
+      "Accuracy Durchschnitt: 0.38\n"
      ]
     }
    ],
@@ -2902,11 +2881,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "id": "0c4025f0",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.3975609756097561, 0.36666666666666664, 0.3642276422764228, 0.36585365853658536, 0.4056910569105691]\n",
+      "0.017861031841897256\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(avg_acc_list)\n",
+    "print(statistics.pstdev(avg_acc_list))"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -2918,7 +2909,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 6,
    "id": "ef4da272",
    "metadata": {},
    "outputs": [
@@ -2926,25 +2917,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:43:33,636 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
+      "2021-09-21 21:15:18,363 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:43:37,656 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:15:23,691 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 46/46 [00:00<00:00, 14522.99it/s]"
+      "100%|██████████| 46/46 [00:00<00:00, 9380.04it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:43:37,661 [b'The product has been reviewed as awful', b'The product has been reviewed as bad', b'The product has been reviewed as neutral', b'The product has been reviewed as good', b'The product has been reviewed as great']\n",
-      "2021-09-08 01:43:37,791 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:37,793 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:15:23,698 [b'The product has been reviewed as awful', b'The product has been reviewed as bad', b'The product has been reviewed as neutral', b'The product has been reviewed as good', b'The product has been reviewed as great']\n",
+      "2021-09-21 21:15:23,840 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:23,842 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -3257,24 +3248,24 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:43:37,794 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:37,794 Corpus: \"Corpus: 41 train + 4 dev + 5 test sentences\"\n",
-      "2021-09-08 01:43:37,794 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:37,795 Parameters:\n",
-      "2021-09-08 01:43:37,795  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:43:37,795  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:43:37,796  - patience: \"3\"\n",
-      "2021-09-08 01:43:37,796  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:43:37,796  - max_epochs: \"10\"\n",
-      "2021-09-08 01:43:37,796  - shuffle: \"True\"\n",
-      "2021-09-08 01:43:37,797  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:43:37,797  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:43:37,797 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:37,797 Model training base path: \"temp1\"\n",
-      "2021-09-08 01:43:37,798 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:37,798 Device: cuda:0\n",
-      "2021-09-08 01:43:37,798 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:37,799 Embeddings storage mode: cpu\n"
+      "2021-09-21 21:15:23,843 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:23,843 Corpus: \"Corpus: 41 train + 4 dev + 5 test sentences\"\n",
+      "2021-09-21 21:15:23,844 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:23,844 Parameters:\n",
+      "2021-09-21 21:15:23,844  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:15:23,845  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:15:23,846  - patience: \"3\"\n",
+      "2021-09-21 21:15:23,846  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:15:23,847  - max_epochs: \"10\"\n",
+      "2021-09-21 21:15:23,847  - shuffle: \"True\"\n",
+      "2021-09-21 21:15:23,848  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:15:23,848  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:15:23,849 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:23,849 Model training base path: \"temp1\"\n",
+      "2021-09-21 21:15:23,850 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:23,850 Device: cuda:0\n",
+      "2021-09-21 21:15:23,851 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:23,851 Embeddings storage mode: cpu\n"
      ]
     },
     {
@@ -3288,212 +3279,223 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:43:37,981 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:38,231 epoch 1 - iter 4/41 - loss 0.76494720 - samples/sec: 17.46 - lr: 0.020000\n",
-      "2021-09-08 01:43:38,626 epoch 1 - iter 8/41 - loss 0.65313135 - samples/sec: 10.17 - lr: 0.020000\n",
-      "2021-09-08 01:43:38,865 epoch 1 - iter 12/41 - loss 0.63531979 - samples/sec: 16.78 - lr: 0.020000\n",
-      "2021-09-08 01:43:39,205 epoch 1 - iter 16/41 - loss 0.61517182 - samples/sec: 11.79 - lr: 0.020000\n",
-      "2021-09-08 01:43:39,555 epoch 1 - iter 20/41 - loss 0.68165234 - samples/sec: 11.44 - lr: 0.020000\n",
-      "2021-09-08 01:43:39,873 epoch 1 - iter 24/41 - loss 0.66665104 - samples/sec: 12.64 - lr: 0.020000\n",
-      "2021-09-08 01:43:40,199 epoch 1 - iter 28/41 - loss 0.68847016 - samples/sec: 12.30 - lr: 0.020000\n",
-      "2021-09-08 01:43:40,495 epoch 1 - iter 32/41 - loss 0.65652500 - samples/sec: 13.56 - lr: 0.020000\n",
-      "2021-09-08 01:43:40,746 epoch 1 - iter 36/41 - loss 0.68994621 - samples/sec: 15.96 - lr: 0.020000\n",
-      "2021-09-08 01:43:40,971 epoch 1 - iter 40/41 - loss 0.67909129 - samples/sec: 17.87 - lr: 0.020000\n",
-      "2021-09-08 01:43:41,090 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:41,090 EPOCH 1 done: loss 0.6711 - lr 0.0200000\n",
-      "2021-09-08 01:43:41,210 DEV : loss 0.7421371936798096 - score 0.0\n",
-      "2021-09-08 01:43:41,211 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:43:45,147 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:45,543 epoch 2 - iter 4/41 - loss 0.92075674 - samples/sec: 10.58 - lr: 0.020000\n",
-      "2021-09-08 01:43:45,759 epoch 2 - iter 8/41 - loss 0.75984355 - samples/sec: 18.64 - lr: 0.020000\n",
-      "2021-09-08 01:43:46,145 epoch 2 - iter 12/41 - loss 0.71137698 - samples/sec: 10.38 - lr: 0.020000\n",
-      "2021-09-08 01:43:46,531 epoch 2 - iter 16/41 - loss 0.68847418 - samples/sec: 10.39 - lr: 0.020000\n",
-      "2021-09-08 01:43:46,928 epoch 2 - iter 20/41 - loss 0.68105116 - samples/sec: 10.08 - lr: 0.020000\n",
-      "2021-09-08 01:43:47,168 epoch 2 - iter 24/41 - loss 0.65602952 - samples/sec: 16.77 - lr: 0.020000\n",
-      "2021-09-08 01:43:47,403 epoch 2 - iter 28/41 - loss 0.65712311 - samples/sec: 17.02 - lr: 0.020000\n",
-      "2021-09-08 01:43:47,656 epoch 2 - iter 32/41 - loss 0.66441374 - samples/sec: 15.87 - lr: 0.020000\n",
-      "2021-09-08 01:43:47,935 epoch 2 - iter 36/41 - loss 0.65969450 - samples/sec: 14.38 - lr: 0.020000\n",
-      "2021-09-08 01:43:48,158 epoch 2 - iter 40/41 - loss 0.64174121 - samples/sec: 18.01 - lr: 0.020000\n",
-      "2021-09-08 01:43:48,317 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:48,318 EPOCH 2 done: loss 0.6383 - lr 0.0200000\n",
-      "2021-09-08 01:43:48,556 DEV : loss 0.6381513476371765 - score 0.0\n",
-      "2021-09-08 01:43:48,557 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:43:54,696 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:55,041 epoch 3 - iter 4/41 - loss 0.60115529 - samples/sec: 12.13 - lr: 0.020000\n",
-      "2021-09-08 01:43:55,329 epoch 3 - iter 8/41 - loss 0.60293176 - samples/sec: 13.93 - lr: 0.020000\n",
-      "2021-09-08 01:43:55,674 epoch 3 - iter 12/41 - loss 0.58972141 - samples/sec: 11.63 - lr: 0.020000\n",
-      "2021-09-08 01:43:55,934 epoch 3 - iter 16/41 - loss 0.60124588 - samples/sec: 15.42 - lr: 0.020000\n",
-      "2021-09-08 01:43:56,273 epoch 3 - iter 20/41 - loss 0.59832040 - samples/sec: 11.80 - lr: 0.020000\n",
-      "2021-09-08 01:43:56,586 epoch 3 - iter 24/41 - loss 0.57883425 - samples/sec: 12.80 - lr: 0.020000\n",
-      "2021-09-08 01:43:56,850 epoch 3 - iter 28/41 - loss 0.56364099 - samples/sec: 15.23 - lr: 0.020000\n",
-      "2021-09-08 01:43:57,106 epoch 3 - iter 32/41 - loss 0.52517313 - samples/sec: 15.63 - lr: 0.020000\n",
-      "2021-09-08 01:43:57,339 epoch 3 - iter 36/41 - loss 0.53425855 - samples/sec: 17.25 - lr: 0.020000\n",
-      "2021-09-08 01:43:57,707 epoch 3 - iter 40/41 - loss 0.53451198 - samples/sec: 10.88 - lr: 0.020000\n",
-      "2021-09-08 01:43:57,756 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:43:57,757 EPOCH 3 done: loss 0.5225 - lr 0.0200000\n",
-      "2021-09-08 01:43:57,857 DEV : loss 0.6031181812286377 - score 0.0\n",
-      "2021-09-08 01:43:57,859 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:44:02,157 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:02,376 epoch 4 - iter 4/41 - loss 0.62293041 - samples/sec: 19.64 - lr: 0.020000\n",
-      "2021-09-08 01:44:02,575 epoch 4 - iter 8/41 - loss 0.63396633 - samples/sec: 20.13 - lr: 0.020000\n",
-      "2021-09-08 01:44:02,785 epoch 4 - iter 12/41 - loss 0.61034332 - samples/sec: 19.15 - lr: 0.020000\n",
-      "2021-09-08 01:44:03,080 epoch 4 - iter 16/41 - loss 0.56070632 - samples/sec: 13.60 - lr: 0.020000\n",
-      "2021-09-08 01:44:03,392 epoch 4 - iter 20/41 - loss 0.56545176 - samples/sec: 12.84 - lr: 0.020000\n",
-      "2021-09-08 01:44:03,571 epoch 4 - iter 24/41 - loss 0.51345077 - samples/sec: 22.45 - lr: 0.020000\n",
-      "2021-09-08 01:44:03,766 epoch 4 - iter 28/41 - loss 0.50904944 - samples/sec: 20.50 - lr: 0.020000\n",
-      "2021-09-08 01:44:04,016 epoch 4 - iter 32/41 - loss 0.51754427 - samples/sec: 16.07 - lr: 0.020000\n",
-      "2021-09-08 01:44:04,330 epoch 4 - iter 36/41 - loss 0.52893397 - samples/sec: 12.74 - lr: 0.020000\n",
-      "2021-09-08 01:44:04,574 epoch 4 - iter 40/41 - loss 0.54634080 - samples/sec: 16.50 - lr: 0.020000\n",
-      "2021-09-08 01:44:04,624 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:04,624 EPOCH 4 done: loss 0.5452 - lr 0.0200000\n",
-      "2021-09-08 01:44:04,708 DEV : loss 0.4773566722869873 - score 0.25\n",
-      "2021-09-08 01:44:04,708 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:15:24,072 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:24,355 epoch 1 - iter 4/41 - loss 0.68598553 - samples/sec: 15.29 - lr: 0.020000\n",
+      "2021-09-21 21:15:24,632 epoch 1 - iter 8/41 - loss 0.56304218 - samples/sec: 14.48 - lr: 0.020000\n",
+      "2021-09-21 21:15:25,068 epoch 1 - iter 12/41 - loss 0.71795604 - samples/sec: 9.20 - lr: 0.020000\n",
+      "2021-09-21 21:15:25,476 epoch 1 - iter 16/41 - loss 0.68694202 - samples/sec: 9.80 - lr: 0.020000\n",
+      "2021-09-21 21:15:25,894 epoch 1 - iter 20/41 - loss 0.71353743 - samples/sec: 9.60 - lr: 0.020000\n",
+      "2021-09-21 21:15:26,176 epoch 1 - iter 24/41 - loss 0.67632864 - samples/sec: 14.19 - lr: 0.020000\n",
+      "2021-09-21 21:15:26,636 epoch 1 - iter 28/41 - loss 0.70312770 - samples/sec: 8.72 - lr: 0.020000\n",
+      "2021-09-21 21:15:26,917 epoch 1 - iter 32/41 - loss 0.69032206 - samples/sec: 14.27 - lr: 0.020000\n",
+      "2021-09-21 21:15:27,161 epoch 1 - iter 36/41 - loss 0.66540353 - samples/sec: 16.39 - lr: 0.020000\n",
+      "2021-09-21 21:15:27,393 epoch 1 - iter 40/41 - loss 0.65991454 - samples/sec: 17.31 - lr: 0.020000\n",
+      "2021-09-21 21:15:27,451 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:27,451 EPOCH 1 done: loss 0.6771 - lr 0.0200000\n",
+      "2021-09-21 21:15:29,466 DEV : loss 0.6078627109527588 - score 0.5\n",
+      "2021-09-21 21:15:29,467 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:44:09,114 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:09,343 epoch 5 - iter 4/41 - loss 0.46759568 - samples/sec: 18.61 - lr: 0.020000\n",
-      "2021-09-08 01:44:09,656 epoch 5 - iter 8/41 - loss 0.53846412 - samples/sec: 12.81 - lr: 0.020000\n",
-      "2021-09-08 01:44:09,939 epoch 5 - iter 12/41 - loss 0.48272766 - samples/sec: 14.19 - lr: 0.020000\n",
-      "2021-09-08 01:44:10,165 epoch 5 - iter 16/41 - loss 0.42299800 - samples/sec: 17.76 - lr: 0.020000\n",
-      "2021-09-08 01:44:10,370 epoch 5 - iter 20/41 - loss 0.40652918 - samples/sec: 19.55 - lr: 0.020000\n",
-      "2021-09-08 01:44:10,637 epoch 5 - iter 24/41 - loss 0.46248613 - samples/sec: 15.00 - lr: 0.020000\n",
-      "2021-09-08 01:44:10,854 epoch 5 - iter 28/41 - loss 0.46083062 - samples/sec: 18.53 - lr: 0.020000\n",
-      "2021-09-08 01:44:11,124 epoch 5 - iter 32/41 - loss 0.49475367 - samples/sec: 14.88 - lr: 0.020000\n",
-      "2021-09-08 01:44:11,633 epoch 5 - iter 36/41 - loss 0.54873537 - samples/sec: 7.87 - lr: 0.020000\n",
-      "2021-09-08 01:44:11,834 epoch 5 - iter 40/41 - loss 0.52645793 - samples/sec: 19.96 - lr: 0.020000\n",
-      "2021-09-08 01:44:11,890 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:11,891 EPOCH 5 done: loss 0.5163 - lr 0.0200000\n",
-      "2021-09-08 01:44:12,137 DEV : loss 0.8145270347595215 - score 0.0\n",
-      "2021-09-08 01:44:12,138 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:44:12,217 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:12,805 epoch 6 - iter 4/41 - loss 0.58735285 - samples/sec: 6.99 - lr: 0.020000\n",
-      "2021-09-08 01:44:13,119 epoch 6 - iter 8/41 - loss 0.40365156 - samples/sec: 12.78 - lr: 0.020000\n",
-      "2021-09-08 01:44:13,318 epoch 6 - iter 12/41 - loss 0.38354194 - samples/sec: 20.16 - lr: 0.020000\n",
-      "2021-09-08 01:44:13,562 epoch 6 - iter 16/41 - loss 0.34788532 - samples/sec: 16.40 - lr: 0.020000\n",
-      "2021-09-08 01:44:13,749 epoch 6 - iter 20/41 - loss 0.40601662 - samples/sec: 21.48 - lr: 0.020000\n",
-      "2021-09-08 01:44:14,026 epoch 6 - iter 24/41 - loss 0.36669560 - samples/sec: 14.48 - lr: 0.020000\n",
-      "2021-09-08 01:44:14,246 epoch 6 - iter 28/41 - loss 0.36816674 - samples/sec: 18.17 - lr: 0.020000\n",
-      "2021-09-08 01:44:14,568 epoch 6 - iter 32/41 - loss 0.36228964 - samples/sec: 12.44 - lr: 0.020000\n",
-      "2021-09-08 01:44:14,925 epoch 6 - iter 36/41 - loss 0.40436648 - samples/sec: 11.24 - lr: 0.020000\n"
+      "2021-09-21 21:15:37,539 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:37,905 epoch 2 - iter 4/41 - loss 1.23326561 - samples/sec: 11.81 - lr: 0.020000\n",
+      "2021-09-21 21:15:38,326 epoch 2 - iter 8/41 - loss 0.98723407 - samples/sec: 9.51 - lr: 0.020000\n",
+      "2021-09-21 21:15:38,872 epoch 2 - iter 12/41 - loss 0.85798316 - samples/sec: 7.33 - lr: 0.020000\n",
+      "2021-09-21 21:15:39,270 epoch 2 - iter 16/41 - loss 0.79409439 - samples/sec: 10.07 - lr: 0.020000\n",
+      "2021-09-21 21:15:39,613 epoch 2 - iter 20/41 - loss 0.80798216 - samples/sec: 11.67 - lr: 0.020000\n",
+      "2021-09-21 21:15:40,029 epoch 2 - iter 24/41 - loss 0.78536629 - samples/sec: 9.63 - lr: 0.020000\n",
+      "2021-09-21 21:15:40,317 epoch 2 - iter 28/41 - loss 0.76690984 - samples/sec: 13.91 - lr: 0.020000\n",
+      "2021-09-21 21:15:40,572 epoch 2 - iter 32/41 - loss 0.75259823 - samples/sec: 15.77 - lr: 0.020000\n",
+      "2021-09-21 21:15:40,818 epoch 2 - iter 36/41 - loss 0.73735365 - samples/sec: 16.29 - lr: 0.020000\n",
+      "2021-09-21 21:15:41,466 epoch 2 - iter 40/41 - loss 0.72756476 - samples/sec: 6.17 - lr: 0.020000\n",
+      "2021-09-21 21:15:41,585 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:41,585 EPOCH 2 done: loss 0.7263 - lr 0.0200000\n",
+      "2021-09-21 21:15:41,844 DEV : loss 0.4802243411540985 - score 0.25\n",
+      "2021-09-21 21:15:41,844 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:15:41,846 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:42,618 epoch 3 - iter 4/41 - loss 0.64548352 - samples/sec: 5.46 - lr: 0.020000\n",
+      "2021-09-21 21:15:43,169 epoch 3 - iter 8/41 - loss 0.60904310 - samples/sec: 7.27 - lr: 0.020000\n",
+      "2021-09-21 21:15:43,588 epoch 3 - iter 12/41 - loss 0.58208907 - samples/sec: 9.56 - lr: 0.020000\n",
+      "2021-09-21 21:15:44,190 epoch 3 - iter 16/41 - loss 0.56492691 - samples/sec: 6.66 - lr: 0.020000\n",
+      "2021-09-21 21:15:44,495 epoch 3 - iter 20/41 - loss 0.56718251 - samples/sec: 13.15 - lr: 0.020000\n",
+      "2021-09-21 21:15:44,846 epoch 3 - iter 24/41 - loss 0.53828542 - samples/sec: 11.40 - lr: 0.020000\n",
+      "2021-09-21 21:15:45,405 epoch 3 - iter 28/41 - loss 0.52995420 - samples/sec: 7.16 - lr: 0.020000\n",
+      "2021-09-21 21:15:45,811 epoch 3 - iter 32/41 - loss 0.57343660 - samples/sec: 9.87 - lr: 0.020000\n",
+      "2021-09-21 21:15:46,253 epoch 3 - iter 36/41 - loss 0.58962757 - samples/sec: 9.06 - lr: 0.020000\n",
+      "2021-09-21 21:15:46,569 epoch 3 - iter 40/41 - loss 0.59413470 - samples/sec: 12.68 - lr: 0.020000\n",
+      "2021-09-21 21:15:46,651 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:46,651 EPOCH 3 done: loss 0.6023 - lr 0.0200000\n",
+      "2021-09-21 21:15:46,939 DEV : loss 0.5112876892089844 - score 0.25\n",
+      "2021-09-21 21:15:46,940 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:15:47,014 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:47,354 epoch 4 - iter 4/41 - loss 0.56448121 - samples/sec: 12.82 - lr: 0.020000\n",
+      "2021-09-21 21:15:47,625 epoch 4 - iter 8/41 - loss 0.48763942 - samples/sec: 14.79 - lr: 0.020000\n",
+      "2021-09-21 21:15:48,544 epoch 4 - iter 12/41 - loss 0.55329538 - samples/sec: 4.36 - lr: 0.020000\n",
+      "2021-09-21 21:15:49,215 epoch 4 - iter 16/41 - loss 0.54528370 - samples/sec: 5.96 - lr: 0.020000\n",
+      "2021-09-21 21:15:49,650 epoch 4 - iter 20/41 - loss 0.49019069 - samples/sec: 9.22 - lr: 0.020000\n",
+      "2021-09-21 21:15:50,201 epoch 4 - iter 24/41 - loss 0.49623120 - samples/sec: 7.26 - lr: 0.020000\n",
+      "2021-09-21 21:15:50,690 epoch 4 - iter 28/41 - loss 0.48885451 - samples/sec: 8.19 - lr: 0.020000\n",
+      "2021-09-21 21:15:51,159 epoch 4 - iter 32/41 - loss 0.46725315 - samples/sec: 8.55 - lr: 0.020000\n",
+      "2021-09-21 21:15:51,687 epoch 4 - iter 36/41 - loss 0.47745489 - samples/sec: 7.57 - lr: 0.020000\n",
+      "2021-09-21 21:15:52,252 epoch 4 - iter 40/41 - loss 0.48856484 - samples/sec: 7.09 - lr: 0.020000\n",
+      "2021-09-21 21:15:52,351 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:52,351 EPOCH 4 done: loss 0.4997 - lr 0.0200000\n",
+      "2021-09-21 21:15:52,614 DEV : loss 0.6954683661460876 - score 0.25\n",
+      "2021-09-21 21:15:52,615 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:15:52,617 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:53,106 epoch 5 - iter 4/41 - loss 0.41182173 - samples/sec: 9.17 - lr: 0.020000\n",
+      "2021-09-21 21:15:53,564 epoch 5 - iter 8/41 - loss 0.40553918 - samples/sec: 8.73 - lr: 0.020000\n",
+      "2021-09-21 21:15:53,995 epoch 5 - iter 12/41 - loss 0.49662709 - samples/sec: 9.31 - lr: 0.020000\n",
+      "2021-09-21 21:15:54,443 epoch 5 - iter 16/41 - loss 0.49133311 - samples/sec: 8.93 - lr: 0.020000\n",
+      "2021-09-21 21:15:54,870 epoch 5 - iter 20/41 - loss 0.47131682 - samples/sec: 9.38 - lr: 0.020000\n",
+      "2021-09-21 21:15:55,520 epoch 5 - iter 24/41 - loss 0.48560808 - samples/sec: 6.16 - lr: 0.020000\n",
+      "2021-09-21 21:15:56,204 epoch 5 - iter 28/41 - loss 0.45156780 - samples/sec: 5.85 - lr: 0.020000\n",
+      "2021-09-21 21:15:56,903 epoch 5 - iter 32/41 - loss 0.44913387 - samples/sec: 5.73 - lr: 0.020000\n",
+      "2021-09-21 21:15:57,431 epoch 5 - iter 36/41 - loss 0.49638362 - samples/sec: 7.58 - lr: 0.020000\n",
+      "2021-09-21 21:15:58,051 epoch 5 - iter 40/41 - loss 0.50993663 - samples/sec: 6.46 - lr: 0.020000\n",
+      "2021-09-21 21:15:58,184 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:58,185 EPOCH 5 done: loss 0.5052 - lr 0.0200000\n",
+      "2021-09-21 21:15:58,430 DEV : loss 0.7670365571975708 - score 0.0\n",
+      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:15:58,431 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:15:58,433 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:15:58,938 epoch 6 - iter 4/41 - loss 0.18067887 - samples/sec: 8.68 - lr: 0.010000\n",
+      "2021-09-21 21:15:59,428 epoch 6 - iter 8/41 - loss 0.27997279 - samples/sec: 8.17 - lr: 0.010000\n",
+      "2021-09-21 21:15:59,930 epoch 6 - iter 12/41 - loss 0.22540107 - samples/sec: 7.98 - lr: 0.010000\n",
+      "2021-09-21 21:16:00,355 epoch 6 - iter 16/41 - loss 0.32367264 - samples/sec: 9.41 - lr: 0.010000\n",
+      "2021-09-21 21:16:00,834 epoch 6 - iter 20/41 - loss 0.39547782 - samples/sec: 8.36 - lr: 0.010000\n",
+      "2021-09-21 21:16:01,396 epoch 6 - iter 24/41 - loss 0.40505534 - samples/sec: 7.13 - lr: 0.010000\n",
+      "2021-09-21 21:16:02,045 epoch 6 - iter 28/41 - loss 0.41192745 - samples/sec: 6.16 - lr: 0.010000\n",
+      "2021-09-21 21:16:02,730 epoch 6 - iter 32/41 - loss 0.42179605 - samples/sec: 5.85 - lr: 0.010000\n",
+      "2021-09-21 21:16:03,215 epoch 6 - iter 36/41 - loss 0.45331601 - samples/sec: 8.26 - lr: 0.010000\n",
+      "2021-09-21 21:16:03,517 epoch 6 - iter 40/41 - loss 0.42684967 - samples/sec: 13.28 - lr: 0.010000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:44:15,225 epoch 6 - iter 40/41 - loss 0.39607486 - samples/sec: 13.34 - lr: 0.020000\n",
-      "2021-09-08 01:44:15,273 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:15,274 EPOCH 6 done: loss 0.3980 - lr 0.0200000\n",
-      "2021-09-08 01:44:15,417 DEV : loss 0.7414005994796753 - score 0.0\n",
-      "2021-09-08 01:44:15,418 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:44:15,420 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:15,661 epoch 7 - iter 4/41 - loss 0.38391589 - samples/sec: 18.52 - lr: 0.020000\n",
-      "2021-09-08 01:44:15,917 epoch 7 - iter 8/41 - loss 0.29391620 - samples/sec: 15.65 - lr: 0.020000\n",
-      "2021-09-08 01:44:16,290 epoch 7 - iter 12/41 - loss 0.29934716 - samples/sec: 10.75 - lr: 0.020000\n",
-      "2021-09-08 01:44:16,557 epoch 7 - iter 16/41 - loss 0.25428139 - samples/sec: 15.05 - lr: 0.020000\n",
-      "2021-09-08 01:44:16,747 epoch 7 - iter 20/41 - loss 0.30992886 - samples/sec: 21.07 - lr: 0.020000\n",
-      "2021-09-08 01:44:17,081 epoch 7 - iter 24/41 - loss 0.32648171 - samples/sec: 12.02 - lr: 0.020000\n",
-      "2021-09-08 01:44:17,320 epoch 7 - iter 28/41 - loss 0.32435950 - samples/sec: 16.79 - lr: 0.020000\n",
-      "2021-09-08 01:44:17,671 epoch 7 - iter 32/41 - loss 0.34921082 - samples/sec: 11.41 - lr: 0.020000\n",
-      "2021-09-08 01:44:18,005 epoch 7 - iter 36/41 - loss 0.38074848 - samples/sec: 12.00 - lr: 0.020000\n",
-      "2021-09-08 01:44:18,221 epoch 7 - iter 40/41 - loss 0.36287685 - samples/sec: 18.56 - lr: 0.020000\n",
-      "2021-09-08 01:44:18,437 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:18,438 EPOCH 7 done: loss 0.3649 - lr 0.0200000\n",
-      "2021-09-08 01:44:19,545 DEV : loss 1.0252245664596558 - score 0.0\n",
-      "2021-09-08 01:44:19,547 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:44:19,553 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:19,914 epoch 8 - iter 4/41 - loss 0.05584257 - samples/sec: 11.69 - lr: 0.020000\n",
-      "2021-09-08 01:44:20,191 epoch 8 - iter 8/41 - loss 0.18198958 - samples/sec: 14.46 - lr: 0.020000\n",
-      "2021-09-08 01:44:20,508 epoch 8 - iter 12/41 - loss 0.38966389 - samples/sec: 12.64 - lr: 0.020000\n",
-      "2021-09-08 01:44:20,710 epoch 8 - iter 16/41 - loss 0.37769153 - samples/sec: 19.89 - lr: 0.020000\n",
-      "2021-09-08 01:44:20,954 epoch 8 - iter 20/41 - loss 0.30860423 - samples/sec: 16.41 - lr: 0.020000\n",
-      "2021-09-08 01:44:21,195 epoch 8 - iter 24/41 - loss 0.27397966 - samples/sec: 16.65 - lr: 0.020000\n",
-      "2021-09-08 01:44:21,468 epoch 8 - iter 28/41 - loss 0.36586074 - samples/sec: 14.71 - lr: 0.020000\n",
-      "2021-09-08 01:44:21,752 epoch 8 - iter 32/41 - loss 0.33470576 - samples/sec: 14.10 - lr: 0.020000\n",
-      "2021-09-08 01:44:22,013 epoch 8 - iter 36/41 - loss 0.44573195 - samples/sec: 15.35 - lr: 0.020000\n",
-      "2021-09-08 01:44:22,391 epoch 8 - iter 40/41 - loss 0.43273588 - samples/sec: 10.60 - lr: 0.020000\n",
-      "2021-09-08 01:44:22,550 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:22,551 EPOCH 8 done: loss 0.4288 - lr 0.0200000\n",
-      "2021-09-08 01:44:22,785 DEV : loss 0.8933014869689941 - score 0.0\n",
-      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:44:22,786 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:44:22,865 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:23,414 epoch 9 - iter 4/41 - loss 0.17680929 - samples/sec: 7.75 - lr: 0.010000\n",
-      "2021-09-08 01:44:23,809 epoch 9 - iter 8/41 - loss 0.12440895 - samples/sec: 10.13 - lr: 0.010000\n",
-      "2021-09-08 01:44:24,064 epoch 9 - iter 12/41 - loss 0.27110388 - samples/sec: 15.72 - lr: 0.010000\n",
-      "2021-09-08 01:44:24,371 epoch 9 - iter 16/41 - loss 0.22721194 - samples/sec: 13.05 - lr: 0.010000\n",
-      "2021-09-08 01:44:24,638 epoch 9 - iter 20/41 - loss 0.24510845 - samples/sec: 15.06 - lr: 0.010000\n",
-      "2021-09-08 01:44:24,922 epoch 9 - iter 24/41 - loss 0.28348683 - samples/sec: 14.12 - lr: 0.010000\n",
-      "2021-09-08 01:44:25,144 epoch 9 - iter 28/41 - loss 0.27251143 - samples/sec: 18.05 - lr: 0.010000\n",
-      "2021-09-08 01:44:25,452 epoch 9 - iter 32/41 - loss 0.27333115 - samples/sec: 12.99 - lr: 0.010000\n",
-      "2021-09-08 01:44:25,668 epoch 9 - iter 36/41 - loss 0.24876093 - samples/sec: 18.61 - lr: 0.010000\n",
-      "2021-09-08 01:44:25,902 epoch 9 - iter 40/41 - loss 0.24184924 - samples/sec: 17.11 - lr: 0.010000\n",
-      "2021-09-08 01:44:25,962 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:25,962 EPOCH 9 done: loss 0.2386 - lr 0.0100000\n",
-      "2021-09-08 01:44:26,055 DEV : loss 1.1764236688613892 - score 0.25\n",
-      "2021-09-08 01:44:26,056 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:44:26,058 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:26,374 epoch 10 - iter 4/41 - loss 0.04378981 - samples/sec: 13.30 - lr: 0.010000\n",
-      "2021-09-08 01:44:26,573 epoch 10 - iter 8/41 - loss 0.02803718 - samples/sec: 20.16 - lr: 0.010000\n",
-      "2021-09-08 01:44:26,927 epoch 10 - iter 12/41 - loss 0.02315315 - samples/sec: 11.33 - lr: 0.010000\n",
-      "2021-09-08 01:44:27,157 epoch 10 - iter 16/41 - loss 0.11040420 - samples/sec: 17.42 - lr: 0.010000\n",
-      "2021-09-08 01:44:27,628 epoch 10 - iter 20/41 - loss 0.09630842 - samples/sec: 8.51 - lr: 0.010000\n",
-      "2021-09-08 01:44:27,856 epoch 10 - iter 24/41 - loss 0.15551699 - samples/sec: 17.62 - lr: 0.010000\n",
-      "2021-09-08 01:44:28,183 epoch 10 - iter 28/41 - loss 0.22132932 - samples/sec: 12.23 - lr: 0.010000\n",
-      "2021-09-08 01:44:28,390 epoch 10 - iter 32/41 - loss 0.19789334 - samples/sec: 19.38 - lr: 0.010000\n",
-      "2021-09-08 01:44:28,760 epoch 10 - iter 36/41 - loss 0.22016369 - samples/sec: 10.84 - lr: 0.010000\n",
-      "2021-09-08 01:44:29,125 epoch 10 - iter 40/41 - loss 0.20375012 - samples/sec: 10.98 - lr: 0.010000\n",
-      "2021-09-08 01:44:29,314 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:29,315 EPOCH 10 done: loss 0.2094 - lr 0.0100000\n",
-      "2021-09-08 01:44:29,596 DEV : loss 0.9717786312103271 - score 0.25\n",
-      "2021-09-08 01:44:29,598 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:44:33,745 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:44:33,745 Testing using best model ...\n",
-      "2021-09-08 01:44:33,747 loading file temp1/best-model.pt\n",
+      "2021-09-21 21:16:03,639 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:03,639 EPOCH 6 done: loss 0.4218 - lr 0.0100000\n",
+      "2021-09-21 21:16:03,916 DEV : loss 0.9694600105285645 - score 0.0\n",
+      "2021-09-21 21:16:03,917 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:16:03,998 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:04,393 epoch 7 - iter 4/41 - loss 0.37432376 - samples/sec: 11.09 - lr: 0.010000\n",
+      "2021-09-21 21:16:04,677 epoch 7 - iter 8/41 - loss 0.29883995 - samples/sec: 14.10 - lr: 0.010000\n",
+      "2021-09-21 21:16:05,000 epoch 7 - iter 12/41 - loss 0.28584635 - samples/sec: 12.42 - lr: 0.010000\n",
+      "2021-09-21 21:16:05,450 epoch 7 - iter 16/41 - loss 0.32000020 - samples/sec: 8.89 - lr: 0.010000\n",
+      "2021-09-21 21:16:05,964 epoch 7 - iter 20/41 - loss 0.34638219 - samples/sec: 7.78 - lr: 0.010000\n",
+      "2021-09-21 21:16:06,535 epoch 7 - iter 24/41 - loss 0.33023897 - samples/sec: 7.02 - lr: 0.010000\n",
+      "2021-09-21 21:16:06,978 epoch 7 - iter 28/41 - loss 0.32463129 - samples/sec: 9.04 - lr: 0.010000\n",
+      "2021-09-21 21:16:07,422 epoch 7 - iter 32/41 - loss 0.32960404 - samples/sec: 9.01 - lr: 0.010000\n",
+      "2021-09-21 21:16:07,759 epoch 7 - iter 36/41 - loss 0.37120141 - samples/sec: 11.90 - lr: 0.010000\n",
+      "2021-09-21 21:16:08,013 epoch 7 - iter 40/41 - loss 0.39630760 - samples/sec: 15.77 - lr: 0.010000\n",
+      "2021-09-21 21:16:08,077 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:08,078 EPOCH 7 done: loss 0.3883 - lr 0.0100000\n",
+      "2021-09-21 21:16:15,466 DEV : loss 0.7546497583389282 - score 0.0\n",
+      "2021-09-21 21:16:15,467 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:16:15,510 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:15,864 epoch 8 - iter 4/41 - loss 0.73027275 - samples/sec: 11.86 - lr: 0.010000\n",
+      "2021-09-21 21:16:16,129 epoch 8 - iter 8/41 - loss 0.50374644 - samples/sec: 15.10 - lr: 0.010000\n",
+      "2021-09-21 21:16:16,366 epoch 8 - iter 12/41 - loss 0.41548159 - samples/sec: 16.92 - lr: 0.010000\n",
+      "2021-09-21 21:16:16,799 epoch 8 - iter 16/41 - loss 0.40845050 - samples/sec: 9.25 - lr: 0.010000\n",
+      "2021-09-21 21:16:17,266 epoch 8 - iter 20/41 - loss 0.36020586 - samples/sec: 8.58 - lr: 0.010000\n",
+      "2021-09-21 21:16:17,755 epoch 8 - iter 24/41 - loss 0.36584276 - samples/sec: 8.18 - lr: 0.010000\n",
+      "2021-09-21 21:16:18,178 epoch 8 - iter 28/41 - loss 0.34216262 - samples/sec: 9.49 - lr: 0.010000\n",
+      "2021-09-21 21:16:18,456 epoch 8 - iter 32/41 - loss 0.37582970 - samples/sec: 14.38 - lr: 0.010000\n",
+      "2021-09-21 21:16:18,784 epoch 8 - iter 36/41 - loss 0.36120779 - samples/sec: 12.24 - lr: 0.010000\n",
+      "2021-09-21 21:16:19,048 epoch 8 - iter 40/41 - loss 0.39939952 - samples/sec: 15.15 - lr: 0.010000\n",
+      "2021-09-21 21:16:19,112 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:19,113 EPOCH 8 done: loss 0.3981 - lr 0.0100000\n",
+      "2021-09-21 21:16:19,607 DEV : loss 1.0425677299499512 - score 0.0\n",
+      "2021-09-21 21:16:19,608 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:16:19,685 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:20,061 epoch 9 - iter 4/41 - loss 0.29032435 - samples/sec: 11.51 - lr: 0.010000\n",
+      "2021-09-21 21:16:20,373 epoch 9 - iter 8/41 - loss 0.20353208 - samples/sec: 12.85 - lr: 0.010000\n",
+      "2021-09-21 21:16:20,765 epoch 9 - iter 12/41 - loss 0.19358238 - samples/sec: 10.22 - lr: 0.010000\n",
+      "2021-09-21 21:16:21,231 epoch 9 - iter 16/41 - loss 0.17388903 - samples/sec: 8.60 - lr: 0.010000\n",
+      "2021-09-21 21:16:21,698 epoch 9 - iter 20/41 - loss 0.14341538 - samples/sec: 8.57 - lr: 0.010000\n",
+      "2021-09-21 21:16:22,017 epoch 9 - iter 24/41 - loss 0.19660595 - samples/sec: 12.55 - lr: 0.010000\n",
+      "2021-09-21 21:16:22,429 epoch 9 - iter 28/41 - loss 0.19690841 - samples/sec: 9.74 - lr: 0.010000\n",
+      "2021-09-21 21:16:22,827 epoch 9 - iter 32/41 - loss 0.20565123 - samples/sec: 10.07 - lr: 0.010000\n",
+      "2021-09-21 21:16:23,586 epoch 9 - iter 36/41 - loss 0.30442193 - samples/sec: 5.27 - lr: 0.010000\n",
+      "2021-09-21 21:16:24,269 epoch 9 - iter 40/41 - loss 0.29425160 - samples/sec: 5.86 - lr: 0.010000\n",
+      "2021-09-21 21:16:24,390 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:24,390 EPOCH 9 done: loss 0.2940 - lr 0.0100000\n",
+      "2021-09-21 21:16:24,603 DEV : loss 1.0154316425323486 - score 0.25\n",
+      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 21:16:24,604 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:16:24,608 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:25,082 epoch 10 - iter 4/41 - loss 0.22987511 - samples/sec: 9.20 - lr: 0.005000\n",
+      "2021-09-21 21:16:25,372 epoch 10 - iter 8/41 - loss 0.24563749 - samples/sec: 13.81 - lr: 0.005000\n",
+      "2021-09-21 21:16:25,834 epoch 10 - iter 12/41 - loss 0.19227471 - samples/sec: 8.66 - lr: 0.005000\n",
+      "2021-09-21 21:16:26,512 epoch 10 - iter 16/41 - loss 0.27252291 - samples/sec: 5.91 - lr: 0.005000\n",
+      "2021-09-21 21:16:26,840 epoch 10 - iter 20/41 - loss 0.22301696 - samples/sec: 12.22 - lr: 0.005000\n",
+      "2021-09-21 21:16:27,192 epoch 10 - iter 24/41 - loss 0.18854089 - samples/sec: 11.38 - lr: 0.005000\n",
+      "2021-09-21 21:16:27,570 epoch 10 - iter 28/41 - loss 0.23150464 - samples/sec: 10.61 - lr: 0.005000\n",
+      "2021-09-21 21:16:27,897 epoch 10 - iter 32/41 - loss 0.26593098 - samples/sec: 12.27 - lr: 0.005000\n",
+      "2021-09-21 21:16:28,198 epoch 10 - iter 36/41 - loss 0.23812653 - samples/sec: 13.29 - lr: 0.005000\n",
+      "2021-09-21 21:16:28,544 epoch 10 - iter 40/41 - loss 0.22793334 - samples/sec: 11.59 - lr: 0.005000\n",
+      "2021-09-21 21:16:28,617 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:28,618 EPOCH 10 done: loss 0.2226 - lr 0.0050000\n",
+      "2021-09-21 21:16:28,791 DEV : loss 0.810788631439209 - score 0.25\n",
+      "2021-09-21 21:16:28,791 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:16:35,180 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:16:35,181 Testing using best model ...\n",
+      "2021-09-21 21:16:35,182 loading file temp1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:44:38,790 \t0.4\n",
-      "2021-09-08 01:44:38,791 \n",
+      "2021-09-21 21:16:42,392 \t0.2\n",
+      "2021-09-21 21:16:42,392 \n",
       "Results:\n",
-      "- F-score (micro) 0.4\n",
-      "- F-score (macro) 0.2333\n",
-      "- Accuracy 0.4\n",
+      "- F-score (micro) 0.2\n",
+      "- F-score (macro) 0.2\n",
+      "- Accuracy 0.2\n",
       "\n",
       "By class:\n",
       "                                          precision    recall  f1-score   support\n",
       "\n",
       "  The product has been reviewed as awful     0.0000    0.0000    0.0000         1\n",
-      "    The product has been reviewed as bad     0.0000    0.0000    0.0000         0\n",
-      "The product has been reviewed as neutral     0.0000    0.0000    0.0000         1\n",
-      "   The product has been reviewed as good     0.5000    0.5000    0.5000         2\n",
-      "  The product has been reviewed as great     0.5000    1.0000    0.6667         1\n",
+      "    The product has been reviewed as bad     0.0000    0.0000    0.0000         1\n",
+      "The product has been reviewed as neutral     0.0000    0.0000    0.0000         2\n",
+      "   The product has been reviewed as good     1.0000    1.0000    1.0000         1\n",
+      "  The product has been reviewed as great     0.0000    0.0000    0.0000         0\n",
       "\n",
-      "                               micro avg     0.4000    0.4000    0.4000         5\n",
-      "                               macro avg     0.2000    0.3000    0.2333         5\n",
-      "                            weighted avg     0.3000    0.4000    0.3333         5\n",
-      "                             samples avg     0.4000    0.4000    0.4000         5\n",
+      "                               micro avg     0.2000    0.2000    0.2000         5\n",
+      "                               macro avg     0.2000    0.2000    0.2000         5\n",
+      "                            weighted avg     0.2000    0.2000    0.2000         5\n",
+      "                             samples avg     0.2000    0.2000    0.2000         5\n",
       "\n",
-      "2021-09-08 01:44:38,791 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:30,232 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
+      "2021-09-21 21:16:42,392 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:40,116 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:45:34,477 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:17:44,497 Computing label dictionary. Progress:\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 46/46 [00:00<00:00, 13308.82it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 21:17:44,502 [b'The product has been reviewed as awful', b'The product has been reviewed as bad', b'The product has been reviewed as neutral', b'The product has been reviewed as good', b'The product has been reviewed as great']\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 46/46 [00:00<00:00, 14934.44it/s]"
+      "\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:45:34,482 [b'The product has been reviewed as awful', b'The product has been reviewed as bad', b'The product has been reviewed as neutral', b'The product has been reviewed as good', b'The product has been reviewed as great']\n",
-      "2021-09-08 01:45:34,646 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:34,648 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:17:44,710 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:44,712 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -3806,241 +3808,235 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:45:34,648 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:34,649 Corpus: \"Corpus: 41 train + 4 dev + 5 test sentences\"\n",
-      "2021-09-08 01:45:34,649 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:34,649 Parameters:\n",
-      "2021-09-08 01:45:34,650  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:45:34,650  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:45:34,650  - patience: \"3\"\n",
-      "2021-09-08 01:45:34,650  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:45:34,651  - max_epochs: \"10\"\n",
-      "2021-09-08 01:45:34,651  - shuffle: \"True\"\n",
-      "2021-09-08 01:45:34,651  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:45:34,652  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:45:34,652 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:34,652 Model training base path: \"temp1\"\n",
-      "2021-09-08 01:45:34,652 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:34,653 Device: cuda:0\n",
-      "2021-09-08 01:45:34,653 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:34,653 Embeddings storage mode: cpu\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 01:45:34,897 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:35,289 epoch 1 - iter 4/41 - loss 0.97120866 - samples/sec: 11.21 - lr: 0.020000\n",
-      "2021-09-08 01:45:35,538 epoch 1 - iter 8/41 - loss 0.65219873 - samples/sec: 16.09 - lr: 0.020000\n",
-      "2021-09-08 01:45:35,804 epoch 1 - iter 12/41 - loss 0.73140917 - samples/sec: 15.10 - lr: 0.020000\n",
-      "2021-09-08 01:45:36,132 epoch 1 - iter 16/41 - loss 0.76733783 - samples/sec: 12.22 - lr: 0.020000\n",
-      "2021-09-08 01:45:36,615 epoch 1 - iter 20/41 - loss 0.76157632 - samples/sec: 8.28 - lr: 0.020000\n",
-      "2021-09-08 01:45:36,979 epoch 1 - iter 24/41 - loss 0.75426534 - samples/sec: 11.02 - lr: 0.020000\n",
-      "2021-09-08 01:45:37,182 epoch 1 - iter 28/41 - loss 0.74578138 - samples/sec: 19.72 - lr: 0.020000\n",
-      "2021-09-08 01:45:37,528 epoch 1 - iter 32/41 - loss 0.73746018 - samples/sec: 11.60 - lr: 0.020000\n",
-      "2021-09-08 01:45:37,793 epoch 1 - iter 36/41 - loss 0.73168318 - samples/sec: 15.12 - lr: 0.020000\n",
-      "2021-09-08 01:45:38,067 epoch 1 - iter 40/41 - loss 0.72605651 - samples/sec: 14.62 - lr: 0.020000\n",
-      "2021-09-08 01:45:38,113 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:38,113 EPOCH 1 done: loss 0.7223 - lr 0.0200000\n",
-      "2021-09-08 01:45:38,584 DEV : loss 0.539113461971283 - score 0.0\n",
-      "2021-09-08 01:45:38,585 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:17:44,712 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:44,713 Corpus: \"Corpus: 41 train + 4 dev + 5 test sentences\"\n",
+      "2021-09-21 21:17:44,713 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:44,713 Parameters:\n",
+      "2021-09-21 21:17:44,714  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:17:44,714  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:17:44,714  - patience: \"3\"\n",
+      "2021-09-21 21:17:44,714  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:17:44,715  - max_epochs: \"10\"\n",
+      "2021-09-21 21:17:44,715  - shuffle: \"True\"\n",
+      "2021-09-21 21:17:44,715  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:17:44,716  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:17:44,716 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:44,716 Model training base path: \"temp1\"\n",
+      "2021-09-21 21:17:44,717 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:44,717 Device: cuda:0\n",
+      "2021-09-21 21:17:44,717 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:44,717 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:17:44,728 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:45,294 epoch 1 - iter 4/41 - loss 0.94936112 - samples/sec: 7.41 - lr: 0.020000\n",
+      "2021-09-21 21:17:45,678 epoch 1 - iter 8/41 - loss 0.70085894 - samples/sec: 10.42 - lr: 0.020000\n",
+      "2021-09-21 21:17:46,129 epoch 1 - iter 12/41 - loss 0.81472971 - samples/sec: 8.89 - lr: 0.020000\n",
+      "2021-09-21 21:17:46,726 epoch 1 - iter 16/41 - loss 0.70777687 - samples/sec: 6.70 - lr: 0.020000\n",
+      "2021-09-21 21:17:47,106 epoch 1 - iter 20/41 - loss 0.65267444 - samples/sec: 10.55 - lr: 0.020000\n",
+      "2021-09-21 21:17:47,426 epoch 1 - iter 24/41 - loss 0.59876968 - samples/sec: 12.50 - lr: 0.020000\n",
+      "2021-09-21 21:17:47,770 epoch 1 - iter 28/41 - loss 0.61456002 - samples/sec: 11.65 - lr: 0.020000\n",
+      "2021-09-21 21:17:48,158 epoch 1 - iter 32/41 - loss 0.62704801 - samples/sec: 10.33 - lr: 0.020000\n",
+      "2021-09-21 21:17:48,578 epoch 1 - iter 36/41 - loss 0.64148973 - samples/sec: 9.53 - lr: 0.020000\n",
+      "2021-09-21 21:17:48,850 epoch 1 - iter 40/41 - loss 0.62112387 - samples/sec: 14.76 - lr: 0.020000\n",
+      "2021-09-21 21:17:48,905 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:48,906 EPOCH 1 done: loss 0.6149 - lr 0.0200000\n",
+      "2021-09-21 21:17:49,229 DEV : loss 1.0012121200561523 - score 0.0\n",
+      "2021-09-21 21:17:49,230 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:45:46,500 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:46,912 epoch 2 - iter 4/41 - loss 0.69133916 - samples/sec: 10.21 - lr: 0.020000\n",
-      "2021-09-08 01:45:47,202 epoch 2 - iter 8/41 - loss 0.67111833 - samples/sec: 13.83 - lr: 0.020000\n",
-      "2021-09-08 01:45:47,469 epoch 2 - iter 12/41 - loss 0.67224204 - samples/sec: 15.04 - lr: 0.020000\n",
-      "2021-09-08 01:45:47,827 epoch 2 - iter 16/41 - loss 0.67134678 - samples/sec: 11.18 - lr: 0.020000\n",
-      "2021-09-08 01:45:48,192 epoch 2 - iter 20/41 - loss 0.66822867 - samples/sec: 10.99 - lr: 0.020000\n",
-      "2021-09-08 01:45:48,444 epoch 2 - iter 24/41 - loss 0.66645048 - samples/sec: 15.91 - lr: 0.020000\n",
-      "2021-09-08 01:45:48,681 epoch 2 - iter 28/41 - loss 0.66723722 - samples/sec: 16.89 - lr: 0.020000\n",
-      "2021-09-08 01:45:48,931 epoch 2 - iter 32/41 - loss 0.66254232 - samples/sec: 16.03 - lr: 0.020000\n",
-      "2021-09-08 01:45:49,159 epoch 2 - iter 36/41 - loss 0.66015838 - samples/sec: 17.60 - lr: 0.020000\n",
-      "2021-09-08 01:45:49,522 epoch 2 - iter 40/41 - loss 0.65739182 - samples/sec: 11.05 - lr: 0.020000\n",
-      "2021-09-08 01:45:49,570 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:49,570 EPOCH 2 done: loss 0.6574 - lr 0.0200000\n",
-      "2021-09-08 01:45:49,770 DEV : loss 0.5720733404159546 - score 0.0\n",
-      "2021-09-08 01:45:49,772 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:45:49,774 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:50,054 epoch 3 - iter 4/41 - loss 0.61600333 - samples/sec: 15.42 - lr: 0.020000\n",
-      "2021-09-08 01:45:50,447 epoch 3 - iter 8/41 - loss 0.65300897 - samples/sec: 10.20 - lr: 0.020000\n",
-      "2021-09-08 01:45:50,724 epoch 3 - iter 12/41 - loss 0.65947126 - samples/sec: 14.49 - lr: 0.020000\n",
-      "2021-09-08 01:45:51,062 epoch 3 - iter 16/41 - loss 0.65098454 - samples/sec: 11.86 - lr: 0.020000\n",
-      "2021-09-08 01:45:51,357 epoch 3 - iter 20/41 - loss 0.65005909 - samples/sec: 13.58 - lr: 0.020000\n",
-      "2021-09-08 01:45:51,675 epoch 3 - iter 24/41 - loss 0.64634539 - samples/sec: 12.60 - lr: 0.020000\n",
-      "2021-09-08 01:45:52,066 epoch 3 - iter 28/41 - loss 0.64254152 - samples/sec: 10.25 - lr: 0.020000\n",
-      "2021-09-08 01:45:52,377 epoch 3 - iter 32/41 - loss 0.64038091 - samples/sec: 12.87 - lr: 0.020000\n",
-      "2021-09-08 01:45:52,661 epoch 3 - iter 36/41 - loss 0.64213669 - samples/sec: 14.10 - lr: 0.020000\n",
-      "2021-09-08 01:45:52,860 epoch 3 - iter 40/41 - loss 0.64323829 - samples/sec: 20.15 - lr: 0.020000\n",
-      "2021-09-08 01:45:52,953 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:52,953 EPOCH 3 done: loss 0.6427 - lr 0.0200000\n",
-      "2021-09-08 01:45:53,137 DEV : loss 0.5183840394020081 - score 0.25\n",
-      "2021-09-08 01:45:53,138 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:17:53,349 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:53,846 epoch 2 - iter 4/41 - loss 0.71134680 - samples/sec: 8.69 - lr: 0.020000\n",
+      "2021-09-21 21:17:54,168 epoch 2 - iter 8/41 - loss 0.72364695 - samples/sec: 12.44 - lr: 0.020000\n",
+      "2021-09-21 21:17:54,517 epoch 2 - iter 12/41 - loss 0.62905609 - samples/sec: 11.48 - lr: 0.020000\n",
+      "2021-09-21 21:17:54,840 epoch 2 - iter 16/41 - loss 0.61769549 - samples/sec: 12.41 - lr: 0.020000\n",
+      "2021-09-21 21:17:55,142 epoch 2 - iter 20/41 - loss 0.60906042 - samples/sec: 13.28 - lr: 0.020000\n",
+      "2021-09-21 21:17:55,636 epoch 2 - iter 24/41 - loss 0.62461395 - samples/sec: 8.09 - lr: 0.020000\n",
+      "2021-09-21 21:17:56,124 epoch 2 - iter 28/41 - loss 0.61363162 - samples/sec: 8.21 - lr: 0.020000\n",
+      "2021-09-21 21:17:56,850 epoch 2 - iter 32/41 - loss 0.59008071 - samples/sec: 5.52 - lr: 0.020000\n",
+      "2021-09-21 21:17:57,397 epoch 2 - iter 36/41 - loss 0.61578504 - samples/sec: 7.31 - lr: 0.020000\n",
+      "2021-09-21 21:17:58,029 epoch 2 - iter 40/41 - loss 0.61831126 - samples/sec: 6.34 - lr: 0.020000\n",
+      "2021-09-21 21:17:58,166 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:17:58,167 EPOCH 2 done: loss 0.6110 - lr 0.0200000\n",
+      "2021-09-21 21:17:58,735 DEV : loss 1.0443528890609741 - score 0.25\n",
+      "2021-09-21 21:17:58,737 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:45:57,125 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:45:57,492 epoch 4 - iter 4/41 - loss 0.65982494 - samples/sec: 11.38 - lr: 0.020000\n",
-      "2021-09-08 01:45:57,886 epoch 4 - iter 8/41 - loss 0.64692546 - samples/sec: 10.16 - lr: 0.020000\n",
-      "2021-09-08 01:45:58,178 epoch 4 - iter 12/41 - loss 0.65697133 - samples/sec: 13.75 - lr: 0.020000\n",
-      "2021-09-08 01:45:58,596 epoch 4 - iter 16/41 - loss 0.65097198 - samples/sec: 9.59 - lr: 0.020000\n",
-      "2021-09-08 01:45:58,834 epoch 4 - iter 20/41 - loss 0.65149536 - samples/sec: 16.81 - lr: 0.020000\n",
-      "2021-09-08 01:45:59,127 epoch 4 - iter 24/41 - loss 0.65649352 - samples/sec: 13.69 - lr: 0.020000\n",
-      "2021-09-08 01:45:59,368 epoch 4 - iter 28/41 - loss 0.65079261 - samples/sec: 16.67 - lr: 0.020000\n",
-      "2021-09-08 01:45:59,701 epoch 4 - iter 32/41 - loss 0.64957766 - samples/sec: 12.04 - lr: 0.020000\n",
-      "2021-09-08 01:46:00,026 epoch 4 - iter 36/41 - loss 0.64893920 - samples/sec: 12.35 - lr: 0.020000\n",
-      "2021-09-08 01:46:00,288 epoch 4 - iter 40/41 - loss 0.65136008 - samples/sec: 15.32 - lr: 0.020000\n",
-      "2021-09-08 01:46:00,430 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:00,431 EPOCH 4 done: loss 0.6521 - lr 0.0200000\n",
-      "2021-09-08 01:46:00,717 DEV : loss 0.5343062877655029 - score 0.0\n",
-      "2021-09-08 01:46:00,718 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:46:00,798 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:01,029 epoch 5 - iter 4/41 - loss 0.64970222 - samples/sec: 18.78 - lr: 0.020000\n",
-      "2021-09-08 01:46:01,257 epoch 5 - iter 8/41 - loss 0.64127433 - samples/sec: 17.58 - lr: 0.020000\n",
-      "2021-09-08 01:46:01,470 epoch 5 - iter 12/41 - loss 0.63752061 - samples/sec: 18.86 - lr: 0.020000\n",
-      "2021-09-08 01:46:01,850 epoch 5 - iter 16/41 - loss 0.64355231 - samples/sec: 10.52 - lr: 0.020000\n",
-      "2021-09-08 01:46:02,183 epoch 5 - iter 20/41 - loss 0.63970248 - samples/sec: 12.07 - lr: 0.020000\n",
-      "2021-09-08 01:46:02,521 epoch 5 - iter 24/41 - loss 0.63903084 - samples/sec: 11.84 - lr: 0.020000\n",
-      "2021-09-08 01:46:02,872 epoch 5 - iter 28/41 - loss 0.63768111 - samples/sec: 11.40 - lr: 0.020000\n",
-      "2021-09-08 01:46:03,212 epoch 5 - iter 32/41 - loss 0.64267325 - samples/sec: 11.81 - lr: 0.020000\n",
-      "2021-09-08 01:46:03,486 epoch 5 - iter 36/41 - loss 0.64451108 - samples/sec: 14.61 - lr: 0.020000\n",
-      "2021-09-08 01:46:03,823 epoch 5 - iter 40/41 - loss 0.64343403 - samples/sec: 11.90 - lr: 0.020000\n",
-      "2021-09-08 01:46:03,872 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:03,873 EPOCH 5 done: loss 0.6435 - lr 0.0200000\n",
-      "2021-09-08 01:46:04,074 DEV : loss 0.5285306572914124 - score 0.0\n",
-      "2021-09-08 01:46:04,075 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:46:04,078 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:04,379 epoch 6 - iter 4/41 - loss 0.63889106 - samples/sec: 14.24 - lr: 0.020000\n",
-      "2021-09-08 01:46:04,673 epoch 6 - iter 8/41 - loss 0.64486008 - samples/sec: 13.63 - lr: 0.020000\n",
-      "2021-09-08 01:46:05,072 epoch 6 - iter 12/41 - loss 0.64475846 - samples/sec: 10.04 - lr: 0.020000\n",
-      "2021-09-08 01:46:05,401 epoch 6 - iter 16/41 - loss 0.64184992 - samples/sec: 12.19 - lr: 0.020000\n",
-      "2021-09-08 01:46:05,708 epoch 6 - iter 20/41 - loss 0.64199962 - samples/sec: 13.04 - lr: 0.020000\n",
-      "2021-09-08 01:46:05,982 epoch 6 - iter 24/41 - loss 0.64520961 - samples/sec: 14.61 - lr: 0.020000\n",
-      "2021-09-08 01:46:06,238 epoch 6 - iter 28/41 - loss 0.64293631 - samples/sec: 15.70 - lr: 0.020000\n",
-      "2021-09-08 01:46:06,558 epoch 6 - iter 32/41 - loss 0.64176923 - samples/sec: 12.52 - lr: 0.020000\n",
-      "2021-09-08 01:46:06,806 epoch 6 - iter 36/41 - loss 0.64226951 - samples/sec: 16.13 - lr: 0.020000\n",
-      "2021-09-08 01:46:07,092 epoch 6 - iter 40/41 - loss 0.64080250 - samples/sec: 14.03 - lr: 0.020000\n"
+      "2021-09-21 21:18:13,291 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:13,661 epoch 3 - iter 4/41 - loss 0.24132791 - samples/sec: 11.51 - lr: 0.020000\n",
+      "2021-09-21 21:18:14,078 epoch 3 - iter 8/41 - loss 0.69380803 - samples/sec: 9.61 - lr: 0.020000\n",
+      "2021-09-21 21:18:14,444 epoch 3 - iter 12/41 - loss 0.60210660 - samples/sec: 10.96 - lr: 0.020000\n",
+      "2021-09-21 21:18:14,757 epoch 3 - iter 16/41 - loss 0.66261773 - samples/sec: 12.79 - lr: 0.020000\n",
+      "2021-09-21 21:18:15,137 epoch 3 - iter 20/41 - loss 0.65151971 - samples/sec: 10.53 - lr: 0.020000\n",
+      "2021-09-21 21:18:15,703 epoch 3 - iter 24/41 - loss 0.60208608 - samples/sec: 7.08 - lr: 0.020000\n",
+      "2021-09-21 21:18:16,158 epoch 3 - iter 28/41 - loss 0.65775497 - samples/sec: 8.81 - lr: 0.020000\n",
+      "2021-09-21 21:18:16,444 epoch 3 - iter 32/41 - loss 0.59411585 - samples/sec: 14.02 - lr: 0.020000\n",
+      "2021-09-21 21:18:16,774 epoch 3 - iter 36/41 - loss 0.60570424 - samples/sec: 12.14 - lr: 0.020000\n",
+      "2021-09-21 21:18:17,455 epoch 3 - iter 40/41 - loss 0.63263346 - samples/sec: 5.88 - lr: 0.020000\n",
+      "2021-09-21 21:18:17,526 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:17,526 EPOCH 3 done: loss 0.6288 - lr 0.0200000\n",
+      "2021-09-21 21:18:17,800 DEV : loss 1.5820751190185547 - score 0.0\n",
+      "2021-09-21 21:18:17,801 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:18:17,803 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:18,123 epoch 4 - iter 4/41 - loss 0.68594711 - samples/sec: 14.24 - lr: 0.020000\n",
+      "2021-09-21 21:18:18,392 epoch 4 - iter 8/41 - loss 0.60351697 - samples/sec: 14.92 - lr: 0.020000\n",
+      "2021-09-21 21:18:18,851 epoch 4 - iter 12/41 - loss 0.73297755 - samples/sec: 8.72 - lr: 0.020000\n",
+      "2021-09-21 21:18:19,255 epoch 4 - iter 16/41 - loss 0.62566467 - samples/sec: 9.93 - lr: 0.020000\n",
+      "2021-09-21 21:18:19,612 epoch 4 - iter 20/41 - loss 0.55097135 - samples/sec: 11.22 - lr: 0.020000\n",
+      "2021-09-21 21:18:20,124 epoch 4 - iter 24/41 - loss 0.57705173 - samples/sec: 7.82 - lr: 0.020000\n",
+      "2021-09-21 21:18:20,427 epoch 4 - iter 28/41 - loss 0.55633407 - samples/sec: 13.21 - lr: 0.020000\n",
+      "2021-09-21 21:18:20,736 epoch 4 - iter 32/41 - loss 0.50675044 - samples/sec: 13.00 - lr: 0.020000\n",
+      "2021-09-21 21:18:21,099 epoch 4 - iter 36/41 - loss 0.52063744 - samples/sec: 11.04 - lr: 0.020000\n",
+      "2021-09-21 21:18:21,381 epoch 4 - iter 40/41 - loss 0.47639542 - samples/sec: 14.19 - lr: 0.020000\n",
+      "2021-09-21 21:18:21,464 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:21,465 EPOCH 4 done: loss 0.4675 - lr 0.0200000\n",
+      "2021-09-21 21:18:22,505 DEV : loss 1.4538369178771973 - score 0.0\n",
+      "2021-09-21 21:18:22,506 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:18:22,516 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:23,125 epoch 5 - iter 4/41 - loss 1.71304947 - samples/sec: 6.88 - lr: 0.020000\n",
+      "2021-09-21 21:18:23,614 epoch 5 - iter 8/41 - loss 0.90304113 - samples/sec: 8.19 - lr: 0.020000\n",
+      "2021-09-21 21:18:24,005 epoch 5 - iter 12/41 - loss 0.86625014 - samples/sec: 10.25 - lr: 0.020000\n",
+      "2021-09-21 21:18:24,525 epoch 5 - iter 16/41 - loss 0.80400602 - samples/sec: 7.70 - lr: 0.020000\n",
+      "2021-09-21 21:18:25,001 epoch 5 - iter 20/41 - loss 0.73899728 - samples/sec: 8.41 - lr: 0.020000\n",
+      "2021-09-21 21:18:25,388 epoch 5 - iter 24/41 - loss 0.68624144 - samples/sec: 10.37 - lr: 0.020000\n",
+      "2021-09-21 21:18:25,860 epoch 5 - iter 28/41 - loss 0.67816785 - samples/sec: 8.48 - lr: 0.020000\n",
+      "2021-09-21 21:18:26,292 epoch 5 - iter 32/41 - loss 0.70534334 - samples/sec: 9.26 - lr: 0.020000\n",
+      "2021-09-21 21:18:26,604 epoch 5 - iter 36/41 - loss 0.68423113 - samples/sec: 12.84 - lr: 0.020000\n",
+      "2021-09-21 21:18:26,883 epoch 5 - iter 40/41 - loss 0.65607991 - samples/sec: 14.39 - lr: 0.020000\n",
+      "2021-09-21 21:18:26,941 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:46:07,191 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:07,192 EPOCH 6 done: loss 0.6397 - lr 0.0200000\n",
-      "2021-09-08 01:46:07,347 DEV : loss 0.5229767560958862 - score 0.0\n",
-      "2021-09-08 01:46:07,348 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:46:07,350 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:07,668 epoch 7 - iter 4/41 - loss 0.62930320 - samples/sec: 13.17 - lr: 0.020000\n",
-      "2021-09-08 01:46:08,039 epoch 7 - iter 8/41 - loss 0.63255673 - samples/sec: 10.83 - lr: 0.020000\n",
-      "2021-09-08 01:46:08,286 epoch 7 - iter 12/41 - loss 0.64772213 - samples/sec: 16.25 - lr: 0.020000\n",
-      "2021-09-08 01:46:08,555 epoch 7 - iter 16/41 - loss 0.65209853 - samples/sec: 14.91 - lr: 0.020000\n",
-      "2021-09-08 01:46:08,942 epoch 7 - iter 20/41 - loss 0.65098765 - samples/sec: 10.34 - lr: 0.020000\n",
-      "2021-09-08 01:46:09,187 epoch 7 - iter 24/41 - loss 0.64705770 - samples/sec: 16.40 - lr: 0.020000\n",
-      "2021-09-08 01:46:09,481 epoch 7 - iter 28/41 - loss 0.65059799 - samples/sec: 13.65 - lr: 0.020000\n",
-      "2021-09-08 01:46:09,694 epoch 7 - iter 32/41 - loss 0.64861331 - samples/sec: 18.85 - lr: 0.020000\n",
-      "2021-09-08 01:46:10,124 epoch 7 - iter 36/41 - loss 0.65014449 - samples/sec: 9.32 - lr: 0.020000\n",
-      "2021-09-08 01:46:10,423 epoch 7 - iter 40/41 - loss 0.64821771 - samples/sec: 13.38 - lr: 0.020000\n",
-      "2021-09-08 01:46:10,479 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:10,480 EPOCH 7 done: loss 0.6479 - lr 0.0200000\n",
-      "2021-09-08 01:46:10,642 DEV : loss 0.5465486645698547 - score 0.0\n",
-      "Epoch     7: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:46:10,643 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:46:10,645 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:10,977 epoch 8 - iter 4/41 - loss 0.63020395 - samples/sec: 13.46 - lr: 0.010000\n",
-      "2021-09-08 01:46:11,306 epoch 8 - iter 8/41 - loss 0.64165522 - samples/sec: 12.19 - lr: 0.010000\n",
-      "2021-09-08 01:46:11,514 epoch 8 - iter 12/41 - loss 0.64660455 - samples/sec: 19.27 - lr: 0.010000\n",
-      "2021-09-08 01:46:11,974 epoch 8 - iter 16/41 - loss 0.64391842 - samples/sec: 8.71 - lr: 0.010000\n",
-      "2021-09-08 01:46:12,163 epoch 8 - iter 20/41 - loss 0.64465350 - samples/sec: 21.32 - lr: 0.010000\n",
-      "2021-09-08 01:46:12,498 epoch 8 - iter 24/41 - loss 0.64555075 - samples/sec: 11.94 - lr: 0.010000\n",
-      "2021-09-08 01:46:12,686 epoch 8 - iter 28/41 - loss 0.64740285 - samples/sec: 21.43 - lr: 0.010000\n",
-      "2021-09-08 01:46:12,945 epoch 8 - iter 32/41 - loss 0.64474519 - samples/sec: 15.48 - lr: 0.010000\n",
-      "2021-09-08 01:46:13,253 epoch 8 - iter 36/41 - loss 0.64515786 - samples/sec: 13.07 - lr: 0.010000\n",
-      "2021-09-08 01:46:13,627 epoch 8 - iter 40/41 - loss 0.64557470 - samples/sec: 10.72 - lr: 0.010000\n",
-      "2021-09-08 01:46:13,673 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:13,673 EPOCH 8 done: loss 0.6447 - lr 0.0100000\n",
-      "2021-09-08 01:46:13,894 DEV : loss 0.5696865916252136 - score 0.0\n",
-      "2021-09-08 01:46:13,895 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:46:13,897 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:14,213 epoch 9 - iter 4/41 - loss 0.63220315 - samples/sec: 13.29 - lr: 0.010000\n",
-      "2021-09-08 01:46:14,601 epoch 9 - iter 8/41 - loss 0.63501336 - samples/sec: 10.32 - lr: 0.010000\n",
-      "2021-09-08 01:46:14,889 epoch 9 - iter 12/41 - loss 0.63669434 - samples/sec: 13.95 - lr: 0.010000\n",
-      "2021-09-08 01:46:15,195 epoch 9 - iter 16/41 - loss 0.63587372 - samples/sec: 13.09 - lr: 0.010000\n",
-      "2021-09-08 01:46:15,459 epoch 9 - iter 20/41 - loss 0.63607774 - samples/sec: 15.19 - lr: 0.010000\n",
-      "2021-09-08 01:46:15,814 epoch 9 - iter 24/41 - loss 0.63479788 - samples/sec: 11.31 - lr: 0.010000\n",
-      "2021-09-08 01:46:16,178 epoch 9 - iter 28/41 - loss 0.63432632 - samples/sec: 11.02 - lr: 0.010000\n",
-      "2021-09-08 01:46:16,454 epoch 9 - iter 32/41 - loss 0.63409762 - samples/sec: 14.53 - lr: 0.010000\n",
-      "2021-09-08 01:46:16,696 epoch 9 - iter 36/41 - loss 0.63327704 - samples/sec: 16.55 - lr: 0.010000\n",
-      "2021-09-08 01:46:16,990 epoch 9 - iter 40/41 - loss 0.63238886 - samples/sec: 13.63 - lr: 0.010000\n",
-      "2021-09-08 01:46:17,036 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:17,036 EPOCH 9 done: loss 0.6324 - lr 0.0100000\n",
-      "2021-09-08 01:46:17,517 DEV : loss 0.5720214247703552 - score 0.0\n",
-      "2021-09-08 01:46:17,518 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:46:17,522 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:17,857 epoch 10 - iter 4/41 - loss 0.61575197 - samples/sec: 12.50 - lr: 0.010000\n",
-      "2021-09-08 01:46:18,112 epoch 10 - iter 8/41 - loss 0.63065141 - samples/sec: 15.74 - lr: 0.010000\n",
-      "2021-09-08 01:46:18,333 epoch 10 - iter 12/41 - loss 0.63852180 - samples/sec: 18.16 - lr: 0.010000\n",
-      "2021-09-08 01:46:18,584 epoch 10 - iter 16/41 - loss 0.63640353 - samples/sec: 15.94 - lr: 0.010000\n",
-      "2021-09-08 01:46:18,865 epoch 10 - iter 20/41 - loss 0.63033566 - samples/sec: 14.29 - lr: 0.010000\n",
-      "2021-09-08 01:46:19,054 epoch 10 - iter 24/41 - loss 0.63109031 - samples/sec: 21.27 - lr: 0.010000\n",
-      "2021-09-08 01:46:19,362 epoch 10 - iter 28/41 - loss 0.63047459 - samples/sec: 13.00 - lr: 0.010000\n",
-      "2021-09-08 01:46:19,724 epoch 10 - iter 32/41 - loss 0.63158685 - samples/sec: 11.07 - lr: 0.010000\n",
-      "2021-09-08 01:46:19,957 epoch 10 - iter 36/41 - loss 0.63173217 - samples/sec: 17.29 - lr: 0.010000\n",
-      "2021-09-08 01:46:20,398 epoch 10 - iter 40/41 - loss 0.63192034 - samples/sec: 9.08 - lr: 0.010000\n",
-      "2021-09-08 01:46:20,470 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:20,470 EPOCH 10 done: loss 0.6318 - lr 0.0100000\n",
-      "2021-09-08 01:46:20,649 DEV : loss 0.5442641973495483 - score 0.0\n",
-      "2021-09-08 01:46:20,650 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:46:24,495 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:46:24,496 Testing using best model ...\n",
-      "2021-09-08 01:46:24,520 loading file temp1/best-model.pt\n",
+      "2021-09-21 21:18:26,942 EPOCH 5 done: loss 0.6402 - lr 0.0200000\n",
+      "2021-09-21 21:18:27,260 DEV : loss 1.1782851219177246 - score 0.0\n",
+      "2021-09-21 21:18:27,261 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:18:27,333 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:27,641 epoch 6 - iter 4/41 - loss 0.80301968 - samples/sec: 14.01 - lr: 0.020000\n",
+      "2021-09-21 21:18:27,988 epoch 6 - iter 8/41 - loss 0.81731492 - samples/sec: 11.52 - lr: 0.020000\n",
+      "2021-09-21 21:18:28,283 epoch 6 - iter 12/41 - loss 0.57928919 - samples/sec: 13.58 - lr: 0.020000\n",
+      "2021-09-21 21:18:28,708 epoch 6 - iter 16/41 - loss 0.48428706 - samples/sec: 9.43 - lr: 0.020000\n",
+      "2021-09-21 21:18:28,968 epoch 6 - iter 20/41 - loss 0.45083410 - samples/sec: 15.38 - lr: 0.020000\n",
+      "2021-09-21 21:18:29,453 epoch 6 - iter 24/41 - loss 0.44366400 - samples/sec: 8.26 - lr: 0.020000\n",
+      "2021-09-21 21:18:29,817 epoch 6 - iter 28/41 - loss 0.44122847 - samples/sec: 11.02 - lr: 0.020000\n",
+      "2021-09-21 21:18:30,467 epoch 6 - iter 32/41 - loss 0.46242292 - samples/sec: 6.16 - lr: 0.020000\n",
+      "2021-09-21 21:18:30,953 epoch 6 - iter 36/41 - loss 0.41847110 - samples/sec: 8.23 - lr: 0.020000\n",
+      "2021-09-21 21:18:31,315 epoch 6 - iter 40/41 - loss 0.42300437 - samples/sec: 11.08 - lr: 0.020000\n",
+      "2021-09-21 21:18:31,608 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:31,609 EPOCH 6 done: loss 0.4357 - lr 0.0200000\n",
+      "2021-09-21 21:18:34,763 DEV : loss 1.4211915731430054 - score 0.25\n",
+      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:18:34,764 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:18:34,768 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:35,192 epoch 7 - iter 4/41 - loss 0.65582414 - samples/sec: 10.00 - lr: 0.010000\n",
+      "2021-09-21 21:18:35,755 epoch 7 - iter 8/41 - loss 0.78554107 - samples/sec: 7.12 - lr: 0.010000\n",
+      "2021-09-21 21:18:36,094 epoch 7 - iter 12/41 - loss 0.77179727 - samples/sec: 11.80 - lr: 0.010000\n",
+      "2021-09-21 21:18:36,619 epoch 7 - iter 16/41 - loss 0.72231502 - samples/sec: 7.63 - lr: 0.010000\n",
+      "2021-09-21 21:18:37,120 epoch 7 - iter 20/41 - loss 0.74295066 - samples/sec: 7.99 - lr: 0.010000\n",
+      "2021-09-21 21:18:37,422 epoch 7 - iter 24/41 - loss 0.64426016 - samples/sec: 13.28 - lr: 0.010000\n",
+      "2021-09-21 21:18:37,765 epoch 7 - iter 28/41 - loss 0.59069083 - samples/sec: 11.67 - lr: 0.010000\n",
+      "2021-09-21 21:18:38,160 epoch 7 - iter 32/41 - loss 0.53178262 - samples/sec: 10.15 - lr: 0.010000\n",
+      "2021-09-21 21:18:38,526 epoch 7 - iter 36/41 - loss 0.53102356 - samples/sec: 10.94 - lr: 0.010000\n",
+      "2021-09-21 21:18:39,044 epoch 7 - iter 40/41 - loss 0.52797546 - samples/sec: 7.73 - lr: 0.010000\n",
+      "2021-09-21 21:18:39,196 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:39,196 EPOCH 7 done: loss 0.5210 - lr 0.0100000\n",
+      "2021-09-21 21:18:39,615 DEV : loss 1.1940869092941284 - score 0.25\n",
+      "2021-09-21 21:18:39,618 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:18:39,620 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:40,070 epoch 8 - iter 4/41 - loss 0.04795344 - samples/sec: 10.06 - lr: 0.010000\n",
+      "2021-09-21 21:18:40,584 epoch 8 - iter 8/41 - loss 0.29342288 - samples/sec: 7.78 - lr: 0.010000\n",
+      "2021-09-21 21:18:41,186 epoch 8 - iter 12/41 - loss 0.34049542 - samples/sec: 6.66 - lr: 0.010000\n",
+      "2021-09-21 21:18:41,729 epoch 8 - iter 16/41 - loss 0.25886870 - samples/sec: 7.37 - lr: 0.010000\n",
+      "2021-09-21 21:18:42,545 epoch 8 - iter 20/41 - loss 0.23992611 - samples/sec: 4.90 - lr: 0.010000\n",
+      "2021-09-21 21:18:43,109 epoch 8 - iter 24/41 - loss 0.24149270 - samples/sec: 7.10 - lr: 0.010000\n",
+      "2021-09-21 21:18:43,586 epoch 8 - iter 28/41 - loss 0.25679776 - samples/sec: 8.40 - lr: 0.010000\n",
+      "2021-09-21 21:18:44,145 epoch 8 - iter 32/41 - loss 0.27478126 - samples/sec: 7.16 - lr: 0.010000\n",
+      "2021-09-21 21:18:44,830 epoch 8 - iter 36/41 - loss 0.31467570 - samples/sec: 5.85 - lr: 0.010000\n",
+      "2021-09-21 21:18:45,230 epoch 8 - iter 40/41 - loss 0.31625986 - samples/sec: 10.03 - lr: 0.010000\n",
+      "2021-09-21 21:18:45,586 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:45,587 EPOCH 8 done: loss 0.3158 - lr 0.0100000\n",
+      "2021-09-21 21:18:45,939 DEV : loss 1.1864736080169678 - score 0.25\n",
+      "2021-09-21 21:18:45,941 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:18:45,943 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:46,296 epoch 9 - iter 4/41 - loss 0.17336981 - samples/sec: 12.30 - lr: 0.010000\n",
+      "2021-09-21 21:18:46,772 epoch 9 - iter 8/41 - loss 0.37514000 - samples/sec: 8.41 - lr: 0.010000\n",
+      "2021-09-21 21:18:47,102 epoch 9 - iter 12/41 - loss 0.32833028 - samples/sec: 12.13 - lr: 0.010000\n",
+      "2021-09-21 21:18:47,512 epoch 9 - iter 16/41 - loss 0.37321689 - samples/sec: 9.78 - lr: 0.010000\n",
+      "2021-09-21 21:18:47,927 epoch 9 - iter 20/41 - loss 0.35929670 - samples/sec: 9.66 - lr: 0.010000\n",
+      "2021-09-21 21:18:48,340 epoch 9 - iter 24/41 - loss 0.32985187 - samples/sec: 9.70 - lr: 0.010000\n",
+      "2021-09-21 21:18:48,634 epoch 9 - iter 28/41 - loss 0.38696106 - samples/sec: 13.61 - lr: 0.010000\n",
+      "2021-09-21 21:18:49,147 epoch 9 - iter 32/41 - loss 0.39851753 - samples/sec: 7.80 - lr: 0.010000\n",
+      "2021-09-21 21:18:49,375 epoch 9 - iter 36/41 - loss 0.38190853 - samples/sec: 17.62 - lr: 0.010000\n",
+      "2021-09-21 21:18:49,631 epoch 9 - iter 40/41 - loss 0.39800442 - samples/sec: 15.65 - lr: 0.010000\n",
+      "2021-09-21 21:18:49,680 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:49,680 EPOCH 9 done: loss 0.4018 - lr 0.0100000\n",
+      "2021-09-21 21:18:50,531 DEV : loss 1.1261473894119263 - score 0.0\n",
+      "2021-09-21 21:18:50,532 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:18:50,602 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:51,103 epoch 10 - iter 4/41 - loss 0.27550022 - samples/sec: 8.27 - lr: 0.010000\n",
+      "2021-09-21 21:18:51,422 epoch 10 - iter 8/41 - loss 0.17862163 - samples/sec: 12.58 - lr: 0.010000\n",
+      "2021-09-21 21:18:51,722 epoch 10 - iter 12/41 - loss 0.22900838 - samples/sec: 13.35 - lr: 0.010000\n",
+      "2021-09-21 21:18:52,032 epoch 10 - iter 16/41 - loss 0.23512246 - samples/sec: 12.93 - lr: 0.010000\n",
+      "2021-09-21 21:18:52,376 epoch 10 - iter 20/41 - loss 0.22427232 - samples/sec: 11.63 - lr: 0.010000\n",
+      "2021-09-21 21:18:52,682 epoch 10 - iter 24/41 - loss 0.20643613 - samples/sec: 13.12 - lr: 0.010000\n",
+      "2021-09-21 21:18:52,971 epoch 10 - iter 28/41 - loss 0.21250700 - samples/sec: 13.89 - lr: 0.010000\n",
+      "2021-09-21 21:18:53,501 epoch 10 - iter 32/41 - loss 0.24854369 - samples/sec: 7.54 - lr: 0.010000\n",
+      "2021-09-21 21:18:53,817 epoch 10 - iter 36/41 - loss 0.23819388 - samples/sec: 12.71 - lr: 0.010000\n",
+      "2021-09-21 21:18:54,061 epoch 10 - iter 40/41 - loss 0.22946589 - samples/sec: 16.43 - lr: 0.010000\n",
+      "2021-09-21 21:18:54,125 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:18:54,125 EPOCH 10 done: loss 0.2272 - lr 0.0100000\n",
+      "2021-09-21 21:18:54,355 DEV : loss 1.3537718057632446 - score 0.25\n",
+      "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 21:18:54,357 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:19:03,628 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:19:03,629 Testing using best model ...\n",
+      "2021-09-21 21:19:03,630 loading file temp1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:46:33,048 \t0.2\n",
-      "2021-09-08 01:46:33,049 \n",
+      "2021-09-21 21:19:13,811 \t0.6\n",
+      "2021-09-21 21:19:13,811 \n",
       "Results:\n",
-      "- F-score (micro) 0.2\n",
-      "- F-score (macro) 0.2\n",
-      "- Accuracy 0.2\n",
+      "- F-score (micro) 0.6\n",
+      "- F-score (macro) 0.2667\n",
+      "- Accuracy 0.6\n",
       "\n",
       "By class:\n",
       "                                          precision    recall  f1-score   support\n",
       "\n",
-      "  The product has been reviewed as awful     0.0000    0.0000    0.0000         2\n",
+      "  The product has been reviewed as awful     0.0000    0.0000    0.0000         0\n",
       "    The product has been reviewed as bad     0.0000    0.0000    0.0000         0\n",
-      "The product has been reviewed as neutral     1.0000    1.0000    1.0000         1\n",
+      "The product has been reviewed as neutral     0.5000    1.0000    0.6667         2\n",
       "   The product has been reviewed as good     0.0000    0.0000    0.0000         1\n",
-      "  The product has been reviewed as great     0.0000    0.0000    0.0000         1\n",
+      "  The product has been reviewed as great     1.0000    0.5000    0.6667         2\n",
       "\n",
-      "                               micro avg     0.2000    0.2000    0.2000         5\n",
-      "                               macro avg     0.2000    0.2000    0.2000         5\n",
-      "                            weighted avg     0.2000    0.2000    0.2000         5\n",
-      "                             samples avg     0.2000    0.2000    0.2000         5\n",
-      "\n",
-      "2021-09-08 01:46:33,049 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:20,929 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
+      "                               micro avg     0.6000    0.6000    0.6000         5\n",
+      "                               macro avg     0.3000    0.3000    0.2667         5\n",
+      "                            weighted avg     0.6000    0.6000    0.5333         5\n",
+      "                             samples avg     0.6000    0.6000    0.6000         5\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 21:19:13,812 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:16,741 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:47:25,230 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:20:21,220 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 46/46 [00:00<00:00, 18394.32it/s]"
+      "100%|██████████| 46/46 [00:00<00:00, 16937.76it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:47:25,234 [b'The product has been reviewed as awful', b'The product has been reviewed as bad', b'The product has been reviewed as neutral', b'The product has been reviewed as good', b'The product has been reviewed as great']\n",
-      "2021-09-08 01:47:25,403 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:25,405 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:20:21,225 [b'The product has been reviewed as awful', b'The product has been reviewed as bad', b'The product has been reviewed as neutral', b'The product has been reviewed as good', b'The product has been reviewed as great']\n",
+      "2021-09-21 21:20:21,236 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:21,239 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -4353,24 +4349,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:47:25,406 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:25,406 Corpus: \"Corpus: 41 train + 4 dev + 5 test sentences\"\n",
-      "2021-09-08 01:47:25,407 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:25,407 Parameters:\n",
-      "2021-09-08 01:47:25,407  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:47:25,408  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:47:25,408  - patience: \"3\"\n",
-      "2021-09-08 01:47:25,408  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:47:25,409  - max_epochs: \"10\"\n",
-      "2021-09-08 01:47:25,409  - shuffle: \"True\"\n",
-      "2021-09-08 01:47:25,409  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:47:25,409  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:47:25,410 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:25,410 Model training base path: \"temp1\"\n",
-      "2021-09-08 01:47:25,410 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:25,411 Device: cuda:0\n",
-      "2021-09-08 01:47:25,411 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:25,411 Embeddings storage mode: cpu\n"
+      "2021-09-21 21:20:21,239 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:21,240 Corpus: \"Corpus: 41 train + 4 dev + 5 test sentences\"\n",
+      "2021-09-21 21:20:21,240 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:21,241 Parameters:\n",
+      "2021-09-21 21:20:21,241  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:20:21,242  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:20:21,242  - patience: \"3\"\n",
+      "2021-09-21 21:20:21,243  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:20:21,243  - max_epochs: \"10\"\n",
+      "2021-09-21 21:20:21,244  - shuffle: \"True\"\n",
+      "2021-09-21 21:20:21,244  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:20:21,244  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:20:21,245 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:21,246 Model training base path: \"temp1\"\n",
+      "2021-09-21 21:20:21,246 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:21,247 Device: cuda:0\n",
+      "2021-09-21 21:20:21,247 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:21,248 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:20:21,256 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -4384,211 +4381,212 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:47:25,611 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:25,838 epoch 1 - iter 4/41 - loss 0.60396726 - samples/sec: 19.19 - lr: 0.020000\n",
-      "2021-09-08 01:47:26,227 epoch 1 - iter 8/41 - loss 0.51197192 - samples/sec: 10.30 - lr: 0.020000\n",
-      "2021-09-08 01:47:26,437 epoch 1 - iter 12/41 - loss 0.57840957 - samples/sec: 19.07 - lr: 0.020000\n",
-      "2021-09-08 01:47:26,682 epoch 1 - iter 16/41 - loss 0.61287940 - samples/sec: 16.39 - lr: 0.020000\n",
-      "2021-09-08 01:47:26,916 epoch 1 - iter 20/41 - loss 0.60815622 - samples/sec: 17.13 - lr: 0.020000\n",
-      "2021-09-08 01:47:27,177 epoch 1 - iter 24/41 - loss 0.59708124 - samples/sec: 15.39 - lr: 0.020000\n",
-      "2021-09-08 01:47:27,456 epoch 1 - iter 28/41 - loss 0.68183997 - samples/sec: 14.37 - lr: 0.020000\n",
-      "2021-09-08 01:47:27,736 epoch 1 - iter 32/41 - loss 0.67058210 - samples/sec: 14.30 - lr: 0.020000\n",
-      "2021-09-08 01:47:28,072 epoch 1 - iter 36/41 - loss 0.65794493 - samples/sec: 11.95 - lr: 0.020000\n",
-      "2021-09-08 01:47:28,308 epoch 1 - iter 40/41 - loss 0.63057772 - samples/sec: 16.96 - lr: 0.020000\n",
-      "2021-09-08 01:47:28,383 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:28,383 EPOCH 1 done: loss 0.6179 - lr 0.0200000\n",
-      "2021-09-08 01:47:29,163 DEV : loss 1.3859672546386719 - score 0.0\n",
-      "2021-09-08 01:47:29,164 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:20:21,688 epoch 1 - iter 4/41 - loss 0.88187048 - samples/sec: 10.31 - lr: 0.020000\n",
+      "2021-09-21 21:20:22,068 epoch 1 - iter 8/41 - loss 0.72865379 - samples/sec: 10.55 - lr: 0.020000\n",
+      "2021-09-21 21:20:22,453 epoch 1 - iter 12/41 - loss 0.69159442 - samples/sec: 10.41 - lr: 0.020000\n",
+      "2021-09-21 21:20:22,802 epoch 1 - iter 16/41 - loss 0.72921135 - samples/sec: 11.47 - lr: 0.020000\n",
+      "2021-09-21 21:20:23,105 epoch 1 - iter 20/41 - loss 0.71767622 - samples/sec: 13.25 - lr: 0.020000\n",
+      "2021-09-21 21:20:23,359 epoch 1 - iter 24/41 - loss 0.70633546 - samples/sec: 15.75 - lr: 0.020000\n",
+      "2021-09-21 21:20:23,832 epoch 1 - iter 28/41 - loss 0.72053765 - samples/sec: 8.48 - lr: 0.020000\n",
+      "2021-09-21 21:20:24,271 epoch 1 - iter 32/41 - loss 0.71872572 - samples/sec: 9.12 - lr: 0.020000\n",
+      "2021-09-21 21:20:24,660 epoch 1 - iter 36/41 - loss 0.70546989 - samples/sec: 10.30 - lr: 0.020000\n",
+      "2021-09-21 21:20:24,894 epoch 1 - iter 40/41 - loss 0.70278607 - samples/sec: 17.07 - lr: 0.020000\n",
+      "2021-09-21 21:20:24,954 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:24,954 EPOCH 1 done: loss 0.7018 - lr 0.0200000\n",
+      "2021-09-21 21:20:25,178 DEV : loss 0.5073000192642212 - score 0.25\n",
+      "2021-09-21 21:20:25,179 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:20:29,544 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:29,870 epoch 2 - iter 4/41 - loss 0.62727830 - samples/sec: 13.14 - lr: 0.020000\n",
+      "2021-09-21 21:20:30,123 epoch 2 - iter 8/41 - loss 0.66011904 - samples/sec: 15.88 - lr: 0.020000\n",
+      "2021-09-21 21:20:30,654 epoch 2 - iter 12/41 - loss 0.65299191 - samples/sec: 7.54 - lr: 0.020000\n",
+      "2021-09-21 21:20:30,945 epoch 2 - iter 16/41 - loss 0.65645882 - samples/sec: 13.79 - lr: 0.020000\n",
+      "2021-09-21 21:20:31,370 epoch 2 - iter 20/41 - loss 0.65683461 - samples/sec: 9.42 - lr: 0.020000\n",
+      "2021-09-21 21:20:31,623 epoch 2 - iter 24/41 - loss 0.65683050 - samples/sec: 15.84 - lr: 0.020000\n",
+      "2021-09-21 21:20:31,865 epoch 2 - iter 28/41 - loss 0.66114848 - samples/sec: 16.57 - lr: 0.020000\n",
+      "2021-09-21 21:20:32,122 epoch 2 - iter 32/41 - loss 0.66189786 - samples/sec: 15.58 - lr: 0.020000\n",
+      "2021-09-21 21:20:32,692 epoch 2 - iter 36/41 - loss 0.66165607 - samples/sec: 7.03 - lr: 0.020000\n",
+      "2021-09-21 21:20:33,024 epoch 2 - iter 40/41 - loss 0.66107434 - samples/sec: 12.09 - lr: 0.020000\n",
+      "2021-09-21 21:20:33,087 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:33,088 EPOCH 2 done: loss 0.6604 - lr 0.0200000\n",
+      "2021-09-21 21:20:33,221 DEV : loss 0.44767433404922485 - score 0.5\n",
+      "2021-09-21 21:20:33,222 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:20:37,014 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:37,702 epoch 3 - iter 4/41 - loss 0.65553589 - samples/sec: 6.11 - lr: 0.020000\n",
+      "2021-09-21 21:20:38,058 epoch 3 - iter 8/41 - loss 0.63560788 - samples/sec: 11.25 - lr: 0.020000\n",
+      "2021-09-21 21:20:38,535 epoch 3 - iter 12/41 - loss 0.64420054 - samples/sec: 8.40 - lr: 0.020000\n",
+      "2021-09-21 21:20:38,911 epoch 3 - iter 16/41 - loss 0.61456053 - samples/sec: 10.64 - lr: 0.020000\n",
+      "2021-09-21 21:20:39,272 epoch 3 - iter 20/41 - loss 0.62481491 - samples/sec: 11.10 - lr: 0.020000\n",
+      "2021-09-21 21:20:39,793 epoch 3 - iter 24/41 - loss 0.62441882 - samples/sec: 7.68 - lr: 0.020000\n",
+      "2021-09-21 21:20:40,220 epoch 3 - iter 28/41 - loss 0.61364023 - samples/sec: 9.40 - lr: 0.020000\n",
+      "2021-09-21 21:20:40,519 epoch 3 - iter 32/41 - loss 0.62104118 - samples/sec: 13.41 - lr: 0.020000\n",
+      "2021-09-21 21:20:41,037 epoch 3 - iter 36/41 - loss 0.62383785 - samples/sec: 7.72 - lr: 0.020000\n",
+      "2021-09-21 21:20:41,269 epoch 3 - iter 40/41 - loss 0.62802869 - samples/sec: 17.26 - lr: 0.020000\n",
+      "2021-09-21 21:20:41,352 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:41,353 EPOCH 3 done: loss 0.6291 - lr 0.0200000\n",
+      "2021-09-21 21:20:41,590 DEV : loss 0.4138459265232086 - score 0.5\n",
+      "2021-09-21 21:20:41,593 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:47:37,859 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:38,142 epoch 2 - iter 4/41 - loss 0.76039764 - samples/sec: 14.97 - lr: 0.020000\n",
-      "2021-09-08 01:47:38,451 epoch 2 - iter 8/41 - loss 0.92715098 - samples/sec: 12.97 - lr: 0.020000\n",
-      "2021-09-08 01:47:38,720 epoch 2 - iter 12/41 - loss 0.80924282 - samples/sec: 14.95 - lr: 0.020000\n",
-      "2021-09-08 01:47:38,985 epoch 2 - iter 16/41 - loss 0.73566303 - samples/sec: 15.10 - lr: 0.020000\n",
-      "2021-09-08 01:47:39,213 epoch 2 - iter 20/41 - loss 0.68678250 - samples/sec: 17.62 - lr: 0.020000\n",
-      "2021-09-08 01:47:39,504 epoch 2 - iter 24/41 - loss 0.65750552 - samples/sec: 13.76 - lr: 0.020000\n",
-      "2021-09-08 01:47:39,771 epoch 2 - iter 28/41 - loss 0.70397406 - samples/sec: 15.01 - lr: 0.020000\n",
-      "2021-09-08 01:47:40,056 epoch 2 - iter 32/41 - loss 0.66892495 - samples/sec: 14.08 - lr: 0.020000\n",
-      "2021-09-08 01:47:40,268 epoch 2 - iter 36/41 - loss 0.69956948 - samples/sec: 18.85 - lr: 0.020000\n",
-      "2021-09-08 01:47:40,584 epoch 2 - iter 40/41 - loss 0.69441169 - samples/sec: 12.70 - lr: 0.020000\n",
-      "2021-09-08 01:47:40,645 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:40,645 EPOCH 2 done: loss 0.6998 - lr 0.0200000\n",
-      "2021-09-08 01:47:40,797 DEV : loss 0.3835522532463074 - score 0.25\n",
-      "2021-09-08 01:47:40,798 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:20:46,085 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:46,696 epoch 4 - iter 4/41 - loss 0.58939070 - samples/sec: 7.02 - lr: 0.020000\n",
+      "2021-09-21 21:20:47,009 epoch 4 - iter 8/41 - loss 0.57959830 - samples/sec: 12.81 - lr: 0.020000\n",
+      "2021-09-21 21:20:47,438 epoch 4 - iter 12/41 - loss 0.58568318 - samples/sec: 9.35 - lr: 0.020000\n",
+      "2021-09-21 21:20:47,718 epoch 4 - iter 16/41 - loss 0.59941765 - samples/sec: 14.28 - lr: 0.020000\n",
+      "2021-09-21 21:20:48,039 epoch 4 - iter 20/41 - loss 0.62862034 - samples/sec: 12.49 - lr: 0.020000\n",
+      "2021-09-21 21:20:48,344 epoch 4 - iter 24/41 - loss 0.61642005 - samples/sec: 13.17 - lr: 0.020000\n",
+      "2021-09-21 21:20:48,643 epoch 4 - iter 28/41 - loss 0.60925669 - samples/sec: 13.40 - lr: 0.020000\n",
+      "2021-09-21 21:20:48,967 epoch 4 - iter 32/41 - loss 0.58888420 - samples/sec: 12.37 - lr: 0.020000\n",
+      "2021-09-21 21:20:49,379 epoch 4 - iter 36/41 - loss 0.60841214 - samples/sec: 9.73 - lr: 0.020000\n",
+      "2021-09-21 21:20:49,869 epoch 4 - iter 40/41 - loss 0.61994450 - samples/sec: 8.17 - lr: 0.020000\n",
+      "2021-09-21 21:20:49,949 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:20:49,949 EPOCH 4 done: loss 0.6182 - lr 0.0200000\n",
+      "2021-09-21 21:20:50,551 DEV : loss 0.36229026317596436 - score 0.5\n",
+      "2021-09-21 21:20:50,552 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:47:44,426 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:44,717 epoch 3 - iter 4/41 - loss 0.43298632 - samples/sec: 14.60 - lr: 0.020000\n",
-      "2021-09-08 01:47:44,924 epoch 3 - iter 8/41 - loss 0.43423850 - samples/sec: 19.46 - lr: 0.020000\n",
-      "2021-09-08 01:47:45,200 epoch 3 - iter 12/41 - loss 0.48611603 - samples/sec: 14.52 - lr: 0.020000\n",
-      "2021-09-08 01:47:45,454 epoch 3 - iter 16/41 - loss 0.52036625 - samples/sec: 15.75 - lr: 0.020000\n",
-      "2021-09-08 01:47:45,720 epoch 3 - iter 20/41 - loss 0.56110927 - samples/sec: 15.09 - lr: 0.020000\n",
-      "2021-09-08 01:47:46,021 epoch 3 - iter 24/41 - loss 0.54941801 - samples/sec: 13.32 - lr: 0.020000\n",
-      "2021-09-08 01:47:46,266 epoch 3 - iter 28/41 - loss 0.53629177 - samples/sec: 16.34 - lr: 0.020000\n",
-      "2021-09-08 01:47:46,682 epoch 3 - iter 32/41 - loss 0.57393470 - samples/sec: 9.62 - lr: 0.020000\n",
-      "2021-09-08 01:47:47,061 epoch 3 - iter 36/41 - loss 0.56469820 - samples/sec: 10.59 - lr: 0.020000\n",
-      "2021-09-08 01:47:47,293 epoch 3 - iter 40/41 - loss 0.54711084 - samples/sec: 17.31 - lr: 0.020000\n",
-      "2021-09-08 01:47:47,352 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:47,353 EPOCH 3 done: loss 0.5848 - lr 0.0200000\n",
-      "2021-09-08 01:47:47,616 DEV : loss 0.6600953340530396 - score 0.0\n",
-      "2021-09-08 01:47:47,617 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:47:47,687 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:48,010 epoch 4 - iter 4/41 - loss 0.42040732 - samples/sec: 13.09 - lr: 0.020000\n",
-      "2021-09-08 01:47:48,362 epoch 4 - iter 8/41 - loss 0.43680503 - samples/sec: 11.39 - lr: 0.020000\n",
-      "2021-09-08 01:47:48,573 epoch 4 - iter 12/41 - loss 0.49569867 - samples/sec: 18.99 - lr: 0.020000\n",
-      "2021-09-08 01:47:48,847 epoch 4 - iter 16/41 - loss 0.49055018 - samples/sec: 14.63 - lr: 0.020000\n",
-      "2021-09-08 01:47:49,055 epoch 4 - iter 20/41 - loss 0.46513357 - samples/sec: 19.30 - lr: 0.020000\n",
-      "2021-09-08 01:47:49,289 epoch 4 - iter 24/41 - loss 0.45357586 - samples/sec: 17.10 - lr: 0.020000\n",
-      "2021-09-08 01:47:49,650 epoch 4 - iter 28/41 - loss 0.44839260 - samples/sec: 11.10 - lr: 0.020000\n",
-      "2021-09-08 01:47:49,872 epoch 4 - iter 32/41 - loss 0.46223730 - samples/sec: 18.10 - lr: 0.020000\n",
-      "2021-09-08 01:47:50,150 epoch 4 - iter 36/41 - loss 0.44717780 - samples/sec: 14.42 - lr: 0.020000\n",
-      "2021-09-08 01:47:50,440 epoch 4 - iter 40/41 - loss 0.47050702 - samples/sec: 13.86 - lr: 0.020000\n",
-      "2021-09-08 01:47:50,495 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:50,496 EPOCH 4 done: loss 0.4818 - lr 0.0200000\n",
-      "2021-09-08 01:47:50,653 DEV : loss 0.4916534721851349 - score 0.25\n",
-      "2021-09-08 01:47:50,655 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:47:50,663 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:50,905 epoch 5 - iter 4/41 - loss 0.66980360 - samples/sec: 17.88 - lr: 0.020000\n",
-      "2021-09-08 01:47:51,109 epoch 5 - iter 8/41 - loss 0.63984775 - samples/sec: 19.66 - lr: 0.020000\n",
-      "2021-09-08 01:47:51,383 epoch 5 - iter 12/41 - loss 0.50215138 - samples/sec: 14.64 - lr: 0.020000\n",
-      "2021-09-08 01:47:51,592 epoch 5 - iter 16/41 - loss 0.41502229 - samples/sec: 19.19 - lr: 0.020000\n",
-      "2021-09-08 01:47:51,927 epoch 5 - iter 20/41 - loss 0.51277976 - samples/sec: 11.95 - lr: 0.020000\n",
-      "2021-09-08 01:47:52,200 epoch 5 - iter 24/41 - loss 0.56193265 - samples/sec: 14.67 - lr: 0.020000\n",
-      "2021-09-08 01:47:52,503 epoch 5 - iter 28/41 - loss 0.53005161 - samples/sec: 13.24 - lr: 0.020000\n",
-      "2021-09-08 01:47:52,788 epoch 5 - iter 32/41 - loss 0.48724078 - samples/sec: 14.08 - lr: 0.020000\n",
-      "2021-09-08 01:47:52,976 epoch 5 - iter 36/41 - loss 0.46662888 - samples/sec: 21.37 - lr: 0.020000\n",
-      "2021-09-08 01:47:53,361 epoch 5 - iter 40/41 - loss 0.53189489 - samples/sec: 10.39 - lr: 0.020000\n",
-      "2021-09-08 01:47:53,413 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:53,413 EPOCH 5 done: loss 0.5239 - lr 0.0200000\n",
-      "2021-09-08 01:47:53,548 DEV : loss 0.559822678565979 - score 0.25\n",
-      "2021-09-08 01:47:53,551 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:47:53,552 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:53,814 epoch 6 - iter 4/41 - loss 0.17689203 - samples/sec: 16.26 - lr: 0.020000\n",
-      "2021-09-08 01:47:54,012 epoch 6 - iter 8/41 - loss 0.15461222 - samples/sec: 20.23 - lr: 0.020000\n",
-      "2021-09-08 01:47:54,275 epoch 6 - iter 12/41 - loss 0.30884202 - samples/sec: 15.27 - lr: 0.020000\n",
-      "2021-09-08 01:47:54,514 epoch 6 - iter 16/41 - loss 0.29054538 - samples/sec: 16.77 - lr: 0.020000\n",
-      "2021-09-08 01:47:54,821 epoch 6 - iter 20/41 - loss 0.43085169 - samples/sec: 13.04 - lr: 0.020000\n",
-      "2021-09-08 01:47:55,074 epoch 6 - iter 24/41 - loss 0.39723104 - samples/sec: 15.85 - lr: 0.020000\n",
-      "2021-09-08 01:47:55,378 epoch 6 - iter 28/41 - loss 0.43697658 - samples/sec: 13.22 - lr: 0.020000\n",
-      "2021-09-08 01:47:55,579 epoch 6 - iter 32/41 - loss 0.42924183 - samples/sec: 19.92 - lr: 0.020000\n",
-      "2021-09-08 01:47:55,850 epoch 6 - iter 36/41 - loss 0.42050234 - samples/sec: 14.80 - lr: 0.020000\n",
-      "2021-09-08 01:47:56,144 epoch 6 - iter 40/41 - loss 0.41096564 - samples/sec: 13.64 - lr: 0.020000\n"
+      "2021-09-21 21:20:59,943 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:00,397 epoch 5 - iter 4/41 - loss 0.50369682 - samples/sec: 9.38 - lr: 0.020000\n",
+      "2021-09-21 21:21:00,918 epoch 5 - iter 8/41 - loss 0.46224156 - samples/sec: 7.69 - lr: 0.020000\n",
+      "2021-09-21 21:21:01,208 epoch 5 - iter 12/41 - loss 0.50479322 - samples/sec: 13.82 - lr: 0.020000\n",
+      "2021-09-21 21:21:01,812 epoch 5 - iter 16/41 - loss 0.50055526 - samples/sec: 6.63 - lr: 0.020000\n",
+      "2021-09-21 21:21:02,131 epoch 5 - iter 20/41 - loss 0.45863664 - samples/sec: 12.56 - lr: 0.020000\n",
+      "2021-09-21 21:21:02,513 epoch 5 - iter 24/41 - loss 0.52991623 - samples/sec: 10.49 - lr: 0.020000\n",
+      "2021-09-21 21:21:02,860 epoch 5 - iter 28/41 - loss 0.54404401 - samples/sec: 11.54 - lr: 0.020000\n",
+      "2021-09-21 21:21:03,183 epoch 5 - iter 32/41 - loss 0.55846969 - samples/sec: 12.39 - lr: 0.020000\n",
+      "2021-09-21 21:21:03,537 epoch 5 - iter 36/41 - loss 0.56774458 - samples/sec: 11.32 - lr: 0.020000\n",
+      "2021-09-21 21:21:03,834 epoch 5 - iter 40/41 - loss 0.57780037 - samples/sec: 13.52 - lr: 0.020000\n",
+      "2021-09-21 21:21:03,939 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:03,940 EPOCH 5 done: loss 0.5854 - lr 0.0200000\n",
+      "2021-09-21 21:21:04,189 DEV : loss 0.3848566710948944 - score 0.5\n",
+      "2021-09-21 21:21:04,190 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:21:04,280 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:04,729 epoch 6 - iter 4/41 - loss 0.65215319 - samples/sec: 9.54 - lr: 0.020000\n",
+      "2021-09-21 21:21:05,009 epoch 6 - iter 8/41 - loss 0.57185857 - samples/sec: 14.30 - lr: 0.020000\n",
+      "2021-09-21 21:21:05,279 epoch 6 - iter 12/41 - loss 0.55600579 - samples/sec: 14.83 - lr: 0.020000\n",
+      "2021-09-21 21:21:05,688 epoch 6 - iter 16/41 - loss 0.58627738 - samples/sec: 9.81 - lr: 0.020000\n",
+      "2021-09-21 21:21:06,050 epoch 6 - iter 20/41 - loss 0.58572971 - samples/sec: 11.06 - lr: 0.020000\n",
+      "2021-09-21 21:21:06,468 epoch 6 - iter 24/41 - loss 0.56753696 - samples/sec: 9.59 - lr: 0.020000\n",
+      "2021-09-21 21:21:06,736 epoch 6 - iter 28/41 - loss 0.58334015 - samples/sec: 14.92 - lr: 0.020000\n",
+      "2021-09-21 21:21:07,089 epoch 6 - iter 32/41 - loss 0.57009781 - samples/sec: 11.35 - lr: 0.020000\n",
+      "2021-09-21 21:21:07,468 epoch 6 - iter 36/41 - loss 0.54800086 - samples/sec: 10.57 - lr: 0.020000\n",
+      "2021-09-21 21:21:07,864 epoch 6 - iter 40/41 - loss 0.52687145 - samples/sec: 10.12 - lr: 0.020000\n",
+      "2021-09-21 21:21:07,944 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:47:56,229 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:56,230 EPOCH 6 done: loss 0.4168 - lr 0.0200000\n",
-      "2021-09-08 01:47:56,378 DEV : loss 0.6307583451271057 - score 0.25\n",
-      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:47:56,379 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:47:56,381 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:56,735 epoch 7 - iter 4/41 - loss 0.21342656 - samples/sec: 11.74 - lr: 0.010000\n",
-      "2021-09-08 01:47:56,935 epoch 7 - iter 8/41 - loss 0.33180951 - samples/sec: 20.02 - lr: 0.010000\n",
-      "2021-09-08 01:47:57,260 epoch 7 - iter 12/41 - loss 0.29861451 - samples/sec: 12.36 - lr: 0.010000\n",
-      "2021-09-08 01:47:57,477 epoch 7 - iter 16/41 - loss 0.40134468 - samples/sec: 18.44 - lr: 0.010000\n",
-      "2021-09-08 01:47:57,776 epoch 7 - iter 20/41 - loss 0.33929736 - samples/sec: 13.42 - lr: 0.010000\n",
-      "2021-09-08 01:47:58,003 epoch 7 - iter 24/41 - loss 0.37676719 - samples/sec: 17.66 - lr: 0.010000\n",
-      "2021-09-08 01:47:58,222 epoch 7 - iter 28/41 - loss 0.41177646 - samples/sec: 18.33 - lr: 0.010000\n",
-      "2021-09-08 01:47:58,442 epoch 7 - iter 32/41 - loss 0.39277732 - samples/sec: 18.25 - lr: 0.010000\n",
-      "2021-09-08 01:47:58,647 epoch 7 - iter 36/41 - loss 0.37252237 - samples/sec: 19.56 - lr: 0.010000\n",
-      "2021-09-08 01:47:58,997 epoch 7 - iter 40/41 - loss 0.39307355 - samples/sec: 11.42 - lr: 0.010000\n",
-      "2021-09-08 01:47:59,056 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:59,057 EPOCH 7 done: loss 0.3890 - lr 0.0100000\n",
-      "2021-09-08 01:47:59,206 DEV : loss 0.5761427879333496 - score 0.25\n",
-      "2021-09-08 01:47:59,209 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:47:59,211 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:47:59,494 epoch 8 - iter 4/41 - loss 0.07412882 - samples/sec: 15.28 - lr: 0.010000\n",
-      "2021-09-08 01:47:59,828 epoch 8 - iter 8/41 - loss 0.15395475 - samples/sec: 12.00 - lr: 0.010000\n",
-      "2021-09-08 01:48:00,045 epoch 8 - iter 12/41 - loss 0.26148176 - samples/sec: 18.49 - lr: 0.010000\n",
-      "2021-09-08 01:48:00,367 epoch 8 - iter 16/41 - loss 0.30163098 - samples/sec: 12.43 - lr: 0.010000\n",
-      "2021-09-08 01:48:00,568 epoch 8 - iter 20/41 - loss 0.36512156 - samples/sec: 19.97 - lr: 0.010000\n",
-      "2021-09-08 01:48:00,919 epoch 8 - iter 24/41 - loss 0.41183758 - samples/sec: 11.42 - lr: 0.010000\n",
-      "2021-09-08 01:48:01,116 epoch 8 - iter 28/41 - loss 0.37393620 - samples/sec: 20.37 - lr: 0.010000\n",
-      "2021-09-08 01:48:01,458 epoch 8 - iter 32/41 - loss 0.36564688 - samples/sec: 11.71 - lr: 0.010000\n",
-      "2021-09-08 01:48:01,674 epoch 8 - iter 36/41 - loss 0.32966444 - samples/sec: 18.56 - lr: 0.010000\n",
-      "2021-09-08 01:48:01,892 epoch 8 - iter 40/41 - loss 0.30196639 - samples/sec: 18.44 - lr: 0.010000\n",
-      "2021-09-08 01:48:01,940 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:01,940 EPOCH 8 done: loss 0.2947 - lr 0.0100000\n",
-      "2021-09-08 01:48:02,143 DEV : loss 0.6519190669059753 - score 0.25\n",
-      "2021-09-08 01:48:02,144 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:48:02,147 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:02,506 epoch 9 - iter 4/41 - loss 0.11794205 - samples/sec: 11.68 - lr: 0.010000\n",
-      "2021-09-08 01:48:02,818 epoch 9 - iter 8/41 - loss 0.31656586 - samples/sec: 12.86 - lr: 0.010000\n",
-      "2021-09-08 01:48:03,042 epoch 9 - iter 12/41 - loss 0.21870918 - samples/sec: 17.86 - lr: 0.010000\n",
-      "2021-09-08 01:48:03,280 epoch 9 - iter 16/41 - loss 0.34668230 - samples/sec: 16.84 - lr: 0.010000\n",
-      "2021-09-08 01:48:03,511 epoch 9 - iter 20/41 - loss 0.34733626 - samples/sec: 17.38 - lr: 0.010000\n",
-      "2021-09-08 01:48:03,741 epoch 9 - iter 24/41 - loss 0.31844466 - samples/sec: 17.45 - lr: 0.010000\n",
-      "2021-09-08 01:48:04,020 epoch 9 - iter 28/41 - loss 0.28866431 - samples/sec: 14.35 - lr: 0.010000\n",
-      "2021-09-08 01:48:04,219 epoch 9 - iter 32/41 - loss 0.29791790 - samples/sec: 20.17 - lr: 0.010000\n",
-      "2021-09-08 01:48:04,536 epoch 9 - iter 36/41 - loss 0.32071680 - samples/sec: 12.62 - lr: 0.010000\n",
-      "2021-09-08 01:48:04,767 epoch 9 - iter 40/41 - loss 0.28932293 - samples/sec: 17.34 - lr: 0.010000\n",
-      "2021-09-08 01:48:04,882 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:04,883 EPOCH 9 done: loss 0.2826 - lr 0.0100000\n",
-      "2021-09-08 01:48:05,050 DEV : loss 0.7485843896865845 - score 0.25\n",
-      "2021-09-08 01:48:05,053 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:48:05,055 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:05,359 epoch 10 - iter 4/41 - loss 0.68463428 - samples/sec: 14.49 - lr: 0.010000\n",
-      "2021-09-08 01:48:05,617 epoch 10 - iter 8/41 - loss 0.48317582 - samples/sec: 15.55 - lr: 0.010000\n",
-      "2021-09-08 01:48:05,855 epoch 10 - iter 12/41 - loss 0.33865803 - samples/sec: 16.88 - lr: 0.010000\n",
-      "2021-09-08 01:48:06,077 epoch 10 - iter 16/41 - loss 0.29082145 - samples/sec: 18.03 - lr: 0.010000\n",
-      "2021-09-08 01:48:06,318 epoch 10 - iter 20/41 - loss 0.24672062 - samples/sec: 16.60 - lr: 0.010000\n",
-      "2021-09-08 01:48:06,565 epoch 10 - iter 24/41 - loss 0.23524494 - samples/sec: 16.29 - lr: 0.010000\n",
-      "2021-09-08 01:48:06,848 epoch 10 - iter 28/41 - loss 0.20292548 - samples/sec: 14.16 - lr: 0.010000\n",
-      "2021-09-08 01:48:07,073 epoch 10 - iter 32/41 - loss 0.19931341 - samples/sec: 17.76 - lr: 0.010000\n",
-      "2021-09-08 01:48:07,313 epoch 10 - iter 36/41 - loss 0.19335057 - samples/sec: 16.72 - lr: 0.010000\n",
-      "2021-09-08 01:48:07,667 epoch 10 - iter 40/41 - loss 0.19896132 - samples/sec: 11.32 - lr: 0.010000\n",
-      "2021-09-08 01:48:07,737 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:07,737 EPOCH 10 done: loss 0.1941 - lr 0.0100000\n",
-      "2021-09-08 01:48:07,996 DEV : loss 0.5117360949516296 - score 0.5\n",
-      "2021-09-08 01:48:07,997 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:21:07,944 EPOCH 6 done: loss 0.5182 - lr 0.0200000\n",
+      "2021-09-21 21:21:08,107 DEV : loss 0.5269594192504883 - score 0.25\n",
+      "2021-09-21 21:21:08,108 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:21:08,110 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:08,554 epoch 7 - iter 4/41 - loss 0.62201855 - samples/sec: 9.87 - lr: 0.020000\n",
+      "2021-09-21 21:21:09,114 epoch 7 - iter 8/41 - loss 0.57361269 - samples/sec: 7.15 - lr: 0.020000\n",
+      "2021-09-21 21:21:09,544 epoch 7 - iter 12/41 - loss 0.56918833 - samples/sec: 9.33 - lr: 0.020000\n",
+      "2021-09-21 21:21:09,873 epoch 7 - iter 16/41 - loss 0.51833163 - samples/sec: 12.16 - lr: 0.020000\n",
+      "2021-09-21 21:21:10,214 epoch 7 - iter 20/41 - loss 0.51562555 - samples/sec: 11.75 - lr: 0.020000\n",
+      "2021-09-21 21:21:10,920 epoch 7 - iter 24/41 - loss 0.46497275 - samples/sec: 5.67 - lr: 0.020000\n",
+      "2021-09-21 21:21:11,289 epoch 7 - iter 28/41 - loss 0.46891916 - samples/sec: 10.86 - lr: 0.020000\n",
+      "2021-09-21 21:21:11,632 epoch 7 - iter 32/41 - loss 0.48965555 - samples/sec: 11.67 - lr: 0.020000\n",
+      "2021-09-21 21:21:12,063 epoch 7 - iter 36/41 - loss 0.51961811 - samples/sec: 9.31 - lr: 0.020000\n",
+      "2021-09-21 21:21:12,398 epoch 7 - iter 40/41 - loss 0.52793488 - samples/sec: 11.96 - lr: 0.020000\n",
+      "2021-09-21 21:21:12,480 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:12,481 EPOCH 7 done: loss 0.5222 - lr 0.0200000\n",
+      "2021-09-21 21:21:12,598 DEV : loss 0.36756256222724915 - score 0.5\n",
+      "2021-09-21 21:21:12,600 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:21:12,602 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:12,880 epoch 8 - iter 4/41 - loss 0.82132261 - samples/sec: 15.47 - lr: 0.020000\n",
+      "2021-09-21 21:21:13,263 epoch 8 - iter 8/41 - loss 0.60768311 - samples/sec: 10.45 - lr: 0.020000\n",
+      "2021-09-21 21:21:13,522 epoch 8 - iter 12/41 - loss 0.57594238 - samples/sec: 15.51 - lr: 0.020000\n",
+      "2021-09-21 21:21:13,770 epoch 8 - iter 16/41 - loss 0.51930382 - samples/sec: 16.13 - lr: 0.020000\n",
+      "2021-09-21 21:21:14,029 epoch 8 - iter 20/41 - loss 0.50298405 - samples/sec: 15.47 - lr: 0.020000\n",
+      "2021-09-21 21:21:14,354 epoch 8 - iter 24/41 - loss 0.46995896 - samples/sec: 12.35 - lr: 0.020000\n",
+      "2021-09-21 21:21:14,665 epoch 8 - iter 28/41 - loss 0.49850447 - samples/sec: 12.88 - lr: 0.020000\n",
+      "2021-09-21 21:21:14,974 epoch 8 - iter 32/41 - loss 0.50733896 - samples/sec: 12.97 - lr: 0.020000\n",
+      "2021-09-21 21:21:15,480 epoch 8 - iter 36/41 - loss 0.55131430 - samples/sec: 7.92 - lr: 0.020000\n",
+      "2021-09-21 21:21:15,911 epoch 8 - iter 40/41 - loss 0.56345598 - samples/sec: 9.29 - lr: 0.020000\n",
+      "2021-09-21 21:21:15,969 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:15,969 EPOCH 8 done: loss 0.5698 - lr 0.0200000\n",
+      "2021-09-21 21:21:16,524 DEV : loss 0.3752533793449402 - score 0.5\n",
+      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:21:16,526 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:21:16,529 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:17,053 epoch 9 - iter 4/41 - loss 0.54759721 - samples/sec: 8.06 - lr: 0.010000\n",
+      "2021-09-21 21:21:17,603 epoch 9 - iter 8/41 - loss 0.63802186 - samples/sec: 7.28 - lr: 0.010000\n",
+      "2021-09-21 21:21:17,946 epoch 9 - iter 12/41 - loss 0.55172528 - samples/sec: 11.71 - lr: 0.010000\n",
+      "2021-09-21 21:21:18,449 epoch 9 - iter 16/41 - loss 0.61791885 - samples/sec: 7.95 - lr: 0.010000\n",
+      "2021-09-21 21:21:18,811 epoch 9 - iter 20/41 - loss 0.57963810 - samples/sec: 11.07 - lr: 0.010000\n",
+      "2021-09-21 21:21:19,172 epoch 9 - iter 24/41 - loss 0.56509402 - samples/sec: 11.11 - lr: 0.010000\n",
+      "2021-09-21 21:21:19,463 epoch 9 - iter 28/41 - loss 0.50696569 - samples/sec: 13.74 - lr: 0.010000\n",
+      "2021-09-21 21:21:19,868 epoch 9 - iter 32/41 - loss 0.51624718 - samples/sec: 9.89 - lr: 0.010000\n",
+      "2021-09-21 21:21:20,119 epoch 9 - iter 36/41 - loss 0.51238095 - samples/sec: 16.01 - lr: 0.010000\n",
+      "2021-09-21 21:21:20,503 epoch 9 - iter 40/41 - loss 0.52072642 - samples/sec: 10.41 - lr: 0.010000\n",
+      "2021-09-21 21:21:20,562 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:20,563 EPOCH 9 done: loss 0.5097 - lr 0.0100000\n",
+      "2021-09-21 21:21:21,179 DEV : loss 0.39725059270858765 - score 0.75\n",
+      "2021-09-21 21:21:21,180 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:48:17,368 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:48:17,368 Testing using best model ...\n",
-      "2021-09-08 01:48:17,370 loading file temp1/best-model.pt\n",
+      "2021-09-21 21:21:25,834 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:26,241 epoch 10 - iter 4/41 - loss 0.28095708 - samples/sec: 10.36 - lr: 0.010000\n",
+      "2021-09-21 21:21:26,603 epoch 10 - iter 8/41 - loss 0.42717353 - samples/sec: 11.07 - lr: 0.010000\n",
+      "2021-09-21 21:21:26,966 epoch 10 - iter 12/41 - loss 0.33944401 - samples/sec: 11.02 - lr: 0.010000\n",
+      "2021-09-21 21:21:27,401 epoch 10 - iter 16/41 - loss 0.34945951 - samples/sec: 9.22 - lr: 0.010000\n",
+      "2021-09-21 21:21:27,748 epoch 10 - iter 20/41 - loss 0.31821687 - samples/sec: 11.56 - lr: 0.010000\n",
+      "2021-09-21 21:21:28,143 epoch 10 - iter 24/41 - loss 0.36826760 - samples/sec: 10.14 - lr: 0.010000\n",
+      "2021-09-21 21:21:28,703 epoch 10 - iter 28/41 - loss 0.37349586 - samples/sec: 7.15 - lr: 0.010000\n",
+      "2021-09-21 21:21:29,185 epoch 10 - iter 32/41 - loss 0.36129498 - samples/sec: 8.32 - lr: 0.010000\n",
+      "2021-09-21 21:21:29,512 epoch 10 - iter 36/41 - loss 0.39380624 - samples/sec: 12.27 - lr: 0.010000\n",
+      "2021-09-21 21:21:30,050 epoch 10 - iter 40/41 - loss 0.40348847 - samples/sec: 7.44 - lr: 0.010000\n",
+      "2021-09-21 21:21:30,187 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:30,188 EPOCH 10 done: loss 0.3994 - lr 0.0100000\n",
+      "2021-09-21 21:21:30,319 DEV : loss 0.5019863843917847 - score 0.5\n",
+      "2021-09-21 21:21:30,320 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:21:36,424 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:21:36,425 Testing using best model ...\n",
+      "2021-09-21 21:21:36,427 loading file temp1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:48:22,926 \t0.4\n",
-      "2021-09-08 01:48:22,927 \n",
+      "2021-09-21 21:21:41,633 \t0.2\n",
+      "2021-09-21 21:21:41,633 \n",
       "Results:\n",
-      "- F-score (micro) 0.4\n",
-      "- F-score (macro) 0.3\n",
-      "- Accuracy 0.4\n",
+      "- F-score (micro) 0.2\n",
+      "- F-score (macro) 0.1333\n",
+      "- Accuracy 0.2\n",
       "\n",
       "By class:\n",
       "                                          precision    recall  f1-score   support\n",
       "\n",
-      "  The product has been reviewed as awful     1.0000    0.3333    0.5000         3\n",
-      "    The product has been reviewed as bad     0.0000    0.0000    0.0000         0\n",
-      "The product has been reviewed as neutral     0.0000    0.0000    0.0000         1\n",
-      "   The product has been reviewed as good     1.0000    1.0000    1.0000         1\n",
-      "  The product has been reviewed as great     0.0000    0.0000    0.0000         0\n",
+      "  The product has been reviewed as awful     0.5000    1.0000    0.6667         1\n",
+      "    The product has been reviewed as bad     0.0000    0.0000    0.0000         1\n",
+      "The product has been reviewed as neutral     0.0000    0.0000    0.0000         0\n",
+      "   The product has been reviewed as good     0.0000    0.0000    0.0000         2\n",
+      "  The product has been reviewed as great     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "                               micro avg     0.4000    0.4000    0.4000         5\n",
-      "                               macro avg     0.4000    0.2667    0.3000         5\n",
-      "                            weighted avg     0.8000    0.4000    0.5000         5\n",
-      "                             samples avg     0.4000    0.4000    0.4000         5\n",
+      "                               micro avg     0.2000    0.2000    0.2000         5\n",
+      "                               macro avg     0.1000    0.2000    0.1333         5\n",
+      "                            weighted avg     0.1000    0.2000    0.1333         5\n",
+      "                             samples avg     0.2000    0.2000    0.2000         5\n",
       "\n",
-      "2021-09-08 01:48:22,927 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:10,870 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
+      "2021-09-21 21:21:41,634 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:42,416 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:49:14,906 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:22:46,382 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 46/46 [00:00<00:00, 17086.25it/s]"
+      "100%|██████████| 46/46 [00:00<00:00, 18591.06it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:49:14,911 [b'The product has been reviewed as awful', b'The product has been reviewed as bad', b'The product has been reviewed as neutral', b'The product has been reviewed as good', b'The product has been reviewed as great']\n",
-      "2021-09-08 01:49:14,921 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:14,923 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:22:46,387 [b'The product has been reviewed as awful', b'The product has been reviewed as bad', b'The product has been reviewed as neutral', b'The product has been reviewed as good', b'The product has been reviewed as great']\n",
+      "2021-09-21 21:22:46,397 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:46,399 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -4901,25 +4899,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:49:14,923 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:14,924 Corpus: \"Corpus: 41 train + 4 dev + 5 test sentences\"\n",
-      "2021-09-08 01:49:14,924 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:14,924 Parameters:\n",
-      "2021-09-08 01:49:14,925  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:49:14,925  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:49:14,925  - patience: \"3\"\n",
-      "2021-09-08 01:49:14,925  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:49:14,926  - max_epochs: \"10\"\n",
-      "2021-09-08 01:49:14,926  - shuffle: \"True\"\n",
-      "2021-09-08 01:49:14,926  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:49:14,927  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:49:14,927 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:14,927 Model training base path: \"temp1\"\n",
-      "2021-09-08 01:49:14,927 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:14,928 Device: cuda:0\n",
-      "2021-09-08 01:49:14,928 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:14,928 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:49:14,935 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:22:46,399 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:46,400 Corpus: \"Corpus: 41 train + 4 dev + 5 test sentences\"\n",
+      "2021-09-21 21:22:46,400 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:46,400 Parameters:\n",
+      "2021-09-21 21:22:46,401  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:22:46,401  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:22:46,401  - patience: \"3\"\n",
+      "2021-09-21 21:22:46,401  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:22:46,402  - max_epochs: \"10\"\n",
+      "2021-09-21 21:22:46,402  - shuffle: \"True\"\n",
+      "2021-09-21 21:22:46,402  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:22:46,403  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:22:46,403 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:46,403 Model training base path: \"temp1\"\n",
+      "2021-09-21 21:22:46,404 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:46,404 Device: cuda:0\n",
+      "2021-09-21 21:22:46,404 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:46,405 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:22:46,411 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -4933,210 +4931,209 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:49:15,202 epoch 1 - iter 4/41 - loss 0.60109635 - samples/sec: 15.97 - lr: 0.020000\n",
-      "2021-09-08 01:49:15,432 epoch 1 - iter 8/41 - loss 0.40762097 - samples/sec: 17.48 - lr: 0.020000\n",
-      "2021-09-08 01:49:15,715 epoch 1 - iter 12/41 - loss 0.50824635 - samples/sec: 14.14 - lr: 0.020000\n",
-      "2021-09-08 01:49:16,049 epoch 1 - iter 16/41 - loss 0.58606929 - samples/sec: 12.00 - lr: 0.020000\n",
-      "2021-09-08 01:49:16,248 epoch 1 - iter 20/41 - loss 0.61902547 - samples/sec: 20.13 - lr: 0.020000\n",
-      "2021-09-08 01:49:16,652 epoch 1 - iter 24/41 - loss 0.62095104 - samples/sec: 9.93 - lr: 0.020000\n",
-      "2021-09-08 01:49:16,919 epoch 1 - iter 28/41 - loss 0.62721464 - samples/sec: 15.04 - lr: 0.020000\n",
-      "2021-09-08 01:49:17,197 epoch 1 - iter 32/41 - loss 0.62544555 - samples/sec: 14.40 - lr: 0.020000\n",
-      "2021-09-08 01:49:17,494 epoch 1 - iter 36/41 - loss 0.61669117 - samples/sec: 13.49 - lr: 0.020000\n",
-      "2021-09-08 01:49:17,794 epoch 1 - iter 40/41 - loss 0.62851164 - samples/sec: 13.39 - lr: 0.020000\n",
-      "2021-09-08 01:49:17,871 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:17,871 EPOCH 1 done: loss 0.6269 - lr 0.0200000\n",
-      "2021-09-08 01:49:18,096 DEV : loss 0.5795844197273254 - score 0.5\n",
-      "2021-09-08 01:49:18,097 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:22:46,650 epoch 1 - iter 4/41 - loss 0.67411423 - samples/sec: 17.96 - lr: 0.020000\n",
+      "2021-09-21 21:22:46,888 epoch 1 - iter 8/41 - loss 0.51910499 - samples/sec: 16.86 - lr: 0.020000\n",
+      "2021-09-21 21:22:47,135 epoch 1 - iter 12/41 - loss 0.66822027 - samples/sec: 16.21 - lr: 0.020000\n",
+      "2021-09-21 21:22:47,370 epoch 1 - iter 16/41 - loss 0.67713106 - samples/sec: 17.05 - lr: 0.020000\n",
+      "2021-09-21 21:22:47,604 epoch 1 - iter 20/41 - loss 0.67138715 - samples/sec: 17.13 - lr: 0.020000\n",
+      "2021-09-21 21:22:47,835 epoch 1 - iter 24/41 - loss 0.61880589 - samples/sec: 17.36 - lr: 0.020000\n",
+      "2021-09-21 21:22:48,109 epoch 1 - iter 28/41 - loss 0.64545883 - samples/sec: 14.67 - lr: 0.020000\n",
+      "2021-09-21 21:22:48,336 epoch 1 - iter 32/41 - loss 0.66666644 - samples/sec: 17.62 - lr: 0.020000\n",
+      "2021-09-21 21:22:48,566 epoch 1 - iter 36/41 - loss 0.66815464 - samples/sec: 17.43 - lr: 0.020000\n",
+      "2021-09-21 21:22:48,798 epoch 1 - iter 40/41 - loss 0.67435747 - samples/sec: 17.27 - lr: 0.020000\n",
+      "2021-09-21 21:22:48,950 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:48,950 EPOCH 1 done: loss 0.6765 - lr 0.0200000\n",
+      "2021-09-21 21:22:49,183 DEV : loss 0.5967254042625427 - score 0.75\n",
+      "2021-09-21 21:22:49,184 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:49:23,087 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:23,411 epoch 2 - iter 4/41 - loss 0.71555907 - samples/sec: 13.01 - lr: 0.020000\n",
-      "2021-09-08 01:49:23,706 epoch 2 - iter 8/41 - loss 0.62702272 - samples/sec: 13.59 - lr: 0.020000\n",
-      "2021-09-08 01:49:23,972 epoch 2 - iter 12/41 - loss 0.66779194 - samples/sec: 15.04 - lr: 0.020000\n",
-      "2021-09-08 01:49:24,257 epoch 2 - iter 16/41 - loss 0.65669864 - samples/sec: 14.08 - lr: 0.020000\n",
-      "2021-09-08 01:49:24,453 epoch 2 - iter 20/41 - loss 0.55269590 - samples/sec: 20.41 - lr: 0.020000\n",
-      "2021-09-08 01:49:24,732 epoch 2 - iter 24/41 - loss 0.61164612 - samples/sec: 14.37 - lr: 0.020000\n",
-      "2021-09-08 01:49:24,967 epoch 2 - iter 28/41 - loss 0.60393527 - samples/sec: 17.06 - lr: 0.020000\n",
-      "2021-09-08 01:49:25,268 epoch 2 - iter 32/41 - loss 0.59554731 - samples/sec: 13.33 - lr: 0.020000\n",
-      "2021-09-08 01:49:25,617 epoch 2 - iter 36/41 - loss 0.59473690 - samples/sec: 11.49 - lr: 0.020000\n",
-      "2021-09-08 01:49:25,857 epoch 2 - iter 40/41 - loss 0.60438309 - samples/sec: 16.66 - lr: 0.020000\n",
-      "2021-09-08 01:49:25,941 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:25,941 EPOCH 2 done: loss 0.6044 - lr 0.0200000\n",
-      "2021-09-08 01:49:26,084 DEV : loss 0.5906362533569336 - score 0.75\n",
-      "2021-09-08 01:49:26,085 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:49:35,242 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:35,481 epoch 3 - iter 4/41 - loss 0.48832298 - samples/sec: 18.26 - lr: 0.020000\n",
-      "2021-09-08 01:49:35,805 epoch 3 - iter 8/41 - loss 0.37334646 - samples/sec: 12.38 - lr: 0.020000\n",
-      "2021-09-08 01:49:36,022 epoch 3 - iter 12/41 - loss 0.40609340 - samples/sec: 18.47 - lr: 0.020000\n",
-      "2021-09-08 01:49:36,334 epoch 3 - iter 16/41 - loss 0.39522838 - samples/sec: 12.84 - lr: 0.020000\n",
-      "2021-09-08 01:49:36,579 epoch 3 - iter 20/41 - loss 0.42937896 - samples/sec: 16.38 - lr: 0.020000\n",
-      "2021-09-08 01:49:36,906 epoch 3 - iter 24/41 - loss 0.42854447 - samples/sec: 12.26 - lr: 0.020000\n",
-      "2021-09-08 01:49:37,117 epoch 3 - iter 28/41 - loss 0.43083326 - samples/sec: 19.03 - lr: 0.020000\n",
-      "2021-09-08 01:49:37,477 epoch 3 - iter 32/41 - loss 0.43598937 - samples/sec: 11.11 - lr: 0.020000\n",
-      "2021-09-08 01:49:37,778 epoch 3 - iter 36/41 - loss 0.47967278 - samples/sec: 13.31 - lr: 0.020000\n",
-      "2021-09-08 01:49:38,107 epoch 3 - iter 40/41 - loss 0.52438863 - samples/sec: 12.18 - lr: 0.020000\n",
-      "2021-09-08 01:49:38,187 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:38,188 EPOCH 3 done: loss 0.5230 - lr 0.0200000\n",
-      "2021-09-08 01:49:40,284 DEV : loss 0.4188448190689087 - score 0.5\n",
-      "2021-09-08 01:49:40,285 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:49:40,316 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:40,580 epoch 4 - iter 4/41 - loss 0.10354342 - samples/sec: 16.25 - lr: 0.020000\n",
-      "2021-09-08 01:49:40,857 epoch 4 - iter 8/41 - loss 0.39046949 - samples/sec: 14.43 - lr: 0.020000\n",
-      "2021-09-08 01:49:41,097 epoch 4 - iter 12/41 - loss 0.41773404 - samples/sec: 16.75 - lr: 0.020000\n",
-      "2021-09-08 01:49:41,360 epoch 4 - iter 16/41 - loss 0.38570946 - samples/sec: 15.21 - lr: 0.020000\n",
-      "2021-09-08 01:49:41,651 epoch 4 - iter 20/41 - loss 0.42708604 - samples/sec: 13.81 - lr: 0.020000\n",
-      "2021-09-08 01:49:41,858 epoch 4 - iter 24/41 - loss 0.41052883 - samples/sec: 19.38 - lr: 0.020000\n",
-      "2021-09-08 01:49:42,231 epoch 4 - iter 28/41 - loss 0.40563926 - samples/sec: 10.74 - lr: 0.020000\n",
-      "2021-09-08 01:49:42,664 epoch 4 - iter 32/41 - loss 0.44865187 - samples/sec: 9.24 - lr: 0.020000\n",
-      "2021-09-08 01:49:42,882 epoch 4 - iter 36/41 - loss 0.49549361 - samples/sec: 18.44 - lr: 0.020000\n",
-      "2021-09-08 01:49:43,145 epoch 4 - iter 40/41 - loss 0.47827022 - samples/sec: 15.21 - lr: 0.020000\n",
-      "2021-09-08 01:49:43,208 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:43,209 EPOCH 4 done: loss 0.4722 - lr 0.0200000\n",
-      "2021-09-08 01:49:43,420 DEV : loss 0.3162915110588074 - score 0.75\n",
-      "2021-09-08 01:49:43,421 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:49:51,473 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:51,802 epoch 5 - iter 4/41 - loss 0.34074013 - samples/sec: 13.20 - lr: 0.020000\n",
-      "2021-09-08 01:49:52,025 epoch 5 - iter 8/41 - loss 0.54030401 - samples/sec: 18.01 - lr: 0.020000\n",
-      "2021-09-08 01:49:52,335 epoch 5 - iter 12/41 - loss 0.54815188 - samples/sec: 12.91 - lr: 0.020000\n",
-      "2021-09-08 01:49:52,546 epoch 5 - iter 16/41 - loss 0.45722222 - samples/sec: 19.05 - lr: 0.020000\n",
-      "2021-09-08 01:49:52,892 epoch 5 - iter 20/41 - loss 0.39795399 - samples/sec: 11.57 - lr: 0.020000\n",
-      "2021-09-08 01:49:53,132 epoch 5 - iter 24/41 - loss 0.37758506 - samples/sec: 16.76 - lr: 0.020000\n",
-      "2021-09-08 01:49:53,400 epoch 5 - iter 28/41 - loss 0.40629546 - samples/sec: 14.92 - lr: 0.020000\n",
-      "2021-09-08 01:49:53,795 epoch 5 - iter 32/41 - loss 0.40033929 - samples/sec: 10.15 - lr: 0.020000\n",
-      "2021-09-08 01:49:54,206 epoch 5 - iter 36/41 - loss 0.40562362 - samples/sec: 9.76 - lr: 0.020000\n",
-      "2021-09-08 01:49:54,436 epoch 5 - iter 40/41 - loss 0.45597902 - samples/sec: 17.42 - lr: 0.020000\n",
-      "2021-09-08 01:49:54,541 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:54,542 EPOCH 5 done: loss 0.4460 - lr 0.0200000\n",
-      "2021-09-08 01:49:55,523 DEV : loss 0.5340270400047302 - score 0.5\n",
-      "2021-09-08 01:49:55,524 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:49:55,618 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:55,870 epoch 6 - iter 4/41 - loss 0.02055355 - samples/sec: 17.15 - lr: 0.020000\n",
-      "2021-09-08 01:49:56,134 epoch 6 - iter 8/41 - loss 0.23024094 - samples/sec: 15.19 - lr: 0.020000\n",
-      "2021-09-08 01:49:56,474 epoch 6 - iter 12/41 - loss 0.30312091 - samples/sec: 11.80 - lr: 0.020000\n",
-      "2021-09-08 01:49:56,842 epoch 6 - iter 16/41 - loss 0.27473995 - samples/sec: 10.89 - lr: 0.020000\n",
-      "2021-09-08 01:49:57,105 epoch 6 - iter 20/41 - loss 0.34747771 - samples/sec: 15.22 - lr: 0.020000\n",
-      "2021-09-08 01:49:57,359 epoch 6 - iter 24/41 - loss 0.41668440 - samples/sec: 15.83 - lr: 0.020000\n",
-      "2021-09-08 01:49:57,651 epoch 6 - iter 28/41 - loss 0.43992331 - samples/sec: 13.71 - lr: 0.020000\n",
-      "2021-09-08 01:49:57,901 epoch 6 - iter 32/41 - loss 0.41808657 - samples/sec: 16.08 - lr: 0.020000\n",
-      "2021-09-08 01:49:58,136 epoch 6 - iter 36/41 - loss 0.39522673 - samples/sec: 17.04 - lr: 0.020000\n",
-      "2021-09-08 01:49:58,382 epoch 6 - iter 40/41 - loss 0.40308197 - samples/sec: 16.32 - lr: 0.020000\n",
-      "2021-09-08 01:49:58,461 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:22:56,735 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:56,998 epoch 2 - iter 4/41 - loss 0.62939648 - samples/sec: 16.25 - lr: 0.020000\n",
+      "2021-09-21 21:22:57,231 epoch 2 - iter 8/41 - loss 0.61885397 - samples/sec: 17.24 - lr: 0.020000\n",
+      "2021-09-21 21:22:57,464 epoch 2 - iter 12/41 - loss 0.67736650 - samples/sec: 17.20 - lr: 0.020000\n",
+      "2021-09-21 21:22:57,671 epoch 2 - iter 16/41 - loss 0.66195954 - samples/sec: 19.43 - lr: 0.020000\n",
+      "2021-09-21 21:22:57,890 epoch 2 - iter 20/41 - loss 0.64770020 - samples/sec: 18.28 - lr: 0.020000\n",
+      "2021-09-21 21:22:58,106 epoch 2 - iter 24/41 - loss 0.64982158 - samples/sec: 18.54 - lr: 0.020000\n",
+      "2021-09-21 21:22:58,315 epoch 2 - iter 28/41 - loss 0.60429980 - samples/sec: 19.25 - lr: 0.020000\n",
+      "2021-09-21 21:22:58,638 epoch 2 - iter 32/41 - loss 0.59766545 - samples/sec: 12.41 - lr: 0.020000\n",
+      "2021-09-21 21:22:58,976 epoch 2 - iter 36/41 - loss 0.64509470 - samples/sec: 11.85 - lr: 0.020000\n",
+      "2021-09-21 21:22:59,256 epoch 2 - iter 40/41 - loss 0.66216891 - samples/sec: 14.27 - lr: 0.020000\n",
+      "2021-09-21 21:22:59,325 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:59,326 EPOCH 2 done: loss 0.6649 - lr 0.0200000\n",
+      "2021-09-21 21:22:59,477 DEV : loss 0.6448163390159607 - score 0.25\n",
+      "2021-09-21 21:22:59,477 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:22:59,480 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:22:59,774 epoch 3 - iter 4/41 - loss 0.65181595 - samples/sec: 14.93 - lr: 0.020000\n",
+      "2021-09-21 21:23:00,113 epoch 3 - iter 8/41 - loss 0.64762629 - samples/sec: 11.79 - lr: 0.020000\n",
+      "2021-09-21 21:23:00,368 epoch 3 - iter 12/41 - loss 0.61271206 - samples/sec: 15.77 - lr: 0.020000\n",
+      "2021-09-21 21:23:00,610 epoch 3 - iter 16/41 - loss 0.60009483 - samples/sec: 16.54 - lr: 0.020000\n",
+      "2021-09-21 21:23:00,943 epoch 3 - iter 20/41 - loss 0.62915702 - samples/sec: 12.04 - lr: 0.020000\n",
+      "2021-09-21 21:23:01,194 epoch 3 - iter 24/41 - loss 0.63026514 - samples/sec: 15.97 - lr: 0.020000\n",
+      "2021-09-21 21:23:01,462 epoch 3 - iter 28/41 - loss 0.63478884 - samples/sec: 14.95 - lr: 0.020000\n",
+      "2021-09-21 21:23:01,850 epoch 3 - iter 32/41 - loss 0.61421461 - samples/sec: 10.34 - lr: 0.020000\n",
+      "2021-09-21 21:23:02,175 epoch 3 - iter 36/41 - loss 0.63155196 - samples/sec: 12.33 - lr: 0.020000\n",
+      "2021-09-21 21:23:02,474 epoch 3 - iter 40/41 - loss 0.62601003 - samples/sec: 13.39 - lr: 0.020000\n",
+      "2021-09-21 21:23:02,545 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:23:02,546 EPOCH 3 done: loss 0.6243 - lr 0.0200000\n",
+      "2021-09-21 21:23:02,737 DEV : loss 0.6531649231910706 - score 0.5\n",
+      "2021-09-21 21:23:02,738 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:23:02,739 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:23:03,088 epoch 4 - iter 4/41 - loss 0.53232484 - samples/sec: 12.71 - lr: 0.020000\n",
+      "2021-09-21 21:23:03,348 epoch 4 - iter 8/41 - loss 0.54853030 - samples/sec: 15.38 - lr: 0.020000\n",
+      "2021-09-21 21:23:03,590 epoch 4 - iter 12/41 - loss 0.57381760 - samples/sec: 16.57 - lr: 0.020000\n",
+      "2021-09-21 21:23:03,861 epoch 4 - iter 16/41 - loss 0.57224625 - samples/sec: 14.79 - lr: 0.020000\n",
+      "2021-09-21 21:23:04,103 epoch 4 - iter 20/41 - loss 0.58636870 - samples/sec: 16.59 - lr: 0.020000\n",
+      "2021-09-21 21:23:04,342 epoch 4 - iter 24/41 - loss 0.56634369 - samples/sec: 16.77 - lr: 0.020000\n",
+      "2021-09-21 21:23:04,616 epoch 4 - iter 28/41 - loss 0.57795918 - samples/sec: 14.64 - lr: 0.020000\n",
+      "2021-09-21 21:23:05,007 epoch 4 - iter 32/41 - loss 0.57505608 - samples/sec: 10.24 - lr: 0.020000\n",
+      "2021-09-21 21:23:05,235 epoch 4 - iter 36/41 - loss 0.55620442 - samples/sec: 17.59 - lr: 0.020000\n",
+      "2021-09-21 21:23:05,502 epoch 4 - iter 40/41 - loss 0.56553920 - samples/sec: 15.00 - lr: 0.020000\n",
+      "2021-09-21 21:23:05,564 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:23:05,565 EPOCH 4 done: loss 0.5587 - lr 0.0200000\n",
+      "2021-09-21 21:23:05,797 DEV : loss 0.5034574270248413 - score 0.5\n",
+      "2021-09-21 21:23:05,798 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:23:05,874 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:23:06,142 epoch 5 - iter 4/41 - loss 0.31969022 - samples/sec: 16.17 - lr: 0.020000\n",
+      "2021-09-21 21:23:06,435 epoch 5 - iter 8/41 - loss 0.42017671 - samples/sec: 13.64 - lr: 0.020000\n",
+      "2021-09-21 21:23:06,698 epoch 5 - iter 12/41 - loss 0.44477453 - samples/sec: 15.27 - lr: 0.020000\n",
+      "2021-09-21 21:23:06,930 epoch 5 - iter 16/41 - loss 0.37410542 - samples/sec: 17.31 - lr: 0.020000\n",
+      "2021-09-21 21:23:07,152 epoch 5 - iter 20/41 - loss 0.48849143 - samples/sec: 18.02 - lr: 0.020000\n",
+      "2021-09-21 21:23:07,395 epoch 5 - iter 24/41 - loss 0.45579376 - samples/sec: 16.52 - lr: 0.020000\n",
+      "2021-09-21 21:23:07,888 epoch 5 - iter 28/41 - loss 0.46399659 - samples/sec: 8.12 - lr: 0.020000\n",
+      "2021-09-21 21:23:08,190 epoch 5 - iter 32/41 - loss 0.44366770 - samples/sec: 13.26 - lr: 0.020000\n",
+      "2021-09-21 21:23:08,472 epoch 5 - iter 36/41 - loss 0.43622451 - samples/sec: 14.20 - lr: 0.020000\n",
+      "2021-09-21 21:23:08,859 epoch 5 - iter 40/41 - loss 0.44276778 - samples/sec: 10.36 - lr: 0.020000\n",
+      "2021-09-21 21:23:08,945 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:23:08,945 EPOCH 5 done: loss 0.4401 - lr 0.0200000\n",
+      "2021-09-21 21:23:09,114 DEV : loss 0.8154255151748657 - score 0.25\n",
+      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:23:09,115 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:23:09,117 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:23:09,367 epoch 6 - iter 4/41 - loss 0.32312253 - samples/sec: 17.45 - lr: 0.010000\n",
+      "2021-09-21 21:23:09,646 epoch 6 - iter 8/41 - loss 0.39987322 - samples/sec: 14.37 - lr: 0.010000\n",
+      "2021-09-21 21:23:09,925 epoch 6 - iter 12/41 - loss 0.35286797 - samples/sec: 14.41 - lr: 0.010000\n",
+      "2021-09-21 21:23:10,191 epoch 6 - iter 16/41 - loss 0.38352633 - samples/sec: 15.05 - lr: 0.010000\n",
+      "2021-09-21 21:23:10,431 epoch 6 - iter 20/41 - loss 0.38702242 - samples/sec: 16.69 - lr: 0.010000\n",
+      "2021-09-21 21:23:10,785 epoch 6 - iter 24/41 - loss 0.41374033 - samples/sec: 11.32 - lr: 0.010000\n",
+      "2021-09-21 21:23:11,018 epoch 6 - iter 28/41 - loss 0.41751212 - samples/sec: 17.21 - lr: 0.010000\n",
+      "2021-09-21 21:23:11,248 epoch 6 - iter 32/41 - loss 0.41517984 - samples/sec: 17.41 - lr: 0.010000\n",
+      "2021-09-21 21:23:11,483 epoch 6 - iter 36/41 - loss 0.43378873 - samples/sec: 17.10 - lr: 0.010000\n",
+      "2021-09-21 21:23:11,696 epoch 6 - iter 40/41 - loss 0.39235719 - samples/sec: 18.81 - lr: 0.010000\n",
+      "2021-09-21 21:23:11,752 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:49:58,462 EPOCH 6 done: loss 0.3962 - lr 0.0200000\n",
-      "2021-09-08 01:49:58,661 DEV : loss 0.53187096118927 - score 0.25\n",
-      "2021-09-08 01:49:58,662 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:49:58,732 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:49:58,983 epoch 7 - iter 4/41 - loss 0.16248684 - samples/sec: 17.20 - lr: 0.020000\n",
-      "2021-09-08 01:49:59,387 epoch 7 - iter 8/41 - loss 0.49546217 - samples/sec: 9.91 - lr: 0.020000\n",
-      "2021-09-08 01:49:59,647 epoch 7 - iter 12/41 - loss 0.36863392 - samples/sec: 15.45 - lr: 0.020000\n",
-      "2021-09-08 01:49:59,879 epoch 7 - iter 16/41 - loss 0.35236721 - samples/sec: 17.25 - lr: 0.020000\n",
-      "2021-09-08 01:50:00,242 epoch 7 - iter 20/41 - loss 0.40039497 - samples/sec: 11.05 - lr: 0.020000\n",
-      "2021-09-08 01:50:00,489 epoch 7 - iter 24/41 - loss 0.37026484 - samples/sec: 16.26 - lr: 0.020000\n",
-      "2021-09-08 01:50:00,779 epoch 7 - iter 28/41 - loss 0.34550282 - samples/sec: 13.83 - lr: 0.020000\n",
-      "2021-09-08 01:50:01,073 epoch 7 - iter 32/41 - loss 0.31736243 - samples/sec: 13.62 - lr: 0.020000\n",
-      "2021-09-08 01:50:01,310 epoch 7 - iter 36/41 - loss 0.35993187 - samples/sec: 16.92 - lr: 0.020000\n",
-      "2021-09-08 01:50:01,554 epoch 7 - iter 40/41 - loss 0.34342413 - samples/sec: 16.40 - lr: 0.020000\n",
-      "2021-09-08 01:50:01,616 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:01,616 EPOCH 7 done: loss 0.3366 - lr 0.0200000\n",
-      "2021-09-08 01:50:01,718 DEV : loss 0.9159401059150696 - score 0.25\n",
-      "2021-09-08 01:50:01,718 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:50:01,721 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:02,002 epoch 8 - iter 4/41 - loss 0.05134014 - samples/sec: 15.04 - lr: 0.020000\n",
-      "2021-09-08 01:50:02,341 epoch 8 - iter 8/41 - loss 0.18205813 - samples/sec: 11.83 - lr: 0.020000\n",
-      "2021-09-08 01:50:02,705 epoch 8 - iter 12/41 - loss 0.28432027 - samples/sec: 11.00 - lr: 0.020000\n",
-      "2021-09-08 01:50:03,015 epoch 8 - iter 16/41 - loss 0.24584204 - samples/sec: 12.97 - lr: 0.020000\n",
-      "2021-09-08 01:50:03,269 epoch 8 - iter 20/41 - loss 0.28871426 - samples/sec: 15.78 - lr: 0.020000\n",
-      "2021-09-08 01:50:03,493 epoch 8 - iter 24/41 - loss 0.26254591 - samples/sec: 17.89 - lr: 0.020000\n",
-      "2021-09-08 01:50:03,764 epoch 8 - iter 28/41 - loss 0.28489449 - samples/sec: 14.79 - lr: 0.020000\n",
-      "2021-09-08 01:50:04,030 epoch 8 - iter 32/41 - loss 0.25938389 - samples/sec: 15.05 - lr: 0.020000\n",
-      "2021-09-08 01:50:04,310 epoch 8 - iter 36/41 - loss 0.25650289 - samples/sec: 14.32 - lr: 0.020000\n",
-      "2021-09-08 01:50:04,500 epoch 8 - iter 40/41 - loss 0.25095871 - samples/sec: 21.13 - lr: 0.020000\n",
-      "2021-09-08 01:50:04,578 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:04,578 EPOCH 8 done: loss 0.2695 - lr 0.0200000\n",
-      "2021-09-08 01:50:04,728 DEV : loss 0.5332455635070801 - score 0.25\n",
-      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:50:04,729 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:50:04,731 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:05,015 epoch 9 - iter 4/41 - loss 0.12614099 - samples/sec: 14.80 - lr: 0.010000\n",
-      "2021-09-08 01:50:05,262 epoch 9 - iter 8/41 - loss 0.27343375 - samples/sec: 16.25 - lr: 0.010000\n",
-      "2021-09-08 01:50:05,617 epoch 9 - iter 12/41 - loss 0.19270955 - samples/sec: 11.29 - lr: 0.010000\n",
-      "2021-09-08 01:50:05,870 epoch 9 - iter 16/41 - loss 0.16134643 - samples/sec: 15.82 - lr: 0.010000\n",
-      "2021-09-08 01:50:06,095 epoch 9 - iter 20/41 - loss 0.13873170 - samples/sec: 17.86 - lr: 0.010000\n",
-      "2021-09-08 01:50:06,376 epoch 9 - iter 24/41 - loss 0.11614385 - samples/sec: 14.29 - lr: 0.010000\n",
-      "2021-09-08 01:50:06,587 epoch 9 - iter 28/41 - loss 0.16343489 - samples/sec: 18.98 - lr: 0.010000\n",
-      "2021-09-08 01:50:06,914 epoch 9 - iter 32/41 - loss 0.16988127 - samples/sec: 12.27 - lr: 0.010000\n",
-      "2021-09-08 01:50:07,131 epoch 9 - iter 36/41 - loss 0.16104023 - samples/sec: 18.45 - lr: 0.010000\n",
-      "2021-09-08 01:50:07,472 epoch 9 - iter 40/41 - loss 0.15473341 - samples/sec: 11.75 - lr: 0.010000\n",
-      "2021-09-08 01:50:07,519 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:07,519 EPOCH 9 done: loss 0.1510 - lr 0.0100000\n",
-      "2021-09-08 01:50:07,664 DEV : loss 0.6315481066703796 - score 0.25\n",
-      "2021-09-08 01:50:07,665 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:50:07,667 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:07,919 epoch 10 - iter 4/41 - loss 0.01313579 - samples/sec: 17.49 - lr: 0.010000\n",
-      "2021-09-08 01:50:08,204 epoch 10 - iter 8/41 - loss 0.04019142 - samples/sec: 14.09 - lr: 0.010000\n",
-      "2021-09-08 01:50:08,387 epoch 10 - iter 12/41 - loss 0.05887603 - samples/sec: 21.93 - lr: 0.010000\n",
-      "2021-09-08 01:50:08,655 epoch 10 - iter 16/41 - loss 0.05312133 - samples/sec: 14.98 - lr: 0.010000\n",
-      "2021-09-08 01:50:08,901 epoch 10 - iter 20/41 - loss 0.04697141 - samples/sec: 16.25 - lr: 0.010000\n",
-      "2021-09-08 01:50:09,231 epoch 10 - iter 24/41 - loss 0.08908124 - samples/sec: 12.15 - lr: 0.010000\n",
-      "2021-09-08 01:50:09,507 epoch 10 - iter 28/41 - loss 0.07800191 - samples/sec: 14.55 - lr: 0.010000\n",
-      "2021-09-08 01:50:09,872 epoch 10 - iter 32/41 - loss 0.11609757 - samples/sec: 10.96 - lr: 0.010000\n",
-      "2021-09-08 01:50:10,119 epoch 10 - iter 36/41 - loss 0.10342961 - samples/sec: 16.26 - lr: 0.010000\n",
-      "2021-09-08 01:50:10,441 epoch 10 - iter 40/41 - loss 0.11268409 - samples/sec: 12.42 - lr: 0.010000\n",
-      "2021-09-08 01:50:10,490 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:10,491 EPOCH 10 done: loss 0.1403 - lr 0.0100000\n",
-      "2021-09-08 01:50:10,598 DEV : loss 0.8806406855583191 - score 0.25\n",
-      "2021-09-08 01:50:10,598 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:50:16,499 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:50:16,500 Testing using best model ...\n",
-      "2021-09-08 01:50:16,550 loading file temp1/best-model.pt\n",
+      "2021-09-21 21:23:11,753 EPOCH 6 done: loss 0.4124 - lr 0.0100000\n",
+      "2021-09-21 21:23:11,986 DEV : loss 0.6755971908569336 - score 0.0\n",
+      "2021-09-21 21:23:11,987 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:23:12,064 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:23:12,322 epoch 7 - iter 4/41 - loss 0.23565417 - samples/sec: 16.56 - lr: 0.010000\n",
+      "2021-09-21 21:23:12,630 epoch 7 - iter 8/41 - loss 0.18753720 - samples/sec: 13.03 - lr: 0.010000\n",
+      "2021-09-21 21:23:12,877 epoch 7 - iter 12/41 - loss 0.20820097 - samples/sec: 16.19 - lr: 0.010000\n",
+      "2021-09-21 21:23:13,113 epoch 7 - iter 16/41 - loss 0.16354818 - samples/sec: 17.00 - lr: 0.010000\n",
+      "2021-09-21 21:23:13,409 epoch 7 - iter 20/41 - loss 0.19442129 - samples/sec: 13.53 - lr: 0.010000\n",
+      "2021-09-21 21:23:13,683 epoch 7 - iter 24/41 - loss 0.24344584 - samples/sec: 14.64 - lr: 0.010000\n",
+      "2021-09-21 21:23:14,174 epoch 7 - iter 28/41 - loss 0.23027357 - samples/sec: 8.15 - lr: 0.010000\n",
+      "2021-09-21 21:23:14,472 epoch 7 - iter 32/41 - loss 0.24711633 - samples/sec: 13.44 - lr: 0.010000\n",
+      "2021-09-21 21:23:14,780 epoch 7 - iter 36/41 - loss 0.27503909 - samples/sec: 13.01 - lr: 0.010000\n",
+      "2021-09-21 21:23:15,114 epoch 7 - iter 40/41 - loss 0.27213126 - samples/sec: 12.01 - lr: 0.010000\n",
+      "2021-09-21 21:23:15,186 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:23:15,186 EPOCH 7 done: loss 0.2656 - lr 0.0100000\n",
+      "2021-09-21 21:23:15,372 DEV : loss 0.6877073049545288 - score 0.0\n",
+      "2021-09-21 21:23:15,373 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:23:15,375 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:23:15,705 epoch 8 - iter 4/41 - loss 0.27871747 - samples/sec: 14.14 - lr: 0.010000\n",
+      "2021-09-21 21:23:16,006 epoch 8 - iter 8/41 - loss 0.22481660 - samples/sec: 13.31 - lr: 0.010000\n",
+      "2021-09-21 21:23:16,476 epoch 8 - iter 12/41 - loss 0.23519913 - samples/sec: 8.53 - lr: 0.010000\n",
+      "2021-09-21 21:23:16,729 epoch 8 - iter 16/41 - loss 0.29581018 - samples/sec: 15.85 - lr: 0.010000\n",
+      "2021-09-21 21:23:16,997 epoch 8 - iter 20/41 - loss 0.33815153 - samples/sec: 14.97 - lr: 0.010000\n",
+      "2021-09-21 21:23:17,226 epoch 8 - iter 24/41 - loss 0.31384078 - samples/sec: 17.46 - lr: 0.010000\n",
+      "2021-09-21 21:23:17,447 epoch 8 - iter 28/41 - loss 0.28801016 - samples/sec: 18.18 - lr: 0.010000\n",
+      "2021-09-21 21:23:17,695 epoch 8 - iter 32/41 - loss 0.26899400 - samples/sec: 16.16 - lr: 0.010000\n",
+      "2021-09-21 21:23:17,971 epoch 8 - iter 36/41 - loss 0.26136859 - samples/sec: 14.53 - lr: 0.010000\n",
+      "2021-09-21 21:23:18,216 epoch 8 - iter 40/41 - loss 0.25615332 - samples/sec: 16.32 - lr: 0.010000\n",
+      "2021-09-21 21:23:18,271 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:23:18,271 EPOCH 8 done: loss 0.2500 - lr 0.0100000\n",
+      "2021-09-21 21:23:18,484 DEV : loss 0.8928017616271973 - score 0.0\n",
+      "2021-09-21 21:23:18,484 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:23:18,556 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:23:18,808 epoch 9 - iter 4/41 - loss 0.27774160 - samples/sec: 17.57 - lr: 0.010000\n",
+      "2021-09-21 21:23:19,035 epoch 9 - iter 8/41 - loss 0.44567127 - samples/sec: 17.64 - lr: 0.010000\n",
+      "2021-09-21 21:23:19,277 epoch 9 - iter 12/41 - loss 0.40554664 - samples/sec: 16.62 - lr: 0.010000\n",
+      "2021-09-21 21:23:19,532 epoch 9 - iter 16/41 - loss 0.30978895 - samples/sec: 15.72 - lr: 0.010000\n",
+      "2021-09-21 21:23:19,770 epoch 9 - iter 20/41 - loss 0.29548357 - samples/sec: 16.84 - lr: 0.010000\n",
+      "2021-09-21 21:23:20,123 epoch 9 - iter 24/41 - loss 0.31510856 - samples/sec: 11.32 - lr: 0.010000\n",
+      "2021-09-21 21:23:20,473 epoch 9 - iter 28/41 - loss 0.30981745 - samples/sec: 11.45 - lr: 0.010000\n",
+      "2021-09-21 21:23:20,753 epoch 9 - iter 32/41 - loss 0.30281974 - samples/sec: 14.34 - lr: 0.010000\n",
+      "2021-09-21 21:23:21,112 epoch 9 - iter 36/41 - loss 0.28788868 - samples/sec: 11.15 - lr: 0.010000\n",
+      "2021-09-21 21:23:21,422 epoch 9 - iter 40/41 - loss 0.26875780 - samples/sec: 12.94 - lr: 0.010000\n",
+      "2021-09-21 21:23:21,496 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:23:21,497 EPOCH 9 done: loss 0.2624 - lr 0.0100000\n",
+      "2021-09-21 21:23:21,676 DEV : loss 0.7183616161346436 - score 0.0\n",
+      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 21:23:21,677 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:23:21,679 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:23:22,094 epoch 10 - iter 4/41 - loss 0.28691437 - samples/sec: 10.07 - lr: 0.005000\n",
+      "2021-09-21 21:23:22,374 epoch 10 - iter 8/41 - loss 0.18834156 - samples/sec: 14.33 - lr: 0.005000\n",
+      "2021-09-21 21:23:22,675 epoch 10 - iter 12/41 - loss 0.18222905 - samples/sec: 13.30 - lr: 0.005000\n",
+      "2021-09-21 21:23:22,939 epoch 10 - iter 16/41 - loss 0.19755265 - samples/sec: 15.17 - lr: 0.005000\n",
+      "2021-09-21 21:23:23,245 epoch 10 - iter 20/41 - loss 0.19899546 - samples/sec: 13.13 - lr: 0.005000\n",
+      "2021-09-21 21:23:23,574 epoch 10 - iter 24/41 - loss 0.16789579 - samples/sec: 12.17 - lr: 0.005000\n",
+      "2021-09-21 21:23:23,850 epoch 10 - iter 28/41 - loss 0.19538686 - samples/sec: 14.54 - lr: 0.005000\n",
+      "2021-09-21 21:23:24,367 epoch 10 - iter 32/41 - loss 0.22303871 - samples/sec: 7.73 - lr: 0.005000\n",
+      "2021-09-21 21:23:24,692 epoch 10 - iter 36/41 - loss 0.23936691 - samples/sec: 12.34 - lr: 0.005000\n",
+      "2021-09-21 21:23:25,036 epoch 10 - iter 40/41 - loss 0.22876097 - samples/sec: 11.63 - lr: 0.005000\n",
+      "2021-09-21 21:23:25,101 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:23:25,102 EPOCH 10 done: loss 0.2342 - lr 0.0050000\n",
+      "2021-09-21 21:23:25,299 DEV : loss 0.9133368134498596 - score 0.0\n",
+      "2021-09-21 21:23:25,300 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:23:32,195 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:23:32,196 Testing using best model ...\n",
+      "2021-09-21 21:23:32,198 loading file temp1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:50:21,911 \t0.0\n",
-      "2021-09-08 01:50:21,911 \n",
+      "2021-09-21 21:23:39,380 \t0.4\n",
+      "2021-09-21 21:23:39,381 \n",
       "Results:\n",
-      "- F-score (micro) 0.0\n",
-      "- F-score (macro) 0.0\n",
-      "- Accuracy 0.0\n",
+      "- F-score (micro) 0.4\n",
+      "- F-score (macro) 0.2333\n",
+      "- Accuracy 0.4\n",
       "\n",
       "By class:\n",
       "                                          precision    recall  f1-score   support\n",
       "\n",
-      "  The product has been reviewed as awful     0.0000    0.0000    0.0000         1\n",
-      "    The product has been reviewed as bad     0.0000    0.0000    0.0000         0\n",
-      "The product has been reviewed as neutral     0.0000    0.0000    0.0000         2\n",
-      "   The product has been reviewed as good     0.0000    0.0000    0.0000         0\n",
-      "  The product has been reviewed as great     0.0000    0.0000    0.0000         2\n",
+      "  The product has been reviewed as awful     0.0000    0.0000    0.0000         0\n",
+      "    The product has been reviewed as bad     0.0000    0.0000    0.0000         1\n",
+      "The product has been reviewed as neutral     0.0000    0.0000    0.0000         1\n",
+      "   The product has been reviewed as good     0.5000    0.5000    0.5000         2\n",
+      "  The product has been reviewed as great     0.5000    1.0000    0.6667         1\n",
       "\n",
-      "                               micro avg     0.0000    0.0000    0.0000         5\n",
-      "                               macro avg     0.0000    0.0000    0.0000         5\n",
-      "                            weighted avg     0.0000    0.0000    0.0000         5\n",
-      "                             samples avg     0.0000    0.0000    0.0000         5\n",
+      "                               micro avg     0.4000    0.4000    0.4000         5\n",
+      "                               macro avg     0.2000    0.3000    0.2333         5\n",
+      "                            weighted avg     0.3000    0.4000    0.3333         5\n",
+      "                             samples avg     0.4000    0.4000    0.4000         5\n",
       "\n",
-      "2021-09-08 01:50:21,912 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:13,006 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
+      "2021-09-21 21:23:39,381 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:24:30,774 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:51:17,046 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:24:34,717 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 46/46 [00:00<00:00, 17700.73it/s]"
+      "100%|██████████| 46/46 [00:00<00:00, 13964.82it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:51:17,050 [b'The product has been reviewed as awful', b'The product has been reviewed as bad', b'The product has been reviewed as neutral', b'The product has been reviewed as good', b'The product has been reviewed as great']\n",
-      "2021-09-08 01:51:17,060 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:17,062 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:24:34,723 [b'The product has been reviewed as awful', b'The product has been reviewed as bad', b'The product has been reviewed as neutral', b'The product has been reviewed as good', b'The product has been reviewed as great']\n",
+      "2021-09-21 21:24:34,736 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:24:34,737 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -5449,25 +5446,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:51:17,063 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:17,063 Corpus: \"Corpus: 41 train + 4 dev + 5 test sentences\"\n",
-      "2021-09-08 01:51:17,064 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:17,064 Parameters:\n",
-      "2021-09-08 01:51:17,064  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:51:17,065  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:51:17,065  - patience: \"3\"\n",
-      "2021-09-08 01:51:17,065  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:51:17,066  - max_epochs: \"10\"\n",
-      "2021-09-08 01:51:17,066  - shuffle: \"True\"\n",
-      "2021-09-08 01:51:17,066  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:51:17,067  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:51:17,067 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:17,067 Model training base path: \"temp1\"\n",
-      "2021-09-08 01:51:17,067 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:17,068 Device: cuda:0\n",
-      "2021-09-08 01:51:17,068 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:17,068 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:51:17,075 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:24:34,738 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:24:34,738 Corpus: \"Corpus: 41 train + 4 dev + 5 test sentences\"\n",
+      "2021-09-21 21:24:34,739 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:24:34,739 Parameters:\n",
+      "2021-09-21 21:24:34,740  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:24:34,740  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:24:34,741  - patience: \"3\"\n",
+      "2021-09-21 21:24:34,741  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:24:34,742  - max_epochs: \"10\"\n",
+      "2021-09-21 21:24:34,742  - shuffle: \"True\"\n",
+      "2021-09-21 21:24:34,743  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:24:34,743  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:24:34,743 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:24:34,744 Model training base path: \"temp1\"\n",
+      "2021-09-21 21:24:34,744 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:24:34,745 Device: cuda:0\n",
+      "2021-09-21 21:24:34,745 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:24:34,746 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:24:34,754 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -5481,192 +5478,193 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:51:17,322 epoch 1 - iter 4/41 - loss 0.83297583 - samples/sec: 18.82 - lr: 0.020000\n",
-      "2021-09-08 01:51:17,563 epoch 1 - iter 8/41 - loss 0.66434785 - samples/sec: 16.65 - lr: 0.020000\n",
-      "2021-09-08 01:51:17,791 epoch 1 - iter 12/41 - loss 0.69264606 - samples/sec: 17.59 - lr: 0.020000\n",
-      "2021-09-08 01:51:18,126 epoch 1 - iter 16/41 - loss 0.66661448 - samples/sec: 11.94 - lr: 0.020000\n",
-      "2021-09-08 01:51:18,407 epoch 1 - iter 20/41 - loss 0.71331919 - samples/sec: 14.26 - lr: 0.020000\n",
-      "2021-09-08 01:51:18,716 epoch 1 - iter 24/41 - loss 0.67348116 - samples/sec: 12.97 - lr: 0.020000\n",
-      "2021-09-08 01:51:18,970 epoch 1 - iter 28/41 - loss 0.62872697 - samples/sec: 15.75 - lr: 0.020000\n",
-      "2021-09-08 01:51:19,252 epoch 1 - iter 32/41 - loss 0.59091131 - samples/sec: 14.23 - lr: 0.020000\n",
-      "2021-09-08 01:51:19,510 epoch 1 - iter 36/41 - loss 0.67676088 - samples/sec: 15.53 - lr: 0.020000\n",
-      "2021-09-08 01:51:19,741 epoch 1 - iter 40/41 - loss 0.70118552 - samples/sec: 17.33 - lr: 0.020000\n",
-      "2021-09-08 01:51:19,830 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:19,831 EPOCH 1 done: loss 0.6995 - lr 0.0200000\n",
-      "2021-09-08 01:51:19,903 DEV : loss 0.4985821843147278 - score 0.5\n",
-      "2021-09-08 01:51:19,904 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:51:24,020 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:24,236 epoch 2 - iter 4/41 - loss 0.59968811 - samples/sec: 19.85 - lr: 0.020000\n",
-      "2021-09-08 01:51:24,503 epoch 2 - iter 8/41 - loss 0.64920529 - samples/sec: 15.05 - lr: 0.020000\n",
-      "2021-09-08 01:51:24,795 epoch 2 - iter 12/41 - loss 0.66864457 - samples/sec: 13.74 - lr: 0.020000\n",
-      "2021-09-08 01:51:24,999 epoch 2 - iter 16/41 - loss 0.67086069 - samples/sec: 19.68 - lr: 0.020000\n",
-      "2021-09-08 01:51:25,207 epoch 2 - iter 20/41 - loss 0.65980190 - samples/sec: 19.22 - lr: 0.020000\n",
-      "2021-09-08 01:51:25,441 epoch 2 - iter 24/41 - loss 0.66136925 - samples/sec: 17.18 - lr: 0.020000\n",
-      "2021-09-08 01:51:25,673 epoch 2 - iter 28/41 - loss 0.66990582 - samples/sec: 17.25 - lr: 0.020000\n",
-      "2021-09-08 01:51:25,860 epoch 2 - iter 32/41 - loss 0.67316386 - samples/sec: 21.46 - lr: 0.020000\n",
-      "2021-09-08 01:51:26,098 epoch 2 - iter 36/41 - loss 0.67221642 - samples/sec: 16.89 - lr: 0.020000\n",
-      "2021-09-08 01:51:26,299 epoch 2 - iter 40/41 - loss 0.67585860 - samples/sec: 19.88 - lr: 0.020000\n",
-      "2021-09-08 01:51:26,347 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:26,348 EPOCH 2 done: loss 0.6692 - lr 0.0200000\n",
-      "2021-09-08 01:51:29,223 DEV : loss 0.3914779722690582 - score 0.5\n",
-      "2021-09-08 01:51:29,224 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:24:35,037 epoch 1 - iter 4/41 - loss 0.57097955 - samples/sec: 15.45 - lr: 0.020000\n",
+      "2021-09-21 21:24:35,322 epoch 1 - iter 8/41 - loss 0.58722109 - samples/sec: 14.04 - lr: 0.020000\n",
+      "2021-09-21 21:24:35,744 epoch 1 - iter 12/41 - loss 0.69466234 - samples/sec: 9.49 - lr: 0.020000\n",
+      "2021-09-21 21:24:36,029 epoch 1 - iter 16/41 - loss 0.70198173 - samples/sec: 14.06 - lr: 0.020000\n",
+      "2021-09-21 21:24:36,336 epoch 1 - iter 20/41 - loss 0.72177566 - samples/sec: 13.06 - lr: 0.020000\n",
+      "2021-09-21 21:24:36,583 epoch 1 - iter 24/41 - loss 0.66960240 - samples/sec: 16.24 - lr: 0.020000\n",
+      "2021-09-21 21:24:36,821 epoch 1 - iter 28/41 - loss 0.67212579 - samples/sec: 16.84 - lr: 0.020000\n",
+      "2021-09-21 21:24:37,050 epoch 1 - iter 32/41 - loss 0.64738836 - samples/sec: 17.52 - lr: 0.020000\n",
+      "2021-09-21 21:24:37,291 epoch 1 - iter 36/41 - loss 0.68019778 - samples/sec: 16.65 - lr: 0.020000\n",
+      "2021-09-21 21:24:37,544 epoch 1 - iter 40/41 - loss 0.65189003 - samples/sec: 15.85 - lr: 0.020000\n",
+      "2021-09-21 21:24:37,603 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:24:37,603 EPOCH 1 done: loss 0.6732 - lr 0.0200000\n",
+      "2021-09-21 21:24:37,802 DEV : loss 0.3840422034263611 - score 0.5\n",
+      "2021-09-21 21:24:37,803 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:51:35,012 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:35,255 epoch 3 - iter 4/41 - loss 0.41775389 - samples/sec: 17.83 - lr: 0.020000\n",
-      "2021-09-08 01:51:35,623 epoch 3 - iter 8/41 - loss 0.49847423 - samples/sec: 10.89 - lr: 0.020000\n",
-      "2021-09-08 01:51:35,936 epoch 3 - iter 12/41 - loss 0.49170547 - samples/sec: 12.80 - lr: 0.020000\n",
-      "2021-09-08 01:51:36,227 epoch 3 - iter 16/41 - loss 0.55793212 - samples/sec: 13.78 - lr: 0.020000\n",
-      "2021-09-08 01:51:36,477 epoch 3 - iter 20/41 - loss 0.57349531 - samples/sec: 16.07 - lr: 0.020000\n",
-      "2021-09-08 01:51:36,869 epoch 3 - iter 24/41 - loss 0.60208785 - samples/sec: 10.20 - lr: 0.020000\n",
-      "2021-09-08 01:51:37,093 epoch 3 - iter 28/41 - loss 0.59253926 - samples/sec: 17.90 - lr: 0.020000\n",
-      "2021-09-08 01:51:37,395 epoch 3 - iter 32/41 - loss 0.59125599 - samples/sec: 13.26 - lr: 0.020000\n",
-      "2021-09-08 01:51:37,606 epoch 3 - iter 36/41 - loss 0.61737591 - samples/sec: 19.01 - lr: 0.020000\n",
-      "2021-09-08 01:51:37,851 epoch 3 - iter 40/41 - loss 0.61269609 - samples/sec: 16.39 - lr: 0.020000\n",
-      "2021-09-08 01:51:37,899 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:37,899 EPOCH 3 done: loss 0.6102 - lr 0.0200000\n",
-      "2021-09-08 01:51:38,002 DEV : loss 0.5423006415367126 - score 0.75\n",
-      "2021-09-08 01:51:38,003 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:24:42,464 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:24:42,822 epoch 2 - iter 4/41 - loss 0.59566063 - samples/sec: 11.84 - lr: 0.020000\n",
+      "2021-09-21 21:24:43,271 epoch 2 - iter 8/41 - loss 0.67853050 - samples/sec: 8.92 - lr: 0.020000\n",
+      "2021-09-21 21:24:43,557 epoch 2 - iter 12/41 - loss 0.65714320 - samples/sec: 14.02 - lr: 0.020000\n",
+      "2021-09-21 21:24:43,921 epoch 2 - iter 16/41 - loss 0.66871777 - samples/sec: 10.99 - lr: 0.020000\n",
+      "2021-09-21 21:24:44,163 epoch 2 - iter 20/41 - loss 0.65602546 - samples/sec: 16.56 - lr: 0.020000\n",
+      "2021-09-21 21:24:44,402 epoch 2 - iter 24/41 - loss 0.63547172 - samples/sec: 16.81 - lr: 0.020000\n",
+      "2021-09-21 21:24:44,641 epoch 2 - iter 28/41 - loss 0.62742126 - samples/sec: 16.74 - lr: 0.020000\n",
+      "2021-09-21 21:24:44,873 epoch 2 - iter 32/41 - loss 0.62958403 - samples/sec: 17.35 - lr: 0.020000\n",
+      "2021-09-21 21:24:45,100 epoch 2 - iter 36/41 - loss 0.61898129 - samples/sec: 17.63 - lr: 0.020000\n",
+      "2021-09-21 21:24:45,368 epoch 2 - iter 40/41 - loss 0.62241108 - samples/sec: 14.96 - lr: 0.020000\n",
+      "2021-09-21 21:24:45,447 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:24:45,448 EPOCH 2 done: loss 0.6295 - lr 0.0200000\n",
+      "2021-09-21 21:24:45,645 DEV : loss 0.34156447649002075 - score 0.75\n",
+      "2021-09-21 21:24:45,646 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:51:41,622 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:42,033 epoch 4 - iter 4/41 - loss 0.63595560 - samples/sec: 10.79 - lr: 0.020000\n",
-      "2021-09-08 01:51:42,365 epoch 4 - iter 8/41 - loss 0.60185189 - samples/sec: 12.05 - lr: 0.020000\n",
-      "2021-09-08 01:51:42,614 epoch 4 - iter 12/41 - loss 0.59550399 - samples/sec: 16.14 - lr: 0.020000\n",
-      "2021-09-08 01:51:42,959 epoch 4 - iter 16/41 - loss 0.56104119 - samples/sec: 11.63 - lr: 0.020000\n",
-      "2021-09-08 01:51:43,329 epoch 4 - iter 20/41 - loss 0.57126465 - samples/sec: 10.81 - lr: 0.020000\n",
-      "2021-09-08 01:51:43,570 epoch 4 - iter 24/41 - loss 0.58889552 - samples/sec: 16.70 - lr: 0.020000\n",
-      "2021-09-08 01:51:43,860 epoch 4 - iter 28/41 - loss 0.56720755 - samples/sec: 13.82 - lr: 0.020000\n",
-      "2021-09-08 01:51:44,068 epoch 4 - iter 32/41 - loss 0.56588611 - samples/sec: 19.23 - lr: 0.020000\n",
-      "2021-09-08 01:51:44,332 epoch 4 - iter 36/41 - loss 0.54494550 - samples/sec: 15.20 - lr: 0.020000\n",
-      "2021-09-08 01:51:44,558 epoch 4 - iter 40/41 - loss 0.58063877 - samples/sec: 17.73 - lr: 0.020000\n",
-      "2021-09-08 01:51:44,621 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:44,622 EPOCH 4 done: loss 0.5725 - lr 0.0200000\n",
-      "2021-09-08 01:51:44,759 DEV : loss 0.4496671259403229 - score 0.5\n",
-      "2021-09-08 01:51:44,759 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:51:44,761 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:45,091 epoch 5 - iter 4/41 - loss 0.37923784 - samples/sec: 12.68 - lr: 0.020000\n",
-      "2021-09-08 01:51:45,331 epoch 5 - iter 8/41 - loss 0.29612373 - samples/sec: 16.69 - lr: 0.020000\n",
-      "2021-09-08 01:51:45,621 epoch 5 - iter 12/41 - loss 0.36056155 - samples/sec: 13.81 - lr: 0.020000\n",
-      "2021-09-08 01:51:45,920 epoch 5 - iter 16/41 - loss 0.40641175 - samples/sec: 13.42 - lr: 0.020000\n",
-      "2021-09-08 01:51:46,115 epoch 5 - iter 20/41 - loss 0.41377551 - samples/sec: 20.56 - lr: 0.020000\n",
-      "2021-09-08 01:51:46,387 epoch 5 - iter 24/41 - loss 0.49782318 - samples/sec: 14.71 - lr: 0.020000\n",
-      "2021-09-08 01:51:46,620 epoch 5 - iter 28/41 - loss 0.44600117 - samples/sec: 17.25 - lr: 0.020000\n",
-      "2021-09-08 01:51:46,918 epoch 5 - iter 32/41 - loss 0.46480111 - samples/sec: 13.46 - lr: 0.020000\n",
-      "2021-09-08 01:51:47,175 epoch 5 - iter 36/41 - loss 0.49281570 - samples/sec: 15.57 - lr: 0.020000\n",
-      "2021-09-08 01:51:47,733 epoch 5 - iter 40/41 - loss 0.49189957 - samples/sec: 7.18 - lr: 0.020000\n",
-      "2021-09-08 01:51:47,782 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:47,782 EPOCH 5 done: loss 0.4959 - lr 0.0200000\n",
-      "2021-09-08 01:51:47,981 DEV : loss 0.38733646273612976 - score 0.5\n",
-      "2021-09-08 01:51:47,982 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:51:48,056 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:48,482 epoch 6 - iter 4/41 - loss 0.69899919 - samples/sec: 10.38 - lr: 0.020000\n",
-      "2021-09-08 01:51:48,784 epoch 6 - iter 8/41 - loss 0.55834113 - samples/sec: 13.26 - lr: 0.020000\n",
-      "2021-09-08 01:51:49,050 epoch 6 - iter 12/41 - loss 0.42075598 - samples/sec: 15.11 - lr: 0.020000\n",
-      "2021-09-08 01:51:49,284 epoch 6 - iter 16/41 - loss 0.34093565 - samples/sec: 17.10 - lr: 0.020000\n",
-      "2021-09-08 01:51:49,545 epoch 6 - iter 20/41 - loss 0.31397991 - samples/sec: 15.37 - lr: 0.020000\n",
-      "2021-09-08 01:51:49,774 epoch 6 - iter 24/41 - loss 0.31104226 - samples/sec: 17.47 - lr: 0.020000\n",
-      "2021-09-08 01:51:50,110 epoch 6 - iter 28/41 - loss 0.41519318 - samples/sec: 11.95 - lr: 0.020000\n",
-      "2021-09-08 01:51:50,354 epoch 6 - iter 32/41 - loss 0.39274872 - samples/sec: 16.43 - lr: 0.020000\n",
-      "2021-09-08 01:51:50,582 epoch 6 - iter 36/41 - loss 0.39257384 - samples/sec: 17.58 - lr: 0.020000\n",
-      "2021-09-08 01:51:50,793 epoch 6 - iter 40/41 - loss 0.41866509 - samples/sec: 19.00 - lr: 0.020000\n",
-      "2021-09-08 01:51:50,897 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:24:55,157 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:24:55,492 epoch 3 - iter 4/41 - loss 0.32764068 - samples/sec: 13.02 - lr: 0.020000\n",
+      "2021-09-21 21:24:55,785 epoch 3 - iter 8/41 - loss 0.45670104 - samples/sec: 13.68 - lr: 0.020000\n",
+      "2021-09-21 21:24:56,081 epoch 3 - iter 12/41 - loss 0.51431965 - samples/sec: 13.57 - lr: 0.020000\n",
+      "2021-09-21 21:24:56,353 epoch 3 - iter 16/41 - loss 0.55158758 - samples/sec: 14.74 - lr: 0.020000\n",
+      "2021-09-21 21:24:56,871 epoch 3 - iter 20/41 - loss 0.59218901 - samples/sec: 7.72 - lr: 0.020000\n",
+      "2021-09-21 21:24:57,173 epoch 3 - iter 24/41 - loss 0.58534966 - samples/sec: 13.30 - lr: 0.020000\n",
+      "2021-09-21 21:24:57,466 epoch 3 - iter 28/41 - loss 0.56274136 - samples/sec: 13.69 - lr: 0.020000\n",
+      "2021-09-21 21:24:57,830 epoch 3 - iter 32/41 - loss 0.53377965 - samples/sec: 11.01 - lr: 0.020000\n",
+      "2021-09-21 21:24:58,145 epoch 3 - iter 36/41 - loss 0.50618956 - samples/sec: 12.72 - lr: 0.020000\n",
+      "2021-09-21 21:24:58,474 epoch 3 - iter 40/41 - loss 0.47834096 - samples/sec: 12.15 - lr: 0.020000\n",
+      "2021-09-21 21:24:58,543 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:24:58,544 EPOCH 3 done: loss 0.4776 - lr 0.0200000\n",
+      "2021-09-21 21:24:58,684 DEV : loss 0.5169371366500854 - score 0.75\n",
+      "2021-09-21 21:24:58,685 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:24:58,687 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:24:59,069 epoch 4 - iter 4/41 - loss 0.40762851 - samples/sec: 10.97 - lr: 0.020000\n",
+      "2021-09-21 21:24:59,342 epoch 4 - iter 8/41 - loss 0.29834139 - samples/sec: 14.70 - lr: 0.020000\n",
+      "2021-09-21 21:24:59,656 epoch 4 - iter 12/41 - loss 0.32840320 - samples/sec: 12.77 - lr: 0.020000\n",
+      "2021-09-21 21:24:59,913 epoch 4 - iter 16/41 - loss 0.32808012 - samples/sec: 15.57 - lr: 0.020000\n",
+      "2021-09-21 21:25:00,208 epoch 4 - iter 20/41 - loss 0.37501006 - samples/sec: 13.60 - lr: 0.020000\n",
+      "2021-09-21 21:25:00,484 epoch 4 - iter 24/41 - loss 0.35251501 - samples/sec: 14.52 - lr: 0.020000\n",
+      "2021-09-21 21:25:00,788 epoch 4 - iter 28/41 - loss 0.37162614 - samples/sec: 13.21 - lr: 0.020000\n",
+      "2021-09-21 21:25:01,239 epoch 4 - iter 32/41 - loss 0.38181471 - samples/sec: 8.87 - lr: 0.020000\n",
+      "2021-09-21 21:25:01,546 epoch 4 - iter 36/41 - loss 0.39535250 - samples/sec: 13.09 - lr: 0.020000\n",
+      "2021-09-21 21:25:01,903 epoch 4 - iter 40/41 - loss 0.44050795 - samples/sec: 11.22 - lr: 0.020000\n",
+      "2021-09-21 21:25:01,992 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:25:01,992 EPOCH 4 done: loss 0.4726 - lr 0.0200000\n",
+      "2021-09-21 21:25:02,146 DEV : loss 0.42539939284324646 - score 0.5\n",
+      "2021-09-21 21:25:02,147 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:25:02,150 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:25:02,517 epoch 5 - iter 4/41 - loss 0.62731414 - samples/sec: 11.68 - lr: 0.020000\n",
+      "2021-09-21 21:25:02,967 epoch 5 - iter 8/41 - loss 0.37319753 - samples/sec: 8.90 - lr: 0.020000\n",
+      "2021-09-21 21:25:03,254 epoch 5 - iter 12/41 - loss 0.44239610 - samples/sec: 13.97 - lr: 0.020000\n",
+      "2021-09-21 21:25:03,597 epoch 5 - iter 16/41 - loss 0.37271628 - samples/sec: 11.70 - lr: 0.020000\n",
+      "2021-09-21 21:25:03,878 epoch 5 - iter 20/41 - loss 0.37055933 - samples/sec: 14.25 - lr: 0.020000\n",
+      "2021-09-21 21:25:04,192 epoch 5 - iter 24/41 - loss 0.42184437 - samples/sec: 12.76 - lr: 0.020000\n",
+      "2021-09-21 21:25:04,450 epoch 5 - iter 28/41 - loss 0.39597956 - samples/sec: 15.54 - lr: 0.020000\n",
+      "2021-09-21 21:25:04,757 epoch 5 - iter 32/41 - loss 0.42351088 - samples/sec: 13.06 - lr: 0.020000\n",
+      "2021-09-21 21:25:05,060 epoch 5 - iter 36/41 - loss 0.43573621 - samples/sec: 13.22 - lr: 0.020000\n",
+      "2021-09-21 21:25:05,354 epoch 5 - iter 40/41 - loss 0.41597076 - samples/sec: 13.61 - lr: 0.020000\n",
+      "2021-09-21 21:25:05,499 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:25:05,500 EPOCH 5 done: loss 0.4098 - lr 0.0200000\n",
+      "2021-09-21 21:25:05,654 DEV : loss 0.3792118728160858 - score 0.5\n",
+      "2021-09-21 21:25:05,655 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:25:05,657 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:25:05,949 epoch 6 - iter 4/41 - loss 0.47684953 - samples/sec: 15.00 - lr: 0.020000\n",
+      "2021-09-21 21:25:06,361 epoch 6 - iter 8/41 - loss 0.34578967 - samples/sec: 9.73 - lr: 0.020000\n",
+      "2021-09-21 21:25:06,783 epoch 6 - iter 12/41 - loss 0.26037005 - samples/sec: 9.49 - lr: 0.020000\n",
+      "2021-09-21 21:25:07,136 epoch 6 - iter 16/41 - loss 0.27484483 - samples/sec: 11.34 - lr: 0.020000\n",
+      "2021-09-21 21:25:07,443 epoch 6 - iter 20/41 - loss 0.27435046 - samples/sec: 13.04 - lr: 0.020000\n",
+      "2021-09-21 21:25:07,721 epoch 6 - iter 24/41 - loss 0.25892938 - samples/sec: 14.46 - lr: 0.020000\n",
+      "2021-09-21 21:25:08,020 epoch 6 - iter 28/41 - loss 0.25928674 - samples/sec: 13.38 - lr: 0.020000\n",
+      "2021-09-21 21:25:08,311 epoch 6 - iter 32/41 - loss 0.24079760 - samples/sec: 13.78 - lr: 0.020000\n",
+      "2021-09-21 21:25:08,656 epoch 6 - iter 36/41 - loss 0.27860520 - samples/sec: 11.61 - lr: 0.020000\n",
+      "2021-09-21 21:25:08,932 epoch 6 - iter 40/41 - loss 0.31704966 - samples/sec: 14.55 - lr: 0.020000\n",
+      "2021-09-21 21:25:08,994 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:51:50,897 EPOCH 6 done: loss 0.4788 - lr 0.0200000\n",
-      "2021-09-08 01:51:51,044 DEV : loss 0.4458385705947876 - score 0.5\n",
-      "2021-09-08 01:51:51,045 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:51:51,047 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:51,420 epoch 7 - iter 4/41 - loss 0.26251563 - samples/sec: 11.14 - lr: 0.020000\n",
-      "2021-09-08 01:51:51,608 epoch 7 - iter 8/41 - loss 0.29522666 - samples/sec: 21.42 - lr: 0.020000\n",
-      "2021-09-08 01:51:51,873 epoch 7 - iter 12/41 - loss 0.26270215 - samples/sec: 15.11 - lr: 0.020000\n",
-      "2021-09-08 01:51:52,277 epoch 7 - iter 16/41 - loss 0.33337671 - samples/sec: 9.91 - lr: 0.020000\n",
-      "2021-09-08 01:51:52,584 epoch 7 - iter 20/41 - loss 0.39210234 - samples/sec: 13.08 - lr: 0.020000\n",
-      "2021-09-08 01:51:52,849 epoch 7 - iter 24/41 - loss 0.44272007 - samples/sec: 15.12 - lr: 0.020000\n",
-      "2021-09-08 01:51:53,087 epoch 7 - iter 28/41 - loss 0.45208923 - samples/sec: 16.84 - lr: 0.020000\n",
-      "2021-09-08 01:51:53,352 epoch 7 - iter 32/41 - loss 0.41721583 - samples/sec: 15.16 - lr: 0.020000\n",
-      "2021-09-08 01:51:53,562 epoch 7 - iter 36/41 - loss 0.45086740 - samples/sec: 19.11 - lr: 0.020000\n",
-      "2021-09-08 01:51:53,838 epoch 7 - iter 40/41 - loss 0.43368044 - samples/sec: 14.49 - lr: 0.020000\n",
-      "2021-09-08 01:51:53,892 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:53,893 EPOCH 7 done: loss 0.4238 - lr 0.0200000\n",
-      "2021-09-08 01:51:54,135 DEV : loss 1.0994495153427124 - score 0.25\n",
-      "Epoch     7: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:51:54,136 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:51:54,224 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:54,544 epoch 8 - iter 4/41 - loss 0.47606095 - samples/sec: 13.64 - lr: 0.010000\n",
-      "2021-09-08 01:51:54,796 epoch 8 - iter 8/41 - loss 0.44160441 - samples/sec: 15.88 - lr: 0.010000\n",
-      "2021-09-08 01:51:55,190 epoch 8 - iter 12/41 - loss 0.43667039 - samples/sec: 10.17 - lr: 0.010000\n",
-      "2021-09-08 01:51:55,414 epoch 8 - iter 16/41 - loss 0.38529022 - samples/sec: 17.88 - lr: 0.010000\n",
-      "2021-09-08 01:51:55,694 epoch 8 - iter 20/41 - loss 0.41953145 - samples/sec: 14.30 - lr: 0.010000\n",
-      "2021-09-08 01:51:55,929 epoch 8 - iter 24/41 - loss 0.35862976 - samples/sec: 17.12 - lr: 0.010000\n",
-      "2021-09-08 01:51:56,200 epoch 8 - iter 28/41 - loss 0.32939695 - samples/sec: 14.78 - lr: 0.010000\n",
-      "2021-09-08 01:51:56,467 epoch 8 - iter 32/41 - loss 0.29443144 - samples/sec: 14.99 - lr: 0.010000\n",
-      "2021-09-08 01:51:56,745 epoch 8 - iter 36/41 - loss 0.31396543 - samples/sec: 14.43 - lr: 0.010000\n",
-      "2021-09-08 01:51:57,015 epoch 8 - iter 40/41 - loss 0.33956203 - samples/sec: 14.85 - lr: 0.010000\n",
-      "2021-09-08 01:51:57,063 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:57,064 EPOCH 8 done: loss 0.3314 - lr 0.0100000\n",
-      "2021-09-08 01:51:57,242 DEV : loss 0.825310230255127 - score 0.25\n",
-      "2021-09-08 01:51:57,243 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:51:57,321 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:51:57,592 epoch 9 - iter 4/41 - loss 0.24859056 - samples/sec: 15.68 - lr: 0.010000\n",
-      "2021-09-08 01:51:57,885 epoch 9 - iter 8/41 - loss 0.24388681 - samples/sec: 13.71 - lr: 0.010000\n",
-      "2021-09-08 01:51:58,212 epoch 9 - iter 12/41 - loss 0.23111244 - samples/sec: 12.26 - lr: 0.010000\n",
-      "2021-09-08 01:51:58,493 epoch 9 - iter 16/41 - loss 0.18979761 - samples/sec: 14.24 - lr: 0.010000\n",
-      "2021-09-08 01:51:58,695 epoch 9 - iter 20/41 - loss 0.15233545 - samples/sec: 19.88 - lr: 0.010000\n",
-      "2021-09-08 01:51:58,934 epoch 9 - iter 24/41 - loss 0.18566066 - samples/sec: 16.80 - lr: 0.010000\n",
-      "2021-09-08 01:51:59,194 epoch 9 - iter 28/41 - loss 0.22633184 - samples/sec: 15.40 - lr: 0.010000\n",
-      "2021-09-08 01:51:59,452 epoch 9 - iter 32/41 - loss 0.21922847 - samples/sec: 15.58 - lr: 0.010000\n",
-      "2021-09-08 01:51:59,794 epoch 9 - iter 36/41 - loss 0.26633093 - samples/sec: 11.70 - lr: 0.010000\n",
-      "2021-09-08 01:52:00,180 epoch 9 - iter 40/41 - loss 0.25531106 - samples/sec: 10.37 - lr: 0.010000\n",
-      "2021-09-08 01:52:00,225 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:52:00,226 EPOCH 9 done: loss 0.2492 - lr 0.0100000\n",
-      "2021-09-08 01:52:00,643 DEV : loss 0.9184228181838989 - score 0.0\n",
-      "2021-09-08 01:52:00,644 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:52:00,695 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:52:01,023 epoch 10 - iter 4/41 - loss 0.13339828 - samples/sec: 12.80 - lr: 0.010000\n",
-      "2021-09-08 01:52:01,251 epoch 10 - iter 8/41 - loss 0.07017968 - samples/sec: 17.59 - lr: 0.010000\n",
-      "2021-09-08 01:52:01,536 epoch 10 - iter 12/41 - loss 0.05217171 - samples/sec: 14.04 - lr: 0.010000\n",
-      "2021-09-08 01:52:01,840 epoch 10 - iter 16/41 - loss 0.12347009 - samples/sec: 13.19 - lr: 0.010000\n",
-      "2021-09-08 01:52:02,057 epoch 10 - iter 20/41 - loss 0.19859696 - samples/sec: 18.52 - lr: 0.010000\n",
-      "2021-09-08 01:52:02,294 epoch 10 - iter 24/41 - loss 0.20786546 - samples/sec: 16.91 - lr: 0.010000\n",
-      "2021-09-08 01:52:02,493 epoch 10 - iter 28/41 - loss 0.21375632 - samples/sec: 20.18 - lr: 0.010000\n",
-      "2021-09-08 01:52:02,739 epoch 10 - iter 32/41 - loss 0.23406832 - samples/sec: 16.30 - lr: 0.010000\n",
-      "2021-09-08 01:52:03,148 epoch 10 - iter 36/41 - loss 0.22458315 - samples/sec: 9.80 - lr: 0.010000\n",
-      "2021-09-08 01:52:03,491 epoch 10 - iter 40/41 - loss 0.21994740 - samples/sec: 11.68 - lr: 0.010000\n",
-      "2021-09-08 01:52:03,546 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:52:03,546 EPOCH 10 done: loss 0.2155 - lr 0.0100000\n",
-      "2021-09-08 01:52:03,647 DEV : loss 1.0662720203399658 - score 0.25\n",
-      "2021-09-08 01:52:03,648 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:52:07,315 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:52:07,315 Testing using best model ...\n",
-      "2021-09-08 01:52:07,317 loading file temp1/best-model.pt\n",
+      "2021-09-21 21:25:08,995 EPOCH 6 done: loss 0.3206 - lr 0.0200000\n",
+      "2021-09-21 21:25:09,145 DEV : loss 0.23137731850147247 - score 0.5\n",
+      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:25:09,146 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:25:09,148 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:25:09,479 epoch 7 - iter 4/41 - loss 0.23683904 - samples/sec: 13.14 - lr: 0.010000\n",
+      "2021-09-21 21:25:09,781 epoch 7 - iter 8/41 - loss 0.20866725 - samples/sec: 13.26 - lr: 0.010000\n",
+      "2021-09-21 21:25:10,104 epoch 7 - iter 12/41 - loss 0.14877582 - samples/sec: 12.42 - lr: 0.010000\n",
+      "2021-09-21 21:25:10,353 epoch 7 - iter 16/41 - loss 0.17945835 - samples/sec: 16.12 - lr: 0.010000\n",
+      "2021-09-21 21:25:10,687 epoch 7 - iter 20/41 - loss 0.23380727 - samples/sec: 11.99 - lr: 0.010000\n",
+      "2021-09-21 21:25:11,168 epoch 7 - iter 24/41 - loss 0.24833669 - samples/sec: 8.33 - lr: 0.010000\n",
+      "2021-09-21 21:25:11,455 epoch 7 - iter 28/41 - loss 0.27480401 - samples/sec: 13.94 - lr: 0.010000\n",
+      "2021-09-21 21:25:11,763 epoch 7 - iter 32/41 - loss 0.27302981 - samples/sec: 13.01 - lr: 0.010000\n",
+      "2021-09-21 21:25:12,109 epoch 7 - iter 36/41 - loss 0.26255501 - samples/sec: 11.58 - lr: 0.010000\n",
+      "2021-09-21 21:25:12,431 epoch 7 - iter 40/41 - loss 0.34665514 - samples/sec: 12.47 - lr: 0.010000\n",
+      "2021-09-21 21:25:12,497 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:25:12,498 EPOCH 7 done: loss 0.3506 - lr 0.0100000\n",
+      "2021-09-21 21:25:12,646 DEV : loss 0.15013253688812256 - score 0.75\n",
+      "2021-09-21 21:25:12,649 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:25:16,892 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:25:17,186 epoch 8 - iter 4/41 - loss 0.15660852 - samples/sec: 15.08 - lr: 0.010000\n",
+      "2021-09-21 21:25:17,437 epoch 8 - iter 8/41 - loss 0.15757775 - samples/sec: 15.98 - lr: 0.010000\n",
+      "2021-09-21 21:25:17,780 epoch 8 - iter 12/41 - loss 0.26796034 - samples/sec: 11.69 - lr: 0.010000\n",
+      "2021-09-21 21:25:18,033 epoch 8 - iter 16/41 - loss 0.24100079 - samples/sec: 15.84 - lr: 0.010000\n",
+      "2021-09-21 21:25:18,355 epoch 8 - iter 20/41 - loss 0.24167038 - samples/sec: 12.42 - lr: 0.010000\n",
+      "2021-09-21 21:25:18,793 epoch 8 - iter 24/41 - loss 0.23452413 - samples/sec: 9.15 - lr: 0.010000\n",
+      "2021-09-21 21:25:19,096 epoch 8 - iter 28/41 - loss 0.27254877 - samples/sec: 13.24 - lr: 0.010000\n",
+      "2021-09-21 21:25:19,394 epoch 8 - iter 32/41 - loss 0.23899131 - samples/sec: 13.45 - lr: 0.010000\n",
+      "2021-09-21 21:25:19,729 epoch 8 - iter 36/41 - loss 0.23526837 - samples/sec: 11.95 - lr: 0.010000\n",
+      "2021-09-21 21:25:20,033 epoch 8 - iter 40/41 - loss 0.25562399 - samples/sec: 13.18 - lr: 0.010000\n",
+      "2021-09-21 21:25:20,119 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:25:20,120 EPOCH 8 done: loss 0.2496 - lr 0.0100000\n",
+      "2021-09-21 21:25:20,268 DEV : loss 0.1518421471118927 - score 0.75\n",
+      "2021-09-21 21:25:20,269 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:25:20,271 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:25:20,595 epoch 9 - iter 4/41 - loss 0.18443493 - samples/sec: 13.44 - lr: 0.010000\n",
+      "2021-09-21 21:25:20,945 epoch 9 - iter 8/41 - loss 0.15843988 - samples/sec: 11.45 - lr: 0.010000\n",
+      "2021-09-21 21:25:21,211 epoch 9 - iter 12/41 - loss 0.17952423 - samples/sec: 15.09 - lr: 0.010000\n",
+      "2021-09-21 21:25:21,473 epoch 9 - iter 16/41 - loss 0.18023746 - samples/sec: 15.29 - lr: 0.010000\n",
+      "2021-09-21 21:25:21,773 epoch 9 - iter 20/41 - loss 0.15090905 - samples/sec: 13.37 - lr: 0.010000\n",
+      "2021-09-21 21:25:22,046 epoch 9 - iter 24/41 - loss 0.21131236 - samples/sec: 14.67 - lr: 0.010000\n",
+      "2021-09-21 21:25:22,305 epoch 9 - iter 28/41 - loss 0.22577284 - samples/sec: 15.50 - lr: 0.010000\n",
+      "2021-09-21 21:25:22,653 epoch 9 - iter 32/41 - loss 0.21254278 - samples/sec: 11.51 - lr: 0.010000\n",
+      "2021-09-21 21:25:22,946 epoch 9 - iter 36/41 - loss 0.19495293 - samples/sec: 13.66 - lr: 0.010000\n",
+      "2021-09-21 21:25:23,428 epoch 9 - iter 40/41 - loss 0.18786005 - samples/sec: 8.32 - lr: 0.010000\n",
+      "2021-09-21 21:25:23,499 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:25:23,499 EPOCH 9 done: loss 0.1835 - lr 0.0100000\n",
+      "2021-09-21 21:25:23,638 DEV : loss 0.131852924823761 - score 0.75\n",
+      "2021-09-21 21:25:23,639 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:25:27,858 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:25:28,136 epoch 10 - iter 4/41 - loss 0.01803097 - samples/sec: 15.36 - lr: 0.010000\n",
+      "2021-09-21 21:25:28,372 epoch 10 - iter 8/41 - loss 0.02592955 - samples/sec: 16.95 - lr: 0.010000\n",
+      "2021-09-21 21:25:28,605 epoch 10 - iter 12/41 - loss 0.08464861 - samples/sec: 17.25 - lr: 0.010000\n",
+      "2021-09-21 21:25:28,975 epoch 10 - iter 16/41 - loss 0.09507548 - samples/sec: 10.81 - lr: 0.010000\n",
+      "2021-09-21 21:25:29,221 epoch 10 - iter 20/41 - loss 0.11914930 - samples/sec: 16.33 - lr: 0.010000\n",
+      "2021-09-21 21:25:29,445 epoch 10 - iter 24/41 - loss 0.15195646 - samples/sec: 17.90 - lr: 0.010000\n",
+      "2021-09-21 21:25:29,665 epoch 10 - iter 28/41 - loss 0.15051983 - samples/sec: 18.23 - lr: 0.010000\n",
+      "2021-09-21 21:25:29,904 epoch 10 - iter 32/41 - loss 0.18037786 - samples/sec: 16.74 - lr: 0.010000\n",
+      "2021-09-21 21:25:30,139 epoch 10 - iter 36/41 - loss 0.18791736 - samples/sec: 17.08 - lr: 0.010000\n",
+      "2021-09-21 21:25:30,351 epoch 10 - iter 40/41 - loss 0.17245270 - samples/sec: 18.95 - lr: 0.010000\n",
+      "2021-09-21 21:25:30,424 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:25:30,424 EPOCH 10 done: loss 0.1754 - lr 0.0100000\n",
+      "2021-09-21 21:25:30,532 DEV : loss 0.25687339901924133 - score 0.75\n",
+      "2021-09-21 21:25:30,533 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:25:34,878 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:25:34,878 Testing using best model ...\n",
+      "2021-09-21 21:25:34,880 loading file temp1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:52:12,264 \t0.2\n",
-      "2021-09-08 01:52:12,265 \n",
+      "2021-09-21 21:25:39,831 \t0.4\n",
+      "2021-09-21 21:25:39,831 \n",
       "Results:\n",
-      "- F-score (micro) 0.2\n",
-      "- F-score (macro) 0.1\n",
-      "- Accuracy 0.2\n",
+      "- F-score (micro) 0.4\n",
+      "- F-score (macro) 0.2333\n",
+      "- Accuracy 0.4\n",
       "\n",
       "By class:\n",
       "                                          precision    recall  f1-score   support\n",
       "\n",
-      "  The product has been reviewed as awful     0.3333    1.0000    0.5000         1\n",
-      "    The product has been reviewed as bad     0.0000    0.0000    0.0000         2\n",
-      "The product has been reviewed as neutral     0.0000    0.0000    0.0000         1\n",
-      "   The product has been reviewed as good     0.0000    0.0000    0.0000         0\n",
-      "  The product has been reviewed as great     0.0000    0.0000    0.0000         1\n",
+      "  The product has been reviewed as awful     1.0000    0.5000    0.6667         2\n",
+      "    The product has been reviewed as bad     0.0000    0.0000    0.0000         0\n",
+      "The product has been reviewed as neutral     0.0000    0.0000    0.0000         0\n",
+      "   The product has been reviewed as good     0.0000    0.0000    0.0000         2\n",
+      "  The product has been reviewed as great     0.3333    1.0000    0.5000         1\n",
       "\n",
-      "                               micro avg     0.2000    0.2000    0.2000         5\n",
-      "                               macro avg     0.0667    0.2000    0.1000         5\n",
-      "                            weighted avg     0.0667    0.2000    0.1000         5\n",
-      "                             samples avg     0.2000    0.2000    0.2000         5\n",
+      "                               micro avg     0.4000    0.4000    0.4000         5\n",
+      "                               macro avg     0.2667    0.3000    0.2333         5\n",
+      "                            weighted avg     0.4667    0.4000    0.3667         5\n",
+      "                             samples avg     0.4000    0.4000    0.4000         5\n",
       "\n",
-      "2021-09-08 01:52:12,265 ----------------------------------------------------------------------------------------------------\n",
-      "Accuracy Durchschnitt: 0.32713393476752256\n"
+      "2021-09-21 21:25:39,832 ----------------------------------------------------------------------------------------------------\n",
+      "Accuracy Durchschnitt: 0.3707147814018043\n"
      ]
     }
    ],
@@ -5736,11 +5734,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "id": "86988d74",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.4108258154059681, 0.369882026370576, 0.34073560027758504, 0.3469812630117974, 0.3851492019430951]\n",
+      "0.025612388495552686\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(avg_acc_list)\n",
+    "print(statistics.pstdev(avg_acc_list))"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -5752,7 +5762,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 8,
    "id": "263ee449",
    "metadata": {},
    "outputs": [
@@ -5760,25 +5770,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:53:03,870 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
+      "2021-09-21 21:26:26,982 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:53:07,809 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:26:30,985 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 46/46 [00:00<00:00, 17746.32it/s]"
+      "100%|██████████| 46/46 [00:00<00:00, 14663.17it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:53:07,813 [b'one who does not side with any party in a war or dispute', b'that which is below standard or expectations as of ethics or decency', b'that which is pleasing or valuable or useful', b'remarkable or out of the ordinary in degree or magnitude or effect', b'exceptionally bad or displeasing']\n",
-      "2021-09-08 01:53:07,823 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:07,825 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:26:30,991 [b'one who does not side with any party in a war or dispute', b'that which is below standard or expectations as of ethics or decency', b'that which is pleasing or valuable or useful', b'remarkable or out of the ordinary in degree or magnitude or effect', b'exceptionally bad or displeasing']\n",
+      "2021-09-21 21:26:31,002 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:26:31,004 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -6091,25 +6101,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:53:07,826 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:07,826 Corpus: \"Corpus: 41 train + 4 dev + 5 test sentences\"\n",
-      "2021-09-08 01:53:07,826 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:07,827 Parameters:\n",
-      "2021-09-08 01:53:07,827  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:53:07,827  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:53:07,827  - patience: \"3\"\n",
-      "2021-09-08 01:53:07,828  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:53:07,828  - max_epochs: \"10\"\n",
-      "2021-09-08 01:53:07,828  - shuffle: \"True\"\n",
-      "2021-09-08 01:53:07,829  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:53:07,829  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:53:07,829 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:07,830 Model training base path: \"temp1\"\n",
-      "2021-09-08 01:53:07,830 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:07,830 Device: cuda:0\n",
-      "2021-09-08 01:53:07,831 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:07,831 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:53:07,837 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:26:31,005 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:26:31,005 Corpus: \"Corpus: 41 train + 4 dev + 5 test sentences\"\n",
+      "2021-09-21 21:26:31,006 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:26:31,006 Parameters:\n",
+      "2021-09-21 21:26:31,007  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:26:31,007  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:26:31,008  - patience: \"3\"\n",
+      "2021-09-21 21:26:31,008  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:26:31,009  - max_epochs: \"10\"\n",
+      "2021-09-21 21:26:31,009  - shuffle: \"True\"\n",
+      "2021-09-21 21:26:31,009  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:26:31,010  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:26:31,010 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:26:31,011 Model training base path: \"temp1\"\n",
+      "2021-09-21 21:26:31,011 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:26:31,012 Device: cuda:0\n",
+      "2021-09-21 21:26:31,012 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:26:31,013 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:26:31,021 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -6123,209 +6133,210 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:53:08,135 epoch 1 - iter 4/41 - loss 1.04611470 - samples/sec: 14.69 - lr: 0.020000\n",
-      "2021-09-08 01:53:08,416 epoch 1 - iter 8/41 - loss 0.80406637 - samples/sec: 14.28 - lr: 0.020000\n",
-      "2021-09-08 01:53:08,652 epoch 1 - iter 12/41 - loss 0.74646758 - samples/sec: 16.99 - lr: 0.020000\n",
-      "2021-09-08 01:53:08,885 epoch 1 - iter 16/41 - loss 0.64559728 - samples/sec: 17.23 - lr: 0.020000\n",
-      "2021-09-08 01:53:09,270 epoch 1 - iter 20/41 - loss 0.65988983 - samples/sec: 10.38 - lr: 0.020000\n",
-      "2021-09-08 01:53:09,599 epoch 1 - iter 24/41 - loss 0.67367212 - samples/sec: 12.20 - lr: 0.020000\n",
-      "2021-09-08 01:53:09,901 epoch 1 - iter 28/41 - loss 0.66819651 - samples/sec: 13.26 - lr: 0.020000\n",
-      "2021-09-08 01:53:10,162 epoch 1 - iter 32/41 - loss 0.61786167 - samples/sec: 15.37 - lr: 0.020000\n",
-      "2021-09-08 01:53:10,472 epoch 1 - iter 36/41 - loss 0.63050376 - samples/sec: 12.93 - lr: 0.020000\n",
-      "2021-09-08 01:53:10,714 epoch 1 - iter 40/41 - loss 0.63135186 - samples/sec: 16.59 - lr: 0.020000\n",
-      "2021-09-08 01:53:10,767 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:10,768 EPOCH 1 done: loss 0.6203 - lr 0.0200000\n",
-      "2021-09-08 01:53:10,924 DEV : loss 0.47158968448638916 - score 0.75\n",
-      "2021-09-08 01:53:10,925 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:26:31,313 epoch 1 - iter 4/41 - loss 0.91094072 - samples/sec: 14.86 - lr: 0.020000\n",
+      "2021-09-21 21:26:31,623 epoch 1 - iter 8/41 - loss 0.76712456 - samples/sec: 12.94 - lr: 0.020000\n",
+      "2021-09-21 21:26:31,893 epoch 1 - iter 12/41 - loss 0.78731743 - samples/sec: 14.83 - lr: 0.020000\n",
+      "2021-09-21 21:26:32,270 epoch 1 - iter 16/41 - loss 0.71369644 - samples/sec: 10.63 - lr: 0.020000\n",
+      "2021-09-21 21:26:32,591 epoch 1 - iter 20/41 - loss 0.71975707 - samples/sec: 12.48 - lr: 0.020000\n",
+      "2021-09-21 21:26:32,902 epoch 1 - iter 24/41 - loss 0.67277643 - samples/sec: 12.89 - lr: 0.020000\n",
+      "2021-09-21 21:26:33,170 epoch 1 - iter 28/41 - loss 0.68454486 - samples/sec: 14.97 - lr: 0.020000\n",
+      "2021-09-21 21:26:33,446 epoch 1 - iter 32/41 - loss 0.63292538 - samples/sec: 14.50 - lr: 0.020000\n",
+      "2021-09-21 21:26:33,724 epoch 1 - iter 36/41 - loss 0.61959234 - samples/sec: 14.46 - lr: 0.020000\n",
+      "2021-09-21 21:26:34,204 epoch 1 - iter 40/41 - loss 0.60527201 - samples/sec: 8.34 - lr: 0.020000\n",
+      "2021-09-21 21:26:34,280 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:26:34,281 EPOCH 1 done: loss 0.6200 - lr 0.0200000\n",
+      "2021-09-21 21:26:34,424 DEV : loss 0.6209092140197754 - score 0.5\n",
+      "2021-09-21 21:26:34,425 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:53:15,030 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:15,284 epoch 2 - iter 4/41 - loss 1.74014408 - samples/sec: 17.60 - lr: 0.020000\n",
-      "2021-09-08 01:53:15,568 epoch 2 - iter 8/41 - loss 1.19018926 - samples/sec: 14.08 - lr: 0.020000\n",
-      "2021-09-08 01:53:15,875 epoch 2 - iter 12/41 - loss 1.02071447 - samples/sec: 13.09 - lr: 0.020000\n",
-      "2021-09-08 01:53:16,118 epoch 2 - iter 16/41 - loss 1.03489621 - samples/sec: 16.48 - lr: 0.020000\n",
-      "2021-09-08 01:53:16,437 epoch 2 - iter 20/41 - loss 0.93752453 - samples/sec: 12.55 - lr: 0.020000\n",
-      "2021-09-08 01:53:16,646 epoch 2 - iter 24/41 - loss 0.88194154 - samples/sec: 19.20 - lr: 0.020000\n",
-      "2021-09-08 01:53:17,000 epoch 2 - iter 28/41 - loss 0.79023391 - samples/sec: 11.32 - lr: 0.020000\n",
-      "2021-09-08 01:53:17,225 epoch 2 - iter 32/41 - loss 0.73683802 - samples/sec: 17.82 - lr: 0.020000\n",
-      "2021-09-08 01:53:17,523 epoch 2 - iter 36/41 - loss 0.72911147 - samples/sec: 13.44 - lr: 0.020000\n",
-      "2021-09-08 01:53:17,880 epoch 2 - iter 40/41 - loss 0.71748936 - samples/sec: 11.22 - lr: 0.020000\n",
-      "2021-09-08 01:53:17,928 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:17,929 EPOCH 2 done: loss 0.7050 - lr 0.0200000\n",
-      "2021-09-08 01:53:18,041 DEV : loss 0.5921532511711121 - score 0.5\n",
-      "2021-09-08 01:53:18,041 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:53:18,043 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:18,353 epoch 3 - iter 4/41 - loss 0.25258330 - samples/sec: 13.57 - lr: 0.020000\n",
-      "2021-09-08 01:53:18,633 epoch 3 - iter 8/41 - loss 0.38605223 - samples/sec: 14.32 - lr: 0.020000\n",
-      "2021-09-08 01:53:18,896 epoch 3 - iter 12/41 - loss 0.45906853 - samples/sec: 15.22 - lr: 0.020000\n",
-      "2021-09-08 01:53:19,181 epoch 3 - iter 16/41 - loss 0.39035802 - samples/sec: 14.07 - lr: 0.020000\n",
-      "2021-09-08 01:53:19,382 epoch 3 - iter 20/41 - loss 0.41167161 - samples/sec: 19.88 - lr: 0.020000\n",
-      "2021-09-08 01:53:19,716 epoch 3 - iter 24/41 - loss 0.44130578 - samples/sec: 12.01 - lr: 0.020000\n",
-      "2021-09-08 01:53:19,927 epoch 3 - iter 28/41 - loss 0.44580552 - samples/sec: 19.05 - lr: 0.020000\n",
-      "2021-09-08 01:53:20,335 epoch 3 - iter 32/41 - loss 0.49032593 - samples/sec: 9.81 - lr: 0.020000\n",
-      "2021-09-08 01:53:20,590 epoch 3 - iter 36/41 - loss 0.48918502 - samples/sec: 15.70 - lr: 0.020000\n",
-      "2021-09-08 01:53:20,827 epoch 3 - iter 40/41 - loss 0.53617326 - samples/sec: 16.91 - lr: 0.020000\n",
-      "2021-09-08 01:53:20,908 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:20,908 EPOCH 3 done: loss 0.5277 - lr 0.0200000\n",
-      "2021-09-08 01:53:21,016 DEV : loss 0.62019944190979 - score 0.25\n",
-      "2021-09-08 01:53:21,016 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:53:21,018 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:21,304 epoch 4 - iter 4/41 - loss 0.47852694 - samples/sec: 14.95 - lr: 0.020000\n",
-      "2021-09-08 01:53:21,518 epoch 4 - iter 8/41 - loss 0.42527605 - samples/sec: 18.71 - lr: 0.020000\n",
-      "2021-09-08 01:53:21,785 epoch 4 - iter 12/41 - loss 0.55785010 - samples/sec: 15.03 - lr: 0.020000\n",
-      "2021-09-08 01:53:22,053 epoch 4 - iter 16/41 - loss 0.53168990 - samples/sec: 14.98 - lr: 0.020000\n",
-      "2021-09-08 01:53:22,334 epoch 4 - iter 20/41 - loss 0.47817703 - samples/sec: 14.24 - lr: 0.020000\n",
-      "2021-09-08 01:53:22,708 epoch 4 - iter 24/41 - loss 0.53625508 - samples/sec: 10.70 - lr: 0.020000\n",
-      "2021-09-08 01:53:22,921 epoch 4 - iter 28/41 - loss 0.53944924 - samples/sec: 18.88 - lr: 0.020000\n",
-      "2021-09-08 01:53:23,222 epoch 4 - iter 32/41 - loss 0.50390768 - samples/sec: 13.29 - lr: 0.020000\n",
-      "2021-09-08 01:53:23,452 epoch 4 - iter 36/41 - loss 0.47103454 - samples/sec: 17.49 - lr: 0.020000\n",
-      "2021-09-08 01:53:23,819 epoch 4 - iter 40/41 - loss 0.45630979 - samples/sec: 10.89 - lr: 0.020000\n",
-      "2021-09-08 01:53:23,944 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:23,944 EPOCH 4 done: loss 0.4712 - lr 0.0200000\n",
-      "2021-09-08 01:53:24,105 DEV : loss 0.508305013179779 - score 0.75\n",
-      "2021-09-08 01:53:24,106 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:53:24,109 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:24,462 epoch 5 - iter 4/41 - loss 0.40964269 - samples/sec: 11.80 - lr: 0.020000\n",
-      "2021-09-08 01:53:24,731 epoch 5 - iter 8/41 - loss 0.32159432 - samples/sec: 14.89 - lr: 0.020000\n",
-      "2021-09-08 01:53:25,080 epoch 5 - iter 12/41 - loss 0.30587484 - samples/sec: 11.48 - lr: 0.020000\n",
-      "2021-09-08 01:53:25,368 epoch 5 - iter 16/41 - loss 0.31884885 - samples/sec: 13.90 - lr: 0.020000\n",
-      "2021-09-08 01:53:25,591 epoch 5 - iter 20/41 - loss 0.31597531 - samples/sec: 17.98 - lr: 0.020000\n",
-      "2021-09-08 01:53:25,821 epoch 5 - iter 24/41 - loss 0.33744033 - samples/sec: 17.43 - lr: 0.020000\n",
-      "2021-09-08 01:53:26,129 epoch 5 - iter 28/41 - loss 0.31954101 - samples/sec: 13.02 - lr: 0.020000\n",
-      "2021-09-08 01:53:26,321 epoch 5 - iter 32/41 - loss 0.29211940 - samples/sec: 20.88 - lr: 0.020000\n",
-      "2021-09-08 01:53:26,580 epoch 5 - iter 36/41 - loss 0.27602563 - samples/sec: 15.51 - lr: 0.020000\n",
-      "2021-09-08 01:53:26,820 epoch 5 - iter 40/41 - loss 0.30186179 - samples/sec: 16.70 - lr: 0.020000\n",
-      "2021-09-08 01:53:27,012 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:27,012 EPOCH 5 done: loss 0.3103 - lr 0.0200000\n",
-      "2021-09-08 01:53:27,108 DEV : loss 0.8740692138671875 - score 0.5\n",
-      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:53:27,109 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:53:27,111 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:27,434 epoch 6 - iter 4/41 - loss 0.10301932 - samples/sec: 13.17 - lr: 0.010000\n",
-      "2021-09-08 01:53:27,677 epoch 6 - iter 8/41 - loss 0.27447553 - samples/sec: 16.47 - lr: 0.010000\n",
-      "2021-09-08 01:53:27,987 epoch 6 - iter 12/41 - loss 0.34075268 - samples/sec: 12.94 - lr: 0.010000\n",
-      "2021-09-08 01:53:28,244 epoch 6 - iter 16/41 - loss 0.36600200 - samples/sec: 15.63 - lr: 0.010000\n",
-      "2021-09-08 01:53:28,464 epoch 6 - iter 20/41 - loss 0.43315416 - samples/sec: 18.25 - lr: 0.010000\n",
-      "2021-09-08 01:53:28,757 epoch 6 - iter 24/41 - loss 0.40581210 - samples/sec: 13.67 - lr: 0.010000\n",
-      "2021-09-08 01:53:29,056 epoch 6 - iter 28/41 - loss 0.38899135 - samples/sec: 13.38 - lr: 0.010000\n",
-      "2021-09-08 01:53:29,400 epoch 6 - iter 32/41 - loss 0.38908508 - samples/sec: 11.66 - lr: 0.010000\n",
-      "2021-09-08 01:53:29,687 epoch 6 - iter 36/41 - loss 0.35681856 - samples/sec: 13.97 - lr: 0.010000\n",
-      "2021-09-08 01:53:29,944 epoch 6 - iter 40/41 - loss 0.35288810 - samples/sec: 15.63 - lr: 0.010000\n",
-      "2021-09-08 01:53:30,074 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:26:38,374 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:26:38,683 epoch 2 - iter 4/41 - loss 0.59708298 - samples/sec: 13.95 - lr: 0.020000\n",
+      "2021-09-21 21:26:38,982 epoch 2 - iter 8/41 - loss 0.66375435 - samples/sec: 13.38 - lr: 0.020000\n",
+      "2021-09-21 21:26:39,238 epoch 2 - iter 12/41 - loss 0.54166083 - samples/sec: 15.69 - lr: 0.020000\n",
+      "2021-09-21 21:26:39,516 epoch 2 - iter 16/41 - loss 0.66067382 - samples/sec: 14.41 - lr: 0.020000\n",
+      "2021-09-21 21:26:39,863 epoch 2 - iter 20/41 - loss 0.64294169 - samples/sec: 11.57 - lr: 0.020000\n",
+      "2021-09-21 21:26:40,157 epoch 2 - iter 24/41 - loss 0.60804661 - samples/sec: 13.61 - lr: 0.020000\n",
+      "2021-09-21 21:26:40,622 epoch 2 - iter 28/41 - loss 0.61518184 - samples/sec: 8.62 - lr: 0.020000\n",
+      "2021-09-21 21:26:40,935 epoch 2 - iter 32/41 - loss 0.62779940 - samples/sec: 12.82 - lr: 0.020000\n",
+      "2021-09-21 21:26:41,276 epoch 2 - iter 36/41 - loss 0.63593141 - samples/sec: 11.74 - lr: 0.020000\n",
+      "2021-09-21 21:26:41,562 epoch 2 - iter 40/41 - loss 0.63903353 - samples/sec: 14.03 - lr: 0.020000\n",
+      "2021-09-21 21:26:41,623 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:26:41,623 EPOCH 2 done: loss 0.6415 - lr 0.0200000\n",
+      "2021-09-21 21:26:41,775 DEV : loss 0.47247549891471863 - score 0.75\n",
+      "2021-09-21 21:26:41,776 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:26:46,815 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:26:47,069 epoch 3 - iter 4/41 - loss 0.83440129 - samples/sec: 16.93 - lr: 0.020000\n",
+      "2021-09-21 21:26:47,334 epoch 3 - iter 8/41 - loss 0.74233752 - samples/sec: 15.17 - lr: 0.020000\n",
+      "2021-09-21 21:26:47,599 epoch 3 - iter 12/41 - loss 0.62683344 - samples/sec: 15.11 - lr: 0.020000\n",
+      "2021-09-21 21:26:47,888 epoch 3 - iter 16/41 - loss 0.58995373 - samples/sec: 13.88 - lr: 0.020000\n",
+      "2021-09-21 21:26:48,214 epoch 3 - iter 20/41 - loss 0.55753026 - samples/sec: 12.29 - lr: 0.020000\n",
+      "2021-09-21 21:26:48,515 epoch 3 - iter 24/41 - loss 0.53313137 - samples/sec: 13.33 - lr: 0.020000\n",
+      "2021-09-21 21:26:48,739 epoch 3 - iter 28/41 - loss 0.52651400 - samples/sec: 17.91 - lr: 0.020000\n",
+      "2021-09-21 21:26:48,985 epoch 3 - iter 32/41 - loss 0.52953377 - samples/sec: 16.30 - lr: 0.020000\n",
+      "2021-09-21 21:26:49,246 epoch 3 - iter 36/41 - loss 0.54127990 - samples/sec: 15.33 - lr: 0.020000\n",
+      "2021-09-21 21:26:49,475 epoch 3 - iter 40/41 - loss 0.52218868 - samples/sec: 17.51 - lr: 0.020000\n",
+      "2021-09-21 21:26:49,529 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:26:49,530 EPOCH 3 done: loss 0.5097 - lr 0.0200000\n",
+      "2021-09-21 21:26:49,648 DEV : loss 0.6309836506843567 - score 0.0\n",
+      "2021-09-21 21:26:49,649 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:26:49,651 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:26:49,901 epoch 4 - iter 4/41 - loss 0.34697033 - samples/sec: 17.08 - lr: 0.020000\n",
+      "2021-09-21 21:26:50,246 epoch 4 - iter 8/41 - loss 0.34260300 - samples/sec: 11.63 - lr: 0.020000\n",
+      "2021-09-21 21:26:50,524 epoch 4 - iter 12/41 - loss 0.42611355 - samples/sec: 14.44 - lr: 0.020000\n",
+      "2021-09-21 21:26:50,805 epoch 4 - iter 16/41 - loss 0.48889498 - samples/sec: 14.26 - lr: 0.020000\n",
+      "2021-09-21 21:26:51,083 epoch 4 - iter 20/41 - loss 0.45280928 - samples/sec: 14.38 - lr: 0.020000\n",
+      "2021-09-21 21:26:51,471 epoch 4 - iter 24/41 - loss 0.50995460 - samples/sec: 10.33 - lr: 0.020000\n",
+      "2021-09-21 21:26:51,726 epoch 4 - iter 28/41 - loss 0.46420584 - samples/sec: 15.76 - lr: 0.020000\n",
+      "2021-09-21 21:26:52,088 epoch 4 - iter 32/41 - loss 0.47906651 - samples/sec: 11.07 - lr: 0.020000\n",
+      "2021-09-21 21:26:52,556 epoch 4 - iter 36/41 - loss 0.46972248 - samples/sec: 8.55 - lr: 0.020000\n",
+      "2021-09-21 21:26:52,831 epoch 4 - iter 40/41 - loss 0.47163316 - samples/sec: 14.55 - lr: 0.020000\n",
+      "2021-09-21 21:26:52,943 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:26:52,944 EPOCH 4 done: loss 0.4757 - lr 0.0200000\n",
+      "2021-09-21 21:26:53,113 DEV : loss 0.4873676300048828 - score 0.5\n",
+      "2021-09-21 21:26:53,114 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:26:53,116 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:26:53,457 epoch 5 - iter 4/41 - loss 0.22020204 - samples/sec: 12.70 - lr: 0.020000\n",
+      "2021-09-21 21:26:53,828 epoch 5 - iter 8/41 - loss 0.20656252 - samples/sec: 10.80 - lr: 0.020000\n",
+      "2021-09-21 21:26:54,103 epoch 5 - iter 12/41 - loss 0.27692511 - samples/sec: 14.59 - lr: 0.020000\n",
+      "2021-09-21 21:26:54,437 epoch 5 - iter 16/41 - loss 0.33107680 - samples/sec: 11.97 - lr: 0.020000\n",
+      "2021-09-21 21:26:54,839 epoch 5 - iter 20/41 - loss 0.32275587 - samples/sec: 9.97 - lr: 0.020000\n",
+      "2021-09-21 21:26:55,219 epoch 5 - iter 24/41 - loss 0.37086468 - samples/sec: 10.55 - lr: 0.020000\n",
+      "2021-09-21 21:26:55,490 epoch 5 - iter 28/41 - loss 0.39768237 - samples/sec: 14.80 - lr: 0.020000\n",
+      "2021-09-21 21:26:55,737 epoch 5 - iter 32/41 - loss 0.40392361 - samples/sec: 16.21 - lr: 0.020000\n",
+      "2021-09-21 21:26:55,964 epoch 5 - iter 36/41 - loss 0.43312311 - samples/sec: 17.67 - lr: 0.020000\n",
+      "2021-09-21 21:26:56,187 epoch 5 - iter 40/41 - loss 0.41601447 - samples/sec: 18.00 - lr: 0.020000\n",
+      "2021-09-21 21:26:56,244 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:26:56,245 EPOCH 5 done: loss 0.4157 - lr 0.0200000\n",
+      "2021-09-21 21:26:56,461 DEV : loss 0.9852070808410645 - score 0.25\n",
+      "2021-09-21 21:26:56,462 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:26:56,547 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:26:56,795 epoch 6 - iter 4/41 - loss 0.27168638 - samples/sec: 17.21 - lr: 0.020000\n",
+      "2021-09-21 21:26:57,042 epoch 6 - iter 8/41 - loss 0.36196670 - samples/sec: 16.23 - lr: 0.020000\n",
+      "2021-09-21 21:26:57,259 epoch 6 - iter 12/41 - loss 0.27703187 - samples/sec: 18.45 - lr: 0.020000\n",
+      "2021-09-21 21:26:57,532 epoch 6 - iter 16/41 - loss 0.37976923 - samples/sec: 14.72 - lr: 0.020000\n",
+      "2021-09-21 21:26:57,770 epoch 6 - iter 20/41 - loss 0.38138380 - samples/sec: 16.78 - lr: 0.020000\n",
+      "2021-09-21 21:26:58,007 epoch 6 - iter 24/41 - loss 0.33561438 - samples/sec: 16.95 - lr: 0.020000\n",
+      "2021-09-21 21:26:58,245 epoch 6 - iter 28/41 - loss 0.32974964 - samples/sec: 16.84 - lr: 0.020000\n",
+      "2021-09-21 21:26:58,551 epoch 6 - iter 32/41 - loss 0.34593344 - samples/sec: 13.10 - lr: 0.020000\n",
+      "2021-09-21 21:26:58,793 epoch 6 - iter 36/41 - loss 0.36510431 - samples/sec: 16.60 - lr: 0.020000\n",
+      "2021-09-21 21:26:59,215 epoch 6 - iter 40/41 - loss 0.38009639 - samples/sec: 9.48 - lr: 0.020000\n",
+      "2021-09-21 21:26:59,333 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:53:30,074 EPOCH 6 done: loss 0.3522 - lr 0.0100000\n",
-      "2021-09-08 01:53:30,262 DEV : loss 1.0070585012435913 - score 0.5\n",
-      "2021-09-08 01:53:30,262 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:53:30,270 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:30,567 epoch 7 - iter 4/41 - loss 0.38328181 - samples/sec: 14.37 - lr: 0.010000\n",
-      "2021-09-08 01:53:30,834 epoch 7 - iter 8/41 - loss 0.25604546 - samples/sec: 15.04 - lr: 0.010000\n",
-      "2021-09-08 01:53:31,084 epoch 7 - iter 12/41 - loss 0.22965878 - samples/sec: 16.02 - lr: 0.010000\n",
-      "2021-09-08 01:53:31,369 epoch 7 - iter 16/41 - loss 0.28394816 - samples/sec: 14.07 - lr: 0.010000\n",
-      "2021-09-08 01:53:31,682 epoch 7 - iter 20/41 - loss 0.27991618 - samples/sec: 12.81 - lr: 0.010000\n",
-      "2021-09-08 01:53:32,006 epoch 7 - iter 24/41 - loss 0.28212667 - samples/sec: 12.38 - lr: 0.010000\n",
-      "2021-09-08 01:53:32,249 epoch 7 - iter 28/41 - loss 0.25938722 - samples/sec: 16.49 - lr: 0.010000\n",
-      "2021-09-08 01:53:32,567 epoch 7 - iter 32/41 - loss 0.25690191 - samples/sec: 12.62 - lr: 0.010000\n",
-      "2021-09-08 01:53:32,855 epoch 7 - iter 36/41 - loss 0.23719967 - samples/sec: 13.91 - lr: 0.010000\n",
-      "2021-09-08 01:53:33,138 epoch 7 - iter 40/41 - loss 0.27151013 - samples/sec: 14.16 - lr: 0.010000\n",
-      "2021-09-08 01:53:33,250 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:33,250 EPOCH 7 done: loss 0.2802 - lr 0.0100000\n",
-      "2021-09-08 01:53:33,431 DEV : loss 0.5395466685295105 - score 0.5\n",
-      "2021-09-08 01:53:33,432 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:53:33,495 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:33,880 epoch 8 - iter 4/41 - loss 0.33816232 - samples/sec: 10.85 - lr: 0.010000\n",
-      "2021-09-08 01:53:34,167 epoch 8 - iter 8/41 - loss 0.52547059 - samples/sec: 13.97 - lr: 0.010000\n",
-      "2021-09-08 01:53:34,432 epoch 8 - iter 12/41 - loss 0.36680030 - samples/sec: 15.13 - lr: 0.010000\n",
-      "2021-09-08 01:53:34,705 epoch 8 - iter 16/41 - loss 0.30713482 - samples/sec: 14.68 - lr: 0.010000\n",
-      "2021-09-08 01:53:34,938 epoch 8 - iter 20/41 - loss 0.24727086 - samples/sec: 17.29 - lr: 0.010000\n",
-      "2021-09-08 01:53:35,195 epoch 8 - iter 24/41 - loss 0.23228073 - samples/sec: 15.59 - lr: 0.010000\n",
-      "2021-09-08 01:53:35,424 epoch 8 - iter 28/41 - loss 0.21624156 - samples/sec: 17.47 - lr: 0.010000\n",
-      "2021-09-08 01:53:35,701 epoch 8 - iter 32/41 - loss 0.20421627 - samples/sec: 14.47 - lr: 0.010000\n",
-      "2021-09-08 01:53:35,978 epoch 8 - iter 36/41 - loss 0.20177846 - samples/sec: 14.48 - lr: 0.010000\n",
-      "2021-09-08 01:53:36,267 epoch 8 - iter 40/41 - loss 0.20585804 - samples/sec: 13.86 - lr: 0.010000\n",
-      "2021-09-08 01:53:36,323 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:36,324 EPOCH 8 done: loss 0.2027 - lr 0.0100000\n",
-      "2021-09-08 01:53:36,505 DEV : loss 0.9104657769203186 - score 0.5\n",
-      "2021-09-08 01:53:36,506 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:53:36,508 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:36,841 epoch 9 - iter 4/41 - loss 0.20972855 - samples/sec: 12.58 - lr: 0.010000\n",
-      "2021-09-08 01:53:37,217 epoch 9 - iter 8/41 - loss 0.21468669 - samples/sec: 10.66 - lr: 0.010000\n",
-      "2021-09-08 01:53:37,492 epoch 9 - iter 12/41 - loss 0.18386151 - samples/sec: 14.54 - lr: 0.010000\n",
-      "2021-09-08 01:53:37,855 epoch 9 - iter 16/41 - loss 0.14439671 - samples/sec: 11.05 - lr: 0.010000\n",
-      "2021-09-08 01:53:38,093 epoch 9 - iter 20/41 - loss 0.14911825 - samples/sec: 16.90 - lr: 0.010000\n",
-      "2021-09-08 01:53:38,342 epoch 9 - iter 24/41 - loss 0.13341341 - samples/sec: 16.06 - lr: 0.010000\n",
-      "2021-09-08 01:53:38,646 epoch 9 - iter 28/41 - loss 0.15317616 - samples/sec: 13.23 - lr: 0.010000\n",
-      "2021-09-08 01:53:38,926 epoch 9 - iter 32/41 - loss 0.14956448 - samples/sec: 14.31 - lr: 0.010000\n",
-      "2021-09-08 01:53:39,160 epoch 9 - iter 36/41 - loss 0.15721561 - samples/sec: 17.13 - lr: 0.010000\n",
-      "2021-09-08 01:53:39,410 epoch 9 - iter 40/41 - loss 0.16500944 - samples/sec: 16.04 - lr: 0.010000\n",
-      "2021-09-08 01:53:39,460 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:39,461 EPOCH 9 done: loss 0.1611 - lr 0.0100000\n",
-      "2021-09-08 01:53:39,571 DEV : loss 1.148759365081787 - score 0.25\n",
-      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 01:53:39,571 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:53:39,573 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:39,882 epoch 10 - iter 4/41 - loss 0.05885089 - samples/sec: 14.25 - lr: 0.005000\n",
-      "2021-09-08 01:53:40,147 epoch 10 - iter 8/41 - loss 0.04930687 - samples/sec: 15.14 - lr: 0.005000\n",
-      "2021-09-08 01:53:40,408 epoch 10 - iter 12/41 - loss 0.04779549 - samples/sec: 15.36 - lr: 0.005000\n",
-      "2021-09-08 01:53:40,661 epoch 10 - iter 16/41 - loss 0.04343194 - samples/sec: 15.83 - lr: 0.005000\n",
-      "2021-09-08 01:53:40,940 epoch 10 - iter 20/41 - loss 0.04469273 - samples/sec: 14.35 - lr: 0.005000\n",
-      "2021-09-08 01:53:41,275 epoch 10 - iter 24/41 - loss 0.12294782 - samples/sec: 11.99 - lr: 0.005000\n",
-      "2021-09-08 01:53:41,624 epoch 10 - iter 28/41 - loss 0.11559477 - samples/sec: 11.49 - lr: 0.005000\n",
-      "2021-09-08 01:53:41,955 epoch 10 - iter 32/41 - loss 0.10307460 - samples/sec: 12.10 - lr: 0.005000\n",
-      "2021-09-08 01:53:42,187 epoch 10 - iter 36/41 - loss 0.12057695 - samples/sec: 17.30 - lr: 0.005000\n",
-      "2021-09-08 01:53:42,416 epoch 10 - iter 40/41 - loss 0.12003667 - samples/sec: 17.55 - lr: 0.005000\n",
-      "2021-09-08 01:53:42,464 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:42,464 EPOCH 10 done: loss 0.1173 - lr 0.0050000\n",
-      "2021-09-08 01:53:42,643 DEV : loss 1.3849620819091797 - score 0.25\n",
-      "2021-09-08 01:53:42,644 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:53:46,390 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:46,390 Testing using best model ...\n",
-      "2021-09-08 01:53:46,416 loading file temp1/best-model.pt\n",
+      "2021-09-21 21:26:59,334 EPOCH 6 done: loss 0.3909 - lr 0.0200000\n",
+      "2021-09-21 21:26:59,505 DEV : loss 0.5128111839294434 - score 0.5\n",
+      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:26:59,506 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:26:59,508 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:26:59,810 epoch 7 - iter 4/41 - loss 0.48606857 - samples/sec: 14.37 - lr: 0.010000\n",
+      "2021-09-21 21:27:00,109 epoch 7 - iter 8/41 - loss 0.33577424 - samples/sec: 13.41 - lr: 0.010000\n",
+      "2021-09-21 21:27:00,439 epoch 7 - iter 12/41 - loss 0.41914137 - samples/sec: 12.15 - lr: 0.010000\n",
+      "2021-09-21 21:27:00,878 epoch 7 - iter 16/41 - loss 0.37232463 - samples/sec: 9.12 - lr: 0.010000\n",
+      "2021-09-21 21:27:01,305 epoch 7 - iter 20/41 - loss 0.37842048 - samples/sec: 9.37 - lr: 0.010000\n",
+      "2021-09-21 21:27:01,586 epoch 7 - iter 24/41 - loss 0.32084373 - samples/sec: 14.26 - lr: 0.010000\n",
+      "2021-09-21 21:27:01,841 epoch 7 - iter 28/41 - loss 0.29379381 - samples/sec: 15.77 - lr: 0.010000\n",
+      "2021-09-21 21:27:02,195 epoch 7 - iter 32/41 - loss 0.32278134 - samples/sec: 11.32 - lr: 0.010000\n",
+      "2021-09-21 21:27:02,480 epoch 7 - iter 36/41 - loss 0.32193854 - samples/sec: 14.07 - lr: 0.010000\n",
+      "2021-09-21 21:27:02,936 epoch 7 - iter 40/41 - loss 0.32973127 - samples/sec: 8.77 - lr: 0.010000\n",
+      "2021-09-21 21:27:02,998 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:27:02,998 EPOCH 7 done: loss 0.3217 - lr 0.0100000\n",
+      "2021-09-21 21:27:03,371 DEV : loss 0.7274895906448364 - score 0.25\n",
+      "2021-09-21 21:27:03,372 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:27:03,374 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:27:03,615 epoch 8 - iter 4/41 - loss 0.35307456 - samples/sec: 17.76 - lr: 0.010000\n",
+      "2021-09-21 21:27:03,855 epoch 8 - iter 8/41 - loss 0.26312911 - samples/sec: 16.70 - lr: 0.010000\n",
+      "2021-09-21 21:27:04,181 epoch 8 - iter 12/41 - loss 0.21569630 - samples/sec: 12.30 - lr: 0.010000\n",
+      "2021-09-21 21:27:04,419 epoch 8 - iter 16/41 - loss 0.27610520 - samples/sec: 16.84 - lr: 0.010000\n",
+      "2021-09-21 21:27:04,632 epoch 8 - iter 20/41 - loss 0.24419873 - samples/sec: 18.88 - lr: 0.010000\n",
+      "2021-09-21 21:27:04,846 epoch 8 - iter 24/41 - loss 0.21233727 - samples/sec: 18.74 - lr: 0.010000\n",
+      "2021-09-21 21:27:05,166 epoch 8 - iter 28/41 - loss 0.20328623 - samples/sec: 12.51 - lr: 0.010000\n",
+      "2021-09-21 21:27:05,424 epoch 8 - iter 32/41 - loss 0.22886757 - samples/sec: 15.55 - lr: 0.010000\n",
+      "2021-09-21 21:27:05,708 epoch 8 - iter 36/41 - loss 0.22506132 - samples/sec: 14.13 - lr: 0.010000\n",
+      "2021-09-21 21:27:05,973 epoch 8 - iter 40/41 - loss 0.22463668 - samples/sec: 15.13 - lr: 0.010000\n",
+      "2021-09-21 21:27:06,028 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:27:06,029 EPOCH 8 done: loss 0.2192 - lr 0.0100000\n",
+      "2021-09-21 21:27:06,241 DEV : loss 0.8876001834869385 - score 0.25\n",
+      "2021-09-21 21:27:06,242 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:27:06,333 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:27:06,577 epoch 9 - iter 4/41 - loss 0.42856743 - samples/sec: 17.56 - lr: 0.010000\n",
+      "2021-09-21 21:27:06,836 epoch 9 - iter 8/41 - loss 0.27519325 - samples/sec: 15.46 - lr: 0.010000\n",
+      "2021-09-21 21:27:07,101 epoch 9 - iter 12/41 - loss 0.20789624 - samples/sec: 15.13 - lr: 0.010000\n",
+      "2021-09-21 21:27:07,418 epoch 9 - iter 16/41 - loss 0.21280420 - samples/sec: 12.65 - lr: 0.010000\n",
+      "2021-09-21 21:27:07,728 epoch 9 - iter 20/41 - loss 0.20581510 - samples/sec: 12.94 - lr: 0.010000\n",
+      "2021-09-21 21:27:08,032 epoch 9 - iter 24/41 - loss 0.22578263 - samples/sec: 13.16 - lr: 0.010000\n",
+      "2021-09-21 21:27:08,318 epoch 9 - iter 28/41 - loss 0.20649971 - samples/sec: 14.04 - lr: 0.010000\n",
+      "2021-09-21 21:27:08,627 epoch 9 - iter 32/41 - loss 0.19429288 - samples/sec: 12.97 - lr: 0.010000\n",
+      "2021-09-21 21:27:08,914 epoch 9 - iter 36/41 - loss 0.18135005 - samples/sec: 13.96 - lr: 0.010000\n",
+      "2021-09-21 21:27:09,350 epoch 9 - iter 40/41 - loss 0.16654923 - samples/sec: 9.19 - lr: 0.010000\n",
+      "2021-09-21 21:27:09,456 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:27:09,457 EPOCH 9 done: loss 0.1756 - lr 0.0100000\n",
+      "2021-09-21 21:27:09,624 DEV : loss 1.1317461729049683 - score 0.25\n",
+      "2021-09-21 21:27:09,625 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:27:09,627 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:27:09,939 epoch 10 - iter 4/41 - loss 0.04040472 - samples/sec: 13.56 - lr: 0.010000\n",
+      "2021-09-21 21:27:10,261 epoch 10 - iter 8/41 - loss 0.05655873 - samples/sec: 12.45 - lr: 0.010000\n",
+      "2021-09-21 21:27:10,532 epoch 10 - iter 12/41 - loss 0.14971497 - samples/sec: 14.79 - lr: 0.010000\n",
+      "2021-09-21 21:27:10,809 epoch 10 - iter 16/41 - loss 0.20919727 - samples/sec: 14.47 - lr: 0.010000\n",
+      "2021-09-21 21:27:11,095 epoch 10 - iter 20/41 - loss 0.17027546 - samples/sec: 14.02 - lr: 0.010000\n",
+      "2021-09-21 21:27:11,384 epoch 10 - iter 24/41 - loss 0.20351003 - samples/sec: 13.85 - lr: 0.010000\n",
+      "2021-09-21 21:27:11,866 epoch 10 - iter 28/41 - loss 0.23602504 - samples/sec: 8.31 - lr: 0.010000\n",
+      "2021-09-21 21:27:12,146 epoch 10 - iter 32/41 - loss 0.20987981 - samples/sec: 14.34 - lr: 0.010000\n",
+      "2021-09-21 21:27:12,511 epoch 10 - iter 36/41 - loss 0.20921290 - samples/sec: 10.95 - lr: 0.010000\n",
+      "2021-09-21 21:27:12,781 epoch 10 - iter 40/41 - loss 0.18963456 - samples/sec: 14.88 - lr: 0.010000\n",
+      "2021-09-21 21:27:12,938 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:27:12,938 EPOCH 10 done: loss 0.1917 - lr 0.0100000\n",
+      "2021-09-21 21:27:13,098 DEV : loss 1.2318404912948608 - score 0.25\n",
+      "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 21:27:13,099 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:27:17,134 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:27:17,135 Testing using best model ...\n",
+      "2021-09-21 21:27:17,137 loading file temp1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:53:54,757 \t0.2\n",
-      "2021-09-08 01:53:54,757 \n",
+      "2021-09-21 21:27:22,067 \t0.2\n",
+      "2021-09-21 21:27:22,068 \n",
       "Results:\n",
       "- F-score (micro) 0.2\n",
-      "- F-score (macro) 0.1\n",
+      "- F-score (macro) 0.0667\n",
       "- Accuracy 0.2\n",
       "\n",
       "By class:\n",
       "                                                                      precision    recall  f1-score   support\n",
       "\n",
-      "            one who does not side with any party in a war or dispute     0.0000    0.0000    0.0000         2\n",
-      "that which is below standard or expectations as of ethics or decency     0.0000    0.0000    0.0000         1\n",
+      "            one who does not side with any party in a war or dispute     0.0000    0.0000    0.0000         1\n",
+      "that which is below standard or expectations as of ethics or decency     0.2000    1.0000    0.3333         1\n",
       "                        that which is pleasing or valuable or useful     0.0000    0.0000    0.0000         1\n",
       "  remarkable or out of the ordinary in degree or magnitude or effect     0.0000    0.0000    0.0000         0\n",
-      "                                    exceptionally bad or displeasing     0.3333    1.0000    0.5000         1\n",
+      "                                    exceptionally bad or displeasing     0.0000    0.0000    0.0000         2\n",
       "\n",
       "                                                           micro avg     0.2000    0.2000    0.2000         5\n",
-      "                                                           macro avg     0.0667    0.2000    0.1000         5\n",
-      "                                                        weighted avg     0.0667    0.2000    0.1000         5\n",
+      "                                                           macro avg     0.0400    0.2000    0.0667         5\n",
+      "                                                        weighted avg     0.0400    0.2000    0.0667         5\n",
       "                                                         samples avg     0.2000    0.2000    0.2000         5\n",
       "\n",
-      "2021-09-08 01:53:54,758 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:54:47,684 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
+      "2021-09-21 21:27:22,068 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:28:06,352 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:54:51,825 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:28:10,345 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 46/46 [00:00<00:00, 18117.94it/s]"
+      "100%|██████████| 46/46 [00:00<00:00, 17171.41it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:54:51,829 [b'one who does not side with any party in a war or dispute', b'that which is below standard or expectations as of ethics or decency', b'that which is pleasing or valuable or useful', b'remarkable or out of the ordinary in degree or magnitude or effect', b'exceptionally bad or displeasing']\n",
-      "2021-09-08 01:54:51,841 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:54:51,843 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:28:10,349 [b'one who does not side with any party in a war or dispute', b'that which is below standard or expectations as of ethics or decency', b'that which is pleasing or valuable or useful', b'remarkable or out of the ordinary in degree or magnitude or effect', b'exceptionally bad or displeasing']\n",
+      "2021-09-21 21:28:10,359 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:28:10,361 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -6638,25 +6649,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:54:51,844 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:54:51,844 Corpus: \"Corpus: 41 train + 4 dev + 5 test sentences\"\n",
-      "2021-09-08 01:54:51,845 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:54:51,845 Parameters:\n",
-      "2021-09-08 01:54:51,846  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:54:51,846  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:54:51,847  - patience: \"3\"\n",
-      "2021-09-08 01:54:51,847  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:54:51,848  - max_epochs: \"10\"\n",
-      "2021-09-08 01:54:51,848  - shuffle: \"True\"\n",
-      "2021-09-08 01:54:51,849  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:54:51,849  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:54:51,850 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:54:51,850 Model training base path: \"temp1\"\n",
-      "2021-09-08 01:54:51,850 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:54:51,851 Device: cuda:0\n",
-      "2021-09-08 01:54:51,852 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:54:51,852 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:54:51,861 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:28:10,362 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:28:10,362 Corpus: \"Corpus: 41 train + 4 dev + 5 test sentences\"\n",
+      "2021-09-21 21:28:10,362 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:28:10,363 Parameters:\n",
+      "2021-09-21 21:28:10,363  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:28:10,364  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:28:10,364  - patience: \"3\"\n",
+      "2021-09-21 21:28:10,365  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:28:10,365  - max_epochs: \"10\"\n",
+      "2021-09-21 21:28:10,366  - shuffle: \"True\"\n",
+      "2021-09-21 21:28:10,366  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:28:10,367  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:28:10,367 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:28:10,367 Model training base path: \"temp1\"\n",
+      "2021-09-21 21:28:10,368 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:28:10,368 Device: cuda:0\n",
+      "2021-09-21 21:28:10,369 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:28:10,369 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:28:10,377 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -6670,209 +6681,222 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:54:52,174 epoch 1 - iter 4/41 - loss 0.72307038 - samples/sec: 13.57 - lr: 0.020000\n",
-      "2021-09-08 01:54:52,759 epoch 1 - iter 8/41 - loss 0.63406489 - samples/sec: 6.84 - lr: 0.020000\n",
-      "2021-09-08 01:54:53,008 epoch 1 - iter 12/41 - loss 0.53743511 - samples/sec: 16.10 - lr: 0.020000\n",
-      "2021-09-08 01:54:53,264 epoch 1 - iter 16/41 - loss 0.42829901 - samples/sec: 15.68 - lr: 0.020000\n",
-      "2021-09-08 01:54:53,561 epoch 1 - iter 20/41 - loss 0.63961373 - samples/sec: 13.52 - lr: 0.020000\n",
-      "2021-09-08 01:54:53,911 epoch 1 - iter 24/41 - loss 0.60852211 - samples/sec: 11.44 - lr: 0.020000\n",
-      "2021-09-08 01:54:54,142 epoch 1 - iter 28/41 - loss 0.57392764 - samples/sec: 17.32 - lr: 0.020000\n",
-      "2021-09-08 01:54:54,366 epoch 1 - iter 32/41 - loss 0.50992782 - samples/sec: 17.97 - lr: 0.020000\n",
-      "2021-09-08 01:54:54,611 epoch 1 - iter 36/41 - loss 0.56729944 - samples/sec: 16.33 - lr: 0.020000\n",
-      "2021-09-08 01:54:54,962 epoch 1 - iter 40/41 - loss 0.52461759 - samples/sec: 11.41 - lr: 0.020000\n",
-      "2021-09-08 01:54:55,035 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:54:55,035 EPOCH 1 done: loss 0.5141 - lr 0.0200000\n",
-      "2021-09-08 01:54:55,234 DEV : loss 1.056613564491272 - score 0.25\n",
-      "2021-09-08 01:54:55,235 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:28:10,743 epoch 1 - iter 4/41 - loss 0.72754000 - samples/sec: 11.80 - lr: 0.020000\n",
+      "2021-09-21 21:28:11,076 epoch 1 - iter 8/41 - loss 0.60622861 - samples/sec: 12.06 - lr: 0.020000\n",
+      "2021-09-21 21:28:11,382 epoch 1 - iter 12/41 - loss 0.65558382 - samples/sec: 13.10 - lr: 0.020000\n",
+      "2021-09-21 21:28:11,672 epoch 1 - iter 16/41 - loss 0.52377068 - samples/sec: 13.82 - lr: 0.020000\n",
+      "2021-09-21 21:28:11,969 epoch 1 - iter 20/41 - loss 0.56027935 - samples/sec: 13.51 - lr: 0.020000\n",
+      "2021-09-21 21:28:12,272 epoch 1 - iter 24/41 - loss 0.56306678 - samples/sec: 13.22 - lr: 0.020000\n",
+      "2021-09-21 21:28:12,543 epoch 1 - iter 28/41 - loss 0.59851283 - samples/sec: 14.80 - lr: 0.020000\n",
+      "2021-09-21 21:28:12,832 epoch 1 - iter 32/41 - loss 0.63180389 - samples/sec: 13.87 - lr: 0.020000\n",
+      "2021-09-21 21:28:13,178 epoch 1 - iter 36/41 - loss 0.64048483 - samples/sec: 11.57 - lr: 0.020000\n",
+      "2021-09-21 21:28:13,445 epoch 1 - iter 40/41 - loss 0.66087359 - samples/sec: 15.03 - lr: 0.020000\n",
+      "2021-09-21 21:28:13,509 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:28:13,510 EPOCH 1 done: loss 0.6614 - lr 0.0200000\n",
+      "2021-09-21 21:28:13,631 DEV : loss 0.7176288366317749 - score 0.25\n",
+      "2021-09-21 21:28:13,632 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:55:01,035 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:01,329 epoch 2 - iter 4/41 - loss 0.98763628 - samples/sec: 14.68 - lr: 0.020000\n",
-      "2021-09-08 01:55:01,656 epoch 2 - iter 8/41 - loss 0.93664805 - samples/sec: 12.26 - lr: 0.020000\n",
-      "2021-09-08 01:55:02,004 epoch 2 - iter 12/41 - loss 0.75722024 - samples/sec: 11.51 - lr: 0.020000\n",
-      "2021-09-08 01:55:02,247 epoch 2 - iter 16/41 - loss 0.81531894 - samples/sec: 16.51 - lr: 0.020000\n",
-      "2021-09-08 01:55:02,503 epoch 2 - iter 20/41 - loss 0.72489610 - samples/sec: 15.70 - lr: 0.020000\n",
-      "2021-09-08 01:55:02,769 epoch 2 - iter 24/41 - loss 0.79520567 - samples/sec: 15.07 - lr: 0.020000\n",
-      "2021-09-08 01:55:03,258 epoch 2 - iter 28/41 - loss 0.80211605 - samples/sec: 8.20 - lr: 0.020000\n",
-      "2021-09-08 01:55:03,514 epoch 2 - iter 32/41 - loss 0.77815222 - samples/sec: 15.65 - lr: 0.020000\n",
-      "2021-09-08 01:55:03,901 epoch 2 - iter 36/41 - loss 0.77009282 - samples/sec: 10.36 - lr: 0.020000\n",
-      "2021-09-08 01:55:04,176 epoch 2 - iter 40/41 - loss 0.74951026 - samples/sec: 14.55 - lr: 0.020000\n",
-      "2021-09-08 01:55:04,225 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:04,226 EPOCH 2 done: loss 0.7316 - lr 0.0200000\n",
-      "2021-09-08 01:55:04,552 DEV : loss 0.6288882493972778 - score 0.0\n",
-      "2021-09-08 01:55:04,553 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:55:04,557 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:04,796 epoch 3 - iter 4/41 - loss 0.25303496 - samples/sec: 18.18 - lr: 0.020000\n",
-      "2021-09-08 01:55:05,010 epoch 3 - iter 8/41 - loss 0.17049237 - samples/sec: 18.75 - lr: 0.020000\n",
-      "2021-09-08 01:55:05,317 epoch 3 - iter 12/41 - loss 0.32828081 - samples/sec: 13.05 - lr: 0.020000\n",
-      "2021-09-08 01:55:05,649 epoch 3 - iter 16/41 - loss 0.40623459 - samples/sec: 12.07 - lr: 0.020000\n",
-      "2021-09-08 01:55:05,959 epoch 3 - iter 20/41 - loss 0.42621967 - samples/sec: 12.92 - lr: 0.020000\n",
-      "2021-09-08 01:55:06,200 epoch 3 - iter 24/41 - loss 0.43719456 - samples/sec: 16.60 - lr: 0.020000\n",
-      "2021-09-08 01:55:06,543 epoch 3 - iter 28/41 - loss 0.43792837 - samples/sec: 11.70 - lr: 0.020000\n",
-      "2021-09-08 01:55:06,996 epoch 3 - iter 32/41 - loss 0.48683291 - samples/sec: 8.85 - lr: 0.020000\n",
-      "2021-09-08 01:55:07,308 epoch 3 - iter 36/41 - loss 0.51061238 - samples/sec: 12.84 - lr: 0.020000\n",
-      "2021-09-08 01:55:07,530 epoch 3 - iter 40/41 - loss 0.53287708 - samples/sec: 18.07 - lr: 0.020000\n",
-      "2021-09-08 01:55:07,588 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:07,589 EPOCH 3 done: loss 0.5349 - lr 0.0200000\n",
-      "2021-09-08 01:55:07,812 DEV : loss 0.5699238777160645 - score 0.0\n",
-      "2021-09-08 01:55:07,813 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:55:07,815 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:08,151 epoch 4 - iter 4/41 - loss 0.69713192 - samples/sec: 14.07 - lr: 0.020000\n",
-      "2021-09-08 01:55:08,404 epoch 4 - iter 8/41 - loss 0.74939398 - samples/sec: 15.87 - lr: 0.020000\n",
-      "2021-09-08 01:55:08,647 epoch 4 - iter 12/41 - loss 0.74418873 - samples/sec: 16.46 - lr: 0.020000\n",
-      "2021-09-08 01:55:08,926 epoch 4 - iter 16/41 - loss 0.72989903 - samples/sec: 14.35 - lr: 0.020000\n",
-      "2021-09-08 01:55:09,435 epoch 4 - iter 20/41 - loss 0.70981232 - samples/sec: 7.87 - lr: 0.020000\n",
-      "2021-09-08 01:55:09,756 epoch 4 - iter 24/41 - loss 0.69190463 - samples/sec: 12.50 - lr: 0.020000\n",
-      "2021-09-08 01:55:10,030 epoch 4 - iter 28/41 - loss 0.68696500 - samples/sec: 14.61 - lr: 0.020000\n",
-      "2021-09-08 01:55:10,384 epoch 4 - iter 32/41 - loss 0.69032995 - samples/sec: 11.32 - lr: 0.020000\n",
-      "2021-09-08 01:55:10,671 epoch 4 - iter 36/41 - loss 0.68729077 - samples/sec: 14.00 - lr: 0.020000\n",
-      "2021-09-08 01:55:10,949 epoch 4 - iter 40/41 - loss 0.68726357 - samples/sec: 14.42 - lr: 0.020000\n",
-      "2021-09-08 01:55:10,996 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:10,997 EPOCH 4 done: loss 0.6870 - lr 0.0200000\n",
-      "2021-09-08 01:55:11,224 DEV : loss 0.5568382740020752 - score 0.5\n",
-      "2021-09-08 01:55:11,225 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:28:17,813 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:28:18,106 epoch 2 - iter 4/41 - loss 0.76397562 - samples/sec: 14.76 - lr: 0.020000\n",
+      "2021-09-21 21:28:18,365 epoch 2 - iter 8/41 - loss 0.73028579 - samples/sec: 15.46 - lr: 0.020000\n",
+      "2021-09-21 21:28:18,657 epoch 2 - iter 12/41 - loss 0.75011708 - samples/sec: 13.74 - lr: 0.020000\n",
+      "2021-09-21 21:28:18,941 epoch 2 - iter 16/41 - loss 0.72304532 - samples/sec: 14.12 - lr: 0.020000\n",
+      "2021-09-21 21:28:19,212 epoch 2 - iter 20/41 - loss 0.74484907 - samples/sec: 14.82 - lr: 0.020000\n",
+      "2021-09-21 21:28:19,538 epoch 2 - iter 24/41 - loss 0.73503221 - samples/sec: 12.29 - lr: 0.020000\n",
+      "2021-09-21 21:28:19,849 epoch 2 - iter 28/41 - loss 0.72073503 - samples/sec: 12.87 - lr: 0.020000\n",
+      "2021-09-21 21:28:20,168 epoch 2 - iter 32/41 - loss 0.71256536 - samples/sec: 12.57 - lr: 0.020000\n",
+      "2021-09-21 21:28:20,445 epoch 2 - iter 36/41 - loss 0.70575796 - samples/sec: 14.43 - lr: 0.020000\n",
+      "2021-09-21 21:28:20,727 epoch 2 - iter 40/41 - loss 0.69991757 - samples/sec: 14.24 - lr: 0.020000\n",
+      "2021-09-21 21:28:20,792 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:28:20,793 EPOCH 2 done: loss 0.7050 - lr 0.0200000\n",
+      "2021-09-21 21:28:20,915 DEV : loss 0.6140561103820801 - score 0.0\n",
+      "2021-09-21 21:28:20,916 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:28:20,918 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:28:21,258 epoch 3 - iter 4/41 - loss 0.61503689 - samples/sec: 12.56 - lr: 0.020000\n",
+      "2021-09-21 21:28:21,514 epoch 3 - iter 8/41 - loss 0.62869778 - samples/sec: 15.67 - lr: 0.020000\n",
+      "2021-09-21 21:28:21,793 epoch 3 - iter 12/41 - loss 0.65114458 - samples/sec: 14.37 - lr: 0.020000\n",
+      "2021-09-21 21:28:22,067 epoch 3 - iter 16/41 - loss 0.65288636 - samples/sec: 14.65 - lr: 0.020000\n",
+      "2021-09-21 21:28:22,315 epoch 3 - iter 20/41 - loss 0.64848439 - samples/sec: 16.11 - lr: 0.020000\n",
+      "2021-09-21 21:28:22,574 epoch 3 - iter 24/41 - loss 0.65178440 - samples/sec: 15.54 - lr: 0.020000\n",
+      "2021-09-21 21:28:22,867 epoch 3 - iter 28/41 - loss 0.65052789 - samples/sec: 13.67 - lr: 0.020000\n",
+      "2021-09-21 21:28:23,130 epoch 3 - iter 32/41 - loss 0.65261689 - samples/sec: 15.23 - lr: 0.020000\n",
+      "2021-09-21 21:28:23,463 epoch 3 - iter 36/41 - loss 0.65319238 - samples/sec: 12.04 - lr: 0.020000\n",
+      "2021-09-21 21:28:23,758 epoch 3 - iter 40/41 - loss 0.65147356 - samples/sec: 13.56 - lr: 0.020000\n",
+      "2021-09-21 21:28:23,820 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:28:23,821 EPOCH 3 done: loss 0.6517 - lr 0.0200000\n",
+      "2021-09-21 21:28:23,939 DEV : loss 0.578702449798584 - score 0.25\n",
+      "2021-09-21 21:28:23,940 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:55:15,244 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:15,564 epoch 5 - iter 4/41 - loss 0.63184483 - samples/sec: 15.67 - lr: 0.020000\n",
-      "2021-09-08 01:55:15,810 epoch 5 - iter 8/41 - loss 0.63313121 - samples/sec: 16.35 - lr: 0.020000\n",
-      "2021-09-08 01:55:16,073 epoch 5 - iter 12/41 - loss 0.62167774 - samples/sec: 15.23 - lr: 0.020000\n",
-      "2021-09-08 01:55:16,350 epoch 5 - iter 16/41 - loss 0.63575307 - samples/sec: 14.49 - lr: 0.020000\n",
-      "2021-09-08 01:55:16,627 epoch 5 - iter 20/41 - loss 0.65796344 - samples/sec: 14.43 - lr: 0.020000\n",
-      "2021-09-08 01:55:16,942 epoch 5 - iter 24/41 - loss 0.65942646 - samples/sec: 12.72 - lr: 0.020000\n",
-      "2021-09-08 01:55:17,165 epoch 5 - iter 28/41 - loss 0.65596107 - samples/sec: 18.02 - lr: 0.020000\n",
-      "2021-09-08 01:55:17,579 epoch 5 - iter 32/41 - loss 0.65343459 - samples/sec: 9.67 - lr: 0.020000\n",
-      "2021-09-08 01:55:17,845 epoch 5 - iter 36/41 - loss 0.64941159 - samples/sec: 15.10 - lr: 0.020000\n",
-      "2021-09-08 01:55:18,248 epoch 5 - iter 40/41 - loss 0.64898629 - samples/sec: 9.94 - lr: 0.020000\n",
-      "2021-09-08 01:55:18,312 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:18,313 EPOCH 5 done: loss 0.6492 - lr 0.0200000\n",
-      "2021-09-08 01:55:18,503 DEV : loss 0.5394191741943359 - score 0.0\n",
-      "2021-09-08 01:55:18,504 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:55:18,506 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:18,767 epoch 6 - iter 4/41 - loss 0.62412108 - samples/sec: 16.41 - lr: 0.020000\n",
-      "2021-09-08 01:55:19,145 epoch 6 - iter 8/41 - loss 0.61907699 - samples/sec: 10.60 - lr: 0.020000\n",
-      "2021-09-08 01:55:19,378 epoch 6 - iter 12/41 - loss 0.63306943 - samples/sec: 17.23 - lr: 0.020000\n",
-      "2021-09-08 01:55:19,604 epoch 6 - iter 16/41 - loss 0.63309214 - samples/sec: 17.78 - lr: 0.020000\n",
-      "2021-09-08 01:55:19,877 epoch 6 - iter 20/41 - loss 0.62662952 - samples/sec: 14.64 - lr: 0.020000\n",
-      "2021-09-08 01:55:20,249 epoch 6 - iter 24/41 - loss 0.62285039 - samples/sec: 10.79 - lr: 0.020000\n",
-      "2021-09-08 01:55:20,491 epoch 6 - iter 28/41 - loss 0.62980385 - samples/sec: 16.58 - lr: 0.020000\n",
-      "2021-09-08 01:55:20,727 epoch 6 - iter 32/41 - loss 0.63473950 - samples/sec: 16.97 - lr: 0.020000\n",
-      "2021-09-08 01:55:20,995 epoch 6 - iter 36/41 - loss 0.63910305 - samples/sec: 14.95 - lr: 0.020000\n",
-      "2021-09-08 01:55:21,486 epoch 6 - iter 40/41 - loss 0.63894585 - samples/sec: 8.16 - lr: 0.020000\n",
-      "2021-09-08 01:55:21,635 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:28:28,067 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:28:28,348 epoch 4 - iter 4/41 - loss 0.65051688 - samples/sec: 15.52 - lr: 0.020000\n",
+      "2021-09-21 21:28:28,575 epoch 4 - iter 8/41 - loss 0.64951831 - samples/sec: 17.65 - lr: 0.020000\n",
+      "2021-09-21 21:28:29,010 epoch 4 - iter 12/41 - loss 0.64210722 - samples/sec: 9.20 - lr: 0.020000\n",
+      "2021-09-21 21:28:29,338 epoch 4 - iter 16/41 - loss 0.64139479 - samples/sec: 12.24 - lr: 0.020000\n",
+      "2021-09-21 21:28:29,591 epoch 4 - iter 20/41 - loss 0.64270718 - samples/sec: 15.88 - lr: 0.020000\n",
+      "2021-09-21 21:28:29,949 epoch 4 - iter 24/41 - loss 0.63939555 - samples/sec: 11.18 - lr: 0.020000\n",
+      "2021-09-21 21:28:30,175 epoch 4 - iter 28/41 - loss 0.64424431 - samples/sec: 17.79 - lr: 0.020000\n",
+      "2021-09-21 21:28:30,439 epoch 4 - iter 32/41 - loss 0.64411063 - samples/sec: 15.19 - lr: 0.020000\n",
+      "2021-09-21 21:28:30,678 epoch 4 - iter 36/41 - loss 0.64691341 - samples/sec: 16.76 - lr: 0.020000\n",
+      "2021-09-21 21:28:30,949 epoch 4 - iter 40/41 - loss 0.65278751 - samples/sec: 14.81 - lr: 0.020000\n",
+      "2021-09-21 21:28:31,045 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:28:31,046 EPOCH 4 done: loss 0.6527 - lr 0.0200000\n",
+      "2021-09-21 21:28:31,163 DEV : loss 0.5720711946487427 - score 0.0\n",
+      "2021-09-21 21:28:31,164 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:28:31,166 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:28:31,410 epoch 5 - iter 4/41 - loss 0.64705276 - samples/sec: 17.68 - lr: 0.020000\n",
+      "2021-09-21 21:28:31,654 epoch 5 - iter 8/41 - loss 0.63108792 - samples/sec: 16.49 - lr: 0.020000\n",
+      "2021-09-21 21:28:31,885 epoch 5 - iter 12/41 - loss 0.63262435 - samples/sec: 17.31 - lr: 0.020000\n",
+      "2021-09-21 21:28:32,183 epoch 5 - iter 16/41 - loss 0.63984121 - samples/sec: 13.44 - lr: 0.020000\n",
+      "2021-09-21 21:28:32,484 epoch 5 - iter 20/41 - loss 0.64355032 - samples/sec: 13.33 - lr: 0.020000\n",
+      "2021-09-21 21:28:32,811 epoch 5 - iter 24/41 - loss 0.63816607 - samples/sec: 12.27 - lr: 0.020000\n",
+      "2021-09-21 21:28:33,072 epoch 5 - iter 28/41 - loss 0.63842149 - samples/sec: 15.35 - lr: 0.020000\n",
+      "2021-09-21 21:28:33,328 epoch 5 - iter 32/41 - loss 0.64024405 - samples/sec: 15.67 - lr: 0.020000\n",
+      "2021-09-21 21:28:33,680 epoch 5 - iter 36/41 - loss 0.64013845 - samples/sec: 11.37 - lr: 0.020000\n",
+      "2021-09-21 21:28:33,951 epoch 5 - iter 40/41 - loss 0.63891352 - samples/sec: 14.84 - lr: 0.020000\n",
+      "2021-09-21 21:28:34,007 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:28:34,007 EPOCH 5 done: loss 0.6380 - lr 0.0200000\n",
+      "2021-09-21 21:28:34,130 DEV : loss 0.5442049503326416 - score 0.0\n",
+      "2021-09-21 21:28:34,131 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:28:34,133 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:28:34,501 epoch 6 - iter 4/41 - loss 0.66202968 - samples/sec: 11.74 - lr: 0.020000\n",
+      "2021-09-21 21:28:34,753 epoch 6 - iter 8/41 - loss 0.65116294 - samples/sec: 15.90 - lr: 0.020000\n",
+      "2021-09-21 21:28:35,022 epoch 6 - iter 12/41 - loss 0.64224044 - samples/sec: 14.91 - lr: 0.020000\n",
+      "2021-09-21 21:28:35,374 epoch 6 - iter 16/41 - loss 0.64053584 - samples/sec: 11.37 - lr: 0.020000\n",
+      "2021-09-21 21:28:35,682 epoch 6 - iter 20/41 - loss 0.64291253 - samples/sec: 13.04 - lr: 0.020000\n",
+      "2021-09-21 21:28:35,916 epoch 6 - iter 24/41 - loss 0.64082145 - samples/sec: 17.10 - lr: 0.020000\n",
+      "2021-09-21 21:28:36,163 epoch 6 - iter 28/41 - loss 0.63837250 - samples/sec: 16.24 - lr: 0.020000\n",
+      "2021-09-21 21:28:36,414 epoch 6 - iter 32/41 - loss 0.63950212 - samples/sec: 15.97 - lr: 0.020000\n",
+      "2021-09-21 21:28:36,660 epoch 6 - iter 36/41 - loss 0.63873620 - samples/sec: 16.33 - lr: 0.020000\n",
+      "2021-09-21 21:28:36,941 epoch 6 - iter 40/41 - loss 0.63788757 - samples/sec: 14.23 - lr: 0.020000\n",
+      "2021-09-21 21:28:36,997 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:55:21,635 EPOCH 6 done: loss 0.6382 - lr 0.0200000\n",
-      "2021-09-08 01:55:21,831 DEV : loss 0.5251227617263794 - score 0.0\n",
-      "2021-09-08 01:55:21,832 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:55:21,834 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:22,261 epoch 7 - iter 4/41 - loss 0.64245154 - samples/sec: 10.03 - lr: 0.020000\n",
-      "2021-09-08 01:55:22,506 epoch 7 - iter 8/41 - loss 0.61809961 - samples/sec: 16.35 - lr: 0.020000\n",
-      "2021-09-08 01:55:22,906 epoch 7 - iter 12/41 - loss 0.63403795 - samples/sec: 10.01 - lr: 0.020000\n",
-      "2021-09-08 01:55:23,138 epoch 7 - iter 16/41 - loss 0.65474298 - samples/sec: 17.30 - lr: 0.020000\n",
-      "2021-09-08 01:55:23,336 epoch 7 - iter 20/41 - loss 0.65317788 - samples/sec: 20.27 - lr: 0.020000\n",
-      "2021-09-08 01:55:23,594 epoch 7 - iter 24/41 - loss 0.65623183 - samples/sec: 15.50 - lr: 0.020000\n",
-      "2021-09-08 01:55:23,805 epoch 7 - iter 28/41 - loss 0.65435722 - samples/sec: 19.07 - lr: 0.020000\n",
-      "2021-09-08 01:55:24,091 epoch 7 - iter 32/41 - loss 0.65321433 - samples/sec: 14.00 - lr: 0.020000\n",
-      "2021-09-08 01:55:24,286 epoch 7 - iter 36/41 - loss 0.65227508 - samples/sec: 20.60 - lr: 0.020000\n",
-      "2021-09-08 01:55:24,553 epoch 7 - iter 40/41 - loss 0.64880063 - samples/sec: 15.00 - lr: 0.020000\n",
-      "2021-09-08 01:55:24,598 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:24,599 EPOCH 7 done: loss 0.6491 - lr 0.0200000\n",
-      "2021-09-08 01:55:24,863 DEV : loss 0.5959142446517944 - score 0.25\n",
-      "2021-09-08 01:55:24,864 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:55:24,942 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:25,211 epoch 8 - iter 4/41 - loss 0.63020083 - samples/sec: 16.29 - lr: 0.020000\n",
-      "2021-09-08 01:55:25,396 epoch 8 - iter 8/41 - loss 0.63586581 - samples/sec: 21.70 - lr: 0.020000\n",
-      "2021-09-08 01:55:25,669 epoch 8 - iter 12/41 - loss 0.64439894 - samples/sec: 14.70 - lr: 0.020000\n",
-      "2021-09-08 01:55:25,883 epoch 8 - iter 16/41 - loss 0.63577117 - samples/sec: 18.71 - lr: 0.020000\n",
-      "2021-09-08 01:55:26,214 epoch 8 - iter 20/41 - loss 0.63631421 - samples/sec: 12.13 - lr: 0.020000\n",
-      "2021-09-08 01:55:26,453 epoch 8 - iter 24/41 - loss 0.63945555 - samples/sec: 16.76 - lr: 0.020000\n",
-      "2021-09-08 01:55:26,819 epoch 8 - iter 28/41 - loss 0.64737237 - samples/sec: 10.94 - lr: 0.020000\n",
-      "2021-09-08 01:55:27,013 epoch 8 - iter 32/41 - loss 0.64685818 - samples/sec: 20.73 - lr: 0.020000\n",
-      "2021-09-08 01:55:27,252 epoch 8 - iter 36/41 - loss 0.64809937 - samples/sec: 16.79 - lr: 0.020000\n",
-      "2021-09-08 01:55:27,499 epoch 8 - iter 40/41 - loss 0.65978742 - samples/sec: 16.19 - lr: 0.020000\n",
-      "2021-09-08 01:55:27,545 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:27,546 EPOCH 8 done: loss 0.6599 - lr 0.0200000\n",
-      "2021-09-08 01:55:27,968 DEV : loss 0.615449070930481 - score 0.0\n",
-      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:55:27,970 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:55:28,082 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:28,304 epoch 9 - iter 4/41 - loss 0.67721930 - samples/sec: 20.52 - lr: 0.010000\n",
-      "2021-09-08 01:55:28,563 epoch 9 - iter 8/41 - loss 0.66544258 - samples/sec: 15.49 - lr: 0.010000\n",
-      "2021-09-08 01:55:28,851 epoch 9 - iter 12/41 - loss 0.65228881 - samples/sec: 13.93 - lr: 0.010000\n",
-      "2021-09-08 01:55:29,117 epoch 9 - iter 16/41 - loss 0.64842176 - samples/sec: 15.09 - lr: 0.010000\n",
-      "2021-09-08 01:55:29,333 epoch 9 - iter 20/41 - loss 0.65071719 - samples/sec: 18.56 - lr: 0.010000\n",
-      "2021-09-08 01:55:29,586 epoch 9 - iter 24/41 - loss 0.64286833 - samples/sec: 15.86 - lr: 0.010000\n",
-      "2021-09-08 01:55:29,988 epoch 9 - iter 28/41 - loss 0.64670246 - samples/sec: 9.95 - lr: 0.010000\n",
-      "2021-09-08 01:55:30,179 epoch 9 - iter 32/41 - loss 0.64789671 - samples/sec: 21.05 - lr: 0.010000\n",
-      "2021-09-08 01:55:30,409 epoch 9 - iter 36/41 - loss 0.64639836 - samples/sec: 17.43 - lr: 0.010000\n",
-      "2021-09-08 01:55:30,612 epoch 9 - iter 40/41 - loss 0.64469726 - samples/sec: 19.79 - lr: 0.010000\n",
-      "2021-09-08 01:55:30,661 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:30,662 EPOCH 9 done: loss 0.6446 - lr 0.0100000\n",
-      "2021-09-08 01:55:30,895 DEV : loss 0.5448101758956909 - score 0.0\n",
-      "2021-09-08 01:55:30,896 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:55:30,904 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:31,202 epoch 10 - iter 4/41 - loss 0.62400548 - samples/sec: 14.15 - lr: 0.010000\n",
-      "2021-09-08 01:55:31,385 epoch 10 - iter 8/41 - loss 0.62838240 - samples/sec: 21.91 - lr: 0.010000\n",
-      "2021-09-08 01:55:31,637 epoch 10 - iter 12/41 - loss 0.63555119 - samples/sec: 15.90 - lr: 0.010000\n",
-      "2021-09-08 01:55:31,869 epoch 10 - iter 16/41 - loss 0.63297627 - samples/sec: 17.31 - lr: 0.010000\n",
-      "2021-09-08 01:55:32,070 epoch 10 - iter 20/41 - loss 0.63545255 - samples/sec: 19.89 - lr: 0.010000\n",
-      "2021-09-08 01:55:32,383 epoch 10 - iter 24/41 - loss 0.63494912 - samples/sec: 12.84 - lr: 0.010000\n",
-      "2021-09-08 01:55:32,579 epoch 10 - iter 28/41 - loss 0.63615547 - samples/sec: 20.45 - lr: 0.010000\n",
-      "2021-09-08 01:55:32,872 epoch 10 - iter 32/41 - loss 0.63688622 - samples/sec: 13.66 - lr: 0.010000\n",
-      "2021-09-08 01:55:33,120 epoch 10 - iter 36/41 - loss 0.64034186 - samples/sec: 16.17 - lr: 0.010000\n",
-      "2021-09-08 01:55:33,485 epoch 10 - iter 40/41 - loss 0.64172006 - samples/sec: 10.99 - lr: 0.010000\n",
-      "2021-09-08 01:55:33,591 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:33,591 EPOCH 10 done: loss 0.6414 - lr 0.0100000\n",
-      "2021-09-08 01:55:33,865 DEV : loss 0.5315890312194824 - score 0.0\n",
-      "2021-09-08 01:55:33,865 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:55:37,606 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:37,607 Testing using best model ...\n",
-      "2021-09-08 01:55:37,608 loading file temp1/best-model.pt\n",
+      "2021-09-21 21:28:36,998 EPOCH 6 done: loss 0.6367 - lr 0.0200000\n",
+      "2021-09-21 21:28:37,109 DEV : loss 0.5499242544174194 - score 0.0\n",
+      "2021-09-21 21:28:37,109 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:28:37,111 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:28:37,430 epoch 7 - iter 4/41 - loss 0.64249462 - samples/sec: 13.63 - lr: 0.020000\n",
+      "2021-09-21 21:28:37,695 epoch 7 - iter 8/41 - loss 0.63942219 - samples/sec: 15.16 - lr: 0.020000\n",
+      "2021-09-21 21:28:37,975 epoch 7 - iter 12/41 - loss 0.63604848 - samples/sec: 14.32 - lr: 0.020000\n",
+      "2021-09-21 21:28:38,262 epoch 7 - iter 16/41 - loss 0.63911255 - samples/sec: 13.95 - lr: 0.020000\n",
+      "2021-09-21 21:28:38,520 epoch 7 - iter 20/41 - loss 0.63548270 - samples/sec: 15.56 - lr: 0.020000\n",
+      "2021-09-21 21:28:38,765 epoch 7 - iter 24/41 - loss 0.63257615 - samples/sec: 16.31 - lr: 0.020000\n",
+      "2021-09-21 21:28:39,060 epoch 7 - iter 28/41 - loss 0.63611621 - samples/sec: 13.59 - lr: 0.020000\n",
+      "2021-09-21 21:28:39,290 epoch 7 - iter 32/41 - loss 0.63810389 - samples/sec: 17.47 - lr: 0.020000\n",
+      "2021-09-21 21:28:39,594 epoch 7 - iter 36/41 - loss 0.64177338 - samples/sec: 13.18 - lr: 0.020000\n",
+      "2021-09-21 21:28:39,869 epoch 7 - iter 40/41 - loss 0.64080876 - samples/sec: 14.55 - lr: 0.020000\n",
+      "2021-09-21 21:28:39,931 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:28:39,931 EPOCH 7 done: loss 0.6393 - lr 0.0200000\n",
+      "2021-09-21 21:28:40,052 DEV : loss 0.5444392561912537 - score 0.0\n",
+      "Epoch     7: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:28:40,052 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:28:40,054 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:28:40,359 epoch 8 - iter 4/41 - loss 0.65443192 - samples/sec: 14.12 - lr: 0.010000\n",
+      "2021-09-21 21:28:40,622 epoch 8 - iter 8/41 - loss 0.64717030 - samples/sec: 15.23 - lr: 0.010000\n",
+      "2021-09-21 21:28:40,890 epoch 8 - iter 12/41 - loss 0.64961983 - samples/sec: 14.97 - lr: 0.010000\n",
+      "2021-09-21 21:28:41,135 epoch 8 - iter 16/41 - loss 0.65275276 - samples/sec: 16.36 - lr: 0.010000\n",
+      "2021-09-21 21:28:41,381 epoch 8 - iter 20/41 - loss 0.64514080 - samples/sec: 16.29 - lr: 0.010000\n",
+      "2021-09-21 21:28:41,633 epoch 8 - iter 24/41 - loss 0.64545987 - samples/sec: 15.89 - lr: 0.010000\n",
+      "2021-09-21 21:28:41,932 epoch 8 - iter 28/41 - loss 0.64426961 - samples/sec: 13.40 - lr: 0.010000\n",
+      "2021-09-21 21:28:42,171 epoch 8 - iter 32/41 - loss 0.64132482 - samples/sec: 16.83 - lr: 0.010000\n",
+      "2021-09-21 21:28:42,475 epoch 8 - iter 36/41 - loss 0.64046640 - samples/sec: 13.18 - lr: 0.010000\n",
+      "2021-09-21 21:28:42,741 epoch 8 - iter 40/41 - loss 0.63846673 - samples/sec: 15.04 - lr: 0.010000\n",
+      "2021-09-21 21:28:42,874 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:28:42,875 EPOCH 8 done: loss 0.6385 - lr 0.0100000\n",
+      "2021-09-21 21:28:42,981 DEV : loss 0.5274798274040222 - score 0.0\n",
+      "2021-09-21 21:28:42,982 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:28:42,984 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:28:43,305 epoch 9 - iter 4/41 - loss 0.62871531 - samples/sec: 13.37 - lr: 0.010000\n",
+      "2021-09-21 21:28:43,574 epoch 9 - iter 8/41 - loss 0.61184968 - samples/sec: 14.91 - lr: 0.010000\n",
+      "2021-09-21 21:28:43,881 epoch 9 - iter 12/41 - loss 0.61388295 - samples/sec: 13.04 - lr: 0.010000\n",
+      "2021-09-21 21:28:44,116 epoch 9 - iter 16/41 - loss 0.61807520 - samples/sec: 17.07 - lr: 0.010000\n",
+      "2021-09-21 21:28:44,479 epoch 9 - iter 20/41 - loss 0.61775107 - samples/sec: 11.03 - lr: 0.010000\n",
+      "2021-09-21 21:28:44,741 epoch 9 - iter 24/41 - loss 0.61954719 - samples/sec: 15.32 - lr: 0.010000\n",
+      "2021-09-21 21:28:44,981 epoch 9 - iter 28/41 - loss 0.62168325 - samples/sec: 16.67 - lr: 0.010000\n",
+      "2021-09-21 21:28:45,324 epoch 9 - iter 32/41 - loss 0.61814569 - samples/sec: 11.71 - lr: 0.010000\n",
+      "2021-09-21 21:28:45,565 epoch 9 - iter 36/41 - loss 0.61999823 - samples/sec: 16.57 - lr: 0.010000\n",
+      "2021-09-21 21:28:45,811 epoch 9 - iter 40/41 - loss 0.62231828 - samples/sec: 16.35 - lr: 0.010000\n",
+      "2021-09-21 21:28:45,866 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:28:45,866 EPOCH 9 done: loss 0.6230 - lr 0.0100000\n",
+      "2021-09-21 21:28:45,988 DEV : loss 0.5417440533638 - score 0.0\n",
+      "2021-09-21 21:28:45,989 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:28:45,991 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:28:46,254 epoch 10 - iter 4/41 - loss 0.68596107 - samples/sec: 17.02 - lr: 0.010000\n",
+      "2021-09-21 21:28:46,505 epoch 10 - iter 8/41 - loss 0.64445565 - samples/sec: 15.96 - lr: 0.010000\n",
+      "2021-09-21 21:28:46,758 epoch 10 - iter 12/41 - loss 0.65005577 - samples/sec: 15.84 - lr: 0.010000\n",
+      "2021-09-21 21:28:47,121 epoch 10 - iter 16/41 - loss 0.64979249 - samples/sec: 11.05 - lr: 0.010000\n",
+      "2021-09-21 21:28:47,392 epoch 10 - iter 20/41 - loss 0.64420977 - samples/sec: 14.78 - lr: 0.010000\n",
+      "2021-09-21 21:28:47,643 epoch 10 - iter 24/41 - loss 0.64931408 - samples/sec: 15.97 - lr: 0.010000\n",
+      "2021-09-21 21:28:47,987 epoch 10 - iter 28/41 - loss 0.64680358 - samples/sec: 11.63 - lr: 0.010000\n",
+      "2021-09-21 21:28:48,233 epoch 10 - iter 32/41 - loss 0.64963330 - samples/sec: 16.30 - lr: 0.010000\n",
+      "2021-09-21 21:28:48,482 epoch 10 - iter 36/41 - loss 0.65216000 - samples/sec: 16.15 - lr: 0.010000\n",
+      "2021-09-21 21:28:48,769 epoch 10 - iter 40/41 - loss 0.64908047 - samples/sec: 13.93 - lr: 0.010000\n",
+      "2021-09-21 21:28:48,823 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:28:48,824 EPOCH 10 done: loss 0.6479 - lr 0.0100000\n",
+      "2021-09-21 21:28:48,943 DEV : loss 0.5193225145339966 - score 0.0\n",
+      "2021-09-21 21:28:48,944 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:28:52,895 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:28:52,895 Testing using best model ...\n",
+      "2021-09-21 21:28:52,897 loading file temp1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:55:42,952 \t0.0\n",
-      "2021-09-08 01:55:42,952 \n",
+      "2021-09-21 21:28:57,654 \t0.2\n",
+      "2021-09-21 21:28:57,654 \n",
       "Results:\n",
-      "- F-score (micro) 0.0\n",
-      "- F-score (macro) 0.0\n",
-      "- Accuracy 0.0\n",
+      "- F-score (micro) 0.2\n",
+      "- F-score (macro) 0.08\n",
+      "- Accuracy 0.2\n",
       "\n",
       "By class:\n",
       "                                                                      precision    recall  f1-score   support\n",
       "\n",
-      "            one who does not side with any party in a war or dispute     0.0000    0.0000    0.0000         1\n",
-      "that which is below standard or expectations as of ethics or decency     0.0000    0.0000    0.0000         0\n",
-      "                        that which is pleasing or valuable or useful     0.0000    0.0000    0.0000         3\n",
-      "  remarkable or out of the ordinary in degree or magnitude or effect     0.0000    0.0000    0.0000         0\n",
+      "            one who does not side with any party in a war or dispute     0.0000    0.0000    0.0000         0\n",
+      "that which is below standard or expectations as of ethics or decency     0.2500    1.0000    0.4000         1\n",
+      "                        that which is pleasing or valuable or useful     0.0000    0.0000    0.0000         0\n",
+      "  remarkable or out of the ordinary in degree or magnitude or effect     0.0000    0.0000    0.0000         3\n",
       "                                    exceptionally bad or displeasing     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "                                                           micro avg     0.0000    0.0000    0.0000         5\n",
-      "                                                           macro avg     0.0000    0.0000    0.0000         5\n",
-      "                                                        weighted avg     0.0000    0.0000    0.0000         5\n",
-      "                                                         samples avg     0.0000    0.0000    0.0000         5\n",
+      "                                                           micro avg     0.2000    0.2000    0.2000         5\n",
+      "                                                           macro avg     0.0500    0.2000    0.0800         5\n",
+      "                                                        weighted avg     0.0500    0.2000    0.0800         5\n",
+      "                                                         samples avg     0.2000    0.2000    0.2000         5\n",
       "\n",
-      "2021-09-08 01:55:42,953 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:37,288 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
+      "2021-09-21 21:28:57,655 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:29:45,811 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:56:41,433 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:29:49,984 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 46/46 [00:00<00:00, 19733.86it/s]"
+      "100%|██████████| 46/46 [00:00<00:00, 18623.36it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:56:41,437 [b'one who does not side with any party in a war or dispute', b'that which is below standard or expectations as of ethics or decency', b'that which is pleasing or valuable or useful', b'remarkable or out of the ordinary in degree or magnitude or effect', b'exceptionally bad or displeasing']\n",
-      "2021-09-08 01:56:41,457 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:41,459 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:29:49,988 [b'one who does not side with any party in a war or dispute', b'that which is below standard or expectations as of ethics or decency', b'that which is pleasing or valuable or useful', b'remarkable or out of the ordinary in degree or magnitude or effect', b'exceptionally bad or displeasing']\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 21:29:50,425 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:29:50,427 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -7185,241 +7209,237 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:56:41,460 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:41,460 Corpus: \"Corpus: 41 train + 4 dev + 5 test sentences\"\n",
-      "2021-09-08 01:56:41,460 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:41,461 Parameters:\n",
-      "2021-09-08 01:56:41,461  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:56:41,461  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:56:41,462  - patience: \"3\"\n",
-      "2021-09-08 01:56:41,462  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:56:41,462  - max_epochs: \"10\"\n",
-      "2021-09-08 01:56:41,462  - shuffle: \"True\"\n",
-      "2021-09-08 01:56:41,463  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:56:41,463  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:56:41,463 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:41,464 Model training base path: \"temp1\"\n",
-      "2021-09-08 01:56:41,464 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:41,464 Device: cuda:0\n",
-      "2021-09-08 01:56:41,464 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:41,465 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:56:41,472 ----------------------------------------------------------------------------------------------------\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 01:56:41,789 epoch 1 - iter 4/41 - loss 0.73066386 - samples/sec: 13.51 - lr: 0.020000\n",
-      "2021-09-08 01:56:42,046 epoch 1 - iter 8/41 - loss 0.67893337 - samples/sec: 15.59 - lr: 0.020000\n",
-      "2021-09-08 01:56:42,376 epoch 1 - iter 12/41 - loss 0.70443038 - samples/sec: 12.12 - lr: 0.020000\n",
-      "2021-09-08 01:56:42,757 epoch 1 - iter 16/41 - loss 0.67245325 - samples/sec: 10.52 - lr: 0.020000\n",
-      "2021-09-08 01:56:43,153 epoch 1 - iter 20/41 - loss 0.69018604 - samples/sec: 10.12 - lr: 0.020000\n",
-      "2021-09-08 01:56:43,415 epoch 1 - iter 24/41 - loss 0.64014041 - samples/sec: 15.30 - lr: 0.020000\n",
-      "2021-09-08 01:56:43,750 epoch 1 - iter 28/41 - loss 0.59757002 - samples/sec: 11.95 - lr: 0.020000\n",
-      "2021-09-08 01:56:44,026 epoch 1 - iter 32/41 - loss 0.56554192 - samples/sec: 14.52 - lr: 0.020000\n",
-      "2021-09-08 01:56:44,272 epoch 1 - iter 36/41 - loss 0.61879322 - samples/sec: 16.32 - lr: 0.020000\n",
-      "2021-09-08 01:56:44,622 epoch 1 - iter 40/41 - loss 0.58762524 - samples/sec: 11.44 - lr: 0.020000\n",
-      "2021-09-08 01:56:44,742 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:44,743 EPOCH 1 done: loss 0.5797 - lr 0.0200000\n",
-      "2021-09-08 01:56:44,942 DEV : loss 0.8123684525489807 - score 0.25\n",
-      "2021-09-08 01:56:44,943 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:29:50,428 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:29:50,428 Corpus: \"Corpus: 41 train + 4 dev + 5 test sentences\"\n",
+      "2021-09-21 21:29:50,428 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:29:50,429 Parameters:\n",
+      "2021-09-21 21:29:50,429  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:29:50,429  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:29:50,429  - patience: \"3\"\n",
+      "2021-09-21 21:29:50,430  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:29:50,430  - max_epochs: \"10\"\n",
+      "2021-09-21 21:29:50,430  - shuffle: \"True\"\n",
+      "2021-09-21 21:29:50,431  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:29:50,431  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:29:50,431 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:29:50,431 Model training base path: \"temp1\"\n",
+      "2021-09-21 21:29:50,432 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:29:50,432 Device: cuda:0\n",
+      "2021-09-21 21:29:50,432 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:29:50,433 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:29:50,439 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:29:50,751 epoch 1 - iter 4/41 - loss 0.58452826 - samples/sec: 13.53 - lr: 0.020000\n",
+      "2021-09-21 21:29:50,989 epoch 1 - iter 8/41 - loss 0.58688725 - samples/sec: 16.86 - lr: 0.020000\n",
+      "2021-09-21 21:29:51,223 epoch 1 - iter 12/41 - loss 0.66733133 - samples/sec: 17.16 - lr: 0.020000\n",
+      "2021-09-21 21:29:51,453 epoch 1 - iter 16/41 - loss 0.61876098 - samples/sec: 17.39 - lr: 0.020000\n",
+      "2021-09-21 21:29:51,780 epoch 1 - iter 20/41 - loss 0.64648379 - samples/sec: 12.28 - lr: 0.020000\n",
+      "2021-09-21 21:29:52,054 epoch 1 - iter 24/41 - loss 0.63660058 - samples/sec: 14.61 - lr: 0.020000\n",
+      "2021-09-21 21:29:52,283 epoch 1 - iter 28/41 - loss 0.59018870 - samples/sec: 17.52 - lr: 0.020000\n",
+      "2021-09-21 21:29:52,506 epoch 1 - iter 32/41 - loss 0.54881162 - samples/sec: 17.97 - lr: 0.020000\n",
+      "2021-09-21 21:29:52,737 epoch 1 - iter 36/41 - loss 0.54546782 - samples/sec: 17.36 - lr: 0.020000\n",
+      "2021-09-21 21:29:53,054 epoch 1 - iter 40/41 - loss 0.53208568 - samples/sec: 12.62 - lr: 0.020000\n",
+      "2021-09-21 21:29:53,112 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:29:53,112 EPOCH 1 done: loss 0.5299 - lr 0.0200000\n",
+      "2021-09-21 21:29:53,206 DEV : loss 1.7230756282806396 - score 0.0\n",
+      "2021-09-21 21:29:53,206 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:29:57,299 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:29:57,642 epoch 2 - iter 4/41 - loss 0.28555519 - samples/sec: 12.65 - lr: 0.020000\n",
+      "2021-09-21 21:29:57,930 epoch 2 - iter 8/41 - loss 0.68151611 - samples/sec: 13.91 - lr: 0.020000\n",
+      "2021-09-21 21:29:58,228 epoch 2 - iter 12/41 - loss 0.63782882 - samples/sec: 13.48 - lr: 0.020000\n",
+      "2021-09-21 21:29:58,545 epoch 2 - iter 16/41 - loss 0.58416297 - samples/sec: 12.61 - lr: 0.020000\n",
+      "2021-09-21 21:29:58,999 epoch 2 - iter 20/41 - loss 0.70734028 - samples/sec: 8.82 - lr: 0.020000\n",
+      "2021-09-21 21:29:59,294 epoch 2 - iter 24/41 - loss 0.67929779 - samples/sec: 13.58 - lr: 0.020000\n",
+      "2021-09-21 21:29:59,686 epoch 2 - iter 28/41 - loss 0.63940989 - samples/sec: 10.24 - lr: 0.020000\n",
+      "2021-09-21 21:29:59,935 epoch 2 - iter 32/41 - loss 0.62218156 - samples/sec: 16.06 - lr: 0.020000\n",
+      "2021-09-21 21:30:00,242 epoch 2 - iter 36/41 - loss 0.62556478 - samples/sec: 13.05 - lr: 0.020000\n",
+      "2021-09-21 21:30:00,501 epoch 2 - iter 40/41 - loss 0.59784777 - samples/sec: 15.50 - lr: 0.020000\n",
+      "2021-09-21 21:30:00,569 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:30:00,569 EPOCH 2 done: loss 0.6048 - lr 0.0200000\n",
+      "2021-09-21 21:30:00,694 DEV : loss 0.6385737061500549 - score 0.0\n",
+      "2021-09-21 21:30:00,695 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:30:09,933 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:30:10,251 epoch 3 - iter 4/41 - loss 0.81969530 - samples/sec: 14.01 - lr: 0.020000\n",
+      "2021-09-21 21:30:10,532 epoch 3 - iter 8/41 - loss 0.75907551 - samples/sec: 14.22 - lr: 0.020000\n",
+      "2021-09-21 21:30:10,901 epoch 3 - iter 12/41 - loss 0.60357145 - samples/sec: 10.88 - lr: 0.020000\n",
+      "2021-09-21 21:30:11,188 epoch 3 - iter 16/41 - loss 0.64712888 - samples/sec: 13.97 - lr: 0.020000\n",
+      "2021-09-21 21:30:11,572 epoch 3 - iter 20/41 - loss 0.60168272 - samples/sec: 10.41 - lr: 0.020000\n",
+      "2021-09-21 21:30:11,858 epoch 3 - iter 24/41 - loss 0.58011927 - samples/sec: 14.06 - lr: 0.020000\n",
+      "2021-09-21 21:30:12,143 epoch 3 - iter 28/41 - loss 0.59043068 - samples/sec: 14.05 - lr: 0.020000\n",
+      "2021-09-21 21:30:12,414 epoch 3 - iter 32/41 - loss 0.55967583 - samples/sec: 14.76 - lr: 0.020000\n",
+      "2021-09-21 21:30:12,818 epoch 3 - iter 36/41 - loss 0.53369910 - samples/sec: 9.93 - lr: 0.020000\n",
+      "2021-09-21 21:30:13,115 epoch 3 - iter 40/41 - loss 0.57142427 - samples/sec: 13.49 - lr: 0.020000\n",
+      "2021-09-21 21:30:13,216 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:30:13,216 EPOCH 3 done: loss 0.5664 - lr 0.0200000\n",
+      "2021-09-21 21:30:13,350 DEV : loss 0.4462518095970154 - score 0.0\n",
+      "2021-09-21 21:30:13,353 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:56:48,854 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:49,133 epoch 2 - iter 4/41 - loss 1.00035807 - samples/sec: 15.40 - lr: 0.020000\n",
-      "2021-09-08 01:56:49,644 epoch 2 - iter 8/41 - loss 1.02177642 - samples/sec: 7.85 - lr: 0.020000\n",
-      "2021-09-08 01:56:49,896 epoch 2 - iter 12/41 - loss 0.92424255 - samples/sec: 15.90 - lr: 0.020000\n",
-      "2021-09-08 01:56:50,223 epoch 2 - iter 16/41 - loss 0.82229880 - samples/sec: 12.26 - lr: 0.020000\n",
-      "2021-09-08 01:56:50,677 epoch 2 - iter 20/41 - loss 0.78617233 - samples/sec: 8.82 - lr: 0.020000\n",
-      "2021-09-08 01:56:50,935 epoch 2 - iter 24/41 - loss 0.71017276 - samples/sec: 15.56 - lr: 0.020000\n",
-      "2021-09-08 01:56:51,185 epoch 2 - iter 28/41 - loss 0.68822953 - samples/sec: 16.02 - lr: 0.020000\n",
-      "2021-09-08 01:56:51,466 epoch 2 - iter 32/41 - loss 0.68941988 - samples/sec: 14.27 - lr: 0.020000\n",
-      "2021-09-08 01:56:51,821 epoch 2 - iter 36/41 - loss 0.65843250 - samples/sec: 11.29 - lr: 0.020000\n",
-      "2021-09-08 01:56:52,109 epoch 2 - iter 40/41 - loss 0.63698130 - samples/sec: 13.91 - lr: 0.020000\n",
-      "2021-09-08 01:56:52,193 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:52,193 EPOCH 2 done: loss 0.6304 - lr 0.0200000\n",
-      "2021-09-08 01:56:52,331 DEV : loss 0.8275392055511475 - score 0.0\n",
-      "2021-09-08 01:56:52,332 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:56:52,334 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:52,729 epoch 3 - iter 4/41 - loss 0.23579327 - samples/sec: 10.67 - lr: 0.020000\n",
-      "2021-09-08 01:56:53,031 epoch 3 - iter 8/41 - loss 0.64806456 - samples/sec: 13.30 - lr: 0.020000\n",
-      "2021-09-08 01:56:53,328 epoch 3 - iter 12/41 - loss 0.51027186 - samples/sec: 13.47 - lr: 0.020000\n",
-      "2021-09-08 01:56:53,628 epoch 3 - iter 16/41 - loss 0.59121073 - samples/sec: 13.37 - lr: 0.020000\n",
-      "2021-09-08 01:56:53,913 epoch 3 - iter 20/41 - loss 0.57349708 - samples/sec: 14.05 - lr: 0.020000\n",
-      "2021-09-08 01:56:54,302 epoch 3 - iter 24/41 - loss 0.63601363 - samples/sec: 10.31 - lr: 0.020000\n",
-      "2021-09-08 01:56:54,582 epoch 3 - iter 28/41 - loss 0.62403358 - samples/sec: 14.30 - lr: 0.020000\n",
-      "2021-09-08 01:56:54,847 epoch 3 - iter 32/41 - loss 0.58915928 - samples/sec: 15.15 - lr: 0.020000\n",
-      "2021-09-08 01:56:55,229 epoch 3 - iter 36/41 - loss 0.59584237 - samples/sec: 10.48 - lr: 0.020000\n",
-      "2021-09-08 01:56:55,643 epoch 3 - iter 40/41 - loss 0.60224388 - samples/sec: 9.69 - lr: 0.020000\n",
-      "2021-09-08 01:56:55,708 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:55,709 EPOCH 3 done: loss 0.5897 - lr 0.0200000\n",
-      "2021-09-08 01:56:55,847 DEV : loss 0.6468905806541443 - score 0.25\n",
-      "2021-09-08 01:56:55,848 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:30:17,245 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:30:17,663 epoch 4 - iter 4/41 - loss 0.29558455 - samples/sec: 10.19 - lr: 0.020000\n",
+      "2021-09-21 21:30:18,004 epoch 4 - iter 8/41 - loss 0.27053174 - samples/sec: 11.77 - lr: 0.020000\n",
+      "2021-09-21 21:30:18,307 epoch 4 - iter 12/41 - loss 0.30347271 - samples/sec: 13.20 - lr: 0.020000\n",
+      "2021-09-21 21:30:18,611 epoch 4 - iter 16/41 - loss 0.35928548 - samples/sec: 13.21 - lr: 0.020000\n",
+      "2021-09-21 21:30:18,875 epoch 4 - iter 20/41 - loss 0.41736758 - samples/sec: 15.14 - lr: 0.020000\n",
+      "2021-09-21 21:30:19,283 epoch 4 - iter 24/41 - loss 0.38216230 - samples/sec: 9.83 - lr: 0.020000\n",
+      "2021-09-21 21:30:19,559 epoch 4 - iter 28/41 - loss 0.36658290 - samples/sec: 14.51 - lr: 0.020000\n",
+      "2021-09-21 21:30:19,915 epoch 4 - iter 32/41 - loss 0.37400900 - samples/sec: 11.26 - lr: 0.020000\n",
+      "2021-09-21 21:30:20,169 epoch 4 - iter 36/41 - loss 0.35911882 - samples/sec: 15.76 - lr: 0.020000\n",
+      "2021-09-21 21:30:20,438 epoch 4 - iter 40/41 - loss 0.39182711 - samples/sec: 14.92 - lr: 0.020000\n",
+      "2021-09-21 21:30:20,501 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:30:20,502 EPOCH 4 done: loss 0.3993 - lr 0.0200000\n",
+      "2021-09-21 21:30:20,625 DEV : loss 0.5288999080657959 - score 0.25\n",
+      "2021-09-21 21:30:20,625 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:57:00,056 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:57:00,321 epoch 4 - iter 4/41 - loss 0.31340485 - samples/sec: 16.39 - lr: 0.020000\n",
-      "2021-09-08 01:57:00,555 epoch 4 - iter 8/41 - loss 0.22304532 - samples/sec: 17.13 - lr: 0.020000\n",
-      "2021-09-08 01:57:01,000 epoch 4 - iter 12/41 - loss 0.32790571 - samples/sec: 9.00 - lr: 0.020000\n",
-      "2021-09-08 01:57:01,317 epoch 4 - iter 16/41 - loss 0.47134951 - samples/sec: 12.64 - lr: 0.020000\n",
-      "2021-09-08 01:57:01,565 epoch 4 - iter 20/41 - loss 0.60287287 - samples/sec: 16.18 - lr: 0.020000\n",
-      "2021-09-08 01:57:01,907 epoch 4 - iter 24/41 - loss 0.58674784 - samples/sec: 11.74 - lr: 0.020000\n",
-      "2021-09-08 01:57:02,421 epoch 4 - iter 28/41 - loss 0.56872803 - samples/sec: 7.80 - lr: 0.020000\n",
-      "2021-09-08 01:57:02,646 epoch 4 - iter 32/41 - loss 0.56534612 - samples/sec: 17.83 - lr: 0.020000\n",
-      "2021-09-08 01:57:02,969 epoch 4 - iter 36/41 - loss 0.54861683 - samples/sec: 12.39 - lr: 0.020000\n",
-      "2021-09-08 01:57:03,227 epoch 4 - iter 40/41 - loss 0.50539535 - samples/sec: 15.55 - lr: 0.020000\n",
-      "2021-09-08 01:57:03,309 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:57:03,310 EPOCH 4 done: loss 0.4935 - lr 0.0200000\n",
-      "2021-09-08 01:57:03,548 DEV : loss 0.7134577631950378 - score 0.25\n",
-      "2021-09-08 01:57:03,550 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:57:03,552 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:57:03,846 epoch 5 - iter 4/41 - loss 0.81404578 - samples/sec: 14.60 - lr: 0.020000\n",
-      "2021-09-08 01:57:04,163 epoch 5 - iter 8/41 - loss 0.53890295 - samples/sec: 12.62 - lr: 0.020000\n",
-      "2021-09-08 01:57:04,460 epoch 5 - iter 12/41 - loss 0.51316326 - samples/sec: 13.50 - lr: 0.020000\n",
-      "2021-09-08 01:57:04,862 epoch 5 - iter 16/41 - loss 0.49723941 - samples/sec: 9.98 - lr: 0.020000\n",
-      "2021-09-08 01:57:05,077 epoch 5 - iter 20/41 - loss 0.45122937 - samples/sec: 18.66 - lr: 0.020000\n",
-      "2021-09-08 01:57:05,299 epoch 5 - iter 24/41 - loss 0.46125588 - samples/sec: 18.08 - lr: 0.020000\n",
-      "2021-09-08 01:57:05,577 epoch 5 - iter 28/41 - loss 0.51001839 - samples/sec: 14.41 - lr: 0.020000\n",
-      "2021-09-08 01:57:05,877 epoch 5 - iter 32/41 - loss 0.48941248 - samples/sec: 13.35 - lr: 0.020000\n",
-      "2021-09-08 01:57:06,230 epoch 5 - iter 36/41 - loss 0.46804588 - samples/sec: 11.38 - lr: 0.020000\n",
-      "2021-09-08 01:57:06,498 epoch 5 - iter 40/41 - loss 0.45692026 - samples/sec: 14.93 - lr: 0.020000\n",
-      "2021-09-08 01:57:06,584 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:57:06,584 EPOCH 5 done: loss 0.4547 - lr 0.0200000\n",
-      "2021-09-08 01:57:06,847 DEV : loss 0.9122992157936096 - score 0.25\n",
-      "2021-09-08 01:57:06,848 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:57:07,037 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:57:07,334 epoch 6 - iter 4/41 - loss 0.72986880 - samples/sec: 14.88 - lr: 0.020000\n",
-      "2021-09-08 01:57:07,598 epoch 6 - iter 8/41 - loss 0.45945661 - samples/sec: 15.20 - lr: 0.020000\n",
-      "2021-09-08 01:57:07,809 epoch 6 - iter 12/41 - loss 0.55329473 - samples/sec: 19.01 - lr: 0.020000\n",
-      "2021-09-08 01:57:08,193 epoch 6 - iter 16/41 - loss 0.51196833 - samples/sec: 10.44 - lr: 0.020000\n",
-      "2021-09-08 01:57:08,454 epoch 6 - iter 20/41 - loss 0.44370885 - samples/sec: 15.33 - lr: 0.020000\n",
-      "2021-09-08 01:57:08,738 epoch 6 - iter 24/41 - loss 0.41734313 - samples/sec: 14.14 - lr: 0.020000\n",
-      "2021-09-08 01:57:08,982 epoch 6 - iter 28/41 - loss 0.38493060 - samples/sec: 16.49 - lr: 0.020000\n",
-      "2021-09-08 01:57:09,231 epoch 6 - iter 32/41 - loss 0.35343160 - samples/sec: 16.10 - lr: 0.020000\n",
-      "2021-09-08 01:57:09,453 epoch 6 - iter 36/41 - loss 0.35434603 - samples/sec: 17.99 - lr: 0.020000\n",
-      "2021-09-08 01:57:09,778 epoch 6 - iter 40/41 - loss 0.33905075 - samples/sec: 12.33 - lr: 0.020000\n",
-      "2021-09-08 01:57:09,867 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:30:24,892 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:30:25,363 epoch 5 - iter 4/41 - loss 0.60321766 - samples/sec: 9.07 - lr: 0.020000\n",
+      "2021-09-21 21:30:25,628 epoch 5 - iter 8/41 - loss 0.41043697 - samples/sec: 15.11 - lr: 0.020000\n",
+      "2021-09-21 21:30:25,915 epoch 5 - iter 12/41 - loss 0.36371557 - samples/sec: 13.98 - lr: 0.020000\n",
+      "2021-09-21 21:30:26,206 epoch 5 - iter 16/41 - loss 0.42269437 - samples/sec: 13.77 - lr: 0.020000\n",
+      "2021-09-21 21:30:26,647 epoch 5 - iter 20/41 - loss 0.40425298 - samples/sec: 9.09 - lr: 0.020000\n",
+      "2021-09-21 21:30:26,889 epoch 5 - iter 24/41 - loss 0.39374112 - samples/sec: 16.50 - lr: 0.020000\n",
+      "2021-09-21 21:30:27,169 epoch 5 - iter 28/41 - loss 0.42608665 - samples/sec: 14.33 - lr: 0.020000\n",
+      "2021-09-21 21:30:27,484 epoch 5 - iter 32/41 - loss 0.40878838 - samples/sec: 12.74 - lr: 0.020000\n",
+      "2021-09-21 21:30:27,750 epoch 5 - iter 36/41 - loss 0.42331177 - samples/sec: 15.04 - lr: 0.020000\n",
+      "2021-09-21 21:30:28,016 epoch 5 - iter 40/41 - loss 0.39280387 - samples/sec: 15.05 - lr: 0.020000\n",
+      "2021-09-21 21:30:28,072 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:57:09,867 EPOCH 6 done: loss 0.3375 - lr 0.0200000\n",
-      "2021-09-08 01:57:10,013 DEV : loss 1.0525838136672974 - score 0.25\n",
-      "2021-09-08 01:57:10,014 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:57:10,015 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:57:10,370 epoch 7 - iter 4/41 - loss 0.35173648 - samples/sec: 11.98 - lr: 0.020000\n",
-      "2021-09-08 01:57:10,760 epoch 7 - iter 8/41 - loss 0.19113272 - samples/sec: 10.26 - lr: 0.020000\n",
-      "2021-09-08 01:57:11,039 epoch 7 - iter 12/41 - loss 0.26562535 - samples/sec: 14.35 - lr: 0.020000\n",
-      "2021-09-08 01:57:11,367 epoch 7 - iter 16/41 - loss 0.24080181 - samples/sec: 12.24 - lr: 0.020000\n",
-      "2021-09-08 01:57:11,708 epoch 7 - iter 20/41 - loss 0.20258229 - samples/sec: 11.74 - lr: 0.020000\n",
-      "2021-09-08 01:57:12,049 epoch 7 - iter 24/41 - loss 0.22022853 - samples/sec: 11.74 - lr: 0.020000\n",
-      "2021-09-08 01:57:12,310 epoch 7 - iter 28/41 - loss 0.23704339 - samples/sec: 15.35 - lr: 0.020000\n",
-      "2021-09-08 01:57:12,623 epoch 7 - iter 32/41 - loss 0.21225061 - samples/sec: 12.82 - lr: 0.020000\n",
-      "2021-09-08 01:57:12,994 epoch 7 - iter 36/41 - loss 0.22066220 - samples/sec: 10.80 - lr: 0.020000\n",
-      "2021-09-08 01:57:13,257 epoch 7 - iter 40/41 - loss 0.24155933 - samples/sec: 15.23 - lr: 0.020000\n",
-      "2021-09-08 01:57:13,321 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:57:13,321 EPOCH 7 done: loss 0.2449 - lr 0.0200000\n",
-      "2021-09-08 01:57:13,463 DEV : loss 1.1789498329162598 - score 0.0\n",
-      "Epoch     7: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:57:13,463 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:57:13,465 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:57:13,877 epoch 8 - iter 4/41 - loss 0.01503826 - samples/sec: 10.24 - lr: 0.010000\n",
-      "2021-09-08 01:57:14,184 epoch 8 - iter 8/41 - loss 0.08201708 - samples/sec: 13.06 - lr: 0.010000\n",
-      "2021-09-08 01:57:14,442 epoch 8 - iter 12/41 - loss 0.06314686 - samples/sec: 15.52 - lr: 0.010000\n",
-      "2021-09-08 01:57:14,740 epoch 8 - iter 16/41 - loss 0.07874610 - samples/sec: 13.44 - lr: 0.010000\n",
-      "2021-09-08 01:57:15,140 epoch 8 - iter 20/41 - loss 0.11041075 - samples/sec: 10.01 - lr: 0.010000\n",
-      "2021-09-08 01:57:15,367 epoch 8 - iter 24/41 - loss 0.12493025 - samples/sec: 17.67 - lr: 0.010000\n",
-      "2021-09-08 01:57:15,597 epoch 8 - iter 28/41 - loss 0.13298150 - samples/sec: 17.43 - lr: 0.010000\n",
-      "2021-09-08 01:57:15,835 epoch 8 - iter 32/41 - loss 0.13698259 - samples/sec: 16.89 - lr: 0.010000\n",
-      "2021-09-08 01:57:16,237 epoch 8 - iter 36/41 - loss 0.13529168 - samples/sec: 9.95 - lr: 0.010000\n",
-      "2021-09-08 01:57:16,593 epoch 8 - iter 40/41 - loss 0.12603454 - samples/sec: 11.28 - lr: 0.010000\n",
-      "2021-09-08 01:57:16,676 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:57:16,676 EPOCH 8 done: loss 0.1252 - lr 0.0100000\n",
-      "2021-09-08 01:57:16,920 DEV : loss 1.221712350845337 - score 0.0\n",
-      "2021-09-08 01:57:16,921 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:57:16,998 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:57:17,435 epoch 9 - iter 4/41 - loss 0.12221686 - samples/sec: 10.65 - lr: 0.010000\n",
-      "2021-09-08 01:57:17,788 epoch 9 - iter 8/41 - loss 0.19598289 - samples/sec: 11.33 - lr: 0.010000\n",
-      "2021-09-08 01:57:18,030 epoch 9 - iter 12/41 - loss 0.16802541 - samples/sec: 16.60 - lr: 0.010000\n",
-      "2021-09-08 01:57:18,326 epoch 9 - iter 16/41 - loss 0.14353233 - samples/sec: 13.56 - lr: 0.010000\n",
-      "2021-09-08 01:57:18,538 epoch 9 - iter 20/41 - loss 0.14520074 - samples/sec: 18.85 - lr: 0.010000\n",
-      "2021-09-08 01:57:18,835 epoch 9 - iter 24/41 - loss 0.13082243 - samples/sec: 13.50 - lr: 0.010000\n",
-      "2021-09-08 01:57:19,199 epoch 9 - iter 28/41 - loss 0.11425813 - samples/sec: 11.01 - lr: 0.010000\n",
-      "2021-09-08 01:57:19,452 epoch 9 - iter 32/41 - loss 0.10082313 - samples/sec: 15.86 - lr: 0.010000\n",
-      "2021-09-08 01:57:19,732 epoch 9 - iter 36/41 - loss 0.09445565 - samples/sec: 14.32 - lr: 0.010000\n",
-      "2021-09-08 01:57:20,031 epoch 9 - iter 40/41 - loss 0.09753969 - samples/sec: 13.40 - lr: 0.010000\n",
-      "2021-09-08 01:57:20,130 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:57:20,130 EPOCH 9 done: loss 0.0959 - lr 0.0100000\n",
-      "2021-09-08 01:57:20,301 DEV : loss 1.2578325271606445 - score 0.0\n",
-      "2021-09-08 01:57:20,302 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:57:20,306 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:57:20,645 epoch 10 - iter 4/41 - loss 0.16172596 - samples/sec: 12.78 - lr: 0.010000\n",
-      "2021-09-08 01:57:20,935 epoch 10 - iter 8/41 - loss 0.09435207 - samples/sec: 13.80 - lr: 0.010000\n",
-      "2021-09-08 01:57:21,231 epoch 10 - iter 12/41 - loss 0.08937085 - samples/sec: 13.55 - lr: 0.010000\n",
-      "2021-09-08 01:57:21,541 epoch 10 - iter 16/41 - loss 0.07719541 - samples/sec: 12.94 - lr: 0.010000\n",
-      "2021-09-08 01:57:21,789 epoch 10 - iter 20/41 - loss 0.06400977 - samples/sec: 16.11 - lr: 0.010000\n",
-      "2021-09-08 01:57:22,214 epoch 10 - iter 24/41 - loss 0.06692759 - samples/sec: 9.42 - lr: 0.010000\n",
-      "2021-09-08 01:57:22,527 epoch 10 - iter 28/41 - loss 0.10779743 - samples/sec: 12.81 - lr: 0.010000\n",
-      "2021-09-08 01:57:22,771 epoch 10 - iter 32/41 - loss 0.09625004 - samples/sec: 16.47 - lr: 0.010000\n",
-      "2021-09-08 01:57:23,073 epoch 10 - iter 36/41 - loss 0.10265556 - samples/sec: 13.27 - lr: 0.010000\n",
-      "2021-09-08 01:57:23,442 epoch 10 - iter 40/41 - loss 0.10906612 - samples/sec: 10.84 - lr: 0.010000\n",
-      "2021-09-08 01:57:23,536 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:57:23,537 EPOCH 10 done: loss 0.1065 - lr 0.0100000\n",
-      "2021-09-08 01:57:23,738 DEV : loss 1.2692331075668335 - score 0.0\n",
-      "2021-09-08 01:57:23,738 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:57:30,670 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:57:30,670 Testing using best model ...\n",
-      "2021-09-08 01:57:30,672 loading file temp1/best-model.pt\n",
+      "2021-09-21 21:30:28,073 EPOCH 5 done: loss 0.3836 - lr 0.0200000\n",
+      "2021-09-21 21:30:28,194 DEV : loss 0.6878945827484131 - score 0.25\n",
+      "2021-09-21 21:30:28,195 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:30:28,198 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:30:28,491 epoch 6 - iter 4/41 - loss 0.13422390 - samples/sec: 15.08 - lr: 0.020000\n",
+      "2021-09-21 21:30:28,782 epoch 6 - iter 8/41 - loss 0.57230117 - samples/sec: 13.75 - lr: 0.020000\n",
+      "2021-09-21 21:30:29,033 epoch 6 - iter 12/41 - loss 0.41779356 - samples/sec: 15.97 - lr: 0.020000\n",
+      "2021-09-21 21:30:29,352 epoch 6 - iter 16/41 - loss 0.38394295 - samples/sec: 12.55 - lr: 0.020000\n",
+      "2021-09-21 21:30:29,616 epoch 6 - iter 20/41 - loss 0.35459252 - samples/sec: 15.19 - lr: 0.020000\n",
+      "2021-09-21 21:30:30,011 epoch 6 - iter 24/41 - loss 0.30029019 - samples/sec: 10.14 - lr: 0.020000\n",
+      "2021-09-21 21:30:30,311 epoch 6 - iter 28/41 - loss 0.30299592 - samples/sec: 13.39 - lr: 0.020000\n",
+      "2021-09-21 21:30:30,683 epoch 6 - iter 32/41 - loss 0.31708822 - samples/sec: 10.75 - lr: 0.020000\n",
+      "2021-09-21 21:30:30,943 epoch 6 - iter 36/41 - loss 0.31740212 - samples/sec: 15.44 - lr: 0.020000\n",
+      "2021-09-21 21:30:31,201 epoch 6 - iter 40/41 - loss 0.28839522 - samples/sec: 15.52 - lr: 0.020000\n",
+      "2021-09-21 21:30:31,274 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:30:31,274 EPOCH 6 done: loss 0.2814 - lr 0.0200000\n",
+      "2021-09-21 21:30:31,586 DEV : loss 0.6668463945388794 - score 0.25\n",
+      "2021-09-21 21:30:31,588 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:30:31,590 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:30:31,918 epoch 7 - iter 4/41 - loss 0.12502356 - samples/sec: 13.05 - lr: 0.020000\n",
+      "2021-09-21 21:30:32,313 epoch 7 - iter 8/41 - loss 0.09975322 - samples/sec: 10.13 - lr: 0.020000\n",
+      "2021-09-21 21:30:32,575 epoch 7 - iter 12/41 - loss 0.09065085 - samples/sec: 15.28 - lr: 0.020000\n",
+      "2021-09-21 21:30:32,824 epoch 7 - iter 16/41 - loss 0.10525245 - samples/sec: 16.12 - lr: 0.020000\n",
+      "2021-09-21 21:30:33,083 epoch 7 - iter 20/41 - loss 0.09268652 - samples/sec: 15.47 - lr: 0.020000\n",
+      "2021-09-21 21:30:33,344 epoch 7 - iter 24/41 - loss 0.20236619 - samples/sec: 15.33 - lr: 0.020000\n",
+      "2021-09-21 21:30:33,713 epoch 7 - iter 28/41 - loss 0.21649868 - samples/sec: 10.87 - lr: 0.020000\n",
+      "2021-09-21 21:30:34,025 epoch 7 - iter 32/41 - loss 0.26092800 - samples/sec: 12.83 - lr: 0.020000\n",
+      "2021-09-21 21:30:34,271 epoch 7 - iter 36/41 - loss 0.23852451 - samples/sec: 16.28 - lr: 0.020000\n",
+      "2021-09-21 21:30:34,589 epoch 7 - iter 40/41 - loss 0.21668277 - samples/sec: 12.61 - lr: 0.020000\n",
+      "2021-09-21 21:30:34,652 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:30:34,653 EPOCH 7 done: loss 0.2115 - lr 0.0200000\n",
+      "2021-09-21 21:30:34,773 DEV : loss 0.34542882442474365 - score 0.75\n",
+      "2021-09-21 21:30:34,774 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:30:38,983 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:30:39,306 epoch 8 - iter 4/41 - loss 0.13535843 - samples/sec: 13.85 - lr: 0.020000\n",
+      "2021-09-21 21:30:39,563 epoch 8 - iter 8/41 - loss 0.45693324 - samples/sec: 15.60 - lr: 0.020000\n",
+      "2021-09-21 21:30:39,839 epoch 8 - iter 12/41 - loss 0.31556703 - samples/sec: 14.57 - lr: 0.020000\n",
+      "2021-09-21 21:30:40,109 epoch 8 - iter 16/41 - loss 0.27160975 - samples/sec: 14.80 - lr: 0.020000\n",
+      "2021-09-21 21:30:40,532 epoch 8 - iter 20/41 - loss 0.24243926 - samples/sec: 9.48 - lr: 0.020000\n",
+      "2021-09-21 21:30:40,913 epoch 8 - iter 24/41 - loss 0.22017897 - samples/sec: 10.50 - lr: 0.020000\n",
+      "2021-09-21 21:30:41,241 epoch 8 - iter 28/41 - loss 0.23798903 - samples/sec: 12.24 - lr: 0.020000\n",
+      "2021-09-21 21:30:41,512 epoch 8 - iter 32/41 - loss 0.21805177 - samples/sec: 14.79 - lr: 0.020000\n",
+      "2021-09-21 21:30:41,753 epoch 8 - iter 36/41 - loss 0.19475640 - samples/sec: 16.63 - lr: 0.020000\n",
+      "2021-09-21 21:30:42,038 epoch 8 - iter 40/41 - loss 0.17737182 - samples/sec: 14.06 - lr: 0.020000\n",
+      "2021-09-21 21:30:42,095 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:30:42,095 EPOCH 8 done: loss 0.1731 - lr 0.0200000\n",
+      "2021-09-21 21:30:42,204 DEV : loss 0.43244487047195435 - score 0.5\n",
+      "2021-09-21 21:30:42,205 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:30:42,207 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:30:42,498 epoch 9 - iter 4/41 - loss 0.02975003 - samples/sec: 16.03 - lr: 0.020000\n",
+      "2021-09-21 21:30:42,735 epoch 9 - iter 8/41 - loss 0.01922025 - samples/sec: 16.92 - lr: 0.020000\n",
+      "2021-09-21 21:30:43,002 epoch 9 - iter 12/41 - loss 0.02227687 - samples/sec: 15.01 - lr: 0.020000\n",
+      "2021-09-21 21:30:43,268 epoch 9 - iter 16/41 - loss 0.02069861 - samples/sec: 15.05 - lr: 0.020000\n",
+      "2021-09-21 21:30:43,504 epoch 9 - iter 20/41 - loss 0.04438330 - samples/sec: 17.00 - lr: 0.020000\n",
+      "2021-09-21 21:30:43,799 epoch 9 - iter 24/41 - loss 0.04756092 - samples/sec: 13.58 - lr: 0.020000\n",
+      "2021-09-21 21:30:44,132 epoch 9 - iter 28/41 - loss 0.09293676 - samples/sec: 12.05 - lr: 0.020000\n",
+      "2021-09-21 21:30:44,501 epoch 9 - iter 32/41 - loss 0.08154567 - samples/sec: 10.84 - lr: 0.020000\n",
+      "2021-09-21 21:30:45,056 epoch 9 - iter 36/41 - loss 0.08546294 - samples/sec: 7.22 - lr: 0.020000\n",
+      "2021-09-21 21:30:45,301 epoch 9 - iter 40/41 - loss 0.09181159 - samples/sec: 16.39 - lr: 0.020000\n",
+      "2021-09-21 21:30:45,354 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:30:45,355 EPOCH 9 done: loss 0.0896 - lr 0.0200000\n",
+      "2021-09-21 21:30:45,476 DEV : loss 0.768585205078125 - score 0.25\n",
+      "2021-09-21 21:30:45,477 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:30:45,479 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:30:45,740 epoch 10 - iter 4/41 - loss 0.11465257 - samples/sec: 17.10 - lr: 0.020000\n",
+      "2021-09-21 21:30:46,017 epoch 10 - iter 8/41 - loss 0.10892961 - samples/sec: 14.46 - lr: 0.020000\n",
+      "2021-09-21 21:30:46,438 epoch 10 - iter 12/41 - loss 0.07347434 - samples/sec: 9.50 - lr: 0.020000\n",
+      "2021-09-21 21:30:46,661 epoch 10 - iter 16/41 - loss 0.05665514 - samples/sec: 18.00 - lr: 0.020000\n",
+      "2021-09-21 21:30:46,953 epoch 10 - iter 20/41 - loss 0.16742484 - samples/sec: 13.75 - lr: 0.020000\n",
+      "2021-09-21 21:30:47,339 epoch 10 - iter 24/41 - loss 0.21684032 - samples/sec: 10.38 - lr: 0.020000\n",
+      "2021-09-21 21:30:47,592 epoch 10 - iter 28/41 - loss 0.24012224 - samples/sec: 15.82 - lr: 0.020000\n",
+      "2021-09-21 21:30:47,817 epoch 10 - iter 32/41 - loss 0.21548602 - samples/sec: 17.87 - lr: 0.020000\n",
+      "2021-09-21 21:30:48,058 epoch 10 - iter 36/41 - loss 0.19547174 - samples/sec: 16.65 - lr: 0.020000\n",
+      "2021-09-21 21:30:48,293 epoch 10 - iter 40/41 - loss 0.17602728 - samples/sec: 17.01 - lr: 0.020000\n",
+      "2021-09-21 21:30:48,357 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:30:48,358 EPOCH 10 done: loss 0.2026 - lr 0.0200000\n",
+      "2021-09-21 21:30:48,479 DEV : loss 0.2350221425294876 - score 0.75\n",
+      "2021-09-21 21:30:48,482 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:30:56,451 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:30:56,452 Testing using best model ...\n",
+      "2021-09-21 21:30:56,453 loading file temp1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:57:35,602 \t0.6\n",
-      "2021-09-08 01:57:35,603 \n",
+      "2021-09-21 21:31:01,183 \t0.2\n",
+      "2021-09-21 21:31:01,183 \n",
       "Results:\n",
-      "- F-score (micro) 0.6\n",
-      "- F-score (macro) 0.5333\n",
-      "- Accuracy 0.6\n",
+      "- F-score (micro) 0.2\n",
+      "- F-score (macro) 0.1333\n",
+      "- Accuracy 0.2\n",
       "\n",
       "By class:\n",
       "                                                                      precision    recall  f1-score   support\n",
       "\n",
-      "            one who does not side with any party in a war or dispute     0.0000    0.0000    0.0000         1\n",
-      "that which is below standard or expectations as of ethics or decency     1.0000    1.0000    1.0000         1\n",
+      "            one who does not side with any party in a war or dispute     0.0000    0.0000    0.0000         0\n",
+      "that which is below standard or expectations as of ethics or decency     1.0000    0.5000    0.6667         2\n",
       "                        that which is pleasing or valuable or useful     0.0000    0.0000    0.0000         1\n",
-      "  remarkable or out of the ordinary in degree or magnitude or effect     0.5000    1.0000    0.6667         1\n",
-      "                                    exceptionally bad or displeasing     1.0000    1.0000    1.0000         1\n",
-      "\n",
-      "                                                           micro avg     0.6000    0.6000    0.6000         5\n",
-      "                                                           macro avg     0.5000    0.6000    0.5333         5\n",
-      "                                                        weighted avg     0.5000    0.6000    0.5333         5\n",
-      "                                                         samples avg     0.6000    0.6000    0.6000         5\n",
+      "  remarkable or out of the ordinary in degree or magnitude or effect     0.0000    0.0000    0.0000         2\n",
+      "                                    exceptionally bad or displeasing     0.0000    0.0000    0.0000         0\n",
       "\n",
-      "2021-09-08 01:57:35,603 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:24,585 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
+      "                                                           micro avg     0.2000    0.2000    0.2000         5\n",
+      "                                                           macro avg     0.2000    0.1000    0.1333         5\n",
+      "                                                        weighted avg     0.4000    0.2000    0.2667         5\n",
+      "                                                         samples avg     0.2000    0.2000    0.2000         5\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 21:31:01,184 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:31:49,054 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:58:28,723 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:31:53,015 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 46/46 [00:00<00:00, 16586.83it/s]"
+      "100%|██████████| 46/46 [00:00<00:00, 18664.79it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:58:28,727 [b'one who does not side with any party in a war or dispute', b'that which is below standard or expectations as of ethics or decency', b'that which is pleasing or valuable or useful', b'remarkable or out of the ordinary in degree or magnitude or effect', b'exceptionally bad or displeasing']\n",
-      "2021-09-08 01:58:28,740 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:28,742 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:31:53,019 [b'one who does not side with any party in a war or dispute', b'that which is below standard or expectations as of ethics or decency', b'that which is pleasing or valuable or useful', b'remarkable or out of the ordinary in degree or magnitude or effect', b'exceptionally bad or displeasing']\n",
+      "2021-09-21 21:31:53,028 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:31:53,030 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -7732,25 +7752,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:58:28,742 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:28,743 Corpus: \"Corpus: 41 train + 4 dev + 5 test sentences\"\n",
-      "2021-09-08 01:58:28,743 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:28,743 Parameters:\n",
-      "2021-09-08 01:58:28,743  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:58:28,744  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:58:28,744  - patience: \"3\"\n",
-      "2021-09-08 01:58:28,744  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:58:28,744  - max_epochs: \"10\"\n",
-      "2021-09-08 01:58:28,745  - shuffle: \"True\"\n",
-      "2021-09-08 01:58:28,745  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:58:28,745  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:58:28,746 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:28,746 Model training base path: \"temp1\"\n",
-      "2021-09-08 01:58:28,746 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:28,746 Device: cuda:0\n",
-      "2021-09-08 01:58:28,747 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:28,747 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:58:28,753 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:31:53,030 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:31:53,031 Corpus: \"Corpus: 41 train + 4 dev + 5 test sentences\"\n",
+      "2021-09-21 21:31:53,031 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:31:53,031 Parameters:\n",
+      "2021-09-21 21:31:53,032  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:31:53,032  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:31:53,032  - patience: \"3\"\n",
+      "2021-09-21 21:31:53,033  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:31:53,033  - max_epochs: \"10\"\n",
+      "2021-09-21 21:31:53,033  - shuffle: \"True\"\n",
+      "2021-09-21 21:31:53,033  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:31:53,034  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:31:53,034 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:31:53,034 Model training base path: \"temp1\"\n",
+      "2021-09-21 21:31:53,035 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:31:53,035 Device: cuda:0\n",
+      "2021-09-21 21:31:53,035 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:31:53,036 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:31:53,042 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -7764,211 +7784,210 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:58:29,061 epoch 1 - iter 4/41 - loss 0.45895466 - samples/sec: 13.99 - lr: 0.020000\n",
-      "2021-09-08 01:58:29,507 epoch 1 - iter 8/41 - loss 0.33778767 - samples/sec: 8.97 - lr: 0.020000\n",
-      "2021-09-08 01:58:29,796 epoch 1 - iter 12/41 - loss 0.67405958 - samples/sec: 13.89 - lr: 0.020000\n",
-      "2021-09-08 01:58:30,041 epoch 1 - iter 16/41 - loss 0.68351192 - samples/sec: 16.36 - lr: 0.020000\n",
-      "2021-09-08 01:58:30,378 epoch 1 - iter 20/41 - loss 0.74679424 - samples/sec: 11.86 - lr: 0.020000\n",
-      "2021-09-08 01:58:30,777 epoch 1 - iter 24/41 - loss 0.68990345 - samples/sec: 10.05 - lr: 0.020000\n",
-      "2021-09-08 01:58:31,042 epoch 1 - iter 28/41 - loss 0.67911222 - samples/sec: 15.11 - lr: 0.020000\n",
-      "2021-09-08 01:58:31,307 epoch 1 - iter 32/41 - loss 0.60614326 - samples/sec: 15.12 - lr: 0.020000\n",
-      "2021-09-08 01:58:31,778 epoch 1 - iter 36/41 - loss 0.61596523 - samples/sec: 8.50 - lr: 0.020000\n",
-      "2021-09-08 01:58:32,003 epoch 1 - iter 40/41 - loss 0.61794133 - samples/sec: 17.86 - lr: 0.020000\n",
-      "2021-09-08 01:58:32,077 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:32,077 EPOCH 1 done: loss 0.6050 - lr 0.0200000\n",
-      "2021-09-08 01:58:32,240 DEV : loss 0.9148366451263428 - score 0.5\n",
-      "2021-09-08 01:58:32,241 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:58:39,273 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:39,629 epoch 2 - iter 4/41 - loss 0.98992932 - samples/sec: 11.83 - lr: 0.020000\n",
-      "2021-09-08 01:58:39,985 epoch 2 - iter 8/41 - loss 0.77695094 - samples/sec: 11.26 - lr: 0.020000\n",
-      "2021-09-08 01:58:40,302 epoch 2 - iter 12/41 - loss 0.68007787 - samples/sec: 12.63 - lr: 0.020000\n",
-      "2021-09-08 01:58:40,633 epoch 2 - iter 16/41 - loss 0.62218977 - samples/sec: 12.12 - lr: 0.020000\n",
-      "2021-09-08 01:58:40,951 epoch 2 - iter 20/41 - loss 0.63679048 - samples/sec: 12.60 - lr: 0.020000\n",
-      "2021-09-08 01:58:41,217 epoch 2 - iter 24/41 - loss 0.83674338 - samples/sec: 15.08 - lr: 0.020000\n",
-      "2021-09-08 01:58:41,533 epoch 2 - iter 28/41 - loss 0.83672003 - samples/sec: 12.67 - lr: 0.020000\n",
-      "2021-09-08 01:58:41,854 epoch 2 - iter 32/41 - loss 0.77277423 - samples/sec: 12.48 - lr: 0.020000\n",
-      "2021-09-08 01:58:42,316 epoch 2 - iter 36/41 - loss 0.75053491 - samples/sec: 8.67 - lr: 0.020000\n",
-      "2021-09-08 01:58:42,709 epoch 2 - iter 40/41 - loss 0.75168855 - samples/sec: 10.21 - lr: 0.020000\n",
-      "2021-09-08 01:58:42,762 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:42,762 EPOCH 2 done: loss 0.7508 - lr 0.0200000\n",
-      "2021-09-08 01:58:43,045 DEV : loss 0.6256421208381653 - score 0.5\n",
-      "2021-09-08 01:58:43,046 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:31:53,257 epoch 1 - iter 4/41 - loss 0.71729925 - samples/sec: 19.93 - lr: 0.020000\n",
+      "2021-09-21 21:31:53,515 epoch 1 - iter 8/41 - loss 0.73650781 - samples/sec: 15.50 - lr: 0.020000\n",
+      "2021-09-21 21:31:53,707 epoch 1 - iter 12/41 - loss 0.75892136 - samples/sec: 20.97 - lr: 0.020000\n",
+      "2021-09-21 21:31:53,947 epoch 1 - iter 16/41 - loss 0.72998673 - samples/sec: 16.64 - lr: 0.020000\n",
+      "2021-09-21 21:31:54,154 epoch 1 - iter 20/41 - loss 0.69500316 - samples/sec: 19.37 - lr: 0.020000\n",
+      "2021-09-21 21:31:54,409 epoch 1 - iter 24/41 - loss 0.66291800 - samples/sec: 15.74 - lr: 0.020000\n",
+      "2021-09-21 21:31:54,623 epoch 1 - iter 28/41 - loss 0.69728854 - samples/sec: 18.78 - lr: 0.020000\n",
+      "2021-09-21 21:31:54,827 epoch 1 - iter 32/41 - loss 0.69330529 - samples/sec: 19.62 - lr: 0.020000\n",
+      "2021-09-21 21:31:55,085 epoch 1 - iter 36/41 - loss 0.63936677 - samples/sec: 15.55 - lr: 0.020000\n",
+      "2021-09-21 21:31:55,280 epoch 1 - iter 40/41 - loss 0.59137848 - samples/sec: 20.59 - lr: 0.020000\n",
+      "2021-09-21 21:31:55,342 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:31:55,343 EPOCH 1 done: loss 0.5813 - lr 0.0200000\n",
+      "2021-09-21 21:31:55,513 DEV : loss 1.6068806648254395 - score 0.25\n",
+      "2021-09-21 21:31:55,514 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:58:49,842 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:50,286 epoch 3 - iter 4/41 - loss 0.75709302 - samples/sec: 9.47 - lr: 0.020000\n",
-      "2021-09-08 01:58:50,486 epoch 3 - iter 8/41 - loss 0.64962852 - samples/sec: 20.08 - lr: 0.020000\n",
-      "2021-09-08 01:58:50,794 epoch 3 - iter 12/41 - loss 0.55877859 - samples/sec: 12.99 - lr: 0.020000\n",
-      "2021-09-08 01:58:51,052 epoch 3 - iter 16/41 - loss 0.63832891 - samples/sec: 15.57 - lr: 0.020000\n",
-      "2021-09-08 01:58:51,327 epoch 3 - iter 20/41 - loss 0.67205365 - samples/sec: 14.57 - lr: 0.020000\n",
-      "2021-09-08 01:58:51,596 epoch 3 - iter 24/41 - loss 0.60597626 - samples/sec: 14.92 - lr: 0.020000\n",
-      "2021-09-08 01:58:51,938 epoch 3 - iter 28/41 - loss 0.61544678 - samples/sec: 11.70 - lr: 0.020000\n",
-      "2021-09-08 01:58:52,229 epoch 3 - iter 32/41 - loss 0.62438916 - samples/sec: 13.79 - lr: 0.020000\n",
-      "2021-09-08 01:58:52,477 epoch 3 - iter 36/41 - loss 0.63965872 - samples/sec: 16.17 - lr: 0.020000\n",
-      "2021-09-08 01:58:52,788 epoch 3 - iter 40/41 - loss 0.62252056 - samples/sec: 12.87 - lr: 0.020000\n",
-      "2021-09-08 01:58:52,855 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:52,855 EPOCH 3 done: loss 0.6099 - lr 0.0200000\n",
-      "2021-09-08 01:58:53,061 DEV : loss 0.742709219455719 - score 0.5\n",
-      "2021-09-08 01:58:53,062 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:58:53,065 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:53,344 epoch 4 - iter 4/41 - loss 0.35443392 - samples/sec: 15.54 - lr: 0.020000\n",
-      "2021-09-08 01:58:53,658 epoch 4 - iter 8/41 - loss 0.25039079 - samples/sec: 12.75 - lr: 0.020000\n",
-      "2021-09-08 01:58:54,051 epoch 4 - iter 12/41 - loss 0.44234947 - samples/sec: 10.20 - lr: 0.020000\n",
-      "2021-09-08 01:58:54,362 epoch 4 - iter 16/41 - loss 0.41097112 - samples/sec: 12.90 - lr: 0.020000\n",
-      "2021-09-08 01:58:54,823 epoch 4 - iter 20/41 - loss 0.35427842 - samples/sec: 8.68 - lr: 0.020000\n",
-      "2021-09-08 01:58:55,245 epoch 4 - iter 24/41 - loss 0.35132805 - samples/sec: 9.48 - lr: 0.020000\n",
-      "2021-09-08 01:58:55,553 epoch 4 - iter 28/41 - loss 0.36415816 - samples/sec: 13.02 - lr: 0.020000\n",
-      "2021-09-08 01:58:56,064 epoch 4 - iter 32/41 - loss 0.36957153 - samples/sec: 7.85 - lr: 0.020000\n",
-      "2021-09-08 01:58:56,408 epoch 4 - iter 36/41 - loss 0.38527574 - samples/sec: 11.64 - lr: 0.020000\n",
-      "2021-09-08 01:58:56,692 epoch 4 - iter 40/41 - loss 0.39896627 - samples/sec: 14.12 - lr: 0.020000\n",
-      "2021-09-08 01:58:56,767 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:56,767 EPOCH 4 done: loss 0.4100 - lr 0.0200000\n",
-      "2021-09-08 01:58:57,002 DEV : loss 0.9859580993652344 - score 0.25\n",
-      "2021-09-08 01:58:57,003 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:58:57,079 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:57,468 epoch 5 - iter 4/41 - loss 0.51287953 - samples/sec: 12.14 - lr: 0.020000\n",
-      "2021-09-08 01:58:57,755 epoch 5 - iter 8/41 - loss 0.30580349 - samples/sec: 13.95 - lr: 0.020000\n",
-      "2021-09-08 01:58:58,365 epoch 5 - iter 12/41 - loss 0.31294439 - samples/sec: 6.57 - lr: 0.020000\n",
-      "2021-09-08 01:58:58,656 epoch 5 - iter 16/41 - loss 0.42530449 - samples/sec: 13.79 - lr: 0.020000\n",
-      "2021-09-08 01:58:59,000 epoch 5 - iter 20/41 - loss 0.42888899 - samples/sec: 11.65 - lr: 0.020000\n",
-      "2021-09-08 01:58:59,293 epoch 5 - iter 24/41 - loss 0.42345753 - samples/sec: 13.67 - lr: 0.020000\n",
-      "2021-09-08 01:58:59,636 epoch 5 - iter 28/41 - loss 0.44529837 - samples/sec: 11.70 - lr: 0.020000\n",
-      "2021-09-08 01:58:59,889 epoch 5 - iter 32/41 - loss 0.43826030 - samples/sec: 15.83 - lr: 0.020000\n",
-      "2021-09-08 01:59:00,137 epoch 5 - iter 36/41 - loss 0.39350821 - samples/sec: 16.21 - lr: 0.020000\n",
-      "2021-09-08 01:59:00,414 epoch 5 - iter 40/41 - loss 0.42222281 - samples/sec: 14.46 - lr: 0.020000\n",
-      "2021-09-08 01:59:00,475 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:59:00,476 EPOCH 5 done: loss 0.4174 - lr 0.0200000\n",
-      "2021-09-08 01:59:00,595 DEV : loss 0.5313699245452881 - score 0.5\n",
-      "2021-09-08 01:59:00,596 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:32:12,722 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:32:13,014 epoch 2 - iter 4/41 - loss 1.64348927 - samples/sec: 14.44 - lr: 0.020000\n",
+      "2021-09-21 21:32:13,251 epoch 2 - iter 8/41 - loss 1.07993741 - samples/sec: 16.96 - lr: 0.020000\n",
+      "2021-09-21 21:32:13,456 epoch 2 - iter 12/41 - loss 0.86968793 - samples/sec: 19.57 - lr: 0.020000\n",
+      "2021-09-21 21:32:13,652 epoch 2 - iter 16/41 - loss 0.77841432 - samples/sec: 20.41 - lr: 0.020000\n",
+      "2021-09-21 21:32:13,916 epoch 2 - iter 20/41 - loss 0.76960017 - samples/sec: 15.20 - lr: 0.020000\n",
+      "2021-09-21 21:32:14,114 epoch 2 - iter 24/41 - loss 0.78269658 - samples/sec: 20.20 - lr: 0.020000\n",
+      "2021-09-21 21:32:14,316 epoch 2 - iter 28/41 - loss 0.71109763 - samples/sec: 19.86 - lr: 0.020000\n",
+      "2021-09-21 21:32:14,526 epoch 2 - iter 32/41 - loss 0.71903021 - samples/sec: 19.16 - lr: 0.020000\n",
+      "2021-09-21 21:32:14,733 epoch 2 - iter 36/41 - loss 0.71654322 - samples/sec: 19.31 - lr: 0.020000\n",
+      "2021-09-21 21:32:14,997 epoch 2 - iter 40/41 - loss 0.74029242 - samples/sec: 15.21 - lr: 0.020000\n",
+      "2021-09-21 21:32:15,050 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:32:15,050 EPOCH 2 done: loss 0.7373 - lr 0.0200000\n",
+      "2021-09-21 21:32:15,364 DEV : loss 0.4468148648738861 - score 0.5\n",
+      "2021-09-21 21:32:15,365 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:59:06,866 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:59:07,315 epoch 6 - iter 4/41 - loss 0.40527464 - samples/sec: 9.25 - lr: 0.020000\n",
-      "2021-09-08 01:59:07,526 epoch 6 - iter 8/41 - loss 0.41124380 - samples/sec: 18.96 - lr: 0.020000\n",
-      "2021-09-08 01:59:07,926 epoch 6 - iter 12/41 - loss 0.31795770 - samples/sec: 10.02 - lr: 0.020000\n",
-      "2021-09-08 01:59:08,200 epoch 6 - iter 16/41 - loss 0.32984301 - samples/sec: 14.65 - lr: 0.020000\n",
-      "2021-09-08 01:59:08,476 epoch 6 - iter 20/41 - loss 0.42945447 - samples/sec: 14.51 - lr: 0.020000\n",
-      "2021-09-08 01:59:08,833 epoch 6 - iter 24/41 - loss 0.41358434 - samples/sec: 11.22 - lr: 0.020000\n",
-      "2021-09-08 01:59:09,132 epoch 6 - iter 28/41 - loss 0.39745783 - samples/sec: 13.42 - lr: 0.020000\n",
-      "2021-09-08 01:59:09,443 epoch 6 - iter 32/41 - loss 0.46118687 - samples/sec: 12.90 - lr: 0.020000\n",
-      "2021-09-08 01:59:09,699 epoch 6 - iter 36/41 - loss 0.44380606 - samples/sec: 15.63 - lr: 0.020000\n",
-      "2021-09-08 01:59:10,107 epoch 6 - iter 40/41 - loss 0.43977963 - samples/sec: 9.83 - lr: 0.020000\n",
-      "2021-09-08 01:59:10,166 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:32:21,376 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:32:21,660 epoch 3 - iter 4/41 - loss 0.58359245 - samples/sec: 16.04 - lr: 0.020000\n",
+      "2021-09-21 21:32:21,904 epoch 3 - iter 8/41 - loss 0.58228523 - samples/sec: 16.45 - lr: 0.020000\n",
+      "2021-09-21 21:32:22,149 epoch 3 - iter 12/41 - loss 0.50906458 - samples/sec: 16.34 - lr: 0.020000\n",
+      "2021-09-21 21:32:22,445 epoch 3 - iter 16/41 - loss 0.54879838 - samples/sec: 13.57 - lr: 0.020000\n",
+      "2021-09-21 21:32:22,817 epoch 3 - iter 20/41 - loss 0.54367812 - samples/sec: 10.74 - lr: 0.020000\n",
+      "2021-09-21 21:32:23,077 epoch 3 - iter 24/41 - loss 0.51971825 - samples/sec: 15.45 - lr: 0.020000\n",
+      "2021-09-21 21:32:23,375 epoch 3 - iter 28/41 - loss 0.49645579 - samples/sec: 13.46 - lr: 0.020000\n",
+      "2021-09-21 21:32:23,671 epoch 3 - iter 32/41 - loss 0.49921879 - samples/sec: 13.51 - lr: 0.020000\n",
+      "2021-09-21 21:32:23,912 epoch 3 - iter 36/41 - loss 0.53472401 - samples/sec: 16.68 - lr: 0.020000\n",
+      "2021-09-21 21:32:24,164 epoch 3 - iter 40/41 - loss 0.55019729 - samples/sec: 15.85 - lr: 0.020000\n",
+      "2021-09-21 21:32:24,302 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:32:24,303 EPOCH 3 done: loss 0.5473 - lr 0.0200000\n",
+      "2021-09-21 21:32:24,420 DEV : loss 0.5661556124687195 - score 0.5\n",
+      "2021-09-21 21:32:24,422 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:32:24,424 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:32:24,688 epoch 4 - iter 4/41 - loss 0.38344856 - samples/sec: 16.56 - lr: 0.020000\n",
+      "2021-09-21 21:32:24,942 epoch 4 - iter 8/41 - loss 0.54927396 - samples/sec: 15.78 - lr: 0.020000\n",
+      "2021-09-21 21:32:25,202 epoch 4 - iter 12/41 - loss 0.48165137 - samples/sec: 15.42 - lr: 0.020000\n",
+      "2021-09-21 21:32:25,556 epoch 4 - iter 16/41 - loss 0.52199842 - samples/sec: 11.33 - lr: 0.020000\n",
+      "2021-09-21 21:32:25,817 epoch 4 - iter 20/41 - loss 0.54571622 - samples/sec: 15.34 - lr: 0.020000\n",
+      "2021-09-21 21:32:26,073 epoch 4 - iter 24/41 - loss 0.50648962 - samples/sec: 15.69 - lr: 0.020000\n",
+      "2021-09-21 21:32:26,416 epoch 4 - iter 28/41 - loss 0.49154282 - samples/sec: 11.65 - lr: 0.020000\n",
+      "2021-09-21 21:32:26,736 epoch 4 - iter 32/41 - loss 0.51076999 - samples/sec: 12.53 - lr: 0.020000\n",
+      "2021-09-21 21:32:26,981 epoch 4 - iter 36/41 - loss 0.52076683 - samples/sec: 16.36 - lr: 0.020000\n",
+      "2021-09-21 21:32:27,253 epoch 4 - iter 40/41 - loss 0.48492464 - samples/sec: 14.73 - lr: 0.020000\n",
+      "2021-09-21 21:32:27,312 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:32:27,312 EPOCH 4 done: loss 0.4872 - lr 0.0200000\n",
+      "2021-09-21 21:32:27,425 DEV : loss 0.45632559061050415 - score 0.25\n",
+      "2021-09-21 21:32:27,426 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:32:27,430 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:32:27,712 epoch 5 - iter 4/41 - loss 0.20987176 - samples/sec: 15.68 - lr: 0.020000\n",
+      "2021-09-21 21:32:28,009 epoch 5 - iter 8/41 - loss 0.33720900 - samples/sec: 13.49 - lr: 0.020000\n",
+      "2021-09-21 21:32:28,270 epoch 5 - iter 12/41 - loss 0.35143762 - samples/sec: 15.36 - lr: 0.020000\n",
+      "2021-09-21 21:32:28,609 epoch 5 - iter 16/41 - loss 0.36536000 - samples/sec: 11.80 - lr: 0.020000\n",
+      "2021-09-21 21:32:28,885 epoch 5 - iter 20/41 - loss 0.41854443 - samples/sec: 14.55 - lr: 0.020000\n",
+      "2021-09-21 21:32:29,116 epoch 5 - iter 24/41 - loss 0.40637852 - samples/sec: 17.35 - lr: 0.020000\n",
+      "2021-09-21 21:32:29,397 epoch 5 - iter 28/41 - loss 0.42136094 - samples/sec: 14.28 - lr: 0.020000\n",
+      "2021-09-21 21:32:29,721 epoch 5 - iter 32/41 - loss 0.41455507 - samples/sec: 12.36 - lr: 0.020000\n",
+      "2021-09-21 21:32:29,996 epoch 5 - iter 36/41 - loss 0.40799639 - samples/sec: 14.58 - lr: 0.020000\n",
+      "2021-09-21 21:32:30,239 epoch 5 - iter 40/41 - loss 0.39660355 - samples/sec: 16.52 - lr: 0.020000\n",
+      "2021-09-21 21:32:30,309 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:32:30,310 EPOCH 5 done: loss 0.3910 - lr 0.0200000\n",
+      "2021-09-21 21:32:30,414 DEV : loss 0.7768427729606628 - score 0.5\n",
+      "2021-09-21 21:32:30,415 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:32:30,417 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:32:30,690 epoch 6 - iter 4/41 - loss 0.19404444 - samples/sec: 16.47 - lr: 0.020000\n",
+      "2021-09-21 21:32:31,050 epoch 6 - iter 8/41 - loss 0.41224584 - samples/sec: 11.12 - lr: 0.020000\n",
+      "2021-09-21 21:32:31,389 epoch 6 - iter 12/41 - loss 0.37416896 - samples/sec: 11.83 - lr: 0.020000\n",
+      "2021-09-21 21:32:31,629 epoch 6 - iter 16/41 - loss 0.29328845 - samples/sec: 16.70 - lr: 0.020000\n",
+      "2021-09-21 21:32:31,884 epoch 6 - iter 20/41 - loss 0.45786770 - samples/sec: 15.72 - lr: 0.020000\n",
+      "2021-09-21 21:32:32,120 epoch 6 - iter 24/41 - loss 0.47387422 - samples/sec: 16.98 - lr: 0.020000\n",
+      "2021-09-21 21:32:32,446 epoch 6 - iter 28/41 - loss 0.44377721 - samples/sec: 12.30 - lr: 0.020000\n",
+      "2021-09-21 21:32:32,709 epoch 6 - iter 32/41 - loss 0.43905548 - samples/sec: 15.23 - lr: 0.020000\n",
+      "2021-09-21 21:32:32,942 epoch 6 - iter 36/41 - loss 0.40239104 - samples/sec: 17.23 - lr: 0.020000\n",
+      "2021-09-21 21:32:33,244 epoch 6 - iter 40/41 - loss 0.38396754 - samples/sec: 13.24 - lr: 0.020000\n",
+      "2021-09-21 21:32:33,315 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:59:10,166 EPOCH 6 done: loss 0.4488 - lr 0.0200000\n",
-      "2021-09-08 01:59:10,300 DEV : loss 0.8806915283203125 - score 0.75\n",
-      "2021-09-08 01:59:10,300 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:32:33,316 EPOCH 6 done: loss 0.3808 - lr 0.0200000\n",
+      "2021-09-21 21:32:33,429 DEV : loss 0.4757530987262726 - score 0.5\n",
+      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:32:33,431 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:32:33,433 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:32:33,700 epoch 7 - iter 4/41 - loss 0.49034518 - samples/sec: 16.53 - lr: 0.010000\n",
+      "2021-09-21 21:32:33,912 epoch 7 - iter 8/41 - loss 0.38561436 - samples/sec: 18.96 - lr: 0.010000\n",
+      "2021-09-21 21:32:34,190 epoch 7 - iter 12/41 - loss 0.37663747 - samples/sec: 14.38 - lr: 0.010000\n",
+      "2021-09-21 21:32:34,578 epoch 7 - iter 16/41 - loss 0.46195424 - samples/sec: 10.32 - lr: 0.010000\n",
+      "2021-09-21 21:32:34,918 epoch 7 - iter 20/41 - loss 0.38170879 - samples/sec: 11.79 - lr: 0.010000\n",
+      "2021-09-21 21:32:35,163 epoch 7 - iter 24/41 - loss 0.40867363 - samples/sec: 16.39 - lr: 0.010000\n",
+      "2021-09-21 21:32:35,418 epoch 7 - iter 28/41 - loss 0.37950856 - samples/sec: 15.68 - lr: 0.010000\n",
+      "2021-09-21 21:32:35,683 epoch 7 - iter 32/41 - loss 0.34948845 - samples/sec: 15.16 - lr: 0.010000\n",
+      "2021-09-21 21:32:35,925 epoch 7 - iter 36/41 - loss 0.35296561 - samples/sec: 16.53 - lr: 0.010000\n",
+      "2021-09-21 21:32:36,164 epoch 7 - iter 40/41 - loss 0.32038394 - samples/sec: 16.78 - lr: 0.010000\n",
+      "2021-09-21 21:32:36,222 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:32:36,222 EPOCH 7 done: loss 0.3261 - lr 0.0100000\n",
+      "2021-09-21 21:32:36,325 DEV : loss 0.45274895429611206 - score 0.5\n",
+      "2021-09-21 21:32:36,326 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:32:36,328 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:32:36,642 epoch 8 - iter 4/41 - loss 0.42202247 - samples/sec: 13.64 - lr: 0.010000\n",
+      "2021-09-21 21:32:36,898 epoch 8 - iter 8/41 - loss 0.29616906 - samples/sec: 15.66 - lr: 0.010000\n",
+      "2021-09-21 21:32:37,133 epoch 8 - iter 12/41 - loss 0.30153328 - samples/sec: 17.04 - lr: 0.010000\n",
+      "2021-09-21 21:32:37,508 epoch 8 - iter 16/41 - loss 0.37632564 - samples/sec: 10.69 - lr: 0.010000\n",
+      "2021-09-21 21:32:37,844 epoch 8 - iter 20/41 - loss 0.41538498 - samples/sec: 11.91 - lr: 0.010000\n",
+      "2021-09-21 21:32:38,083 epoch 8 - iter 24/41 - loss 0.41213933 - samples/sec: 16.77 - lr: 0.010000\n",
+      "2021-09-21 21:32:38,319 epoch 8 - iter 28/41 - loss 0.36209612 - samples/sec: 17.00 - lr: 0.010000\n",
+      "2021-09-21 21:32:38,571 epoch 8 - iter 32/41 - loss 0.32597650 - samples/sec: 15.90 - lr: 0.010000\n",
+      "2021-09-21 21:32:38,827 epoch 8 - iter 36/41 - loss 0.29299934 - samples/sec: 15.65 - lr: 0.010000\n",
+      "2021-09-21 21:32:39,108 epoch 8 - iter 40/41 - loss 0.27725772 - samples/sec: 14.25 - lr: 0.010000\n",
+      "2021-09-21 21:32:39,172 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:32:39,173 EPOCH 8 done: loss 0.2708 - lr 0.0100000\n",
+      "2021-09-21 21:32:39,280 DEV : loss 0.3452635109424591 - score 0.75\n",
+      "2021-09-21 21:32:39,281 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:59:14,346 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:59:14,764 epoch 7 - iter 4/41 - loss 0.21786834 - samples/sec: 10.59 - lr: 0.020000\n",
-      "2021-09-08 01:59:15,019 epoch 7 - iter 8/41 - loss 0.16237477 - samples/sec: 15.71 - lr: 0.020000\n",
-      "2021-09-08 01:59:15,283 epoch 7 - iter 12/41 - loss 0.14700637 - samples/sec: 15.17 - lr: 0.020000\n",
-      "2021-09-08 01:59:15,592 epoch 7 - iter 16/41 - loss 0.16198171 - samples/sec: 12.97 - lr: 0.020000\n",
-      "2021-09-08 01:59:16,015 epoch 7 - iter 20/41 - loss 0.15013268 - samples/sec: 9.48 - lr: 0.020000\n",
-      "2021-09-08 01:59:16,391 epoch 7 - iter 24/41 - loss 0.12742287 - samples/sec: 10.67 - lr: 0.020000\n",
-      "2021-09-08 01:59:16,750 epoch 7 - iter 28/41 - loss 0.22141445 - samples/sec: 11.14 - lr: 0.020000\n",
-      "2021-09-08 01:59:16,986 epoch 7 - iter 32/41 - loss 0.23998186 - samples/sec: 17.03 - lr: 0.020000\n",
-      "2021-09-08 01:59:17,287 epoch 7 - iter 36/41 - loss 0.22754556 - samples/sec: 13.32 - lr: 0.020000\n",
-      "2021-09-08 01:59:17,549 epoch 7 - iter 40/41 - loss 0.32392258 - samples/sec: 15.33 - lr: 0.020000\n",
-      "2021-09-08 01:59:17,594 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:59:17,594 EPOCH 7 done: loss 0.3164 - lr 0.0200000\n",
-      "2021-09-08 01:59:17,821 DEV : loss 0.5175986886024475 - score 0.5\n",
-      "2021-09-08 01:59:17,822 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:59:17,897 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:59:18,136 epoch 8 - iter 4/41 - loss 0.12743232 - samples/sec: 18.46 - lr: 0.020000\n",
-      "2021-09-08 01:59:18,430 epoch 8 - iter 8/41 - loss 0.17077334 - samples/sec: 13.67 - lr: 0.020000\n",
-      "2021-09-08 01:59:18,705 epoch 8 - iter 12/41 - loss 0.14720331 - samples/sec: 14.54 - lr: 0.020000\n",
-      "2021-09-08 01:59:19,124 epoch 8 - iter 16/41 - loss 0.11722941 - samples/sec: 9.57 - lr: 0.020000\n",
-      "2021-09-08 01:59:19,420 epoch 8 - iter 20/41 - loss 0.09661468 - samples/sec: 13.53 - lr: 0.020000\n",
-      "2021-09-08 01:59:19,679 epoch 8 - iter 24/41 - loss 0.13932660 - samples/sec: 15.50 - lr: 0.020000\n",
-      "2021-09-08 01:59:19,955 epoch 8 - iter 28/41 - loss 0.25665995 - samples/sec: 14.54 - lr: 0.020000\n",
-      "2021-09-08 01:59:20,201 epoch 8 - iter 32/41 - loss 0.31748111 - samples/sec: 16.31 - lr: 0.020000\n",
-      "2021-09-08 01:59:20,411 epoch 8 - iter 36/41 - loss 0.34640206 - samples/sec: 19.11 - lr: 0.020000\n",
-      "2021-09-08 01:59:20,692 epoch 8 - iter 40/41 - loss 0.31788128 - samples/sec: 14.25 - lr: 0.020000\n",
-      "2021-09-08 01:59:20,753 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:59:20,753 EPOCH 8 done: loss 0.3136 - lr 0.0200000\n",
-      "2021-09-08 01:59:21,111 DEV : loss 1.0292963981628418 - score 0.5\n",
-      "2021-09-08 01:59:21,112 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:59:21,115 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:59:21,409 epoch 9 - iter 4/41 - loss 0.54567210 - samples/sec: 14.87 - lr: 0.020000\n",
-      "2021-09-08 01:59:21,696 epoch 9 - iter 8/41 - loss 0.47487062 - samples/sec: 13.95 - lr: 0.020000\n",
-      "2021-09-08 01:59:21,970 epoch 9 - iter 12/41 - loss 0.39383576 - samples/sec: 14.66 - lr: 0.020000\n",
-      "2021-09-08 01:59:22,333 epoch 9 - iter 16/41 - loss 0.39104196 - samples/sec: 11.03 - lr: 0.020000\n",
-      "2021-09-08 01:59:22,625 epoch 9 - iter 20/41 - loss 0.34481124 - samples/sec: 13.74 - lr: 0.020000\n",
-      "2021-09-08 01:59:22,929 epoch 9 - iter 24/41 - loss 0.30359352 - samples/sec: 13.17 - lr: 0.020000\n",
-      "2021-09-08 01:59:23,495 epoch 9 - iter 28/41 - loss 0.26236627 - samples/sec: 7.08 - lr: 0.020000\n",
-      "2021-09-08 01:59:23,763 epoch 9 - iter 32/41 - loss 0.31917303 - samples/sec: 14.98 - lr: 0.020000\n",
-      "2021-09-08 01:59:24,101 epoch 9 - iter 36/41 - loss 0.32085532 - samples/sec: 11.86 - lr: 0.020000\n",
-      "2021-09-08 01:59:24,431 epoch 9 - iter 40/41 - loss 0.29285384 - samples/sec: 12.11 - lr: 0.020000\n",
-      "2021-09-08 01:59:24,517 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:59:24,517 EPOCH 9 done: loss 0.3001 - lr 0.0200000\n",
-      "2021-09-08 01:59:24,645 DEV : loss 1.0605953931808472 - score 0.75\n",
-      "2021-09-08 01:59:24,646 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:59:24,648 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:59:24,977 epoch 10 - iter 4/41 - loss 0.38167935 - samples/sec: 12.96 - lr: 0.020000\n",
-      "2021-09-08 01:59:25,242 epoch 10 - iter 8/41 - loss 0.19859060 - samples/sec: 15.11 - lr: 0.020000\n",
-      "2021-09-08 01:59:25,537 epoch 10 - iter 12/41 - loss 0.16538840 - samples/sec: 13.59 - lr: 0.020000\n",
-      "2021-09-08 01:59:25,849 epoch 10 - iter 16/41 - loss 0.12469192 - samples/sec: 12.84 - lr: 0.020000\n",
-      "2021-09-08 01:59:26,082 epoch 10 - iter 20/41 - loss 0.17720114 - samples/sec: 17.21 - lr: 0.020000\n",
-      "2021-09-08 01:59:26,370 epoch 10 - iter 24/41 - loss 0.15359254 - samples/sec: 13.90 - lr: 0.020000\n",
-      "2021-09-08 01:59:26,687 epoch 10 - iter 28/41 - loss 0.17252924 - samples/sec: 12.64 - lr: 0.020000\n",
-      "2021-09-08 01:59:26,975 epoch 10 - iter 32/41 - loss 0.16803301 - samples/sec: 13.92 - lr: 0.020000\n",
-      "2021-09-08 01:59:27,352 epoch 10 - iter 36/41 - loss 0.21047557 - samples/sec: 10.64 - lr: 0.020000\n",
-      "2021-09-08 01:59:27,692 epoch 10 - iter 40/41 - loss 0.19288270 - samples/sec: 11.78 - lr: 0.020000\n",
-      "2021-09-08 01:59:27,950 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:59:27,950 EPOCH 10 done: loss 0.1885 - lr 0.0200000\n",
-      "2021-09-08 01:59:28,070 DEV : loss 0.9269056916236877 - score 0.75\n",
-      "Epoch    10: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:59:28,071 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:59:32,224 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:59:32,224 Testing using best model ...\n",
-      "2021-09-08 01:59:32,226 loading file temp1/best-model.pt\n",
+      "2021-09-21 21:32:43,081 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:32:43,332 epoch 9 - iter 4/41 - loss 0.18560294 - samples/sec: 17.46 - lr: 0.010000\n",
+      "2021-09-21 21:32:43,583 epoch 9 - iter 8/41 - loss 0.28061574 - samples/sec: 15.97 - lr: 0.010000\n",
+      "2021-09-21 21:32:43,870 epoch 9 - iter 12/41 - loss 0.36382707 - samples/sec: 13.96 - lr: 0.010000\n",
+      "2021-09-21 21:32:44,193 epoch 9 - iter 16/41 - loss 0.38423819 - samples/sec: 12.39 - lr: 0.010000\n",
+      "2021-09-21 21:32:44,466 epoch 9 - iter 20/41 - loss 0.35276567 - samples/sec: 14.69 - lr: 0.010000\n",
+      "2021-09-21 21:32:44,723 epoch 9 - iter 24/41 - loss 0.33438884 - samples/sec: 15.63 - lr: 0.010000\n",
+      "2021-09-21 21:32:44,960 epoch 9 - iter 28/41 - loss 0.30518574 - samples/sec: 16.93 - lr: 0.010000\n",
+      "2021-09-21 21:32:45,289 epoch 9 - iter 32/41 - loss 0.32730184 - samples/sec: 12.17 - lr: 0.010000\n",
+      "2021-09-21 21:32:45,560 epoch 9 - iter 36/41 - loss 0.33590253 - samples/sec: 14.79 - lr: 0.010000\n",
+      "2021-09-21 21:32:45,862 epoch 9 - iter 40/41 - loss 0.32800844 - samples/sec: 13.24 - lr: 0.010000\n",
+      "2021-09-21 21:32:45,930 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:32:45,931 EPOCH 9 done: loss 0.3207 - lr 0.0100000\n",
+      "2021-09-21 21:32:46,040 DEV : loss 0.5220744609832764 - score 0.5\n",
+      "2021-09-21 21:32:46,040 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:32:46,042 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:32:46,310 epoch 10 - iter 4/41 - loss 0.28102246 - samples/sec: 16.88 - lr: 0.010000\n",
+      "2021-09-21 21:32:46,604 epoch 10 - iter 8/41 - loss 0.40117879 - samples/sec: 13.63 - lr: 0.010000\n",
+      "2021-09-21 21:32:46,924 epoch 10 - iter 12/41 - loss 0.37890091 - samples/sec: 12.52 - lr: 0.010000\n",
+      "2021-09-21 21:32:47,195 epoch 10 - iter 16/41 - loss 0.37881584 - samples/sec: 14.78 - lr: 0.010000\n",
+      "2021-09-21 21:32:47,444 epoch 10 - iter 20/41 - loss 0.40141865 - samples/sec: 16.11 - lr: 0.010000\n",
+      "2021-09-21 21:32:47,748 epoch 10 - iter 24/41 - loss 0.33729769 - samples/sec: 13.17 - lr: 0.010000\n",
+      "2021-09-21 21:32:48,073 epoch 10 - iter 28/41 - loss 0.30917875 - samples/sec: 12.34 - lr: 0.010000\n",
+      "2021-09-21 21:32:48,305 epoch 10 - iter 32/41 - loss 0.30279237 - samples/sec: 17.24 - lr: 0.010000\n",
+      "2021-09-21 21:32:48,562 epoch 10 - iter 36/41 - loss 0.27293948 - samples/sec: 15.63 - lr: 0.010000\n",
+      "2021-09-21 21:32:48,811 epoch 10 - iter 40/41 - loss 0.29678924 - samples/sec: 16.11 - lr: 0.010000\n",
+      "2021-09-21 21:32:48,890 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:32:48,890 EPOCH 10 done: loss 0.2897 - lr 0.0100000\n",
+      "2021-09-21 21:32:49,006 DEV : loss 0.31221136450767517 - score 0.5\n",
+      "2021-09-21 21:32:49,007 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:32:53,021 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:32:53,022 Testing using best model ...\n",
+      "2021-09-21 21:32:53,023 loading file temp1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:59:37,580 \t0.4\n",
-      "2021-09-08 01:59:37,581 \n",
+      "2021-09-21 21:32:58,446 \t0.2\n",
+      "2021-09-21 21:32:58,447 \n",
       "Results:\n",
-      "- F-score (micro) 0.4\n",
-      "- F-score (macro) 0.3333\n",
-      "- Accuracy 0.4\n",
+      "- F-score (micro) 0.2\n",
+      "- F-score (macro) 0.1\n",
+      "- Accuracy 0.2\n",
       "\n",
       "By class:\n",
       "                                                                      precision    recall  f1-score   support\n",
       "\n",
-      "            one who does not side with any party in a war or dispute     0.0000    0.0000    0.0000         0\n",
-      "that which is below standard or expectations as of ethics or decency     0.0000    0.0000    0.0000         0\n",
-      "                        that which is pleasing or valuable or useful     0.0000    0.0000    0.0000         3\n",
-      "  remarkable or out of the ordinary in degree or magnitude or effect     0.5000    1.0000    0.6667         1\n",
-      "                                    exceptionally bad or displeasing     1.0000    1.0000    1.0000         1\n",
+      "            one who does not side with any party in a war or dispute     0.0000    0.0000    0.0000         1\n",
+      "that which is below standard or expectations as of ethics or decency     0.0000    0.0000    0.0000         2\n",
+      "                        that which is pleasing or valuable or useful     0.3333    1.0000    0.5000         1\n",
+      "  remarkable or out of the ordinary in degree or magnitude or effect     0.0000    0.0000    0.0000         1\n",
+      "                                    exceptionally bad or displeasing     0.0000    0.0000    0.0000         0\n",
       "\n",
-      "                                                           micro avg     0.4000    0.4000    0.4000         5\n",
-      "                                                           macro avg     0.3000    0.4000    0.3333         5\n",
-      "                                                        weighted avg     0.3000    0.4000    0.3333         5\n",
-      "                                                         samples avg     0.4000    0.4000    0.4000         5\n",
+      "                                                           micro avg     0.2000    0.2000    0.2000         5\n",
+      "                                                           macro avg     0.0667    0.2000    0.1000         5\n",
+      "                                                        weighted avg     0.0667    0.2000    0.1000         5\n",
+      "                                                         samples avg     0.2000    0.2000    0.2000         5\n",
       "\n",
-      "2021-09-08 01:59:37,581 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:31,070 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
+      "2021-09-21 21:32:58,447 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:33:43,541 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:00:35,248 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:33:47,470 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 46/46 [00:00<00:00, 17383.37it/s]"
+      "100%|██████████| 46/46 [00:00<00:00, 20792.97it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:00:35,252 [b'one who does not side with any party in a war or dispute', b'that which is below standard or expectations as of ethics or decency', b'that which is pleasing or valuable or useful', b'remarkable or out of the ordinary in degree or magnitude or effect', b'exceptionally bad or displeasing']\n",
-      "2021-09-08 02:00:35,262 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:35,264 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:33:47,474 [b'one who does not side with any party in a war or dispute', b'that which is below standard or expectations as of ethics or decency', b'that which is pleasing or valuable or useful', b'remarkable or out of the ordinary in degree or magnitude or effect', b'exceptionally bad or displeasing']\n",
+      "2021-09-21 21:33:47,486 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:33:47,487 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -8281,25 +8300,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:00:35,264 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:35,265 Corpus: \"Corpus: 41 train + 4 dev + 5 test sentences\"\n",
-      "2021-09-08 02:00:35,265 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:35,265 Parameters:\n",
-      "2021-09-08 02:00:35,265  - learning_rate: \"0.02\"\n",
-      "2021-09-08 02:00:35,266  - mini_batch_size: \"1\"\n",
-      "2021-09-08 02:00:35,266  - patience: \"3\"\n",
-      "2021-09-08 02:00:35,266  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 02:00:35,267  - max_epochs: \"10\"\n",
-      "2021-09-08 02:00:35,267  - shuffle: \"True\"\n",
-      "2021-09-08 02:00:35,267  - train_with_dev: \"False\"\n",
-      "2021-09-08 02:00:35,267  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 02:00:35,268 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:35,268 Model training base path: \"temp1\"\n",
-      "2021-09-08 02:00:35,268 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:35,269 Device: cuda:0\n",
-      "2021-09-08 02:00:35,269 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:35,269 Embeddings storage mode: cpu\n",
-      "2021-09-08 02:00:35,276 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:33:47,488 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:33:47,489 Corpus: \"Corpus: 41 train + 4 dev + 5 test sentences\"\n",
+      "2021-09-21 21:33:47,489 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:33:47,489 Parameters:\n",
+      "2021-09-21 21:33:47,490  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:33:47,490  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:33:47,490  - patience: \"3\"\n",
+      "2021-09-21 21:33:47,490  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:33:47,491  - max_epochs: \"10\"\n",
+      "2021-09-21 21:33:47,491  - shuffle: \"True\"\n",
+      "2021-09-21 21:33:47,491  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:33:47,492  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:33:47,492 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:33:47,492 Model training base path: \"temp1\"\n",
+      "2021-09-21 21:33:47,493 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:33:47,493 Device: cuda:0\n",
+      "2021-09-21 21:33:47,493 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:33:47,494 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:33:47,500 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -8313,172 +8332,170 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:00:35,575 epoch 1 - iter 4/41 - loss 0.78347725 - samples/sec: 14.70 - lr: 0.020000\n",
-      "2021-09-08 02:00:35,942 epoch 1 - iter 8/41 - loss 0.51947204 - samples/sec: 10.92 - lr: 0.020000\n",
-      "2021-09-08 02:00:36,278 epoch 1 - iter 12/41 - loss 0.64482010 - samples/sec: 11.91 - lr: 0.020000\n",
-      "2021-09-08 02:00:36,569 epoch 1 - iter 16/41 - loss 0.55978363 - samples/sec: 13.80 - lr: 0.020000\n",
-      "2021-09-08 02:00:36,817 epoch 1 - iter 20/41 - loss 0.65917711 - samples/sec: 16.12 - lr: 0.020000\n",
-      "2021-09-08 02:00:37,161 epoch 1 - iter 24/41 - loss 0.65065153 - samples/sec: 11.65 - lr: 0.020000\n",
-      "2021-09-08 02:00:37,401 epoch 1 - iter 28/41 - loss 0.65223933 - samples/sec: 16.73 - lr: 0.020000\n",
-      "2021-09-08 02:00:37,643 epoch 1 - iter 32/41 - loss 0.64914173 - samples/sec: 16.54 - lr: 0.020000\n",
-      "2021-09-08 02:00:37,955 epoch 1 - iter 36/41 - loss 0.65956583 - samples/sec: 12.85 - lr: 0.020000\n",
-      "2021-09-08 02:00:38,254 epoch 1 - iter 40/41 - loss 0.67530852 - samples/sec: 13.41 - lr: 0.020000\n",
-      "2021-09-08 02:00:38,322 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:38,322 EPOCH 1 done: loss 0.6784 - lr 0.0200000\n",
-      "2021-09-08 02:00:38,580 DEV : loss 0.5400399565696716 - score 0.25\n",
-      "2021-09-08 02:00:38,581 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:33:47,773 epoch 1 - iter 4/41 - loss 0.68386428 - samples/sec: 15.45 - lr: 0.020000\n",
+      "2021-09-21 21:33:47,994 epoch 1 - iter 8/41 - loss 0.57316270 - samples/sec: 18.15 - lr: 0.020000\n",
+      "2021-09-21 21:33:48,218 epoch 1 - iter 12/41 - loss 0.63719612 - samples/sec: 17.90 - lr: 0.020000\n",
+      "2021-09-21 21:33:48,419 epoch 1 - iter 16/41 - loss 0.55916871 - samples/sec: 19.91 - lr: 0.020000\n",
+      "2021-09-21 21:33:48,629 epoch 1 - iter 20/41 - loss 0.59517972 - samples/sec: 19.13 - lr: 0.020000\n",
+      "2021-09-21 21:33:48,821 epoch 1 - iter 24/41 - loss 0.66398732 - samples/sec: 20.85 - lr: 0.020000\n",
+      "2021-09-21 21:33:49,011 epoch 1 - iter 28/41 - loss 0.65080045 - samples/sec: 21.10 - lr: 0.020000\n",
+      "2021-09-21 21:33:49,202 epoch 1 - iter 32/41 - loss 0.67287817 - samples/sec: 21.09 - lr: 0.020000\n",
+      "2021-09-21 21:33:49,403 epoch 1 - iter 36/41 - loss 0.63829940 - samples/sec: 19.90 - lr: 0.020000\n",
+      "2021-09-21 21:33:49,586 epoch 1 - iter 40/41 - loss 0.57857108 - samples/sec: 21.89 - lr: 0.020000\n",
+      "2021-09-21 21:33:49,631 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:33:49,632 EPOCH 1 done: loss 0.5651 - lr 0.0200000\n",
+      "2021-09-21 21:33:49,805 DEV : loss 0.502212405204773 - score 0.5\n",
+      "2021-09-21 21:33:49,806 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:00:50,980 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:51,241 epoch 2 - iter 4/41 - loss 0.76547751 - samples/sec: 17.22 - lr: 0.020000\n",
-      "2021-09-08 02:00:51,483 epoch 2 - iter 8/41 - loss 0.80255119 - samples/sec: 16.58 - lr: 0.020000\n",
-      "2021-09-08 02:00:51,860 epoch 2 - iter 12/41 - loss 0.77813106 - samples/sec: 10.61 - lr: 0.020000\n",
-      "2021-09-08 02:00:52,178 epoch 2 - iter 16/41 - loss 0.75976481 - samples/sec: 12.63 - lr: 0.020000\n",
-      "2021-09-08 02:00:52,443 epoch 2 - iter 20/41 - loss 0.74416137 - samples/sec: 15.12 - lr: 0.020000\n",
-      "2021-09-08 02:00:52,732 epoch 2 - iter 24/41 - loss 0.74399829 - samples/sec: 13.87 - lr: 0.020000\n",
-      "2021-09-08 02:00:53,044 epoch 2 - iter 28/41 - loss 0.72640888 - samples/sec: 12.85 - lr: 0.020000\n",
-      "2021-09-08 02:00:53,446 epoch 2 - iter 32/41 - loss 0.71476969 - samples/sec: 9.98 - lr: 0.020000\n",
-      "2021-09-08 02:00:53,918 epoch 2 - iter 36/41 - loss 0.71578592 - samples/sec: 8.49 - lr: 0.020000\n",
-      "2021-09-08 02:00:54,195 epoch 2 - iter 40/41 - loss 0.71246586 - samples/sec: 14.48 - lr: 0.020000\n",
-      "2021-09-08 02:00:54,280 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:54,281 EPOCH 2 done: loss 0.7124 - lr 0.0200000\n",
-      "2021-09-08 02:00:54,432 DEV : loss 0.7203139066696167 - score 0.25\n",
-      "2021-09-08 02:00:54,434 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:00:54,449 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:54,732 epoch 3 - iter 4/41 - loss 0.65686807 - samples/sec: 17.01 - lr: 0.020000\n",
-      "2021-09-08 02:00:54,988 epoch 3 - iter 8/41 - loss 0.65870718 - samples/sec: 15.68 - lr: 0.020000\n",
-      "2021-09-08 02:00:55,284 epoch 3 - iter 12/41 - loss 0.68150533 - samples/sec: 13.52 - lr: 0.020000\n",
-      "2021-09-08 02:00:55,698 epoch 3 - iter 16/41 - loss 0.67682751 - samples/sec: 9.69 - lr: 0.020000\n",
-      "2021-09-08 02:00:55,967 epoch 3 - iter 20/41 - loss 0.66719269 - samples/sec: 14.91 - lr: 0.020000\n",
-      "2021-09-08 02:00:56,407 epoch 3 - iter 24/41 - loss 0.67139850 - samples/sec: 9.11 - lr: 0.020000\n",
-      "2021-09-08 02:00:56,745 epoch 3 - iter 28/41 - loss 0.66451663 - samples/sec: 11.85 - lr: 0.020000\n",
-      "2021-09-08 02:00:57,088 epoch 3 - iter 32/41 - loss 0.65876733 - samples/sec: 11.68 - lr: 0.020000\n",
-      "2021-09-08 02:00:57,406 epoch 3 - iter 36/41 - loss 0.65692212 - samples/sec: 12.58 - lr: 0.020000\n",
-      "2021-09-08 02:00:57,684 epoch 3 - iter 40/41 - loss 0.65493785 - samples/sec: 14.43 - lr: 0.020000\n",
-      "2021-09-08 02:00:57,786 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:57,787 EPOCH 3 done: loss 0.6526 - lr 0.0200000\n",
-      "2021-09-08 02:00:57,952 DEV : loss 0.5013717412948608 - score 0.0\n",
-      "2021-09-08 02:00:57,953 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:00:57,970 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:58,495 epoch 4 - iter 4/41 - loss 0.66006491 - samples/sec: 7.94 - lr: 0.020000\n",
-      "2021-09-08 02:00:58,785 epoch 4 - iter 8/41 - loss 0.65780056 - samples/sec: 13.85 - lr: 0.020000\n",
-      "2021-09-08 02:00:59,133 epoch 4 - iter 12/41 - loss 0.67858150 - samples/sec: 11.52 - lr: 0.020000\n",
-      "2021-09-08 02:00:59,462 epoch 4 - iter 16/41 - loss 0.67369272 - samples/sec: 12.15 - lr: 0.020000\n",
-      "2021-09-08 02:00:59,779 epoch 4 - iter 20/41 - loss 0.66240871 - samples/sec: 12.65 - lr: 0.020000\n",
-      "2021-09-08 02:01:00,037 epoch 4 - iter 24/41 - loss 0.65193431 - samples/sec: 15.58 - lr: 0.020000\n",
-      "2021-09-08 02:01:00,292 epoch 4 - iter 28/41 - loss 0.65382184 - samples/sec: 15.72 - lr: 0.020000\n",
-      "2021-09-08 02:01:00,530 epoch 4 - iter 32/41 - loss 0.66274834 - samples/sec: 16.85 - lr: 0.020000\n",
-      "2021-09-08 02:01:00,734 epoch 4 - iter 36/41 - loss 0.66299629 - samples/sec: 19.62 - lr: 0.020000\n",
-      "2021-09-08 02:01:01,031 epoch 4 - iter 40/41 - loss 0.66472970 - samples/sec: 13.50 - lr: 0.020000\n",
-      "2021-09-08 02:01:01,087 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:01:01,088 EPOCH 4 done: loss 0.6651 - lr 0.0200000\n",
-      "2021-09-08 02:01:01,308 DEV : loss 0.5795559883117676 - score 0.25\n",
-      "2021-09-08 02:01:01,309 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 02:01:01,312 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:01:01,580 epoch 5 - iter 4/41 - loss 0.58674063 - samples/sec: 16.36 - lr: 0.020000\n",
-      "2021-09-08 02:01:01,956 epoch 5 - iter 8/41 - loss 0.62022258 - samples/sec: 10.68 - lr: 0.020000\n",
-      "2021-09-08 02:01:02,198 epoch 5 - iter 12/41 - loss 0.63608657 - samples/sec: 16.57 - lr: 0.020000\n",
-      "2021-09-08 02:01:02,409 epoch 5 - iter 16/41 - loss 0.63116100 - samples/sec: 19.03 - lr: 0.020000\n",
-      "2021-09-08 02:01:02,716 epoch 5 - iter 20/41 - loss 0.63660936 - samples/sec: 13.02 - lr: 0.020000\n",
-      "2021-09-08 02:01:02,927 epoch 5 - iter 24/41 - loss 0.63644734 - samples/sec: 19.07 - lr: 0.020000\n",
-      "2021-09-08 02:01:03,232 epoch 5 - iter 28/41 - loss 0.63948566 - samples/sec: 13.11 - lr: 0.020000\n",
-      "2021-09-08 02:01:03,562 epoch 5 - iter 32/41 - loss 0.63720671 - samples/sec: 12.16 - lr: 0.020000\n",
-      "2021-09-08 02:01:03,773 epoch 5 - iter 36/41 - loss 0.64307459 - samples/sec: 19.02 - lr: 0.020000\n",
-      "2021-09-08 02:01:04,033 epoch 5 - iter 40/41 - loss 0.64272973 - samples/sec: 15.39 - lr: 0.020000\n",
-      "2021-09-08 02:01:04,083 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:01:04,083 EPOCH 5 done: loss 0.6413 - lr 0.0200000\n",
-      "2021-09-08 02:01:07,357 DEV : loss 0.5667296648025513 - score 0.0\n",
+      "2021-09-21 21:33:55,626 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:33:55,830 epoch 2 - iter 4/41 - loss 0.43164983 - samples/sec: 21.26 - lr: 0.020000\n",
+      "2021-09-21 21:33:56,046 epoch 2 - iter 8/41 - loss 0.56691620 - samples/sec: 18.55 - lr: 0.020000\n",
+      "2021-09-21 21:33:56,249 epoch 2 - iter 12/41 - loss 0.57293377 - samples/sec: 19.72 - lr: 0.020000\n",
+      "2021-09-21 21:33:56,446 epoch 2 - iter 16/41 - loss 0.61380061 - samples/sec: 20.44 - lr: 0.020000\n",
+      "2021-09-21 21:33:56,642 epoch 2 - iter 20/41 - loss 0.68475145 - samples/sec: 20.45 - lr: 0.020000\n",
+      "2021-09-21 21:33:56,899 epoch 2 - iter 24/41 - loss 0.64607958 - samples/sec: 15.55 - lr: 0.020000\n",
+      "2021-09-21 21:33:57,113 epoch 2 - iter 28/41 - loss 0.62676934 - samples/sec: 18.81 - lr: 0.020000\n",
+      "2021-09-21 21:33:57,318 epoch 2 - iter 32/41 - loss 0.57945109 - samples/sec: 19.48 - lr: 0.020000\n",
+      "2021-09-21 21:33:57,535 epoch 2 - iter 36/41 - loss 0.58797789 - samples/sec: 18.56 - lr: 0.020000\n",
+      "2021-09-21 21:33:57,727 epoch 2 - iter 40/41 - loss 0.60904408 - samples/sec: 20.87 - lr: 0.020000\n",
+      "2021-09-21 21:33:57,772 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:33:57,773 EPOCH 2 done: loss 0.5946 - lr 0.0200000\n",
+      "2021-09-21 21:33:57,953 DEV : loss 1.0090711116790771 - score 0.5\n",
+      "2021-09-21 21:33:57,954 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:33:58,031 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:33:58,248 epoch 3 - iter 4/41 - loss 0.70353531 - samples/sec: 19.63 - lr: 0.020000\n",
+      "2021-09-21 21:33:58,453 epoch 3 - iter 8/41 - loss 0.51051763 - samples/sec: 19.58 - lr: 0.020000\n",
+      "2021-09-21 21:33:58,664 epoch 3 - iter 12/41 - loss 0.39985222 - samples/sec: 19.04 - lr: 0.020000\n",
+      "2021-09-21 21:33:58,882 epoch 3 - iter 16/41 - loss 0.38005048 - samples/sec: 18.39 - lr: 0.020000\n",
+      "2021-09-21 21:33:59,071 epoch 3 - iter 20/41 - loss 0.41006987 - samples/sec: 21.22 - lr: 0.020000\n",
+      "2021-09-21 21:33:59,258 epoch 3 - iter 24/41 - loss 0.43531354 - samples/sec: 21.36 - lr: 0.020000\n",
+      "2021-09-21 21:33:59,493 epoch 3 - iter 28/41 - loss 0.40224203 - samples/sec: 17.05 - lr: 0.020000\n",
+      "2021-09-21 21:33:59,691 epoch 3 - iter 32/41 - loss 0.41782338 - samples/sec: 20.30 - lr: 0.020000\n",
+      "2021-09-21 21:33:59,950 epoch 3 - iter 36/41 - loss 0.43884217 - samples/sec: 15.47 - lr: 0.020000\n",
+      "2021-09-21 21:34:00,236 epoch 3 - iter 40/41 - loss 0.44644643 - samples/sec: 14.03 - lr: 0.020000\n",
+      "2021-09-21 21:34:00,319 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:34:00,319 EPOCH 3 done: loss 0.4402 - lr 0.0200000\n",
+      "2021-09-21 21:34:00,448 DEV : loss 0.9564387798309326 - score 0.0\n",
+      "2021-09-21 21:34:00,448 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:34:00,450 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:34:00,753 epoch 4 - iter 4/41 - loss 0.70016090 - samples/sec: 13.98 - lr: 0.020000\n",
+      "2021-09-21 21:34:01,082 epoch 4 - iter 8/41 - loss 0.44498220 - samples/sec: 12.20 - lr: 0.020000\n",
+      "2021-09-21 21:34:01,329 epoch 4 - iter 12/41 - loss 0.42846344 - samples/sec: 16.22 - lr: 0.020000\n",
+      "2021-09-21 21:34:01,610 epoch 4 - iter 16/41 - loss 0.42822198 - samples/sec: 14.28 - lr: 0.020000\n",
+      "2021-09-21 21:34:01,870 epoch 4 - iter 20/41 - loss 0.37276500 - samples/sec: 15.39 - lr: 0.020000\n",
+      "2021-09-21 21:34:02,159 epoch 4 - iter 24/41 - loss 0.36403462 - samples/sec: 13.88 - lr: 0.020000\n",
+      "2021-09-21 21:34:02,445 epoch 4 - iter 28/41 - loss 0.46071148 - samples/sec: 14.01 - lr: 0.020000\n",
+      "2021-09-21 21:34:02,778 epoch 4 - iter 32/41 - loss 0.41690868 - samples/sec: 12.03 - lr: 0.020000\n",
+      "2021-09-21 21:34:03,070 epoch 4 - iter 36/41 - loss 0.40838926 - samples/sec: 13.71 - lr: 0.020000\n",
+      "2021-09-21 21:34:03,341 epoch 4 - iter 40/41 - loss 0.43434471 - samples/sec: 14.82 - lr: 0.020000\n",
+      "2021-09-21 21:34:03,395 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:34:03,396 EPOCH 4 done: loss 0.4250 - lr 0.0200000\n",
+      "2021-09-21 21:34:03,517 DEV : loss 1.0582847595214844 - score 0.25\n",
+      "2021-09-21 21:34:03,518 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:34:03,520 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:34:03,828 epoch 5 - iter 4/41 - loss 0.43421082 - samples/sec: 13.92 - lr: 0.020000\n",
+      "2021-09-21 21:34:04,105 epoch 5 - iter 8/41 - loss 0.32224846 - samples/sec: 14.46 - lr: 0.020000\n",
+      "2021-09-21 21:34:04,321 epoch 5 - iter 12/41 - loss 0.27345944 - samples/sec: 18.53 - lr: 0.020000\n",
+      "2021-09-21 21:34:04,510 epoch 5 - iter 16/41 - loss 0.24334782 - samples/sec: 21.20 - lr: 0.020000\n",
+      "2021-09-21 21:34:04,705 epoch 5 - iter 20/41 - loss 0.26783937 - samples/sec: 20.57 - lr: 0.020000\n",
+      "2021-09-21 21:34:04,913 epoch 5 - iter 24/41 - loss 0.31585002 - samples/sec: 19.35 - lr: 0.020000\n",
+      "2021-09-21 21:34:05,120 epoch 5 - iter 28/41 - loss 0.30551863 - samples/sec: 19.35 - lr: 0.020000\n",
+      "2021-09-21 21:34:05,361 epoch 5 - iter 32/41 - loss 0.29334944 - samples/sec: 16.64 - lr: 0.020000\n",
+      "2021-09-21 21:34:05,545 epoch 5 - iter 36/41 - loss 0.28831222 - samples/sec: 21.79 - lr: 0.020000\n",
+      "2021-09-21 21:34:05,765 epoch 5 - iter 40/41 - loss 0.33759693 - samples/sec: 18.20 - lr: 0.020000\n",
+      "2021-09-21 21:34:05,816 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:34:05,816 EPOCH 5 done: loss 0.3430 - lr 0.0200000\n",
+      "2021-09-21 21:34:05,989 DEV : loss 1.1785340309143066 - score 0.25\n",
       "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 02:01:07,358 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 02:01:07,383 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:01:07,733 epoch 6 - iter 4/41 - loss 0.64264752 - samples/sec: 13.31 - lr: 0.010000\n",
-      "2021-09-08 02:01:08,032 epoch 6 - iter 8/41 - loss 0.62545754 - samples/sec: 13.39 - lr: 0.010000\n",
-      "2021-09-08 02:01:08,418 epoch 6 - iter 12/41 - loss 0.62553362 - samples/sec: 10.39 - lr: 0.010000\n",
-      "2021-09-08 02:01:08,794 epoch 6 - iter 16/41 - loss 0.61889667 - samples/sec: 10.66 - lr: 0.010000\n",
-      "2021-09-08 02:01:09,070 epoch 6 - iter 20/41 - loss 0.62429485 - samples/sec: 14.52 - lr: 0.010000\n",
-      "2021-09-08 02:01:09,308 epoch 6 - iter 24/41 - loss 0.62893382 - samples/sec: 16.81 - lr: 0.010000\n",
-      "2021-09-08 02:01:09,632 epoch 6 - iter 28/41 - loss 0.62789044 - samples/sec: 12.38 - lr: 0.010000\n",
-      "2021-09-08 02:01:09,871 epoch 6 - iter 32/41 - loss 0.62841083 - samples/sec: 16.81 - lr: 0.010000\n",
-      "2021-09-08 02:01:10,127 epoch 6 - iter 36/41 - loss 0.62472733 - samples/sec: 15.63 - lr: 0.010000\n",
-      "2021-09-08 02:01:10,540 epoch 6 - iter 40/41 - loss 0.62459370 - samples/sec: 9.70 - lr: 0.010000\n",
-      "2021-09-08 02:01:10,601 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:34:05,990 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:34:06,069 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:34:06,282 epoch 6 - iter 4/41 - loss 0.27406964 - samples/sec: 20.07 - lr: 0.010000\n",
+      "2021-09-21 21:34:06,529 epoch 6 - iter 8/41 - loss 0.19048840 - samples/sec: 16.20 - lr: 0.010000\n",
+      "2021-09-21 21:34:06,711 epoch 6 - iter 12/41 - loss 0.17161071 - samples/sec: 21.99 - lr: 0.010000\n",
+      "2021-09-21 21:34:06,923 epoch 6 - iter 16/41 - loss 0.21643626 - samples/sec: 19.00 - lr: 0.010000\n",
+      "2021-09-21 21:34:07,125 epoch 6 - iter 20/41 - loss 0.27042540 - samples/sec: 19.79 - lr: 0.010000\n",
+      "2021-09-21 21:34:07,316 epoch 6 - iter 24/41 - loss 0.28313526 - samples/sec: 21.05 - lr: 0.010000\n",
+      "2021-09-21 21:34:07,501 epoch 6 - iter 28/41 - loss 0.28248213 - samples/sec: 21.62 - lr: 0.010000\n",
+      "2021-09-21 21:34:07,695 epoch 6 - iter 32/41 - loss 0.27103271 - samples/sec: 20.71 - lr: 0.010000\n",
+      "2021-09-21 21:34:07,908 epoch 6 - iter 36/41 - loss 0.28040458 - samples/sec: 18.80 - lr: 0.010000\n",
+      "2021-09-21 21:34:08,132 epoch 6 - iter 40/41 - loss 0.28009408 - samples/sec: 17.94 - lr: 0.010000\n",
+      "2021-09-21 21:34:08,186 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:01:10,602 EPOCH 6 done: loss 0.6244 - lr 0.0100000\n",
-      "2021-09-08 02:01:10,814 DEV : loss 0.5654149055480957 - score 0.0\n",
-      "2021-09-08 02:01:10,815 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:01:10,841 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:01:11,184 epoch 7 - iter 4/41 - loss 0.62763906 - samples/sec: 12.63 - lr: 0.010000\n",
-      "2021-09-08 02:01:11,456 epoch 7 - iter 8/41 - loss 0.65814080 - samples/sec: 14.72 - lr: 0.010000\n",
-      "2021-09-08 02:01:11,813 epoch 7 - iter 12/41 - loss 0.65962258 - samples/sec: 11.22 - lr: 0.010000\n",
-      "2021-09-08 02:01:12,139 epoch 7 - iter 16/41 - loss 0.65694210 - samples/sec: 12.30 - lr: 0.010000\n",
-      "2021-09-08 02:01:12,455 epoch 7 - iter 20/41 - loss 0.65955752 - samples/sec: 12.68 - lr: 0.010000\n",
-      "2021-09-08 02:01:12,732 epoch 7 - iter 24/41 - loss 0.65289782 - samples/sec: 14.48 - lr: 0.010000\n",
-      "2021-09-08 02:01:13,096 epoch 7 - iter 28/41 - loss 0.66133147 - samples/sec: 11.01 - lr: 0.010000\n",
-      "2021-09-08 02:01:13,335 epoch 7 - iter 32/41 - loss 0.66943952 - samples/sec: 16.80 - lr: 0.010000\n",
-      "2021-09-08 02:01:13,584 epoch 7 - iter 36/41 - loss 0.67245064 - samples/sec: 16.05 - lr: 0.010000\n",
-      "2021-09-08 02:01:13,916 epoch 7 - iter 40/41 - loss 0.66830252 - samples/sec: 12.10 - lr: 0.010000\n",
-      "2021-09-08 02:01:14,126 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:01:14,126 EPOCH 7 done: loss 0.6666 - lr 0.0100000\n",
-      "2021-09-08 02:01:14,262 DEV : loss 0.5273861885070801 - score 0.25\n",
-      "2021-09-08 02:01:14,263 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 02:01:26,212 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:01:26,554 epoch 8 - iter 4/41 - loss 0.62878971 - samples/sec: 12.51 - lr: 0.010000\n",
-      "2021-09-08 02:01:26,866 epoch 8 - iter 8/41 - loss 0.65379786 - samples/sec: 12.84 - lr: 0.010000\n",
-      "2021-09-08 02:01:27,097 epoch 8 - iter 12/41 - loss 0.67156335 - samples/sec: 17.34 - lr: 0.010000\n",
-      "2021-09-08 02:01:27,351 epoch 8 - iter 16/41 - loss 0.66193357 - samples/sec: 15.82 - lr: 0.010000\n",
-      "2021-09-08 02:01:27,616 epoch 8 - iter 20/41 - loss 0.66001766 - samples/sec: 15.15 - lr: 0.010000\n",
-      "2021-09-08 02:01:27,841 epoch 8 - iter 24/41 - loss 0.66108724 - samples/sec: 17.81 - lr: 0.010000\n",
-      "2021-09-08 02:01:28,161 epoch 8 - iter 28/41 - loss 0.66014631 - samples/sec: 12.50 - lr: 0.010000\n",
-      "2021-09-08 02:01:28,362 epoch 8 - iter 32/41 - loss 0.65026895 - samples/sec: 20.02 - lr: 0.010000\n",
-      "2021-09-08 02:01:28,677 epoch 8 - iter 36/41 - loss 0.64280377 - samples/sec: 12.72 - lr: 0.010000\n",
-      "2021-09-08 02:01:28,907 epoch 8 - iter 40/41 - loss 0.64042114 - samples/sec: 17.44 - lr: 0.010000\n",
-      "2021-09-08 02:01:29,000 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:01:29,000 EPOCH 8 done: loss 0.6413 - lr 0.0100000\n",
-      "2021-09-08 02:01:29,421 DEV : loss 0.5337260365486145 - score 0.0\n",
-      "2021-09-08 02:01:29,422 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:01:29,430 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:01:29,706 epoch 9 - iter 4/41 - loss 0.58992323 - samples/sec: 16.24 - lr: 0.010000\n",
-      "2021-09-08 02:01:29,978 epoch 9 - iter 8/41 - loss 0.58325215 - samples/sec: 14.78 - lr: 0.010000\n",
-      "2021-09-08 02:01:30,272 epoch 9 - iter 12/41 - loss 0.58443275 - samples/sec: 13.61 - lr: 0.010000\n",
-      "2021-09-08 02:01:30,870 epoch 9 - iter 16/41 - loss 0.62707901 - samples/sec: 6.69 - lr: 0.010000\n",
-      "2021-09-08 02:01:31,255 epoch 9 - iter 20/41 - loss 0.62698911 - samples/sec: 10.43 - lr: 0.010000\n",
-      "2021-09-08 02:01:31,555 epoch 9 - iter 24/41 - loss 0.62796867 - samples/sec: 13.34 - lr: 0.010000\n",
-      "2021-09-08 02:01:31,806 epoch 9 - iter 28/41 - loss 0.62800445 - samples/sec: 15.96 - lr: 0.010000\n",
-      "2021-09-08 02:01:32,149 epoch 9 - iter 32/41 - loss 0.62310698 - samples/sec: 11.70 - lr: 0.010000\n",
-      "2021-09-08 02:01:32,476 epoch 9 - iter 36/41 - loss 0.62110304 - samples/sec: 12.24 - lr: 0.010000\n",
-      "2021-09-08 02:01:32,735 epoch 9 - iter 40/41 - loss 0.62122268 - samples/sec: 15.50 - lr: 0.010000\n",
-      "2021-09-08 02:01:32,792 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:01:32,793 EPOCH 9 done: loss 0.6187 - lr 0.0100000\n",
-      "2021-09-08 02:01:32,951 DEV : loss 0.6662838459014893 - score 0.5\n",
-      "2021-09-08 02:01:32,952 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 02:01:38,352 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:01:38,594 epoch 10 - iter 4/41 - loss 0.61718121 - samples/sec: 17.83 - lr: 0.010000\n",
-      "2021-09-08 02:01:38,931 epoch 10 - iter 8/41 - loss 0.62435616 - samples/sec: 11.89 - lr: 0.010000\n",
-      "2021-09-08 02:01:39,189 epoch 10 - iter 12/41 - loss 0.63139756 - samples/sec: 15.53 - lr: 0.010000\n",
-      "2021-09-08 02:01:40,174 epoch 10 - iter 16/41 - loss 0.64987989 - samples/sec: 18.65 - lr: 0.010000\n",
-      "2021-09-08 02:01:40,440 epoch 10 - iter 20/41 - loss 0.63779131 - samples/sec: 15.06 - lr: 0.010000\n",
-      "2021-09-08 02:01:40,668 epoch 10 - iter 24/41 - loss 0.63621197 - samples/sec: 17.63 - lr: 0.010000\n",
-      "2021-09-08 02:01:40,934 epoch 10 - iter 28/41 - loss 0.63610009 - samples/sec: 15.07 - lr: 0.010000\n",
-      "2021-09-08 02:01:41,323 epoch 10 - iter 32/41 - loss 0.63467590 - samples/sec: 10.33 - lr: 0.010000\n",
-      "2021-09-08 02:01:41,551 epoch 10 - iter 36/41 - loss 0.64041090 - samples/sec: 17.58 - lr: 0.010000\n",
-      "2021-09-08 02:01:41,845 epoch 10 - iter 40/41 - loss 0.64628833 - samples/sec: 13.64 - lr: 0.010000\n",
-      "2021-09-08 02:01:41,897 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:01:41,897 EPOCH 10 done: loss 0.6467 - lr 0.0100000\n",
-      "2021-09-08 02:01:42,415 DEV : loss 0.5296592712402344 - score 0.5\n",
-      "2021-09-08 02:01:42,416 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 02:01:54,607 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:01:54,608 Testing using best model ...\n",
-      "2021-09-08 02:01:54,609 loading file temp1/best-model.pt\n",
+      "2021-09-21 21:34:08,186 EPOCH 6 done: loss 0.2787 - lr 0.0100000\n",
+      "2021-09-21 21:34:08,280 DEV : loss 1.1321133375167847 - score 0.25\n",
+      "2021-09-21 21:34:08,281 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:34:08,284 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:34:08,568 epoch 7 - iter 4/41 - loss 0.29696838 - samples/sec: 14.84 - lr: 0.010000\n",
+      "2021-09-21 21:34:08,840 epoch 7 - iter 8/41 - loss 0.18268676 - samples/sec: 14.68 - lr: 0.010000\n",
+      "2021-09-21 21:34:09,127 epoch 7 - iter 12/41 - loss 0.24799339 - samples/sec: 13.97 - lr: 0.010000\n",
+      "2021-09-21 21:34:09,450 epoch 7 - iter 16/41 - loss 0.21919724 - samples/sec: 12.39 - lr: 0.010000\n",
+      "2021-09-21 21:34:09,707 epoch 7 - iter 20/41 - loss 0.24294927 - samples/sec: 15.65 - lr: 0.010000\n",
+      "2021-09-21 21:34:09,982 epoch 7 - iter 24/41 - loss 0.25971027 - samples/sec: 14.56 - lr: 0.010000\n",
+      "2021-09-21 21:34:10,300 epoch 7 - iter 28/41 - loss 0.23174850 - samples/sec: 12.59 - lr: 0.010000\n",
+      "2021-09-21 21:34:10,541 epoch 7 - iter 32/41 - loss 0.20806054 - samples/sec: 16.62 - lr: 0.010000\n",
+      "2021-09-21 21:34:10,795 epoch 7 - iter 36/41 - loss 0.18656455 - samples/sec: 15.82 - lr: 0.010000\n",
+      "2021-09-21 21:34:11,086 epoch 7 - iter 40/41 - loss 0.17385796 - samples/sec: 13.75 - lr: 0.010000\n",
+      "2021-09-21 21:34:11,145 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:34:11,146 EPOCH 7 done: loss 0.1895 - lr 0.0100000\n",
+      "2021-09-21 21:34:11,262 DEV : loss 1.1273910999298096 - score 0.25\n",
+      "2021-09-21 21:34:11,262 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:34:11,264 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:34:11,604 epoch 8 - iter 4/41 - loss 0.51940981 - samples/sec: 12.43 - lr: 0.010000\n",
+      "2021-09-21 21:34:11,839 epoch 8 - iter 8/41 - loss 0.32503010 - samples/sec: 17.06 - lr: 0.010000\n",
+      "2021-09-21 21:34:12,098 epoch 8 - iter 12/41 - loss 0.30258265 - samples/sec: 15.48 - lr: 0.010000\n",
+      "2021-09-21 21:34:12,446 epoch 8 - iter 16/41 - loss 0.23396636 - samples/sec: 11.54 - lr: 0.010000\n",
+      "2021-09-21 21:34:12,705 epoch 8 - iter 20/41 - loss 0.19874609 - samples/sec: 15.45 - lr: 0.010000\n",
+      "2021-09-21 21:34:12,960 epoch 8 - iter 24/41 - loss 0.22676984 - samples/sec: 15.69 - lr: 0.010000\n",
+      "2021-09-21 21:34:13,214 epoch 8 - iter 28/41 - loss 0.20701157 - samples/sec: 15.78 - lr: 0.010000\n",
+      "2021-09-21 21:34:13,531 epoch 8 - iter 32/41 - loss 0.22712942 - samples/sec: 12.64 - lr: 0.010000\n",
+      "2021-09-21 21:34:13,783 epoch 8 - iter 36/41 - loss 0.20347725 - samples/sec: 15.95 - lr: 0.010000\n",
+      "2021-09-21 21:34:14,020 epoch 8 - iter 40/41 - loss 0.18344535 - samples/sec: 16.90 - lr: 0.010000\n",
+      "2021-09-21 21:34:14,131 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:34:14,132 EPOCH 8 done: loss 0.2007 - lr 0.0100000\n",
+      "2021-09-21 21:34:14,248 DEV : loss 1.112389087677002 - score 0.25\n",
+      "2021-09-21 21:34:14,249 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:34:14,250 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:34:14,575 epoch 9 - iter 4/41 - loss 0.30867705 - samples/sec: 13.06 - lr: 0.010000\n",
+      "2021-09-21 21:34:14,846 epoch 9 - iter 8/41 - loss 0.19568641 - samples/sec: 14.81 - lr: 0.010000\n",
+      "2021-09-21 21:34:15,182 epoch 9 - iter 12/41 - loss 0.21021446 - samples/sec: 11.92 - lr: 0.010000\n",
+      "2021-09-21 21:34:15,433 epoch 9 - iter 16/41 - loss 0.20578043 - samples/sec: 15.96 - lr: 0.010000\n",
+      "2021-09-21 21:34:15,738 epoch 9 - iter 20/41 - loss 0.26636472 - samples/sec: 13.12 - lr: 0.010000\n",
+      "2021-09-21 21:34:16,021 epoch 9 - iter 24/41 - loss 0.22449749 - samples/sec: 14.17 - lr: 0.010000\n",
+      "2021-09-21 21:34:16,258 epoch 9 - iter 28/41 - loss 0.19496632 - samples/sec: 16.89 - lr: 0.010000\n",
+      "2021-09-21 21:34:16,545 epoch 9 - iter 32/41 - loss 0.17958297 - samples/sec: 14.00 - lr: 0.010000\n",
+      "2021-09-21 21:34:16,821 epoch 9 - iter 36/41 - loss 0.16060029 - samples/sec: 14.54 - lr: 0.010000\n",
+      "2021-09-21 21:34:17,109 epoch 9 - iter 40/41 - loss 0.14538670 - samples/sec: 13.91 - lr: 0.010000\n",
+      "2021-09-21 21:34:17,170 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:34:17,170 EPOCH 9 done: loss 0.1421 - lr 0.0100000\n",
+      "2021-09-21 21:34:17,283 DEV : loss 1.3090126514434814 - score 0.25\n",
+      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 21:34:17,284 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:34:17,286 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:34:17,641 epoch 10 - iter 4/41 - loss 0.01676910 - samples/sec: 11.84 - lr: 0.005000\n",
+      "2021-09-21 21:34:17,912 epoch 10 - iter 8/41 - loss 0.02080590 - samples/sec: 14.79 - lr: 0.005000\n",
+      "2021-09-21 21:34:18,176 epoch 10 - iter 12/41 - loss 0.06517203 - samples/sec: 15.21 - lr: 0.005000\n",
+      "2021-09-21 21:34:18,456 epoch 10 - iter 16/41 - loss 0.08201323 - samples/sec: 14.30 - lr: 0.005000\n",
+      "2021-09-21 21:34:18,740 epoch 10 - iter 20/41 - loss 0.13459479 - samples/sec: 14.09 - lr: 0.005000\n",
+      "2021-09-21 21:34:19,042 epoch 10 - iter 24/41 - loss 0.12989658 - samples/sec: 13.27 - lr: 0.005000\n",
+      "2021-09-21 21:34:19,267 epoch 10 - iter 28/41 - loss 0.11770070 - samples/sec: 17.83 - lr: 0.005000\n",
+      "2021-09-21 21:34:19,533 epoch 10 - iter 32/41 - loss 0.10353778 - samples/sec: 15.10 - lr: 0.005000\n",
+      "2021-09-21 21:34:19,783 epoch 10 - iter 36/41 - loss 0.10712005 - samples/sec: 16.00 - lr: 0.005000\n",
+      "2021-09-21 21:34:20,109 epoch 10 - iter 40/41 - loss 0.09854865 - samples/sec: 12.29 - lr: 0.005000\n",
+      "2021-09-21 21:34:20,164 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:34:20,164 EPOCH 10 done: loss 0.0966 - lr 0.0050000\n",
+      "2021-09-21 21:34:20,282 DEV : loss 1.2587002515792847 - score 0.25\n",
+      "2021-09-21 21:34:20,283 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:34:24,326 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:34:24,327 Testing using best model ...\n",
+      "2021-09-21 21:34:24,328 loading file temp1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:01:59,648 \t0.0\n",
-      "2021-09-08 02:01:59,648 \n",
+      "2021-09-21 21:34:29,200 \t0.0\n",
+      "2021-09-21 21:34:29,200 \n",
       "Results:\n",
       "- F-score (micro) 0.0\n",
       "- F-score (macro) 0.0\n",
@@ -8488,18 +8505,18 @@
       "                                                                      precision    recall  f1-score   support\n",
       "\n",
       "            one who does not side with any party in a war or dispute     0.0000    0.0000    0.0000         2\n",
-      "that which is below standard or expectations as of ethics or decency     0.0000    0.0000    0.0000         1\n",
-      "                        that which is pleasing or valuable or useful     0.0000    0.0000    0.0000         0\n",
-      "  remarkable or out of the ordinary in degree or magnitude or effect     0.0000    0.0000    0.0000         1\n",
-      "                                    exceptionally bad or displeasing     0.0000    0.0000    0.0000         1\n",
+      "that which is below standard or expectations as of ethics or decency     0.0000    0.0000    0.0000         2\n",
+      "                        that which is pleasing or valuable or useful     0.0000    0.0000    0.0000         1\n",
+      "  remarkable or out of the ordinary in degree or magnitude or effect     0.0000    0.0000    0.0000         0\n",
+      "                                    exceptionally bad or displeasing     0.0000    0.0000    0.0000         0\n",
       "\n",
       "                                                           micro avg     0.0000    0.0000    0.0000         5\n",
       "                                                           macro avg     0.0000    0.0000    0.0000         5\n",
       "                                                        weighted avg     0.0000    0.0000    0.0000         5\n",
       "                                                         samples avg     0.0000    0.0000    0.0000         5\n",
       "\n",
-      "2021-09-08 02:01:59,649 ----------------------------------------------------------------------------------------------------\n",
-      "Accuracy Durchschnitt: 0.32681851579720794\n"
+      "2021-09-21 21:34:29,201 ----------------------------------------------------------------------------------------------------\n",
+      "Accuracy Durchschnitt: 0.3710506980161646\n"
      ]
     }
    ],
@@ -8571,11 +8588,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
    "id": "a310936c",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.39162380602498165, 0.2233651726671565, 0.41440117560617196, 0.41734019103600295, 0.4085231447465099]\n",
+      "0.07437799742669726\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(avg_acc_list)\n",
+    "print(statistics.pstdev(avg_acc_list))"
+   ]
   }
  ],
  "metadata": {
diff --git a/fewshot/topic_huffpost_fewshot.ipynb b/fewshot/topic_huffpost_fewshot.ipynb
index f8efe59..08a32a4 100644
--- a/fewshot/topic_huffpost_fewshot.ipynb
+++ b/fewshot/topic_huffpost_fewshot.ipynb
@@ -37,7 +37,7 @@
    "source": [
     "# GRAKA auswählen\n",
     "import flair, torch\n",
-    "flair.device = torch.device('cuda:1') "
+    "flair.device = torch.device('cuda:0') "
    ]
   },
   {
@@ -70,25 +70,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:53:30,078 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
+      "2021-09-21 21:51:30,920 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:53:38,134 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:51:37,681 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 91/91 [00:00<00:00, 11504.75it/s]"
+      "100%|██████████| 91/91 [00:00<00:00, 17808.96it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:53:38,145 [b'Family & Relationships', b'Health', b'Science & Mathematics', b'Entertainment & Music', b'Computers & Internet', b'Education & Reference', b'Sports', b'Society & Culture', b'Business & Finance', b'Politics & Government']\n",
-      "2021-09-08 01:53:38,151 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:38,153 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:51:37,688 [b'Family & Relationships', b'Health', b'Science & Mathematics', b'Entertainment & Music', b'Computers & Internet', b'Education & Reference', b'Sports', b'Society & Culture', b'Business & Finance', b'Politics & Government']\n",
+      "2021-09-21 21:51:37,701 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:51:37,705 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -401,25 +401,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:53:38,154 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:38,154 Corpus: \"Corpus: 81 train + 9 dev + 10 test sentences\"\n",
-      "2021-09-08 01:53:38,154 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:38,154 Parameters:\n",
-      "2021-09-08 01:53:38,155  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:53:38,155  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:53:38,155  - patience: \"3\"\n",
-      "2021-09-08 01:53:38,155  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:53:38,156  - max_epochs: \"10\"\n",
-      "2021-09-08 01:53:38,156  - shuffle: \"True\"\n",
-      "2021-09-08 01:53:38,156  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:53:38,157  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:53:38,157 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:38,157 Model training base path: \"None\"\n",
-      "2021-09-08 01:53:38,157 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:38,158 Device: cuda:1\n",
-      "2021-09-08 01:53:38,158 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:38,158 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:53:38,165 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:51:37,708 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:51:37,708 Corpus: \"Corpus: 81 train + 9 dev + 10 test sentences\"\n",
+      "2021-09-21 21:51:37,709 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:51:37,709 Parameters:\n",
+      "2021-09-21 21:51:37,710  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:51:37,711  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:51:37,711  - patience: \"3\"\n",
+      "2021-09-21 21:51:37,712  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:51:37,713  - max_epochs: \"10\"\n",
+      "2021-09-21 21:51:37,713  - shuffle: \"True\"\n",
+      "2021-09-21 21:51:37,714  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:51:37,715  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:51:37,715 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:51:37,716 Model training base path: \"None\"\n",
+      "2021-09-21 21:51:37,717 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:51:37,717 Device: cuda:0\n",
+      "2021-09-21 21:51:37,718 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:51:37,719 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:51:37,726 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -433,216 +433,215 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:53:38,585 epoch 1 - iter 8/81 - loss 0.49631763 - samples/sec: 19.94 - lr: 0.020000\n",
-      "2021-09-08 01:53:39,002 epoch 1 - iter 16/81 - loss 0.40212047 - samples/sec: 19.21 - lr: 0.020000\n",
-      "2021-09-08 01:53:39,538 epoch 1 - iter 24/81 - loss 0.44015991 - samples/sec: 14.94 - lr: 0.020000\n",
-      "2021-09-08 01:53:39,941 epoch 1 - iter 32/81 - loss 0.55122625 - samples/sec: 19.91 - lr: 0.020000\n",
-      "2021-09-08 01:53:40,395 epoch 1 - iter 40/81 - loss 0.55245033 - samples/sec: 17.63 - lr: 0.020000\n",
-      "2021-09-08 01:53:40,935 epoch 1 - iter 48/81 - loss 0.51910507 - samples/sec: 14.84 - lr: 0.020000\n",
-      "2021-09-08 01:53:41,424 epoch 1 - iter 56/81 - loss 0.49714110 - samples/sec: 16.41 - lr: 0.020000\n",
-      "2021-09-08 01:53:41,932 epoch 1 - iter 64/81 - loss 0.50662555 - samples/sec: 15.77 - lr: 0.020000\n",
-      "2021-09-08 01:53:42,447 epoch 1 - iter 72/81 - loss 0.53577230 - samples/sec: 15.56 - lr: 0.020000\n",
-      "2021-09-08 01:53:42,965 epoch 1 - iter 80/81 - loss 0.50265367 - samples/sec: 15.47 - lr: 0.020000\n",
-      "2021-09-08 01:53:43,023 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:43,080 EPOCH 1 done: loss 0.5092 - lr 0.0200000\n",
-      "2021-09-08 01:53:43,754 DEV : loss 0.22022660076618195 - score 0.6667\n",
-      "2021-09-08 01:53:43,755 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:51:38,156 epoch 1 - iter 8/81 - loss 0.61116588 - samples/sec: 19.80 - lr: 0.020000\n",
+      "2021-09-21 21:51:38,687 epoch 1 - iter 16/81 - loss 0.54071260 - samples/sec: 15.07 - lr: 0.020000\n",
+      "2021-09-21 21:51:39,377 epoch 1 - iter 24/81 - loss 0.52880725 - samples/sec: 11.62 - lr: 0.020000\n",
+      "2021-09-21 21:51:39,800 epoch 1 - iter 32/81 - loss 0.52538773 - samples/sec: 18.93 - lr: 0.020000\n",
+      "2021-09-21 21:51:40,174 epoch 1 - iter 40/81 - loss 0.46411006 - samples/sec: 21.42 - lr: 0.020000\n",
+      "2021-09-21 21:51:40,573 epoch 1 - iter 48/81 - loss 0.51176550 - samples/sec: 20.12 - lr: 0.020000\n",
+      "2021-09-21 21:51:41,009 epoch 1 - iter 56/81 - loss 0.45167366 - samples/sec: 18.37 - lr: 0.020000\n",
+      "2021-09-21 21:51:41,431 epoch 1 - iter 64/81 - loss 0.47536695 - samples/sec: 19.00 - lr: 0.020000\n",
+      "2021-09-21 21:51:41,915 epoch 1 - iter 72/81 - loss 0.48585870 - samples/sec: 16.53 - lr: 0.020000\n",
+      "2021-09-21 21:51:42,316 epoch 1 - iter 80/81 - loss 0.50004204 - samples/sec: 19.98 - lr: 0.020000\n",
+      "2021-09-21 21:51:42,367 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:51:42,367 EPOCH 1 done: loss 0.5033 - lr 0.0200000\n",
+      "2021-09-21 21:51:42,730 DEV : loss 0.5760729312896729 - score 0.2222\n",
+      "2021-09-21 21:51:42,731 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:53:54,697 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:55,412 epoch 2 - iter 8/81 - loss 0.39643956 - samples/sec: 11.50 - lr: 0.020000\n",
-      "2021-09-08 01:53:55,959 epoch 2 - iter 16/81 - loss 0.49141753 - samples/sec: 14.66 - lr: 0.020000\n",
-      "2021-09-08 01:53:56,369 epoch 2 - iter 24/81 - loss 0.50509960 - samples/sec: 19.57 - lr: 0.020000\n",
-      "2021-09-08 01:53:56,884 epoch 2 - iter 32/81 - loss 0.40953637 - samples/sec: 15.55 - lr: 0.020000\n",
-      "2021-09-08 01:53:57,303 epoch 2 - iter 40/81 - loss 0.37161211 - samples/sec: 19.14 - lr: 0.020000\n",
-      "2021-09-08 01:53:57,712 epoch 2 - iter 48/81 - loss 0.40144323 - samples/sec: 19.62 - lr: 0.020000\n",
-      "2021-09-08 01:53:58,130 epoch 2 - iter 56/81 - loss 0.38527334 - samples/sec: 19.15 - lr: 0.020000\n",
-      "2021-09-08 01:53:58,696 epoch 2 - iter 64/81 - loss 0.41140760 - samples/sec: 14.16 - lr: 0.020000\n",
-      "2021-09-08 01:53:59,142 epoch 2 - iter 72/81 - loss 0.39350168 - samples/sec: 17.98 - lr: 0.020000\n",
-      "2021-09-08 01:53:59,543 epoch 2 - iter 80/81 - loss 0.39054997 - samples/sec: 19.96 - lr: 0.020000\n",
-      "2021-09-08 01:53:59,599 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:59,599 EPOCH 2 done: loss 0.3858 - lr 0.0200000\n",
-      "2021-09-08 01:54:00,021 DEV : loss 0.3238915801048279 - score 0.7778\n",
-      "2021-09-08 01:54:00,023 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:51:46,723 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:51:47,345 epoch 2 - iter 8/81 - loss 0.27484679 - samples/sec: 13.21 - lr: 0.020000\n",
+      "2021-09-21 21:51:48,114 epoch 2 - iter 16/81 - loss 0.37101021 - samples/sec: 10.41 - lr: 0.020000\n",
+      "2021-09-21 21:51:48,577 epoch 2 - iter 24/81 - loss 0.36261979 - samples/sec: 17.32 - lr: 0.020000\n",
+      "2021-09-21 21:51:49,083 epoch 2 - iter 32/81 - loss 0.34374603 - samples/sec: 15.83 - lr: 0.020000\n",
+      "2021-09-21 21:51:49,782 epoch 2 - iter 40/81 - loss 0.32375292 - samples/sec: 11.46 - lr: 0.020000\n",
+      "2021-09-21 21:51:50,543 epoch 2 - iter 48/81 - loss 0.38445090 - samples/sec: 10.52 - lr: 0.020000\n",
+      "2021-09-21 21:51:50,997 epoch 2 - iter 56/81 - loss 0.33601512 - samples/sec: 17.64 - lr: 0.020000\n",
+      "2021-09-21 21:51:51,679 epoch 2 - iter 64/81 - loss 0.31761851 - samples/sec: 11.75 - lr: 0.020000\n",
+      "2021-09-21 21:51:52,198 epoch 2 - iter 72/81 - loss 0.35285584 - samples/sec: 15.45 - lr: 0.020000\n",
+      "2021-09-21 21:51:52,721 epoch 2 - iter 80/81 - loss 0.37301161 - samples/sec: 15.30 - lr: 0.020000\n",
+      "2021-09-21 21:51:52,783 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:51:52,784 EPOCH 2 done: loss 0.3823 - lr 0.0200000\n",
+      "2021-09-21 21:51:53,341 DEV : loss 0.598229706287384 - score 0.3333\n",
+      "2021-09-21 21:51:53,342 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:54:04,460 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:54:05,027 epoch 3 - iter 8/81 - loss 0.44205943 - samples/sec: 14.78 - lr: 0.020000\n",
-      "2021-09-08 01:54:05,433 epoch 3 - iter 16/81 - loss 0.43108804 - samples/sec: 19.71 - lr: 0.020000\n",
-      "2021-09-08 01:54:05,897 epoch 3 - iter 24/81 - loss 0.28970274 - samples/sec: 17.29 - lr: 0.020000\n",
-      "2021-09-08 01:54:06,473 epoch 3 - iter 32/81 - loss 0.24810618 - samples/sec: 13.91 - lr: 0.020000\n",
-      "2021-09-08 01:54:06,903 epoch 3 - iter 40/81 - loss 0.26410413 - samples/sec: 18.62 - lr: 0.020000\n",
-      "2021-09-08 01:54:07,284 epoch 3 - iter 48/81 - loss 0.27093016 - samples/sec: 21.07 - lr: 0.020000\n",
-      "2021-09-08 01:54:07,887 epoch 3 - iter 56/81 - loss 0.24284110 - samples/sec: 13.28 - lr: 0.020000\n",
-      "2021-09-08 01:54:08,288 epoch 3 - iter 64/81 - loss 0.21722022 - samples/sec: 20.01 - lr: 0.020000\n",
-      "2021-09-08 01:54:08,795 epoch 3 - iter 72/81 - loss 0.20081636 - samples/sec: 15.84 - lr: 0.020000\n",
-      "2021-09-08 01:54:09,255 epoch 3 - iter 80/81 - loss 0.26995129 - samples/sec: 17.43 - lr: 0.020000\n",
-      "2021-09-08 01:54:09,306 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:54:09,307 EPOCH 3 done: loss 0.2666 - lr 0.0200000\n",
-      "2021-09-08 01:54:09,839 DEV : loss 0.2771213948726654 - score 0.7778\n",
-      "2021-09-08 01:54:09,841 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:51:58,432 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:51:58,877 epoch 3 - iter 8/81 - loss 0.30343648 - samples/sec: 18.66 - lr: 0.020000\n",
+      "2021-09-21 21:51:59,304 epoch 3 - iter 16/81 - loss 0.24188006 - samples/sec: 18.74 - lr: 0.020000\n",
+      "2021-09-21 21:51:59,724 epoch 3 - iter 24/81 - loss 0.17993413 - samples/sec: 19.12 - lr: 0.020000\n",
+      "2021-09-21 21:52:00,284 epoch 3 - iter 32/81 - loss 0.19665174 - samples/sec: 14.30 - lr: 0.020000\n",
+      "2021-09-21 21:52:00,771 epoch 3 - iter 40/81 - loss 0.18648088 - samples/sec: 16.44 - lr: 0.020000\n",
+      "2021-09-21 21:52:01,382 epoch 3 - iter 48/81 - loss 0.15618762 - samples/sec: 13.11 - lr: 0.020000\n",
+      "2021-09-21 21:52:01,932 epoch 3 - iter 56/81 - loss 0.14318981 - samples/sec: 14.58 - lr: 0.020000\n",
+      "2021-09-21 21:52:02,522 epoch 3 - iter 64/81 - loss 0.13787439 - samples/sec: 13.56 - lr: 0.020000\n",
+      "2021-09-21 21:52:03,133 epoch 3 - iter 72/81 - loss 0.13761005 - samples/sec: 13.11 - lr: 0.020000\n",
+      "2021-09-21 21:52:03,742 epoch 3 - iter 80/81 - loss 0.12407884 - samples/sec: 13.16 - lr: 0.020000\n",
+      "2021-09-21 21:52:03,800 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:52:03,801 EPOCH 3 done: loss 0.1283 - lr 0.0200000\n",
+      "2021-09-21 21:52:04,369 DEV : loss 0.7118868231773376 - score 0.4444\n",
+      "2021-09-21 21:52:04,370 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:54:15,264 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:54:15,995 epoch 4 - iter 8/81 - loss 0.24756937 - samples/sec: 11.22 - lr: 0.020000\n",
-      "2021-09-08 01:54:16,389 epoch 4 - iter 16/81 - loss 0.14577883 - samples/sec: 20.41 - lr: 0.020000\n",
-      "2021-09-08 01:54:16,895 epoch 4 - iter 24/81 - loss 0.14009434 - samples/sec: 15.87 - lr: 0.020000\n",
-      "2021-09-08 01:54:17,368 epoch 4 - iter 32/81 - loss 0.11910559 - samples/sec: 16.94 - lr: 0.020000\n",
-      "2021-09-08 01:54:17,781 epoch 4 - iter 40/81 - loss 0.18218392 - samples/sec: 19.41 - lr: 0.020000\n",
-      "2021-09-08 01:54:18,182 epoch 4 - iter 48/81 - loss 0.17739405 - samples/sec: 19.98 - lr: 0.020000\n",
-      "2021-09-08 01:54:18,831 epoch 4 - iter 56/81 - loss 0.19200238 - samples/sec: 12.36 - lr: 0.020000\n",
-      "2021-09-08 01:54:19,214 epoch 4 - iter 64/81 - loss 0.16896013 - samples/sec: 20.91 - lr: 0.020000\n",
-      "2021-09-08 01:54:19,792 epoch 4 - iter 72/81 - loss 0.16094053 - samples/sec: 13.87 - lr: 0.020000\n",
-      "2021-09-08 01:54:20,183 epoch 4 - iter 80/81 - loss 0.14857161 - samples/sec: 20.50 - lr: 0.020000\n",
-      "2021-09-08 01:54:20,238 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:54:20,238 EPOCH 4 done: loss 0.1468 - lr 0.0200000\n",
-      "2021-09-08 01:54:20,662 DEV : loss 0.1848618984222412 - score 0.6667\n",
-      "2021-09-08 01:54:20,663 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:54:20,666 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:54:21,077 epoch 5 - iter 8/81 - loss 0.00142013 - samples/sec: 20.30 - lr: 0.020000\n",
-      "2021-09-08 01:54:21,564 epoch 5 - iter 16/81 - loss 0.00513618 - samples/sec: 16.46 - lr: 0.020000\n",
-      "2021-09-08 01:54:22,051 epoch 5 - iter 24/81 - loss 0.03917684 - samples/sec: 16.44 - lr: 0.020000\n",
-      "2021-09-08 01:54:22,429 epoch 5 - iter 32/81 - loss 0.04697274 - samples/sec: 21.22 - lr: 0.020000\n",
-      "2021-09-08 01:54:22,944 epoch 5 - iter 40/81 - loss 0.07227716 - samples/sec: 15.55 - lr: 0.020000\n",
-      "2021-09-08 01:54:23,405 epoch 5 - iter 48/81 - loss 0.06250215 - samples/sec: 17.42 - lr: 0.020000\n",
-      "2021-09-08 01:54:23,904 epoch 5 - iter 56/81 - loss 0.06354449 - samples/sec: 16.05 - lr: 0.020000\n",
-      "2021-09-08 01:54:24,483 epoch 5 - iter 64/81 - loss 0.05581145 - samples/sec: 13.82 - lr: 0.020000\n",
-      "2021-09-08 01:54:24,897 epoch 5 - iter 72/81 - loss 0.05017298 - samples/sec: 19.40 - lr: 0.020000\n",
-      "2021-09-08 01:54:25,277 epoch 5 - iter 80/81 - loss 0.06303975 - samples/sec: 21.09 - lr: 0.020000\n",
-      "2021-09-08 01:54:25,327 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:54:25,328 EPOCH 5 done: loss 0.0623 - lr 0.0200000\n",
-      "2021-09-08 01:54:25,747 DEV : loss 0.2818804979324341 - score 0.7778\n",
-      "2021-09-08 01:54:25,748 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:54:25,750 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:54:26,249 epoch 6 - iter 8/81 - loss 0.00060391 - samples/sec: 16.55 - lr: 0.020000\n",
-      "2021-09-08 01:54:26,725 epoch 6 - iter 16/81 - loss 0.02885965 - samples/sec: 16.83 - lr: 0.020000\n",
-      "2021-09-08 01:54:27,100 epoch 6 - iter 24/81 - loss 0.01972666 - samples/sec: 21.41 - lr: 0.020000\n",
-      "2021-09-08 01:54:27,538 epoch 6 - iter 32/81 - loss 0.12787687 - samples/sec: 18.30 - lr: 0.020000\n",
-      "2021-09-08 01:54:28,041 epoch 6 - iter 40/81 - loss 0.10371414 - samples/sec: 15.92 - lr: 0.020000\n",
-      "2021-09-08 01:54:28,457 epoch 6 - iter 48/81 - loss 0.08715875 - samples/sec: 19.28 - lr: 0.020000\n",
-      "2021-09-08 01:54:28,836 epoch 6 - iter 56/81 - loss 0.07477150 - samples/sec: 21.14 - lr: 0.020000\n",
-      "2021-09-08 01:54:29,234 epoch 6 - iter 64/81 - loss 0.08555487 - samples/sec: 20.19 - lr: 0.020000\n",
-      "2021-09-08 01:54:29,709 epoch 6 - iter 72/81 - loss 0.11921118 - samples/sec: 16.84 - lr: 0.020000\n",
-      "2021-09-08 01:54:30,456 epoch 6 - iter 80/81 - loss 0.10794498 - samples/sec: 10.73 - lr: 0.020000\n",
-      "2021-09-08 01:54:30,513 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:52:08,155 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:52:08,637 epoch 4 - iter 8/81 - loss 0.00187508 - samples/sec: 17.23 - lr: 0.020000\n",
+      "2021-09-21 21:52:09,142 epoch 4 - iter 16/81 - loss 0.00677227 - samples/sec: 15.84 - lr: 0.020000\n",
+      "2021-09-21 21:52:09,524 epoch 4 - iter 24/81 - loss 0.00900101 - samples/sec: 21.01 - lr: 0.020000\n",
+      "2021-09-21 21:52:10,007 epoch 4 - iter 32/81 - loss 0.03670344 - samples/sec: 16.58 - lr: 0.020000\n",
+      "2021-09-21 21:52:10,371 epoch 4 - iter 40/81 - loss 0.04856228 - samples/sec: 22.00 - lr: 0.020000\n",
+      "2021-09-21 21:52:10,841 epoch 4 - iter 48/81 - loss 0.06922633 - samples/sec: 17.04 - lr: 0.020000\n",
+      "2021-09-21 21:52:11,205 epoch 4 - iter 56/81 - loss 0.08539541 - samples/sec: 22.04 - lr: 0.020000\n",
+      "2021-09-21 21:52:11,719 epoch 4 - iter 64/81 - loss 0.07490759 - samples/sec: 15.60 - lr: 0.020000\n",
+      "2021-09-21 21:52:12,408 epoch 4 - iter 72/81 - loss 0.09052224 - samples/sec: 11.62 - lr: 0.020000\n",
+      "2021-09-21 21:52:12,877 epoch 4 - iter 80/81 - loss 0.08950024 - samples/sec: 17.10 - lr: 0.020000\n",
+      "2021-09-21 21:52:12,927 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:52:12,928 EPOCH 4 done: loss 0.0884 - lr 0.0200000\n",
+      "2021-09-21 21:52:13,493 DEV : loss 0.8685470223426819 - score 0.3333\n",
+      "2021-09-21 21:52:13,494 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:52:13,496 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:52:14,163 epoch 5 - iter 8/81 - loss 0.00301704 - samples/sec: 12.42 - lr: 0.020000\n",
+      "2021-09-21 21:52:14,639 epoch 5 - iter 16/81 - loss 0.00600637 - samples/sec: 16.81 - lr: 0.020000\n",
+      "2021-09-21 21:52:15,093 epoch 5 - iter 24/81 - loss 0.00419871 - samples/sec: 17.68 - lr: 0.020000\n",
+      "2021-09-21 21:52:15,532 epoch 5 - iter 32/81 - loss 0.00559822 - samples/sec: 18.21 - lr: 0.020000\n",
+      "2021-09-21 21:52:16,306 epoch 5 - iter 40/81 - loss 0.00461402 - samples/sec: 10.35 - lr: 0.020000\n",
+      "2021-09-21 21:52:17,097 epoch 5 - iter 48/81 - loss 0.08017815 - samples/sec: 10.12 - lr: 0.020000\n",
+      "2021-09-21 21:52:17,766 epoch 5 - iter 56/81 - loss 0.08236654 - samples/sec: 11.97 - lr: 0.020000\n",
+      "2021-09-21 21:52:18,242 epoch 5 - iter 64/81 - loss 0.07375846 - samples/sec: 16.83 - lr: 0.020000\n",
+      "2021-09-21 21:52:18,889 epoch 5 - iter 72/81 - loss 0.07878068 - samples/sec: 12.39 - lr: 0.020000\n",
+      "2021-09-21 21:52:19,414 epoch 5 - iter 80/81 - loss 0.07581975 - samples/sec: 15.25 - lr: 0.020000\n",
+      "2021-09-21 21:52:19,461 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:52:19,461 EPOCH 5 done: loss 0.0749 - lr 0.0200000\n",
+      "2021-09-21 21:52:19,832 DEV : loss 0.8622210621833801 - score 0.3333\n",
+      "2021-09-21 21:52:19,833 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:52:19,836 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:52:20,372 epoch 6 - iter 8/81 - loss 0.00030485 - samples/sec: 15.38 - lr: 0.020000\n",
+      "2021-09-21 21:52:20,849 epoch 6 - iter 16/81 - loss 0.00465941 - samples/sec: 16.78 - lr: 0.020000\n",
+      "2021-09-21 21:52:21,214 epoch 6 - iter 24/81 - loss 0.00318755 - samples/sec: 21.98 - lr: 0.020000\n",
+      "2021-09-21 21:52:21,586 epoch 6 - iter 32/81 - loss 0.03595997 - samples/sec: 21.53 - lr: 0.020000\n",
+      "2021-09-21 21:52:21,947 epoch 6 - iter 40/81 - loss 0.07988090 - samples/sec: 22.23 - lr: 0.020000\n",
+      "2021-09-21 21:52:22,402 epoch 6 - iter 48/81 - loss 0.06836710 - samples/sec: 17.58 - lr: 0.020000\n",
+      "2021-09-21 21:52:22,813 epoch 6 - iter 56/81 - loss 0.05870148 - samples/sec: 19.54 - lr: 0.020000\n",
+      "2021-09-21 21:52:23,438 epoch 6 - iter 64/81 - loss 0.05147571 - samples/sec: 12.81 - lr: 0.020000\n",
+      "2021-09-21 21:52:24,082 epoch 6 - iter 72/81 - loss 0.05976968 - samples/sec: 12.44 - lr: 0.020000\n",
+      "2021-09-21 21:52:24,536 epoch 6 - iter 80/81 - loss 0.05782301 - samples/sec: 17.63 - lr: 0.020000\n",
+      "2021-09-21 21:52:24,595 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:54:30,513 EPOCH 6 done: loss 0.1066 - lr 0.0200000\n",
-      "2021-09-08 01:54:31,165 DEV : loss 0.1858157366514206 - score 0.8889\n",
-      "2021-09-08 01:54:31,168 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:54:35,454 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:54:36,005 epoch 7 - iter 8/81 - loss 0.31722289 - samples/sec: 15.05 - lr: 0.020000\n",
-      "2021-09-08 01:54:36,548 epoch 7 - iter 16/81 - loss 0.16061388 - samples/sec: 14.75 - lr: 0.020000\n",
-      "2021-09-08 01:54:36,937 epoch 7 - iter 24/81 - loss 0.10730494 - samples/sec: 20.61 - lr: 0.020000\n",
-      "2021-09-08 01:54:37,352 epoch 7 - iter 32/81 - loss 0.08167414 - samples/sec: 19.34 - lr: 0.020000\n",
-      "2021-09-08 01:54:37,801 epoch 7 - iter 40/81 - loss 0.08312209 - samples/sec: 17.85 - lr: 0.020000\n",
-      "2021-09-08 01:54:38,200 epoch 7 - iter 48/81 - loss 0.06956264 - samples/sec: 20.12 - lr: 0.020000\n",
-      "2021-09-08 01:54:38,579 epoch 7 - iter 56/81 - loss 0.05976267 - samples/sec: 21.19 - lr: 0.020000\n",
-      "2021-09-08 01:54:39,000 epoch 7 - iter 64/81 - loss 0.07487650 - samples/sec: 19.07 - lr: 0.020000\n",
-      "2021-09-08 01:54:39,576 epoch 7 - iter 72/81 - loss 0.06660912 - samples/sec: 13.91 - lr: 0.020000\n",
-      "2021-09-08 01:54:40,131 epoch 7 - iter 80/81 - loss 0.06037949 - samples/sec: 14.42 - lr: 0.020000\n",
-      "2021-09-08 01:54:40,228 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:54:40,229 EPOCH 7 done: loss 0.0596 - lr 0.0200000\n",
-      "2021-09-08 01:54:40,656 DEV : loss 0.2104434221982956 - score 0.8889\n",
-      "2021-09-08 01:54:40,657 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:54:40,659 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:54:41,205 epoch 8 - iter 8/81 - loss 0.10051998 - samples/sec: 15.07 - lr: 0.020000\n",
-      "2021-09-08 01:54:41,757 epoch 8 - iter 16/81 - loss 0.26970814 - samples/sec: 14.52 - lr: 0.020000\n",
-      "2021-09-08 01:54:42,333 epoch 8 - iter 24/81 - loss 0.17990791 - samples/sec: 13.91 - lr: 0.020000\n",
-      "2021-09-08 01:54:42,757 epoch 8 - iter 32/81 - loss 0.13500236 - samples/sec: 18.87 - lr: 0.020000\n",
-      "2021-09-08 01:54:43,235 epoch 8 - iter 40/81 - loss 0.10811640 - samples/sec: 16.79 - lr: 0.020000\n",
-      "2021-09-08 01:54:43,697 epoch 8 - iter 48/81 - loss 0.09012615 - samples/sec: 17.35 - lr: 0.020000\n",
-      "2021-09-08 01:54:44,080 epoch 8 - iter 56/81 - loss 0.07734778 - samples/sec: 20.91 - lr: 0.020000\n",
-      "2021-09-08 01:54:44,525 epoch 8 - iter 64/81 - loss 0.06775397 - samples/sec: 18.01 - lr: 0.020000\n",
-      "2021-09-08 01:54:44,924 epoch 8 - iter 72/81 - loss 0.06025275 - samples/sec: 20.08 - lr: 0.020000\n",
-      "2021-09-08 01:54:45,309 epoch 8 - iter 80/81 - loss 0.05424488 - samples/sec: 20.84 - lr: 0.020000\n",
-      "2021-09-08 01:54:45,366 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:54:45,367 EPOCH 8 done: loss 0.0536 - lr 0.0200000\n",
-      "2021-09-08 01:54:45,788 DEV : loss 0.19649717211723328 - score 0.8889\n",
-      "2021-09-08 01:54:45,789 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:54:45,791 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:54:46,359 epoch 9 - iter 8/81 - loss 0.00598200 - samples/sec: 14.47 - lr: 0.020000\n",
-      "2021-09-08 01:54:46,758 epoch 9 - iter 16/81 - loss 0.00311211 - samples/sec: 20.10 - lr: 0.020000\n",
-      "2021-09-08 01:54:47,172 epoch 9 - iter 24/81 - loss 0.00211859 - samples/sec: 19.37 - lr: 0.020000\n",
-      "2021-09-08 01:54:47,607 epoch 9 - iter 32/81 - loss 0.00170203 - samples/sec: 18.42 - lr: 0.020000\n",
-      "2021-09-08 01:54:47,997 epoch 9 - iter 40/81 - loss 0.00142004 - samples/sec: 20.51 - lr: 0.020000\n",
-      "2021-09-08 01:54:48,402 epoch 9 - iter 48/81 - loss 0.00121394 - samples/sec: 19.82 - lr: 0.020000\n",
-      "2021-09-08 01:54:49,158 epoch 9 - iter 56/81 - loss 0.00110814 - samples/sec: 10.60 - lr: 0.020000\n",
-      "2021-09-08 01:54:49,657 epoch 9 - iter 64/81 - loss 0.00350904 - samples/sec: 16.04 - lr: 0.020000\n",
-      "2021-09-08 01:54:50,204 epoch 9 - iter 72/81 - loss 0.00317096 - samples/sec: 14.64 - lr: 0.020000\n",
-      "2021-09-08 01:54:50,606 epoch 9 - iter 80/81 - loss 0.00288187 - samples/sec: 19.90 - lr: 0.020000\n",
-      "2021-09-08 01:54:50,656 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:54:50,656 EPOCH 9 done: loss 0.0028 - lr 0.0200000\n",
-      "2021-09-08 01:54:51,072 DEV : loss 0.21556080877780914 - score 0.8889\n",
-      "2021-09-08 01:54:51,073 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:54:51,075 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:54:51,635 epoch 10 - iter 8/81 - loss 0.00017439 - samples/sec: 14.67 - lr: 0.020000\n",
-      "2021-09-08 01:54:52,107 epoch 10 - iter 16/81 - loss 0.01195253 - samples/sec: 16.96 - lr: 0.020000\n",
-      "2021-09-08 01:54:52,527 epoch 10 - iter 24/81 - loss 0.00825009 - samples/sec: 19.11 - lr: 0.020000\n",
-      "2021-09-08 01:54:52,914 epoch 10 - iter 32/81 - loss 0.00634089 - samples/sec: 20.66 - lr: 0.020000\n",
-      "2021-09-08 01:54:53,399 epoch 10 - iter 40/81 - loss 0.00509877 - samples/sec: 16.54 - lr: 0.020000\n",
-      "2021-09-08 01:54:53,876 epoch 10 - iter 48/81 - loss 0.00427894 - samples/sec: 16.80 - lr: 0.020000\n",
-      "2021-09-08 01:54:54,412 epoch 10 - iter 56/81 - loss 0.00368528 - samples/sec: 14.94 - lr: 0.020000\n",
-      "2021-09-08 01:54:54,815 epoch 10 - iter 64/81 - loss 0.00325458 - samples/sec: 19.90 - lr: 0.020000\n",
-      "2021-09-08 01:54:55,303 epoch 10 - iter 72/81 - loss 0.00290705 - samples/sec: 16.41 - lr: 0.020000\n",
-      "2021-09-08 01:54:55,680 epoch 10 - iter 80/81 - loss 0.00262681 - samples/sec: 21.24 - lr: 0.020000\n",
-      "2021-09-08 01:54:55,727 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:54:55,728 EPOCH 10 done: loss 0.0026 - lr 0.0200000\n",
-      "2021-09-08 01:54:56,252 DEV : loss 0.27662402391433716 - score 0.8889\n",
-      "Epoch    10: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:54:56,254 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:55:02,139 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:02,140 Testing using best model ...\n",
-      "2021-09-08 01:55:02,141 loading file None/best-model.pt\n",
+      "2021-09-21 21:52:24,595 EPOCH 6 done: loss 0.0621 - lr 0.0200000\n",
+      "2021-09-21 21:52:25,150 DEV : loss 0.7583877444267273 - score 0.4444\n",
+      "2021-09-21 21:52:25,152 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:52:25,154 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:52:25,852 epoch 7 - iter 8/81 - loss 0.00284381 - samples/sec: 11.83 - lr: 0.020000\n",
+      "2021-09-21 21:52:26,385 epoch 7 - iter 16/81 - loss 0.00157057 - samples/sec: 15.01 - lr: 0.020000\n",
+      "2021-09-21 21:52:26,836 epoch 7 - iter 24/81 - loss 0.00132027 - samples/sec: 17.78 - lr: 0.020000\n",
+      "2021-09-21 21:52:27,560 epoch 7 - iter 32/81 - loss 0.00121834 - samples/sec: 11.06 - lr: 0.020000\n",
+      "2021-09-21 21:52:28,076 epoch 7 - iter 40/81 - loss 0.00100587 - samples/sec: 15.51 - lr: 0.020000\n",
+      "2021-09-21 21:52:28,730 epoch 7 - iter 48/81 - loss 0.00087370 - samples/sec: 12.25 - lr: 0.020000\n",
+      "2021-09-21 21:52:29,199 epoch 7 - iter 56/81 - loss 0.00079268 - samples/sec: 17.07 - lr: 0.020000\n",
+      "2021-09-21 21:52:29,653 epoch 7 - iter 64/81 - loss 0.05058125 - samples/sec: 17.68 - lr: 0.020000\n",
+      "2021-09-21 21:52:30,302 epoch 7 - iter 72/81 - loss 0.04498356 - samples/sec: 12.33 - lr: 0.020000\n",
+      "2021-09-21 21:52:30,872 epoch 7 - iter 80/81 - loss 0.04057386 - samples/sec: 14.06 - lr: 0.020000\n",
+      "2021-09-21 21:52:30,925 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:52:30,925 EPOCH 7 done: loss 0.0401 - lr 0.0200000\n",
+      "2021-09-21 21:52:31,408 DEV : loss 0.821498692035675 - score 0.3333\n",
+      "Epoch     7: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:52:31,410 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:52:31,412 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:52:32,493 epoch 8 - iter 8/81 - loss 0.00089660 - samples/sec: 7.58 - lr: 0.010000\n",
+      "2021-09-21 21:52:33,039 epoch 8 - iter 16/81 - loss 0.00072122 - samples/sec: 14.69 - lr: 0.010000\n",
+      "2021-09-21 21:52:33,552 epoch 8 - iter 24/81 - loss 0.00055982 - samples/sec: 15.62 - lr: 0.010000\n",
+      "2021-09-21 21:52:34,005 epoch 8 - iter 32/81 - loss 0.02420687 - samples/sec: 17.66 - lr: 0.010000\n",
+      "2021-09-21 21:52:34,486 epoch 8 - iter 40/81 - loss 0.01958513 - samples/sec: 16.66 - lr: 0.010000\n",
+      "2021-09-21 21:52:34,935 epoch 8 - iter 48/81 - loss 0.01636370 - samples/sec: 17.84 - lr: 0.010000\n",
+      "2021-09-21 21:52:35,566 epoch 8 - iter 56/81 - loss 0.01409216 - samples/sec: 12.71 - lr: 0.010000\n",
+      "2021-09-21 21:52:36,100 epoch 8 - iter 64/81 - loss 0.01235980 - samples/sec: 14.99 - lr: 0.010000\n",
+      "2021-09-21 21:52:36,545 epoch 8 - iter 72/81 - loss 0.01101028 - samples/sec: 17.99 - lr: 0.010000\n",
+      "2021-09-21 21:52:37,252 epoch 8 - iter 80/81 - loss 0.02050065 - samples/sec: 11.33 - lr: 0.010000\n",
+      "2021-09-21 21:52:37,326 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:52:37,326 EPOCH 8 done: loss 0.0203 - lr 0.0100000\n",
+      "2021-09-21 21:52:37,868 DEV : loss 0.7911439538002014 - score 0.3333\n",
+      "2021-09-21 21:52:37,870 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:52:37,872 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:52:38,661 epoch 9 - iter 8/81 - loss 0.00027830 - samples/sec: 10.37 - lr: 0.010000\n",
+      "2021-09-21 21:52:39,160 epoch 9 - iter 16/81 - loss 0.00020252 - samples/sec: 16.07 - lr: 0.010000\n",
+      "2021-09-21 21:52:39,681 epoch 9 - iter 24/81 - loss 0.00024294 - samples/sec: 15.38 - lr: 0.010000\n",
+      "2021-09-21 21:52:40,273 epoch 9 - iter 32/81 - loss 0.00043121 - samples/sec: 13.52 - lr: 0.010000\n",
+      "2021-09-21 21:52:40,749 epoch 9 - iter 40/81 - loss 0.00038612 - samples/sec: 16.84 - lr: 0.010000\n",
+      "2021-09-21 21:52:41,422 epoch 9 - iter 48/81 - loss 0.00035228 - samples/sec: 11.89 - lr: 0.010000\n",
+      "2021-09-21 21:52:41,881 epoch 9 - iter 56/81 - loss 0.00032304 - samples/sec: 17.49 - lr: 0.010000\n",
+      "2021-09-21 21:52:42,388 epoch 9 - iter 64/81 - loss 0.00031054 - samples/sec: 15.79 - lr: 0.010000\n",
+      "2021-09-21 21:52:42,850 epoch 9 - iter 72/81 - loss 0.00031275 - samples/sec: 17.32 - lr: 0.010000\n",
+      "2021-09-21 21:52:43,430 epoch 9 - iter 80/81 - loss 0.00029450 - samples/sec: 13.81 - lr: 0.010000\n",
+      "2021-09-21 21:52:43,481 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:52:43,481 EPOCH 9 done: loss 0.0003 - lr 0.0100000\n",
+      "2021-09-21 21:52:44,028 DEV : loss 0.7972260117530823 - score 0.3333\n",
+      "2021-09-21 21:52:44,029 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:52:44,031 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:52:44,661 epoch 10 - iter 8/81 - loss 0.00020654 - samples/sec: 13.08 - lr: 0.010000\n",
+      "2021-09-21 21:52:45,261 epoch 10 - iter 16/81 - loss 0.00140581 - samples/sec: 13.34 - lr: 0.010000\n",
+      "2021-09-21 21:52:45,883 epoch 10 - iter 24/81 - loss 0.00101617 - samples/sec: 12.89 - lr: 0.010000\n",
+      "2021-09-21 21:52:46,633 epoch 10 - iter 32/81 - loss 0.00083681 - samples/sec: 10.68 - lr: 0.010000\n",
+      "2021-09-21 21:52:47,037 epoch 10 - iter 40/81 - loss 0.00069973 - samples/sec: 19.80 - lr: 0.010000\n",
+      "2021-09-21 21:52:47,491 epoch 10 - iter 48/81 - loss 0.00063235 - samples/sec: 17.66 - lr: 0.010000\n",
+      "2021-09-21 21:52:48,012 epoch 10 - iter 56/81 - loss 0.00056280 - samples/sec: 15.39 - lr: 0.010000\n",
+      "2021-09-21 21:52:48,410 epoch 10 - iter 64/81 - loss 0.00055397 - samples/sec: 20.17 - lr: 0.010000\n",
+      "2021-09-21 21:52:48,783 epoch 10 - iter 72/81 - loss 0.00050949 - samples/sec: 21.43 - lr: 0.010000\n",
+      "2021-09-21 21:52:49,139 epoch 10 - iter 80/81 - loss 0.00047117 - samples/sec: 22.54 - lr: 0.010000\n",
+      "2021-09-21 21:52:49,186 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:52:49,187 EPOCH 10 done: loss 0.0005 - lr 0.0100000\n",
+      "2021-09-21 21:52:49,795 DEV : loss 0.8185486197471619 - score 0.3333\n",
+      "2021-09-21 21:52:49,797 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:52:53,675 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:52:53,675 Testing using best model ...\n",
+      "2021-09-21 21:52:53,677 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:55:07,406 \t0.9\n",
-      "2021-09-08 01:55:07,407 \n",
+      "2021-09-21 21:52:59,057 \t0.8\n",
+      "2021-09-21 21:52:59,058 \n",
       "Results:\n",
-      "- F-score (micro) 0.9\n",
-      "- F-score (macro) 0.6667\n",
-      "- Accuracy 0.9\n",
+      "- F-score (micro) 0.8\n",
+      "- F-score (macro) 0.53\n",
+      "- Accuracy 0.8\n",
       "\n",
       "By class:\n",
       "                        precision    recall  f1-score   support\n",
       "\n",
       "Family & Relationships     0.0000    0.0000    0.0000         0\n",
-      "                Health     1.0000    1.0000    1.0000         1\n",
+      "                Health     0.0000    0.0000    0.0000         0\n",
       " Science & Mathematics     1.0000    1.0000    1.0000         1\n",
-      " Entertainment & Music     1.0000    1.0000    1.0000         1\n",
-      "  Computers & Internet     0.5000    1.0000    0.6667         1\n",
-      " Education & Reference     1.0000    1.0000    1.0000         2\n",
-      "                Sports     1.0000    1.0000    1.0000         2\n",
-      "     Society & Culture     0.0000    0.0000    0.0000         1\n",
-      "    Business & Finance     1.0000    1.0000    1.0000         1\n",
-      " Politics & Government     0.0000    0.0000    0.0000         0\n",
+      " Entertainment & Music     1.0000    1.0000    1.0000         2\n",
+      "  Computers & Internet     1.0000    1.0000    1.0000         1\n",
+      " Education & Reference     0.5000    0.5000    0.5000         2\n",
+      "                Sports     1.0000    1.0000    1.0000         1\n",
+      "     Society & Culture     0.0000    0.0000    0.0000         0\n",
+      "    Business & Finance     0.0000    0.0000    0.0000         1\n",
+      " Politics & Government     0.6667    1.0000    0.8000         2\n",
       "\n",
-      "             micro avg     0.9000    0.9000    0.9000        10\n",
-      "             macro avg     0.6500    0.7000    0.6667        10\n",
-      "          weighted avg     0.8500    0.9000    0.8667        10\n",
-      "           samples avg     0.9000    0.9000    0.9000        10\n",
+      "             micro avg     0.8000    0.8000    0.8000        10\n",
+      "             macro avg     0.5167    0.5500    0.5300        10\n",
+      "          weighted avg     0.7333    0.8000    0.7600        10\n",
+      "           samples avg     0.8000    0.8000    0.8000        10\n",
       "\n",
-      "2021-09-08 01:55:07,407 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:21,155 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
+      "2021-09-21 21:52:59,058 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:53:16,773 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:55:25,488 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:53:20,932 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 91/91 [00:00<00:00, 13870.76it/s]"
+      "100%|██████████| 91/91 [00:00<00:00, 13385.29it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:55:25,496 [b'Family & Relationships', b'Health', b'Science & Mathematics', b'Entertainment & Music', b'Computers & Internet', b'Education & Reference', b'Sports', b'Society & Culture', b'Business & Finance', b'Politics & Government']\n",
-      "2021-09-08 01:55:25,633 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:25,634 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:53:20,941 [b'Family & Relationships', b'Health', b'Science & Mathematics', b'Entertainment & Music', b'Computers & Internet', b'Education & Reference', b'Sports', b'Society & Culture', b'Business & Finance', b'Politics & Government']\n",
+      "2021-09-21 21:53:20,951 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:53:20,953 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -955,24 +954,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:55:25,635 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:25,635 Corpus: \"Corpus: 81 train + 9 dev + 10 test sentences\"\n",
-      "2021-09-08 01:55:25,636 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:25,636 Parameters:\n",
-      "2021-09-08 01:55:25,636  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:55:25,636  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:55:25,637  - patience: \"3\"\n",
-      "2021-09-08 01:55:25,637  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:55:25,637  - max_epochs: \"10\"\n",
-      "2021-09-08 01:55:25,638  - shuffle: \"True\"\n",
-      "2021-09-08 01:55:25,638  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:55:25,638  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:55:25,638 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:25,639 Model training base path: \"None\"\n",
-      "2021-09-08 01:55:25,639 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:25,639 Device: cuda:1\n",
-      "2021-09-08 01:55:25,640 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:25,640 Embeddings storage mode: cpu\n"
+      "2021-09-21 21:53:20,954 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:53:20,955 Corpus: \"Corpus: 81 train + 9 dev + 10 test sentences\"\n",
+      "2021-09-21 21:53:20,955 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:53:20,955 Parameters:\n",
+      "2021-09-21 21:53:20,956  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:53:20,956  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:53:20,957  - patience: \"3\"\n",
+      "2021-09-21 21:53:20,957  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:53:20,958  - max_epochs: \"10\"\n",
+      "2021-09-21 21:53:20,958  - shuffle: \"True\"\n",
+      "2021-09-21 21:53:20,959  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:53:20,959  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:53:20,959 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:53:20,960 Model training base path: \"None\"\n",
+      "2021-09-21 21:53:20,960 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:53:20,961 Device: cuda:0\n",
+      "2021-09-21 21:53:20,961 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:53:20,962 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:53:20,970 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -986,215 +986,216 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:55:25,825 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:26,284 epoch 1 - iter 8/81 - loss 0.51546285 - samples/sec: 18.06 - lr: 0.020000\n",
-      "2021-09-08 01:55:26,859 epoch 1 - iter 16/81 - loss 0.45952369 - samples/sec: 13.93 - lr: 0.020000\n",
-      "2021-09-08 01:55:27,245 epoch 1 - iter 24/81 - loss 0.36405992 - samples/sec: 20.74 - lr: 0.020000\n",
-      "2021-09-08 01:55:27,659 epoch 1 - iter 32/81 - loss 0.44273432 - samples/sec: 19.40 - lr: 0.020000\n",
-      "2021-09-08 01:55:28,064 epoch 1 - iter 40/81 - loss 0.45077919 - samples/sec: 19.77 - lr: 0.020000\n",
-      "2021-09-08 01:55:28,626 epoch 1 - iter 48/81 - loss 0.46000035 - samples/sec: 14.27 - lr: 0.020000\n",
-      "2021-09-08 01:55:29,259 epoch 1 - iter 56/81 - loss 0.51669858 - samples/sec: 12.65 - lr: 0.020000\n",
-      "2021-09-08 01:55:29,708 epoch 1 - iter 64/81 - loss 0.57875988 - samples/sec: 17.86 - lr: 0.020000\n",
-      "2021-09-08 01:55:30,204 epoch 1 - iter 72/81 - loss 0.61893441 - samples/sec: 16.13 - lr: 0.020000\n",
-      "2021-09-08 01:55:30,780 epoch 1 - iter 80/81 - loss 0.56626108 - samples/sec: 13.92 - lr: 0.020000\n",
-      "2021-09-08 01:55:30,838 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:30,839 EPOCH 1 done: loss 0.5808 - lr 0.0200000\n",
-      "2021-09-08 01:55:31,354 DEV : loss 0.22032129764556885 - score 0.6667\n",
-      "2021-09-08 01:55:31,356 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:53:21,469 epoch 1 - iter 8/81 - loss 0.50973105 - samples/sec: 16.82 - lr: 0.020000\n",
+      "2021-09-21 21:53:22,051 epoch 1 - iter 16/81 - loss 0.34944096 - samples/sec: 13.75 - lr: 0.020000\n",
+      "2021-09-21 21:53:22,673 epoch 1 - iter 24/81 - loss 0.33938041 - samples/sec: 12.89 - lr: 0.020000\n",
+      "2021-09-21 21:53:23,246 epoch 1 - iter 32/81 - loss 0.34762113 - samples/sec: 13.98 - lr: 0.020000\n",
+      "2021-09-21 21:53:23,811 epoch 1 - iter 40/81 - loss 0.32258924 - samples/sec: 14.19 - lr: 0.020000\n",
+      "2021-09-21 21:53:24,352 epoch 1 - iter 48/81 - loss 0.42153343 - samples/sec: 14.80 - lr: 0.020000\n",
+      "2021-09-21 21:53:24,895 epoch 1 - iter 56/81 - loss 0.41351143 - samples/sec: 14.74 - lr: 0.020000\n",
+      "2021-09-21 21:53:25,340 epoch 1 - iter 64/81 - loss 0.47270923 - samples/sec: 18.02 - lr: 0.020000\n",
+      "2021-09-21 21:53:25,940 epoch 1 - iter 72/81 - loss 0.47806475 - samples/sec: 13.35 - lr: 0.020000\n",
+      "2021-09-21 21:53:26,531 epoch 1 - iter 80/81 - loss 0.48386381 - samples/sec: 13.55 - lr: 0.020000\n",
+      "2021-09-21 21:53:26,576 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:53:26,576 EPOCH 1 done: loss 0.4780 - lr 0.0200000\n",
+      "2021-09-21 21:53:27,680 DEV : loss 0.271309494972229 - score 0.5556\n",
+      "2021-09-21 21:53:27,681 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:55:35,445 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:35,979 epoch 2 - iter 8/81 - loss 0.52228743 - samples/sec: 15.42 - lr: 0.020000\n",
-      "2021-09-08 01:55:36,518 epoch 2 - iter 16/81 - loss 0.36822947 - samples/sec: 14.87 - lr: 0.020000\n",
-      "2021-09-08 01:55:37,122 epoch 2 - iter 24/81 - loss 0.34506793 - samples/sec: 13.25 - lr: 0.020000\n",
-      "2021-09-08 01:55:37,780 epoch 2 - iter 32/81 - loss 0.38329139 - samples/sec: 12.18 - lr: 0.020000\n",
-      "2021-09-08 01:55:38,267 epoch 2 - iter 40/81 - loss 0.45735479 - samples/sec: 16.47 - lr: 0.020000\n",
-      "2021-09-08 01:55:38,728 epoch 2 - iter 48/81 - loss 0.46447328 - samples/sec: 17.38 - lr: 0.020000\n",
-      "2021-09-08 01:55:39,141 epoch 2 - iter 56/81 - loss 0.45077974 - samples/sec: 19.41 - lr: 0.020000\n",
-      "2021-09-08 01:55:39,559 epoch 2 - iter 64/81 - loss 0.40546583 - samples/sec: 19.17 - lr: 0.020000\n",
-      "2021-09-08 01:55:39,943 epoch 2 - iter 72/81 - loss 0.37937762 - samples/sec: 20.87 - lr: 0.020000\n",
-      "2021-09-08 01:55:40,458 epoch 2 - iter 80/81 - loss 0.38532363 - samples/sec: 15.56 - lr: 0.020000\n",
-      "2021-09-08 01:55:40,531 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:40,531 EPOCH 2 done: loss 0.3806 - lr 0.0200000\n",
-      "2021-09-08 01:55:41,062 DEV : loss 0.458000123500824 - score 0.6667\n",
-      "2021-09-08 01:55:41,064 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:55:41,074 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:41,618 epoch 3 - iter 8/81 - loss 0.10927833 - samples/sec: 15.11 - lr: 0.020000\n",
-      "2021-09-08 01:55:42,047 epoch 3 - iter 16/81 - loss 0.20157124 - samples/sec: 18.68 - lr: 0.020000\n",
-      "2021-09-08 01:55:42,530 epoch 3 - iter 24/81 - loss 0.21651311 - samples/sec: 16.58 - lr: 0.020000\n",
-      "2021-09-08 01:55:42,926 epoch 3 - iter 32/81 - loss 0.20430753 - samples/sec: 20.24 - lr: 0.020000\n",
-      "2021-09-08 01:55:43,321 epoch 3 - iter 40/81 - loss 0.20510010 - samples/sec: 20.28 - lr: 0.020000\n",
-      "2021-09-08 01:55:43,722 epoch 3 - iter 48/81 - loss 0.21651988 - samples/sec: 20.00 - lr: 0.020000\n",
-      "2021-09-08 01:55:44,141 epoch 3 - iter 56/81 - loss 0.18738193 - samples/sec: 19.16 - lr: 0.020000\n",
-      "2021-09-08 01:55:44,549 epoch 3 - iter 64/81 - loss 0.16670883 - samples/sec: 19.62 - lr: 0.020000\n",
-      "2021-09-08 01:55:45,200 epoch 3 - iter 72/81 - loss 0.16777130 - samples/sec: 12.31 - lr: 0.020000\n",
-      "2021-09-08 01:55:45,898 epoch 3 - iter 80/81 - loss 0.19926761 - samples/sec: 11.47 - lr: 0.020000\n",
-      "2021-09-08 01:55:45,949 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:45,949 EPOCH 3 done: loss 0.1970 - lr 0.0200000\n",
-      "2021-09-08 01:55:46,467 DEV : loss 0.48732131719589233 - score 0.6667\n",
-      "2021-09-08 01:55:46,469 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:55:46,471 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:46,895 epoch 4 - iter 8/81 - loss 0.26482914 - samples/sec: 19.59 - lr: 0.020000\n",
-      "2021-09-08 01:55:47,304 epoch 4 - iter 16/81 - loss 0.14314158 - samples/sec: 19.57 - lr: 0.020000\n",
-      "2021-09-08 01:55:47,744 epoch 4 - iter 24/81 - loss 0.09973036 - samples/sec: 18.21 - lr: 0.020000\n",
-      "2021-09-08 01:55:48,258 epoch 4 - iter 32/81 - loss 0.14892733 - samples/sec: 15.61 - lr: 0.020000\n",
-      "2021-09-08 01:55:48,985 epoch 4 - iter 40/81 - loss 0.19203994 - samples/sec: 11.01 - lr: 0.020000\n",
-      "2021-09-08 01:55:49,431 epoch 4 - iter 48/81 - loss 0.20753200 - samples/sec: 17.97 - lr: 0.020000\n",
-      "2021-09-08 01:55:49,884 epoch 4 - iter 56/81 - loss 0.24820451 - samples/sec: 17.69 - lr: 0.020000\n",
-      "2021-09-08 01:55:50,375 epoch 4 - iter 64/81 - loss 0.23829598 - samples/sec: 16.33 - lr: 0.020000\n",
-      "2021-09-08 01:55:50,918 epoch 4 - iter 72/81 - loss 0.21449035 - samples/sec: 14.75 - lr: 0.020000\n",
-      "2021-09-08 01:55:51,475 epoch 4 - iter 80/81 - loss 0.22165366 - samples/sec: 14.38 - lr: 0.020000\n",
-      "2021-09-08 01:55:51,521 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:51,522 EPOCH 4 done: loss 0.2190 - lr 0.0200000\n",
-      "2021-09-08 01:55:52,031 DEV : loss 0.4223387837409973 - score 0.6667\n",
-      "2021-09-08 01:55:52,032 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:55:52,034 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:52,582 epoch 5 - iter 8/81 - loss 0.01567519 - samples/sec: 14.99 - lr: 0.020000\n",
-      "2021-09-08 01:55:53,019 epoch 5 - iter 16/81 - loss 0.00932936 - samples/sec: 18.34 - lr: 0.020000\n",
-      "2021-09-08 01:55:53,459 epoch 5 - iter 24/81 - loss 0.03619396 - samples/sec: 18.21 - lr: 0.020000\n",
-      "2021-09-08 01:55:53,915 epoch 5 - iter 32/81 - loss 0.02780644 - samples/sec: 17.58 - lr: 0.020000\n",
-      "2021-09-08 01:55:54,451 epoch 5 - iter 40/81 - loss 0.02534361 - samples/sec: 14.97 - lr: 0.020000\n",
-      "2021-09-08 01:55:55,048 epoch 5 - iter 48/81 - loss 0.02145488 - samples/sec: 13.42 - lr: 0.020000\n",
-      "2021-09-08 01:55:55,445 epoch 5 - iter 56/81 - loss 0.02522274 - samples/sec: 20.18 - lr: 0.020000\n",
-      "2021-09-08 01:55:55,848 epoch 5 - iter 64/81 - loss 0.05455943 - samples/sec: 19.89 - lr: 0.020000\n",
-      "2021-09-08 01:55:56,332 epoch 5 - iter 72/81 - loss 0.05991105 - samples/sec: 16.53 - lr: 0.020000\n",
-      "2021-09-08 01:55:56,847 epoch 5 - iter 80/81 - loss 0.05409338 - samples/sec: 15.58 - lr: 0.020000\n",
-      "2021-09-08 01:55:56,894 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:56,894 EPOCH 5 done: loss 0.0534 - lr 0.0200000\n",
-      "2021-09-08 01:55:57,403 DEV : loss 0.46733903884887695 - score 0.6667\n",
-      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:55:57,405 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:55:57,406 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:57,901 epoch 6 - iter 8/81 - loss 0.00113577 - samples/sec: 16.65 - lr: 0.010000\n",
-      "2021-09-08 01:55:58,367 epoch 6 - iter 16/81 - loss 0.00187874 - samples/sec: 17.20 - lr: 0.010000\n",
-      "2021-09-08 01:55:58,790 epoch 6 - iter 24/81 - loss 0.04506867 - samples/sec: 18.95 - lr: 0.010000\n",
-      "2021-09-08 01:55:59,233 epoch 6 - iter 32/81 - loss 0.03409746 - samples/sec: 18.09 - lr: 0.010000\n",
-      "2021-09-08 01:55:59,695 epoch 6 - iter 40/81 - loss 0.02769618 - samples/sec: 17.35 - lr: 0.010000\n",
-      "2021-09-08 01:56:00,460 epoch 6 - iter 48/81 - loss 0.02316068 - samples/sec: 10.47 - lr: 0.010000\n",
-      "2021-09-08 01:56:01,030 epoch 6 - iter 56/81 - loss 0.01997787 - samples/sec: 14.06 - lr: 0.010000\n",
-      "2021-09-08 01:56:01,443 epoch 6 - iter 64/81 - loss 0.02411953 - samples/sec: 19.41 - lr: 0.010000\n",
-      "2021-09-08 01:56:01,876 epoch 6 - iter 72/81 - loss 0.02950509 - samples/sec: 18.53 - lr: 0.010000\n"
+      "2021-09-21 21:53:32,402 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:53:32,836 epoch 2 - iter 8/81 - loss 0.74491983 - samples/sec: 19.13 - lr: 0.020000\n",
+      "2021-09-21 21:53:33,279 epoch 2 - iter 16/81 - loss 0.42817680 - samples/sec: 18.09 - lr: 0.020000\n",
+      "2021-09-21 21:53:34,039 epoch 2 - iter 24/81 - loss 0.37361513 - samples/sec: 10.55 - lr: 0.020000\n",
+      "2021-09-21 21:53:34,462 epoch 2 - iter 32/81 - loss 0.40897763 - samples/sec: 18.93 - lr: 0.020000\n",
+      "2021-09-21 21:53:35,057 epoch 2 - iter 40/81 - loss 0.39816498 - samples/sec: 13.46 - lr: 0.020000\n",
+      "2021-09-21 21:53:35,533 epoch 2 - iter 48/81 - loss 0.45093370 - samples/sec: 16.86 - lr: 0.020000\n",
+      "2021-09-21 21:53:36,121 epoch 2 - iter 56/81 - loss 0.46725343 - samples/sec: 13.61 - lr: 0.020000\n",
+      "2021-09-21 21:53:36,561 epoch 2 - iter 64/81 - loss 0.43070868 - samples/sec: 18.21 - lr: 0.020000\n",
+      "2021-09-21 21:53:37,156 epoch 2 - iter 72/81 - loss 0.43898320 - samples/sec: 13.46 - lr: 0.020000\n",
+      "2021-09-21 21:53:38,001 epoch 2 - iter 80/81 - loss 0.45137717 - samples/sec: 9.48 - lr: 0.020000\n",
+      "2021-09-21 21:53:38,062 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:53:38,063 EPOCH 2 done: loss 0.4480 - lr 0.0200000\n",
+      "2021-09-21 21:53:38,435 DEV : loss 0.19293515384197235 - score 0.6667\n",
+      "2021-09-21 21:53:38,436 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:53:42,434 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:53:42,898 epoch 3 - iter 8/81 - loss 0.12423208 - samples/sec: 17.87 - lr: 0.020000\n",
+      "2021-09-21 21:53:43,467 epoch 3 - iter 16/81 - loss 0.29787335 - samples/sec: 14.07 - lr: 0.020000\n",
+      "2021-09-21 21:53:44,155 epoch 3 - iter 24/81 - loss 0.23415898 - samples/sec: 11.64 - lr: 0.020000\n",
+      "2021-09-21 21:53:44,723 epoch 3 - iter 32/81 - loss 0.17736613 - samples/sec: 14.09 - lr: 0.020000\n",
+      "2021-09-21 21:53:45,285 epoch 3 - iter 40/81 - loss 0.19505019 - samples/sec: 14.25 - lr: 0.020000\n",
+      "2021-09-21 21:53:45,979 epoch 3 - iter 48/81 - loss 0.19143709 - samples/sec: 11.54 - lr: 0.020000\n",
+      "2021-09-21 21:53:46,463 epoch 3 - iter 56/81 - loss 0.18384262 - samples/sec: 16.57 - lr: 0.020000\n",
+      "2021-09-21 21:53:47,196 epoch 3 - iter 64/81 - loss 0.27499115 - samples/sec: 10.92 - lr: 0.020000\n",
+      "2021-09-21 21:53:47,742 epoch 3 - iter 72/81 - loss 0.25816770 - samples/sec: 14.67 - lr: 0.020000\n",
+      "2021-09-21 21:53:48,264 epoch 3 - iter 80/81 - loss 0.25263468 - samples/sec: 15.36 - lr: 0.020000\n",
+      "2021-09-21 21:53:48,313 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:53:48,314 EPOCH 3 done: loss 0.2496 - lr 0.0200000\n",
+      "2021-09-21 21:53:48,643 DEV : loss 0.30171817541122437 - score 0.5556\n",
+      "2021-09-21 21:53:48,645 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:53:48,653 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:53:49,112 epoch 4 - iter 8/81 - loss 0.29871545 - samples/sec: 17.97 - lr: 0.020000\n",
+      "2021-09-21 21:53:49,552 epoch 4 - iter 16/81 - loss 0.37041934 - samples/sec: 18.22 - lr: 0.020000\n",
+      "2021-09-21 21:53:49,959 epoch 4 - iter 24/81 - loss 0.24954134 - samples/sec: 19.70 - lr: 0.020000\n",
+      "2021-09-21 21:53:50,671 epoch 4 - iter 32/81 - loss 0.18809400 - samples/sec: 11.24 - lr: 0.020000\n",
+      "2021-09-21 21:53:51,138 epoch 4 - iter 40/81 - loss 0.20717282 - samples/sec: 17.15 - lr: 0.020000\n",
+      "2021-09-21 21:53:51,586 epoch 4 - iter 48/81 - loss 0.19962937 - samples/sec: 17.90 - lr: 0.020000\n",
+      "2021-09-21 21:53:52,100 epoch 4 - iter 56/81 - loss 0.19547212 - samples/sec: 15.60 - lr: 0.020000\n",
+      "2021-09-21 21:53:52,674 epoch 4 - iter 64/81 - loss 0.18296190 - samples/sec: 13.94 - lr: 0.020000\n",
+      "2021-09-21 21:53:53,437 epoch 4 - iter 72/81 - loss 0.16361085 - samples/sec: 10.51 - lr: 0.020000\n",
+      "2021-09-21 21:53:53,917 epoch 4 - iter 80/81 - loss 0.15957280 - samples/sec: 16.71 - lr: 0.020000\n",
+      "2021-09-21 21:53:54,131 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:53:54,131 EPOCH 4 done: loss 0.1576 - lr 0.0200000\n",
+      "2021-09-21 21:53:54,602 DEV : loss 0.20367896556854248 - score 0.7778\n",
+      "2021-09-21 21:53:54,603 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:53:58,722 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:53:59,215 epoch 5 - iter 8/81 - loss 0.00158663 - samples/sec: 16.90 - lr: 0.020000\n",
+      "2021-09-21 21:53:59,807 epoch 5 - iter 16/81 - loss 0.00177784 - samples/sec: 13.53 - lr: 0.020000\n",
+      "2021-09-21 21:54:00,341 epoch 5 - iter 24/81 - loss 0.01468915 - samples/sec: 15.01 - lr: 0.020000\n",
+      "2021-09-21 21:54:00,895 epoch 5 - iter 32/81 - loss 0.01565015 - samples/sec: 14.46 - lr: 0.020000\n",
+      "2021-09-21 21:54:01,313 epoch 5 - iter 40/81 - loss 0.01449406 - samples/sec: 19.16 - lr: 0.020000\n",
+      "2021-09-21 21:54:01,788 epoch 5 - iter 48/81 - loss 0.05332393 - samples/sec: 16.92 - lr: 0.020000\n",
+      "2021-09-21 21:54:02,406 epoch 5 - iter 56/81 - loss 0.04621793 - samples/sec: 12.94 - lr: 0.020000\n",
+      "2021-09-21 21:54:02,961 epoch 5 - iter 64/81 - loss 0.07974549 - samples/sec: 14.43 - lr: 0.020000\n",
+      "2021-09-21 21:54:03,437 epoch 5 - iter 72/81 - loss 0.07321298 - samples/sec: 16.84 - lr: 0.020000\n",
+      "2021-09-21 21:54:04,166 epoch 5 - iter 80/81 - loss 0.11837643 - samples/sec: 10.98 - lr: 0.020000\n",
+      "2021-09-21 21:54:04,223 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:54:04,224 EPOCH 5 done: loss 0.1177 - lr 0.0200000\n",
+      "2021-09-21 21:54:04,687 DEV : loss 0.28769397735595703 - score 0.8889\n",
+      "2021-09-21 21:54:04,689 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:54:08,447 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:54:09,005 epoch 6 - iter 8/81 - loss 0.07547361 - samples/sec: 14.96 - lr: 0.020000\n",
+      "2021-09-21 21:54:09,533 epoch 6 - iter 16/81 - loss 0.09370923 - samples/sec: 15.17 - lr: 0.020000\n",
+      "2021-09-21 21:54:10,083 epoch 6 - iter 24/81 - loss 0.10984909 - samples/sec: 14.58 - lr: 0.020000\n",
+      "2021-09-21 21:54:10,818 epoch 6 - iter 32/81 - loss 0.08290288 - samples/sec: 10.88 - lr: 0.020000\n",
+      "2021-09-21 21:54:11,430 epoch 6 - iter 40/81 - loss 0.06693848 - samples/sec: 13.10 - lr: 0.020000\n",
+      "2021-09-21 21:54:11,889 epoch 6 - iter 48/81 - loss 0.09629685 - samples/sec: 17.43 - lr: 0.020000\n",
+      "2021-09-21 21:54:12,673 epoch 6 - iter 56/81 - loss 0.12270818 - samples/sec: 10.22 - lr: 0.020000\n",
+      "2021-09-21 21:54:13,647 epoch 6 - iter 64/81 - loss 0.10879137 - samples/sec: 8.22 - lr: 0.020000\n",
+      "2021-09-21 21:54:14,227 epoch 6 - iter 72/81 - loss 0.09759642 - samples/sec: 13.81 - lr: 0.020000\n",
+      "2021-09-21 21:54:15,006 epoch 6 - iter 80/81 - loss 0.08803630 - samples/sec: 10.28 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:56:02,289 epoch 6 - iter 80/81 - loss 0.02669179 - samples/sec: 19.39 - lr: 0.010000\n",
-      "2021-09-08 01:56:02,340 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:02,340 EPOCH 6 done: loss 0.0264 - lr 0.0100000\n",
-      "2021-09-08 01:56:02,849 DEV : loss 0.472150057554245 - score 0.6667\n",
-      "2021-09-08 01:56:02,851 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:56:02,853 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:03,277 epoch 7 - iter 8/81 - loss 0.00052193 - samples/sec: 19.53 - lr: 0.010000\n",
-      "2021-09-08 01:56:03,775 epoch 7 - iter 16/81 - loss 0.00064310 - samples/sec: 16.10 - lr: 0.010000\n",
-      "2021-09-08 01:56:04,242 epoch 7 - iter 24/81 - loss 0.00360506 - samples/sec: 17.15 - lr: 0.010000\n",
-      "2021-09-08 01:56:04,668 epoch 7 - iter 32/81 - loss 0.00286272 - samples/sec: 18.84 - lr: 0.010000\n",
-      "2021-09-08 01:56:05,112 epoch 7 - iter 40/81 - loss 0.00241109 - samples/sec: 18.05 - lr: 0.010000\n",
-      "2021-09-08 01:56:05,716 epoch 7 - iter 48/81 - loss 0.00308313 - samples/sec: 13.26 - lr: 0.010000\n",
-      "2021-09-08 01:56:06,291 epoch 7 - iter 56/81 - loss 0.00277661 - samples/sec: 13.92 - lr: 0.010000\n",
-      "2021-09-08 01:56:06,661 epoch 7 - iter 64/81 - loss 0.00248188 - samples/sec: 21.69 - lr: 0.010000\n",
-      "2021-09-08 01:56:07,158 epoch 7 - iter 72/81 - loss 0.00228577 - samples/sec: 16.11 - lr: 0.010000\n",
-      "2021-09-08 01:56:07,589 epoch 7 - iter 80/81 - loss 0.00211920 - samples/sec: 18.62 - lr: 0.010000\n",
-      "2021-09-08 01:56:07,675 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:07,675 EPOCH 7 done: loss 0.0021 - lr 0.0100000\n",
-      "2021-09-08 01:56:08,356 DEV : loss 0.4929634630680084 - score 0.6667\n",
-      "2021-09-08 01:56:08,358 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:56:08,360 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:08,902 epoch 8 - iter 8/81 - loss 0.00040888 - samples/sec: 15.19 - lr: 0.010000\n",
-      "2021-09-08 01:56:09,394 epoch 8 - iter 16/81 - loss 0.00040587 - samples/sec: 16.27 - lr: 0.010000\n",
-      "2021-09-08 01:56:09,941 epoch 8 - iter 24/81 - loss 0.00037273 - samples/sec: 14.66 - lr: 0.010000\n",
-      "2021-09-08 01:56:10,363 epoch 8 - iter 32/81 - loss 0.00035714 - samples/sec: 18.99 - lr: 0.010000\n",
-      "2021-09-08 01:56:10,757 epoch 8 - iter 40/81 - loss 0.00047132 - samples/sec: 20.31 - lr: 0.010000\n",
-      "2021-09-08 01:56:11,226 epoch 8 - iter 48/81 - loss 0.00044028 - samples/sec: 17.12 - lr: 0.010000\n",
-      "2021-09-08 01:56:11,682 epoch 8 - iter 56/81 - loss 0.00073114 - samples/sec: 17.57 - lr: 0.010000\n",
-      "2021-09-08 01:56:12,223 epoch 8 - iter 64/81 - loss 0.00084932 - samples/sec: 14.81 - lr: 0.010000\n",
-      "2021-09-08 01:56:12,696 epoch 8 - iter 72/81 - loss 0.00083479 - samples/sec: 16.94 - lr: 0.010000\n",
-      "2021-09-08 01:56:13,238 epoch 8 - iter 80/81 - loss 0.00077679 - samples/sec: 14.77 - lr: 0.010000\n",
-      "2021-09-08 01:56:13,285 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:13,286 EPOCH 8 done: loss 0.0008 - lr 0.0100000\n",
-      "2021-09-08 01:56:13,794 DEV : loss 0.5230015516281128 - score 0.6667\n",
-      "2021-09-08 01:56:13,796 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:56:13,798 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:14,225 epoch 9 - iter 8/81 - loss 0.00022367 - samples/sec: 19.36 - lr: 0.010000\n",
-      "2021-09-08 01:56:14,605 epoch 9 - iter 16/81 - loss 0.00023406 - samples/sec: 21.10 - lr: 0.010000\n",
-      "2021-09-08 01:56:15,183 epoch 9 - iter 24/81 - loss 0.00024357 - samples/sec: 13.88 - lr: 0.010000\n",
-      "2021-09-08 01:56:15,575 epoch 9 - iter 32/81 - loss 0.00376096 - samples/sec: 20.40 - lr: 0.010000\n",
-      "2021-09-08 01:56:16,071 epoch 9 - iter 40/81 - loss 0.00309337 - samples/sec: 16.16 - lr: 0.010000\n",
-      "2021-09-08 01:56:16,714 epoch 9 - iter 48/81 - loss 0.00262922 - samples/sec: 12.47 - lr: 0.010000\n",
-      "2021-09-08 01:56:17,194 epoch 9 - iter 56/81 - loss 0.00230852 - samples/sec: 16.70 - lr: 0.010000\n",
-      "2021-09-08 01:56:17,644 epoch 9 - iter 64/81 - loss 0.00213631 - samples/sec: 17.79 - lr: 0.010000\n",
-      "2021-09-08 01:56:18,054 epoch 9 - iter 72/81 - loss 0.06561860 - samples/sec: 19.53 - lr: 0.010000\n",
-      "2021-09-08 01:56:18,566 epoch 9 - iter 80/81 - loss 0.05911022 - samples/sec: 15.66 - lr: 0.010000\n",
-      "2021-09-08 01:56:18,615 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:18,615 EPOCH 9 done: loss 0.0584 - lr 0.0100000\n",
-      "2021-09-08 01:56:19,254 DEV : loss 0.4907354414463043 - score 0.6667\n",
-      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 01:56:19,255 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:56:19,355 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:19,985 epoch 10 - iter 8/81 - loss 0.00031037 - samples/sec: 17.58 - lr: 0.005000\n",
-      "2021-09-08 01:56:20,423 epoch 10 - iter 16/81 - loss 0.00047490 - samples/sec: 18.31 - lr: 0.005000\n",
-      "2021-09-08 01:56:20,919 epoch 10 - iter 24/81 - loss 0.00050635 - samples/sec: 16.15 - lr: 0.005000\n",
-      "2021-09-08 01:56:21,466 epoch 10 - iter 32/81 - loss 0.04056032 - samples/sec: 14.63 - lr: 0.005000\n",
-      "2021-09-08 01:56:21,927 epoch 10 - iter 40/81 - loss 0.03412383 - samples/sec: 17.37 - lr: 0.005000\n",
-      "2021-09-08 01:56:22,428 epoch 10 - iter 48/81 - loss 0.02851958 - samples/sec: 16.02 - lr: 0.005000\n",
-      "2021-09-08 01:56:22,835 epoch 10 - iter 56/81 - loss 0.02457161 - samples/sec: 19.68 - lr: 0.005000\n",
-      "2021-09-08 01:56:23,292 epoch 10 - iter 64/81 - loss 0.02696171 - samples/sec: 17.55 - lr: 0.005000\n",
-      "2021-09-08 01:56:23,703 epoch 10 - iter 72/81 - loss 0.02402320 - samples/sec: 19.48 - lr: 0.005000\n",
-      "2021-09-08 01:56:24,172 epoch 10 - iter 80/81 - loss 0.02172017 - samples/sec: 17.08 - lr: 0.005000\n",
-      "2021-09-08 01:56:24,314 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:24,315 EPOCH 10 done: loss 0.0215 - lr 0.0050000\n",
-      "2021-09-08 01:56:24,824 DEV : loss 0.4981146454811096 - score 0.6667\n",
-      "2021-09-08 01:56:24,826 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:56:28,751 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:28,752 Testing using best model ...\n",
-      "2021-09-08 01:56:28,754 loading file None/best-model.pt\n",
+      "2021-09-21 21:54:15,071 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:54:15,072 EPOCH 6 done: loss 0.0870 - lr 0.0200000\n",
+      "2021-09-21 21:54:15,557 DEV : loss 0.2399182915687561 - score 0.7778\n",
+      "2021-09-21 21:54:15,558 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:54:15,560 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:54:16,251 epoch 7 - iter 8/81 - loss 0.00158285 - samples/sec: 11.98 - lr: 0.020000\n",
+      "2021-09-21 21:54:16,802 epoch 7 - iter 16/81 - loss 0.11549149 - samples/sec: 14.52 - lr: 0.020000\n",
+      "2021-09-21 21:54:17,415 epoch 7 - iter 24/81 - loss 0.07759484 - samples/sec: 13.07 - lr: 0.020000\n",
+      "2021-09-21 21:54:18,093 epoch 7 - iter 32/81 - loss 0.05841332 - samples/sec: 11.82 - lr: 0.020000\n",
+      "2021-09-21 21:54:18,602 epoch 7 - iter 40/81 - loss 0.04688435 - samples/sec: 15.72 - lr: 0.020000\n",
+      "2021-09-21 21:54:19,387 epoch 7 - iter 48/81 - loss 0.03968968 - samples/sec: 10.20 - lr: 0.020000\n",
+      "2021-09-21 21:54:20,090 epoch 7 - iter 56/81 - loss 0.06087365 - samples/sec: 11.39 - lr: 0.020000\n",
+      "2021-09-21 21:54:20,855 epoch 7 - iter 64/81 - loss 0.05402205 - samples/sec: 10.46 - lr: 0.020000\n",
+      "2021-09-21 21:54:21,558 epoch 7 - iter 72/81 - loss 0.06232837 - samples/sec: 11.40 - lr: 0.020000\n",
+      "2021-09-21 21:54:22,097 epoch 7 - iter 80/81 - loss 0.07593369 - samples/sec: 14.86 - lr: 0.020000\n",
+      "2021-09-21 21:54:22,202 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:54:22,202 EPOCH 7 done: loss 0.0806 - lr 0.0200000\n",
+      "2021-09-21 21:54:22,824 DEV : loss 0.27966785430908203 - score 0.7778\n",
+      "2021-09-21 21:54:22,826 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:54:22,845 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:54:23,475 epoch 8 - iter 8/81 - loss 0.00067320 - samples/sec: 13.17 - lr: 0.020000\n",
+      "2021-09-21 21:54:24,061 epoch 8 - iter 16/81 - loss 0.04258189 - samples/sec: 13.67 - lr: 0.020000\n",
+      "2021-09-21 21:54:24,883 epoch 8 - iter 24/81 - loss 0.02888460 - samples/sec: 9.75 - lr: 0.020000\n",
+      "2021-09-21 21:54:25,427 epoch 8 - iter 32/81 - loss 0.02195914 - samples/sec: 14.70 - lr: 0.020000\n",
+      "2021-09-21 21:54:25,918 epoch 8 - iter 40/81 - loss 0.06140353 - samples/sec: 16.34 - lr: 0.020000\n",
+      "2021-09-21 21:54:26,364 epoch 8 - iter 48/81 - loss 0.06115703 - samples/sec: 17.94 - lr: 0.020000\n",
+      "2021-09-21 21:54:26,778 epoch 8 - iter 56/81 - loss 0.05253822 - samples/sec: 19.36 - lr: 0.020000\n",
+      "2021-09-21 21:54:27,366 epoch 8 - iter 64/81 - loss 0.04610634 - samples/sec: 13.63 - lr: 0.020000\n",
+      "2021-09-21 21:54:27,881 epoch 8 - iter 72/81 - loss 0.04109137 - samples/sec: 15.56 - lr: 0.020000\n",
+      "2021-09-21 21:54:28,334 epoch 8 - iter 80/81 - loss 0.07711285 - samples/sec: 17.66 - lr: 0.020000\n",
+      "2021-09-21 21:54:28,382 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:54:28,383 EPOCH 8 done: loss 0.0765 - lr 0.0200000\n",
+      "2021-09-21 21:54:29,380 DEV : loss 0.22246916592121124 - score 0.7778\n",
+      "2021-09-21 21:54:29,382 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:54:29,383 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:54:30,248 epoch 9 - iter 8/81 - loss 0.10150409 - samples/sec: 9.42 - lr: 0.020000\n",
+      "2021-09-21 21:54:30,708 epoch 9 - iter 16/81 - loss 0.15708138 - samples/sec: 17.44 - lr: 0.020000\n",
+      "2021-09-21 21:54:31,425 epoch 9 - iter 24/81 - loss 0.10506612 - samples/sec: 11.16 - lr: 0.020000\n",
+      "2021-09-21 21:54:32,031 epoch 9 - iter 32/81 - loss 0.07938637 - samples/sec: 13.22 - lr: 0.020000\n",
+      "2021-09-21 21:54:32,925 epoch 9 - iter 40/81 - loss 0.06906891 - samples/sec: 8.96 - lr: 0.020000\n",
+      "2021-09-21 21:54:33,415 epoch 9 - iter 48/81 - loss 0.12488048 - samples/sec: 16.34 - lr: 0.020000\n",
+      "2021-09-21 21:54:34,024 epoch 9 - iter 56/81 - loss 0.10724576 - samples/sec: 13.15 - lr: 0.020000\n",
+      "2021-09-21 21:54:34,658 epoch 9 - iter 64/81 - loss 0.09394251 - samples/sec: 12.62 - lr: 0.020000\n",
+      "2021-09-21 21:54:35,233 epoch 9 - iter 72/81 - loss 0.10595390 - samples/sec: 13.96 - lr: 0.020000\n",
+      "2021-09-21 21:54:35,985 epoch 9 - iter 80/81 - loss 0.09541044 - samples/sec: 10.64 - lr: 0.020000\n",
+      "2021-09-21 21:54:36,056 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:54:36,057 EPOCH 9 done: loss 0.0942 - lr 0.0200000\n",
+      "2021-09-21 21:54:36,518 DEV : loss 0.20184962451457977 - score 0.7778\n",
+      "Epoch     9: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:54:36,519 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:54:36,521 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:54:37,237 epoch 10 - iter 8/81 - loss 0.00069677 - samples/sec: 11.48 - lr: 0.010000\n",
+      "2021-09-21 21:54:38,395 epoch 10 - iter 16/81 - loss 0.00058964 - samples/sec: 6.91 - lr: 0.010000\n",
+      "2021-09-21 21:54:38,992 epoch 10 - iter 24/81 - loss 0.00066780 - samples/sec: 13.42 - lr: 0.010000\n",
+      "2021-09-21 21:54:39,516 epoch 10 - iter 32/81 - loss 0.00071630 - samples/sec: 15.28 - lr: 0.010000\n",
+      "2021-09-21 21:54:40,254 epoch 10 - iter 40/81 - loss 0.00071600 - samples/sec: 10.84 - lr: 0.010000\n",
+      "2021-09-21 21:54:40,841 epoch 10 - iter 48/81 - loss 0.00069720 - samples/sec: 13.65 - lr: 0.010000\n",
+      "2021-09-21 21:54:41,425 epoch 10 - iter 56/81 - loss 0.00067918 - samples/sec: 13.73 - lr: 0.010000\n",
+      "2021-09-21 21:54:42,100 epoch 10 - iter 64/81 - loss 0.00066718 - samples/sec: 11.85 - lr: 0.010000\n",
+      "2021-09-21 21:54:42,575 epoch 10 - iter 72/81 - loss 0.00083110 - samples/sec: 16.87 - lr: 0.010000\n",
+      "2021-09-21 21:54:43,202 epoch 10 - iter 80/81 - loss 0.00078069 - samples/sec: 12.79 - lr: 0.010000\n",
+      "2021-09-21 21:54:43,258 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:54:43,258 EPOCH 10 done: loss 0.0008 - lr 0.0100000\n",
+      "2021-09-21 21:54:43,716 DEV : loss 0.21194766461849213 - score 0.7778\n",
+      "2021-09-21 21:54:43,717 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:54:47,813 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:54:47,814 Testing using best model ...\n",
+      "2021-09-21 21:54:47,815 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:56:34,000 \t0.6\n",
-      "2021-09-08 01:56:34,000 \n",
+      "2021-09-21 21:54:53,172 \t0.6\n",
+      "2021-09-21 21:54:53,172 \n",
       "Results:\n",
       "- F-score (micro) 0.6\n",
-      "- F-score (macro) 0.4333\n",
+      "- F-score (macro) 0.4833\n",
       "- Accuracy 0.6\n",
       "\n",
       "By class:\n",
       "                        precision    recall  f1-score   support\n",
       "\n",
-      "Family & Relationships     0.0000    0.0000    0.0000         0\n",
-      "                Health     0.0000    0.0000    0.0000         2\n",
-      " Science & Mathematics     1.0000    0.5000    0.6667         2\n",
-      " Entertainment & Music     0.0000    0.0000    0.0000         0\n",
+      "Family & Relationships     0.5000    1.0000    0.6667         1\n",
+      "                Health     0.0000    0.0000    0.0000         0\n",
+      " Science & Mathematics     0.5000    1.0000    0.6667         1\n",
+      " Entertainment & Music     0.0000    0.0000    0.0000         2\n",
       "  Computers & Internet     1.0000    1.0000    1.0000         1\n",
-      " Education & Reference     1.0000    1.0000    1.0000         1\n",
-      "                Sports     1.0000    1.0000    1.0000         2\n",
-      "     Society & Culture     0.0000    0.0000    0.0000         0\n",
-      "    Business & Finance     1.0000    0.5000    0.6667         2\n",
-      " Politics & Government     0.0000    0.0000    0.0000         0\n",
+      " Education & Reference     0.0000    0.0000    0.0000         1\n",
+      "                Sports     1.0000    1.0000    1.0000         1\n",
+      "     Society & Culture     0.5000    0.5000    0.5000         2\n",
+      "    Business & Finance     0.0000    0.0000    0.0000         0\n",
+      " Politics & Government     1.0000    1.0000    1.0000         1\n",
       "\n",
       "             micro avg     0.6000    0.6000    0.6000        10\n",
-      "             macro avg     0.5000    0.4000    0.4333        10\n",
-      "          weighted avg     0.8000    0.6000    0.6667        10\n",
+      "             macro avg     0.4500    0.5500    0.4833        10\n",
+      "          weighted avg     0.5000    0.6000    0.5333        10\n",
       "           samples avg     0.6000    0.6000    0.6000        10\n",
       "\n",
-      "2021-09-08 01:56:34,001 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:47,790 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
+      "2021-09-21 21:54:53,172 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:55:12,513 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:56:52,059 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:55:16,798 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 91/91 [00:00<00:00, 9855.44it/s]"
+      "100%|██████████| 91/91 [00:00<00:00, 14217.98it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:56:52,071 [b'Family & Relationships', b'Health', b'Science & Mathematics', b'Entertainment & Music', b'Computers & Internet', b'Education & Reference', b'Sports', b'Society & Culture', b'Business & Finance', b'Politics & Government']\n",
-      "2021-09-08 01:56:52,081 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:52,083 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:55:16,806 [b'Family & Relationships', b'Health', b'Science & Mathematics', b'Entertainment & Music', b'Computers & Internet', b'Education & Reference', b'Sports', b'Society & Culture', b'Business & Finance', b'Politics & Government']\n",
+      "2021-09-21 21:55:16,870 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:55:16,872 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -1507,25 +1508,24 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:56:52,083 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:52,084 Corpus: \"Corpus: 81 train + 9 dev + 10 test sentences\"\n",
-      "2021-09-08 01:56:52,084 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:52,084 Parameters:\n",
-      "2021-09-08 01:56:52,085  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:56:52,085  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:56:52,085  - patience: \"3\"\n",
-      "2021-09-08 01:56:52,085  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:56:52,086  - max_epochs: \"10\"\n",
-      "2021-09-08 01:56:52,086  - shuffle: \"True\"\n",
-      "2021-09-08 01:56:52,086  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:56:52,087  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:56:52,087 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:52,087 Model training base path: \"None\"\n",
-      "2021-09-08 01:56:52,087 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:52,088 Device: cuda:1\n",
-      "2021-09-08 01:56:52,088 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:52,088 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:56:52,095 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:55:16,873 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:55:16,873 Corpus: \"Corpus: 81 train + 9 dev + 10 test sentences\"\n",
+      "2021-09-21 21:55:16,873 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:55:16,874 Parameters:\n",
+      "2021-09-21 21:55:16,874  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:55:16,874  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:55:16,874  - patience: \"3\"\n",
+      "2021-09-21 21:55:16,875  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:55:16,875  - max_epochs: \"10\"\n",
+      "2021-09-21 21:55:16,875  - shuffle: \"True\"\n",
+      "2021-09-21 21:55:16,875  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:55:16,876  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:55:16,876 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:55:16,876 Model training base path: \"None\"\n",
+      "2021-09-21 21:55:16,877 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:55:16,877 Device: cuda:0\n",
+      "2021-09-21 21:55:16,877 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:55:16,877 Embeddings storage mode: cpu\n"
      ]
     },
     {
@@ -1539,216 +1539,217 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:56:52,762 epoch 1 - iter 8/81 - loss 0.82736073 - samples/sec: 12.35 - lr: 0.020000\n",
-      "2021-09-08 01:56:53,347 epoch 1 - iter 16/81 - loss 0.48468460 - samples/sec: 13.67 - lr: 0.020000\n",
-      "2021-09-08 01:56:54,109 epoch 1 - iter 24/81 - loss 0.51471012 - samples/sec: 10.51 - lr: 0.020000\n",
-      "2021-09-08 01:56:54,584 epoch 1 - iter 32/81 - loss 0.55494186 - samples/sec: 16.88 - lr: 0.020000\n",
-      "2021-09-08 01:56:55,084 epoch 1 - iter 40/81 - loss 0.50997860 - samples/sec: 16.02 - lr: 0.020000\n",
-      "2021-09-08 01:56:55,704 epoch 1 - iter 48/81 - loss 0.59009054 - samples/sec: 12.93 - lr: 0.020000\n",
-      "2021-09-08 01:56:56,297 epoch 1 - iter 56/81 - loss 0.58155419 - samples/sec: 13.49 - lr: 0.020000\n",
-      "2021-09-08 01:56:56,757 epoch 1 - iter 64/81 - loss 0.59246459 - samples/sec: 17.43 - lr: 0.020000\n",
-      "2021-09-08 01:56:57,276 epoch 1 - iter 72/81 - loss 0.58900031 - samples/sec: 15.43 - lr: 0.020000\n",
-      "2021-09-08 01:56:57,947 epoch 1 - iter 80/81 - loss 0.57185513 - samples/sec: 11.93 - lr: 0.020000\n",
-      "2021-09-08 01:56:58,009 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:58,009 EPOCH 1 done: loss 0.5668 - lr 0.0200000\n",
-      "2021-09-08 01:56:58,626 DEV : loss 0.10638042539358139 - score 0.8889\n",
-      "2021-09-08 01:56:58,628 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:55:17,653 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:55:18,088 epoch 1 - iter 8/81 - loss 0.55652961 - samples/sec: 19.07 - lr: 0.020000\n",
+      "2021-09-21 21:55:18,589 epoch 1 - iter 16/81 - loss 0.44362413 - samples/sec: 16.00 - lr: 0.020000\n",
+      "2021-09-21 21:55:19,224 epoch 1 - iter 24/81 - loss 0.43038512 - samples/sec: 12.61 - lr: 0.020000\n",
+      "2021-09-21 21:55:19,639 epoch 1 - iter 32/81 - loss 0.56772094 - samples/sec: 19.33 - lr: 0.020000\n",
+      "2021-09-21 21:55:20,054 epoch 1 - iter 40/81 - loss 0.50241749 - samples/sec: 19.31 - lr: 0.020000\n",
+      "2021-09-21 21:55:20,472 epoch 1 - iter 48/81 - loss 0.61594166 - samples/sec: 19.16 - lr: 0.020000\n",
+      "2021-09-21 21:55:20,928 epoch 1 - iter 56/81 - loss 0.56143793 - samples/sec: 17.58 - lr: 0.020000\n",
+      "2021-09-21 21:55:21,497 epoch 1 - iter 64/81 - loss 0.56679279 - samples/sec: 14.08 - lr: 0.020000\n",
+      "2021-09-21 21:55:22,194 epoch 1 - iter 72/81 - loss 0.57563954 - samples/sec: 11.47 - lr: 0.020000\n",
+      "2021-09-21 21:55:22,599 epoch 1 - iter 80/81 - loss 0.58486492 - samples/sec: 19.79 - lr: 0.020000\n",
+      "2021-09-21 21:55:22,650 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:55:22,650 EPOCH 1 done: loss 0.5809 - lr 0.0200000\n",
+      "2021-09-21 21:55:25,302 DEV : loss 0.5324191451072693 - score 0.1111\n",
+      "2021-09-21 21:55:25,304 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:57:02,639 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:57:03,264 epoch 2 - iter 8/81 - loss 0.58402921 - samples/sec: 13.20 - lr: 0.020000\n",
-      "2021-09-08 01:57:03,842 epoch 2 - iter 16/81 - loss 0.51513213 - samples/sec: 13.88 - lr: 0.020000\n",
-      "2021-09-08 01:57:04,282 epoch 2 - iter 24/81 - loss 0.44742380 - samples/sec: 18.22 - lr: 0.020000\n",
-      "2021-09-08 01:57:04,699 epoch 2 - iter 32/81 - loss 0.34035395 - samples/sec: 19.20 - lr: 0.020000\n",
-      "2021-09-08 01:57:05,155 epoch 2 - iter 40/81 - loss 0.35151456 - samples/sec: 17.59 - lr: 0.020000\n",
-      "2021-09-08 01:57:05,613 epoch 2 - iter 48/81 - loss 0.43047378 - samples/sec: 17.50 - lr: 0.020000\n",
-      "2021-09-08 01:57:06,259 epoch 2 - iter 56/81 - loss 0.39779853 - samples/sec: 12.40 - lr: 0.020000\n",
-      "2021-09-08 01:57:06,700 epoch 2 - iter 64/81 - loss 0.42090539 - samples/sec: 18.16 - lr: 0.020000\n",
-      "2021-09-08 01:57:07,210 epoch 2 - iter 72/81 - loss 0.43807245 - samples/sec: 15.70 - lr: 0.020000\n",
-      "2021-09-08 01:57:07,941 epoch 2 - iter 80/81 - loss 0.45976123 - samples/sec: 10.95 - lr: 0.020000\n",
-      "2021-09-08 01:57:07,992 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:57:07,992 EPOCH 2 done: loss 0.4625 - lr 0.0200000\n",
-      "2021-09-08 01:57:08,624 DEV : loss 0.14344453811645508 - score 0.8889\n",
-      "2021-09-08 01:57:08,626 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:57:08,703 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:57:09,365 epoch 3 - iter 8/81 - loss 0.48000655 - samples/sec: 12.38 - lr: 0.020000\n",
-      "2021-09-08 01:57:09,896 epoch 3 - iter 16/81 - loss 0.27255076 - samples/sec: 15.08 - lr: 0.020000\n",
-      "2021-09-08 01:57:10,385 epoch 3 - iter 24/81 - loss 0.18768035 - samples/sec: 16.41 - lr: 0.020000\n",
-      "2021-09-08 01:57:10,933 epoch 3 - iter 32/81 - loss 0.24643016 - samples/sec: 14.62 - lr: 0.020000\n",
-      "2021-09-08 01:57:11,367 epoch 3 - iter 40/81 - loss 0.27665699 - samples/sec: 18.48 - lr: 0.020000\n",
-      "2021-09-08 01:57:11,814 epoch 3 - iter 48/81 - loss 0.29959280 - samples/sec: 17.90 - lr: 0.020000\n",
-      "2021-09-08 01:57:12,266 epoch 3 - iter 56/81 - loss 0.28724138 - samples/sec: 17.76 - lr: 0.020000\n",
-      "2021-09-08 01:57:12,741 epoch 3 - iter 64/81 - loss 0.36378695 - samples/sec: 16.84 - lr: 0.020000\n",
-      "2021-09-08 01:57:13,274 epoch 3 - iter 72/81 - loss 0.34768122 - samples/sec: 15.05 - lr: 0.020000\n",
-      "2021-09-08 01:57:13,732 epoch 3 - iter 80/81 - loss 0.36412187 - samples/sec: 17.48 - lr: 0.020000\n",
-      "2021-09-08 01:57:13,786 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:57:13,787 EPOCH 3 done: loss 0.3597 - lr 0.0200000\n",
-      "2021-09-08 01:57:14,444 DEV : loss 0.10884228348731995 - score 1.0\n",
-      "2021-09-08 01:57:14,446 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:55:30,072 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:55:30,695 epoch 2 - iter 8/81 - loss 0.64064121 - samples/sec: 13.18 - lr: 0.020000\n",
+      "2021-09-21 21:55:31,146 epoch 2 - iter 16/81 - loss 0.52043485 - samples/sec: 17.78 - lr: 0.020000\n",
+      "2021-09-21 21:55:31,597 epoch 2 - iter 24/81 - loss 0.53341714 - samples/sec: 17.77 - lr: 0.020000\n",
+      "2021-09-21 21:55:32,113 epoch 2 - iter 32/81 - loss 0.45003975 - samples/sec: 15.67 - lr: 0.020000\n",
+      "2021-09-21 21:55:32,698 epoch 2 - iter 40/81 - loss 0.44337315 - samples/sec: 13.70 - lr: 0.020000\n",
+      "2021-09-21 21:55:33,131 epoch 2 - iter 48/81 - loss 0.39307888 - samples/sec: 18.51 - lr: 0.020000\n",
+      "2021-09-21 21:55:33,673 epoch 2 - iter 56/81 - loss 0.38365006 - samples/sec: 14.76 - lr: 0.020000\n",
+      "2021-09-21 21:55:34,205 epoch 2 - iter 64/81 - loss 0.39477043 - samples/sec: 15.06 - lr: 0.020000\n",
+      "2021-09-21 21:55:34,731 epoch 2 - iter 72/81 - loss 0.40681942 - samples/sec: 15.22 - lr: 0.020000\n",
+      "2021-09-21 21:55:35,312 epoch 2 - iter 80/81 - loss 0.40886268 - samples/sec: 13.80 - lr: 0.020000\n",
+      "2021-09-21 21:55:35,380 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:55:35,380 EPOCH 2 done: loss 0.4044 - lr 0.0200000\n",
+      "2021-09-21 21:55:35,943 DEV : loss 0.36102187633514404 - score 0.4444\n",
+      "2021-09-21 21:55:35,944 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:57:18,246 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:57:18,778 epoch 4 - iter 8/81 - loss 0.04199211 - samples/sec: 15.50 - lr: 0.020000\n",
-      "2021-09-08 01:57:19,345 epoch 4 - iter 16/81 - loss 0.02425969 - samples/sec: 14.14 - lr: 0.020000\n",
-      "2021-09-08 01:57:19,796 epoch 4 - iter 24/81 - loss 0.02055319 - samples/sec: 17.78 - lr: 0.020000\n",
-      "2021-09-08 01:57:20,256 epoch 4 - iter 32/81 - loss 0.27991283 - samples/sec: 17.42 - lr: 0.020000\n",
-      "2021-09-08 01:57:20,747 epoch 4 - iter 40/81 - loss 0.22564006 - samples/sec: 16.30 - lr: 0.020000\n",
-      "2021-09-08 01:57:21,254 epoch 4 - iter 48/81 - loss 0.22052695 - samples/sec: 15.81 - lr: 0.020000\n",
-      "2021-09-08 01:57:21,809 epoch 4 - iter 56/81 - loss 0.21306971 - samples/sec: 14.43 - lr: 0.020000\n",
-      "2021-09-08 01:57:22,301 epoch 4 - iter 64/81 - loss 0.20396685 - samples/sec: 16.28 - lr: 0.020000\n",
-      "2021-09-08 01:57:22,762 epoch 4 - iter 72/81 - loss 0.27207639 - samples/sec: 17.41 - lr: 0.020000\n",
-      "2021-09-08 01:57:23,206 epoch 4 - iter 80/81 - loss 0.29549701 - samples/sec: 18.03 - lr: 0.020000\n",
-      "2021-09-08 01:57:23,296 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:57:23,296 EPOCH 4 done: loss 0.2919 - lr 0.0200000\n",
-      "2021-09-08 01:57:23,800 DEV : loss 0.07628563791513443 - score 1.0\n",
-      "2021-09-08 01:57:23,802 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:55:40,012 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:55:40,491 epoch 3 - iter 8/81 - loss 0.25689579 - samples/sec: 17.95 - lr: 0.020000\n",
+      "2021-09-21 21:55:41,024 epoch 3 - iter 16/81 - loss 0.15796533 - samples/sec: 15.02 - lr: 0.020000\n",
+      "2021-09-21 21:55:41,506 epoch 3 - iter 24/81 - loss 0.18209192 - samples/sec: 16.62 - lr: 0.020000\n",
+      "2021-09-21 21:55:41,990 epoch 3 - iter 32/81 - loss 0.25937168 - samples/sec: 16.56 - lr: 0.020000\n",
+      "2021-09-21 21:55:42,697 epoch 3 - iter 40/81 - loss 0.22407624 - samples/sec: 11.32 - lr: 0.020000\n",
+      "2021-09-21 21:55:43,484 epoch 3 - iter 48/81 - loss 0.23102667 - samples/sec: 10.17 - lr: 0.020000\n",
+      "2021-09-21 21:55:44,078 epoch 3 - iter 56/81 - loss 0.24087334 - samples/sec: 13.49 - lr: 0.020000\n",
+      "2021-09-21 21:55:44,522 epoch 3 - iter 64/81 - loss 0.21711399 - samples/sec: 18.01 - lr: 0.020000\n",
+      "2021-09-21 21:55:45,111 epoch 3 - iter 72/81 - loss 0.23623837 - samples/sec: 13.60 - lr: 0.020000\n",
+      "2021-09-21 21:55:45,661 epoch 3 - iter 80/81 - loss 0.21809195 - samples/sec: 14.57 - lr: 0.020000\n",
+      "2021-09-21 21:55:45,719 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:55:45,720 EPOCH 3 done: loss 0.2154 - lr 0.0200000\n",
+      "2021-09-21 21:55:46,196 DEV : loss 0.5178917050361633 - score 0.4444\n",
+      "2021-09-21 21:55:46,198 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:55:46,200 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:55:46,671 epoch 4 - iter 8/81 - loss 0.19377407 - samples/sec: 17.59 - lr: 0.020000\n",
+      "2021-09-21 21:55:47,045 epoch 4 - iter 16/81 - loss 0.21899406 - samples/sec: 21.41 - lr: 0.020000\n",
+      "2021-09-21 21:55:47,496 epoch 4 - iter 24/81 - loss 0.16049155 - samples/sec: 17.77 - lr: 0.020000\n",
+      "2021-09-21 21:55:47,957 epoch 4 - iter 32/81 - loss 0.17962835 - samples/sec: 17.35 - lr: 0.020000\n",
+      "2021-09-21 21:55:48,480 epoch 4 - iter 40/81 - loss 0.19493916 - samples/sec: 15.32 - lr: 0.020000\n",
+      "2021-09-21 21:55:49,045 epoch 4 - iter 48/81 - loss 0.25578733 - samples/sec: 14.18 - lr: 0.020000\n",
+      "2021-09-21 21:55:49,426 epoch 4 - iter 56/81 - loss 0.23131992 - samples/sec: 21.03 - lr: 0.020000\n",
+      "2021-09-21 21:55:49,973 epoch 4 - iter 64/81 - loss 0.22086512 - samples/sec: 14.67 - lr: 0.020000\n",
+      "2021-09-21 21:55:50,380 epoch 4 - iter 72/81 - loss 0.22039906 - samples/sec: 19.70 - lr: 0.020000\n",
+      "2021-09-21 21:55:51,186 epoch 4 - iter 80/81 - loss 0.21080117 - samples/sec: 9.93 - lr: 0.020000\n",
+      "2021-09-21 21:55:51,248 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:55:51,249 EPOCH 4 done: loss 0.2082 - lr 0.0200000\n",
+      "2021-09-21 21:55:51,925 DEV : loss 0.6881029605865479 - score 0.4444\n",
+      "2021-09-21 21:55:51,926 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:55:51,928 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:55:52,508 epoch 5 - iter 8/81 - loss 0.06288492 - samples/sec: 14.39 - lr: 0.020000\n",
+      "2021-09-21 21:55:53,055 epoch 5 - iter 16/81 - loss 0.11765706 - samples/sec: 14.65 - lr: 0.020000\n",
+      "2021-09-21 21:55:53,673 epoch 5 - iter 24/81 - loss 0.08861224 - samples/sec: 12.94 - lr: 0.020000\n",
+      "2021-09-21 21:55:54,261 epoch 5 - iter 32/81 - loss 0.07045306 - samples/sec: 13.62 - lr: 0.020000\n",
+      "2021-09-21 21:55:55,009 epoch 5 - iter 40/81 - loss 0.12031047 - samples/sec: 10.71 - lr: 0.020000\n",
+      "2021-09-21 21:55:56,120 epoch 5 - iter 48/81 - loss 0.18056498 - samples/sec: 7.20 - lr: 0.020000\n",
+      "2021-09-21 21:55:56,685 epoch 5 - iter 56/81 - loss 0.15600165 - samples/sec: 14.20 - lr: 0.020000\n",
+      "2021-09-21 21:55:57,270 epoch 5 - iter 64/81 - loss 0.13695176 - samples/sec: 13.69 - lr: 0.020000\n",
+      "2021-09-21 21:55:57,861 epoch 5 - iter 72/81 - loss 0.13426499 - samples/sec: 13.56 - lr: 0.020000\n",
+      "2021-09-21 21:55:58,413 epoch 5 - iter 80/81 - loss 0.14076041 - samples/sec: 14.50 - lr: 0.020000\n",
+      "2021-09-21 21:55:58,470 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:55:58,470 EPOCH 5 done: loss 0.1390 - lr 0.0200000\n",
+      "2021-09-21 21:55:58,947 DEV : loss 0.8500140905380249 - score 0.5556\n",
+      "2021-09-21 21:55:58,949 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:57:30,674 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:57:31,130 epoch 5 - iter 8/81 - loss 0.49691788 - samples/sec: 18.22 - lr: 0.020000\n",
-      "2021-09-08 01:57:31,526 epoch 5 - iter 16/81 - loss 0.28827478 - samples/sec: 20.23 - lr: 0.020000\n",
-      "2021-09-08 01:57:31,962 epoch 5 - iter 24/81 - loss 0.20142621 - samples/sec: 18.38 - lr: 0.020000\n",
-      "2021-09-08 01:57:32,404 epoch 5 - iter 32/81 - loss 0.24149742 - samples/sec: 18.14 - lr: 0.020000\n",
-      "2021-09-08 01:57:32,942 epoch 5 - iter 40/81 - loss 0.24891516 - samples/sec: 14.89 - lr: 0.020000\n",
-      "2021-09-08 01:57:33,473 epoch 5 - iter 48/81 - loss 0.20909806 - samples/sec: 15.09 - lr: 0.020000\n",
-      "2021-09-08 01:57:34,138 epoch 5 - iter 56/81 - loss 0.20593561 - samples/sec: 12.05 - lr: 0.020000\n",
-      "2021-09-08 01:57:34,593 epoch 5 - iter 64/81 - loss 0.23793223 - samples/sec: 17.62 - lr: 0.020000\n",
-      "2021-09-08 01:57:35,131 epoch 5 - iter 72/81 - loss 0.21362304 - samples/sec: 14.88 - lr: 0.020000\n",
-      "2021-09-08 01:57:35,686 epoch 5 - iter 80/81 - loss 0.20975554 - samples/sec: 14.43 - lr: 0.020000\n",
-      "2021-09-08 01:57:35,733 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:57:35,734 EPOCH 5 done: loss 0.2072 - lr 0.0200000\n",
-      "2021-09-08 01:57:36,502 DEV : loss 0.11321542412042618 - score 0.8889\n",
-      "2021-09-08 01:57:36,503 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:57:36,573 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:57:37,145 epoch 6 - iter 8/81 - loss 0.00580212 - samples/sec: 14.39 - lr: 0.020000\n",
-      "2021-09-08 01:57:37,595 epoch 6 - iter 16/81 - loss 0.00545309 - samples/sec: 17.80 - lr: 0.020000\n",
-      "2021-09-08 01:57:37,996 epoch 6 - iter 24/81 - loss 0.03632281 - samples/sec: 19.98 - lr: 0.020000\n",
-      "2021-09-08 01:57:38,503 epoch 6 - iter 32/81 - loss 0.07742686 - samples/sec: 15.79 - lr: 0.020000\n",
-      "2021-09-08 01:57:38,959 epoch 6 - iter 40/81 - loss 0.07791334 - samples/sec: 17.59 - lr: 0.020000\n",
-      "2021-09-08 01:57:39,437 epoch 6 - iter 48/81 - loss 0.06526576 - samples/sec: 16.76 - lr: 0.020000\n",
-      "2021-09-08 01:57:40,061 epoch 6 - iter 56/81 - loss 0.09800148 - samples/sec: 12.84 - lr: 0.020000\n",
-      "2021-09-08 01:57:40,574 epoch 6 - iter 64/81 - loss 0.11965349 - samples/sec: 15.62 - lr: 0.020000\n",
-      "2021-09-08 01:57:41,115 epoch 6 - iter 72/81 - loss 0.18116678 - samples/sec: 14.79 - lr: 0.020000\n",
-      "2021-09-08 01:57:41,520 epoch 6 - iter 80/81 - loss 0.16857011 - samples/sec: 19.82 - lr: 0.020000\n",
-      "2021-09-08 01:57:41,568 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:56:05,284 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:56:06,055 epoch 6 - iter 8/81 - loss 0.00126751 - samples/sec: 10.71 - lr: 0.020000\n",
+      "2021-09-21 21:56:06,679 epoch 6 - iter 16/81 - loss 0.12021368 - samples/sec: 12.83 - lr: 0.020000\n",
+      "2021-09-21 21:56:07,297 epoch 6 - iter 24/81 - loss 0.15153173 - samples/sec: 12.96 - lr: 0.020000\n",
+      "2021-09-21 21:56:07,923 epoch 6 - iter 32/81 - loss 0.15516103 - samples/sec: 12.79 - lr: 0.020000\n",
+      "2021-09-21 21:56:08,703 epoch 6 - iter 40/81 - loss 0.12438928 - samples/sec: 10.27 - lr: 0.020000\n",
+      "2021-09-21 21:56:09,272 epoch 6 - iter 48/81 - loss 0.10377909 - samples/sec: 14.08 - lr: 0.020000\n",
+      "2021-09-21 21:56:10,009 epoch 6 - iter 56/81 - loss 0.11775028 - samples/sec: 10.87 - lr: 0.020000\n",
+      "2021-09-21 21:56:10,527 epoch 6 - iter 64/81 - loss 0.10423726 - samples/sec: 15.45 - lr: 0.020000\n",
+      "2021-09-21 21:56:11,193 epoch 6 - iter 72/81 - loss 0.10249978 - samples/sec: 12.03 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:57:41,568 EPOCH 6 done: loss 0.1666 - lr 0.0200000\n",
-      "2021-09-08 01:57:42,065 DEV : loss 0.031762368977069855 - score 1.0\n",
-      "2021-09-08 01:57:42,067 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:56:11,745 epoch 6 - iter 80/81 - loss 0.09239532 - samples/sec: 14.49 - lr: 0.020000\n",
+      "2021-09-21 21:56:11,793 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:56:11,793 EPOCH 6 done: loss 0.0913 - lr 0.0200000\n",
+      "2021-09-21 21:56:12,414 DEV : loss 0.8035917282104492 - score 0.5556\n",
+      "2021-09-21 21:56:12,416 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:57:46,266 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:57:46,736 epoch 7 - iter 8/81 - loss 0.49177344 - samples/sec: 17.70 - lr: 0.020000\n",
-      "2021-09-08 01:57:47,235 epoch 7 - iter 16/81 - loss 0.24799777 - samples/sec: 16.07 - lr: 0.020000\n",
-      "2021-09-08 01:57:47,768 epoch 7 - iter 24/81 - loss 0.16680450 - samples/sec: 15.02 - lr: 0.020000\n",
-      "2021-09-08 01:57:48,188 epoch 7 - iter 32/81 - loss 0.17831306 - samples/sec: 19.07 - lr: 0.020000\n",
-      "2021-09-08 01:57:49,009 epoch 7 - iter 40/81 - loss 0.18160818 - samples/sec: 9.75 - lr: 0.020000\n",
-      "2021-09-08 01:57:49,463 epoch 7 - iter 48/81 - loss 0.17763750 - samples/sec: 17.66 - lr: 0.020000\n",
-      "2021-09-08 01:57:50,029 epoch 7 - iter 56/81 - loss 0.15269553 - samples/sec: 14.17 - lr: 0.020000\n",
-      "2021-09-08 01:57:50,587 epoch 7 - iter 64/81 - loss 0.15488713 - samples/sec: 14.37 - lr: 0.020000\n",
-      "2021-09-08 01:57:50,976 epoch 7 - iter 72/81 - loss 0.14575440 - samples/sec: 20.58 - lr: 0.020000\n",
-      "2021-09-08 01:57:51,389 epoch 7 - iter 80/81 - loss 0.13139584 - samples/sec: 19.43 - lr: 0.020000\n",
-      "2021-09-08 01:57:51,437 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:57:51,437 EPOCH 7 done: loss 0.1298 - lr 0.0200000\n",
-      "2021-09-08 01:57:51,932 DEV : loss 0.13602551817893982 - score 0.7778\n",
-      "2021-09-08 01:57:51,934 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:57:51,936 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:57:52,322 epoch 8 - iter 8/81 - loss 0.00169437 - samples/sec: 21.55 - lr: 0.020000\n",
-      "2021-09-08 01:57:52,722 epoch 8 - iter 16/81 - loss 0.00206065 - samples/sec: 20.02 - lr: 0.020000\n",
-      "2021-09-08 01:57:53,126 epoch 8 - iter 24/81 - loss 0.06002103 - samples/sec: 19.83 - lr: 0.020000\n",
-      "2021-09-08 01:57:53,622 epoch 8 - iter 32/81 - loss 0.04522951 - samples/sec: 16.17 - lr: 0.020000\n",
-      "2021-09-08 01:57:54,143 epoch 8 - iter 40/81 - loss 0.03780874 - samples/sec: 15.38 - lr: 0.020000\n",
-      "2021-09-08 01:57:54,535 epoch 8 - iter 48/81 - loss 0.03503381 - samples/sec: 20.43 - lr: 0.020000\n",
-      "2021-09-08 01:57:55,236 epoch 8 - iter 56/81 - loss 0.09616784 - samples/sec: 11.43 - lr: 0.020000\n",
-      "2021-09-08 01:57:55,747 epoch 8 - iter 64/81 - loss 0.08629096 - samples/sec: 15.66 - lr: 0.020000\n",
-      "2021-09-08 01:57:56,183 epoch 8 - iter 72/81 - loss 0.07711174 - samples/sec: 18.38 - lr: 0.020000\n",
-      "2021-09-08 01:57:56,847 epoch 8 - iter 80/81 - loss 0.06964168 - samples/sec: 12.08 - lr: 0.020000\n",
-      "2021-09-08 01:57:56,895 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:57:56,895 EPOCH 8 done: loss 0.0688 - lr 0.0200000\n",
-      "2021-09-08 01:57:57,525 DEV : loss 0.13713237643241882 - score 0.8889\n",
-      "2021-09-08 01:57:57,527 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:57:57,531 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:57:57,960 epoch 9 - iter 8/81 - loss 0.15465888 - samples/sec: 19.30 - lr: 0.020000\n",
-      "2021-09-08 01:57:58,451 epoch 9 - iter 16/81 - loss 0.07900406 - samples/sec: 16.32 - lr: 0.020000\n",
-      "2021-09-08 01:57:59,087 epoch 9 - iter 24/81 - loss 0.06213519 - samples/sec: 12.59 - lr: 0.020000\n",
-      "2021-09-08 01:57:59,622 epoch 9 - iter 32/81 - loss 0.04678862 - samples/sec: 14.98 - lr: 0.020000\n",
-      "2021-09-08 01:58:00,163 epoch 9 - iter 40/81 - loss 0.03821266 - samples/sec: 14.82 - lr: 0.020000\n",
-      "2021-09-08 01:58:00,650 epoch 9 - iter 48/81 - loss 0.03541205 - samples/sec: 16.44 - lr: 0.020000\n",
-      "2021-09-08 01:58:01,030 epoch 9 - iter 56/81 - loss 0.03038505 - samples/sec: 21.14 - lr: 0.020000\n",
-      "2021-09-08 01:58:01,491 epoch 9 - iter 64/81 - loss 0.06471519 - samples/sec: 17.37 - lr: 0.020000\n",
-      "2021-09-08 01:58:01,872 epoch 9 - iter 72/81 - loss 0.05761118 - samples/sec: 21.07 - lr: 0.020000\n",
-      "2021-09-08 01:58:02,447 epoch 9 - iter 80/81 - loss 0.05192347 - samples/sec: 13.93 - lr: 0.020000\n",
-      "2021-09-08 01:58:02,497 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:02,498 EPOCH 9 done: loss 0.0513 - lr 0.0200000\n",
-      "2021-09-08 01:58:02,992 DEV : loss 0.0617423914372921 - score 1.0\n",
-      "2021-09-08 01:58:02,994 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:58:02,996 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:03,428 epoch 10 - iter 8/81 - loss 0.00028029 - samples/sec: 19.18 - lr: 0.020000\n",
-      "2021-09-08 01:58:03,849 epoch 10 - iter 16/81 - loss 0.01448107 - samples/sec: 19.04 - lr: 0.020000\n",
-      "2021-09-08 01:58:04,381 epoch 10 - iter 24/81 - loss 0.11515937 - samples/sec: 15.07 - lr: 0.020000\n",
-      "2021-09-08 01:58:04,872 epoch 10 - iter 32/81 - loss 0.08655590 - samples/sec: 16.32 - lr: 0.020000\n",
-      "2021-09-08 01:58:05,480 epoch 10 - iter 40/81 - loss 0.06934311 - samples/sec: 13.18 - lr: 0.020000\n",
-      "2021-09-08 01:58:05,956 epoch 10 - iter 48/81 - loss 0.06422642 - samples/sec: 16.85 - lr: 0.020000\n",
-      "2021-09-08 01:58:06,442 epoch 10 - iter 56/81 - loss 0.05516820 - samples/sec: 16.48 - lr: 0.020000\n",
-      "2021-09-08 01:58:07,232 epoch 10 - iter 64/81 - loss 0.04831373 - samples/sec: 10.14 - lr: 0.020000\n",
-      "2021-09-08 01:58:07,625 epoch 10 - iter 72/81 - loss 0.04312238 - samples/sec: 20.41 - lr: 0.020000\n",
-      "2021-09-08 01:58:08,030 epoch 10 - iter 80/81 - loss 0.03884683 - samples/sec: 19.77 - lr: 0.020000\n",
-      "2021-09-08 01:58:08,078 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:08,078 EPOCH 10 done: loss 0.0384 - lr 0.0200000\n",
-      "2021-09-08 01:58:08,581 DEV : loss 0.05992256850004196 - score 1.0\n",
-      "Epoch    10: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:58:08,583 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:58:12,354 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:12,355 Testing using best model ...\n",
-      "2021-09-08 01:58:12,357 loading file None/best-model.pt\n",
+      "2021-09-21 21:56:16,289 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:56:17,001 epoch 7 - iter 8/81 - loss 0.00047268 - samples/sec: 11.54 - lr: 0.020000\n",
+      "2021-09-21 21:56:17,650 epoch 7 - iter 16/81 - loss 0.03289276 - samples/sec: 12.34 - lr: 0.020000\n",
+      "2021-09-21 21:56:18,297 epoch 7 - iter 24/81 - loss 0.06755357 - samples/sec: 12.37 - lr: 0.020000\n",
+      "2021-09-21 21:56:18,936 epoch 7 - iter 32/81 - loss 0.05321971 - samples/sec: 12.52 - lr: 0.020000\n",
+      "2021-09-21 21:56:19,414 epoch 7 - iter 40/81 - loss 0.16235233 - samples/sec: 16.76 - lr: 0.020000\n",
+      "2021-09-21 21:56:20,030 epoch 7 - iter 48/81 - loss 0.13558684 - samples/sec: 13.00 - lr: 0.020000\n",
+      "2021-09-21 21:56:20,694 epoch 7 - iter 56/81 - loss 0.11629019 - samples/sec: 12.07 - lr: 0.020000\n",
+      "2021-09-21 21:56:21,065 epoch 7 - iter 64/81 - loss 0.14999944 - samples/sec: 21.64 - lr: 0.020000\n",
+      "2021-09-21 21:56:21,544 epoch 7 - iter 72/81 - loss 0.13627058 - samples/sec: 16.73 - lr: 0.020000\n",
+      "2021-09-21 21:56:21,976 epoch 7 - iter 80/81 - loss 0.12269173 - samples/sec: 18.54 - lr: 0.020000\n",
+      "2021-09-21 21:56:22,048 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:56:22,048 EPOCH 7 done: loss 0.1212 - lr 0.0200000\n",
+      "2021-09-21 21:56:22,631 DEV : loss 0.6294692158699036 - score 0.4444\n",
+      "2021-09-21 21:56:22,633 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:56:22,710 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:56:23,127 epoch 8 - iter 8/81 - loss 0.34692004 - samples/sec: 19.88 - lr: 0.020000\n",
+      "2021-09-21 21:56:23,511 epoch 8 - iter 16/81 - loss 0.18577188 - samples/sec: 20.86 - lr: 0.020000\n",
+      "2021-09-21 21:56:24,233 epoch 8 - iter 24/81 - loss 0.12795456 - samples/sec: 11.08 - lr: 0.020000\n",
+      "2021-09-21 21:56:24,879 epoch 8 - iter 32/81 - loss 0.09658998 - samples/sec: 12.41 - lr: 0.020000\n",
+      "2021-09-21 21:56:25,473 epoch 8 - iter 40/81 - loss 0.12886915 - samples/sec: 13.46 - lr: 0.020000\n",
+      "2021-09-21 21:56:26,210 epoch 8 - iter 48/81 - loss 0.11580403 - samples/sec: 10.88 - lr: 0.020000\n",
+      "2021-09-21 21:56:26,863 epoch 8 - iter 56/81 - loss 0.13114956 - samples/sec: 12.25 - lr: 0.020000\n",
+      "2021-09-21 21:56:27,605 epoch 8 - iter 64/81 - loss 0.11487928 - samples/sec: 10.79 - lr: 0.020000\n",
+      "2021-09-21 21:56:28,222 epoch 8 - iter 72/81 - loss 0.10233366 - samples/sec: 12.99 - lr: 0.020000\n",
+      "2021-09-21 21:56:28,749 epoch 8 - iter 80/81 - loss 0.09217220 - samples/sec: 15.19 - lr: 0.020000\n",
+      "2021-09-21 21:56:28,825 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:56:28,825 EPOCH 8 done: loss 0.0910 - lr 0.0200000\n",
+      "2021-09-21 21:56:29,512 DEV : loss 0.7461881041526794 - score 0.5556\n",
+      "2021-09-21 21:56:29,513 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:56:33,572 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:56:34,234 epoch 9 - iter 8/81 - loss 0.01123926 - samples/sec: 12.46 - lr: 0.020000\n",
+      "2021-09-21 21:56:35,009 epoch 9 - iter 16/81 - loss 0.05053418 - samples/sec: 10.33 - lr: 0.020000\n",
+      "2021-09-21 21:56:35,665 epoch 9 - iter 24/81 - loss 0.03381410 - samples/sec: 12.22 - lr: 0.020000\n",
+      "2021-09-21 21:56:36,352 epoch 9 - iter 32/81 - loss 0.02634763 - samples/sec: 11.65 - lr: 0.020000\n",
+      "2021-09-21 21:56:36,868 epoch 9 - iter 40/81 - loss 0.02115478 - samples/sec: 15.53 - lr: 0.020000\n",
+      "2021-09-21 21:56:37,440 epoch 9 - iter 48/81 - loss 0.01809738 - samples/sec: 13.99 - lr: 0.020000\n",
+      "2021-09-21 21:56:38,125 epoch 9 - iter 56/81 - loss 0.01554868 - samples/sec: 11.71 - lr: 0.020000\n",
+      "2021-09-21 21:56:38,671 epoch 9 - iter 64/81 - loss 0.01686340 - samples/sec: 14.65 - lr: 0.020000\n",
+      "2021-09-21 21:56:39,089 epoch 9 - iter 72/81 - loss 0.01507566 - samples/sec: 19.19 - lr: 0.020000\n",
+      "2021-09-21 21:56:39,580 epoch 9 - iter 80/81 - loss 0.01500156 - samples/sec: 16.34 - lr: 0.020000\n",
+      "2021-09-21 21:56:39,647 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:56:39,647 EPOCH 9 done: loss 0.0148 - lr 0.0200000\n",
+      "2021-09-21 21:56:40,193 DEV : loss 0.7625837326049805 - score 0.5556\n",
+      "2021-09-21 21:56:40,194 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:56:40,265 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:56:40,691 epoch 10 - iter 8/81 - loss 0.00070881 - samples/sec: 19.41 - lr: 0.020000\n",
+      "2021-09-21 21:56:41,235 epoch 10 - iter 16/81 - loss 0.00071485 - samples/sec: 14.73 - lr: 0.020000\n",
+      "2021-09-21 21:56:41,621 epoch 10 - iter 24/81 - loss 0.00060808 - samples/sec: 20.80 - lr: 0.020000\n",
+      "2021-09-21 21:56:42,059 epoch 10 - iter 32/81 - loss 0.00053911 - samples/sec: 18.27 - lr: 0.020000\n",
+      "2021-09-21 21:56:42,503 epoch 10 - iter 40/81 - loss 0.00047530 - samples/sec: 18.03 - lr: 0.020000\n",
+      "2021-09-21 21:56:43,078 epoch 10 - iter 48/81 - loss 0.00045019 - samples/sec: 13.93 - lr: 0.020000\n",
+      "2021-09-21 21:56:43,890 epoch 10 - iter 56/81 - loss 0.00044766 - samples/sec: 9.86 - lr: 0.020000\n",
+      "2021-09-21 21:56:44,398 epoch 10 - iter 64/81 - loss 0.00041888 - samples/sec: 15.78 - lr: 0.020000\n",
+      "2021-09-21 21:56:45,138 epoch 10 - iter 72/81 - loss 0.00040285 - samples/sec: 10.81 - lr: 0.020000\n",
+      "2021-09-21 21:56:45,733 epoch 10 - iter 80/81 - loss 0.06950165 - samples/sec: 13.46 - lr: 0.020000\n",
+      "2021-09-21 21:56:45,785 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:56:45,786 EPOCH 10 done: loss 0.0686 - lr 0.0200000\n",
+      "2021-09-21 21:56:46,480 DEV : loss 0.8342717289924622 - score 0.5556\n",
+      "2021-09-21 21:56:46,482 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:56:50,403 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:56:50,404 Testing using best model ...\n",
+      "2021-09-21 21:56:50,406 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:58:17,658 \t0.3\n",
-      "2021-09-08 01:58:17,659 \n",
+      "2021-09-21 21:56:55,839 \t0.7\n",
+      "2021-09-21 21:56:55,839 \n",
       "Results:\n",
-      "- F-score (micro) 0.3\n",
-      "- F-score (macro) 0.15\n",
-      "- Accuracy 0.3\n",
+      "- F-score (micro) 0.7\n",
+      "- F-score (macro) 0.4333\n",
+      "- Accuracy 0.7\n",
       "\n",
       "By class:\n",
       "                        precision    recall  f1-score   support\n",
       "\n",
       "Family & Relationships     0.0000    0.0000    0.0000         0\n",
-      "                Health     0.3333    1.0000    0.5000         1\n",
-      " Science & Mathematics     0.0000    0.0000    0.0000         0\n",
+      "                Health     1.0000    1.0000    1.0000         1\n",
+      " Science & Mathematics     1.0000    1.0000    1.0000         1\n",
       " Entertainment & Music     0.0000    0.0000    0.0000         1\n",
-      "  Computers & Internet     1.0000    1.0000    1.0000         2\n",
+      "  Computers & Internet     0.5000    1.0000    0.6667         1\n",
       " Education & Reference     0.0000    0.0000    0.0000         0\n",
-      "                Sports     0.0000    0.0000    0.0000         0\n",
-      "     Society & Culture     0.0000    0.0000    0.0000         1\n",
-      "    Business & Finance     0.0000    0.0000    0.0000         5\n",
-      " Politics & Government     0.0000    0.0000    0.0000         0\n",
+      "                Sports     1.0000    1.0000    1.0000         2\n",
+      "     Society & Culture     0.0000    0.0000    0.0000         0\n",
+      "    Business & Finance     0.0000    0.0000    0.0000         1\n",
+      " Politics & Government     0.6667    0.6667    0.6667         3\n",
       "\n",
-      "             micro avg     0.3000    0.3000    0.3000        10\n",
-      "             macro avg     0.1333    0.2000    0.1500        10\n",
-      "          weighted avg     0.2333    0.3000    0.2500        10\n",
-      "           samples avg     0.3000    0.3000    0.3000        10\n",
+      "             micro avg     0.7000    0.7000    0.7000        10\n",
+      "             macro avg     0.4167    0.4667    0.4333        10\n",
+      "          weighted avg     0.6500    0.7000    0.6667        10\n",
+      "           samples avg     0.7000    0.7000    0.7000        10\n",
       "\n",
-      "2021-09-08 01:58:17,660 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:31,579 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
+      "2021-09-21 21:56:55,839 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:57:14,693 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:58:36,164 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:57:18,813 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 91/91 [00:00<00:00, 12525.65it/s]"
+      "100%|██████████| 91/91 [00:00<00:00, 13982.04it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:58:36,173 [b'Family & Relationships', b'Health', b'Science & Mathematics', b'Entertainment & Music', b'Computers & Internet', b'Education & Reference', b'Sports', b'Society & Culture', b'Business & Finance', b'Politics & Government']\n",
-      "2021-09-08 01:58:36,324 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:36,326 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:57:18,821 [b'Family & Relationships', b'Health', b'Science & Mathematics', b'Entertainment & Music', b'Computers & Internet', b'Education & Reference', b'Sports', b'Society & Culture', b'Business & Finance', b'Politics & Government']\n",
+      "2021-09-21 21:57:18,831 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:57:18,833 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -2061,24 +2062,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:58:36,326 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:36,327 Corpus: \"Corpus: 81 train + 9 dev + 10 test sentences\"\n",
-      "2021-09-08 01:58:36,327 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:36,327 Parameters:\n",
-      "2021-09-08 01:58:36,328  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:58:36,328  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:58:36,328  - patience: \"3\"\n",
-      "2021-09-08 01:58:36,328  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:58:36,329  - max_epochs: \"10\"\n",
-      "2021-09-08 01:58:36,329  - shuffle: \"True\"\n",
-      "2021-09-08 01:58:36,329  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:58:36,329  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:58:36,330 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:36,330 Model training base path: \"None\"\n",
-      "2021-09-08 01:58:36,330 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:36,331 Device: cuda:1\n",
-      "2021-09-08 01:58:36,331 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:36,331 Embeddings storage mode: cpu\n"
+      "2021-09-21 21:57:18,833 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:57:18,834 Corpus: \"Corpus: 81 train + 9 dev + 10 test sentences\"\n",
+      "2021-09-21 21:57:18,834 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:57:18,834 Parameters:\n",
+      "2021-09-21 21:57:18,835  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:57:18,835  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:57:18,835  - patience: \"3\"\n",
+      "2021-09-21 21:57:18,835  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:57:18,836  - max_epochs: \"10\"\n",
+      "2021-09-21 21:57:18,836  - shuffle: \"True\"\n",
+      "2021-09-21 21:57:18,836  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:57:18,837  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:57:18,837 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:57:18,837 Model training base path: \"None\"\n",
+      "2021-09-21 21:57:18,837 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:57:18,838 Device: cuda:0\n",
+      "2021-09-21 21:57:18,838 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:57:18,838 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:57:18,845 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -2092,216 +2094,214 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:58:36,541 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:37,023 epoch 1 - iter 8/81 - loss 0.51554067 - samples/sec: 17.18 - lr: 0.020000\n",
-      "2021-09-08 01:58:37,439 epoch 1 - iter 16/81 - loss 0.30535721 - samples/sec: 19.25 - lr: 0.020000\n",
-      "2021-09-08 01:58:37,943 epoch 1 - iter 24/81 - loss 0.43634084 - samples/sec: 15.89 - lr: 0.020000\n",
-      "2021-09-08 01:58:38,661 epoch 1 - iter 32/81 - loss 0.41108488 - samples/sec: 11.15 - lr: 0.020000\n",
-      "2021-09-08 01:58:39,072 epoch 1 - iter 40/81 - loss 0.43422812 - samples/sec: 19.49 - lr: 0.020000\n",
-      "2021-09-08 01:58:39,496 epoch 1 - iter 48/81 - loss 0.57671465 - samples/sec: 18.93 - lr: 0.020000\n",
-      "2021-09-08 01:58:40,189 epoch 1 - iter 56/81 - loss 0.54917408 - samples/sec: 11.55 - lr: 0.020000\n",
-      "2021-09-08 01:58:40,736 epoch 1 - iter 64/81 - loss 0.55363413 - samples/sec: 14.65 - lr: 0.020000\n",
-      "2021-09-08 01:58:41,257 epoch 1 - iter 72/81 - loss 0.56767611 - samples/sec: 15.36 - lr: 0.020000\n",
-      "2021-09-08 01:58:41,824 epoch 1 - iter 80/81 - loss 0.54087442 - samples/sec: 14.12 - lr: 0.020000\n",
-      "2021-09-08 01:58:41,875 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:41,876 EPOCH 1 done: loss 0.5516 - lr 0.0200000\n",
-      "2021-09-08 01:58:42,468 DEV : loss 0.32463932037353516 - score 0.5556\n",
-      "2021-09-08 01:58:42,469 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:57:19,424 epoch 1 - iter 8/81 - loss 0.83507779 - samples/sec: 14.25 - lr: 0.020000\n",
+      "2021-09-21 21:57:20,138 epoch 1 - iter 16/81 - loss 0.61465545 - samples/sec: 11.21 - lr: 0.020000\n",
+      "2021-09-21 21:57:20,845 epoch 1 - iter 24/81 - loss 0.48725996 - samples/sec: 11.33 - lr: 0.020000\n",
+      "2021-09-21 21:57:21,841 epoch 1 - iter 32/81 - loss 0.48079402 - samples/sec: 8.04 - lr: 0.020000\n",
+      "2021-09-21 21:57:22,287 epoch 1 - iter 40/81 - loss 0.41812385 - samples/sec: 17.97 - lr: 0.020000\n",
+      "2021-09-21 21:57:22,756 epoch 1 - iter 48/81 - loss 0.50097044 - samples/sec: 17.09 - lr: 0.020000\n",
+      "2021-09-21 21:57:23,267 epoch 1 - iter 56/81 - loss 0.49493178 - samples/sec: 15.67 - lr: 0.020000\n",
+      "2021-09-21 21:57:23,824 epoch 1 - iter 64/81 - loss 0.51015503 - samples/sec: 14.38 - lr: 0.020000\n",
+      "2021-09-21 21:57:24,417 epoch 1 - iter 72/81 - loss 0.51509037 - samples/sec: 13.50 - lr: 0.020000\n",
+      "2021-09-21 21:57:24,915 epoch 1 - iter 80/81 - loss 0.51617386 - samples/sec: 16.09 - lr: 0.020000\n",
+      "2021-09-21 21:57:24,971 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:57:24,972 EPOCH 1 done: loss 0.5245 - lr 0.0200000\n",
+      "2021-09-21 21:57:25,405 DEV : loss 0.22442027926445007 - score 0.6667\n",
+      "2021-09-21 21:57:25,406 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:58:49,070 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:49,769 epoch 2 - iter 8/81 - loss 0.82030779 - samples/sec: 11.72 - lr: 0.020000\n",
-      "2021-09-08 01:58:50,339 epoch 2 - iter 16/81 - loss 0.71816436 - samples/sec: 14.06 - lr: 0.020000\n",
-      "2021-09-08 01:58:50,726 epoch 2 - iter 24/81 - loss 0.59531988 - samples/sec: 20.69 - lr: 0.020000\n",
-      "2021-09-08 01:58:51,253 epoch 2 - iter 32/81 - loss 0.50543809 - samples/sec: 15.20 - lr: 0.020000\n",
-      "2021-09-08 01:58:51,731 epoch 2 - iter 40/81 - loss 0.56554177 - samples/sec: 16.80 - lr: 0.020000\n",
-      "2021-09-08 01:58:52,137 epoch 2 - iter 48/81 - loss 0.50250120 - samples/sec: 19.73 - lr: 0.020000\n",
-      "2021-09-08 01:58:52,728 epoch 2 - iter 56/81 - loss 0.49744474 - samples/sec: 13.54 - lr: 0.020000\n",
-      "2021-09-08 01:58:53,207 epoch 2 - iter 64/81 - loss 0.46220242 - samples/sec: 16.76 - lr: 0.020000\n",
-      "2021-09-08 01:58:53,654 epoch 2 - iter 72/81 - loss 0.42355687 - samples/sec: 17.91 - lr: 0.020000\n",
-      "2021-09-08 01:58:54,104 epoch 2 - iter 80/81 - loss 0.42550397 - samples/sec: 17.82 - lr: 0.020000\n",
-      "2021-09-08 01:58:54,171 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:54,172 EPOCH 2 done: loss 0.4302 - lr 0.0200000\n",
-      "2021-09-08 01:58:54,938 DEV : loss 0.2654297947883606 - score 0.7778\n",
-      "2021-09-08 01:58:54,940 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:58:58,649 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:59,115 epoch 3 - iter 8/81 - loss 0.06433895 - samples/sec: 17.83 - lr: 0.020000\n",
-      "2021-09-08 01:58:59,607 epoch 3 - iter 16/81 - loss 0.11004854 - samples/sec: 16.29 - lr: 0.020000\n",
-      "2021-09-08 01:59:00,034 epoch 3 - iter 24/81 - loss 0.25786330 - samples/sec: 18.81 - lr: 0.020000\n",
-      "2021-09-08 01:59:00,497 epoch 3 - iter 32/81 - loss 0.31006415 - samples/sec: 17.31 - lr: 0.020000\n",
-      "2021-09-08 01:59:01,134 epoch 3 - iter 40/81 - loss 0.33676085 - samples/sec: 12.58 - lr: 0.020000\n",
-      "2021-09-08 01:59:01,712 epoch 3 - iter 48/81 - loss 0.31755468 - samples/sec: 13.87 - lr: 0.020000\n",
-      "2021-09-08 01:59:02,183 epoch 3 - iter 56/81 - loss 0.36572384 - samples/sec: 17.01 - lr: 0.020000\n",
-      "2021-09-08 01:59:02,629 epoch 3 - iter 64/81 - loss 0.32688644 - samples/sec: 17.99 - lr: 0.020000\n",
-      "2021-09-08 01:59:03,115 epoch 3 - iter 72/81 - loss 0.37560956 - samples/sec: 16.51 - lr: 0.020000\n",
-      "2021-09-08 01:59:03,593 epoch 3 - iter 80/81 - loss 0.35120026 - samples/sec: 16.80 - lr: 0.020000\n",
-      "2021-09-08 01:59:03,738 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:59:03,739 EPOCH 3 done: loss 0.3469 - lr 0.0200000\n",
-      "2021-09-08 01:59:04,467 DEV : loss 0.19168822467327118 - score 0.7778\n",
-      "2021-09-08 01:59:04,469 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:59:09,596 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:59:10,205 epoch 4 - iter 8/81 - loss 0.19489007 - samples/sec: 13.56 - lr: 0.020000\n",
-      "2021-09-08 01:59:10,703 epoch 4 - iter 16/81 - loss 0.19086064 - samples/sec: 16.09 - lr: 0.020000\n",
-      "2021-09-08 01:59:11,165 epoch 4 - iter 24/81 - loss 0.19060074 - samples/sec: 17.37 - lr: 0.020000\n",
-      "2021-09-08 01:59:11,585 epoch 4 - iter 32/81 - loss 0.18956000 - samples/sec: 19.11 - lr: 0.020000\n",
-      "2021-09-08 01:59:12,288 epoch 4 - iter 40/81 - loss 0.16940749 - samples/sec: 11.39 - lr: 0.020000\n",
-      "2021-09-08 01:59:12,835 epoch 4 - iter 48/81 - loss 0.20179382 - samples/sec: 14.64 - lr: 0.020000\n",
-      "2021-09-08 01:59:13,394 epoch 4 - iter 56/81 - loss 0.18384975 - samples/sec: 14.34 - lr: 0.020000\n",
-      "2021-09-08 01:59:13,921 epoch 4 - iter 64/81 - loss 0.24909398 - samples/sec: 15.23 - lr: 0.020000\n",
-      "2021-09-08 01:59:14,305 epoch 4 - iter 72/81 - loss 0.23946506 - samples/sec: 20.91 - lr: 0.020000\n",
-      "2021-09-08 01:59:14,878 epoch 4 - iter 80/81 - loss 0.26320068 - samples/sec: 13.98 - lr: 0.020000\n",
-      "2021-09-08 01:59:14,937 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:59:14,937 EPOCH 4 done: loss 0.2607 - lr 0.0200000\n",
-      "2021-09-08 01:59:15,556 DEV : loss 0.7865927815437317 - score 0.6667\n",
-      "2021-09-08 01:59:15,558 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:59:15,560 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:59:16,048 epoch 5 - iter 8/81 - loss 0.24827446 - samples/sec: 17.00 - lr: 0.020000\n",
-      "2021-09-08 01:59:16,605 epoch 5 - iter 16/81 - loss 0.28274495 - samples/sec: 14.38 - lr: 0.020000\n",
-      "2021-09-08 01:59:17,078 epoch 5 - iter 24/81 - loss 0.19697837 - samples/sec: 16.95 - lr: 0.020000\n",
-      "2021-09-08 01:59:17,501 epoch 5 - iter 32/81 - loss 0.14830721 - samples/sec: 18.96 - lr: 0.020000\n",
-      "2021-09-08 01:59:17,988 epoch 5 - iter 40/81 - loss 0.14827817 - samples/sec: 16.47 - lr: 0.020000\n",
-      "2021-09-08 01:59:18,541 epoch 5 - iter 48/81 - loss 0.12420854 - samples/sec: 14.49 - lr: 0.020000\n",
-      "2021-09-08 01:59:19,050 epoch 5 - iter 56/81 - loss 0.15724084 - samples/sec: 15.75 - lr: 0.020000\n",
-      "2021-09-08 01:59:19,536 epoch 5 - iter 64/81 - loss 0.14420321 - samples/sec: 16.50 - lr: 0.020000\n",
-      "2021-09-08 01:59:20,243 epoch 5 - iter 72/81 - loss 0.15546449 - samples/sec: 11.31 - lr: 0.020000\n",
-      "2021-09-08 01:59:20,687 epoch 5 - iter 80/81 - loss 0.14152681 - samples/sec: 18.06 - lr: 0.020000\n",
-      "2021-09-08 01:59:20,736 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:59:20,737 EPOCH 5 done: loss 0.1398 - lr 0.0200000\n",
-      "2021-09-08 01:59:21,526 DEV : loss 0.2933676242828369 - score 0.7778\n",
-      "2021-09-08 01:59:21,528 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:59:21,530 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:59:22,064 epoch 6 - iter 8/81 - loss 0.00769523 - samples/sec: 15.41 - lr: 0.020000\n",
-      "2021-09-08 01:59:22,474 epoch 6 - iter 16/81 - loss 0.01410396 - samples/sec: 19.54 - lr: 0.020000\n",
-      "2021-09-08 01:59:22,982 epoch 6 - iter 24/81 - loss 0.00989885 - samples/sec: 15.78 - lr: 0.020000\n",
-      "2021-09-08 01:59:23,387 epoch 6 - iter 32/81 - loss 0.01019962 - samples/sec: 19.79 - lr: 0.020000\n",
-      "2021-09-08 01:59:24,020 epoch 6 - iter 40/81 - loss 0.04920360 - samples/sec: 12.66 - lr: 0.020000\n",
-      "2021-09-08 01:59:24,506 epoch 6 - iter 48/81 - loss 0.04115642 - samples/sec: 16.49 - lr: 0.020000\n",
-      "2021-09-08 01:59:24,945 epoch 6 - iter 56/81 - loss 0.03555689 - samples/sec: 18.25 - lr: 0.020000\n",
-      "2021-09-08 01:59:25,497 epoch 6 - iter 64/81 - loss 0.04468019 - samples/sec: 14.51 - lr: 0.020000\n",
-      "2021-09-08 01:59:25,877 epoch 6 - iter 72/81 - loss 0.07031271 - samples/sec: 21.08 - lr: 0.020000\n"
+      "2021-09-21 21:57:31,347 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:57:31,823 epoch 2 - iter 8/81 - loss 0.30732209 - samples/sec: 17.37 - lr: 0.020000\n",
+      "2021-09-21 21:57:32,361 epoch 2 - iter 16/81 - loss 0.36457098 - samples/sec: 14.90 - lr: 0.020000\n",
+      "2021-09-21 21:57:32,795 epoch 2 - iter 24/81 - loss 0.36434835 - samples/sec: 18.45 - lr: 0.020000\n",
+      "2021-09-21 21:57:33,228 epoch 2 - iter 32/81 - loss 0.32212456 - samples/sec: 18.51 - lr: 0.020000\n",
+      "2021-09-21 21:57:33,610 epoch 2 - iter 40/81 - loss 0.32233945 - samples/sec: 20.99 - lr: 0.020000\n",
+      "2021-09-21 21:57:34,250 epoch 2 - iter 48/81 - loss 0.35205655 - samples/sec: 12.51 - lr: 0.020000\n",
+      "2021-09-21 21:57:34,887 epoch 2 - iter 56/81 - loss 0.35630973 - samples/sec: 12.58 - lr: 0.020000\n",
+      "2021-09-21 21:57:35,437 epoch 2 - iter 64/81 - loss 0.40536764 - samples/sec: 14.56 - lr: 0.020000\n",
+      "2021-09-21 21:57:36,086 epoch 2 - iter 72/81 - loss 0.40414246 - samples/sec: 12.34 - lr: 0.020000\n",
+      "2021-09-21 21:57:36,856 epoch 2 - iter 80/81 - loss 0.39587814 - samples/sec: 10.40 - lr: 0.020000\n",
+      "2021-09-21 21:57:36,920 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:57:36,920 EPOCH 2 done: loss 0.3992 - lr 0.0200000\n",
+      "2021-09-21 21:57:37,362 DEV : loss 0.4586819112300873 - score 0.6667\n",
+      "2021-09-21 21:57:37,364 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:57:37,366 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:57:38,051 epoch 3 - iter 8/81 - loss 0.11921511 - samples/sec: 12.10 - lr: 0.020000\n",
+      "2021-09-21 21:57:38,651 epoch 3 - iter 16/81 - loss 0.22449604 - samples/sec: 13.35 - lr: 0.020000\n",
+      "2021-09-21 21:57:39,099 epoch 3 - iter 24/81 - loss 0.18771116 - samples/sec: 17.87 - lr: 0.020000\n",
+      "2021-09-21 21:57:39,764 epoch 3 - iter 32/81 - loss 0.17224633 - samples/sec: 12.03 - lr: 0.020000\n",
+      "2021-09-21 21:57:40,298 epoch 3 - iter 40/81 - loss 0.22307364 - samples/sec: 15.01 - lr: 0.020000\n",
+      "2021-09-21 21:57:40,801 epoch 3 - iter 48/81 - loss 0.24130883 - samples/sec: 15.91 - lr: 0.020000\n",
+      "2021-09-21 21:57:41,385 epoch 3 - iter 56/81 - loss 0.27314199 - samples/sec: 13.73 - lr: 0.020000\n",
+      "2021-09-21 21:57:42,056 epoch 3 - iter 64/81 - loss 0.26153676 - samples/sec: 11.94 - lr: 0.020000\n",
+      "2021-09-21 21:57:42,631 epoch 3 - iter 72/81 - loss 0.23537042 - samples/sec: 13.91 - lr: 0.020000\n",
+      "2021-09-21 21:57:43,099 epoch 3 - iter 80/81 - loss 0.21275936 - samples/sec: 17.15 - lr: 0.020000\n",
+      "2021-09-21 21:57:43,149 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:57:43,149 EPOCH 3 done: loss 0.2103 - lr 0.0200000\n",
+      "2021-09-21 21:57:43,584 DEV : loss 0.6274784207344055 - score 0.5556\n",
+      "2021-09-21 21:57:43,585 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:57:43,587 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:57:44,576 epoch 4 - iter 8/81 - loss 0.01530037 - samples/sec: 8.25 - lr: 0.020000\n",
+      "2021-09-21 21:57:45,095 epoch 4 - iter 16/81 - loss 0.14407159 - samples/sec: 15.46 - lr: 0.020000\n",
+      "2021-09-21 21:57:45,506 epoch 4 - iter 24/81 - loss 0.09745963 - samples/sec: 19.46 - lr: 0.020000\n",
+      "2021-09-21 21:57:45,939 epoch 4 - iter 32/81 - loss 0.07435249 - samples/sec: 18.53 - lr: 0.020000\n",
+      "2021-09-21 21:57:46,716 epoch 4 - iter 40/81 - loss 0.06963313 - samples/sec: 10.30 - lr: 0.020000\n",
+      "2021-09-21 21:57:47,351 epoch 4 - iter 48/81 - loss 0.05923249 - samples/sec: 12.61 - lr: 0.020000\n",
+      "2021-09-21 21:57:47,899 epoch 4 - iter 56/81 - loss 0.05148762 - samples/sec: 14.62 - lr: 0.020000\n",
+      "2021-09-21 21:57:48,335 epoch 4 - iter 64/81 - loss 0.04522680 - samples/sec: 18.36 - lr: 0.020000\n",
+      "2021-09-21 21:57:48,828 epoch 4 - iter 72/81 - loss 0.12428248 - samples/sec: 16.26 - lr: 0.020000\n",
+      "2021-09-21 21:57:49,350 epoch 4 - iter 80/81 - loss 0.11206423 - samples/sec: 15.34 - lr: 0.020000\n",
+      "2021-09-21 21:57:49,424 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:57:49,425 EPOCH 4 done: loss 0.1107 - lr 0.0200000\n",
+      "2021-09-21 21:57:49,873 DEV : loss 0.42804351449012756 - score 0.5556\n",
+      "2021-09-21 21:57:49,875 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:57:49,876 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:57:50,447 epoch 5 - iter 8/81 - loss 0.00178292 - samples/sec: 14.46 - lr: 0.020000\n",
+      "2021-09-21 21:57:51,123 epoch 5 - iter 16/81 - loss 0.00288319 - samples/sec: 11.85 - lr: 0.020000\n",
+      "2021-09-21 21:57:51,641 epoch 5 - iter 24/81 - loss 0.14810485 - samples/sec: 15.47 - lr: 0.020000\n",
+      "2021-09-21 21:57:52,157 epoch 5 - iter 32/81 - loss 0.11995757 - samples/sec: 15.52 - lr: 0.020000\n",
+      "2021-09-21 21:57:52,606 epoch 5 - iter 40/81 - loss 0.09867507 - samples/sec: 17.86 - lr: 0.020000\n",
+      "2021-09-21 21:57:53,291 epoch 5 - iter 48/81 - loss 0.08716162 - samples/sec: 11.67 - lr: 0.020000\n",
+      "2021-09-21 21:57:53,937 epoch 5 - iter 56/81 - loss 0.07486842 - samples/sec: 12.41 - lr: 0.020000\n",
+      "2021-09-21 21:57:54,349 epoch 5 - iter 64/81 - loss 0.07538548 - samples/sec: 19.43 - lr: 0.020000\n",
+      "2021-09-21 21:57:54,880 epoch 5 - iter 72/81 - loss 0.06727076 - samples/sec: 15.09 - lr: 0.020000\n",
+      "2021-09-21 21:57:55,358 epoch 5 - iter 80/81 - loss 0.06120507 - samples/sec: 16.75 - lr: 0.020000\n",
+      "2021-09-21 21:57:55,411 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:57:55,412 EPOCH 5 done: loss 0.0605 - lr 0.0200000\n",
+      "2021-09-21 21:57:55,757 DEV : loss 0.4783254861831665 - score 0.6667\n",
+      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:57:55,758 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:57:55,760 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:57:56,155 epoch 6 - iter 8/81 - loss 0.00055654 - samples/sec: 20.99 - lr: 0.010000\n",
+      "2021-09-21 21:57:56,816 epoch 6 - iter 16/81 - loss 0.10689473 - samples/sec: 12.12 - lr: 0.010000\n",
+      "2021-09-21 21:57:57,346 epoch 6 - iter 24/81 - loss 0.07398420 - samples/sec: 15.11 - lr: 0.010000\n",
+      "2021-09-21 21:57:57,933 epoch 6 - iter 32/81 - loss 0.05581922 - samples/sec: 13.65 - lr: 0.010000\n",
+      "2021-09-21 21:57:58,322 epoch 6 - iter 40/81 - loss 0.10614814 - samples/sec: 20.61 - lr: 0.010000\n",
+      "2021-09-21 21:57:59,064 epoch 6 - iter 48/81 - loss 0.13741060 - samples/sec: 10.80 - lr: 0.010000\n",
+      "2021-09-21 21:57:59,570 epoch 6 - iter 56/81 - loss 0.12024134 - samples/sec: 15.83 - lr: 0.010000\n",
+      "2021-09-21 21:58:00,174 epoch 6 - iter 64/81 - loss 0.11422497 - samples/sec: 13.25 - lr: 0.010000\n",
+      "2021-09-21 21:58:00,966 epoch 6 - iter 72/81 - loss 0.10760669 - samples/sec: 10.11 - lr: 0.010000\n",
+      "2021-09-21 21:58:01,605 epoch 6 - iter 80/81 - loss 0.09692014 - samples/sec: 12.54 - lr: 0.010000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:59:26,372 epoch 6 - iter 80/81 - loss 0.06341199 - samples/sec: 16.19 - lr: 0.020000\n",
-      "2021-09-08 01:59:26,441 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:59:26,442 EPOCH 6 done: loss 0.1145 - lr 0.0200000\n",
-      "2021-09-08 01:59:27,045 DEV : loss 0.34088996052742004 - score 0.7778\n",
-      "2021-09-08 01:59:27,047 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:59:27,049 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:59:27,761 epoch 7 - iter 8/81 - loss 0.00305289 - samples/sec: 11.48 - lr: 0.020000\n",
-      "2021-09-08 01:59:28,177 epoch 7 - iter 16/81 - loss 0.00708796 - samples/sec: 19.31 - lr: 0.020000\n",
-      "2021-09-08 01:59:28,794 epoch 7 - iter 24/81 - loss 0.06895290 - samples/sec: 12.97 - lr: 0.020000\n",
-      "2021-09-08 01:59:29,351 epoch 7 - iter 32/81 - loss 0.10433981 - samples/sec: 14.41 - lr: 0.020000\n",
-      "2021-09-08 01:59:29,847 epoch 7 - iter 40/81 - loss 0.10532774 - samples/sec: 16.13 - lr: 0.020000\n",
-      "2021-09-08 01:59:30,376 epoch 7 - iter 48/81 - loss 0.09272235 - samples/sec: 15.17 - lr: 0.020000\n",
-      "2021-09-08 01:59:30,784 epoch 7 - iter 56/81 - loss 0.08013531 - samples/sec: 19.62 - lr: 0.020000\n",
-      "2021-09-08 01:59:31,192 epoch 7 - iter 64/81 - loss 0.10688502 - samples/sec: 19.67 - lr: 0.020000\n",
-      "2021-09-08 01:59:31,687 epoch 7 - iter 72/81 - loss 0.09802282 - samples/sec: 16.22 - lr: 0.020000\n",
-      "2021-09-08 01:59:32,155 epoch 7 - iter 80/81 - loss 0.10986133 - samples/sec: 17.12 - lr: 0.020000\n",
-      "2021-09-08 01:59:32,210 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:59:32,211 EPOCH 7 done: loss 0.1085 - lr 0.0200000\n",
-      "2021-09-08 01:59:32,866 DEV : loss 0.3276940584182739 - score 0.7778\n",
-      "Epoch     7: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:59:32,868 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:59:32,877 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:59:33,416 epoch 8 - iter 8/81 - loss 0.01934013 - samples/sec: 15.35 - lr: 0.010000\n",
-      "2021-09-08 01:59:33,878 epoch 8 - iter 16/81 - loss 0.01772691 - samples/sec: 17.36 - lr: 0.010000\n",
-      "2021-09-08 01:59:34,255 epoch 8 - iter 24/81 - loss 0.01202103 - samples/sec: 21.27 - lr: 0.010000\n",
-      "2021-09-08 01:59:34,634 epoch 8 - iter 32/81 - loss 0.01005566 - samples/sec: 21.12 - lr: 0.010000\n",
-      "2021-09-08 01:59:35,138 epoch 8 - iter 40/81 - loss 0.06328209 - samples/sec: 15.91 - lr: 0.010000\n",
-      "2021-09-08 01:59:35,614 epoch 8 - iter 48/81 - loss 0.05285082 - samples/sec: 16.82 - lr: 0.010000\n",
-      "2021-09-08 01:59:36,174 epoch 8 - iter 56/81 - loss 0.04584334 - samples/sec: 14.31 - lr: 0.010000\n",
-      "2021-09-08 01:59:36,676 epoch 8 - iter 64/81 - loss 0.06239838 - samples/sec: 15.97 - lr: 0.010000\n",
-      "2021-09-08 01:59:37,238 epoch 8 - iter 72/81 - loss 0.05563193 - samples/sec: 14.27 - lr: 0.010000\n",
-      "2021-09-08 01:59:37,738 epoch 8 - iter 80/81 - loss 0.05009360 - samples/sec: 16.01 - lr: 0.010000\n",
-      "2021-09-08 01:59:37,800 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:59:37,800 EPOCH 8 done: loss 0.0495 - lr 0.0100000\n",
-      "2021-09-08 01:59:38,583 DEV : loss 0.274433434009552 - score 0.7778\n",
-      "2021-09-08 01:59:38,586 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:59:38,587 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:59:39,207 epoch 9 - iter 8/81 - loss 0.00179738 - samples/sec: 13.24 - lr: 0.010000\n",
-      "2021-09-08 01:59:39,710 epoch 9 - iter 16/81 - loss 0.00208591 - samples/sec: 15.94 - lr: 0.010000\n",
-      "2021-09-08 01:59:40,114 epoch 9 - iter 24/81 - loss 0.00152114 - samples/sec: 19.84 - lr: 0.010000\n",
-      "2021-09-08 01:59:40,607 epoch 9 - iter 32/81 - loss 0.00177887 - samples/sec: 16.24 - lr: 0.010000\n",
-      "2021-09-08 01:59:41,087 epoch 9 - iter 40/81 - loss 0.00179741 - samples/sec: 16.72 - lr: 0.010000\n",
-      "2021-09-08 01:59:41,662 epoch 9 - iter 48/81 - loss 0.00154552 - samples/sec: 13.92 - lr: 0.010000\n",
-      "2021-09-08 01:59:42,153 epoch 9 - iter 56/81 - loss 0.00135074 - samples/sec: 16.32 - lr: 0.010000\n",
-      "2021-09-08 01:59:42,605 epoch 9 - iter 64/81 - loss 0.00158317 - samples/sec: 17.72 - lr: 0.010000\n",
-      "2021-09-08 01:59:43,125 epoch 9 - iter 72/81 - loss 0.00142985 - samples/sec: 15.41 - lr: 0.010000\n",
-      "2021-09-08 01:59:43,526 epoch 9 - iter 80/81 - loss 0.00136476 - samples/sec: 20.01 - lr: 0.010000\n",
-      "2021-09-08 01:59:43,573 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:59:43,574 EPOCH 9 done: loss 0.0013 - lr 0.0100000\n",
-      "2021-09-08 01:59:44,211 DEV : loss 0.2897077798843384 - score 0.7778\n",
-      "2021-09-08 01:59:44,213 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:59:45,681 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:59:46,383 epoch 10 - iter 8/81 - loss 0.04159786 - samples/sec: 11.66 - lr: 0.010000\n",
-      "2021-09-08 01:59:46,907 epoch 10 - iter 16/81 - loss 0.02095673 - samples/sec: 15.28 - lr: 0.010000\n",
-      "2021-09-08 01:59:47,411 epoch 10 - iter 24/81 - loss 0.01701189 - samples/sec: 15.91 - lr: 0.010000\n",
-      "2021-09-08 01:59:47,961 epoch 10 - iter 32/81 - loss 0.05629019 - samples/sec: 14.57 - lr: 0.010000\n",
-      "2021-09-08 01:59:48,345 epoch 10 - iter 40/81 - loss 0.04516459 - samples/sec: 20.88 - lr: 0.010000\n",
-      "2021-09-08 01:59:48,816 epoch 10 - iter 48/81 - loss 0.03774036 - samples/sec: 17.02 - lr: 0.010000\n",
-      "2021-09-08 01:59:49,272 epoch 10 - iter 56/81 - loss 0.03241651 - samples/sec: 17.57 - lr: 0.010000\n",
-      "2021-09-08 01:59:49,870 epoch 10 - iter 64/81 - loss 0.02838724 - samples/sec: 13.39 - lr: 0.010000\n",
-      "2021-09-08 01:59:50,254 epoch 10 - iter 72/81 - loss 0.02525347 - samples/sec: 20.89 - lr: 0.010000\n",
-      "2021-09-08 01:59:50,683 epoch 10 - iter 80/81 - loss 0.02278604 - samples/sec: 18.67 - lr: 0.010000\n",
-      "2021-09-08 01:59:50,792 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:59:50,793 EPOCH 10 done: loss 0.0225 - lr 0.0100000\n",
-      "2021-09-08 01:59:51,390 DEV : loss 0.31702813506126404 - score 0.7778\n",
-      "2021-09-08 01:59:51,392 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:59:55,474 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:59:55,475 Testing using best model ...\n",
-      "2021-09-08 01:59:55,476 loading file None/best-model.pt\n",
+      "2021-09-21 21:58:01,733 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:58:01,734 EPOCH 6 done: loss 0.0957 - lr 0.0100000\n",
+      "2021-09-21 21:58:02,333 DEV : loss 0.43614211678504944 - score 0.6667\n",
+      "2021-09-21 21:58:02,334 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:58:02,338 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:58:02,874 epoch 7 - iter 8/81 - loss 0.06274312 - samples/sec: 15.58 - lr: 0.010000\n",
+      "2021-09-21 21:58:03,342 epoch 7 - iter 16/81 - loss 0.03160500 - samples/sec: 17.13 - lr: 0.010000\n",
+      "2021-09-21 21:58:04,279 epoch 7 - iter 24/81 - loss 0.02302103 - samples/sec: 8.55 - lr: 0.010000\n",
+      "2021-09-21 21:58:05,159 epoch 7 - iter 32/81 - loss 0.01741000 - samples/sec: 9.10 - lr: 0.010000\n",
+      "2021-09-21 21:58:05,745 epoch 7 - iter 40/81 - loss 0.04615969 - samples/sec: 13.67 - lr: 0.010000\n",
+      "2021-09-21 21:58:06,368 epoch 7 - iter 48/81 - loss 0.03857155 - samples/sec: 12.85 - lr: 0.010000\n",
+      "2021-09-21 21:58:06,762 epoch 7 - iter 56/81 - loss 0.03313573 - samples/sec: 20.36 - lr: 0.010000\n",
+      "2021-09-21 21:58:07,272 epoch 7 - iter 64/81 - loss 0.02957242 - samples/sec: 15.69 - lr: 0.010000\n",
+      "2021-09-21 21:58:07,847 epoch 7 - iter 72/81 - loss 0.02636030 - samples/sec: 13.94 - lr: 0.010000\n",
+      "2021-09-21 21:58:08,358 epoch 7 - iter 80/81 - loss 0.02761980 - samples/sec: 15.69 - lr: 0.010000\n",
+      "2021-09-21 21:58:08,424 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:58:08,424 EPOCH 7 done: loss 0.0273 - lr 0.0100000\n",
+      "2021-09-21 21:58:08,862 DEV : loss 0.4528473913669586 - score 0.6667\n",
+      "2021-09-21 21:58:08,864 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:58:08,932 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:58:09,366 epoch 8 - iter 8/81 - loss 0.10019656 - samples/sec: 19.11 - lr: 0.010000\n",
+      "2021-09-21 21:58:10,181 epoch 8 - iter 16/81 - loss 0.05032537 - samples/sec: 9.82 - lr: 0.010000\n",
+      "2021-09-21 21:58:10,622 epoch 8 - iter 24/81 - loss 0.03409613 - samples/sec: 18.20 - lr: 0.010000\n",
+      "2021-09-21 21:58:11,176 epoch 8 - iter 32/81 - loss 0.02595702 - samples/sec: 14.46 - lr: 0.010000\n",
+      "2021-09-21 21:58:11,733 epoch 8 - iter 40/81 - loss 0.02651003 - samples/sec: 14.38 - lr: 0.010000\n",
+      "2021-09-21 21:58:12,516 epoch 8 - iter 48/81 - loss 0.02231206 - samples/sec: 10.22 - lr: 0.010000\n",
+      "2021-09-21 21:58:13,003 epoch 8 - iter 56/81 - loss 0.01919290 - samples/sec: 16.48 - lr: 0.010000\n",
+      "2021-09-21 21:58:13,753 epoch 8 - iter 64/81 - loss 0.01687870 - samples/sec: 10.66 - lr: 0.010000\n",
+      "2021-09-21 21:58:14,266 epoch 8 - iter 72/81 - loss 0.01505101 - samples/sec: 15.63 - lr: 0.010000\n",
+      "2021-09-21 21:58:14,965 epoch 8 - iter 80/81 - loss 0.04086289 - samples/sec: 11.45 - lr: 0.010000\n",
+      "2021-09-21 21:58:15,184 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:58:15,185 EPOCH 8 done: loss 0.0404 - lr 0.0100000\n",
+      "2021-09-21 21:58:15,712 DEV : loss 0.4853934645652771 - score 0.6667\n",
+      "2021-09-21 21:58:15,713 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:58:15,715 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:58:16,217 epoch 9 - iter 8/81 - loss 0.00060668 - samples/sec: 16.79 - lr: 0.010000\n",
+      "2021-09-21 21:58:17,028 epoch 9 - iter 16/81 - loss 0.00054781 - samples/sec: 9.88 - lr: 0.010000\n",
+      "2021-09-21 21:58:17,995 epoch 9 - iter 24/81 - loss 0.00056573 - samples/sec: 8.28 - lr: 0.010000\n",
+      "2021-09-21 21:58:18,616 epoch 9 - iter 32/81 - loss 0.00050761 - samples/sec: 12.89 - lr: 0.010000\n",
+      "2021-09-21 21:58:19,281 epoch 9 - iter 40/81 - loss 0.00074709 - samples/sec: 12.05 - lr: 0.010000\n",
+      "2021-09-21 21:58:19,655 epoch 9 - iter 48/81 - loss 0.00070691 - samples/sec: 21.44 - lr: 0.010000\n",
+      "2021-09-21 21:58:20,095 epoch 9 - iter 56/81 - loss 0.00066176 - samples/sec: 18.22 - lr: 0.010000\n",
+      "2021-09-21 21:58:20,517 epoch 9 - iter 64/81 - loss 0.00077255 - samples/sec: 18.99 - lr: 0.010000\n",
+      "2021-09-21 21:58:20,913 epoch 9 - iter 72/81 - loss 0.00072223 - samples/sec: 20.21 - lr: 0.010000\n",
+      "2021-09-21 21:58:21,447 epoch 9 - iter 80/81 - loss 0.00068820 - samples/sec: 15.02 - lr: 0.010000\n",
+      "2021-09-21 21:58:21,496 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:58:21,496 EPOCH 9 done: loss 0.0007 - lr 0.0100000\n",
+      "2021-09-21 21:58:21,976 DEV : loss 0.49749407172203064 - score 0.6667\n",
+      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 21:58:21,978 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:58:22,055 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:58:22,779 epoch 10 - iter 8/81 - loss 0.00105700 - samples/sec: 11.29 - lr: 0.005000\n",
+      "2021-09-21 21:58:23,178 epoch 10 - iter 16/81 - loss 0.00069663 - samples/sec: 20.08 - lr: 0.005000\n",
+      "2021-09-21 21:58:23,971 epoch 10 - iter 24/81 - loss 0.00268981 - samples/sec: 10.09 - lr: 0.005000\n",
+      "2021-09-21 21:58:24,696 epoch 10 - iter 32/81 - loss 0.00215772 - samples/sec: 11.06 - lr: 0.005000\n",
+      "2021-09-21 21:58:25,161 epoch 10 - iter 40/81 - loss 0.00179873 - samples/sec: 17.24 - lr: 0.005000\n",
+      "2021-09-21 21:58:25,775 epoch 10 - iter 48/81 - loss 0.01401098 - samples/sec: 13.03 - lr: 0.005000\n",
+      "2021-09-21 21:58:26,462 epoch 10 - iter 56/81 - loss 0.01204956 - samples/sec: 11.67 - lr: 0.005000\n",
+      "2021-09-21 21:58:27,277 epoch 10 - iter 64/81 - loss 0.01058507 - samples/sec: 9.82 - lr: 0.005000\n",
+      "2021-09-21 21:58:27,791 epoch 10 - iter 72/81 - loss 0.00946699 - samples/sec: 15.59 - lr: 0.005000\n",
+      "2021-09-21 21:58:28,369 epoch 10 - iter 80/81 - loss 0.00855085 - samples/sec: 13.87 - lr: 0.005000\n",
+      "2021-09-21 21:58:28,421 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:58:28,422 EPOCH 10 done: loss 0.0084 - lr 0.0050000\n",
+      "2021-09-21 21:58:28,937 DEV : loss 0.5565004944801331 - score 0.5556\n",
+      "2021-09-21 21:58:28,938 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:58:33,300 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:58:33,301 Testing using best model ...\n",
+      "2021-09-21 21:58:33,324 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:00:00,854 \t0.6\n",
-      "2021-09-08 02:00:00,854 \n",
+      "2021-09-21 21:58:40,380 \t0.2\n",
+      "2021-09-21 21:58:40,380 \n",
       "Results:\n",
-      "- F-score (micro) 0.6\n",
-      "- F-score (macro) 0.32\n",
-      "- Accuracy 0.6\n",
+      "- F-score (micro) 0.2\n",
+      "- F-score (macro) 0.1667\n",
+      "- Accuracy 0.2\n",
       "\n",
       "By class:\n",
       "                        precision    recall  f1-score   support\n",
       "\n",
-      "Family & Relationships     0.2500    1.0000    0.4000         1\n",
-      "                Health     0.0000    0.0000    0.0000         0\n",
-      " Science & Mathematics     0.0000    0.0000    0.0000         0\n",
+      "Family & Relationships     0.0000    0.0000    0.0000         1\n",
+      "                Health     1.0000    0.5000    0.6667         2\n",
+      " Science & Mathematics     0.0000    0.0000    0.0000         2\n",
       " Entertainment & Music     0.0000    0.0000    0.0000         1\n",
       "  Computers & Internet     1.0000    1.0000    1.0000         1\n",
-      " Education & Reference     0.0000    0.0000    0.0000         1\n",
-      "                Sports     1.0000    1.0000    1.0000         2\n",
-      "     Society & Culture     0.0000    0.0000    0.0000         1\n",
-      "    Business & Finance     1.0000    0.6667    0.8000         3\n",
+      " Education & Reference     0.0000    0.0000    0.0000         2\n",
+      "                Sports     0.0000    0.0000    0.0000         0\n",
+      "     Society & Culture     0.0000    0.0000    0.0000         0\n",
+      "    Business & Finance     0.0000    0.0000    0.0000         1\n",
       " Politics & Government     0.0000    0.0000    0.0000         0\n",
       "\n",
-      "             micro avg     0.6000    0.6000    0.6000        10\n",
-      "             macro avg     0.3250    0.3667    0.3200        10\n",
-      "          weighted avg     0.6250    0.6000    0.5800        10\n",
-      "           samples avg     0.6000    0.6000    0.6000        10\n",
+      "             micro avg     0.2000    0.2000    0.2000        10\n",
+      "             macro avg     0.2000    0.1500    0.1667        10\n",
+      "          weighted avg     0.3000    0.2000    0.2333        10\n",
+      "           samples avg     0.2000    0.2000    0.2000        10\n",
       "\n",
-      "2021-09-08 02:00:00,855 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:14,561 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
+      "2021-09-21 21:58:40,381 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:58:57,609 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:00:22,540 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:59:02,048 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 91/91 [00:00<00:00, 14939.79it/s]"
+      "100%|██████████| 91/91 [00:00<00:00, 17050.78it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:00:22,548 [b'Family & Relationships', b'Health', b'Science & Mathematics', b'Entertainment & Music', b'Computers & Internet', b'Education & Reference', b'Sports', b'Society & Culture', b'Business & Finance', b'Politics & Government']\n",
-      "2021-09-08 02:00:22,565 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:22,566 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:59:02,055 [b'Family & Relationships', b'Health', b'Science & Mathematics', b'Entertainment & Music', b'Computers & Internet', b'Education & Reference', b'Sports', b'Society & Culture', b'Business & Finance', b'Politics & Government']\n",
+      "2021-09-21 21:59:02,064 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:59:02,066 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -2614,25 +2614,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:00:22,567 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:22,567 Corpus: \"Corpus: 81 train + 9 dev + 10 test sentences\"\n",
-      "2021-09-08 02:00:22,568 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:22,568 Parameters:\n",
-      "2021-09-08 02:00:22,568  - learning_rate: \"0.02\"\n",
-      "2021-09-08 02:00:22,569  - mini_batch_size: \"1\"\n",
-      "2021-09-08 02:00:22,569  - patience: \"3\"\n",
-      "2021-09-08 02:00:22,569  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 02:00:22,570  - max_epochs: \"10\"\n",
-      "2021-09-08 02:00:22,570  - shuffle: \"True\"\n",
-      "2021-09-08 02:00:22,570  - train_with_dev: \"False\"\n",
-      "2021-09-08 02:00:22,571  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 02:00:22,571 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:22,571 Model training base path: \"None\"\n",
-      "2021-09-08 02:00:22,572 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:22,572 Device: cuda:1\n",
-      "2021-09-08 02:00:22,572 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:22,573 Embeddings storage mode: cpu\n",
-      "2021-09-08 02:00:22,609 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:59:02,067 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:59:02,067 Corpus: \"Corpus: 81 train + 9 dev + 10 test sentences\"\n",
+      "2021-09-21 21:59:02,067 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:59:02,067 Parameters:\n",
+      "2021-09-21 21:59:02,068  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:59:02,068  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:59:02,068  - patience: \"3\"\n",
+      "2021-09-21 21:59:02,069  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:59:02,070  - max_epochs: \"10\"\n",
+      "2021-09-21 21:59:02,070  - shuffle: \"True\"\n",
+      "2021-09-21 21:59:02,070  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:59:02,071  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:59:02,071 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:59:02,071 Model training base path: \"None\"\n",
+      "2021-09-21 21:59:02,072 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:59:02,072 Device: cuda:0\n",
+      "2021-09-21 21:59:02,072 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:59:02,072 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:59:02,078 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -2646,197 +2646,199 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:00:23,119 epoch 1 - iter 8/81 - loss 0.68521024 - samples/sec: 16.23 - lr: 0.020000\n",
-      "2021-09-08 02:00:23,651 epoch 1 - iter 16/81 - loss 0.38027627 - samples/sec: 15.08 - lr: 0.020000\n",
-      "2021-09-08 02:00:24,078 epoch 1 - iter 24/81 - loss 0.43939032 - samples/sec: 18.77 - lr: 0.020000\n",
-      "2021-09-08 02:00:24,539 epoch 1 - iter 32/81 - loss 0.42851309 - samples/sec: 17.39 - lr: 0.020000\n",
-      "2021-09-08 02:00:24,954 epoch 1 - iter 40/81 - loss 0.42393109 - samples/sec: 19.29 - lr: 0.020000\n",
-      "2021-09-08 02:00:25,462 epoch 1 - iter 48/81 - loss 0.61522846 - samples/sec: 15.76 - lr: 0.020000\n",
-      "2021-09-08 02:00:25,896 epoch 1 - iter 56/81 - loss 0.54599971 - samples/sec: 18.51 - lr: 0.020000\n",
-      "2021-09-08 02:00:26,443 epoch 1 - iter 64/81 - loss 0.55991301 - samples/sec: 14.64 - lr: 0.020000\n",
-      "2021-09-08 02:00:26,975 epoch 1 - iter 72/81 - loss 0.54911988 - samples/sec: 15.06 - lr: 0.020000\n",
-      "2021-09-08 02:00:27,422 epoch 1 - iter 80/81 - loss 0.53684204 - samples/sec: 17.94 - lr: 0.020000\n",
-      "2021-09-08 02:00:27,472 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:27,472 EPOCH 1 done: loss 0.5304 - lr 0.0200000\n",
-      "2021-09-08 02:00:27,729 DEV : loss 0.4071762263774872 - score 0.7778\n",
-      "2021-09-08 02:00:27,731 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:59:02,711 epoch 1 - iter 8/81 - loss 0.46701678 - samples/sec: 13.05 - lr: 0.020000\n",
+      "2021-09-21 21:59:03,290 epoch 1 - iter 16/81 - loss 0.29727089 - samples/sec: 13.83 - lr: 0.020000\n",
+      "2021-09-21 21:59:03,734 epoch 1 - iter 24/81 - loss 0.35113760 - samples/sec: 18.04 - lr: 0.020000\n",
+      "2021-09-21 21:59:04,237 epoch 1 - iter 32/81 - loss 0.49372648 - samples/sec: 15.92 - lr: 0.020000\n",
+      "2021-09-21 21:59:04,732 epoch 1 - iter 40/81 - loss 0.40194966 - samples/sec: 16.21 - lr: 0.020000\n",
+      "2021-09-21 21:59:05,407 epoch 1 - iter 48/81 - loss 0.48686968 - samples/sec: 11.86 - lr: 0.020000\n",
+      "2021-09-21 21:59:05,930 epoch 1 - iter 56/81 - loss 0.46099904 - samples/sec: 15.31 - lr: 0.020000\n",
+      "2021-09-21 21:59:06,441 epoch 1 - iter 64/81 - loss 0.46059039 - samples/sec: 15.68 - lr: 0.020000\n",
+      "2021-09-21 21:59:06,899 epoch 1 - iter 72/81 - loss 0.49508808 - samples/sec: 17.47 - lr: 0.020000\n",
+      "2021-09-21 21:59:07,796 epoch 1 - iter 80/81 - loss 0.48876462 - samples/sec: 8.93 - lr: 0.020000\n",
+      "2021-09-21 21:59:07,854 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:59:07,854 EPOCH 1 done: loss 0.4832 - lr 0.0200000\n",
+      "2021-09-21 21:59:08,454 DEV : loss 0.7888447642326355 - score 0.1111\n",
+      "2021-09-21 21:59:08,455 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:00:32,443 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:32,884 epoch 2 - iter 8/81 - loss 0.81448542 - samples/sec: 18.83 - lr: 0.020000\n",
-      "2021-09-08 02:00:33,505 epoch 2 - iter 16/81 - loss 0.76842967 - samples/sec: 12.92 - lr: 0.020000\n",
-      "2021-09-08 02:00:33,955 epoch 2 - iter 24/81 - loss 0.66015437 - samples/sec: 17.82 - lr: 0.020000\n",
-      "2021-09-08 02:00:34,384 epoch 2 - iter 32/81 - loss 0.69230222 - samples/sec: 18.66 - lr: 0.020000\n",
-      "2021-09-08 02:00:34,795 epoch 2 - iter 40/81 - loss 0.57835213 - samples/sec: 19.52 - lr: 0.020000\n",
-      "2021-09-08 02:00:35,321 epoch 2 - iter 48/81 - loss 0.55940821 - samples/sec: 15.26 - lr: 0.020000\n",
-      "2021-09-08 02:00:35,759 epoch 2 - iter 56/81 - loss 0.51234545 - samples/sec: 18.32 - lr: 0.020000\n",
-      "2021-09-08 02:00:36,292 epoch 2 - iter 64/81 - loss 0.49179636 - samples/sec: 15.03 - lr: 0.020000\n",
-      "2021-09-08 02:00:36,740 epoch 2 - iter 72/81 - loss 0.47900362 - samples/sec: 17.88 - lr: 0.020000\n",
-      "2021-09-08 02:00:37,145 epoch 2 - iter 80/81 - loss 0.44556562 - samples/sec: 19.78 - lr: 0.020000\n",
-      "2021-09-08 02:00:37,194 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:37,194 EPOCH 2 done: loss 0.4401 - lr 0.0200000\n",
-      "2021-09-08 02:00:37,450 DEV : loss 0.2587222158908844 - score 0.7778\n",
-      "2021-09-08 02:00:37,451 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:59:12,533 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:59:13,074 epoch 2 - iter 8/81 - loss 0.79070835 - samples/sec: 15.39 - lr: 0.020000\n",
+      "2021-09-21 21:59:13,715 epoch 2 - iter 16/81 - loss 0.54934621 - samples/sec: 12.50 - lr: 0.020000\n",
+      "2021-09-21 21:59:14,352 epoch 2 - iter 24/81 - loss 0.60922302 - samples/sec: 12.58 - lr: 0.020000\n",
+      "2021-09-21 21:59:14,824 epoch 2 - iter 32/81 - loss 0.48832375 - samples/sec: 16.95 - lr: 0.020000\n",
+      "2021-09-21 21:59:15,342 epoch 2 - iter 40/81 - loss 0.49368204 - samples/sec: 15.47 - lr: 0.020000\n",
+      "2021-09-21 21:59:15,863 epoch 2 - iter 48/81 - loss 0.46569605 - samples/sec: 15.38 - lr: 0.020000\n",
+      "2021-09-21 21:59:16,469 epoch 2 - iter 56/81 - loss 0.48316292 - samples/sec: 13.21 - lr: 0.020000\n",
+      "2021-09-21 21:59:17,050 epoch 2 - iter 64/81 - loss 0.45755091 - samples/sec: 13.80 - lr: 0.020000\n",
+      "2021-09-21 21:59:17,557 epoch 2 - iter 72/81 - loss 0.44141501 - samples/sec: 15.78 - lr: 0.020000\n",
+      "2021-09-21 21:59:18,071 epoch 2 - iter 80/81 - loss 0.45727054 - samples/sec: 15.60 - lr: 0.020000\n",
+      "2021-09-21 21:59:18,165 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:59:18,166 EPOCH 2 done: loss 0.4532 - lr 0.0200000\n",
+      "2021-09-21 21:59:18,650 DEV : loss 0.7573666572570801 - score 0.1111\n",
+      "2021-09-21 21:59:18,651 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:00:45,126 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:45,602 epoch 3 - iter 8/81 - loss 0.17455283 - samples/sec: 17.46 - lr: 0.020000\n",
-      "2021-09-08 02:00:46,012 epoch 3 - iter 16/81 - loss 0.27915044 - samples/sec: 19.54 - lr: 0.020000\n",
-      "2021-09-08 02:00:46,490 epoch 3 - iter 24/81 - loss 0.30700563 - samples/sec: 16.79 - lr: 0.020000\n",
-      "2021-09-08 02:00:46,920 epoch 3 - iter 32/81 - loss 0.31407243 - samples/sec: 18.64 - lr: 0.020000\n",
-      "2021-09-08 02:00:47,363 epoch 3 - iter 40/81 - loss 0.29127742 - samples/sec: 18.10 - lr: 0.020000\n",
-      "2021-09-08 02:00:47,880 epoch 3 - iter 48/81 - loss 0.31198492 - samples/sec: 15.48 - lr: 0.020000\n",
-      "2021-09-08 02:00:48,299 epoch 3 - iter 56/81 - loss 0.27170841 - samples/sec: 19.18 - lr: 0.020000\n",
-      "2021-09-08 02:00:48,912 epoch 3 - iter 64/81 - loss 0.23943144 - samples/sec: 13.06 - lr: 0.020000\n",
-      "2021-09-08 02:00:49,386 epoch 3 - iter 72/81 - loss 0.23911460 - samples/sec: 16.91 - lr: 0.020000\n",
-      "2021-09-08 02:00:49,871 epoch 3 - iter 80/81 - loss 0.22344001 - samples/sec: 16.52 - lr: 0.020000\n",
-      "2021-09-08 02:00:49,925 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:49,925 EPOCH 3 done: loss 0.2208 - lr 0.0200000\n",
-      "2021-09-08 02:00:51,165 DEV : loss 0.2787739038467407 - score 0.6667\n",
-      "2021-09-08 02:00:51,166 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:00:51,287 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:51,725 epoch 4 - iter 8/81 - loss 0.30981711 - samples/sec: 18.94 - lr: 0.020000\n",
-      "2021-09-08 02:00:52,182 epoch 4 - iter 16/81 - loss 0.15779272 - samples/sec: 17.51 - lr: 0.020000\n",
-      "2021-09-08 02:00:52,669 epoch 4 - iter 24/81 - loss 0.17579573 - samples/sec: 16.48 - lr: 0.020000\n",
-      "2021-09-08 02:00:53,092 epoch 4 - iter 32/81 - loss 0.13361128 - samples/sec: 18.93 - lr: 0.020000\n",
-      "2021-09-08 02:00:53,494 epoch 4 - iter 40/81 - loss 0.12564627 - samples/sec: 19.93 - lr: 0.020000\n",
-      "2021-09-08 02:00:53,943 epoch 4 - iter 48/81 - loss 0.12164942 - samples/sec: 17.85 - lr: 0.020000\n",
-      "2021-09-08 02:00:54,406 epoch 4 - iter 56/81 - loss 0.13791708 - samples/sec: 17.30 - lr: 0.020000\n",
-      "2021-09-08 02:00:54,849 epoch 4 - iter 64/81 - loss 0.16267091 - samples/sec: 18.11 - lr: 0.020000\n",
-      "2021-09-08 02:00:55,373 epoch 4 - iter 72/81 - loss 0.16417816 - samples/sec: 15.27 - lr: 0.020000\n",
-      "2021-09-08 02:00:55,873 epoch 4 - iter 80/81 - loss 0.14920427 - samples/sec: 16.04 - lr: 0.020000\n",
-      "2021-09-08 02:00:55,926 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:55,926 EPOCH 4 done: loss 0.1474 - lr 0.0200000\n",
-      "2021-09-08 02:00:56,188 DEV : loss 0.31693369150161743 - score 0.7778\n",
-      "2021-09-08 02:00:56,189 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:00:56,193 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:56,666 epoch 5 - iter 8/81 - loss 0.06496693 - samples/sec: 17.45 - lr: 0.020000\n",
-      "2021-09-08 02:00:57,061 epoch 5 - iter 16/81 - loss 0.03357895 - samples/sec: 20.28 - lr: 0.020000\n",
-      "2021-09-08 02:00:57,526 epoch 5 - iter 24/81 - loss 0.02273193 - samples/sec: 17.25 - lr: 0.020000\n",
-      "2021-09-08 02:00:57,973 epoch 5 - iter 32/81 - loss 0.01813442 - samples/sec: 17.92 - lr: 0.020000\n",
-      "2021-09-08 02:00:58,482 epoch 5 - iter 40/81 - loss 0.09564699 - samples/sec: 15.75 - lr: 0.020000\n",
-      "2021-09-08 02:00:59,020 epoch 5 - iter 48/81 - loss 0.08274743 - samples/sec: 14.89 - lr: 0.020000\n",
-      "2021-09-08 02:00:59,448 epoch 5 - iter 56/81 - loss 0.07125470 - samples/sec: 18.72 - lr: 0.020000\n",
-      "2021-09-08 02:00:59,840 epoch 5 - iter 64/81 - loss 0.06240643 - samples/sec: 20.42 - lr: 0.020000\n",
-      "2021-09-08 02:01:00,302 epoch 5 - iter 72/81 - loss 0.07116500 - samples/sec: 17.34 - lr: 0.020000\n",
-      "2021-09-08 02:01:00,746 epoch 5 - iter 80/81 - loss 0.07275557 - samples/sec: 18.04 - lr: 0.020000\n",
-      "2021-09-08 02:01:00,799 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:01:00,800 EPOCH 5 done: loss 0.0719 - lr 0.0200000\n",
-      "2021-09-08 02:01:01,066 DEV : loss 0.4217536449432373 - score 0.6667\n",
-      "2021-09-08 02:01:01,067 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 02:01:01,071 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:01:01,608 epoch 6 - iter 8/81 - loss 0.18805886 - samples/sec: 15.33 - lr: 0.020000\n",
-      "2021-09-08 02:01:02,023 epoch 6 - iter 16/81 - loss 0.10212614 - samples/sec: 19.32 - lr: 0.020000\n",
-      "2021-09-08 02:01:02,511 epoch 6 - iter 24/81 - loss 0.07392268 - samples/sec: 16.41 - lr: 0.020000\n",
-      "2021-09-08 02:01:02,901 epoch 6 - iter 32/81 - loss 0.05583596 - samples/sec: 20.54 - lr: 0.020000\n",
-      "2021-09-08 02:01:03,388 epoch 6 - iter 40/81 - loss 0.06102333 - samples/sec: 16.47 - lr: 0.020000\n",
-      "2021-09-08 02:01:03,927 epoch 6 - iter 48/81 - loss 0.05096627 - samples/sec: 14.84 - lr: 0.020000\n",
-      "2021-09-08 02:01:04,352 epoch 6 - iter 56/81 - loss 0.04378797 - samples/sec: 18.88 - lr: 0.020000\n",
-      "2021-09-08 02:01:04,756 epoch 6 - iter 64/81 - loss 0.07902251 - samples/sec: 19.82 - lr: 0.020000\n",
-      "2021-09-08 02:01:05,237 epoch 6 - iter 72/81 - loss 0.10949600 - samples/sec: 16.66 - lr: 0.020000\n",
-      "2021-09-08 02:01:05,673 epoch 6 - iter 80/81 - loss 0.11260928 - samples/sec: 18.37 - lr: 0.020000\n",
-      "2021-09-08 02:01:05,727 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:59:22,740 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:59:23,269 epoch 3 - iter 8/81 - loss 0.17582746 - samples/sec: 15.86 - lr: 0.020000\n",
+      "2021-09-21 21:59:23,724 epoch 3 - iter 16/81 - loss 0.12071298 - samples/sec: 17.59 - lr: 0.020000\n",
+      "2021-09-21 21:59:24,352 epoch 3 - iter 24/81 - loss 0.28983873 - samples/sec: 12.76 - lr: 0.020000\n",
+      "2021-09-21 21:59:24,927 epoch 3 - iter 32/81 - loss 0.26876794 - samples/sec: 13.92 - lr: 0.020000\n",
+      "2021-09-21 21:59:25,467 epoch 3 - iter 40/81 - loss 0.23132392 - samples/sec: 14.84 - lr: 0.020000\n",
+      "2021-09-21 21:59:26,021 epoch 3 - iter 48/81 - loss 0.25116805 - samples/sec: 14.44 - lr: 0.020000\n",
+      "2021-09-21 21:59:26,526 epoch 3 - iter 56/81 - loss 0.28328432 - samples/sec: 15.87 - lr: 0.020000\n",
+      "2021-09-21 21:59:27,355 epoch 3 - iter 64/81 - loss 0.29308462 - samples/sec: 9.66 - lr: 0.020000\n",
+      "2021-09-21 21:59:27,889 epoch 3 - iter 72/81 - loss 0.28557604 - samples/sec: 15.00 - lr: 0.020000\n",
+      "2021-09-21 21:59:28,382 epoch 3 - iter 80/81 - loss 0.29462892 - samples/sec: 16.24 - lr: 0.020000\n",
+      "2021-09-21 21:59:28,440 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:59:28,440 EPOCH 3 done: loss 0.2911 - lr 0.0200000\n",
+      "2021-09-21 21:59:28,899 DEV : loss 0.8344436287879944 - score 0.3333\n",
+      "2021-09-21 21:59:28,900 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:59:32,966 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:59:33,553 epoch 4 - iter 8/81 - loss 0.24848583 - samples/sec: 14.11 - lr: 0.020000\n",
+      "2021-09-21 21:59:34,011 epoch 4 - iter 16/81 - loss 0.15501721 - samples/sec: 17.50 - lr: 0.020000\n",
+      "2021-09-21 21:59:34,640 epoch 4 - iter 24/81 - loss 0.15730142 - samples/sec: 12.73 - lr: 0.020000\n",
+      "2021-09-21 21:59:35,077 epoch 4 - iter 32/81 - loss 0.19614568 - samples/sec: 18.32 - lr: 0.020000\n",
+      "2021-09-21 21:59:35,533 epoch 4 - iter 40/81 - loss 0.25518534 - samples/sec: 17.58 - lr: 0.020000\n",
+      "2021-09-21 21:59:36,024 epoch 4 - iter 48/81 - loss 0.21519764 - samples/sec: 16.34 - lr: 0.020000\n",
+      "2021-09-21 21:59:36,396 epoch 4 - iter 56/81 - loss 0.22093087 - samples/sec: 21.55 - lr: 0.020000\n",
+      "2021-09-21 21:59:36,830 epoch 4 - iter 64/81 - loss 0.21475012 - samples/sec: 18.47 - lr: 0.020000\n",
+      "2021-09-21 21:59:37,207 epoch 4 - iter 72/81 - loss 0.20410856 - samples/sec: 21.23 - lr: 0.020000\n",
+      "2021-09-21 21:59:37,701 epoch 4 - iter 80/81 - loss 0.20178117 - samples/sec: 16.24 - lr: 0.020000\n",
+      "2021-09-21 21:59:37,749 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:59:37,750 EPOCH 4 done: loss 0.2007 - lr 0.0200000\n",
+      "2021-09-21 21:59:38,802 DEV : loss 0.683146059513092 - score 0.3333\n",
+      "2021-09-21 21:59:38,804 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:59:42,929 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:59:43,443 epoch 5 - iter 8/81 - loss 0.00266747 - samples/sec: 16.09 - lr: 0.020000\n",
+      "2021-09-21 21:59:43,874 epoch 5 - iter 16/81 - loss 0.07980417 - samples/sec: 18.58 - lr: 0.020000\n",
+      "2021-09-21 21:59:44,337 epoch 5 - iter 24/81 - loss 0.07948783 - samples/sec: 17.29 - lr: 0.020000\n",
+      "2021-09-21 21:59:44,740 epoch 5 - iter 32/81 - loss 0.11800994 - samples/sec: 19.90 - lr: 0.020000\n",
+      "2021-09-21 21:59:45,166 epoch 5 - iter 40/81 - loss 0.10127687 - samples/sec: 18.82 - lr: 0.020000\n",
+      "2021-09-21 21:59:45,634 epoch 5 - iter 48/81 - loss 0.09367578 - samples/sec: 17.12 - lr: 0.020000\n",
+      "2021-09-21 21:59:46,127 epoch 5 - iter 56/81 - loss 0.11938954 - samples/sec: 16.28 - lr: 0.020000\n",
+      "2021-09-21 21:59:46,548 epoch 5 - iter 64/81 - loss 0.14229875 - samples/sec: 19.00 - lr: 0.020000\n",
+      "2021-09-21 21:59:46,973 epoch 5 - iter 72/81 - loss 0.14645988 - samples/sec: 18.86 - lr: 0.020000\n",
+      "2021-09-21 21:59:47,577 epoch 5 - iter 80/81 - loss 0.17228916 - samples/sec: 13.27 - lr: 0.020000\n",
+      "2021-09-21 21:59:47,624 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:59:47,624 EPOCH 5 done: loss 0.1702 - lr 0.0200000\n",
+      "2021-09-21 21:59:48,036 DEV : loss 0.535755455493927 - score 0.4444\n",
+      "2021-09-21 21:59:48,037 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:59:51,940 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:59:52,419 epoch 6 - iter 8/81 - loss 0.03465172 - samples/sec: 17.76 - lr: 0.020000\n",
+      "2021-09-21 21:59:52,903 epoch 6 - iter 16/81 - loss 0.02032082 - samples/sec: 16.56 - lr: 0.020000\n",
+      "2021-09-21 21:59:53,374 epoch 6 - iter 24/81 - loss 0.01503733 - samples/sec: 16.98 - lr: 0.020000\n",
+      "2021-09-21 21:59:53,804 epoch 6 - iter 32/81 - loss 0.01171791 - samples/sec: 18.66 - lr: 0.020000\n",
+      "2021-09-21 21:59:54,441 epoch 6 - iter 40/81 - loss 0.03427507 - samples/sec: 12.56 - lr: 0.020000\n",
+      "2021-09-21 21:59:54,843 epoch 6 - iter 48/81 - loss 0.02899172 - samples/sec: 19.95 - lr: 0.020000\n",
+      "2021-09-21 21:59:55,344 epoch 6 - iter 56/81 - loss 0.06203000 - samples/sec: 16.01 - lr: 0.020000\n",
+      "2021-09-21 21:59:55,812 epoch 6 - iter 64/81 - loss 0.09141674 - samples/sec: 17.10 - lr: 0.020000\n",
+      "2021-09-21 21:59:56,233 epoch 6 - iter 72/81 - loss 0.08280061 - samples/sec: 19.05 - lr: 0.020000\n",
+      "2021-09-21 21:59:56,847 epoch 6 - iter 80/81 - loss 0.07497563 - samples/sec: 13.03 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:01:05,728 EPOCH 6 done: loss 0.1127 - lr 0.0200000\n",
-      "2021-09-08 02:01:07,383 DEV : loss 0.3073955774307251 - score 0.6667\n",
-      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 02:01:07,385 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 02:01:07,406 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:01:07,941 epoch 7 - iter 8/81 - loss 0.00404686 - samples/sec: 15.38 - lr: 0.010000\n",
-      "2021-09-08 02:01:08,375 epoch 7 - iter 16/81 - loss 0.00334561 - samples/sec: 18.45 - lr: 0.010000\n",
-      "2021-09-08 02:01:08,776 epoch 7 - iter 24/81 - loss 0.10257860 - samples/sec: 19.98 - lr: 0.010000\n",
-      "2021-09-08 02:01:09,248 epoch 7 - iter 32/81 - loss 0.07769266 - samples/sec: 16.96 - lr: 0.010000\n",
-      "2021-09-08 02:01:09,732 epoch 7 - iter 40/81 - loss 0.06230299 - samples/sec: 16.58 - lr: 0.010000\n",
-      "2021-09-08 02:01:10,116 epoch 7 - iter 48/81 - loss 0.09557344 - samples/sec: 20.87 - lr: 0.010000\n",
-      "2021-09-08 02:01:10,609 epoch 7 - iter 56/81 - loss 0.08351479 - samples/sec: 16.25 - lr: 0.010000\n",
-      "2021-09-08 02:01:11,307 epoch 7 - iter 64/81 - loss 0.07316261 - samples/sec: 11.47 - lr: 0.010000\n",
-      "2021-09-08 02:01:11,753 epoch 7 - iter 72/81 - loss 0.06510531 - samples/sec: 17.96 - lr: 0.010000\n",
-      "2021-09-08 02:01:12,152 epoch 7 - iter 80/81 - loss 0.05864846 - samples/sec: 20.12 - lr: 0.010000\n",
-      "2021-09-08 02:01:12,199 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:01:12,200 EPOCH 7 done: loss 0.0579 - lr 0.0100000\n",
-      "2021-09-08 02:01:12,471 DEV : loss 0.3368164896965027 - score 0.6667\n",
-      "2021-09-08 02:01:12,472 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:01:12,475 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:01:12,879 epoch 8 - iter 8/81 - loss 0.22657165 - samples/sec: 20.57 - lr: 0.010000\n",
-      "2021-09-08 02:01:13,379 epoch 8 - iter 16/81 - loss 0.23008808 - samples/sec: 16.01 - lr: 0.010000\n",
-      "2021-09-08 02:01:13,985 epoch 8 - iter 24/81 - loss 0.20032715 - samples/sec: 13.22 - lr: 0.010000\n",
-      "2021-09-08 02:01:14,432 epoch 8 - iter 32/81 - loss 0.15121425 - samples/sec: 17.91 - lr: 0.010000\n",
-      "2021-09-08 02:01:14,894 epoch 8 - iter 40/81 - loss 0.12110287 - samples/sec: 17.34 - lr: 0.010000\n",
-      "2021-09-08 02:01:15,293 epoch 8 - iter 48/81 - loss 0.10104697 - samples/sec: 20.08 - lr: 0.010000\n",
-      "2021-09-08 02:01:15,779 epoch 8 - iter 56/81 - loss 0.08672646 - samples/sec: 16.50 - lr: 0.010000\n",
-      "2021-09-08 02:01:16,232 epoch 8 - iter 64/81 - loss 0.07653784 - samples/sec: 17.70 - lr: 0.010000\n",
-      "2021-09-08 02:01:16,659 epoch 8 - iter 72/81 - loss 0.06815273 - samples/sec: 18.75 - lr: 0.010000\n",
-      "2021-09-08 02:01:17,074 epoch 8 - iter 80/81 - loss 0.06145363 - samples/sec: 19.32 - lr: 0.010000\n",
-      "2021-09-08 02:01:17,120 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:01:17,121 EPOCH 8 done: loss 0.0607 - lr 0.0100000\n",
-      "2021-09-08 02:01:17,522 DEV : loss 0.2950240969657898 - score 0.6667\n",
-      "2021-09-08 02:01:17,523 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:01:17,621 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:01:18,057 epoch 9 - iter 8/81 - loss 0.00281363 - samples/sec: 18.97 - lr: 0.010000\n",
-      "2021-09-08 02:01:18,510 epoch 9 - iter 16/81 - loss 0.00183622 - samples/sec: 17.70 - lr: 0.010000\n",
-      "2021-09-08 02:01:18,942 epoch 9 - iter 24/81 - loss 0.00140971 - samples/sec: 18.56 - lr: 0.010000\n",
-      "2021-09-08 02:01:19,368 epoch 9 - iter 32/81 - loss 0.00240311 - samples/sec: 18.81 - lr: 0.010000\n",
-      "2021-09-08 02:01:19,863 epoch 9 - iter 40/81 - loss 0.00859204 - samples/sec: 16.19 - lr: 0.010000\n",
-      "2021-09-08 02:01:20,450 epoch 9 - iter 48/81 - loss 0.00726688 - samples/sec: 13.65 - lr: 0.010000\n",
-      "2021-09-08 02:01:20,845 epoch 9 - iter 56/81 - loss 0.00634425 - samples/sec: 20.27 - lr: 0.010000\n",
-      "2021-09-08 02:01:21,257 epoch 9 - iter 64/81 - loss 0.00565803 - samples/sec: 19.46 - lr: 0.010000\n",
-      "2021-09-08 02:01:21,719 epoch 9 - iter 72/81 - loss 0.00511691 - samples/sec: 17.32 - lr: 0.010000\n",
-      "2021-09-08 02:01:22,119 epoch 9 - iter 80/81 - loss 0.00464682 - samples/sec: 20.04 - lr: 0.010000\n",
-      "2021-09-08 02:01:22,175 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:01:22,176 EPOCH 9 done: loss 0.0046 - lr 0.0100000\n",
-      "2021-09-08 02:01:29,405 DEV : loss 0.3474740982055664 - score 0.6667\n",
-      "2021-09-08 02:01:29,407 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 02:01:29,410 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:01:29,880 epoch 10 - iter 8/81 - loss 0.00331687 - samples/sec: 17.58 - lr: 0.010000\n",
-      "2021-09-08 02:01:30,371 epoch 10 - iter 16/81 - loss 0.00195396 - samples/sec: 16.32 - lr: 0.010000\n",
-      "2021-09-08 02:01:30,942 epoch 10 - iter 24/81 - loss 0.00142203 - samples/sec: 14.05 - lr: 0.010000\n",
-      "2021-09-08 02:01:31,382 epoch 10 - iter 32/81 - loss 0.00123847 - samples/sec: 18.21 - lr: 0.010000\n",
-      "2021-09-08 02:01:31,811 epoch 10 - iter 40/81 - loss 0.01011022 - samples/sec: 18.68 - lr: 0.010000\n",
-      "2021-09-08 02:01:32,202 epoch 10 - iter 48/81 - loss 0.00855287 - samples/sec: 20.46 - lr: 0.010000\n",
-      "2021-09-08 02:01:32,665 epoch 10 - iter 56/81 - loss 0.01055102 - samples/sec: 17.32 - lr: 0.010000\n",
-      "2021-09-08 02:01:33,088 epoch 10 - iter 64/81 - loss 0.00936049 - samples/sec: 18.94 - lr: 0.010000\n",
-      "2021-09-08 02:01:33,554 epoch 10 - iter 72/81 - loss 0.00857122 - samples/sec: 17.19 - lr: 0.010000\n",
-      "2021-09-08 02:01:33,951 epoch 10 - iter 80/81 - loss 0.00784869 - samples/sec: 20.23 - lr: 0.010000\n",
-      "2021-09-08 02:01:34,004 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:01:34,004 EPOCH 10 done: loss 0.0078 - lr 0.0100000\n",
-      "2021-09-08 02:01:34,363 DEV : loss 0.32442739605903625 - score 0.6667\n",
-      "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 02:01:34,364 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 02:01:40,015 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:01:40,016 Testing using best model ...\n",
-      "2021-09-08 02:01:40,041 loading file None/best-model.pt\n",
+      "2021-09-21 21:59:56,912 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:59:56,912 EPOCH 6 done: loss 0.0741 - lr 0.0200000\n",
+      "2021-09-21 21:59:57,309 DEV : loss 0.6377374529838562 - score 0.4444\n",
+      "2021-09-21 21:59:57,310 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:59:57,312 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:59:57,770 epoch 7 - iter 8/81 - loss 0.00116125 - samples/sec: 18.27 - lr: 0.020000\n",
+      "2021-09-21 21:59:58,219 epoch 7 - iter 16/81 - loss 0.22973001 - samples/sec: 17.83 - lr: 0.020000\n",
+      "2021-09-21 21:59:58,650 epoch 7 - iter 24/81 - loss 0.15424199 - samples/sec: 18.60 - lr: 0.020000\n",
+      "2021-09-21 21:59:59,043 epoch 7 - iter 32/81 - loss 0.11684990 - samples/sec: 20.40 - lr: 0.020000\n",
+      "2021-09-21 21:59:59,513 epoch 7 - iter 40/81 - loss 0.09362280 - samples/sec: 17.05 - lr: 0.020000\n",
+      "2021-09-21 22:00:00,025 epoch 7 - iter 48/81 - loss 0.08506628 - samples/sec: 15.65 - lr: 0.020000\n",
+      "2021-09-21 22:00:00,417 epoch 7 - iter 56/81 - loss 0.07311920 - samples/sec: 20.43 - lr: 0.020000\n",
+      "2021-09-21 22:00:00,819 epoch 7 - iter 64/81 - loss 0.06423002 - samples/sec: 19.91 - lr: 0.020000\n",
+      "2021-09-21 22:00:01,211 epoch 7 - iter 72/81 - loss 0.07710914 - samples/sec: 20.45 - lr: 0.020000\n",
+      "2021-09-21 22:00:01,643 epoch 7 - iter 80/81 - loss 0.07166208 - samples/sec: 18.58 - lr: 0.020000\n",
+      "2021-09-21 22:00:01,691 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:00:01,691 EPOCH 7 done: loss 0.0708 - lr 0.0200000\n",
+      "2021-09-21 22:00:02,047 DEV : loss 0.6742804646492004 - score 0.4444\n",
+      "2021-09-21 22:00:02,049 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 22:00:02,051 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:00:02,794 epoch 8 - iter 8/81 - loss 0.00607757 - samples/sec: 10.99 - lr: 0.020000\n",
+      "2021-09-21 22:00:03,440 epoch 8 - iter 16/81 - loss 0.00384311 - samples/sec: 12.40 - lr: 0.020000\n",
+      "2021-09-21 22:00:03,988 epoch 8 - iter 24/81 - loss 0.00269139 - samples/sec: 14.62 - lr: 0.020000\n",
+      "2021-09-21 22:00:04,625 epoch 8 - iter 32/81 - loss 0.00222122 - samples/sec: 12.58 - lr: 0.020000\n",
+      "2021-09-21 22:00:05,159 epoch 8 - iter 40/81 - loss 0.00263748 - samples/sec: 15.00 - lr: 0.020000\n",
+      "2021-09-21 22:00:05,629 epoch 8 - iter 48/81 - loss 0.01695991 - samples/sec: 17.03 - lr: 0.020000\n",
+      "2021-09-21 22:00:06,174 epoch 8 - iter 56/81 - loss 0.01488434 - samples/sec: 14.70 - lr: 0.020000\n",
+      "2021-09-21 22:00:06,865 epoch 8 - iter 64/81 - loss 0.01332181 - samples/sec: 11.59 - lr: 0.020000\n",
+      "2021-09-21 22:00:07,386 epoch 8 - iter 72/81 - loss 0.02295241 - samples/sec: 15.39 - lr: 0.020000\n",
+      "2021-09-21 22:00:07,892 epoch 8 - iter 80/81 - loss 0.02103800 - samples/sec: 15.82 - lr: 0.020000\n",
+      "2021-09-21 22:00:07,944 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:00:07,944 EPOCH 8 done: loss 0.0209 - lr 0.0200000\n",
+      "2021-09-21 22:00:08,469 DEV : loss 0.6080173254013062 - score 0.4444\n",
+      "2021-09-21 22:00:08,470 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 22:00:08,473 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:00:09,144 epoch 9 - iter 8/81 - loss 0.00470196 - samples/sec: 12.24 - lr: 0.020000\n",
+      "2021-09-21 22:00:09,743 epoch 9 - iter 16/81 - loss 0.00319796 - samples/sec: 13.37 - lr: 0.020000\n",
+      "2021-09-21 22:00:10,186 epoch 9 - iter 24/81 - loss 0.00239323 - samples/sec: 18.11 - lr: 0.020000\n",
+      "2021-09-21 22:00:10,604 epoch 9 - iter 32/81 - loss 0.00359399 - samples/sec: 19.17 - lr: 0.020000\n",
+      "2021-09-21 22:00:11,003 epoch 9 - iter 40/81 - loss 0.00296721 - samples/sec: 20.09 - lr: 0.020000\n",
+      "2021-09-21 22:00:11,465 epoch 9 - iter 48/81 - loss 0.00252200 - samples/sec: 17.34 - lr: 0.020000\n",
+      "2021-09-21 22:00:11,857 epoch 9 - iter 56/81 - loss 0.00332329 - samples/sec: 20.47 - lr: 0.020000\n",
+      "2021-09-21 22:00:12,255 epoch 9 - iter 64/81 - loss 0.00292317 - samples/sec: 20.14 - lr: 0.020000\n",
+      "2021-09-21 22:00:12,795 epoch 9 - iter 72/81 - loss 0.00262290 - samples/sec: 14.83 - lr: 0.020000\n",
+      "2021-09-21 22:00:13,190 epoch 9 - iter 80/81 - loss 0.00237681 - samples/sec: 20.30 - lr: 0.020000\n",
+      "2021-09-21 22:00:13,239 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:00:13,239 EPOCH 9 done: loss 0.0023 - lr 0.0200000\n",
+      "2021-09-21 22:00:13,653 DEV : loss 0.6725186109542847 - score 0.4444\n",
+      "Epoch     9: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 22:00:13,654 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 22:00:15,597 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:00:16,118 epoch 10 - iter 8/81 - loss 0.04728900 - samples/sec: 15.87 - lr: 0.010000\n",
+      "2021-09-21 22:00:16,647 epoch 10 - iter 16/81 - loss 0.02373319 - samples/sec: 15.12 - lr: 0.010000\n",
+      "2021-09-21 22:00:17,231 epoch 10 - iter 24/81 - loss 0.01827466 - samples/sec: 13.71 - lr: 0.010000\n",
+      "2021-09-21 22:00:17,884 epoch 10 - iter 32/81 - loss 0.01378234 - samples/sec: 12.27 - lr: 0.010000\n",
+      "2021-09-21 22:00:18,393 epoch 10 - iter 40/81 - loss 0.01105587 - samples/sec: 15.74 - lr: 0.010000\n",
+      "2021-09-21 22:00:18,917 epoch 10 - iter 48/81 - loss 0.00925908 - samples/sec: 15.29 - lr: 0.010000\n",
+      "2021-09-21 22:00:19,584 epoch 10 - iter 56/81 - loss 0.00795687 - samples/sec: 12.00 - lr: 0.010000\n",
+      "2021-09-21 22:00:20,262 epoch 10 - iter 64/81 - loss 0.00699723 - samples/sec: 11.81 - lr: 0.010000\n",
+      "2021-09-21 22:00:20,849 epoch 10 - iter 72/81 - loss 0.00651224 - samples/sec: 13.64 - lr: 0.010000\n",
+      "2021-09-21 22:00:21,381 epoch 10 - iter 80/81 - loss 0.00588863 - samples/sec: 15.05 - lr: 0.010000\n",
+      "2021-09-21 22:00:21,478 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:00:21,478 EPOCH 10 done: loss 0.0064 - lr 0.0100000\n",
+      "2021-09-21 22:00:21,987 DEV : loss 0.7330253720283508 - score 0.3333\n",
+      "2021-09-21 22:00:21,988 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:00:28,593 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:00:28,594 Testing using best model ...\n",
+      "2021-09-21 22:00:28,595 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:01:50,662 \t0.5\n",
-      "2021-09-08 02:01:50,662 \n",
+      "2021-09-21 22:00:34,010 \t0.3\n",
+      "2021-09-21 22:00:34,011 \n",
       "Results:\n",
-      "- F-score (micro) 0.5\n",
-      "- F-score (macro) 0.4\n",
-      "- Accuracy 0.5\n",
+      "- F-score (micro) 0.3\n",
+      "- F-score (macro) 0.14\n",
+      "- Accuracy 0.3\n",
       "\n",
       "By class:\n",
       "                        precision    recall  f1-score   support\n",
       "\n",
-      "Family & Relationships     0.0000    0.0000    0.0000         2\n",
+      "Family & Relationships     0.2500    1.0000    0.4000         1\n",
       "                Health     0.0000    0.0000    0.0000         0\n",
-      " Science & Mathematics     1.0000    0.5000    0.6667         2\n",
-      " Entertainment & Music     1.0000    1.0000    1.0000         1\n",
-      "  Computers & Internet     0.5000    1.0000    0.6667         1\n",
-      " Education & Reference     1.0000    1.0000    1.0000         1\n",
+      " Science & Mathematics     0.0000    0.0000    0.0000         0\n",
+      " Entertainment & Music     0.0000    0.0000    0.0000         4\n",
+      "  Computers & Internet     0.0000    0.0000    0.0000         0\n",
+      " Education & Reference     0.0000    0.0000    0.0000         0\n",
       "                Sports     0.0000    0.0000    0.0000         0\n",
-      "     Society & Culture     0.0000    0.0000    0.0000         1\n",
-      "    Business & Finance     0.5000    1.0000    0.6667         1\n",
-      " Politics & Government     0.0000    0.0000    0.0000         1\n",
+      "     Society & Culture     0.0000    0.0000    0.0000         2\n",
+      "    Business & Finance     0.0000    0.0000    0.0000         1\n",
+      " Politics & Government     1.0000    1.0000    1.0000         2\n",
       "\n",
-      "             micro avg     0.5000    0.5000    0.5000        10\n",
-      "             macro avg     0.4000    0.4500    0.4000        10\n",
-      "          weighted avg     0.5000    0.5000    0.4667        10\n",
-      "           samples avg     0.5000    0.5000    0.5000        10\n",
+      "             micro avg     0.3000    0.3000    0.3000        10\n",
+      "             macro avg     0.1250    0.2000    0.1400        10\n",
+      "          weighted avg     0.2250    0.3000    0.2400        10\n",
+      "           samples avg     0.3000    0.3000    0.3000        10\n",
       "\n",
-      "2021-09-08 02:01:50,662 ----------------------------------------------------------------------------------------------------\n",
-      "Accuracy Durchschnitt: 0.7163265306122449\n"
+      "2021-09-21 22:00:34,011 ----------------------------------------------------------------------------------------------------\n",
+      "Accuracy Durchschnitt: 0.7095238095238096\n"
      ]
     }
    ],
@@ -2915,11 +2917,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "id": "0c4025f0",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.6904761904761905, 0.7040816326530612, 0.7448979591836735, 0.6972789115646258, 0.7108843537414966]\n",
+      "0.018950188132223312\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(avg_acc_list)\n",
+    "print(statistics.pstdev(avg_acc_list))"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -2931,7 +2945,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 6,
    "id": "ef4da272",
    "metadata": {},
    "outputs": [
@@ -2939,25 +2953,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:02:04,451 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
+      "2021-09-21 22:00:51,125 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:02:08,467 Computing label dictionary. Progress:\n"
+      "2021-09-21 22:00:55,074 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 91/91 [00:00<00:00, 13407.39it/s]"
+      "100%|██████████| 91/91 [00:00<00:00, 15285.00it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:02:08,476 [b'This text is about Family & Relationships', b'This text is about Health', b'This text is about Science & Mathematics', b'This text is about Entertainment & Music', b'This text is about Computers & Internet', b'This text is about Education & Reference', b'This text is about Sports', b'This text is about Society & Culture', b'This text is about Business & Finance', b'This text is about Politics & Government']\n",
-      "2021-09-08 02:02:08,507 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:02:08,509 Model: \"TARSClassifier(\n",
+      "2021-09-21 22:00:55,082 [b'This text is about Family & Relationships', b'This text is about Health', b'This text is about Science & Mathematics', b'This text is about Entertainment & Music', b'This text is about Computers & Internet', b'This text is about Education & Reference', b'This text is about Sports', b'This text is about Society & Culture', b'This text is about Business & Finance', b'This text is about Politics & Government']\n",
+      "2021-09-21 22:00:55,092 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:00:55,094 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -3270,25 +3284,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:02:08,509 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:02:08,510 Corpus: \"Corpus: 81 train + 9 dev + 10 test sentences\"\n",
-      "2021-09-08 02:02:08,510 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:02:08,511 Parameters:\n",
-      "2021-09-08 02:02:08,511  - learning_rate: \"0.02\"\n",
-      "2021-09-08 02:02:08,511  - mini_batch_size: \"1\"\n",
-      "2021-09-08 02:02:08,512  - patience: \"3\"\n",
-      "2021-09-08 02:02:08,512  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 02:02:08,512  - max_epochs: \"10\"\n",
-      "2021-09-08 02:02:08,513  - shuffle: \"True\"\n",
-      "2021-09-08 02:02:08,513  - train_with_dev: \"False\"\n",
-      "2021-09-08 02:02:08,513  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 02:02:08,514 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:02:08,514 Model training base path: \"None\"\n",
-      "2021-09-08 02:02:08,514 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:02:08,515 Device: cuda:1\n",
-      "2021-09-08 02:02:08,515 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:02:08,515 Embeddings storage mode: cpu\n",
-      "2021-09-08 02:02:08,539 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 22:00:55,094 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:00:55,095 Corpus: \"Corpus: 81 train + 9 dev + 10 test sentences\"\n",
+      "2021-09-21 22:00:55,095 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:00:55,095 Parameters:\n",
+      "2021-09-21 22:00:55,095  - learning_rate: \"0.02\"\n",
+      "2021-09-21 22:00:55,096  - mini_batch_size: \"1\"\n",
+      "2021-09-21 22:00:55,096  - patience: \"3\"\n",
+      "2021-09-21 22:00:55,096  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 22:00:55,096  - max_epochs: \"10\"\n",
+      "2021-09-21 22:00:55,097  - shuffle: \"True\"\n",
+      "2021-09-21 22:00:55,097  - train_with_dev: \"False\"\n",
+      "2021-09-21 22:00:55,097  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 22:00:55,098 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:00:55,098 Model training base path: \"None\"\n",
+      "2021-09-21 22:00:55,098 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:00:55,098 Device: cuda:0\n",
+      "2021-09-21 22:00:55,099 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:00:55,099 Embeddings storage mode: cpu\n",
+      "2021-09-21 22:00:55,107 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -3302,215 +3316,216 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:02:08,999 epoch 1 - iter 8/81 - loss 0.72776134 - samples/sec: 18.02 - lr: 0.020000\n",
-      "2021-09-08 02:02:09,470 epoch 1 - iter 16/81 - loss 0.50691761 - samples/sec: 17.04 - lr: 0.020000\n",
-      "2021-09-08 02:02:09,868 epoch 1 - iter 24/81 - loss 0.52419746 - samples/sec: 20.16 - lr: 0.020000\n",
-      "2021-09-08 02:02:10,382 epoch 1 - iter 32/81 - loss 0.50543412 - samples/sec: 15.58 - lr: 0.020000\n",
-      "2021-09-08 02:02:10,868 epoch 1 - iter 40/81 - loss 0.45643016 - samples/sec: 16.51 - lr: 0.020000\n",
-      "2021-09-08 02:02:11,437 epoch 1 - iter 48/81 - loss 0.51029856 - samples/sec: 14.08 - lr: 0.020000\n",
-      "2021-09-08 02:02:11,825 epoch 1 - iter 56/81 - loss 0.47965663 - samples/sec: 20.65 - lr: 0.020000\n",
-      "2021-09-08 02:02:12,294 epoch 1 - iter 64/81 - loss 0.54519354 - samples/sec: 17.09 - lr: 0.020000\n",
-      "2021-09-08 02:02:12,782 epoch 1 - iter 72/81 - loss 0.54161990 - samples/sec: 16.43 - lr: 0.020000\n",
-      "2021-09-08 02:02:13,343 epoch 1 - iter 80/81 - loss 0.57101093 - samples/sec: 14.27 - lr: 0.020000\n",
-      "2021-09-08 02:02:13,394 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:02:13,395 EPOCH 1 done: loss 0.5675 - lr 0.0200000\n",
-      "2021-09-08 02:02:13,816 DEV : loss 0.3137984573841095 - score 0.4444\n",
-      "2021-09-08 02:02:13,818 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:00:55,755 epoch 1 - iter 8/81 - loss 0.80604459 - samples/sec: 12.83 - lr: 0.020000\n",
+      "2021-09-21 22:00:56,522 epoch 1 - iter 16/81 - loss 0.41599010 - samples/sec: 10.44 - lr: 0.020000\n",
+      "2021-09-21 22:00:57,010 epoch 1 - iter 24/81 - loss 0.40770697 - samples/sec: 16.42 - lr: 0.020000\n",
+      "2021-09-21 22:00:57,443 epoch 1 - iter 32/81 - loss 0.47015572 - samples/sec: 18.49 - lr: 0.020000\n",
+      "2021-09-21 22:00:57,846 epoch 1 - iter 40/81 - loss 0.43046542 - samples/sec: 19.88 - lr: 0.020000\n",
+      "2021-09-21 22:00:58,263 epoch 1 - iter 48/81 - loss 0.45981972 - samples/sec: 19.25 - lr: 0.020000\n",
+      "2021-09-21 22:00:58,751 epoch 1 - iter 56/81 - loss 0.45837808 - samples/sec: 16.39 - lr: 0.020000\n",
+      "2021-09-21 22:00:59,245 epoch 1 - iter 64/81 - loss 0.49093241 - samples/sec: 16.22 - lr: 0.020000\n",
+      "2021-09-21 22:00:59,693 epoch 1 - iter 72/81 - loss 0.50271098 - samples/sec: 17.88 - lr: 0.020000\n",
+      "2021-09-21 22:01:00,164 epoch 1 - iter 80/81 - loss 0.49746232 - samples/sec: 17.04 - lr: 0.020000\n",
+      "2021-09-21 22:01:00,208 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:01:00,208 EPOCH 1 done: loss 0.4914 - lr 0.0200000\n",
+      "2021-09-21 22:01:01,252 DEV : loss 1.0317378044128418 - score 0.1111\n",
+      "2021-09-21 22:01:01,254 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:02:26,282 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:02:26,743 epoch 2 - iter 8/81 - loss 0.50743758 - samples/sec: 18.15 - lr: 0.020000\n",
-      "2021-09-08 02:02:27,184 epoch 2 - iter 16/81 - loss 0.29899055 - samples/sec: 18.18 - lr: 0.020000\n",
-      "2021-09-08 02:02:27,663 epoch 2 - iter 24/81 - loss 0.26215407 - samples/sec: 16.70 - lr: 0.020000\n",
-      "2021-09-08 02:02:28,102 epoch 2 - iter 32/81 - loss 0.26599593 - samples/sec: 18.28 - lr: 0.020000\n",
-      "2021-09-08 02:02:28,833 epoch 2 - iter 40/81 - loss 0.38474185 - samples/sec: 10.95 - lr: 0.020000\n",
-      "2021-09-08 02:02:29,237 epoch 2 - iter 48/81 - loss 0.36744337 - samples/sec: 19.81 - lr: 0.020000\n",
-      "2021-09-08 02:02:29,766 epoch 2 - iter 56/81 - loss 0.38992580 - samples/sec: 15.17 - lr: 0.020000\n",
-      "2021-09-08 02:02:30,224 epoch 2 - iter 64/81 - loss 0.37536383 - samples/sec: 17.47 - lr: 0.020000\n",
-      "2021-09-08 02:02:30,658 epoch 2 - iter 72/81 - loss 0.38925855 - samples/sec: 18.47 - lr: 0.020000\n",
-      "2021-09-08 02:02:31,188 epoch 2 - iter 80/81 - loss 0.39598116 - samples/sec: 15.11 - lr: 0.020000\n",
-      "2021-09-08 02:02:31,247 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:02:31,248 EPOCH 2 done: loss 0.3928 - lr 0.0200000\n",
-      "2021-09-08 02:02:31,581 DEV : loss 0.40924933552742004 - score 0.3333\n",
-      "2021-09-08 02:02:31,582 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:02:31,589 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:02:32,086 epoch 3 - iter 8/81 - loss 0.29747248 - samples/sec: 16.57 - lr: 0.020000\n",
-      "2021-09-08 02:02:32,475 epoch 3 - iter 16/81 - loss 0.16486733 - samples/sec: 20.64 - lr: 0.020000\n",
-      "2021-09-08 02:02:33,150 epoch 3 - iter 24/81 - loss 0.16921650 - samples/sec: 11.86 - lr: 0.020000\n",
-      "2021-09-08 02:02:33,558 epoch 3 - iter 32/81 - loss 0.13904397 - samples/sec: 19.67 - lr: 0.020000\n",
-      "2021-09-08 02:02:34,112 epoch 3 - iter 40/81 - loss 0.16676562 - samples/sec: 14.46 - lr: 0.020000\n",
-      "2021-09-08 02:02:34,504 epoch 3 - iter 48/81 - loss 0.17362088 - samples/sec: 20.46 - lr: 0.020000\n",
-      "2021-09-08 02:02:34,896 epoch 3 - iter 56/81 - loss 0.15368582 - samples/sec: 20.44 - lr: 0.020000\n",
-      "2021-09-08 02:02:35,344 epoch 3 - iter 64/81 - loss 0.17854745 - samples/sec: 17.91 - lr: 0.020000\n",
-      "2021-09-08 02:02:35,884 epoch 3 - iter 72/81 - loss 0.16210369 - samples/sec: 14.83 - lr: 0.020000\n",
-      "2021-09-08 02:02:36,291 epoch 3 - iter 80/81 - loss 0.17987091 - samples/sec: 19.71 - lr: 0.020000\n",
-      "2021-09-08 02:02:36,351 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:02:36,352 EPOCH 3 done: loss 0.1778 - lr 0.0200000\n",
-      "2021-09-08 02:02:36,689 DEV : loss 0.4356591999530792 - score 0.5556\n",
-      "2021-09-08 02:02:36,691 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:01:05,613 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:01:06,304 epoch 2 - iter 8/81 - loss 0.91416748 - samples/sec: 12.03 - lr: 0.020000\n",
+      "2021-09-21 22:01:06,916 epoch 2 - iter 16/81 - loss 0.64436357 - samples/sec: 13.08 - lr: 0.020000\n",
+      "2021-09-21 22:01:07,574 epoch 2 - iter 24/81 - loss 0.56064635 - samples/sec: 12.17 - lr: 0.020000\n",
+      "2021-09-21 22:01:08,021 epoch 2 - iter 32/81 - loss 0.54666901 - samples/sec: 17.93 - lr: 0.020000\n",
+      "2021-09-21 22:01:08,574 epoch 2 - iter 40/81 - loss 0.52592214 - samples/sec: 14.49 - lr: 0.020000\n",
+      "2021-09-21 22:01:09,228 epoch 2 - iter 48/81 - loss 0.54987935 - samples/sec: 12.25 - lr: 0.020000\n",
+      "2021-09-21 22:01:09,856 epoch 2 - iter 56/81 - loss 0.48520329 - samples/sec: 12.76 - lr: 0.020000\n",
+      "2021-09-21 22:01:10,454 epoch 2 - iter 64/81 - loss 0.46289102 - samples/sec: 13.39 - lr: 0.020000\n",
+      "2021-09-21 22:01:11,105 epoch 2 - iter 72/81 - loss 0.43260298 - samples/sec: 12.31 - lr: 0.020000\n",
+      "2021-09-21 22:01:11,682 epoch 2 - iter 80/81 - loss 0.42150872 - samples/sec: 13.87 - lr: 0.020000\n",
+      "2021-09-21 22:01:11,773 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:01:11,774 EPOCH 2 done: loss 0.4166 - lr 0.0200000\n",
+      "2021-09-21 22:01:12,280 DEV : loss 0.3746955394744873 - score 0.4444\n",
+      "2021-09-21 22:01:12,282 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:02:41,169 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:02:41,703 epoch 4 - iter 8/81 - loss 0.04849669 - samples/sec: 15.48 - lr: 0.020000\n",
-      "2021-09-08 02:02:42,348 epoch 4 - iter 16/81 - loss 0.07824300 - samples/sec: 12.42 - lr: 0.020000\n",
-      "2021-09-08 02:02:42,804 epoch 4 - iter 24/81 - loss 0.11248401 - samples/sec: 17.58 - lr: 0.020000\n",
-      "2021-09-08 02:02:43,225 epoch 4 - iter 32/81 - loss 0.08483243 - samples/sec: 19.04 - lr: 0.020000\n",
-      "2021-09-08 02:02:43,618 epoch 4 - iter 40/81 - loss 0.10291812 - samples/sec: 20.44 - lr: 0.020000\n",
-      "2021-09-08 02:02:44,151 epoch 4 - iter 48/81 - loss 0.09128620 - samples/sec: 15.02 - lr: 0.020000\n",
-      "2021-09-08 02:02:44,575 epoch 4 - iter 56/81 - loss 0.19664489 - samples/sec: 18.91 - lr: 0.020000\n",
-      "2021-09-08 02:02:44,960 epoch 4 - iter 64/81 - loss 0.17411624 - samples/sec: 20.78 - lr: 0.020000\n",
-      "2021-09-08 02:02:45,337 epoch 4 - iter 72/81 - loss 0.15683298 - samples/sec: 21.28 - lr: 0.020000\n",
-      "2021-09-08 02:02:45,782 epoch 4 - iter 80/81 - loss 0.14164632 - samples/sec: 18.01 - lr: 0.020000\n",
-      "2021-09-08 02:02:45,831 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:02:45,832 EPOCH 4 done: loss 0.1407 - lr 0.0200000\n",
-      "2021-09-08 02:02:46,145 DEV : loss 0.5596105456352234 - score 0.4444\n",
-      "2021-09-08 02:02:46,146 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:02:46,148 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:02:46,718 epoch 5 - iter 8/81 - loss 0.76481604 - samples/sec: 14.41 - lr: 0.020000\n",
-      "2021-09-08 02:02:47,120 epoch 5 - iter 16/81 - loss 0.44724154 - samples/sec: 19.91 - lr: 0.020000\n",
-      "2021-09-08 02:02:47,568 epoch 5 - iter 24/81 - loss 0.33671274 - samples/sec: 17.89 - lr: 0.020000\n",
-      "2021-09-08 02:02:48,117 epoch 5 - iter 32/81 - loss 0.25562800 - samples/sec: 14.58 - lr: 0.020000\n",
-      "2021-09-08 02:02:48,807 epoch 5 - iter 40/81 - loss 0.23447051 - samples/sec: 11.61 - lr: 0.020000\n",
-      "2021-09-08 02:02:49,294 epoch 5 - iter 48/81 - loss 0.19644926 - samples/sec: 16.46 - lr: 0.020000\n",
-      "2021-09-08 02:02:49,747 epoch 5 - iter 56/81 - loss 0.22085547 - samples/sec: 17.71 - lr: 0.020000\n",
-      "2021-09-08 02:02:50,176 epoch 5 - iter 64/81 - loss 0.19359622 - samples/sec: 18.65 - lr: 0.020000\n",
-      "2021-09-08 02:02:50,546 epoch 5 - iter 72/81 - loss 0.17248416 - samples/sec: 21.69 - lr: 0.020000\n",
-      "2021-09-08 02:02:50,932 epoch 5 - iter 80/81 - loss 0.20221190 - samples/sec: 20.77 - lr: 0.020000\n",
-      "2021-09-08 02:02:50,979 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:02:50,980 EPOCH 5 done: loss 0.2184 - lr 0.0200000\n",
-      "2021-09-08 02:02:51,306 DEV : loss 0.32115766406059265 - score 0.6667\n",
-      "2021-09-08 02:02:51,308 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:01:17,040 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:01:17,603 epoch 3 - iter 8/81 - loss 0.20829492 - samples/sec: 14.84 - lr: 0.020000\n",
+      "2021-09-21 22:01:18,175 epoch 3 - iter 16/81 - loss 0.17156889 - samples/sec: 14.00 - lr: 0.020000\n",
+      "2021-09-21 22:01:18,710 epoch 3 - iter 24/81 - loss 0.24045082 - samples/sec: 14.97 - lr: 0.020000\n",
+      "2021-09-21 22:01:19,345 epoch 3 - iter 32/81 - loss 0.23822396 - samples/sec: 12.62 - lr: 0.020000\n",
+      "2021-09-21 22:01:19,914 epoch 3 - iter 40/81 - loss 0.24994497 - samples/sec: 14.08 - lr: 0.020000\n",
+      "2021-09-21 22:01:20,485 epoch 3 - iter 48/81 - loss 0.26363792 - samples/sec: 14.02 - lr: 0.020000\n",
+      "2021-09-21 22:01:21,112 epoch 3 - iter 56/81 - loss 0.31772913 - samples/sec: 12.77 - lr: 0.020000\n",
+      "2021-09-21 22:01:21,742 epoch 3 - iter 64/81 - loss 0.27880139 - samples/sec: 12.71 - lr: 0.020000\n",
+      "2021-09-21 22:01:22,359 epoch 3 - iter 72/81 - loss 0.32128887 - samples/sec: 12.99 - lr: 0.020000\n",
+      "2021-09-21 22:01:23,045 epoch 3 - iter 80/81 - loss 0.30661606 - samples/sec: 11.67 - lr: 0.020000\n",
+      "2021-09-21 22:01:23,097 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:01:23,097 EPOCH 3 done: loss 0.3029 - lr 0.0200000\n",
+      "2021-09-21 22:01:23,625 DEV : loss 0.4398990869522095 - score 0.4444\n",
+      "2021-09-21 22:01:23,627 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:01:23,632 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:01:24,271 epoch 4 - iter 8/81 - loss 0.57478667 - samples/sec: 12.94 - lr: 0.020000\n",
+      "2021-09-21 22:01:24,837 epoch 4 - iter 16/81 - loss 0.44309605 - samples/sec: 14.15 - lr: 0.020000\n",
+      "2021-09-21 22:01:25,363 epoch 4 - iter 24/81 - loss 0.30790082 - samples/sec: 15.21 - lr: 0.020000\n",
+      "2021-09-21 22:01:25,919 epoch 4 - iter 32/81 - loss 0.23405664 - samples/sec: 14.41 - lr: 0.020000\n",
+      "2021-09-21 22:01:26,505 epoch 4 - iter 40/81 - loss 0.21814245 - samples/sec: 13.68 - lr: 0.020000\n",
+      "2021-09-21 22:01:27,020 epoch 4 - iter 48/81 - loss 0.19403347 - samples/sec: 15.55 - lr: 0.020000\n",
+      "2021-09-21 22:01:27,697 epoch 4 - iter 56/81 - loss 0.18427028 - samples/sec: 11.83 - lr: 0.020000\n",
+      "2021-09-21 22:01:28,335 epoch 4 - iter 64/81 - loss 0.16456716 - samples/sec: 12.55 - lr: 0.020000\n",
+      "2021-09-21 22:01:29,088 epoch 4 - iter 72/81 - loss 0.22995548 - samples/sec: 10.64 - lr: 0.020000\n",
+      "2021-09-21 22:01:29,611 epoch 4 - iter 80/81 - loss 0.21343822 - samples/sec: 15.32 - lr: 0.020000\n",
+      "2021-09-21 22:01:29,669 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:01:29,669 EPOCH 4 done: loss 0.2108 - lr 0.0200000\n",
+      "2021-09-21 22:01:30,174 DEV : loss 0.315221905708313 - score 0.6667\n",
+      "2021-09-21 22:01:30,175 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:02:55,728 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:02:56,171 epoch 6 - iter 8/81 - loss 0.00235046 - samples/sec: 19.25 - lr: 0.020000\n",
-      "2021-09-08 02:02:56,628 epoch 6 - iter 16/81 - loss 0.18795269 - samples/sec: 17.51 - lr: 0.020000\n",
-      "2021-09-08 02:02:57,115 epoch 6 - iter 24/81 - loss 0.34256517 - samples/sec: 16.50 - lr: 0.020000\n",
-      "2021-09-08 02:02:57,741 epoch 6 - iter 32/81 - loss 0.26251738 - samples/sec: 12.79 - lr: 0.020000\n",
-      "2021-09-08 02:02:58,137 epoch 6 - iter 40/81 - loss 0.21046433 - samples/sec: 20.22 - lr: 0.020000\n",
-      "2021-09-08 02:02:58,529 epoch 6 - iter 48/81 - loss 0.17596137 - samples/sec: 20.47 - lr: 0.020000\n",
-      "2021-09-08 02:02:59,010 epoch 6 - iter 56/81 - loss 0.15134519 - samples/sec: 16.66 - lr: 0.020000\n",
-      "2021-09-08 02:02:59,449 epoch 6 - iter 64/81 - loss 0.15475715 - samples/sec: 18.24 - lr: 0.020000\n",
-      "2021-09-08 02:02:59,971 epoch 6 - iter 72/81 - loss 0.13788841 - samples/sec: 15.35 - lr: 0.020000\n",
-      "2021-09-08 02:03:00,413 epoch 6 - iter 80/81 - loss 0.12425470 - samples/sec: 18.11 - lr: 0.020000\n",
-      "2021-09-08 02:03:00,481 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 22:01:39,268 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:01:39,802 epoch 5 - iter 8/81 - loss 0.00314300 - samples/sec: 15.46 - lr: 0.020000\n",
+      "2021-09-21 22:01:40,370 epoch 5 - iter 16/81 - loss 0.01580082 - samples/sec: 14.11 - lr: 0.020000\n",
+      "2021-09-21 22:01:40,980 epoch 5 - iter 24/81 - loss 0.01110174 - samples/sec: 13.12 - lr: 0.020000\n",
+      "2021-09-21 22:01:41,697 epoch 5 - iter 32/81 - loss 0.07192961 - samples/sec: 11.17 - lr: 0.020000\n",
+      "2021-09-21 22:01:42,264 epoch 5 - iter 40/81 - loss 0.08811792 - samples/sec: 14.13 - lr: 0.020000\n",
+      "2021-09-21 22:01:42,781 epoch 5 - iter 48/81 - loss 0.10529751 - samples/sec: 15.49 - lr: 0.020000\n",
+      "2021-09-21 22:01:43,338 epoch 5 - iter 56/81 - loss 0.09386979 - samples/sec: 14.38 - lr: 0.020000\n",
+      "2021-09-21 22:01:43,927 epoch 5 - iter 64/81 - loss 0.08230163 - samples/sec: 13.60 - lr: 0.020000\n",
+      "2021-09-21 22:01:44,699 epoch 5 - iter 72/81 - loss 0.09264848 - samples/sec: 10.37 - lr: 0.020000\n",
+      "2021-09-21 22:01:45,229 epoch 5 - iter 80/81 - loss 0.11211592 - samples/sec: 15.12 - lr: 0.020000\n",
+      "2021-09-21 22:01:45,303 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:01:45,303 EPOCH 5 done: loss 0.1111 - lr 0.0200000\n",
+      "2021-09-21 22:01:45,848 DEV : loss 0.3423486649990082 - score 0.6667\n",
+      "2021-09-21 22:01:45,850 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:01:45,852 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:01:46,477 epoch 6 - iter 8/81 - loss 0.14213189 - samples/sec: 13.36 - lr: 0.020000\n",
+      "2021-09-21 22:01:47,042 epoch 6 - iter 16/81 - loss 0.07493987 - samples/sec: 14.16 - lr: 0.020000\n",
+      "2021-09-21 22:01:47,464 epoch 6 - iter 24/81 - loss 0.05069574 - samples/sec: 19.00 - lr: 0.020000\n",
+      "2021-09-21 22:01:47,872 epoch 6 - iter 32/81 - loss 0.03851945 - samples/sec: 19.64 - lr: 0.020000\n",
+      "2021-09-21 22:01:48,410 epoch 6 - iter 40/81 - loss 0.03128337 - samples/sec: 14.92 - lr: 0.020000\n",
+      "2021-09-21 22:01:48,896 epoch 6 - iter 48/81 - loss 0.02854054 - samples/sec: 16.47 - lr: 0.020000\n",
+      "2021-09-21 22:01:49,333 epoch 6 - iter 56/81 - loss 0.02486173 - samples/sec: 18.35 - lr: 0.020000\n",
+      "2021-09-21 22:01:49,807 epoch 6 - iter 64/81 - loss 0.02190952 - samples/sec: 16.89 - lr: 0.020000\n",
+      "2021-09-21 22:01:50,196 epoch 6 - iter 72/81 - loss 0.01964632 - samples/sec: 20.63 - lr: 0.020000\n",
+      "2021-09-21 22:01:50,570 epoch 6 - iter 80/81 - loss 0.01775095 - samples/sec: 21.39 - lr: 0.020000\n",
+      "2021-09-21 22:01:50,614 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:03:00,481 EPOCH 6 done: loss 0.1229 - lr 0.0200000\n",
-      "2021-09-08 02:03:00,839 DEV : loss 0.37720873951911926 - score 0.6667\n",
-      "2021-09-08 02:03:00,840 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:03:01,001 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:03:01,418 epoch 7 - iter 8/81 - loss 0.00162310 - samples/sec: 19.97 - lr: 0.020000\n",
-      "2021-09-08 02:03:01,813 epoch 7 - iter 16/81 - loss 0.00123506 - samples/sec: 20.31 - lr: 0.020000\n",
-      "2021-09-08 02:03:02,187 epoch 7 - iter 24/81 - loss 0.00098418 - samples/sec: 21.44 - lr: 0.020000\n",
-      "2021-09-08 02:03:02,675 epoch 7 - iter 32/81 - loss 0.00296054 - samples/sec: 16.41 - lr: 0.020000\n",
-      "2021-09-08 02:03:03,135 epoch 7 - iter 40/81 - loss 0.00253029 - samples/sec: 17.43 - lr: 0.020000\n",
-      "2021-09-08 02:03:03,698 epoch 7 - iter 48/81 - loss 0.03493481 - samples/sec: 14.22 - lr: 0.020000\n",
-      "2021-09-08 02:03:04,175 epoch 7 - iter 56/81 - loss 0.04759357 - samples/sec: 16.76 - lr: 0.020000\n",
-      "2021-09-08 02:03:04,676 epoch 7 - iter 64/81 - loss 0.04243077 - samples/sec: 16.00 - lr: 0.020000\n",
-      "2021-09-08 02:03:05,122 epoch 7 - iter 72/81 - loss 0.03798781 - samples/sec: 17.98 - lr: 0.020000\n",
-      "2021-09-08 02:03:05,638 epoch 7 - iter 80/81 - loss 0.04902619 - samples/sec: 15.53 - lr: 0.020000\n",
-      "2021-09-08 02:03:05,685 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:03:05,685 EPOCH 7 done: loss 0.0484 - lr 0.0200000\n",
-      "2021-09-08 02:03:06,006 DEV : loss 0.3637709319591522 - score 0.6667\n",
-      "2021-09-08 02:03:06,007 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:03:06,020 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:03:06,445 epoch 8 - iter 8/81 - loss 0.00094752 - samples/sec: 19.48 - lr: 0.020000\n",
-      "2021-09-08 02:03:07,144 epoch 8 - iter 16/81 - loss 0.00072830 - samples/sec: 11.46 - lr: 0.020000\n",
-      "2021-09-08 02:03:07,625 epoch 8 - iter 24/81 - loss 0.02480897 - samples/sec: 16.67 - lr: 0.020000\n",
-      "2021-09-08 02:03:08,043 epoch 8 - iter 32/81 - loss 0.01879155 - samples/sec: 19.16 - lr: 0.020000\n",
-      "2021-09-08 02:03:08,442 epoch 8 - iter 40/81 - loss 0.01522924 - samples/sec: 20.07 - lr: 0.020000\n",
-      "2021-09-08 02:03:08,861 epoch 8 - iter 48/81 - loss 0.01915027 - samples/sec: 19.13 - lr: 0.020000\n",
-      "2021-09-08 02:03:09,302 epoch 8 - iter 56/81 - loss 0.01654551 - samples/sec: 18.18 - lr: 0.020000\n",
-      "2021-09-08 02:03:09,923 epoch 8 - iter 64/81 - loss 0.01455455 - samples/sec: 12.89 - lr: 0.020000\n",
-      "2021-09-08 02:03:10,340 epoch 8 - iter 72/81 - loss 0.02831993 - samples/sec: 19.23 - lr: 0.020000\n",
-      "2021-09-08 02:03:10,769 epoch 8 - iter 80/81 - loss 0.02551352 - samples/sec: 18.65 - lr: 0.020000\n",
-      "2021-09-08 02:03:10,817 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:03:10,817 EPOCH 8 done: loss 0.0252 - lr 0.0200000\n",
-      "2021-09-08 02:03:11,186 DEV : loss 0.3882541060447693 - score 0.7778\n",
-      "2021-09-08 02:03:11,187 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:01:50,614 EPOCH 6 done: loss 0.0175 - lr 0.0200000\n",
+      "2021-09-21 22:01:51,657 DEV : loss 0.38115090131759644 - score 0.7778\n",
+      "2021-09-21 22:01:51,658 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:03:16,320 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:03:16,871 epoch 9 - iter 8/81 - loss 0.00029552 - samples/sec: 14.96 - lr: 0.020000\n",
-      "2021-09-08 02:03:17,375 epoch 9 - iter 16/81 - loss 0.00038196 - samples/sec: 15.88 - lr: 0.020000\n",
-      "2021-09-08 02:03:17,839 epoch 9 - iter 24/81 - loss 0.00037066 - samples/sec: 17.28 - lr: 0.020000\n",
-      "2021-09-08 02:03:18,276 epoch 9 - iter 32/81 - loss 0.00284515 - samples/sec: 18.35 - lr: 0.020000\n",
-      "2021-09-08 02:03:18,673 epoch 9 - iter 40/81 - loss 0.00232578 - samples/sec: 20.20 - lr: 0.020000\n",
-      "2021-09-08 02:03:19,154 epoch 9 - iter 48/81 - loss 0.01136620 - samples/sec: 16.64 - lr: 0.020000\n",
-      "2021-09-08 02:03:19,601 epoch 9 - iter 56/81 - loss 0.00980283 - samples/sec: 17.94 - lr: 0.020000\n",
-      "2021-09-08 02:03:19,971 epoch 9 - iter 64/81 - loss 0.00860071 - samples/sec: 21.65 - lr: 0.020000\n",
-      "2021-09-08 02:03:20,437 epoch 9 - iter 72/81 - loss 0.00769994 - samples/sec: 17.18 - lr: 0.020000\n",
-      "2021-09-08 02:03:20,937 epoch 9 - iter 80/81 - loss 0.00711019 - samples/sec: 16.02 - lr: 0.020000\n",
-      "2021-09-08 02:03:20,985 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:03:20,986 EPOCH 9 done: loss 0.0070 - lr 0.0200000\n",
-      "2021-09-08 02:03:21,310 DEV : loss 0.4025845229625702 - score 0.6667\n",
-      "2021-09-08 02:03:21,311 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:03:21,328 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:03:21,808 epoch 10 - iter 8/81 - loss 0.00325708 - samples/sec: 17.22 - lr: 0.020000\n",
-      "2021-09-08 02:03:22,384 epoch 10 - iter 16/81 - loss 0.00174740 - samples/sec: 13.90 - lr: 0.020000\n",
-      "2021-09-08 02:03:22,818 epoch 10 - iter 24/81 - loss 0.00123029 - samples/sec: 18.46 - lr: 0.020000\n",
-      "2021-09-08 02:03:23,492 epoch 10 - iter 32/81 - loss 0.00103926 - samples/sec: 11.88 - lr: 0.020000\n",
-      "2021-09-08 02:03:23,886 epoch 10 - iter 40/81 - loss 0.00086574 - samples/sec: 20.37 - lr: 0.020000\n",
-      "2021-09-08 02:03:24,290 epoch 10 - iter 48/81 - loss 0.00074656 - samples/sec: 19.84 - lr: 0.020000\n",
-      "2021-09-08 02:03:24,758 epoch 10 - iter 56/81 - loss 0.00066399 - samples/sec: 17.10 - lr: 0.020000\n",
-      "2021-09-08 02:03:25,151 epoch 10 - iter 64/81 - loss 0.00713492 - samples/sec: 20.44 - lr: 0.020000\n",
-      "2021-09-08 02:03:25,545 epoch 10 - iter 72/81 - loss 0.00636971 - samples/sec: 20.33 - lr: 0.020000\n",
-      "2021-09-08 02:03:25,924 epoch 10 - iter 80/81 - loss 0.00575542 - samples/sec: 21.17 - lr: 0.020000\n",
-      "2021-09-08 02:03:25,995 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:03:25,996 EPOCH 10 done: loss 0.0123 - lr 0.0200000\n",
-      "2021-09-08 02:03:26,410 DEV : loss 0.3854069113731384 - score 0.6667\n",
-      "2021-09-08 02:03:26,410 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:03:34,880 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:03:34,881 Testing using best model ...\n",
-      "2021-09-08 02:03:34,883 loading file None/best-model.pt\n",
+      "2021-09-21 22:01:58,159 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:01:58,811 epoch 7 - iter 8/81 - loss 0.30785277 - samples/sec: 12.75 - lr: 0.020000\n",
+      "2021-09-21 22:01:59,278 epoch 7 - iter 16/81 - loss 0.29556808 - samples/sec: 17.16 - lr: 0.020000\n",
+      "2021-09-21 22:01:59,731 epoch 7 - iter 24/81 - loss 0.20008984 - samples/sec: 17.67 - lr: 0.020000\n",
+      "2021-09-21 22:02:00,216 epoch 7 - iter 32/81 - loss 0.15065302 - samples/sec: 16.53 - lr: 0.020000\n",
+      "2021-09-21 22:02:00,653 epoch 7 - iter 40/81 - loss 0.12060361 - samples/sec: 18.31 - lr: 0.020000\n",
+      "2021-09-21 22:02:01,161 epoch 7 - iter 48/81 - loss 0.10233246 - samples/sec: 15.79 - lr: 0.020000\n",
+      "2021-09-21 22:02:01,712 epoch 7 - iter 56/81 - loss 0.08796075 - samples/sec: 14.53 - lr: 0.020000\n",
+      "2021-09-21 22:02:02,230 epoch 7 - iter 64/81 - loss 0.07704728 - samples/sec: 15.47 - lr: 0.020000\n",
+      "2021-09-21 22:02:02,746 epoch 7 - iter 72/81 - loss 0.07155202 - samples/sec: 15.53 - lr: 0.020000\n",
+      "2021-09-21 22:02:03,306 epoch 7 - iter 80/81 - loss 0.06546485 - samples/sec: 14.28 - lr: 0.020000\n",
+      "2021-09-21 22:02:03,360 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:02:03,361 EPOCH 7 done: loss 0.0647 - lr 0.0200000\n",
+      "2021-09-21 22:02:03,801 DEV : loss 0.5101792216300964 - score 0.5556\n",
+      "2021-09-21 22:02:03,802 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:02:03,821 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:02:04,311 epoch 8 - iter 8/81 - loss 0.20046718 - samples/sec: 17.22 - lr: 0.020000\n",
+      "2021-09-21 22:02:04,831 epoch 8 - iter 16/81 - loss 0.39588822 - samples/sec: 15.40 - lr: 0.020000\n",
+      "2021-09-21 22:02:05,381 epoch 8 - iter 24/81 - loss 0.26629881 - samples/sec: 14.56 - lr: 0.020000\n",
+      "2021-09-21 22:02:05,840 epoch 8 - iter 32/81 - loss 0.19980477 - samples/sec: 17.46 - lr: 0.020000\n",
+      "2021-09-21 22:02:06,283 epoch 8 - iter 40/81 - loss 0.16389520 - samples/sec: 18.08 - lr: 0.020000\n",
+      "2021-09-21 22:02:06,924 epoch 8 - iter 48/81 - loss 0.13666097 - samples/sec: 12.49 - lr: 0.020000\n",
+      "2021-09-21 22:02:07,395 epoch 8 - iter 56/81 - loss 0.11733028 - samples/sec: 17.03 - lr: 0.020000\n",
+      "2021-09-21 22:02:07,869 epoch 8 - iter 64/81 - loss 0.10271774 - samples/sec: 16.88 - lr: 0.020000\n",
+      "2021-09-21 22:02:08,367 epoch 8 - iter 72/81 - loss 0.10801643 - samples/sec: 16.10 - lr: 0.020000\n",
+      "2021-09-21 22:02:08,897 epoch 8 - iter 80/81 - loss 0.09725277 - samples/sec: 15.09 - lr: 0.020000\n",
+      "2021-09-21 22:02:08,950 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:02:08,950 EPOCH 8 done: loss 0.0961 - lr 0.0200000\n",
+      "2021-09-21 22:02:09,398 DEV : loss 0.4294782876968384 - score 0.5556\n",
+      "2021-09-21 22:02:09,399 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 22:02:09,421 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:02:09,947 epoch 9 - iter 8/81 - loss 0.00049661 - samples/sec: 15.76 - lr: 0.020000\n",
+      "2021-09-21 22:02:10,438 epoch 9 - iter 16/81 - loss 0.00114904 - samples/sec: 16.32 - lr: 0.020000\n",
+      "2021-09-21 22:02:11,036 epoch 9 - iter 24/81 - loss 0.00273528 - samples/sec: 13.40 - lr: 0.020000\n",
+      "2021-09-21 22:02:11,569 epoch 9 - iter 32/81 - loss 0.00210375 - samples/sec: 15.03 - lr: 0.020000\n",
+      "2021-09-21 22:02:12,043 epoch 9 - iter 40/81 - loss 0.00177362 - samples/sec: 16.90 - lr: 0.020000\n",
+      "2021-09-21 22:02:12,591 epoch 9 - iter 48/81 - loss 0.00165590 - samples/sec: 14.61 - lr: 0.020000\n",
+      "2021-09-21 22:02:13,047 epoch 9 - iter 56/81 - loss 0.00146695 - samples/sec: 17.57 - lr: 0.020000\n",
+      "2021-09-21 22:02:13,593 epoch 9 - iter 64/81 - loss 0.00131831 - samples/sec: 14.66 - lr: 0.020000\n",
+      "2021-09-21 22:02:14,036 epoch 9 - iter 72/81 - loss 0.02924503 - samples/sec: 18.10 - lr: 0.020000\n",
+      "2021-09-21 22:02:14,497 epoch 9 - iter 80/81 - loss 0.02655652 - samples/sec: 17.38 - lr: 0.020000\n",
+      "2021-09-21 22:02:14,558 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:02:14,558 EPOCH 9 done: loss 0.0374 - lr 0.0200000\n",
+      "2021-09-21 22:02:14,997 DEV : loss 0.36078810691833496 - score 0.6667\n",
+      "2021-09-21 22:02:14,999 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 22:02:15,033 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:02:15,587 epoch 10 - iter 8/81 - loss 0.00238548 - samples/sec: 15.11 - lr: 0.020000\n",
+      "2021-09-21 22:02:16,111 epoch 10 - iter 16/81 - loss 0.00156319 - samples/sec: 15.26 - lr: 0.020000\n",
+      "2021-09-21 22:02:16,558 epoch 10 - iter 24/81 - loss 0.00110796 - samples/sec: 17.92 - lr: 0.020000\n",
+      "2021-09-21 22:02:17,005 epoch 10 - iter 32/81 - loss 0.06573550 - samples/sec: 17.94 - lr: 0.020000\n",
+      "2021-09-21 22:02:17,475 epoch 10 - iter 40/81 - loss 0.05294464 - samples/sec: 17.05 - lr: 0.020000\n",
+      "2021-09-21 22:02:17,874 epoch 10 - iter 48/81 - loss 0.04424691 - samples/sec: 20.06 - lr: 0.020000\n",
+      "2021-09-21 22:02:18,428 epoch 10 - iter 56/81 - loss 0.04373662 - samples/sec: 14.46 - lr: 0.020000\n",
+      "2021-09-21 22:02:18,976 epoch 10 - iter 64/81 - loss 0.04540931 - samples/sec: 14.60 - lr: 0.020000\n",
+      "2021-09-21 22:02:19,365 epoch 10 - iter 72/81 - loss 0.04332776 - samples/sec: 20.60 - lr: 0.020000\n",
+      "2021-09-21 22:02:19,826 epoch 10 - iter 80/81 - loss 0.03906075 - samples/sec: 17.39 - lr: 0.020000\n",
+      "2021-09-21 22:02:19,969 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:02:19,969 EPOCH 10 done: loss 0.0728 - lr 0.0200000\n",
+      "2021-09-21 22:02:20,313 DEV : loss 0.6305676102638245 - score 0.4444\n",
+      "Epoch    10: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 22:02:20,314 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 22:02:25,776 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:02:25,777 Testing using best model ...\n",
+      "2021-09-21 22:02:25,779 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:03:40,491 \t0.8\n",
-      "2021-09-08 02:03:40,492 \n",
+      "2021-09-21 22:02:31,129 \t0.8\n",
+      "2021-09-21 22:02:31,129 \n",
       "Results:\n",
       "- F-score (micro) 0.8\n",
-      "- F-score (macro) 0.6133\n",
+      "- F-score (macro) 0.3333\n",
       "- Accuracy 0.8\n",
       "\n",
       "By class:\n",
       "                                           precision    recall  f1-score   support\n",
       "\n",
-      "This text is about Family & Relationships     0.6667    1.0000    0.8000         2\n",
-      "                This text is about Health     1.0000    0.5000    0.6667         2\n",
-      " This text is about Science & Mathematics     1.0000    1.0000    1.0000         1\n",
-      " This text is about Entertainment & Music     1.0000    1.0000    1.0000         1\n",
+      "This text is about Family & Relationships     1.0000    0.5000    0.6667         2\n",
+      "                This text is about Health     0.0000    0.0000    0.0000         0\n",
+      " This text is about Science & Mathematics     0.0000    0.0000    0.0000         0\n",
+      " This text is about Entertainment & Music     0.5000    1.0000    0.6667         1\n",
       "  This text is about Computers & Internet     0.0000    0.0000    0.0000         0\n",
       " This text is about Education & Reference     0.0000    0.0000    0.0000         0\n",
-      "                This text is about Sports     1.0000    1.0000    1.0000         1\n",
-      "     This text is about Society & Culture     1.0000    0.5000    0.6667         2\n",
+      "                This text is about Sports     1.0000    1.0000    1.0000         4\n",
+      "     This text is about Society & Culture     0.0000    0.0000    0.0000         1\n",
       "    This text is about Business & Finance     0.0000    0.0000    0.0000         0\n",
-      " This text is about Politics & Government     1.0000    1.0000    1.0000         1\n",
+      " This text is about Politics & Government     1.0000    1.0000    1.0000         2\n",
       "\n",
       "                                micro avg     0.8000    0.8000    0.8000        10\n",
-      "                                macro avg     0.6667    0.6000    0.6133        10\n",
-      "                             weighted avg     0.9333    0.8000    0.8267        10\n",
+      "                                macro avg     0.3500    0.3500    0.3333        10\n",
+      "                             weighted avg     0.8500    0.8000    0.8000        10\n",
       "                              samples avg     0.8000    0.8000    0.8000        10\n",
       "\n",
-      "2021-09-08 02:03:40,492 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:03:56,312 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
+      "2021-09-21 22:02:31,130 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:02:51,461 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:04:00,220 Computing label dictionary. Progress:\n"
+      "2021-09-21 22:02:55,441 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 91/91 [00:00<00:00, 16145.59it/s]"
+      "100%|██████████| 91/91 [00:00<00:00, 16522.30it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:04:00,227 [b'This text is about Family & Relationships', b'This text is about Health', b'This text is about Science & Mathematics', b'This text is about Entertainment & Music', b'This text is about Computers & Internet', b'This text is about Education & Reference', b'This text is about Sports', b'This text is about Society & Culture', b'This text is about Business & Finance', b'This text is about Politics & Government']\n",
-      "2021-09-08 02:04:00,238 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:04:00,240 Model: \"TARSClassifier(\n",
+      "2021-09-21 22:02:55,448 [b'This text is about Family & Relationships', b'This text is about Health', b'This text is about Science & Mathematics', b'This text is about Entertainment & Music', b'This text is about Computers & Internet', b'This text is about Education & Reference', b'This text is about Sports', b'This text is about Society & Culture', b'This text is about Business & Finance', b'This text is about Politics & Government']\n",
+      "2021-09-21 22:02:55,485 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:02:55,487 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -3823,25 +3838,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:04:00,240 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:04:00,241 Corpus: \"Corpus: 81 train + 9 dev + 10 test sentences\"\n",
-      "2021-09-08 02:04:00,241 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:04:00,241 Parameters:\n",
-      "2021-09-08 02:04:00,242  - learning_rate: \"0.02\"\n",
-      "2021-09-08 02:04:00,242  - mini_batch_size: \"1\"\n",
-      "2021-09-08 02:04:00,242  - patience: \"3\"\n",
-      "2021-09-08 02:04:00,242  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 02:04:00,243  - max_epochs: \"10\"\n",
-      "2021-09-08 02:04:00,243  - shuffle: \"True\"\n",
-      "2021-09-08 02:04:00,243  - train_with_dev: \"False\"\n",
-      "2021-09-08 02:04:00,244  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 02:04:00,244 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:04:00,244 Model training base path: \"None\"\n",
-      "2021-09-08 02:04:00,244 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:04:00,245 Device: cuda:1\n",
-      "2021-09-08 02:04:00,245 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:04:00,245 Embeddings storage mode: cpu\n",
-      "2021-09-08 02:04:00,253 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 22:02:55,488 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:02:55,488 Corpus: \"Corpus: 81 train + 9 dev + 10 test sentences\"\n",
+      "2021-09-21 22:02:55,488 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:02:55,489 Parameters:\n",
+      "2021-09-21 22:02:55,489  - learning_rate: \"0.02\"\n",
+      "2021-09-21 22:02:55,489  - mini_batch_size: \"1\"\n",
+      "2021-09-21 22:02:55,489  - patience: \"3\"\n",
+      "2021-09-21 22:02:55,490  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 22:02:55,490  - max_epochs: \"10\"\n",
+      "2021-09-21 22:02:55,490  - shuffle: \"True\"\n",
+      "2021-09-21 22:02:55,490  - train_with_dev: \"False\"\n",
+      "2021-09-21 22:02:55,491  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 22:02:55,491 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:02:55,491 Model training base path: \"None\"\n",
+      "2021-09-21 22:02:55,492 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:02:55,492 Device: cuda:0\n",
+      "2021-09-21 22:02:55,492 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:02:55,492 Embeddings storage mode: cpu\n",
+      "2021-09-21 22:02:55,500 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -3855,215 +3870,217 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:04:00,760 epoch 1 - iter 8/81 - loss 0.41550908 - samples/sec: 16.26 - lr: 0.020000\n",
-      "2021-09-08 02:04:01,219 epoch 1 - iter 16/81 - loss 0.22718545 - samples/sec: 17.47 - lr: 0.020000\n",
-      "2021-09-08 02:04:01,642 epoch 1 - iter 24/81 - loss 0.25519601 - samples/sec: 18.95 - lr: 0.020000\n",
-      "2021-09-08 02:04:02,098 epoch 1 - iter 32/81 - loss 0.34422588 - samples/sec: 17.57 - lr: 0.020000\n",
-      "2021-09-08 02:04:02,497 epoch 1 - iter 40/81 - loss 0.34856766 - samples/sec: 20.08 - lr: 0.020000\n",
-      "2021-09-08 02:04:02,941 epoch 1 - iter 48/81 - loss 0.36933148 - samples/sec: 18.02 - lr: 0.020000\n",
-      "2021-09-08 02:04:03,384 epoch 1 - iter 56/81 - loss 0.46661378 - samples/sec: 18.12 - lr: 0.020000\n",
-      "2021-09-08 02:04:03,880 epoch 1 - iter 64/81 - loss 0.48310871 - samples/sec: 16.14 - lr: 0.020000\n",
-      "2021-09-08 02:04:04,375 epoch 1 - iter 72/81 - loss 0.53146976 - samples/sec: 16.17 - lr: 0.020000\n",
-      "2021-09-08 02:04:04,839 epoch 1 - iter 80/81 - loss 0.50856794 - samples/sec: 17.28 - lr: 0.020000\n",
-      "2021-09-08 02:04:04,986 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:04:04,987 EPOCH 1 done: loss 0.5045 - lr 0.0200000\n",
-      "2021-09-08 02:04:05,241 DEV : loss 0.18192262947559357 - score 0.5556\n",
-      "2021-09-08 02:04:05,242 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:02:56,267 epoch 1 - iter 8/81 - loss 0.67479025 - samples/sec: 10.66 - lr: 0.020000\n",
+      "2021-09-21 22:02:57,075 epoch 1 - iter 16/81 - loss 0.66878971 - samples/sec: 9.91 - lr: 0.020000\n",
+      "2021-09-21 22:02:57,555 epoch 1 - iter 24/81 - loss 0.51064956 - samples/sec: 16.71 - lr: 0.020000\n",
+      "2021-09-21 22:02:58,059 epoch 1 - iter 32/81 - loss 0.58468499 - samples/sec: 15.88 - lr: 0.020000\n",
+      "2021-09-21 22:02:58,614 epoch 1 - iter 40/81 - loss 0.52859961 - samples/sec: 14.43 - lr: 0.020000\n",
+      "2021-09-21 22:02:59,231 epoch 1 - iter 48/81 - loss 0.52017705 - samples/sec: 12.98 - lr: 0.020000\n",
+      "2021-09-21 22:02:59,800 epoch 1 - iter 56/81 - loss 0.48352610 - samples/sec: 14.07 - lr: 0.020000\n",
+      "2021-09-21 22:03:00,384 epoch 1 - iter 64/81 - loss 0.49692528 - samples/sec: 13.73 - lr: 0.020000\n",
+      "2021-09-21 22:03:00,869 epoch 1 - iter 72/81 - loss 0.51057345 - samples/sec: 16.51 - lr: 0.020000\n",
+      "2021-09-21 22:03:01,468 epoch 1 - iter 80/81 - loss 0.50023543 - samples/sec: 13.36 - lr: 0.020000\n",
+      "2021-09-21 22:03:01,537 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:03:01,538 EPOCH 1 done: loss 0.4942 - lr 0.0200000\n",
+      "2021-09-21 22:03:02,091 DEV : loss 0.4710555672645569 - score 0.6667\n",
+      "2021-09-21 22:03:02,093 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 22:03:09,339 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:03:09,923 epoch 2 - iter 8/81 - loss 0.37973291 - samples/sec: 14.21 - lr: 0.020000\n",
+      "2021-09-21 22:03:10,491 epoch 2 - iter 16/81 - loss 0.28721187 - samples/sec: 14.10 - lr: 0.020000\n",
+      "2021-09-21 22:03:11,073 epoch 2 - iter 24/81 - loss 0.27703345 - samples/sec: 13.77 - lr: 0.020000\n",
+      "2021-09-21 22:03:11,486 epoch 2 - iter 32/81 - loss 0.21230805 - samples/sec: 19.41 - lr: 0.020000\n",
+      "2021-09-21 22:03:11,967 epoch 2 - iter 40/81 - loss 0.38830336 - samples/sec: 16.64 - lr: 0.020000\n",
+      "2021-09-21 22:03:12,488 epoch 2 - iter 48/81 - loss 0.38855524 - samples/sec: 15.37 - lr: 0.020000\n",
+      "2021-09-21 22:03:12,942 epoch 2 - iter 56/81 - loss 0.42986825 - samples/sec: 17.65 - lr: 0.020000\n",
+      "2021-09-21 22:03:13,437 epoch 2 - iter 64/81 - loss 0.42911187 - samples/sec: 16.20 - lr: 0.020000\n",
+      "2021-09-21 22:03:13,884 epoch 2 - iter 72/81 - loss 0.43476214 - samples/sec: 17.95 - lr: 0.020000\n",
+      "2021-09-21 22:03:14,374 epoch 2 - iter 80/81 - loss 0.40722218 - samples/sec: 16.35 - lr: 0.020000\n",
+      "2021-09-21 22:03:14,422 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:03:14,423 EPOCH 2 done: loss 0.4267 - lr 0.0200000\n",
+      "2021-09-21 22:03:15,183 DEV : loss 0.23734933137893677 - score 0.6667\n",
+      "2021-09-21 22:03:15,184 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:04:09,498 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:04:10,051 epoch 2 - iter 8/81 - loss 0.71264434 - samples/sec: 14.92 - lr: 0.020000\n",
-      "2021-09-08 02:04:10,478 epoch 2 - iter 16/81 - loss 0.56110892 - samples/sec: 18.75 - lr: 0.020000\n",
-      "2021-09-08 02:04:10,962 epoch 2 - iter 24/81 - loss 0.55872379 - samples/sec: 16.54 - lr: 0.020000\n",
-      "2021-09-08 02:04:11,423 epoch 2 - iter 32/81 - loss 0.49081706 - samples/sec: 17.40 - lr: 0.020000\n",
-      "2021-09-08 02:04:11,933 epoch 2 - iter 40/81 - loss 0.50189426 - samples/sec: 15.71 - lr: 0.020000\n",
-      "2021-09-08 02:04:12,346 epoch 2 - iter 48/81 - loss 0.44637897 - samples/sec: 19.38 - lr: 0.020000\n",
-      "2021-09-08 02:04:12,795 epoch 2 - iter 56/81 - loss 0.46058677 - samples/sec: 17.83 - lr: 0.020000\n",
-      "2021-09-08 02:04:13,315 epoch 2 - iter 64/81 - loss 0.41621177 - samples/sec: 15.41 - lr: 0.020000\n",
-      "2021-09-08 02:04:13,730 epoch 2 - iter 72/81 - loss 0.42132422 - samples/sec: 19.31 - lr: 0.020000\n",
-      "2021-09-08 02:04:14,193 epoch 2 - iter 80/81 - loss 0.42097298 - samples/sec: 17.32 - lr: 0.020000\n",
-      "2021-09-08 02:04:14,253 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:04:14,253 EPOCH 2 done: loss 0.4159 - lr 0.0200000\n",
-      "2021-09-08 02:04:14,507 DEV : loss 0.09227627515792847 - score 0.8889\n",
-      "2021-09-08 02:04:14,508 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:03:19,233 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:03:19,745 epoch 3 - iter 8/81 - loss 0.55634570 - samples/sec: 16.43 - lr: 0.020000\n",
+      "2021-09-21 22:03:20,368 epoch 3 - iter 16/81 - loss 0.43465175 - samples/sec: 12.85 - lr: 0.020000\n",
+      "2021-09-21 22:03:21,045 epoch 3 - iter 24/81 - loss 0.39308129 - samples/sec: 11.83 - lr: 0.020000\n",
+      "2021-09-21 22:03:21,588 epoch 3 - iter 32/81 - loss 0.35779038 - samples/sec: 14.77 - lr: 0.020000\n",
+      "2021-09-21 22:03:22,116 epoch 3 - iter 40/81 - loss 0.33251297 - samples/sec: 15.16 - lr: 0.020000\n",
+      "2021-09-21 22:03:22,761 epoch 3 - iter 48/81 - loss 0.32861788 - samples/sec: 12.42 - lr: 0.020000\n",
+      "2021-09-21 22:03:23,195 epoch 3 - iter 56/81 - loss 0.33686500 - samples/sec: 18.45 - lr: 0.020000\n",
+      "2021-09-21 22:03:23,665 epoch 3 - iter 64/81 - loss 0.29716215 - samples/sec: 17.06 - lr: 0.020000\n",
+      "2021-09-21 22:03:24,071 epoch 3 - iter 72/81 - loss 0.29992397 - samples/sec: 19.73 - lr: 0.020000\n",
+      "2021-09-21 22:03:24,487 epoch 3 - iter 80/81 - loss 0.27137865 - samples/sec: 19.26 - lr: 0.020000\n",
+      "2021-09-21 22:03:24,535 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:03:24,535 EPOCH 3 done: loss 0.2909 - lr 0.0200000\n",
+      "2021-09-21 22:03:24,987 DEV : loss 0.27701035141944885 - score 0.8889\n",
+      "2021-09-21 22:03:24,988 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:04:18,448 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:04:19,017 epoch 3 - iter 8/81 - loss 0.29960487 - samples/sec: 14.48 - lr: 0.020000\n",
-      "2021-09-08 02:04:19,513 epoch 3 - iter 16/81 - loss 0.31480598 - samples/sec: 16.15 - lr: 0.020000\n",
-      "2021-09-08 02:04:19,959 epoch 3 - iter 24/81 - loss 0.31375287 - samples/sec: 17.97 - lr: 0.020000\n",
-      "2021-09-08 02:04:20,479 epoch 3 - iter 32/81 - loss 0.27741399 - samples/sec: 15.41 - lr: 0.020000\n",
-      "2021-09-08 02:04:20,889 epoch 3 - iter 40/81 - loss 0.27408247 - samples/sec: 19.54 - lr: 0.020000\n",
-      "2021-09-08 02:04:21,314 epoch 3 - iter 48/81 - loss 0.24163752 - samples/sec: 18.84 - lr: 0.020000\n",
-      "2021-09-08 02:04:21,772 epoch 3 - iter 56/81 - loss 0.24959955 - samples/sec: 17.48 - lr: 0.020000\n",
-      "2021-09-08 02:04:22,210 epoch 3 - iter 64/81 - loss 0.22242844 - samples/sec: 18.29 - lr: 0.020000\n",
-      "2021-09-08 02:04:22,687 epoch 3 - iter 72/81 - loss 0.26694402 - samples/sec: 16.80 - lr: 0.020000\n",
-      "2021-09-08 02:04:23,147 epoch 3 - iter 80/81 - loss 0.27378943 - samples/sec: 17.44 - lr: 0.020000\n",
-      "2021-09-08 02:04:23,196 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:04:23,197 EPOCH 3 done: loss 0.2705 - lr 0.0200000\n",
-      "2021-09-08 02:04:23,568 DEV : loss 0.09155600517988205 - score 0.8889\n",
-      "2021-09-08 02:04:23,569 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:03:30,991 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:03:31,765 epoch 4 - iter 8/81 - loss 0.11771647 - samples/sec: 10.62 - lr: 0.020000\n",
+      "2021-09-21 22:03:32,466 epoch 4 - iter 16/81 - loss 0.23629874 - samples/sec: 11.43 - lr: 0.020000\n",
+      "2021-09-21 22:03:33,025 epoch 4 - iter 24/81 - loss 0.20455058 - samples/sec: 14.31 - lr: 0.020000\n",
+      "2021-09-21 22:03:33,557 epoch 4 - iter 32/81 - loss 0.18740566 - samples/sec: 15.06 - lr: 0.020000\n",
+      "2021-09-21 22:03:34,127 epoch 4 - iter 40/81 - loss 0.26383363 - samples/sec: 14.06 - lr: 0.020000\n",
+      "2021-09-21 22:03:34,705 epoch 4 - iter 48/81 - loss 0.23201412 - samples/sec: 13.83 - lr: 0.020000\n",
+      "2021-09-21 22:03:35,311 epoch 4 - iter 56/81 - loss 0.21719987 - samples/sec: 13.21 - lr: 0.020000\n",
+      "2021-09-21 22:03:35,850 epoch 4 - iter 64/81 - loss 0.22779981 - samples/sec: 14.86 - lr: 0.020000\n",
+      "2021-09-21 22:03:36,440 epoch 4 - iter 72/81 - loss 0.20315432 - samples/sec: 13.57 - lr: 0.020000\n",
+      "2021-09-21 22:03:37,170 epoch 4 - iter 80/81 - loss 0.18539770 - samples/sec: 10.98 - lr: 0.020000\n",
+      "2021-09-21 22:03:37,235 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:03:37,235 EPOCH 4 done: loss 0.1879 - lr 0.0200000\n",
+      "2021-09-21 22:03:37,771 DEV : loss 0.14430364966392517 - score 0.7778\n",
+      "2021-09-21 22:03:37,772 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:03:37,773 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:03:38,257 epoch 5 - iter 8/81 - loss 0.02117331 - samples/sec: 17.33 - lr: 0.020000\n",
+      "2021-09-21 22:03:39,003 epoch 5 - iter 16/81 - loss 0.11725984 - samples/sec: 10.73 - lr: 0.020000\n",
+      "2021-09-21 22:03:39,720 epoch 5 - iter 24/81 - loss 0.10510190 - samples/sec: 11.16 - lr: 0.020000\n",
+      "2021-09-21 22:03:40,260 epoch 5 - iter 32/81 - loss 0.15873025 - samples/sec: 14.83 - lr: 0.020000\n",
+      "2021-09-21 22:03:40,798 epoch 5 - iter 40/81 - loss 0.12733270 - samples/sec: 14.89 - lr: 0.020000\n",
+      "2021-09-21 22:03:41,328 epoch 5 - iter 48/81 - loss 0.14235894 - samples/sec: 15.14 - lr: 0.020000\n",
+      "2021-09-21 22:03:41,829 epoch 5 - iter 56/81 - loss 0.12229289 - samples/sec: 15.98 - lr: 0.020000\n",
+      "2021-09-21 22:03:42,321 epoch 5 - iter 64/81 - loss 0.10709801 - samples/sec: 16.29 - lr: 0.020000\n",
+      "2021-09-21 22:03:42,720 epoch 5 - iter 72/81 - loss 0.11215848 - samples/sec: 20.08 - lr: 0.020000\n",
+      "2021-09-21 22:03:43,152 epoch 5 - iter 80/81 - loss 0.13647337 - samples/sec: 18.53 - lr: 0.020000\n",
+      "2021-09-21 22:03:43,206 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:03:43,206 EPOCH 5 done: loss 0.1348 - lr 0.0200000\n",
+      "2021-09-21 22:03:43,657 DEV : loss 0.16330406069755554 - score 0.8889\n",
+      "2021-09-21 22:03:43,658 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:04:27,735 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:04:28,363 epoch 4 - iter 8/81 - loss 0.32938421 - samples/sec: 13.09 - lr: 0.020000\n",
-      "2021-09-08 02:04:28,750 epoch 4 - iter 16/81 - loss 0.16821616 - samples/sec: 20.67 - lr: 0.020000\n",
-      "2021-09-08 02:04:29,209 epoch 4 - iter 24/81 - loss 0.14973950 - samples/sec: 17.47 - lr: 0.020000\n",
-      "2021-09-08 02:04:29,631 epoch 4 - iter 32/81 - loss 0.12104562 - samples/sec: 18.98 - lr: 0.020000\n",
-      "2021-09-08 02:04:30,043 epoch 4 - iter 40/81 - loss 0.10657436 - samples/sec: 19.48 - lr: 0.020000\n",
-      "2021-09-08 02:04:30,507 epoch 4 - iter 48/81 - loss 0.09297174 - samples/sec: 17.25 - lr: 0.020000\n",
-      "2021-09-08 02:04:30,984 epoch 4 - iter 56/81 - loss 0.14871031 - samples/sec: 16.79 - lr: 0.020000\n",
-      "2021-09-08 02:04:31,456 epoch 4 - iter 64/81 - loss 0.13144017 - samples/sec: 16.95 - lr: 0.020000\n",
-      "2021-09-08 02:04:31,886 epoch 4 - iter 72/81 - loss 0.11781058 - samples/sec: 18.64 - lr: 0.020000\n",
-      "2021-09-08 02:04:32,341 epoch 4 - iter 80/81 - loss 0.11699560 - samples/sec: 17.61 - lr: 0.020000\n",
-      "2021-09-08 02:04:32,389 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:04:32,389 EPOCH 4 done: loss 0.1156 - lr 0.0200000\n",
-      "2021-09-08 02:04:32,829 DEV : loss 0.12430480122566223 - score 0.8889\n",
-      "2021-09-08 02:04:32,830 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:04:32,832 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:04:33,234 epoch 5 - iter 8/81 - loss 0.00209209 - samples/sec: 20.69 - lr: 0.020000\n",
-      "2021-09-08 02:04:33,764 epoch 5 - iter 16/81 - loss 0.00710218 - samples/sec: 15.11 - lr: 0.020000\n",
-      "2021-09-08 02:04:34,253 epoch 5 - iter 24/81 - loss 0.03695830 - samples/sec: 16.37 - lr: 0.020000\n",
-      "2021-09-08 02:04:34,716 epoch 5 - iter 32/81 - loss 0.02841364 - samples/sec: 17.31 - lr: 0.020000\n",
-      "2021-09-08 02:04:35,126 epoch 5 - iter 40/81 - loss 0.04970650 - samples/sec: 19.53 - lr: 0.020000\n",
-      "2021-09-08 02:04:35,609 epoch 5 - iter 48/81 - loss 0.12881711 - samples/sec: 16.58 - lr: 0.020000\n",
-      "2021-09-08 02:04:36,031 epoch 5 - iter 56/81 - loss 0.14684353 - samples/sec: 19.01 - lr: 0.020000\n",
-      "2021-09-08 02:04:36,456 epoch 5 - iter 64/81 - loss 0.13829304 - samples/sec: 18.82 - lr: 0.020000\n",
-      "2021-09-08 02:04:36,894 epoch 5 - iter 72/81 - loss 0.12472278 - samples/sec: 18.31 - lr: 0.020000\n",
-      "2021-09-08 02:04:37,357 epoch 5 - iter 80/81 - loss 0.11295948 - samples/sec: 17.29 - lr: 0.020000\n",
-      "2021-09-08 02:04:37,465 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:04:37,466 EPOCH 5 done: loss 0.1124 - lr 0.0200000\n",
-      "2021-09-08 02:04:37,719 DEV : loss 0.14592920243740082 - score 0.8889\n",
-      "2021-09-08 02:04:37,720 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:04:37,722 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:04:38,225 epoch 6 - iter 8/81 - loss 0.00126714 - samples/sec: 16.40 - lr: 0.020000\n",
-      "2021-09-08 02:04:38,628 epoch 6 - iter 16/81 - loss 0.01095503 - samples/sec: 19.87 - lr: 0.020000\n",
-      "2021-09-08 02:04:39,115 epoch 6 - iter 24/81 - loss 0.09133249 - samples/sec: 16.45 - lr: 0.020000\n",
-      "2021-09-08 02:04:39,578 epoch 6 - iter 32/81 - loss 0.10418138 - samples/sec: 17.31 - lr: 0.020000\n",
-      "2021-09-08 02:04:40,018 epoch 6 - iter 40/81 - loss 0.12720202 - samples/sec: 18.22 - lr: 0.020000\n",
-      "2021-09-08 02:04:40,480 epoch 6 - iter 48/81 - loss 0.11414722 - samples/sec: 17.36 - lr: 0.020000\n",
-      "2021-09-08 02:04:40,954 epoch 6 - iter 56/81 - loss 0.09817344 - samples/sec: 16.91 - lr: 0.020000\n",
-      "2021-09-08 02:04:41,386 epoch 6 - iter 64/81 - loss 0.08725877 - samples/sec: 18.54 - lr: 0.020000\n",
-      "2021-09-08 02:04:41,850 epoch 6 - iter 72/81 - loss 0.07864909 - samples/sec: 17.27 - lr: 0.020000\n",
-      "2021-09-08 02:04:42,334 epoch 6 - iter 80/81 - loss 0.08930919 - samples/sec: 16.56 - lr: 0.020000\n",
-      "2021-09-08 02:04:42,384 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 22:03:48,671 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:03:49,358 epoch 6 - iter 8/81 - loss 0.00356509 - samples/sec: 12.11 - lr: 0.020000\n",
+      "2021-09-21 22:03:49,987 epoch 6 - iter 16/81 - loss 0.02681523 - samples/sec: 12.74 - lr: 0.020000\n",
+      "2021-09-21 22:03:50,622 epoch 6 - iter 24/81 - loss 0.04164727 - samples/sec: 12.61 - lr: 0.020000\n",
+      "2021-09-21 22:03:51,289 epoch 6 - iter 32/81 - loss 0.03441860 - samples/sec: 12.00 - lr: 0.020000\n",
+      "2021-09-21 22:03:51,882 epoch 6 - iter 40/81 - loss 0.02834033 - samples/sec: 13.50 - lr: 0.020000\n",
+      "2021-09-21 22:03:52,346 epoch 6 - iter 48/81 - loss 0.02377525 - samples/sec: 17.26 - lr: 0.020000\n",
+      "2021-09-21 22:03:53,006 epoch 6 - iter 56/81 - loss 0.05100644 - samples/sec: 12.14 - lr: 0.020000\n",
+      "2021-09-21 22:03:53,507 epoch 6 - iter 64/81 - loss 0.04473676 - samples/sec: 15.97 - lr: 0.020000\n",
+      "2021-09-21 22:03:54,180 epoch 6 - iter 72/81 - loss 0.04089216 - samples/sec: 11.91 - lr: 0.020000\n",
+      "2021-09-21 22:03:54,682 epoch 6 - iter 80/81 - loss 0.06532302 - samples/sec: 15.95 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:04:42,385 EPOCH 6 done: loss 0.0882 - lr 0.0200000\n",
-      "2021-09-08 02:04:42,721 DEV : loss 0.12575341761112213 - score 0.8889\n",
-      "2021-09-08 02:04:42,722 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 02:04:42,792 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:04:43,303 epoch 7 - iter 8/81 - loss 0.06420518 - samples/sec: 16.14 - lr: 0.020000\n",
-      "2021-09-08 02:04:43,712 epoch 7 - iter 16/81 - loss 0.19503095 - samples/sec: 19.59 - lr: 0.020000\n",
-      "2021-09-08 02:04:44,204 epoch 7 - iter 24/81 - loss 0.28048330 - samples/sec: 16.28 - lr: 0.020000\n",
-      "2021-09-08 02:04:44,654 epoch 7 - iter 32/81 - loss 0.21058359 - samples/sec: 17.83 - lr: 0.020000\n",
-      "2021-09-08 02:04:45,159 epoch 7 - iter 40/81 - loss 0.16869061 - samples/sec: 15.85 - lr: 0.020000\n",
-      "2021-09-08 02:04:45,648 epoch 7 - iter 48/81 - loss 0.14254387 - samples/sec: 16.36 - lr: 0.020000\n",
-      "2021-09-08 02:04:46,074 epoch 7 - iter 56/81 - loss 0.12299006 - samples/sec: 18.81 - lr: 0.020000\n",
-      "2021-09-08 02:04:46,492 epoch 7 - iter 64/81 - loss 0.10775320 - samples/sec: 19.17 - lr: 0.020000\n",
-      "2021-09-08 02:04:46,968 epoch 7 - iter 72/81 - loss 0.09581531 - samples/sec: 16.86 - lr: 0.020000\n",
-      "2021-09-08 02:04:47,368 epoch 7 - iter 80/81 - loss 0.08628120 - samples/sec: 20.02 - lr: 0.020000\n",
-      "2021-09-08 02:04:47,415 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:04:47,416 EPOCH 7 done: loss 0.0852 - lr 0.0200000\n",
-      "2021-09-08 02:04:47,794 DEV : loss 0.14614124596118927 - score 0.7778\n",
-      "Epoch     7: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 02:04:47,795 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 02:04:47,885 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:04:48,351 epoch 8 - iter 8/81 - loss 0.17890085 - samples/sec: 17.76 - lr: 0.010000\n",
-      "2021-09-08 02:04:48,849 epoch 8 - iter 16/81 - loss 0.12429960 - samples/sec: 16.10 - lr: 0.010000\n",
-      "2021-09-08 02:04:49,256 epoch 8 - iter 24/81 - loss 0.08297602 - samples/sec: 19.70 - lr: 0.010000\n",
-      "2021-09-08 02:04:49,739 epoch 8 - iter 32/81 - loss 0.06283025 - samples/sec: 16.56 - lr: 0.010000\n",
-      "2021-09-08 02:04:50,230 epoch 8 - iter 40/81 - loss 0.05102315 - samples/sec: 16.34 - lr: 0.010000\n",
-      "2021-09-08 02:04:50,681 epoch 8 - iter 48/81 - loss 0.04276248 - samples/sec: 17.77 - lr: 0.010000\n",
-      "2021-09-08 02:04:51,114 epoch 8 - iter 56/81 - loss 0.03671228 - samples/sec: 18.48 - lr: 0.010000\n",
-      "2021-09-08 02:04:51,505 epoch 8 - iter 64/81 - loss 0.03217058 - samples/sec: 20.53 - lr: 0.010000\n",
-      "2021-09-08 02:04:51,937 epoch 8 - iter 72/81 - loss 0.02865312 - samples/sec: 18.54 - lr: 0.010000\n",
-      "2021-09-08 02:04:52,432 epoch 8 - iter 80/81 - loss 0.02583396 - samples/sec: 16.17 - lr: 0.010000\n",
-      "2021-09-08 02:04:52,503 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:04:52,504 EPOCH 8 done: loss 0.0255 - lr 0.0100000\n",
-      "2021-09-08 02:04:52,758 DEV : loss 0.12165474891662598 - score 0.7778\n",
-      "2021-09-08 02:04:52,759 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:04:52,780 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:04:53,222 epoch 9 - iter 8/81 - loss 0.00032096 - samples/sec: 18.71 - lr: 0.010000\n",
-      "2021-09-08 02:04:53,647 epoch 9 - iter 16/81 - loss 0.00029316 - samples/sec: 18.86 - lr: 0.010000\n",
-      "2021-09-08 02:04:54,063 epoch 9 - iter 24/81 - loss 0.00040432 - samples/sec: 19.25 - lr: 0.010000\n",
-      "2021-09-08 02:04:54,495 epoch 9 - iter 32/81 - loss 0.00117621 - samples/sec: 18.56 - lr: 0.010000\n",
-      "2021-09-08 02:04:54,921 epoch 9 - iter 40/81 - loss 0.00102981 - samples/sec: 18.84 - lr: 0.010000\n",
-      "2021-09-08 02:04:55,627 epoch 9 - iter 48/81 - loss 0.00091541 - samples/sec: 11.34 - lr: 0.010000\n",
-      "2021-09-08 02:04:56,031 epoch 9 - iter 56/81 - loss 0.00084814 - samples/sec: 19.83 - lr: 0.010000\n",
-      "2021-09-08 02:04:56,491 epoch 9 - iter 64/81 - loss 0.00076908 - samples/sec: 17.40 - lr: 0.010000\n",
-      "2021-09-08 02:04:56,995 epoch 9 - iter 72/81 - loss 0.00073236 - samples/sec: 15.92 - lr: 0.010000\n",
-      "2021-09-08 02:04:57,502 epoch 9 - iter 80/81 - loss 0.01278528 - samples/sec: 15.81 - lr: 0.010000\n",
-      "2021-09-08 02:04:57,550 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:04:57,551 EPOCH 9 done: loss 0.0127 - lr 0.0100000\n",
-      "2021-09-08 02:04:57,803 DEV : loss 0.1346687376499176 - score 0.7778\n",
-      "2021-09-08 02:04:57,804 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:04:57,806 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:04:58,271 epoch 10 - iter 8/81 - loss 0.00095306 - samples/sec: 17.80 - lr: 0.010000\n",
-      "2021-09-08 02:04:58,735 epoch 10 - iter 16/81 - loss 0.00328692 - samples/sec: 17.27 - lr: 0.010000\n",
-      "2021-09-08 02:04:59,238 epoch 10 - iter 24/81 - loss 0.00440517 - samples/sec: 15.91 - lr: 0.010000\n",
-      "2021-09-08 02:04:59,645 epoch 10 - iter 32/81 - loss 0.00368731 - samples/sec: 19.70 - lr: 0.010000\n",
-      "2021-09-08 02:05:00,050 epoch 10 - iter 40/81 - loss 0.00299969 - samples/sec: 19.80 - lr: 0.010000\n",
-      "2021-09-08 02:05:00,486 epoch 10 - iter 48/81 - loss 0.00253519 - samples/sec: 18.35 - lr: 0.010000\n",
-      "2021-09-08 02:05:00,869 epoch 10 - iter 56/81 - loss 0.00223744 - samples/sec: 20.91 - lr: 0.010000\n",
-      "2021-09-08 02:05:01,298 epoch 10 - iter 64/81 - loss 0.00199144 - samples/sec: 18.69 - lr: 0.010000\n",
-      "2021-09-08 02:05:01,828 epoch 10 - iter 72/81 - loss 0.00222832 - samples/sec: 15.12 - lr: 0.010000\n",
-      "2021-09-08 02:05:02,371 epoch 10 - iter 80/81 - loss 0.00202993 - samples/sec: 14.75 - lr: 0.010000\n",
-      "2021-09-08 02:05:02,436 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:05:02,437 EPOCH 10 done: loss 0.0020 - lr 0.0100000\n",
-      "2021-09-08 02:05:02,698 DEV : loss 0.18143582344055176 - score 0.7778\n",
-      "2021-09-08 02:05:02,699 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 02:05:07,181 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:05:07,181 Testing using best model ...\n",
-      "2021-09-08 02:05:07,183 loading file None/best-model.pt\n",
+      "2021-09-21 22:03:54,741 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:03:54,741 EPOCH 6 done: loss 0.0648 - lr 0.0200000\n",
+      "2021-09-21 22:03:55,299 DEV : loss 0.1410445123910904 - score 0.8889\n",
+      "2021-09-21 22:03:55,301 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 22:03:59,309 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:03:59,803 epoch 7 - iter 8/81 - loss 0.12038937 - samples/sec: 16.73 - lr: 0.020000\n",
+      "2021-09-21 22:04:00,196 epoch 7 - iter 16/81 - loss 0.06040946 - samples/sec: 20.38 - lr: 0.020000\n",
+      "2021-09-21 22:04:00,656 epoch 7 - iter 24/81 - loss 0.04053271 - samples/sec: 17.42 - lr: 0.020000\n",
+      "2021-09-21 22:04:01,110 epoch 7 - iter 32/81 - loss 0.04944702 - samples/sec: 17.65 - lr: 0.020000\n",
+      "2021-09-21 22:04:01,580 epoch 7 - iter 40/81 - loss 0.03980811 - samples/sec: 17.08 - lr: 0.020000\n",
+      "2021-09-21 22:04:02,083 epoch 7 - iter 48/81 - loss 0.03328804 - samples/sec: 15.93 - lr: 0.020000\n",
+      "2021-09-21 22:04:02,602 epoch 7 - iter 56/81 - loss 0.02997082 - samples/sec: 15.42 - lr: 0.020000\n",
+      "2021-09-21 22:04:02,980 epoch 7 - iter 64/81 - loss 0.06265554 - samples/sec: 21.21 - lr: 0.020000\n",
+      "2021-09-21 22:04:03,355 epoch 7 - iter 72/81 - loss 0.05573212 - samples/sec: 21.38 - lr: 0.020000\n",
+      "2021-09-21 22:04:03,843 epoch 7 - iter 80/81 - loss 0.06087136 - samples/sec: 16.43 - lr: 0.020000\n",
+      "2021-09-21 22:04:03,903 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:04:03,904 EPOCH 7 done: loss 0.0601 - lr 0.0200000\n",
+      "2021-09-21 22:04:04,254 DEV : loss 0.10025984048843384 - score 0.8889\n",
+      "2021-09-21 22:04:04,255 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 22:04:07,880 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:04:08,526 epoch 8 - iter 8/81 - loss 0.00340342 - samples/sec: 12.71 - lr: 0.020000\n",
+      "2021-09-21 22:04:09,028 epoch 8 - iter 16/81 - loss 0.07406582 - samples/sec: 15.94 - lr: 0.020000\n",
+      "2021-09-21 22:04:09,494 epoch 8 - iter 24/81 - loss 0.04952440 - samples/sec: 17.21 - lr: 0.020000\n",
+      "2021-09-21 22:04:10,294 epoch 8 - iter 32/81 - loss 0.04335519 - samples/sec: 10.00 - lr: 0.020000\n",
+      "2021-09-21 22:04:10,807 epoch 8 - iter 40/81 - loss 0.12073401 - samples/sec: 15.62 - lr: 0.020000\n",
+      "2021-09-21 22:04:11,304 epoch 8 - iter 48/81 - loss 0.10105490 - samples/sec: 16.11 - lr: 0.020000\n",
+      "2021-09-21 22:04:11,804 epoch 8 - iter 56/81 - loss 0.08749611 - samples/sec: 16.02 - lr: 0.020000\n",
+      "2021-09-21 22:04:12,424 epoch 8 - iter 64/81 - loss 0.08217609 - samples/sec: 12.92 - lr: 0.020000\n",
+      "2021-09-21 22:04:12,903 epoch 8 - iter 72/81 - loss 0.07307549 - samples/sec: 16.72 - lr: 0.020000\n",
+      "2021-09-21 22:04:13,542 epoch 8 - iter 80/81 - loss 0.06585503 - samples/sec: 12.53 - lr: 0.020000\n",
+      "2021-09-21 22:04:13,597 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:04:13,597 EPOCH 8 done: loss 0.0650 - lr 0.0200000\n",
+      "2021-09-21 22:04:14,052 DEV : loss 0.18752248585224152 - score 0.8889\n",
+      "2021-09-21 22:04:14,053 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:04:14,055 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:04:14,738 epoch 9 - iter 8/81 - loss 0.00073307 - samples/sec: 12.27 - lr: 0.020000\n",
+      "2021-09-21 22:04:15,275 epoch 9 - iter 16/81 - loss 0.00049257 - samples/sec: 14.89 - lr: 0.020000\n",
+      "2021-09-21 22:04:15,799 epoch 9 - iter 24/81 - loss 0.00072279 - samples/sec: 15.31 - lr: 0.020000\n",
+      "2021-09-21 22:04:16,330 epoch 9 - iter 32/81 - loss 0.00068634 - samples/sec: 15.06 - lr: 0.020000\n",
+      "2021-09-21 22:04:16,892 epoch 9 - iter 40/81 - loss 0.00068483 - samples/sec: 14.25 - lr: 0.020000\n",
+      "2021-09-21 22:04:17,391 epoch 9 - iter 48/81 - loss 0.00059946 - samples/sec: 16.07 - lr: 0.020000\n",
+      "2021-09-21 22:04:17,978 epoch 9 - iter 56/81 - loss 0.00054359 - samples/sec: 13.63 - lr: 0.020000\n",
+      "2021-09-21 22:04:18,468 epoch 9 - iter 64/81 - loss 0.00051952 - samples/sec: 16.35 - lr: 0.020000\n",
+      "2021-09-21 22:04:18,949 epoch 9 - iter 72/81 - loss 0.00048300 - samples/sec: 16.65 - lr: 0.020000\n",
+      "2021-09-21 22:04:19,596 epoch 9 - iter 80/81 - loss 0.00045455 - samples/sec: 12.38 - lr: 0.020000\n",
+      "2021-09-21 22:04:19,654 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:04:19,655 EPOCH 9 done: loss 0.0005 - lr 0.0200000\n",
+      "2021-09-21 22:04:20,110 DEV : loss 0.18552063405513763 - score 0.8889\n",
+      "2021-09-21 22:04:20,112 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 22:04:20,115 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:04:20,626 epoch 10 - iter 8/81 - loss 0.00024330 - samples/sec: 16.44 - lr: 0.020000\n",
+      "2021-09-21 22:04:21,350 epoch 10 - iter 16/81 - loss 0.08176782 - samples/sec: 11.06 - lr: 0.020000\n",
+      "2021-09-21 22:04:21,838 epoch 10 - iter 24/81 - loss 0.08009884 - samples/sec: 16.42 - lr: 0.020000\n",
+      "2021-09-21 22:04:22,307 epoch 10 - iter 32/81 - loss 0.06011669 - samples/sec: 17.07 - lr: 0.020000\n",
+      "2021-09-21 22:04:22,824 epoch 10 - iter 40/81 - loss 0.04821363 - samples/sec: 15.49 - lr: 0.020000\n",
+      "2021-09-21 22:04:23,405 epoch 10 - iter 48/81 - loss 0.04023427 - samples/sec: 13.79 - lr: 0.020000\n",
+      "2021-09-21 22:04:23,948 epoch 10 - iter 56/81 - loss 0.03457302 - samples/sec: 14.76 - lr: 0.020000\n",
+      "2021-09-21 22:04:24,422 epoch 10 - iter 64/81 - loss 0.03027182 - samples/sec: 16.88 - lr: 0.020000\n",
+      "2021-09-21 22:04:25,005 epoch 10 - iter 72/81 - loss 0.02701661 - samples/sec: 13.75 - lr: 0.020000\n",
+      "2021-09-21 22:04:25,541 epoch 10 - iter 80/81 - loss 0.02433864 - samples/sec: 14.92 - lr: 0.020000\n",
+      "2021-09-21 22:04:25,655 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:04:25,655 EPOCH 10 done: loss 0.0240 - lr 0.0200000\n",
+      "2021-09-21 22:04:26,124 DEV : loss 0.12272124737501144 - score 0.8889\n",
+      "2021-09-21 22:04:26,125 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 22:04:30,387 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:04:30,388 Testing using best model ...\n",
+      "2021-09-21 22:04:30,389 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:05:12,677 \t0.6\n",
-      "2021-09-08 02:05:12,678 \n",
+      "2021-09-21 22:04:35,825 \t0.7\n",
+      "2021-09-21 22:04:35,825 \n",
       "Results:\n",
-      "- F-score (micro) 0.6\n",
-      "- F-score (macro) 0.33\n",
-      "- Accuracy 0.6\n",
+      "- F-score (micro) 0.7\n",
+      "- F-score (macro) 0.4667\n",
+      "- Accuracy 0.7\n",
       "\n",
       "By class:\n",
       "                                           precision    recall  f1-score   support\n",
       "\n",
-      "This text is about Family & Relationships     1.0000    1.0000    1.0000         2\n",
-      "                This text is about Health     1.0000    1.0000    1.0000         1\n",
-      " This text is about Science & Mathematics     0.6667    1.0000    0.8000         2\n",
-      " This text is about Entertainment & Music     0.0000    0.0000    0.0000         0\n",
+      "This text is about Family & Relationships     1.0000    1.0000    1.0000         1\n",
+      "                This text is about Health     1.0000    0.5000    0.6667         2\n",
+      " This text is about Science & Mathematics     0.0000    0.0000    0.0000         0\n",
+      " This text is about Entertainment & Music     1.0000    1.0000    1.0000         1\n",
       "  This text is about Computers & Internet     0.0000    0.0000    0.0000         0\n",
       " This text is about Education & Reference     0.0000    0.0000    0.0000         0\n",
-      "                This text is about Sports     0.0000    0.0000    0.0000         0\n",
-      "     This text is about Society & Culture     0.3333    1.0000    0.5000         1\n",
-      "    This text is about Business & Finance     0.0000    0.0000    0.0000         4\n",
-      " This text is about Politics & Government     0.0000    0.0000    0.0000         0\n",
+      "                This text is about Sports     1.0000    1.0000    1.0000         2\n",
+      "     This text is about Society & Culture     0.0000    0.0000    0.0000         1\n",
+      "    This text is about Business & Finance     1.0000    1.0000    1.0000         2\n",
+      " This text is about Politics & Government     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "                                micro avg     0.6000    0.6000    0.6000        10\n",
-      "                                macro avg     0.3000    0.4000    0.3300        10\n",
-      "                             weighted avg     0.4667    0.6000    0.5100        10\n",
-      "                              samples avg     0.6000    0.6000    0.6000        10\n",
+      "                                micro avg     0.7000    0.7000    0.7000        10\n",
+      "                                macro avg     0.5000    0.4500    0.4667        10\n",
+      "                             weighted avg     0.8000    0.7000    0.7333        10\n",
+      "                              samples avg     0.7000    0.7000    0.7000        10\n",
       "\n",
-      "2021-09-08 02:05:12,678 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:05:29,367 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
+      "2021-09-21 22:04:35,826 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:04:57,086 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:05:33,357 Computing label dictionary. Progress:\n"
+      "2021-09-21 22:05:01,252 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 91/91 [00:00<00:00, 15255.67it/s]"
+      "100%|██████████| 91/91 [00:00<00:00, 14526.97it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:05:33,365 [b'This text is about Family & Relationships', b'This text is about Health', b'This text is about Science & Mathematics', b'This text is about Entertainment & Music', b'This text is about Computers & Internet', b'This text is about Education & Reference', b'This text is about Sports', b'This text is about Society & Culture', b'This text is about Business & Finance', b'This text is about Politics & Government']\n",
-      "2021-09-08 02:05:33,376 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:05:33,377 Model: \"TARSClassifier(\n",
+      "2021-09-21 22:05:01,260 [b'This text is about Family & Relationships', b'This text is about Health', b'This text is about Science & Mathematics', b'This text is about Entertainment & Music', b'This text is about Computers & Internet', b'This text is about Education & Reference', b'This text is about Sports', b'This text is about Society & Culture', b'This text is about Business & Finance', b'This text is about Politics & Government']\n",
+      "2021-09-21 22:05:01,400 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:05:01,402 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -4376,25 +4393,24 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:05:33,378 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:05:33,378 Corpus: \"Corpus: 81 train + 9 dev + 10 test sentences\"\n",
-      "2021-09-08 02:05:33,379 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:05:33,379 Parameters:\n",
-      "2021-09-08 02:05:33,379  - learning_rate: \"0.02\"\n",
-      "2021-09-08 02:05:33,379  - mini_batch_size: \"1\"\n",
-      "2021-09-08 02:05:33,380  - patience: \"3\"\n",
-      "2021-09-08 02:05:33,380  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 02:05:33,380  - max_epochs: \"10\"\n",
-      "2021-09-08 02:05:33,381  - shuffle: \"True\"\n",
-      "2021-09-08 02:05:33,381  - train_with_dev: \"False\"\n",
-      "2021-09-08 02:05:33,381  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 02:05:33,381 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:05:33,382 Model training base path: \"None\"\n",
-      "2021-09-08 02:05:33,382 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:05:33,382 Device: cuda:1\n",
-      "2021-09-08 02:05:33,383 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:05:33,383 Embeddings storage mode: cpu\n",
-      "2021-09-08 02:05:33,390 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 22:05:01,403 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:05:01,403 Corpus: \"Corpus: 81 train + 9 dev + 10 test sentences\"\n",
+      "2021-09-21 22:05:01,403 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:05:01,403 Parameters:\n",
+      "2021-09-21 22:05:01,404  - learning_rate: \"0.02\"\n",
+      "2021-09-21 22:05:01,404  - mini_batch_size: \"1\"\n",
+      "2021-09-21 22:05:01,404  - patience: \"3\"\n",
+      "2021-09-21 22:05:01,405  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 22:05:01,405  - max_epochs: \"10\"\n",
+      "2021-09-21 22:05:01,405  - shuffle: \"True\"\n",
+      "2021-09-21 22:05:01,405  - train_with_dev: \"False\"\n",
+      "2021-09-21 22:05:01,406  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 22:05:01,406 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:05:01,406 Model training base path: \"None\"\n",
+      "2021-09-21 22:05:01,407 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:05:01,407 Device: cuda:0\n",
+      "2021-09-21 22:05:01,407 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:05:01,407 Embeddings storage mode: cpu\n"
      ]
     },
     {
@@ -4408,227 +4424,230 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:05:33,876 epoch 1 - iter 8/81 - loss 0.44259126 - samples/sec: 17.06 - lr: 0.020000\n",
-      "2021-09-08 02:05:34,677 epoch 1 - iter 16/81 - loss 0.47509686 - samples/sec: 10.00 - lr: 0.020000\n",
-      "2021-09-08 02:05:35,236 epoch 1 - iter 24/81 - loss 0.45981816 - samples/sec: 14.34 - lr: 0.020000\n",
-      "2021-09-08 02:05:35,676 epoch 1 - iter 32/81 - loss 0.47066639 - samples/sec: 18.19 - lr: 0.020000\n",
-      "2021-09-08 02:05:36,158 epoch 1 - iter 40/81 - loss 0.39911385 - samples/sec: 16.61 - lr: 0.020000\n",
-      "2021-09-08 02:05:36,649 epoch 1 - iter 48/81 - loss 0.47942936 - samples/sec: 16.31 - lr: 0.020000\n",
-      "2021-09-08 02:05:37,130 epoch 1 - iter 56/81 - loss 0.47374831 - samples/sec: 16.66 - lr: 0.020000\n",
-      "2021-09-08 02:05:37,783 epoch 1 - iter 64/81 - loss 0.48834091 - samples/sec: 12.27 - lr: 0.020000\n",
-      "2021-09-08 02:05:38,266 epoch 1 - iter 72/81 - loss 0.45396788 - samples/sec: 16.57 - lr: 0.020000\n",
-      "2021-09-08 02:05:38,837 epoch 1 - iter 80/81 - loss 0.49025726 - samples/sec: 14.03 - lr: 0.020000\n",
-      "2021-09-08 02:05:38,891 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:05:38,892 EPOCH 1 done: loss 0.4845 - lr 0.0200000\n",
-      "2021-09-08 02:05:39,248 DEV : loss 0.672208845615387 - score 0.2222\n",
-      "2021-09-08 02:05:39,249 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:05:01,600 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:05:02,066 epoch 1 - iter 8/81 - loss 0.57385669 - samples/sec: 17.80 - lr: 0.020000\n",
+      "2021-09-21 22:05:02,519 epoch 1 - iter 16/81 - loss 0.57245552 - samples/sec: 17.70 - lr: 0.020000\n",
+      "2021-09-21 22:05:02,929 epoch 1 - iter 24/81 - loss 0.50662246 - samples/sec: 19.54 - lr: 0.020000\n",
+      "2021-09-21 22:05:03,400 epoch 1 - iter 32/81 - loss 0.44559004 - samples/sec: 17.00 - lr: 0.020000\n",
+      "2021-09-21 22:05:04,094 epoch 1 - iter 40/81 - loss 0.41689777 - samples/sec: 11.54 - lr: 0.020000\n",
+      "2021-09-21 22:05:04,747 epoch 1 - iter 48/81 - loss 0.47196713 - samples/sec: 12.26 - lr: 0.020000\n",
+      "2021-09-21 22:05:05,331 epoch 1 - iter 56/81 - loss 0.44581831 - samples/sec: 13.72 - lr: 0.020000\n",
+      "2021-09-21 22:05:05,889 epoch 1 - iter 64/81 - loss 0.46075006 - samples/sec: 14.34 - lr: 0.020000\n",
+      "2021-09-21 22:05:06,644 epoch 1 - iter 72/81 - loss 0.43728307 - samples/sec: 10.61 - lr: 0.020000\n",
+      "2021-09-21 22:05:07,366 epoch 1 - iter 80/81 - loss 0.45535947 - samples/sec: 11.09 - lr: 0.020000\n",
+      "2021-09-21 22:05:07,528 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:05:07,529 EPOCH 1 done: loss 0.4505 - lr 0.0200000\n",
+      "2021-09-21 22:05:08,146 DEV : loss 0.4695843756198883 - score 0.2222\n",
+      "2021-09-21 22:05:08,147 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 22:05:11,671 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:05:12,222 epoch 2 - iter 8/81 - loss 0.63235290 - samples/sec: 14.98 - lr: 0.020000\n",
+      "2021-09-21 22:05:12,852 epoch 2 - iter 16/81 - loss 0.65096157 - samples/sec: 12.72 - lr: 0.020000\n",
+      "2021-09-21 22:05:13,251 epoch 2 - iter 24/81 - loss 0.44756283 - samples/sec: 20.10 - lr: 0.020000\n",
+      "2021-09-21 22:05:13,650 epoch 2 - iter 32/81 - loss 0.42512000 - samples/sec: 20.05 - lr: 0.020000\n",
+      "2021-09-21 22:05:14,079 epoch 2 - iter 40/81 - loss 0.37640826 - samples/sec: 18.70 - lr: 0.020000\n",
+      "2021-09-21 22:05:14,594 epoch 2 - iter 48/81 - loss 0.36799124 - samples/sec: 15.56 - lr: 0.020000\n",
+      "2021-09-21 22:05:15,043 epoch 2 - iter 56/81 - loss 0.37438498 - samples/sec: 17.84 - lr: 0.020000\n",
+      "2021-09-21 22:05:15,512 epoch 2 - iter 64/81 - loss 0.34163559 - samples/sec: 17.08 - lr: 0.020000\n",
+      "2021-09-21 22:05:16,179 epoch 2 - iter 72/81 - loss 0.31490134 - samples/sec: 12.01 - lr: 0.020000\n",
+      "2021-09-21 22:05:16,727 epoch 2 - iter 80/81 - loss 0.28617922 - samples/sec: 14.61 - lr: 0.020000\n",
+      "2021-09-21 22:05:16,788 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:05:16,789 EPOCH 2 done: loss 0.2830 - lr 0.0200000\n",
+      "2021-09-21 22:05:17,404 DEV : loss 0.6049531102180481 - score 0.6667\n",
+      "2021-09-21 22:05:17,405 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:05:43,236 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:05:43,825 epoch 2 - iter 8/81 - loss 0.42379989 - samples/sec: 13.99 - lr: 0.020000\n",
-      "2021-09-08 02:05:44,246 epoch 2 - iter 16/81 - loss 0.42805979 - samples/sec: 19.03 - lr: 0.020000\n",
-      "2021-09-08 02:05:44,664 epoch 2 - iter 24/81 - loss 0.47926810 - samples/sec: 19.15 - lr: 0.020000\n",
-      "2021-09-08 02:05:45,126 epoch 2 - iter 32/81 - loss 0.47473263 - samples/sec: 17.33 - lr: 0.020000\n",
-      "2021-09-08 02:05:45,521 epoch 2 - iter 40/81 - loss 0.41927057 - samples/sec: 20.28 - lr: 0.020000\n",
-      "2021-09-08 02:05:46,049 epoch 2 - iter 48/81 - loss 0.40052538 - samples/sec: 15.18 - lr: 0.020000\n",
-      "2021-09-08 02:05:46,696 epoch 2 - iter 56/81 - loss 0.40934416 - samples/sec: 12.38 - lr: 0.020000\n",
-      "2021-09-08 02:05:47,114 epoch 2 - iter 64/81 - loss 0.38633578 - samples/sec: 19.14 - lr: 0.020000\n",
-      "2021-09-08 02:05:47,559 epoch 2 - iter 72/81 - loss 0.41538353 - samples/sec: 18.02 - lr: 0.020000\n",
-      "2021-09-08 02:05:48,007 epoch 2 - iter 80/81 - loss 0.39799867 - samples/sec: 17.88 - lr: 0.020000\n",
-      "2021-09-08 02:05:48,054 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:05:48,055 EPOCH 2 done: loss 0.3958 - lr 0.0200000\n",
-      "2021-09-08 02:05:48,500 DEV : loss 0.5494266152381897 - score 0.4444\n",
-      "2021-09-08 02:05:48,501 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:05:21,383 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:05:22,065 epoch 3 - iter 8/81 - loss 0.23746216 - samples/sec: 12.11 - lr: 0.020000\n",
+      "2021-09-21 22:05:22,705 epoch 3 - iter 16/81 - loss 0.19029071 - samples/sec: 12.50 - lr: 0.020000\n",
+      "2021-09-21 22:05:23,101 epoch 3 - iter 24/81 - loss 0.13130497 - samples/sec: 20.25 - lr: 0.020000\n",
+      "2021-09-21 22:05:23,510 epoch 3 - iter 32/81 - loss 0.11082739 - samples/sec: 19.59 - lr: 0.020000\n",
+      "2021-09-21 22:05:23,978 epoch 3 - iter 40/81 - loss 0.17198527 - samples/sec: 17.13 - lr: 0.020000\n",
+      "2021-09-21 22:05:24,389 epoch 3 - iter 48/81 - loss 0.24644616 - samples/sec: 19.51 - lr: 0.020000\n",
+      "2021-09-21 22:05:24,875 epoch 3 - iter 56/81 - loss 0.23947059 - samples/sec: 16.47 - lr: 0.020000\n",
+      "2021-09-21 22:05:25,306 epoch 3 - iter 64/81 - loss 0.21455819 - samples/sec: 18.59 - lr: 0.020000\n",
+      "2021-09-21 22:05:25,742 epoch 3 - iter 72/81 - loss 0.23703475 - samples/sec: 18.37 - lr: 0.020000\n",
+      "2021-09-21 22:05:26,209 epoch 3 - iter 80/81 - loss 0.21382814 - samples/sec: 17.17 - lr: 0.020000\n",
+      "2021-09-21 22:05:26,275 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:05:26,275 EPOCH 3 done: loss 0.2114 - lr 0.0200000\n",
+      "2021-09-21 22:05:26,796 DEV : loss 0.46118202805519104 - score 0.6667\n",
+      "2021-09-21 22:05:26,798 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:05:53,952 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:05:54,605 epoch 3 - iter 8/81 - loss 0.00941894 - samples/sec: 12.56 - lr: 0.020000\n",
-      "2021-09-08 02:05:55,006 epoch 3 - iter 16/81 - loss 0.07674767 - samples/sec: 20.04 - lr: 0.020000\n",
-      "2021-09-08 02:05:55,515 epoch 3 - iter 24/81 - loss 0.07897458 - samples/sec: 15.75 - lr: 0.020000\n",
-      "2021-09-08 02:05:55,933 epoch 3 - iter 32/81 - loss 0.22493101 - samples/sec: 19.17 - lr: 0.020000\n",
-      "2021-09-08 02:05:56,394 epoch 3 - iter 40/81 - loss 0.19723635 - samples/sec: 17.35 - lr: 0.020000\n",
-      "2021-09-08 02:05:56,849 epoch 3 - iter 48/81 - loss 0.24819168 - samples/sec: 17.62 - lr: 0.020000\n",
-      "2021-09-08 02:05:57,342 epoch 3 - iter 56/81 - loss 0.21547744 - samples/sec: 16.24 - lr: 0.020000\n",
-      "2021-09-08 02:05:57,766 epoch 3 - iter 64/81 - loss 0.20205884 - samples/sec: 18.90 - lr: 0.020000\n",
-      "2021-09-08 02:05:58,204 epoch 3 - iter 72/81 - loss 0.23041147 - samples/sec: 18.30 - lr: 0.020000\n",
-      "2021-09-08 02:05:58,665 epoch 3 - iter 80/81 - loss 0.31347007 - samples/sec: 17.39 - lr: 0.020000\n",
-      "2021-09-08 02:05:58,717 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:05:58,718 EPOCH 3 done: loss 0.3096 - lr 0.0200000\n",
-      "2021-09-08 02:05:59,263 DEV : loss 0.3530953824520111 - score 0.5556\n",
-      "2021-09-08 02:05:59,264 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:05:30,739 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:05:31,283 epoch 4 - iter 8/81 - loss 0.10314360 - samples/sec: 15.23 - lr: 0.020000\n",
+      "2021-09-21 22:05:31,804 epoch 4 - iter 16/81 - loss 0.21455739 - samples/sec: 15.38 - lr: 0.020000\n",
+      "2021-09-21 22:05:32,452 epoch 4 - iter 24/81 - loss 0.14652655 - samples/sec: 12.36 - lr: 0.020000\n",
+      "2021-09-21 22:05:33,046 epoch 4 - iter 32/81 - loss 0.14841913 - samples/sec: 13.47 - lr: 0.020000\n",
+      "2021-09-21 22:05:33,720 epoch 4 - iter 40/81 - loss 0.11928958 - samples/sec: 11.89 - lr: 0.020000\n",
+      "2021-09-21 22:05:34,255 epoch 4 - iter 48/81 - loss 0.14656815 - samples/sec: 14.97 - lr: 0.020000\n",
+      "2021-09-21 22:05:34,984 epoch 4 - iter 56/81 - loss 0.13927473 - samples/sec: 10.99 - lr: 0.020000\n",
+      "2021-09-21 22:05:35,570 epoch 4 - iter 64/81 - loss 0.14990054 - samples/sec: 13.66 - lr: 0.020000\n",
+      "2021-09-21 22:05:36,201 epoch 4 - iter 72/81 - loss 0.15330882 - samples/sec: 12.68 - lr: 0.020000\n",
+      "2021-09-21 22:05:36,851 epoch 4 - iter 80/81 - loss 0.15004021 - samples/sec: 12.33 - lr: 0.020000\n",
+      "2021-09-21 22:05:36,961 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:05:36,962 EPOCH 4 done: loss 0.1482 - lr 0.0200000\n",
+      "2021-09-21 22:05:37,719 DEV : loss 0.280114084482193 - score 0.7778\n",
+      "2021-09-21 22:05:37,720 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:06:03,170 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:06:03,678 epoch 4 - iter 8/81 - loss 0.25100988 - samples/sec: 16.27 - lr: 0.020000\n",
-      "2021-09-08 02:06:04,089 epoch 4 - iter 16/81 - loss 0.13134137 - samples/sec: 19.52 - lr: 0.020000\n",
-      "2021-09-08 02:06:04,477 epoch 4 - iter 24/81 - loss 0.13101310 - samples/sec: 20.62 - lr: 0.020000\n",
-      "2021-09-08 02:06:04,909 epoch 4 - iter 32/81 - loss 0.24269579 - samples/sec: 18.55 - lr: 0.020000\n",
-      "2021-09-08 02:06:05,315 epoch 4 - iter 40/81 - loss 0.20813554 - samples/sec: 19.75 - lr: 0.020000\n",
-      "2021-09-08 02:06:05,902 epoch 4 - iter 48/81 - loss 0.23285331 - samples/sec: 13.66 - lr: 0.020000\n",
-      "2021-09-08 02:06:06,403 epoch 4 - iter 56/81 - loss 0.21725983 - samples/sec: 15.97 - lr: 0.020000\n",
-      "2021-09-08 02:06:06,803 epoch 4 - iter 64/81 - loss 0.28289152 - samples/sec: 20.06 - lr: 0.020000\n",
-      "2021-09-08 02:06:07,355 epoch 4 - iter 72/81 - loss 0.28764252 - samples/sec: 14.51 - lr: 0.020000\n",
-      "2021-09-08 02:06:07,854 epoch 4 - iter 80/81 - loss 0.28682115 - samples/sec: 16.03 - lr: 0.020000\n",
-      "2021-09-08 02:06:07,899 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:06:07,899 EPOCH 4 done: loss 0.2834 - lr 0.0200000\n",
-      "2021-09-08 02:06:08,246 DEV : loss 0.6445792317390442 - score 0.4444\n",
-      "2021-09-08 02:06:08,247 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:06:08,250 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:06:08,750 epoch 5 - iter 8/81 - loss 0.26273611 - samples/sec: 16.51 - lr: 0.020000\n",
-      "2021-09-08 02:06:09,212 epoch 5 - iter 16/81 - loss 0.35981301 - samples/sec: 17.31 - lr: 0.020000\n",
-      "2021-09-08 02:06:09,820 epoch 5 - iter 24/81 - loss 0.26222497 - samples/sec: 13.18 - lr: 0.020000\n",
-      "2021-09-08 02:06:10,256 epoch 5 - iter 32/81 - loss 0.19907549 - samples/sec: 18.38 - lr: 0.020000\n",
-      "2021-09-08 02:06:10,718 epoch 5 - iter 40/81 - loss 0.24614811 - samples/sec: 17.36 - lr: 0.020000\n",
-      "2021-09-08 02:06:11,248 epoch 5 - iter 48/81 - loss 0.25145547 - samples/sec: 15.12 - lr: 0.020000\n",
-      "2021-09-08 02:06:11,758 epoch 5 - iter 56/81 - loss 0.22227873 - samples/sec: 15.71 - lr: 0.020000\n",
-      "2021-09-08 02:06:12,136 epoch 5 - iter 64/81 - loss 0.19477605 - samples/sec: 21.18 - lr: 0.020000\n",
-      "2021-09-08 02:06:12,532 epoch 5 - iter 72/81 - loss 0.18552743 - samples/sec: 20.20 - lr: 0.020000\n",
-      "2021-09-08 02:06:12,913 epoch 5 - iter 80/81 - loss 0.16714130 - samples/sec: 21.03 - lr: 0.020000\n",
-      "2021-09-08 02:06:12,957 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:06:12,958 EPOCH 5 done: loss 0.1651 - lr 0.0200000\n",
-      "2021-09-08 02:06:13,485 DEV : loss 0.4665123522281647 - score 0.4444\n",
-      "2021-09-08 02:06:13,486 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:06:13,884 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:06:14,422 epoch 6 - iter 8/81 - loss 0.00182282 - samples/sec: 15.33 - lr: 0.020000\n",
-      "2021-09-08 02:06:14,968 epoch 6 - iter 16/81 - loss 0.03387465 - samples/sec: 14.66 - lr: 0.020000\n",
-      "2021-09-08 02:06:15,460 epoch 6 - iter 24/81 - loss 0.02306231 - samples/sec: 16.29 - lr: 0.020000\n",
-      "2021-09-08 02:06:15,875 epoch 6 - iter 32/81 - loss 0.02243506 - samples/sec: 19.32 - lr: 0.020000\n",
-      "2021-09-08 02:06:16,289 epoch 6 - iter 40/81 - loss 0.06389710 - samples/sec: 19.35 - lr: 0.020000\n",
-      "2021-09-08 02:06:16,667 epoch 6 - iter 48/81 - loss 0.05341677 - samples/sec: 21.17 - lr: 0.020000\n",
-      "2021-09-08 02:06:17,082 epoch 6 - iter 56/81 - loss 0.04961054 - samples/sec: 19.32 - lr: 0.020000\n",
-      "2021-09-08 02:06:17,553 epoch 6 - iter 64/81 - loss 0.04360812 - samples/sec: 17.00 - lr: 0.020000\n",
-      "2021-09-08 02:06:18,033 epoch 6 - iter 72/81 - loss 0.03915012 - samples/sec: 16.71 - lr: 0.020000\n",
-      "2021-09-08 02:06:18,429 epoch 6 - iter 80/81 - loss 0.05108368 - samples/sec: 20.23 - lr: 0.020000\n",
-      "2021-09-08 02:06:18,541 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 22:05:41,844 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:05:42,427 epoch 5 - iter 8/81 - loss 0.00430949 - samples/sec: 14.26 - lr: 0.020000\n",
+      "2021-09-21 22:05:43,191 epoch 5 - iter 16/81 - loss 0.11790204 - samples/sec: 10.48 - lr: 0.020000\n",
+      "2021-09-21 22:05:43,845 epoch 5 - iter 24/81 - loss 0.14812761 - samples/sec: 12.26 - lr: 0.020000\n",
+      "2021-09-21 22:05:44,431 epoch 5 - iter 32/81 - loss 0.11200072 - samples/sec: 13.65 - lr: 0.020000\n",
+      "2021-09-21 22:05:44,948 epoch 5 - iter 40/81 - loss 0.12186129 - samples/sec: 15.51 - lr: 0.020000\n",
+      "2021-09-21 22:05:45,554 epoch 5 - iter 48/81 - loss 0.11319434 - samples/sec: 13.21 - lr: 0.020000\n",
+      "2021-09-21 22:05:46,134 epoch 5 - iter 56/81 - loss 0.11897467 - samples/sec: 13.81 - lr: 0.020000\n",
+      "2021-09-21 22:05:46,730 epoch 5 - iter 64/81 - loss 0.12296446 - samples/sec: 13.43 - lr: 0.020000\n",
+      "2021-09-21 22:05:47,293 epoch 5 - iter 72/81 - loss 0.10965022 - samples/sec: 14.23 - lr: 0.020000\n",
+      "2021-09-21 22:05:47,981 epoch 5 - iter 80/81 - loss 0.09877320 - samples/sec: 11.64 - lr: 0.020000\n",
+      "2021-09-21 22:05:48,032 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:05:48,032 EPOCH 5 done: loss 0.0976 - lr 0.0200000\n",
+      "2021-09-21 22:05:48,660 DEV : loss 0.3235897123813629 - score 0.7778\n",
+      "2021-09-21 22:05:48,662 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:05:48,664 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:05:49,455 epoch 6 - iter 8/81 - loss 0.00385271 - samples/sec: 10.42 - lr: 0.020000\n",
+      "2021-09-21 22:05:49,999 epoch 6 - iter 16/81 - loss 0.00240530 - samples/sec: 14.73 - lr: 0.020000\n",
+      "2021-09-21 22:05:50,611 epoch 6 - iter 24/81 - loss 0.00395630 - samples/sec: 13.08 - lr: 0.020000\n",
+      "2021-09-21 22:05:51,356 epoch 6 - iter 32/81 - loss 0.02686329 - samples/sec: 10.76 - lr: 0.020000\n",
+      "2021-09-21 22:05:51,969 epoch 6 - iter 40/81 - loss 0.02282461 - samples/sec: 13.05 - lr: 0.020000\n",
+      "2021-09-21 22:05:52,432 epoch 6 - iter 48/81 - loss 0.02241216 - samples/sec: 17.33 - lr: 0.020000\n",
+      "2021-09-21 22:05:53,049 epoch 6 - iter 56/81 - loss 0.03919901 - samples/sec: 12.97 - lr: 0.020000\n",
+      "2021-09-21 22:05:53,592 epoch 6 - iter 64/81 - loss 0.03513681 - samples/sec: 14.75 - lr: 0.020000\n",
+      "2021-09-21 22:05:54,178 epoch 6 - iter 72/81 - loss 0.03429146 - samples/sec: 13.67 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:06:18,542 EPOCH 6 done: loss 0.0505 - lr 0.0200000\n",
-      "2021-09-08 02:06:18,883 DEV : loss 0.6729372143745422 - score 0.4444\n",
-      "2021-09-08 02:06:18,884 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 02:06:18,886 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:06:19,312 epoch 7 - iter 8/81 - loss 0.00046379 - samples/sec: 19.47 - lr: 0.020000\n",
-      "2021-09-08 02:06:19,789 epoch 7 - iter 16/81 - loss 0.00097316 - samples/sec: 16.82 - lr: 0.020000\n",
-      "2021-09-08 02:06:20,456 epoch 7 - iter 24/81 - loss 0.00079242 - samples/sec: 11.99 - lr: 0.020000\n",
-      "2021-09-08 02:06:20,910 epoch 7 - iter 32/81 - loss 0.00153756 - samples/sec: 17.66 - lr: 0.020000\n",
-      "2021-09-08 02:06:21,326 epoch 7 - iter 40/81 - loss 0.03228065 - samples/sec: 19.27 - lr: 0.020000\n",
-      "2021-09-08 02:06:21,714 epoch 7 - iter 48/81 - loss 0.02696627 - samples/sec: 20.68 - lr: 0.020000\n",
-      "2021-09-08 02:06:22,187 epoch 7 - iter 56/81 - loss 0.02318534 - samples/sec: 16.95 - lr: 0.020000\n",
-      "2021-09-08 02:06:22,866 epoch 7 - iter 64/81 - loss 0.02150773 - samples/sec: 11.79 - lr: 0.020000\n",
-      "2021-09-08 02:06:23,321 epoch 7 - iter 72/81 - loss 0.01999324 - samples/sec: 17.59 - lr: 0.020000\n",
-      "2021-09-08 02:06:23,694 epoch 7 - iter 80/81 - loss 0.01818134 - samples/sec: 21.50 - lr: 0.020000\n",
-      "2021-09-08 02:06:23,740 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:06:23,741 EPOCH 7 done: loss 0.0180 - lr 0.0200000\n",
-      "2021-09-08 02:06:24,162 DEV : loss 0.627832293510437 - score 0.5556\n",
-      "Epoch     7: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 02:06:24,163 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 02:06:24,944 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:06:25,382 epoch 8 - iter 8/81 - loss 0.33584251 - samples/sec: 18.94 - lr: 0.010000\n",
-      "2021-09-08 02:06:25,756 epoch 8 - iter 16/81 - loss 0.16828228 - samples/sec: 21.45 - lr: 0.010000\n",
-      "2021-09-08 02:06:26,150 epoch 8 - iter 24/81 - loss 0.11234695 - samples/sec: 20.33 - lr: 0.010000\n",
-      "2021-09-08 02:06:26,828 epoch 8 - iter 32/81 - loss 0.09254007 - samples/sec: 11.81 - lr: 0.010000\n",
-      "2021-09-08 02:06:27,239 epoch 8 - iter 40/81 - loss 0.07411537 - samples/sec: 19.50 - lr: 0.010000\n",
-      "2021-09-08 02:06:27,790 epoch 8 - iter 48/81 - loss 0.06182376 - samples/sec: 14.54 - lr: 0.010000\n",
-      "2021-09-08 02:06:28,137 epoch 8 - iter 56/81 - loss 0.05305090 - samples/sec: 23.07 - lr: 0.010000\n",
-      "2021-09-08 02:06:28,693 epoch 8 - iter 64/81 - loss 0.05255667 - samples/sec: 14.41 - lr: 0.010000\n",
-      "2021-09-08 02:06:29,066 epoch 8 - iter 72/81 - loss 0.04675554 - samples/sec: 21.46 - lr: 0.010000\n",
-      "2021-09-08 02:06:29,531 epoch 8 - iter 80/81 - loss 0.04214177 - samples/sec: 17.25 - lr: 0.010000\n",
-      "2021-09-08 02:06:29,583 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:06:29,584 EPOCH 8 done: loss 0.0416 - lr 0.0100000\n",
-      "2021-09-08 02:06:29,925 DEV : loss 0.7040160298347473 - score 0.4444\n",
-      "2021-09-08 02:06:29,926 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:06:29,928 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:06:30,366 epoch 9 - iter 8/81 - loss 0.00044978 - samples/sec: 18.91 - lr: 0.010000\n",
-      "2021-09-08 02:06:30,788 epoch 9 - iter 16/81 - loss 0.00055944 - samples/sec: 18.97 - lr: 0.010000\n",
-      "2021-09-08 02:06:31,198 epoch 9 - iter 24/81 - loss 0.00048743 - samples/sec: 19.55 - lr: 0.010000\n",
-      "2021-09-08 02:06:31,686 epoch 9 - iter 32/81 - loss 0.00077772 - samples/sec: 16.43 - lr: 0.010000\n",
-      "2021-09-08 02:06:32,251 epoch 9 - iter 40/81 - loss 0.00072622 - samples/sec: 14.18 - lr: 0.010000\n",
-      "2021-09-08 02:06:32,798 epoch 9 - iter 48/81 - loss 0.00067861 - samples/sec: 14.63 - lr: 0.010000\n",
-      "2021-09-08 02:06:33,327 epoch 9 - iter 56/81 - loss 0.00192131 - samples/sec: 15.14 - lr: 0.010000\n",
-      "2021-09-08 02:06:33,737 epoch 9 - iter 64/81 - loss 0.00174096 - samples/sec: 19.58 - lr: 0.010000\n",
-      "2021-09-08 02:06:34,133 epoch 9 - iter 72/81 - loss 0.00157434 - samples/sec: 20.24 - lr: 0.010000\n",
-      "2021-09-08 02:06:34,522 epoch 9 - iter 80/81 - loss 0.00144868 - samples/sec: 20.56 - lr: 0.010000\n",
-      "2021-09-08 02:06:34,566 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:06:34,567 EPOCH 9 done: loss 0.0014 - lr 0.0100000\n",
-      "2021-09-08 02:06:35,154 DEV : loss 0.7057857513427734 - score 0.4444\n",
-      "2021-09-08 02:06:35,155 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:06:35,158 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:06:35,752 epoch 10 - iter 8/81 - loss 0.00021290 - samples/sec: 13.81 - lr: 0.010000\n",
-      "2021-09-08 02:06:36,474 epoch 10 - iter 16/81 - loss 0.00021334 - samples/sec: 11.09 - lr: 0.010000\n",
-      "2021-09-08 02:06:36,897 epoch 10 - iter 24/81 - loss 0.00044541 - samples/sec: 18.94 - lr: 0.010000\n",
-      "2021-09-08 02:06:37,255 epoch 10 - iter 32/81 - loss 0.00052712 - samples/sec: 22.38 - lr: 0.010000\n",
-      "2021-09-08 02:06:37,692 epoch 10 - iter 40/81 - loss 0.03700455 - samples/sec: 18.36 - lr: 0.010000\n",
-      "2021-09-08 02:06:38,056 epoch 10 - iter 48/81 - loss 0.03086904 - samples/sec: 21.99 - lr: 0.010000\n",
-      "2021-09-08 02:06:38,516 epoch 10 - iter 56/81 - loss 0.02650332 - samples/sec: 17.43 - lr: 0.010000\n",
-      "2021-09-08 02:06:38,973 epoch 10 - iter 64/81 - loss 0.02414741 - samples/sec: 17.52 - lr: 0.010000\n",
-      "2021-09-08 02:06:39,442 epoch 10 - iter 72/81 - loss 0.02150231 - samples/sec: 17.06 - lr: 0.010000\n",
-      "2021-09-08 02:06:39,896 epoch 10 - iter 80/81 - loss 0.01937999 - samples/sec: 17.67 - lr: 0.010000\n",
-      "2021-09-08 02:06:39,963 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:06:39,964 EPOCH 10 done: loss 0.0191 - lr 0.0100000\n",
-      "2021-09-08 02:06:40,309 DEV : loss 0.673952043056488 - score 0.4444\n",
-      "2021-09-08 02:06:40,310 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 02:06:44,438 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:06:44,438 Testing using best model ...\n",
-      "2021-09-08 02:06:44,440 loading file None/best-model.pt\n",
+      "2021-09-21 22:05:54,898 epoch 6 - iter 80/81 - loss 0.03089772 - samples/sec: 11.13 - lr: 0.020000\n",
+      "2021-09-21 22:05:54,956 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:05:54,957 EPOCH 6 done: loss 0.0305 - lr 0.0200000\n",
+      "2021-09-21 22:05:55,584 DEV : loss 0.38728150725364685 - score 0.7778\n",
+      "2021-09-21 22:05:55,585 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 22:05:55,587 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:05:56,151 epoch 7 - iter 8/81 - loss 0.00040588 - samples/sec: 14.84 - lr: 0.020000\n",
+      "2021-09-21 22:05:56,913 epoch 7 - iter 16/81 - loss 0.00058964 - samples/sec: 10.50 - lr: 0.020000\n",
+      "2021-09-21 22:05:57,508 epoch 7 - iter 24/81 - loss 0.00054269 - samples/sec: 13.47 - lr: 0.020000\n",
+      "2021-09-21 22:05:58,020 epoch 7 - iter 32/81 - loss 0.00135562 - samples/sec: 15.63 - lr: 0.020000\n",
+      "2021-09-21 22:05:58,686 epoch 7 - iter 40/81 - loss 0.05686272 - samples/sec: 12.03 - lr: 0.020000\n",
+      "2021-09-21 22:05:59,232 epoch 7 - iter 48/81 - loss 0.04745163 - samples/sec: 14.68 - lr: 0.020000\n",
+      "2021-09-21 22:05:59,880 epoch 7 - iter 56/81 - loss 0.09731644 - samples/sec: 12.35 - lr: 0.020000\n",
+      "2021-09-21 22:06:00,323 epoch 7 - iter 64/81 - loss 0.08522491 - samples/sec: 18.07 - lr: 0.020000\n",
+      "2021-09-21 22:06:00,979 epoch 7 - iter 72/81 - loss 0.07829986 - samples/sec: 12.20 - lr: 0.020000\n",
+      "2021-09-21 22:06:01,794 epoch 7 - iter 80/81 - loss 0.07053848 - samples/sec: 9.83 - lr: 0.020000\n",
+      "2021-09-21 22:06:01,851 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:06:01,851 EPOCH 7 done: loss 0.0697 - lr 0.0200000\n",
+      "2021-09-21 22:06:02,475 DEV : loss 0.3303304612636566 - score 0.7778\n",
+      "2021-09-21 22:06:02,477 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 22:06:02,479 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:06:03,001 epoch 8 - iter 8/81 - loss 0.00044455 - samples/sec: 16.03 - lr: 0.020000\n",
+      "2021-09-21 22:06:03,586 epoch 8 - iter 16/81 - loss 0.00055132 - samples/sec: 13.68 - lr: 0.020000\n",
+      "2021-09-21 22:06:04,071 epoch 8 - iter 24/81 - loss 0.00052535 - samples/sec: 16.50 - lr: 0.020000\n",
+      "2021-09-21 22:06:04,506 epoch 8 - iter 32/81 - loss 0.00062786 - samples/sec: 18.43 - lr: 0.020000\n",
+      "2021-09-21 22:06:04,972 epoch 8 - iter 40/81 - loss 0.00057997 - samples/sec: 17.23 - lr: 0.020000\n",
+      "2021-09-21 22:06:05,521 epoch 8 - iter 48/81 - loss 0.00055885 - samples/sec: 14.58 - lr: 0.020000\n",
+      "2021-09-21 22:06:05,978 epoch 8 - iter 56/81 - loss 0.00060391 - samples/sec: 17.54 - lr: 0.020000\n",
+      "2021-09-21 22:06:06,577 epoch 8 - iter 64/81 - loss 0.00058569 - samples/sec: 13.36 - lr: 0.020000\n",
+      "2021-09-21 22:06:06,996 epoch 8 - iter 72/81 - loss 0.00060686 - samples/sec: 19.14 - lr: 0.020000\n",
+      "2021-09-21 22:06:07,371 epoch 8 - iter 80/81 - loss 0.00058154 - samples/sec: 21.40 - lr: 0.020000\n",
+      "2021-09-21 22:06:07,437 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:06:07,437 EPOCH 8 done: loss 0.0006 - lr 0.0200000\n",
+      "2021-09-21 22:06:08,057 DEV : loss 0.33986252546310425 - score 0.7778\n",
+      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 22:06:08,059 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 22:06:08,062 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:06:08,594 epoch 9 - iter 8/81 - loss 0.00038331 - samples/sec: 15.46 - lr: 0.010000\n",
+      "2021-09-21 22:06:08,997 epoch 9 - iter 16/81 - loss 0.00039326 - samples/sec: 19.89 - lr: 0.010000\n",
+      "2021-09-21 22:06:09,409 epoch 9 - iter 24/81 - loss 0.00033038 - samples/sec: 19.47 - lr: 0.010000\n",
+      "2021-09-21 22:06:09,899 epoch 9 - iter 32/81 - loss 0.00039720 - samples/sec: 16.35 - lr: 0.010000\n",
+      "2021-09-21 22:06:10,373 epoch 9 - iter 40/81 - loss 0.00035624 - samples/sec: 16.89 - lr: 0.010000\n",
+      "2021-09-21 22:06:10,800 epoch 9 - iter 48/81 - loss 0.00033741 - samples/sec: 18.77 - lr: 0.010000\n",
+      "2021-09-21 22:06:11,262 epoch 9 - iter 56/81 - loss 0.00040195 - samples/sec: 17.37 - lr: 0.010000\n",
+      "2021-09-21 22:06:11,701 epoch 9 - iter 64/81 - loss 0.00046189 - samples/sec: 18.23 - lr: 0.010000\n",
+      "2021-09-21 22:06:12,108 epoch 9 - iter 72/81 - loss 0.00044034 - samples/sec: 19.67 - lr: 0.010000\n",
+      "2021-09-21 22:06:12,551 epoch 9 - iter 80/81 - loss 0.00048134 - samples/sec: 18.10 - lr: 0.010000\n",
+      "2021-09-21 22:06:12,596 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:06:12,596 EPOCH 9 done: loss 0.0005 - lr 0.0100000\n",
+      "2021-09-21 22:06:13,050 DEV : loss 0.3488784432411194 - score 0.7778\n",
+      "2021-09-21 22:06:13,052 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:06:13,054 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:06:13,650 epoch 10 - iter 8/81 - loss 0.00031973 - samples/sec: 13.78 - lr: 0.010000\n",
+      "2021-09-21 22:06:14,195 epoch 10 - iter 16/81 - loss 0.00034791 - samples/sec: 14.69 - lr: 0.010000\n",
+      "2021-09-21 22:06:14,811 epoch 10 - iter 24/81 - loss 0.00030556 - samples/sec: 13.01 - lr: 0.010000\n",
+      "2021-09-21 22:06:15,267 epoch 10 - iter 32/81 - loss 0.00026733 - samples/sec: 17.54 - lr: 0.010000\n",
+      "2021-09-21 22:06:15,853 epoch 10 - iter 40/81 - loss 0.00027656 - samples/sec: 13.67 - lr: 0.010000\n",
+      "2021-09-21 22:06:16,324 epoch 10 - iter 48/81 - loss 0.00026516 - samples/sec: 17.02 - lr: 0.010000\n",
+      "2021-09-21 22:06:16,799 epoch 10 - iter 56/81 - loss 0.00028561 - samples/sec: 16.84 - lr: 0.010000\n",
+      "2021-09-21 22:06:17,385 epoch 10 - iter 64/81 - loss 0.00026915 - samples/sec: 13.67 - lr: 0.010000\n",
+      "2021-09-21 22:06:18,121 epoch 10 - iter 72/81 - loss 0.00026286 - samples/sec: 10.88 - lr: 0.010000\n",
+      "2021-09-21 22:06:18,677 epoch 10 - iter 80/81 - loss 0.00026311 - samples/sec: 14.39 - lr: 0.010000\n",
+      "2021-09-21 22:06:18,731 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:06:18,731 EPOCH 10 done: loss 0.0003 - lr 0.0100000\n",
+      "2021-09-21 22:06:19,315 DEV : loss 0.3521053194999695 - score 0.7778\n",
+      "2021-09-21 22:06:19,317 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 22:06:23,939 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:06:23,940 Testing using best model ...\n",
+      "2021-09-21 22:06:23,941 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:06:49,334 \t0.0\n",
-      "2021-09-08 02:06:49,334 \n",
+      "2021-09-21 22:06:28,286 \t0.7\n",
+      "2021-09-21 22:06:28,286 \n",
       "Results:\n",
-      "- F-score (micro) 0.0\n",
-      "- F-score (macro) 0.0\n",
-      "- Accuracy 0.0\n",
+      "- F-score (micro) 0.7\n",
+      "- F-score (macro) 0.5667\n",
+      "- Accuracy 0.7\n",
       "\n",
       "By class:\n",
-      "                                               precision    recall  f1-score   support\n",
+      "                                           precision    recall  f1-score   support\n",
       "\n",
-      "                    this text is about travel     0.0000    0.0000    0.0000         0\n",
-      "                this text is about technology     0.0000    0.0000    0.0000         0\n",
-      "                  this text is about wellness     0.0000    0.0000    0.0000         0\n",
-      "                     this text is about women     0.0000    0.0000    0.0000         0\n",
-      "                   this text is about parents     0.0000    0.0000    0.0000         0\n",
-      "                  this text is about business     0.0000    0.0000    0.0000         0\n",
-      "                  this text is about weddings     0.0000    0.0000    0.0000         0\n",
-      "                   this text is about fashion     0.0000    0.0000    0.0000         0\n",
-      "this text is about entertainmen,the attention     0.0000    0.0000    0.0000         0\n",
-      "                   this text is about science     0.0000    0.0000    0.0000         0\n",
-      "                   this text is about divorce     0.0000    0.0000    0.0000         0\n",
-      "                     this text is about crime     0.0000    0.0000    0.0000         0\n",
-      "                  this text is about religion     0.0000    0.0000    0.0000         0\n",
-      "                    this text is about sports     0.0000    0.0000    0.0000         0\n",
-      "                  this text is about politics     0.0000    0.0000    0.0000         0\n",
-      "                    this text is about comedy     0.0000    0.0000    0.0000         0\n",
+      "This text is about Family & Relationships     0.0000    0.0000    0.0000         0\n",
+      "                This text is about Health     0.5000    1.0000    0.6667         1\n",
+      " This text is about Science & Mathematics     0.0000    0.0000    0.0000         1\n",
+      " This text is about Entertainment & Music     1.0000    1.0000    1.0000         1\n",
+      "  This text is about Computers & Internet     1.0000    1.0000    1.0000         1\n",
+      " This text is about Education & Reference     0.5000    1.0000    0.6667         1\n",
+      "                This text is about Sports     1.0000    1.0000    1.0000         1\n",
+      "     This text is about Society & Culture     1.0000    0.5000    0.6667         2\n",
+      "    This text is about Business & Finance     0.0000    0.0000    0.0000         1\n",
+      " This text is about Politics & Government     0.5000    1.0000    0.6667         1\n",
       "\n",
-      "                                    micro avg     0.0000    0.0000    0.0000         0\n",
-      "                                    macro avg     0.0000    0.0000    0.0000         0\n",
-      "                                 weighted avg     0.0000    0.0000    0.0000         0\n",
-      "                                  samples avg     0.0000    0.0000    0.0000         0\n",
+      "                                micro avg     0.7000    0.7000    0.7000        10\n",
+      "                                macro avg     0.5500    0.6500    0.5667        10\n",
+      "                             weighted avg     0.6500    0.7000    0.6333        10\n",
+      "                              samples avg     0.7000    0.7000    0.7000        10\n",
       "\n",
-      "2021-09-08 02:06:49,335 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 22:06:28,286 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:06:48,919 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
+      "init TARS\n",
+      "2021-09-21 22:06:53,162 Computing label dictionary. Progress:\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 91/91 [00:00<00:00, 15808.55it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:07:05,339 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
-      "init TARS\n",
-      "2021-09-08 02:07:09,217 Computing label dictionary. Progress:\n"
+      "2021-09-21 22:06:53,170 [b'This text is about Family & Relationships', b'This text is about Health', b'This text is about Science & Mathematics', b'This text is about Entertainment & Music', b'This text is about Computers & Internet', b'This text is about Education & Reference', b'This text is about Sports', b'This text is about Society & Culture', b'This text is about Business & Finance', b'This text is about Politics & Government']\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 91/91 [00:00<00:00, 15151.51it/s]"
+      "\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:07:09,226 [b'This text is about Family & Relationships', b'This text is about Health', b'This text is about Science & Mathematics', b'This text is about Entertainment & Music', b'This text is about Computers & Internet', b'This text is about Education & Reference', b'This text is about Sports', b'This text is about Society & Culture', b'This text is about Business & Finance', b'This text is about Politics & Government']\n",
-      "2021-09-08 02:07:09,254 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:07:09,256 Model: \"TARSClassifier(\n",
+      "2021-09-21 22:06:53,366 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:06:53,368 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -4941,259 +4960,242 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:07:09,257 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:07:09,257 Corpus: \"Corpus: 81 train + 9 dev + 10 test sentences\"\n",
-      "2021-09-08 02:07:09,258 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:07:09,258 Parameters:\n",
-      "2021-09-08 02:07:09,258  - learning_rate: \"0.02\"\n",
-      "2021-09-08 02:07:09,258  - mini_batch_size: \"1\"\n",
-      "2021-09-08 02:07:09,259  - patience: \"3\"\n",
-      "2021-09-08 02:07:09,259  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 02:07:09,259  - max_epochs: \"10\"\n",
-      "2021-09-08 02:07:09,260  - shuffle: \"True\"\n",
-      "2021-09-08 02:07:09,260  - train_with_dev: \"False\"\n",
-      "2021-09-08 02:07:09,260  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 02:07:09,260 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:07:09,261 Model training base path: \"None\"\n",
-      "2021-09-08 02:07:09,261 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:07:09,261 Device: cuda:1\n",
-      "2021-09-08 02:07:09,262 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:07:09,262 Embeddings storage mode: cpu\n",
-      "2021-09-08 02:07:09,271 ----------------------------------------------------------------------------------------------------\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 02:07:09,801 epoch 1 - iter 8/81 - loss 0.38559937 - samples/sec: 15.54 - lr: 0.020000\n",
-      "2021-09-08 02:07:10,245 epoch 1 - iter 16/81 - loss 0.26781972 - samples/sec: 18.08 - lr: 0.020000\n",
-      "2021-09-08 02:07:10,613 epoch 1 - iter 24/81 - loss 0.34741786 - samples/sec: 21.76 - lr: 0.020000\n",
-      "2021-09-08 02:07:11,157 epoch 1 - iter 32/81 - loss 0.52639557 - samples/sec: 14.72 - lr: 0.020000\n",
-      "2021-09-08 02:07:11,555 epoch 1 - iter 40/81 - loss 0.44630387 - samples/sec: 20.17 - lr: 0.020000\n",
-      "2021-09-08 02:07:11,957 epoch 1 - iter 48/81 - loss 0.44026014 - samples/sec: 19.93 - lr: 0.020000\n",
-      "2021-09-08 02:07:12,365 epoch 1 - iter 56/81 - loss 0.41273499 - samples/sec: 19.62 - lr: 0.020000\n",
-      "2021-09-08 02:07:12,864 epoch 1 - iter 64/81 - loss 0.46303913 - samples/sec: 16.06 - lr: 0.020000\n",
-      "2021-09-08 02:07:13,364 epoch 1 - iter 72/81 - loss 0.43767362 - samples/sec: 16.02 - lr: 0.020000\n",
-      "2021-09-08 02:07:13,956 epoch 1 - iter 80/81 - loss 0.44689028 - samples/sec: 13.53 - lr: 0.020000\n",
-      "2021-09-08 02:07:14,011 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:07:14,012 EPOCH 1 done: loss 0.4416 - lr 0.0200000\n",
-      "2021-09-08 02:07:14,627 DEV : loss 0.4477969706058502 - score 0.2222\n",
-      "2021-09-08 02:07:14,628 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:06:53,368 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:06:53,369 Corpus: \"Corpus: 81 train + 9 dev + 10 test sentences\"\n",
+      "2021-09-21 22:06:53,369 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:06:53,369 Parameters:\n",
+      "2021-09-21 22:06:53,370  - learning_rate: \"0.02\"\n",
+      "2021-09-21 22:06:53,370  - mini_batch_size: \"1\"\n",
+      "2021-09-21 22:06:53,370  - patience: \"3\"\n",
+      "2021-09-21 22:06:53,370  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 22:06:53,371  - max_epochs: \"10\"\n",
+      "2021-09-21 22:06:53,371  - shuffle: \"True\"\n",
+      "2021-09-21 22:06:53,371  - train_with_dev: \"False\"\n",
+      "2021-09-21 22:06:53,371  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 22:06:53,372 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:06:53,372 Model training base path: \"None\"\n",
+      "2021-09-21 22:06:53,372 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:06:53,373 Device: cuda:0\n",
+      "2021-09-21 22:06:53,373 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:06:53,373 Embeddings storage mode: cpu\n",
+      "2021-09-21 22:06:53,420 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:06:54,000 epoch 1 - iter 8/81 - loss 0.50566902 - samples/sec: 14.17 - lr: 0.020000\n",
+      "2021-09-21 22:06:54,600 epoch 1 - iter 16/81 - loss 0.32159297 - samples/sec: 13.35 - lr: 0.020000\n",
+      "2021-09-21 22:06:55,106 epoch 1 - iter 24/81 - loss 0.44294452 - samples/sec: 15.82 - lr: 0.020000\n",
+      "2021-09-21 22:06:55,626 epoch 1 - iter 32/81 - loss 0.49120439 - samples/sec: 15.40 - lr: 0.020000\n",
+      "2021-09-21 22:06:56,225 epoch 1 - iter 40/81 - loss 0.42779696 - samples/sec: 13.36 - lr: 0.020000\n",
+      "2021-09-21 22:06:56,814 epoch 1 - iter 48/81 - loss 0.57662675 - samples/sec: 13.61 - lr: 0.020000\n",
+      "2021-09-21 22:06:57,321 epoch 1 - iter 56/81 - loss 0.54927837 - samples/sec: 15.80 - lr: 0.020000\n",
+      "2021-09-21 22:06:57,973 epoch 1 - iter 64/81 - loss 0.56216402 - samples/sec: 12.28 - lr: 0.020000\n",
+      "2021-09-21 22:06:58,495 epoch 1 - iter 72/81 - loss 0.52261214 - samples/sec: 15.35 - lr: 0.020000\n",
+      "2021-09-21 22:06:59,315 epoch 1 - iter 80/81 - loss 0.55068027 - samples/sec: 9.76 - lr: 0.020000\n",
+      "2021-09-21 22:06:59,370 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:06:59,371 EPOCH 1 done: loss 0.5445 - lr 0.0200000\n",
+      "2021-09-21 22:06:59,949 DEV : loss 0.8435221910476685 - score 0.1111\n",
+      "2021-09-21 22:06:59,950 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:07:21,961 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:07:22,426 epoch 2 - iter 8/81 - loss 0.06171898 - samples/sec: 17.87 - lr: 0.020000\n",
-      "2021-09-08 02:07:22,795 epoch 2 - iter 16/81 - loss 0.18027839 - samples/sec: 21.74 - lr: 0.020000\n",
-      "2021-09-08 02:07:23,317 epoch 2 - iter 24/81 - loss 0.17577991 - samples/sec: 15.34 - lr: 0.020000\n",
-      "2021-09-08 02:07:23,832 epoch 2 - iter 32/81 - loss 0.23696919 - samples/sec: 15.56 - lr: 0.020000\n",
-      "2021-09-08 02:07:24,309 epoch 2 - iter 40/81 - loss 0.32270161 - samples/sec: 16.80 - lr: 0.020000\n",
-      "2021-09-08 02:07:24,802 epoch 2 - iter 48/81 - loss 0.33390833 - samples/sec: 16.24 - lr: 0.020000\n",
-      "2021-09-08 02:07:25,260 epoch 2 - iter 56/81 - loss 0.33471440 - samples/sec: 17.49 - lr: 0.020000\n",
-      "2021-09-08 02:07:25,681 epoch 2 - iter 64/81 - loss 0.31348978 - samples/sec: 19.07 - lr: 0.020000\n",
-      "2021-09-08 02:07:26,147 epoch 2 - iter 72/81 - loss 0.34274642 - samples/sec: 17.19 - lr: 0.020000\n",
-      "2021-09-08 02:07:26,557 epoch 2 - iter 80/81 - loss 0.32304228 - samples/sec: 19.53 - lr: 0.020000\n",
-      "2021-09-08 02:07:26,672 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:07:26,673 EPOCH 2 done: loss 0.3252 - lr 0.0200000\n",
-      "2021-09-08 02:07:27,159 DEV : loss 0.44190725684165955 - score 0.4444\n",
-      "2021-09-08 02:07:27,160 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:07:04,824 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:07:05,374 epoch 2 - iter 8/81 - loss 0.44120414 - samples/sec: 15.25 - lr: 0.020000\n",
+      "2021-09-21 22:07:05,914 epoch 2 - iter 16/81 - loss 0.44709556 - samples/sec: 14.83 - lr: 0.020000\n",
+      "2021-09-21 22:07:06,481 epoch 2 - iter 24/81 - loss 0.44730488 - samples/sec: 14.12 - lr: 0.020000\n",
+      "2021-09-21 22:07:07,172 epoch 2 - iter 32/81 - loss 0.39809908 - samples/sec: 11.59 - lr: 0.020000\n",
+      "2021-09-21 22:07:07,907 epoch 2 - iter 40/81 - loss 0.43072906 - samples/sec: 10.90 - lr: 0.020000\n",
+      "2021-09-21 22:07:08,436 epoch 2 - iter 48/81 - loss 0.46537371 - samples/sec: 15.13 - lr: 0.020000\n",
+      "2021-09-21 22:07:09,098 epoch 2 - iter 56/81 - loss 0.41226609 - samples/sec: 12.10 - lr: 0.020000\n",
+      "2021-09-21 22:07:09,742 epoch 2 - iter 64/81 - loss 0.41939357 - samples/sec: 12.42 - lr: 0.020000\n",
+      "2021-09-21 22:07:10,427 epoch 2 - iter 72/81 - loss 0.40719660 - samples/sec: 11.70 - lr: 0.020000\n",
+      "2021-09-21 22:07:11,057 epoch 2 - iter 80/81 - loss 0.38925759 - samples/sec: 12.71 - lr: 0.020000\n",
+      "2021-09-21 22:07:11,141 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:07:11,142 EPOCH 2 done: loss 0.4044 - lr 0.0200000\n",
+      "2021-09-21 22:07:11,879 DEV : loss 0.5089170336723328 - score 0.5556\n",
+      "2021-09-21 22:07:11,880 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:07:33,053 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:07:33,550 epoch 3 - iter 8/81 - loss 0.00567954 - samples/sec: 16.67 - lr: 0.020000\n",
-      "2021-09-08 02:07:34,099 epoch 3 - iter 16/81 - loss 0.04691915 - samples/sec: 14.59 - lr: 0.020000\n",
-      "2021-09-08 02:07:34,568 epoch 3 - iter 24/81 - loss 0.11406264 - samples/sec: 17.09 - lr: 0.020000\n",
-      "2021-09-08 02:07:35,004 epoch 3 - iter 32/81 - loss 0.10344006 - samples/sec: 18.39 - lr: 0.020000\n",
-      "2021-09-08 02:07:35,546 epoch 3 - iter 40/81 - loss 0.09149871 - samples/sec: 14.75 - lr: 0.020000\n",
-      "2021-09-08 02:07:35,983 epoch 3 - iter 48/81 - loss 0.14252801 - samples/sec: 18.37 - lr: 0.020000\n",
-      "2021-09-08 02:07:36,370 epoch 3 - iter 56/81 - loss 0.13058874 - samples/sec: 20.72 - lr: 0.020000\n",
-      "2021-09-08 02:07:36,789 epoch 3 - iter 64/81 - loss 0.13100820 - samples/sec: 19.12 - lr: 0.020000\n",
-      "2021-09-08 02:07:37,230 epoch 3 - iter 72/81 - loss 0.14241569 - samples/sec: 18.17 - lr: 0.020000\n",
-      "2021-09-08 02:07:37,668 epoch 3 - iter 80/81 - loss 0.15963969 - samples/sec: 18.31 - lr: 0.020000\n",
-      "2021-09-08 02:07:37,712 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:07:37,713 EPOCH 3 done: loss 0.1577 - lr 0.0200000\n",
-      "2021-09-08 02:07:38,213 DEV : loss 0.6105287671089172 - score 0.4444\n",
-      "2021-09-08 02:07:38,215 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:07:38,313 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:07:38,724 epoch 4 - iter 8/81 - loss 0.03761112 - samples/sec: 20.22 - lr: 0.020000\n",
-      "2021-09-08 02:07:39,128 epoch 4 - iter 16/81 - loss 0.03378346 - samples/sec: 19.85 - lr: 0.020000\n",
-      "2021-09-08 02:07:39,625 epoch 4 - iter 24/81 - loss 0.18628906 - samples/sec: 16.12 - lr: 0.020000\n",
-      "2021-09-08 02:07:40,035 epoch 4 - iter 32/81 - loss 0.17954374 - samples/sec: 19.54 - lr: 0.020000\n",
-      "2021-09-08 02:07:40,519 epoch 4 - iter 40/81 - loss 0.16305262 - samples/sec: 16.55 - lr: 0.020000\n",
-      "2021-09-08 02:07:41,005 epoch 4 - iter 48/81 - loss 0.16542846 - samples/sec: 16.49 - lr: 0.020000\n",
-      "2021-09-08 02:07:41,420 epoch 4 - iter 56/81 - loss 0.14702715 - samples/sec: 19.31 - lr: 0.020000\n",
-      "2021-09-08 02:07:41,965 epoch 4 - iter 64/81 - loss 0.13388829 - samples/sec: 14.69 - lr: 0.020000\n",
-      "2021-09-08 02:07:42,463 epoch 4 - iter 72/81 - loss 0.11939147 - samples/sec: 16.07 - lr: 0.020000\n",
-      "2021-09-08 02:07:42,885 epoch 4 - iter 80/81 - loss 0.13788298 - samples/sec: 19.03 - lr: 0.020000\n",
-      "2021-09-08 02:07:42,932 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:07:42,932 EPOCH 4 done: loss 0.1453 - lr 0.0200000\n",
-      "2021-09-08 02:07:43,523 DEV : loss 0.6989648938179016 - score 0.4444\n",
-      "2021-09-08 02:07:43,525 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:07:43,528 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:07:44,008 epoch 5 - iter 8/81 - loss 0.14507836 - samples/sec: 17.20 - lr: 0.020000\n",
-      "2021-09-08 02:07:44,453 epoch 5 - iter 16/81 - loss 0.21469002 - samples/sec: 17.99 - lr: 0.020000\n",
-      "2021-09-08 02:07:45,067 epoch 5 - iter 24/81 - loss 0.23198774 - samples/sec: 13.04 - lr: 0.020000\n",
-      "2021-09-08 02:07:45,476 epoch 5 - iter 32/81 - loss 0.17519075 - samples/sec: 19.62 - lr: 0.020000\n",
-      "2021-09-08 02:07:45,854 epoch 5 - iter 40/81 - loss 0.14033553 - samples/sec: 21.17 - lr: 0.020000\n",
-      "2021-09-08 02:07:46,230 epoch 5 - iter 48/81 - loss 0.17171551 - samples/sec: 21.35 - lr: 0.020000\n",
-      "2021-09-08 02:07:46,684 epoch 5 - iter 56/81 - loss 0.14758874 - samples/sec: 17.65 - lr: 0.020000\n",
-      "2021-09-08 02:07:47,190 epoch 5 - iter 64/81 - loss 0.12926487 - samples/sec: 15.83 - lr: 0.020000\n",
-      "2021-09-08 02:07:47,618 epoch 5 - iter 72/81 - loss 0.11521112 - samples/sec: 18.72 - lr: 0.020000\n",
-      "2021-09-08 02:07:48,054 epoch 5 - iter 80/81 - loss 0.10398579 - samples/sec: 18.37 - lr: 0.020000\n",
-      "2021-09-08 02:07:48,126 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:07:48,127 EPOCH 5 done: loss 0.1027 - lr 0.0200000\n",
-      "2021-09-08 02:07:48,639 DEV : loss 0.6099467873573303 - score 0.5556\n",
-      "2021-09-08 02:07:48,640 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:07:16,132 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:07:16,796 epoch 3 - iter 8/81 - loss 0.07566477 - samples/sec: 12.42 - lr: 0.020000\n",
+      "2021-09-21 22:07:17,347 epoch 3 - iter 16/81 - loss 0.17395519 - samples/sec: 14.55 - lr: 0.020000\n",
+      "2021-09-21 22:07:18,004 epoch 3 - iter 24/81 - loss 0.20243074 - samples/sec: 12.18 - lr: 0.020000\n",
+      "2021-09-21 22:07:18,734 epoch 3 - iter 32/81 - loss 0.22972016 - samples/sec: 10.97 - lr: 0.020000\n",
+      "2021-09-21 22:07:19,291 epoch 3 - iter 40/81 - loss 0.28040800 - samples/sec: 14.39 - lr: 0.020000\n",
+      "2021-09-21 22:07:19,962 epoch 3 - iter 48/81 - loss 0.30485489 - samples/sec: 11.92 - lr: 0.020000\n",
+      "2021-09-21 22:07:20,504 epoch 3 - iter 56/81 - loss 0.28388778 - samples/sec: 14.79 - lr: 0.020000\n",
+      "2021-09-21 22:07:21,092 epoch 3 - iter 64/81 - loss 0.31664836 - samples/sec: 13.62 - lr: 0.020000\n",
+      "2021-09-21 22:07:21,684 epoch 3 - iter 72/81 - loss 0.34590054 - samples/sec: 13.52 - lr: 0.020000\n",
+      "2021-09-21 22:07:22,310 epoch 3 - iter 80/81 - loss 0.31539866 - samples/sec: 12.79 - lr: 0.020000\n",
+      "2021-09-21 22:07:22,361 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:07:22,362 EPOCH 3 done: loss 0.3116 - lr 0.0200000\n",
+      "2021-09-21 22:07:22,928 DEV : loss 0.421911358833313 - score 0.6667\n",
+      "2021-09-21 22:07:22,929 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:07:56,141 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:07:56,543 epoch 6 - iter 8/81 - loss 0.00381343 - samples/sec: 20.74 - lr: 0.020000\n",
-      "2021-09-08 02:07:56,959 epoch 6 - iter 16/81 - loss 0.00209116 - samples/sec: 19.26 - lr: 0.020000\n",
-      "2021-09-08 02:07:57,455 epoch 6 - iter 24/81 - loss 0.00154923 - samples/sec: 16.16 - lr: 0.020000\n",
-      "2021-09-08 02:07:57,915 epoch 6 - iter 32/81 - loss 0.00127489 - samples/sec: 17.41 - lr: 0.020000\n",
-      "2021-09-08 02:07:58,423 epoch 6 - iter 40/81 - loss 0.00131586 - samples/sec: 15.78 - lr: 0.020000\n",
-      "2021-09-08 02:07:58,936 epoch 6 - iter 48/81 - loss 0.00137967 - samples/sec: 15.61 - lr: 0.020000\n",
-      "2021-09-08 02:07:59,366 epoch 6 - iter 56/81 - loss 0.00145378 - samples/sec: 18.63 - lr: 0.020000\n",
-      "2021-09-08 02:07:59,817 epoch 6 - iter 64/81 - loss 0.04140549 - samples/sec: 17.77 - lr: 0.020000\n",
-      "2021-09-08 02:08:00,304 epoch 6 - iter 72/81 - loss 0.03709345 - samples/sec: 16.47 - lr: 0.020000\n",
-      "2021-09-08 02:08:00,670 epoch 6 - iter 80/81 - loss 0.03342940 - samples/sec: 21.88 - lr: 0.020000\n",
-      "2021-09-08 02:08:00,719 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 22:07:26,935 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:07:27,475 epoch 4 - iter 8/81 - loss 0.24638653 - samples/sec: 15.58 - lr: 0.020000\n",
+      "2021-09-21 22:07:28,111 epoch 4 - iter 16/81 - loss 0.29319466 - samples/sec: 12.59 - lr: 0.020000\n",
+      "2021-09-21 22:07:28,829 epoch 4 - iter 24/81 - loss 0.20308173 - samples/sec: 11.16 - lr: 0.020000\n",
+      "2021-09-21 22:07:29,456 epoch 4 - iter 32/81 - loss 0.18629390 - samples/sec: 12.78 - lr: 0.020000\n",
+      "2021-09-21 22:07:30,182 epoch 4 - iter 40/81 - loss 0.27503567 - samples/sec: 11.02 - lr: 0.020000\n",
+      "2021-09-21 22:07:30,798 epoch 4 - iter 48/81 - loss 0.22997481 - samples/sec: 13.02 - lr: 0.020000\n",
+      "2021-09-21 22:07:31,438 epoch 4 - iter 56/81 - loss 0.22793149 - samples/sec: 12.50 - lr: 0.020000\n",
+      "2021-09-21 22:07:31,933 epoch 4 - iter 64/81 - loss 0.19992426 - samples/sec: 16.19 - lr: 0.020000\n",
+      "2021-09-21 22:07:32,577 epoch 4 - iter 72/81 - loss 0.18153999 - samples/sec: 12.44 - lr: 0.020000\n",
+      "2021-09-21 22:07:33,088 epoch 4 - iter 80/81 - loss 0.17793020 - samples/sec: 15.66 - lr: 0.020000\n",
+      "2021-09-21 22:07:33,160 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:07:33,160 EPOCH 4 done: loss 0.1758 - lr 0.0200000\n",
+      "2021-09-21 22:07:33,722 DEV : loss 0.4642851650714874 - score 0.6667\n",
+      "2021-09-21 22:07:33,723 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:07:33,725 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:07:34,466 epoch 5 - iter 8/81 - loss 0.25157596 - samples/sec: 11.07 - lr: 0.020000\n",
+      "2021-09-21 22:07:35,141 epoch 5 - iter 16/81 - loss 0.26295578 - samples/sec: 11.85 - lr: 0.020000\n",
+      "2021-09-21 22:07:35,813 epoch 5 - iter 24/81 - loss 0.20636983 - samples/sec: 11.93 - lr: 0.020000\n",
+      "2021-09-21 22:07:36,338 epoch 5 - iter 32/81 - loss 0.17421946 - samples/sec: 15.25 - lr: 0.020000\n",
+      "2021-09-21 22:07:37,076 epoch 5 - iter 40/81 - loss 0.20830975 - samples/sec: 10.85 - lr: 0.020000\n",
+      "2021-09-21 22:07:37,647 epoch 5 - iter 48/81 - loss 0.22341062 - samples/sec: 14.02 - lr: 0.020000\n",
+      "2021-09-21 22:07:38,168 epoch 5 - iter 56/81 - loss 0.21761441 - samples/sec: 15.37 - lr: 0.020000\n",
+      "2021-09-21 22:07:38,844 epoch 5 - iter 64/81 - loss 0.21762381 - samples/sec: 11.84 - lr: 0.020000\n",
+      "2021-09-21 22:07:39,384 epoch 5 - iter 72/81 - loss 0.19658673 - samples/sec: 14.84 - lr: 0.020000\n",
+      "2021-09-21 22:07:39,869 epoch 5 - iter 80/81 - loss 0.18277418 - samples/sec: 16.53 - lr: 0.020000\n",
+      "2021-09-21 22:07:39,988 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:08:00,720 EPOCH 6 done: loss 0.0330 - lr 0.0200000\n",
-      "2021-09-08 02:08:02,023 DEV : loss 0.6181188225746155 - score 0.4444\n",
-      "2021-09-08 02:08:02,024 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:08:02,036 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:08:02,501 epoch 7 - iter 8/81 - loss 0.00112002 - samples/sec: 17.80 - lr: 0.020000\n",
-      "2021-09-08 02:08:02,866 epoch 7 - iter 16/81 - loss 0.00071987 - samples/sec: 21.93 - lr: 0.020000\n",
-      "2021-09-08 02:08:03,269 epoch 7 - iter 24/81 - loss 0.00060112 - samples/sec: 19.88 - lr: 0.020000\n",
-      "2021-09-08 02:08:03,791 epoch 7 - iter 32/81 - loss 0.00064253 - samples/sec: 15.34 - lr: 0.020000\n",
-      "2021-09-08 02:08:04,188 epoch 7 - iter 40/81 - loss 0.00056915 - samples/sec: 20.22 - lr: 0.020000\n",
-      "2021-09-08 02:08:04,597 epoch 7 - iter 48/81 - loss 0.00058217 - samples/sec: 19.57 - lr: 0.020000\n",
-      "2021-09-08 02:08:05,018 epoch 7 - iter 56/81 - loss 0.06091380 - samples/sec: 19.04 - lr: 0.020000\n",
-      "2021-09-08 02:08:05,518 epoch 7 - iter 64/81 - loss 0.08946782 - samples/sec: 16.03 - lr: 0.020000\n",
-      "2021-09-08 02:08:06,062 epoch 7 - iter 72/81 - loss 0.07971592 - samples/sec: 14.72 - lr: 0.020000\n",
-      "2021-09-08 02:08:06,477 epoch 7 - iter 80/81 - loss 0.07196867 - samples/sec: 19.31 - lr: 0.020000\n",
-      "2021-09-08 02:08:06,589 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:08:06,590 EPOCH 7 done: loss 0.0711 - lr 0.0200000\n",
-      "2021-09-08 02:08:07,168 DEV : loss 0.5836068987846375 - score 0.4444\n",
-      "2021-09-08 02:08:07,169 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:08:07,174 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:08:07,615 epoch 8 - iter 8/81 - loss 0.00030424 - samples/sec: 18.80 - lr: 0.020000\n",
-      "2021-09-08 02:08:08,101 epoch 8 - iter 16/81 - loss 0.00066276 - samples/sec: 16.48 - lr: 0.020000\n",
-      "2021-09-08 02:08:08,473 epoch 8 - iter 24/81 - loss 0.00519500 - samples/sec: 21.54 - lr: 0.020000\n",
-      "2021-09-08 02:08:08,864 epoch 8 - iter 32/81 - loss 0.01991209 - samples/sec: 20.51 - lr: 0.020000\n",
-      "2021-09-08 02:08:09,416 epoch 8 - iter 40/81 - loss 0.01616086 - samples/sec: 14.51 - lr: 0.020000\n",
-      "2021-09-08 02:08:09,874 epoch 8 - iter 48/81 - loss 0.01370858 - samples/sec: 17.47 - lr: 0.020000\n",
-      "2021-09-08 02:08:10,291 epoch 8 - iter 56/81 - loss 0.01181245 - samples/sec: 19.25 - lr: 0.020000\n",
-      "2021-09-08 02:08:10,686 epoch 8 - iter 64/81 - loss 0.01243609 - samples/sec: 20.25 - lr: 0.020000\n",
-      "2021-09-08 02:08:11,210 epoch 8 - iter 72/81 - loss 0.01116680 - samples/sec: 15.29 - lr: 0.020000\n",
-      "2021-09-08 02:08:11,689 epoch 8 - iter 80/81 - loss 0.01016666 - samples/sec: 16.74 - lr: 0.020000\n",
-      "2021-09-08 02:08:11,741 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:08:11,742 EPOCH 8 done: loss 0.0100 - lr 0.0200000\n",
-      "2021-09-08 02:08:12,163 DEV : loss 0.693640947341919 - score 0.4444\n",
-      "2021-09-08 02:08:12,165 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 02:08:12,179 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:08:12,613 epoch 9 - iter 8/81 - loss 0.00040124 - samples/sec: 19.10 - lr: 0.020000\n",
-      "2021-09-08 02:08:13,265 epoch 9 - iter 16/81 - loss 0.00732784 - samples/sec: 12.28 - lr: 0.020000\n",
-      "2021-09-08 02:08:13,640 epoch 9 - iter 24/81 - loss 0.13160966 - samples/sec: 21.42 - lr: 0.020000\n",
-      "2021-09-08 02:08:14,149 epoch 9 - iter 32/81 - loss 0.09886334 - samples/sec: 15.73 - lr: 0.020000\n",
-      "2021-09-08 02:08:14,546 epoch 9 - iter 40/81 - loss 0.07952560 - samples/sec: 20.17 - lr: 0.020000\n",
-      "2021-09-08 02:08:14,945 epoch 9 - iter 48/81 - loss 0.06632382 - samples/sec: 20.11 - lr: 0.020000\n",
-      "2021-09-08 02:08:15,439 epoch 9 - iter 56/81 - loss 0.05690492 - samples/sec: 16.22 - lr: 0.020000\n",
-      "2021-09-08 02:08:15,817 epoch 9 - iter 64/81 - loss 0.06239152 - samples/sec: 21.19 - lr: 0.020000\n",
-      "2021-09-08 02:08:16,298 epoch 9 - iter 72/81 - loss 0.05548688 - samples/sec: 16.66 - lr: 0.020000\n",
-      "2021-09-08 02:08:16,685 epoch 9 - iter 80/81 - loss 0.06167730 - samples/sec: 20.72 - lr: 0.020000\n",
-      "2021-09-08 02:08:16,739 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:08:16,739 EPOCH 9 done: loss 0.0610 - lr 0.0200000\n",
-      "2021-09-08 02:08:17,138 DEV : loss 0.634753406047821 - score 0.4444\n",
-      "Epoch     9: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 02:08:17,139 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 02:08:17,154 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:08:17,642 epoch 10 - iter 8/81 - loss 0.00061926 - samples/sec: 16.92 - lr: 0.010000\n",
-      "2021-09-08 02:08:18,095 epoch 10 - iter 16/81 - loss 0.00043928 - samples/sec: 17.67 - lr: 0.010000\n",
-      "2021-09-08 02:08:18,603 epoch 10 - iter 24/81 - loss 0.01560676 - samples/sec: 15.76 - lr: 0.010000\n",
-      "2021-09-08 02:08:19,028 epoch 10 - iter 32/81 - loss 0.01204260 - samples/sec: 18.87 - lr: 0.010000\n",
-      "2021-09-08 02:08:19,493 epoch 10 - iter 40/81 - loss 0.03244476 - samples/sec: 17.22 - lr: 0.010000\n",
-      "2021-09-08 02:08:19,913 epoch 10 - iter 48/81 - loss 0.02720380 - samples/sec: 19.10 - lr: 0.010000\n",
-      "2021-09-08 02:08:20,363 epoch 10 - iter 56/81 - loss 0.02335481 - samples/sec: 17.79 - lr: 0.010000\n",
-      "2021-09-08 02:08:20,834 epoch 10 - iter 64/81 - loss 0.02046343 - samples/sec: 17.02 - lr: 0.010000\n",
-      "2021-09-08 02:08:21,225 epoch 10 - iter 72/81 - loss 0.01823049 - samples/sec: 20.53 - lr: 0.010000\n",
-      "2021-09-08 02:08:21,698 epoch 10 - iter 80/81 - loss 0.01646946 - samples/sec: 16.91 - lr: 0.010000\n",
-      "2021-09-08 02:08:21,743 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:08:21,743 EPOCH 10 done: loss 0.0163 - lr 0.0100000\n",
-      "2021-09-08 02:08:22,501 DEV : loss 0.6594274640083313 - score 0.4444\n",
-      "2021-09-08 02:08:22,502 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:08:26,977 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:08:26,978 Testing using best model ...\n",
-      "2021-09-08 02:08:26,979 loading file None/best-model.pt\n",
+      "2021-09-21 22:07:39,989 EPOCH 5 done: loss 0.1805 - lr 0.0200000\n",
+      "2021-09-21 22:07:40,678 DEV : loss 0.2597292959690094 - score 0.6667\n",
+      "2021-09-21 22:07:40,680 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 22:07:44,196 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:07:44,817 epoch 6 - iter 8/81 - loss 0.05107903 - samples/sec: 13.49 - lr: 0.020000\n",
+      "2021-09-21 22:07:45,311 epoch 6 - iter 16/81 - loss 0.02780954 - samples/sec: 16.22 - lr: 0.020000\n",
+      "2021-09-21 22:07:46,061 epoch 6 - iter 24/81 - loss 0.01983144 - samples/sec: 10.67 - lr: 0.020000\n",
+      "2021-09-21 22:07:46,677 epoch 6 - iter 32/81 - loss 0.04153727 - samples/sec: 13.00 - lr: 0.020000\n",
+      "2021-09-21 22:07:47,182 epoch 6 - iter 40/81 - loss 0.03408805 - samples/sec: 15.85 - lr: 0.020000\n",
+      "2021-09-21 22:07:47,808 epoch 6 - iter 48/81 - loss 0.04652076 - samples/sec: 12.80 - lr: 0.020000\n",
+      "2021-09-21 22:07:48,406 epoch 6 - iter 56/81 - loss 0.03999583 - samples/sec: 13.39 - lr: 0.020000\n",
+      "2021-09-21 22:07:48,953 epoch 6 - iter 64/81 - loss 0.03525459 - samples/sec: 14.63 - lr: 0.020000\n",
+      "2021-09-21 22:07:49,664 epoch 6 - iter 72/81 - loss 0.03717355 - samples/sec: 11.27 - lr: 0.020000\n",
+      "2021-09-21 22:07:50,282 epoch 6 - iter 80/81 - loss 0.04504202 - samples/sec: 12.95 - lr: 0.020000\n",
+      "2021-09-21 22:07:50,354 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:07:50,355 EPOCH 6 done: loss 0.0445 - lr 0.0200000\n",
+      "2021-09-21 22:07:50,925 DEV : loss 0.3716989755630493 - score 0.6667\n",
+      "2021-09-21 22:07:50,926 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:07:50,929 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:07:51,613 epoch 7 - iter 8/81 - loss 0.00560363 - samples/sec: 12.21 - lr: 0.020000\n",
+      "2021-09-21 22:07:52,177 epoch 7 - iter 16/81 - loss 0.03609258 - samples/sec: 14.21 - lr: 0.020000\n",
+      "2021-09-21 22:07:52,702 epoch 7 - iter 24/81 - loss 0.02688431 - samples/sec: 15.26 - lr: 0.020000\n",
+      "2021-09-21 22:07:53,255 epoch 7 - iter 32/81 - loss 0.06731406 - samples/sec: 14.48 - lr: 0.020000\n",
+      "2021-09-21 22:07:53,825 epoch 7 - iter 40/81 - loss 0.05937261 - samples/sec: 14.05 - lr: 0.020000\n",
+      "2021-09-21 22:07:54,418 epoch 7 - iter 48/81 - loss 0.10032072 - samples/sec: 13.51 - lr: 0.020000\n",
+      "2021-09-21 22:07:55,049 epoch 7 - iter 56/81 - loss 0.08763064 - samples/sec: 12.69 - lr: 0.020000\n",
+      "2021-09-21 22:07:55,515 epoch 7 - iter 64/81 - loss 0.07680570 - samples/sec: 17.21 - lr: 0.020000\n",
+      "2021-09-21 22:07:56,048 epoch 7 - iter 72/81 - loss 0.06830429 - samples/sec: 15.03 - lr: 0.020000\n",
+      "2021-09-21 22:07:56,595 epoch 7 - iter 80/81 - loss 0.06160709 - samples/sec: 14.65 - lr: 0.020000\n",
+      "2021-09-21 22:07:56,645 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:07:56,646 EPOCH 7 done: loss 0.0616 - lr 0.0200000\n",
+      "2021-09-21 22:07:57,128 DEV : loss 0.4192495346069336 - score 0.7778\n",
+      "2021-09-21 22:07:57,129 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 22:08:01,935 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:08:02,405 epoch 8 - iter 8/81 - loss 0.13931329 - samples/sec: 17.66 - lr: 0.020000\n",
+      "2021-09-21 22:08:02,882 epoch 8 - iter 16/81 - loss 0.09244351 - samples/sec: 16.80 - lr: 0.020000\n",
+      "2021-09-21 22:08:03,270 epoch 8 - iter 24/81 - loss 0.06172305 - samples/sec: 20.64 - lr: 0.020000\n",
+      "2021-09-21 22:08:03,709 epoch 8 - iter 32/81 - loss 0.04646682 - samples/sec: 18.28 - lr: 0.020000\n",
+      "2021-09-21 22:08:04,125 epoch 8 - iter 40/81 - loss 0.03738767 - samples/sec: 19.23 - lr: 0.020000\n",
+      "2021-09-21 22:08:04,533 epoch 8 - iter 48/81 - loss 0.03670600 - samples/sec: 19.67 - lr: 0.020000\n",
+      "2021-09-21 22:08:04,978 epoch 8 - iter 56/81 - loss 0.04993562 - samples/sec: 17.99 - lr: 0.020000\n",
+      "2021-09-21 22:08:05,429 epoch 8 - iter 64/81 - loss 0.04442245 - samples/sec: 17.80 - lr: 0.020000\n",
+      "2021-09-21 22:08:06,128 epoch 8 - iter 72/81 - loss 0.04061676 - samples/sec: 11.46 - lr: 0.020000\n",
+      "2021-09-21 22:08:06,651 epoch 8 - iter 80/81 - loss 0.03657216 - samples/sec: 15.31 - lr: 0.020000\n",
+      "2021-09-21 22:08:06,731 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:08:06,732 EPOCH 8 done: loss 0.0361 - lr 0.0200000\n",
+      "2021-09-21 22:08:07,406 DEV : loss 0.3973667621612549 - score 0.7778\n",
+      "2021-09-21 22:08:07,407 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 22:08:11,569 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:08:12,138 epoch 9 - iter 8/81 - loss 0.01665948 - samples/sec: 14.76 - lr: 0.020000\n",
+      "2021-09-21 22:08:12,689 epoch 9 - iter 16/81 - loss 0.00870783 - samples/sec: 14.54 - lr: 0.020000\n",
+      "2021-09-21 22:08:13,156 epoch 9 - iter 24/81 - loss 0.00709524 - samples/sec: 17.14 - lr: 0.020000\n",
+      "2021-09-21 22:08:13,662 epoch 9 - iter 32/81 - loss 0.00539424 - samples/sec: 15.84 - lr: 0.020000\n",
+      "2021-09-21 22:08:14,170 epoch 9 - iter 40/81 - loss 0.00434467 - samples/sec: 15.76 - lr: 0.020000\n",
+      "2021-09-21 22:08:14,631 epoch 9 - iter 48/81 - loss 0.00882020 - samples/sec: 17.36 - lr: 0.020000\n",
+      "2021-09-21 22:08:15,235 epoch 9 - iter 56/81 - loss 0.00758622 - samples/sec: 13.27 - lr: 0.020000\n",
+      "2021-09-21 22:08:15,788 epoch 9 - iter 64/81 - loss 0.00665760 - samples/sec: 14.48 - lr: 0.020000\n",
+      "2021-09-21 22:08:16,394 epoch 9 - iter 72/81 - loss 0.00593231 - samples/sec: 13.21 - lr: 0.020000\n",
+      "2021-09-21 22:08:16,961 epoch 9 - iter 80/81 - loss 0.00777704 - samples/sec: 14.11 - lr: 0.020000\n",
+      "2021-09-21 22:08:17,022 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:08:17,022 EPOCH 9 done: loss 0.0077 - lr 0.0200000\n",
+      "2021-09-21 22:08:17,521 DEV : loss 0.45854106545448303 - score 0.7778\n",
+      "2021-09-21 22:08:17,523 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:08:17,525 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:08:18,007 epoch 10 - iter 8/81 - loss 0.37625542 - samples/sec: 17.82 - lr: 0.020000\n",
+      "2021-09-21 22:08:18,603 epoch 10 - iter 16/81 - loss 0.18828371 - samples/sec: 13.45 - lr: 0.020000\n",
+      "2021-09-21 22:08:19,065 epoch 10 - iter 24/81 - loss 0.12568376 - samples/sec: 17.33 - lr: 0.020000\n",
+      "2021-09-21 22:08:19,527 epoch 10 - iter 32/81 - loss 0.12926090 - samples/sec: 17.34 - lr: 0.020000\n",
+      "2021-09-21 22:08:20,117 epoch 10 - iter 40/81 - loss 0.11056904 - samples/sec: 13.57 - lr: 0.020000\n",
+      "2021-09-21 22:08:20,681 epoch 10 - iter 48/81 - loss 0.09217102 - samples/sec: 14.21 - lr: 0.020000\n",
+      "2021-09-21 22:08:21,240 epoch 10 - iter 56/81 - loss 0.07904903 - samples/sec: 14.33 - lr: 0.020000\n",
+      "2021-09-21 22:08:21,764 epoch 10 - iter 64/81 - loss 0.08776858 - samples/sec: 15.29 - lr: 0.020000\n",
+      "2021-09-21 22:08:22,158 epoch 10 - iter 72/81 - loss 0.07803849 - samples/sec: 20.31 - lr: 0.020000\n",
+      "2021-09-21 22:08:22,600 epoch 10 - iter 80/81 - loss 0.07026607 - samples/sec: 18.13 - lr: 0.020000\n",
+      "2021-09-21 22:08:22,644 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:08:22,645 EPOCH 10 done: loss 0.0694 - lr 0.0200000\n",
+      "2021-09-21 22:08:23,015 DEV : loss 0.4579916000366211 - score 0.6667\n",
+      "2021-09-21 22:08:23,016 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 22:08:26,953 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:08:26,953 Testing using best model ...\n",
+      "2021-09-21 22:08:26,955 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:08:32,610 \t0.0\n",
-      "2021-09-08 02:08:32,611 \n",
+      "2021-09-21 22:08:32,087 \t0.8\n",
+      "2021-09-21 22:08:32,088 \n",
       "Results:\n",
-      "- F-score (micro) 0.0\n",
-      "- F-score (macro) 0.0\n",
-      "- Accuracy 0.0\n",
+      "- F-score (micro) 0.8\n",
+      "- F-score (macro) 0.5333\n",
+      "- Accuracy 0.8\n",
       "\n",
       "By class:\n",
-      "                                               precision    recall  f1-score   support\n",
-      "\n",
-      "                    this text is about travel     0.0000    0.0000    0.0000         0\n",
-      "                this text is about technology     0.0000    0.0000    0.0000         0\n",
-      "                  this text is about wellness     0.0000    0.0000    0.0000         0\n",
-      "                     this text is about women     0.0000    0.0000    0.0000         0\n",
-      "                   this text is about parents     0.0000    0.0000    0.0000         0\n",
-      "                  this text is about business     0.0000    0.0000    0.0000         0\n",
-      "                  this text is about weddings     0.0000    0.0000    0.0000         0\n",
-      "                   this text is about fashion     0.0000    0.0000    0.0000         0\n",
-      "this text is about entertainmen,the attention     0.0000    0.0000    0.0000         0\n",
-      "                   this text is about science     0.0000    0.0000    0.0000         0\n",
-      "                   this text is about divorce     0.0000    0.0000    0.0000         0\n",
-      "                     this text is about crime     0.0000    0.0000    0.0000         0\n",
-      "                  this text is about religion     0.0000    0.0000    0.0000         0\n",
-      "                    this text is about sports     0.0000    0.0000    0.0000         0\n",
-      "                  this text is about politics     0.0000    0.0000    0.0000         0\n",
-      "                    this text is about comedy     0.0000    0.0000    0.0000         0\n",
+      "                                           precision    recall  f1-score   support\n",
       "\n",
-      "                                    micro avg     0.0000    0.0000    0.0000         0\n",
-      "                                    macro avg     0.0000    0.0000    0.0000         0\n",
-      "                                 weighted avg     0.0000    0.0000    0.0000         0\n",
-      "                                  samples avg     0.0000    0.0000    0.0000         0\n",
+      "This text is about Family & Relationships     0.0000    0.0000    0.0000         0\n",
+      "                This text is about Health     0.5000    1.0000    0.6667         1\n",
+      " This text is about Science & Mathematics     0.0000    0.0000    0.0000         0\n",
+      " This text is about Entertainment & Music     0.0000    0.0000    0.0000         0\n",
+      "  This text is about Computers & Internet     1.0000    1.0000    1.0000         1\n",
+      " This text is about Education & Reference     1.0000    1.0000    1.0000         3\n",
+      "                This text is about Sports     1.0000    0.5000    0.6667         2\n",
+      "     This text is about Society & Culture     1.0000    1.0000    1.0000         1\n",
+      "    This text is about Business & Finance     0.0000    0.0000    0.0000         1\n",
+      " This text is about Politics & Government     1.0000    1.0000    1.0000         1\n",
       "\n",
-      "2021-09-08 02:08:32,611 ----------------------------------------------------------------------------------------------------\n"
+      "                                micro avg     0.8000    0.8000    0.8000        10\n",
+      "                                macro avg     0.5500    0.5500    0.5333        10\n",
+      "                             weighted avg     0.8500    0.8000    0.8000        10\n",
+      "                              samples avg     0.8000    0.8000    0.8000        10\n",
+      "\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:08:48,518 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
+      "2021-09-21 22:08:32,088 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:08:53,524 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:08:52,460 Computing label dictionary. Progress:\n"
+      "2021-09-21 22:08:57,562 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 91/91 [00:00<00:00, 13990.24it/s]"
+      "100%|██████████| 91/91 [00:00<00:00, 16358.02it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:08:52,468 [b'This text is about Family & Relationships', b'This text is about Health', b'This text is about Science & Mathematics', b'This text is about Entertainment & Music', b'This text is about Computers & Internet', b'This text is about Education & Reference', b'This text is about Sports', b'This text is about Society & Culture', b'This text is about Business & Finance', b'This text is about Politics & Government']\n",
-      "2021-09-08 02:08:52,492 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:08:52,493 Model: \"TARSClassifier(\n",
+      "2021-09-21 22:08:57,570 [b'This text is about Family & Relationships', b'This text is about Health', b'This text is about Science & Mathematics', b'This text is about Entertainment & Music', b'This text is about Computers & Internet', b'This text is about Education & Reference', b'This text is about Sports', b'This text is about Society & Culture', b'This text is about Business & Finance', b'This text is about Politics & Government']\n",
+      "2021-09-21 22:08:57,581 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:08:57,583 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -5506,25 +5508,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:08:52,494 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:08:52,494 Corpus: \"Corpus: 81 train + 9 dev + 10 test sentences\"\n",
-      "2021-09-08 02:08:52,494 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:08:52,495 Parameters:\n",
-      "2021-09-08 02:08:52,495  - learning_rate: \"0.02\"\n",
-      "2021-09-08 02:08:52,495  - mini_batch_size: \"1\"\n",
-      "2021-09-08 02:08:52,496  - patience: \"3\"\n",
-      "2021-09-08 02:08:52,496  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 02:08:52,496  - max_epochs: \"10\"\n",
-      "2021-09-08 02:08:52,496  - shuffle: \"True\"\n",
-      "2021-09-08 02:08:52,497  - train_with_dev: \"False\"\n",
-      "2021-09-08 02:08:52,497  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 02:08:52,497 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:08:52,498 Model training base path: \"None\"\n",
-      "2021-09-08 02:08:52,498 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:08:52,498 Device: cuda:1\n",
-      "2021-09-08 02:08:52,498 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:08:52,499 Embeddings storage mode: cpu\n",
-      "2021-09-08 02:08:52,504 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 22:08:57,583 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:08:57,584 Corpus: \"Corpus: 81 train + 9 dev + 10 test sentences\"\n",
+      "2021-09-21 22:08:57,584 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:08:57,584 Parameters:\n",
+      "2021-09-21 22:08:57,585  - learning_rate: \"0.02\"\n",
+      "2021-09-21 22:08:57,585  - mini_batch_size: \"1\"\n",
+      "2021-09-21 22:08:57,585  - patience: \"3\"\n",
+      "2021-09-21 22:08:57,585  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 22:08:57,586  - max_epochs: \"10\"\n",
+      "2021-09-21 22:08:57,586  - shuffle: \"True\"\n",
+      "2021-09-21 22:08:57,586  - train_with_dev: \"False\"\n",
+      "2021-09-21 22:08:57,587  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 22:08:57,587 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:08:57,587 Model training base path: \"None\"\n",
+      "2021-09-21 22:08:57,587 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:08:57,588 Device: cuda:0\n",
+      "2021-09-21 22:08:57,588 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:08:57,588 Embeddings storage mode: cpu\n",
+      "2021-09-21 22:08:57,595 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -5538,198 +5540,209 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:08:53,000 epoch 1 - iter 8/81 - loss 0.77944482 - samples/sec: 16.64 - lr: 0.020000\n",
-      "2021-09-08 02:08:53,518 epoch 1 - iter 16/81 - loss 0.59694667 - samples/sec: 15.48 - lr: 0.020000\n",
-      "2021-09-08 02:08:53,910 epoch 1 - iter 24/81 - loss 0.55287983 - samples/sec: 20.43 - lr: 0.020000\n",
-      "2021-09-08 02:08:54,474 epoch 1 - iter 32/81 - loss 0.53683308 - samples/sec: 14.21 - lr: 0.020000\n",
-      "2021-09-08 02:08:54,933 epoch 1 - iter 40/81 - loss 0.48282254 - samples/sec: 17.43 - lr: 0.020000\n",
-      "2021-09-08 02:08:55,390 epoch 1 - iter 48/81 - loss 0.48420374 - samples/sec: 17.55 - lr: 0.020000\n",
-      "2021-09-08 02:08:55,770 epoch 1 - iter 56/81 - loss 0.43733887 - samples/sec: 21.08 - lr: 0.020000\n",
-      "2021-09-08 02:08:56,230 epoch 1 - iter 64/81 - loss 0.54506977 - samples/sec: 17.40 - lr: 0.020000\n",
-      "2021-09-08 02:08:56,718 epoch 1 - iter 72/81 - loss 0.51758046 - samples/sec: 16.43 - lr: 0.020000\n",
-      "2021-09-08 02:08:57,303 epoch 1 - iter 80/81 - loss 0.50656356 - samples/sec: 13.70 - lr: 0.020000\n",
-      "2021-09-08 02:08:57,352 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:08:57,353 EPOCH 1 done: loss 0.5005 - lr 0.0200000\n",
-      "2021-09-08 02:08:57,724 DEV : loss 0.47669756412506104 - score 0.3333\n",
-      "2021-09-08 02:08:57,726 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:08:58,261 epoch 1 - iter 8/81 - loss 0.76262449 - samples/sec: 12.41 - lr: 0.020000\n",
+      "2021-09-21 22:08:58,812 epoch 1 - iter 16/81 - loss 0.39752190 - samples/sec: 14.55 - lr: 0.020000\n",
+      "2021-09-21 22:08:59,266 epoch 1 - iter 24/81 - loss 0.42833577 - samples/sec: 17.65 - lr: 0.020000\n",
+      "2021-09-21 22:08:59,673 epoch 1 - iter 32/81 - loss 0.43258694 - samples/sec: 19.72 - lr: 0.020000\n",
+      "2021-09-21 22:09:00,246 epoch 1 - iter 40/81 - loss 0.39779668 - samples/sec: 13.98 - lr: 0.020000\n",
+      "2021-09-21 22:09:00,712 epoch 1 - iter 48/81 - loss 0.43440907 - samples/sec: 17.18 - lr: 0.020000\n",
+      "2021-09-21 22:09:01,108 epoch 1 - iter 56/81 - loss 0.37625856 - samples/sec: 20.26 - lr: 0.020000\n",
+      "2021-09-21 22:09:01,544 epoch 1 - iter 64/81 - loss 0.45160467 - samples/sec: 18.36 - lr: 0.020000\n",
+      "2021-09-21 22:09:02,118 epoch 1 - iter 72/81 - loss 0.44230271 - samples/sec: 13.98 - lr: 0.020000\n",
+      "2021-09-21 22:09:02,567 epoch 1 - iter 80/81 - loss 0.44503159 - samples/sec: 17.83 - lr: 0.020000\n",
+      "2021-09-21 22:09:02,677 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:09:02,678 EPOCH 1 done: loss 0.4402 - lr 0.0200000\n",
+      "2021-09-21 22:09:03,209 DEV : loss 0.9783645272254944 - score 0.1111\n",
+      "2021-09-21 22:09:03,211 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:09:05,819 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:09:06,347 epoch 2 - iter 8/81 - loss 0.47602722 - samples/sec: 15.64 - lr: 0.020000\n",
-      "2021-09-08 02:09:06,821 epoch 2 - iter 16/81 - loss 0.44482876 - samples/sec: 16.89 - lr: 0.020000\n",
-      "2021-09-08 02:09:07,518 epoch 2 - iter 24/81 - loss 0.55196988 - samples/sec: 11.49 - lr: 0.020000\n",
-      "2021-09-08 02:09:07,926 epoch 2 - iter 32/81 - loss 0.41945040 - samples/sec: 19.66 - lr: 0.020000\n",
-      "2021-09-08 02:09:08,434 epoch 2 - iter 40/81 - loss 0.39824016 - samples/sec: 15.77 - lr: 0.020000\n",
-      "2021-09-08 02:09:08,914 epoch 2 - iter 48/81 - loss 0.37461810 - samples/sec: 16.69 - lr: 0.020000\n",
-      "2021-09-08 02:09:09,292 epoch 2 - iter 56/81 - loss 0.46485359 - samples/sec: 21.20 - lr: 0.020000\n",
-      "2021-09-08 02:09:09,736 epoch 2 - iter 64/81 - loss 0.44786975 - samples/sec: 18.02 - lr: 0.020000\n",
-      "2021-09-08 02:09:10,113 epoch 2 - iter 72/81 - loss 0.46334874 - samples/sec: 21.30 - lr: 0.020000\n",
-      "2021-09-08 02:09:10,724 epoch 2 - iter 80/81 - loss 0.46686288 - samples/sec: 13.10 - lr: 0.020000\n",
-      "2021-09-08 02:09:10,768 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:09:10,769 EPOCH 2 done: loss 0.4612 - lr 0.0200000\n",
-      "2021-09-08 02:09:11,915 DEV : loss 0.43271535634994507 - score 0.3333\n",
-      "2021-09-08 02:09:11,917 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:09:07,053 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:09:07,661 epoch 2 - iter 8/81 - loss 0.59378693 - samples/sec: 13.78 - lr: 0.020000\n",
+      "2021-09-21 22:09:08,219 epoch 2 - iter 16/81 - loss 0.37600163 - samples/sec: 14.35 - lr: 0.020000\n",
+      "2021-09-21 22:09:08,821 epoch 2 - iter 24/81 - loss 0.34737377 - samples/sec: 13.31 - lr: 0.020000\n",
+      "2021-09-21 22:09:09,371 epoch 2 - iter 32/81 - loss 0.37330398 - samples/sec: 14.57 - lr: 0.020000\n",
+      "2021-09-21 22:09:09,957 epoch 2 - iter 40/81 - loss 0.38601496 - samples/sec: 13.66 - lr: 0.020000\n",
+      "2021-09-21 22:09:10,597 epoch 2 - iter 48/81 - loss 0.35677977 - samples/sec: 12.53 - lr: 0.020000\n",
+      "2021-09-21 22:09:11,046 epoch 2 - iter 56/81 - loss 0.40758169 - samples/sec: 17.84 - lr: 0.020000\n",
+      "2021-09-21 22:09:11,475 epoch 2 - iter 64/81 - loss 0.40105521 - samples/sec: 18.68 - lr: 0.020000\n",
+      "2021-09-21 22:09:11,952 epoch 2 - iter 72/81 - loss 0.43815850 - samples/sec: 16.79 - lr: 0.020000\n",
+      "2021-09-21 22:09:12,410 epoch 2 - iter 80/81 - loss 0.41388185 - samples/sec: 17.51 - lr: 0.020000\n",
+      "2021-09-21 22:09:12,519 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:09:12,520 EPOCH 2 done: loss 0.4094 - lr 0.0200000\n",
+      "2021-09-21 22:09:13,105 DEV : loss 0.3679088056087494 - score 0.4444\n",
+      "2021-09-21 22:09:13,106 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:09:16,244 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:09:16,900 epoch 3 - iter 8/81 - loss 0.27324419 - samples/sec: 12.52 - lr: 0.020000\n",
-      "2021-09-08 02:09:17,421 epoch 3 - iter 16/81 - loss 0.24122672 - samples/sec: 15.36 - lr: 0.020000\n",
-      "2021-09-08 02:09:17,838 epoch 3 - iter 24/81 - loss 0.25792993 - samples/sec: 19.22 - lr: 0.020000\n",
-      "2021-09-08 02:09:18,265 epoch 3 - iter 32/81 - loss 0.25576570 - samples/sec: 18.76 - lr: 0.020000\n",
-      "2021-09-08 02:09:18,728 epoch 3 - iter 40/81 - loss 0.25303626 - samples/sec: 17.30 - lr: 0.020000\n",
-      "2021-09-08 02:09:19,155 epoch 3 - iter 48/81 - loss 0.21758060 - samples/sec: 18.81 - lr: 0.020000\n",
-      "2021-09-08 02:09:19,601 epoch 3 - iter 56/81 - loss 0.19562877 - samples/sec: 17.96 - lr: 0.020000\n",
-      "2021-09-08 02:09:20,126 epoch 3 - iter 64/81 - loss 0.25507652 - samples/sec: 15.26 - lr: 0.020000\n",
-      "2021-09-08 02:09:20,491 epoch 3 - iter 72/81 - loss 0.24129852 - samples/sec: 21.93 - lr: 0.020000\n",
-      "2021-09-08 02:09:20,894 epoch 3 - iter 80/81 - loss 0.23783243 - samples/sec: 19.90 - lr: 0.020000\n",
-      "2021-09-08 02:09:21,031 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:09:21,032 EPOCH 3 done: loss 0.2555 - lr 0.0200000\n",
-      "2021-09-08 02:09:21,422 DEV : loss 0.4184982180595398 - score 0.4444\n",
-      "2021-09-08 02:09:21,423 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:09:24,516 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:09:25,003 epoch 3 - iter 8/81 - loss 0.06702054 - samples/sec: 17.00 - lr: 0.020000\n",
+      "2021-09-21 22:09:25,458 epoch 3 - iter 16/81 - loss 0.16670122 - samples/sec: 17.63 - lr: 0.020000\n",
+      "2021-09-21 22:09:25,888 epoch 3 - iter 24/81 - loss 0.16196578 - samples/sec: 18.64 - lr: 0.020000\n",
+      "2021-09-21 22:09:26,316 epoch 3 - iter 32/81 - loss 0.17244492 - samples/sec: 18.70 - lr: 0.020000\n",
+      "2021-09-21 22:09:26,736 epoch 3 - iter 40/81 - loss 0.18156141 - samples/sec: 19.08 - lr: 0.020000\n",
+      "2021-09-21 22:09:27,184 epoch 3 - iter 48/81 - loss 0.15289088 - samples/sec: 17.89 - lr: 0.020000\n",
+      "2021-09-21 22:09:27,670 epoch 3 - iter 56/81 - loss 0.18353511 - samples/sec: 16.49 - lr: 0.020000\n",
+      "2021-09-21 22:09:28,182 epoch 3 - iter 64/81 - loss 0.22104730 - samples/sec: 15.65 - lr: 0.020000\n",
+      "2021-09-21 22:09:28,644 epoch 3 - iter 72/81 - loss 0.21876388 - samples/sec: 17.36 - lr: 0.020000\n",
+      "2021-09-21 22:09:29,070 epoch 3 - iter 80/81 - loss 0.25868615 - samples/sec: 18.77 - lr: 0.020000\n",
+      "2021-09-21 22:09:29,165 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:09:29,165 EPOCH 3 done: loss 0.2560 - lr 0.0200000\n",
+      "2021-09-21 22:09:30,070 DEV : loss 0.40927088260650635 - score 0.7778\n",
+      "2021-09-21 22:09:30,072 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:09:26,540 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:09:27,110 epoch 4 - iter 8/81 - loss 0.27124979 - samples/sec: 14.45 - lr: 0.020000\n",
-      "2021-09-08 02:09:27,506 epoch 4 - iter 16/81 - loss 0.14090132 - samples/sec: 20.25 - lr: 0.020000\n",
-      "2021-09-08 02:09:28,020 epoch 4 - iter 24/81 - loss 0.21006189 - samples/sec: 15.59 - lr: 0.020000\n",
-      "2021-09-08 02:09:28,470 epoch 4 - iter 32/81 - loss 0.20418647 - samples/sec: 17.79 - lr: 0.020000\n",
-      "2021-09-08 02:09:28,917 epoch 4 - iter 40/81 - loss 0.21700418 - samples/sec: 17.93 - lr: 0.020000\n",
-      "2021-09-08 02:09:29,362 epoch 4 - iter 48/81 - loss 0.21282836 - samples/sec: 18.02 - lr: 0.020000\n",
-      "2021-09-08 02:09:29,739 epoch 4 - iter 56/81 - loss 0.30412820 - samples/sec: 21.22 - lr: 0.020000\n",
-      "2021-09-08 02:09:30,304 epoch 4 - iter 64/81 - loss 0.29969950 - samples/sec: 14.17 - lr: 0.020000\n",
-      "2021-09-08 02:09:30,743 epoch 4 - iter 72/81 - loss 0.26722750 - samples/sec: 18.29 - lr: 0.020000\n",
-      "2021-09-08 02:09:31,273 epoch 4 - iter 80/81 - loss 0.24069384 - samples/sec: 15.10 - lr: 0.020000\n",
-      "2021-09-08 02:09:31,326 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:09:31,327 EPOCH 4 done: loss 0.2377 - lr 0.0200000\n",
-      "2021-09-08 02:09:31,686 DEV : loss 0.5014543533325195 - score 0.3333\n",
-      "2021-09-08 02:09:31,688 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:09:31,690 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:09:32,326 epoch 5 - iter 8/81 - loss 0.00105732 - samples/sec: 12.88 - lr: 0.020000\n",
-      "2021-09-08 02:09:32,720 epoch 5 - iter 16/81 - loss 0.04546365 - samples/sec: 20.34 - lr: 0.020000\n",
-      "2021-09-08 02:09:33,214 epoch 5 - iter 24/81 - loss 0.07109385 - samples/sec: 16.22 - lr: 0.020000\n",
-      "2021-09-08 02:09:33,750 epoch 5 - iter 32/81 - loss 0.12339931 - samples/sec: 14.95 - lr: 0.020000\n",
-      "2021-09-08 02:09:34,232 epoch 5 - iter 40/81 - loss 0.14762874 - samples/sec: 16.63 - lr: 0.020000\n",
-      "2021-09-08 02:09:34,747 epoch 5 - iter 48/81 - loss 0.12338218 - samples/sec: 15.56 - lr: 0.020000\n",
-      "2021-09-08 02:09:35,238 epoch 5 - iter 56/81 - loss 0.12515210 - samples/sec: 16.31 - lr: 0.020000\n",
-      "2021-09-08 02:09:35,645 epoch 5 - iter 64/81 - loss 0.12935665 - samples/sec: 19.71 - lr: 0.020000\n",
-      "2021-09-08 02:09:36,036 epoch 5 - iter 72/81 - loss 0.11761259 - samples/sec: 20.46 - lr: 0.020000\n",
-      "2021-09-08 02:09:36,467 epoch 5 - iter 80/81 - loss 0.12555636 - samples/sec: 18.59 - lr: 0.020000\n",
-      "2021-09-08 02:09:36,559 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:09:36,560 EPOCH 5 done: loss 0.1240 - lr 0.0200000\n",
-      "2021-09-08 02:09:36,924 DEV : loss 0.43313372135162354 - score 0.4444\n",
-      "2021-09-08 02:09:36,925 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:09:36,930 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:09:37,467 epoch 6 - iter 8/81 - loss 0.00197911 - samples/sec: 15.32 - lr: 0.020000\n",
-      "2021-09-08 02:09:38,080 epoch 6 - iter 16/81 - loss 0.04075485 - samples/sec: 13.07 - lr: 0.020000\n",
-      "2021-09-08 02:09:38,492 epoch 6 - iter 24/81 - loss 0.02814612 - samples/sec: 19.46 - lr: 0.020000\n",
-      "2021-09-08 02:09:38,916 epoch 6 - iter 32/81 - loss 0.02901920 - samples/sec: 18.88 - lr: 0.020000\n",
-      "2021-09-08 02:09:39,314 epoch 6 - iter 40/81 - loss 0.02374664 - samples/sec: 20.17 - lr: 0.020000\n",
-      "2021-09-08 02:09:39,794 epoch 6 - iter 48/81 - loss 0.01993221 - samples/sec: 16.68 - lr: 0.020000\n",
-      "2021-09-08 02:09:40,203 epoch 6 - iter 56/81 - loss 0.11232099 - samples/sec: 19.59 - lr: 0.020000\n",
-      "2021-09-08 02:09:40,786 epoch 6 - iter 64/81 - loss 0.10926516 - samples/sec: 13.74 - lr: 0.020000\n",
-      "2021-09-08 02:09:41,212 epoch 6 - iter 72/81 - loss 0.12906923 - samples/sec: 18.82 - lr: 0.020000\n",
-      "2021-09-08 02:09:41,617 epoch 6 - iter 80/81 - loss 0.11741203 - samples/sec: 19.78 - lr: 0.020000\n",
-      "2021-09-08 02:09:41,661 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 22:09:34,302 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:09:34,839 epoch 4 - iter 8/81 - loss 0.03082059 - samples/sec: 15.68 - lr: 0.020000\n",
+      "2021-09-21 22:09:35,495 epoch 4 - iter 16/81 - loss 0.08030647 - samples/sec: 12.20 - lr: 0.020000\n",
+      "2021-09-21 22:09:36,118 epoch 4 - iter 24/81 - loss 0.11288101 - samples/sec: 12.86 - lr: 0.020000\n",
+      "2021-09-21 22:09:36,681 epoch 4 - iter 32/81 - loss 0.16762551 - samples/sec: 14.24 - lr: 0.020000\n",
+      "2021-09-21 22:09:37,269 epoch 4 - iter 40/81 - loss 0.14439695 - samples/sec: 13.63 - lr: 0.020000\n",
+      "2021-09-21 22:09:37,980 epoch 4 - iter 48/81 - loss 0.13225743 - samples/sec: 11.25 - lr: 0.020000\n",
+      "2021-09-21 22:09:38,483 epoch 4 - iter 56/81 - loss 0.11375806 - samples/sec: 15.94 - lr: 0.020000\n",
+      "2021-09-21 22:09:39,058 epoch 4 - iter 64/81 - loss 0.12298087 - samples/sec: 13.93 - lr: 0.020000\n",
+      "2021-09-21 22:09:39,639 epoch 4 - iter 72/81 - loss 0.11104950 - samples/sec: 13.77 - lr: 0.020000\n",
+      "2021-09-21 22:09:40,339 epoch 4 - iter 80/81 - loss 0.10138389 - samples/sec: 11.45 - lr: 0.020000\n",
+      "2021-09-21 22:09:40,419 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:09:40,420 EPOCH 4 done: loss 0.1002 - lr 0.0200000\n",
+      "2021-09-21 22:09:41,143 DEV : loss 0.5671953558921814 - score 0.7778\n",
+      "2021-09-21 22:09:41,145 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:09:41,147 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:09:41,778 epoch 5 - iter 8/81 - loss 0.25835265 - samples/sec: 13.22 - lr: 0.020000\n",
+      "2021-09-21 22:09:42,517 epoch 5 - iter 16/81 - loss 0.14037048 - samples/sec: 10.84 - lr: 0.020000\n",
+      "2021-09-21 22:09:43,026 epoch 5 - iter 24/81 - loss 0.09383973 - samples/sec: 15.72 - lr: 0.020000\n",
+      "2021-09-21 22:09:43,569 epoch 5 - iter 32/81 - loss 0.10600231 - samples/sec: 14.76 - lr: 0.020000\n",
+      "2021-09-21 22:09:44,150 epoch 5 - iter 40/81 - loss 0.09376632 - samples/sec: 13.79 - lr: 0.020000\n",
+      "2021-09-21 22:09:44,837 epoch 5 - iter 48/81 - loss 0.07821251 - samples/sec: 11.65 - lr: 0.020000\n",
+      "2021-09-21 22:09:45,327 epoch 5 - iter 56/81 - loss 0.06715709 - samples/sec: 16.34 - lr: 0.020000\n",
+      "2021-09-21 22:09:45,996 epoch 5 - iter 64/81 - loss 0.08804179 - samples/sec: 11.97 - lr: 0.020000\n",
+      "2021-09-21 22:09:46,657 epoch 5 - iter 72/81 - loss 0.07842859 - samples/sec: 12.12 - lr: 0.020000\n",
+      "2021-09-21 22:09:47,233 epoch 5 - iter 80/81 - loss 0.07889999 - samples/sec: 13.92 - lr: 0.020000\n",
+      "2021-09-21 22:09:47,308 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:09:47,308 EPOCH 5 done: loss 0.0779 - lr 0.0200000\n",
+      "2021-09-21 22:09:48,040 DEV : loss 0.4565085470676422 - score 0.7778\n",
+      "2021-09-21 22:09:48,042 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 22:09:48,044 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:09:48,670 epoch 6 - iter 8/81 - loss 0.00087023 - samples/sec: 13.35 - lr: 0.020000\n",
+      "2021-09-21 22:09:49,332 epoch 6 - iter 16/81 - loss 0.00087394 - samples/sec: 12.09 - lr: 0.020000\n",
+      "2021-09-21 22:09:49,861 epoch 6 - iter 24/81 - loss 0.03110075 - samples/sec: 15.14 - lr: 0.020000\n",
+      "2021-09-21 22:09:50,402 epoch 6 - iter 32/81 - loss 0.02743217 - samples/sec: 14.81 - lr: 0.020000\n",
+      "2021-09-21 22:09:50,964 epoch 6 - iter 40/81 - loss 0.02643029 - samples/sec: 14.26 - lr: 0.020000\n",
+      "2021-09-21 22:09:51,583 epoch 6 - iter 48/81 - loss 0.02210488 - samples/sec: 12.95 - lr: 0.020000\n",
+      "2021-09-21 22:09:52,042 epoch 6 - iter 56/81 - loss 0.01902220 - samples/sec: 17.43 - lr: 0.020000\n",
+      "2021-09-21 22:09:52,544 epoch 6 - iter 64/81 - loss 0.01669907 - samples/sec: 15.98 - lr: 0.020000\n",
+      "2021-09-21 22:09:53,206 epoch 6 - iter 72/81 - loss 0.03789039 - samples/sec: 12.10 - lr: 0.020000\n",
+      "2021-09-21 22:09:53,672 epoch 6 - iter 80/81 - loss 0.05306346 - samples/sec: 17.17 - lr: 0.020000\n",
+      "2021-09-21 22:09:53,720 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:09:41,662 EPOCH 6 done: loss 0.1160 - lr 0.0200000\n",
-      "2021-09-08 02:09:42,114 DEV : loss 0.4320748746395111 - score 0.5556\n",
-      "2021-09-08 02:09:42,116 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 02:09:48,473 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:09:49,106 epoch 7 - iter 8/81 - loss 0.21244539 - samples/sec: 12.97 - lr: 0.020000\n",
-      "2021-09-08 02:09:49,500 epoch 7 - iter 16/81 - loss 0.22136068 - samples/sec: 20.39 - lr: 0.020000\n",
-      "2021-09-08 02:09:49,924 epoch 7 - iter 24/81 - loss 0.15543446 - samples/sec: 18.86 - lr: 0.020000\n",
-      "2021-09-08 02:09:50,313 epoch 7 - iter 32/81 - loss 0.11677141 - samples/sec: 20.65 - lr: 0.020000\n",
-      "2021-09-08 02:09:50,865 epoch 7 - iter 40/81 - loss 0.09364893 - samples/sec: 14.51 - lr: 0.020000\n",
-      "2021-09-08 02:09:51,294 epoch 7 - iter 48/81 - loss 0.07840317 - samples/sec: 18.65 - lr: 0.020000\n",
-      "2021-09-08 02:09:51,714 epoch 7 - iter 56/81 - loss 0.06729996 - samples/sec: 19.07 - lr: 0.020000\n",
-      "2021-09-08 02:09:52,134 epoch 7 - iter 64/81 - loss 0.07857491 - samples/sec: 19.11 - lr: 0.020000\n",
-      "2021-09-08 02:09:52,838 epoch 7 - iter 72/81 - loss 0.06992051 - samples/sec: 11.37 - lr: 0.020000\n",
-      "2021-09-08 02:09:53,253 epoch 7 - iter 80/81 - loss 0.06299964 - samples/sec: 19.33 - lr: 0.020000\n",
-      "2021-09-08 02:09:53,301 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:09:53,301 EPOCH 7 done: loss 0.0711 - lr 0.0200000\n",
-      "2021-09-08 02:09:53,665 DEV : loss 0.32476428151130676 - score 0.6667\n",
-      "2021-09-08 02:09:53,666 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 02:09:57,668 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:09:58,087 epoch 8 - iter 8/81 - loss 0.17922038 - samples/sec: 19.89 - lr: 0.020000\n",
-      "2021-09-08 02:09:58,499 epoch 8 - iter 16/81 - loss 0.08991068 - samples/sec: 19.45 - lr: 0.020000\n",
-      "2021-09-08 02:09:59,059 epoch 8 - iter 24/81 - loss 0.07366463 - samples/sec: 14.31 - lr: 0.020000\n",
-      "2021-09-08 02:09:59,472 epoch 8 - iter 32/81 - loss 0.05590349 - samples/sec: 19.39 - lr: 0.020000\n",
-      "2021-09-08 02:09:59,879 epoch 8 - iter 40/81 - loss 0.04647301 - samples/sec: 19.67 - lr: 0.020000\n",
-      "2021-09-08 02:10:00,501 epoch 8 - iter 48/81 - loss 0.03929467 - samples/sec: 12.89 - lr: 0.020000\n",
-      "2021-09-08 02:10:00,991 epoch 8 - iter 56/81 - loss 0.09043900 - samples/sec: 16.32 - lr: 0.020000\n",
-      "2021-09-08 02:10:01,409 epoch 8 - iter 64/81 - loss 0.10564320 - samples/sec: 19.20 - lr: 0.020000\n",
-      "2021-09-08 02:10:01,931 epoch 8 - iter 72/81 - loss 0.09401133 - samples/sec: 15.34 - lr: 0.020000\n",
-      "2021-09-08 02:10:02,325 epoch 8 - iter 80/81 - loss 0.10034562 - samples/sec: 20.36 - lr: 0.020000\n",
-      "2021-09-08 02:10:02,373 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:10:02,374 EPOCH 8 done: loss 0.1022 - lr 0.0200000\n",
-      "2021-09-08 02:10:02,733 DEV : loss 0.44391146302223206 - score 0.6667\n",
-      "2021-09-08 02:10:02,735 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:10:02,737 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:10:03,172 epoch 9 - iter 8/81 - loss 0.00094248 - samples/sec: 19.04 - lr: 0.020000\n",
-      "2021-09-08 02:10:03,688 epoch 9 - iter 16/81 - loss 0.00070553 - samples/sec: 15.52 - lr: 0.020000\n",
-      "2021-09-08 02:10:04,218 epoch 9 - iter 24/81 - loss 0.00063931 - samples/sec: 15.14 - lr: 0.020000\n",
-      "2021-09-08 02:10:04,650 epoch 9 - iter 32/81 - loss 0.00320195 - samples/sec: 18.51 - lr: 0.020000\n",
-      "2021-09-08 02:10:05,176 epoch 9 - iter 40/81 - loss 0.00285117 - samples/sec: 15.25 - lr: 0.020000\n",
-      "2021-09-08 02:10:05,590 epoch 9 - iter 48/81 - loss 0.00244232 - samples/sec: 19.36 - lr: 0.020000\n",
-      "2021-09-08 02:10:06,001 epoch 9 - iter 56/81 - loss 0.00216184 - samples/sec: 19.47 - lr: 0.020000\n",
-      "2021-09-08 02:10:06,362 epoch 9 - iter 64/81 - loss 0.00196735 - samples/sec: 22.23 - lr: 0.020000\n",
-      "2021-09-08 02:10:06,869 epoch 9 - iter 72/81 - loss 0.00177454 - samples/sec: 15.80 - lr: 0.020000\n",
-      "2021-09-08 02:10:07,341 epoch 9 - iter 80/81 - loss 0.00162198 - samples/sec: 16.99 - lr: 0.020000\n",
-      "2021-09-08 02:10:07,396 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:10:07,397 EPOCH 9 done: loss 0.0016 - lr 0.0200000\n",
-      "2021-09-08 02:10:07,758 DEV : loss 0.5191901922225952 - score 0.5556\n",
-      "2021-09-08 02:10:07,759 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:10:07,773 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:10:08,147 epoch 10 - iter 8/81 - loss 0.00060439 - samples/sec: 22.31 - lr: 0.020000\n",
-      "2021-09-08 02:10:08,633 epoch 10 - iter 16/81 - loss 0.00087325 - samples/sec: 16.46 - lr: 0.020000\n",
-      "2021-09-08 02:10:09,282 epoch 10 - iter 24/81 - loss 0.00063604 - samples/sec: 12.34 - lr: 0.020000\n",
-      "2021-09-08 02:10:09,677 epoch 10 - iter 32/81 - loss 0.04924586 - samples/sec: 20.28 - lr: 0.020000\n",
-      "2021-09-08 02:10:10,066 epoch 10 - iter 40/81 - loss 0.03948205 - samples/sec: 20.62 - lr: 0.020000\n",
-      "2021-09-08 02:10:10,579 epoch 10 - iter 48/81 - loss 0.04432275 - samples/sec: 15.61 - lr: 0.020000\n",
-      "2021-09-08 02:10:11,092 epoch 10 - iter 56/81 - loss 0.03801295 - samples/sec: 15.63 - lr: 0.020000\n",
-      "2021-09-08 02:10:11,609 epoch 10 - iter 64/81 - loss 0.03329432 - samples/sec: 15.48 - lr: 0.020000\n",
-      "2021-09-08 02:10:12,084 epoch 10 - iter 72/81 - loss 0.02961952 - samples/sec: 16.87 - lr: 0.020000\n",
-      "2021-09-08 02:10:12,592 epoch 10 - iter 80/81 - loss 0.03279543 - samples/sec: 15.79 - lr: 0.020000\n",
-      "2021-09-08 02:10:12,645 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:10:12,645 EPOCH 10 done: loss 0.0324 - lr 0.0200000\n",
-      "2021-09-08 02:10:13,983 DEV : loss 0.7023094296455383 - score 0.4444\n",
-      "2021-09-08 02:10:13,985 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 02:10:19,031 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:10:19,032 Testing using best model ...\n",
-      "2021-09-08 02:10:19,034 loading file None/best-model.pt\n",
+      "2021-09-21 22:09:53,720 EPOCH 6 done: loss 0.0524 - lr 0.0200000\n",
+      "2021-09-21 22:09:54,314 DEV : loss 0.5148096680641174 - score 0.6667\n",
+      "2021-09-21 22:09:54,315 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 22:09:54,384 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:09:54,797 epoch 7 - iter 8/81 - loss 0.19676477 - samples/sec: 20.14 - lr: 0.020000\n",
+      "2021-09-21 22:09:55,234 epoch 7 - iter 16/81 - loss 0.16404138 - samples/sec: 18.35 - lr: 0.020000\n",
+      "2021-09-21 22:09:55,636 epoch 7 - iter 24/81 - loss 0.10946188 - samples/sec: 19.90 - lr: 0.020000\n",
+      "2021-09-21 22:09:56,120 epoch 7 - iter 32/81 - loss 0.09685610 - samples/sec: 16.55 - lr: 0.020000\n",
+      "2021-09-21 22:09:56,758 epoch 7 - iter 40/81 - loss 0.07756225 - samples/sec: 12.56 - lr: 0.020000\n",
+      "2021-09-21 22:09:57,276 epoch 7 - iter 48/81 - loss 0.07389264 - samples/sec: 15.48 - lr: 0.020000\n",
+      "2021-09-21 22:09:58,009 epoch 7 - iter 56/81 - loss 0.06349479 - samples/sec: 10.92 - lr: 0.020000\n",
+      "2021-09-21 22:09:58,787 epoch 7 - iter 64/81 - loss 0.06484996 - samples/sec: 10.28 - lr: 0.020000\n",
+      "2021-09-21 22:09:59,293 epoch 7 - iter 72/81 - loss 0.05780264 - samples/sec: 15.85 - lr: 0.020000\n",
+      "2021-09-21 22:09:59,905 epoch 7 - iter 80/81 - loss 0.05216699 - samples/sec: 13.08 - lr: 0.020000\n",
+      "2021-09-21 22:09:59,985 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:09:59,985 EPOCH 7 done: loss 0.0515 - lr 0.0200000\n",
+      "2021-09-21 22:10:00,715 DEV : loss 0.47037389874458313 - score 0.7778\n",
+      "Epoch     7: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 22:10:00,716 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 22:10:00,718 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:10:01,414 epoch 8 - iter 8/81 - loss 0.00091068 - samples/sec: 11.88 - lr: 0.010000\n",
+      "2021-09-21 22:10:02,000 epoch 8 - iter 16/81 - loss 0.00073065 - samples/sec: 13.67 - lr: 0.010000\n",
+      "2021-09-21 22:10:02,504 epoch 8 - iter 24/81 - loss 0.04951824 - samples/sec: 15.90 - lr: 0.010000\n",
+      "2021-09-21 22:10:03,109 epoch 8 - iter 32/81 - loss 0.04030247 - samples/sec: 13.23 - lr: 0.010000\n",
+      "2021-09-21 22:10:03,704 epoch 8 - iter 40/81 - loss 0.03228763 - samples/sec: 13.47 - lr: 0.010000\n",
+      "2021-09-21 22:10:04,170 epoch 8 - iter 48/81 - loss 0.03284147 - samples/sec: 17.21 - lr: 0.010000\n",
+      "2021-09-21 22:10:04,690 epoch 8 - iter 56/81 - loss 0.02820532 - samples/sec: 15.39 - lr: 0.010000\n",
+      "2021-09-21 22:10:05,153 epoch 8 - iter 64/81 - loss 0.03750951 - samples/sec: 17.34 - lr: 0.010000\n",
+      "2021-09-21 22:10:05,544 epoch 8 - iter 72/81 - loss 0.03338424 - samples/sec: 20.52 - lr: 0.010000\n",
+      "2021-09-21 22:10:06,036 epoch 8 - iter 80/81 - loss 0.04871326 - samples/sec: 16.26 - lr: 0.010000\n",
+      "2021-09-21 22:10:06,080 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:10:06,081 EPOCH 8 done: loss 0.0481 - lr 0.0100000\n",
+      "2021-09-21 22:10:06,810 DEV : loss 0.4500476121902466 - score 0.7778\n",
+      "2021-09-21 22:10:06,812 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:10:06,890 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:10:07,317 epoch 9 - iter 8/81 - loss 0.00050286 - samples/sec: 19.41 - lr: 0.010000\n",
+      "2021-09-21 22:10:07,748 epoch 9 - iter 16/81 - loss 0.00117224 - samples/sec: 18.60 - lr: 0.010000\n",
+      "2021-09-21 22:10:08,676 epoch 9 - iter 24/81 - loss 0.00085643 - samples/sec: 18.20 - lr: 0.010000\n",
+      "2021-09-21 22:10:09,183 epoch 9 - iter 32/81 - loss 0.02054093 - samples/sec: 15.79 - lr: 0.010000\n",
+      "2021-09-21 22:10:09,662 epoch 9 - iter 40/81 - loss 0.01649673 - samples/sec: 16.75 - lr: 0.010000\n",
+      "2021-09-21 22:10:10,140 epoch 9 - iter 48/81 - loss 0.01957285 - samples/sec: 16.77 - lr: 0.010000\n",
+      "2021-09-21 22:10:10,547 epoch 9 - iter 56/81 - loss 0.01679989 - samples/sec: 19.68 - lr: 0.010000\n",
+      "2021-09-21 22:10:10,960 epoch 9 - iter 64/81 - loss 0.01485984 - samples/sec: 19.40 - lr: 0.010000\n",
+      "2021-09-21 22:10:11,408 epoch 9 - iter 72/81 - loss 0.01325337 - samples/sec: 17.85 - lr: 0.010000\n",
+      "2021-09-21 22:10:11,842 epoch 9 - iter 80/81 - loss 0.01196340 - samples/sec: 18.46 - lr: 0.010000\n",
+      "2021-09-21 22:10:11,887 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:10:11,887 EPOCH 9 done: loss 0.0118 - lr 0.0100000\n",
+      "2021-09-21 22:10:12,374 DEV : loss 0.44446563720703125 - score 0.7778\n",
+      "2021-09-21 22:10:12,375 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 22:10:12,377 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:10:12,943 epoch 10 - iter 8/81 - loss 0.00053971 - samples/sec: 14.52 - lr: 0.010000\n",
+      "2021-09-21 22:10:13,373 epoch 10 - iter 16/81 - loss 0.01747102 - samples/sec: 18.66 - lr: 0.010000\n",
+      "2021-09-21 22:10:13,829 epoch 10 - iter 24/81 - loss 0.01187607 - samples/sec: 17.55 - lr: 0.010000\n",
+      "2021-09-21 22:10:14,377 epoch 10 - iter 32/81 - loss 0.00900429 - samples/sec: 14.61 - lr: 0.010000\n",
+      "2021-09-21 22:10:14,856 epoch 10 - iter 40/81 - loss 0.00725454 - samples/sec: 16.75 - lr: 0.010000\n",
+      "2021-09-21 22:10:15,462 epoch 10 - iter 48/81 - loss 0.00612027 - samples/sec: 13.20 - lr: 0.010000\n",
+      "2021-09-21 22:10:16,048 epoch 10 - iter 56/81 - loss 0.00527550 - samples/sec: 13.66 - lr: 0.010000\n",
+      "2021-09-21 22:10:16,640 epoch 10 - iter 64/81 - loss 0.00463474 - samples/sec: 13.54 - lr: 0.010000\n",
+      "2021-09-21 22:10:17,244 epoch 10 - iter 72/81 - loss 0.00415306 - samples/sec: 13.26 - lr: 0.010000\n",
+      "2021-09-21 22:10:17,718 epoch 10 - iter 80/81 - loss 0.00377547 - samples/sec: 16.88 - lr: 0.010000\n",
+      "2021-09-21 22:10:17,777 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:10:17,778 EPOCH 10 done: loss 0.0037 - lr 0.0100000\n",
+      "2021-09-21 22:10:18,404 DEV : loss 0.46898582577705383 - score 0.7778\n",
+      "2021-09-21 22:10:18,406 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 22:10:22,421 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:10:22,421 Testing using best model ...\n",
+      "2021-09-21 22:10:22,423 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:10:23,541 \t0.6\n",
-      "2021-09-08 02:10:23,541 \n",
+      "2021-09-21 22:10:27,115 \t0.0\n",
+      "2021-09-21 22:10:27,115 \n",
       "Results:\n",
-      "- F-score (micro) 0.6\n",
-      "- F-score (macro) 0.55\n",
-      "- Accuracy 0.6\n",
+      "- F-score (micro) 0.0\n",
+      "- F-score (macro) 0.0\n",
+      "- Accuracy 0.0\n",
       "\n",
       "By class:\n",
-      "                                           precision    recall  f1-score   support\n",
+      "                                               precision    recall  f1-score   support\n",
       "\n",
-      "This text is about Family & Relationships     1.0000    1.0000    1.0000         1\n",
-      "                This text is about Health     1.0000    1.0000    1.0000         1\n",
-      " This text is about Science & Mathematics     1.0000    1.0000    1.0000         1\n",
-      " This text is about Entertainment & Music     0.5000    0.5000    0.5000         2\n",
-      "  This text is about Computers & Internet     1.0000    1.0000    1.0000         1\n",
-      " This text is about Education & Reference     0.0000    0.0000    0.0000         1\n",
-      "                This text is about Sports     0.0000    0.0000    0.0000         0\n",
-      "     This text is about Society & Culture     0.0000    0.0000    0.0000         1\n",
-      "    This text is about Business & Finance     0.0000    0.0000    0.0000         1\n",
-      " This text is about Politics & Government     1.0000    1.0000    1.0000         1\n",
+      "                    this text is about travel     0.0000    0.0000    0.0000         0\n",
+      "                this text is about technology     0.0000    0.0000    0.0000         0\n",
+      "                  this text is about wellness     0.0000    0.0000    0.0000         0\n",
+      "                     this text is about women     0.0000    0.0000    0.0000         0\n",
+      "                   this text is about parents     0.0000    0.0000    0.0000         0\n",
+      "                  this text is about business     0.0000    0.0000    0.0000         0\n",
+      "                  this text is about weddings     0.0000    0.0000    0.0000         0\n",
+      "                   this text is about fashion     0.0000    0.0000    0.0000         0\n",
+      "this text is about entertainmen,the attention     0.0000    0.0000    0.0000         0\n",
+      "                   this text is about science     0.0000    0.0000    0.0000         0\n",
+      "                   this text is about divorce     0.0000    0.0000    0.0000         0\n",
+      "                     this text is about crime     0.0000    0.0000    0.0000         0\n",
+      "                  this text is about religion     0.0000    0.0000    0.0000         0\n",
+      "                    this text is about sports     0.0000    0.0000    0.0000         0\n",
+      "                  this text is about politics     0.0000    0.0000    0.0000         0\n",
+      "                    this text is about comedy     0.0000    0.0000    0.0000         0\n",
       "\n",
-      "                                micro avg     0.6000    0.6000    0.6000        10\n",
-      "                                macro avg     0.5500    0.5500    0.5500        10\n",
-      "                             weighted avg     0.6000    0.6000    0.6000        10\n",
-      "                              samples avg     0.6000    0.6000    0.6000        10\n",
+      "                                    micro avg     0.0000    0.0000    0.0000         0\n",
+      "                                    macro avg     0.0000    0.0000    0.0000         0\n",
+      "                                 weighted avg     0.0000    0.0000    0.0000         0\n",
+      "                                  samples avg     0.0000    0.0000    0.0000         0\n",
       "\n",
-      "2021-09-08 02:10:23,542 ----------------------------------------------------------------------------------------------------\n",
-      "Accuracy Durchschnitt: 0.6979228486646885\n"
+      "2021-09-21 22:10:27,116 ----------------------------------------------------------------------------------------------------\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Accuracy Durchschnitt: 0.6997032640949555\n"
      ]
     }
    ],
@@ -5809,11 +5822,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "id": "86988d74",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.6735905044510386, 0.7181008902077152, 0.7032640949554896, 0.7062314540059347, 0.6973293768545994]\n",
+      "0.014705651861559471\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(avg_acc_list)\n",
+    "print(statistics.pstdev(avg_acc_list))"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -5825,7 +5850,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "id": "263ee449",
    "metadata": {},
    "outputs": [
@@ -5833,38 +5858,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:10:39,422 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
+      "2021-09-21 22:23:12,181 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:10:43,553 Computing label dictionary. Progress:\n"
+      "2021-09-21 22:23:16,070 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 82/82 [00:00<00:00, 14114.12it/s]"
+      "100%|██████████| 82/82 [00:00<00:00, 16429.39it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:10:43,561 [b'a social unit living together', b'a particular branch of scientific knowledge', b'an activity that is diverting and that holds the attention', b'a machine for performing calculations automatically', b'knowledge acquired by learning and instruction', b'an active diversion requiring physical exertion and competition', b'an extended social group having a distinctive cultural and economic organization', b'the commercial activity of providing funds and capital', b'the study of government of states and other political units']\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 02:10:44,725 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:10:44,727 Model: \"TARSClassifier(\n",
+      "2021-09-21 22:23:16,077 [b'a social unit living together', b'a particular branch of scientific knowledge', b'an activity that is diverting and that holds the attention', b'a machine for performing calculations automatically', b'knowledge acquired by learning and instruction', b'an active diversion requiring physical exertion and competition', b'an extended social group having a distinctive cultural and economic organization', b'the commercial activity of providing funds and capital', b'the study of government of states and other political units']\n",
+      "2021-09-21 22:23:16,080 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:23:16,082 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -6177,219 +6189,226 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:10:44,728 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:10:44,728 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
-      "2021-09-08 02:10:44,728 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:10:44,729 Parameters:\n",
-      "2021-09-08 02:10:44,729  - learning_rate: \"0.02\"\n",
-      "2021-09-08 02:10:44,729  - mini_batch_size: \"1\"\n",
-      "2021-09-08 02:10:44,730  - patience: \"3\"\n",
-      "2021-09-08 02:10:44,730  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 02:10:44,730  - max_epochs: \"10\"\n",
-      "2021-09-08 02:10:44,731  - shuffle: \"True\"\n",
-      "2021-09-08 02:10:44,731  - train_with_dev: \"False\"\n",
-      "2021-09-08 02:10:44,731  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 02:10:44,731 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:10:44,732 Model training base path: \"None\"\n",
-      "2021-09-08 02:10:44,732 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:10:44,734 Device: cuda:1\n",
-      "2021-09-08 02:10:44,734 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:10:44,734 Embeddings storage mode: cpu\n",
-      "2021-09-08 02:10:44,795 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:10:45,182 epoch 1 - iter 7/73 - loss 0.58352097 - samples/sec: 18.83 - lr: 0.020000\n",
-      "2021-09-08 02:10:45,538 epoch 1 - iter 14/73 - loss 0.30145771 - samples/sec: 19.67 - lr: 0.020000\n",
-      "2021-09-08 02:10:45,910 epoch 1 - iter 21/73 - loss 0.35561652 - samples/sec: 18.84 - lr: 0.020000\n",
-      "2021-09-08 02:10:46,277 epoch 1 - iter 28/73 - loss 0.31579623 - samples/sec: 19.13 - lr: 0.020000\n",
-      "2021-09-08 02:10:46,638 epoch 1 - iter 35/73 - loss 0.47155836 - samples/sec: 19.43 - lr: 0.020000\n",
-      "2021-09-08 02:10:47,107 epoch 1 - iter 42/73 - loss 0.43960441 - samples/sec: 14.95 - lr: 0.020000\n",
-      "2021-09-08 02:10:47,593 epoch 1 - iter 49/73 - loss 0.43594445 - samples/sec: 14.41 - lr: 0.020000\n",
-      "2021-09-08 02:10:48,278 epoch 1 - iter 56/73 - loss 0.50083079 - samples/sec: 10.24 - lr: 0.020000\n",
-      "2021-09-08 02:10:48,699 epoch 1 - iter 63/73 - loss 0.48774130 - samples/sec: 16.65 - lr: 0.020000\n",
-      "2021-09-08 02:10:49,145 epoch 1 - iter 70/73 - loss 0.51207569 - samples/sec: 15.69 - lr: 0.020000\n",
-      "2021-09-08 02:10:49,310 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:10:49,310 EPOCH 1 done: loss 0.4984 - lr 0.0200000\n",
-      "2021-09-08 02:10:49,514 DEV : loss 0.5118496417999268 - score 0.375\n",
-      "2021-09-08 02:10:49,515 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:23:16,083 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:23:16,083 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
+      "2021-09-21 22:23:16,083 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:23:16,084 Parameters:\n",
+      "2021-09-21 22:23:16,084  - learning_rate: \"0.02\"\n",
+      "2021-09-21 22:23:16,084  - mini_batch_size: \"1\"\n",
+      "2021-09-21 22:23:16,084  - patience: \"3\"\n",
+      "2021-09-21 22:23:16,085  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 22:23:16,085  - max_epochs: \"10\"\n",
+      "2021-09-21 22:23:16,085  - shuffle: \"True\"\n",
+      "2021-09-21 22:23:16,086  - train_with_dev: \"False\"\n",
+      "2021-09-21 22:23:16,086  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 22:23:16,086 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:23:16,086 Model training base path: \"None\"\n",
+      "2021-09-21 22:23:16,087 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:23:16,087 Device: cuda:0\n",
+      "2021-09-21 22:23:16,087 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:23:16,088 Embeddings storage mode: cpu\n",
+      "2021-09-21 22:23:16,094 ----------------------------------------------------------------------------------------------------\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 22:23:16,537 epoch 1 - iter 7/73 - loss 0.95919188 - samples/sec: 16.36 - lr: 0.020000\n",
+      "2021-09-21 22:23:16,970 epoch 1 - iter 14/73 - loss 0.64695363 - samples/sec: 16.18 - lr: 0.020000\n",
+      "2021-09-21 22:23:17,297 epoch 1 - iter 21/73 - loss 0.53110411 - samples/sec: 21.44 - lr: 0.020000\n",
+      "2021-09-21 22:23:17,654 epoch 1 - iter 28/73 - loss 0.57603645 - samples/sec: 19.66 - lr: 0.020000\n",
+      "2021-09-21 22:23:18,144 epoch 1 - iter 35/73 - loss 0.62369392 - samples/sec: 14.31 - lr: 0.020000\n",
+      "2021-09-21 22:23:18,468 epoch 1 - iter 42/73 - loss 0.57974431 - samples/sec: 21.59 - lr: 0.020000\n",
+      "2021-09-21 22:23:18,883 epoch 1 - iter 49/73 - loss 0.58478037 - samples/sec: 16.89 - lr: 0.020000\n",
+      "2021-09-21 22:23:19,281 epoch 1 - iter 56/73 - loss 0.60216298 - samples/sec: 17.63 - lr: 0.020000\n",
+      "2021-09-21 22:23:19,683 epoch 1 - iter 63/73 - loss 0.57666358 - samples/sec: 17.42 - lr: 0.020000\n",
+      "2021-09-21 22:23:20,090 epoch 1 - iter 70/73 - loss 0.62695859 - samples/sec: 17.25 - lr: 0.020000\n",
+      "2021-09-21 22:23:20,397 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:23:20,397 EPOCH 1 done: loss 0.6102 - lr 0.0200000\n",
+      "2021-09-21 22:23:20,892 DEV : loss 0.41448143124580383 - score 0.25\n",
+      "2021-09-21 22:23:20,893 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 22:23:25,075 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:23:25,604 epoch 2 - iter 7/73 - loss 0.62612782 - samples/sec: 13.66 - lr: 0.020000\n",
+      "2021-09-21 22:23:25,932 epoch 2 - iter 14/73 - loss 0.59566648 - samples/sec: 21.37 - lr: 0.020000\n",
+      "2021-09-21 22:23:26,278 epoch 2 - iter 21/73 - loss 0.59435953 - samples/sec: 20.27 - lr: 0.020000\n",
+      "2021-09-21 22:23:26,759 epoch 2 - iter 28/73 - loss 0.55172654 - samples/sec: 14.58 - lr: 0.020000\n",
+      "2021-09-21 22:23:27,160 epoch 2 - iter 35/73 - loss 0.49746503 - samples/sec: 17.50 - lr: 0.020000\n",
+      "2021-09-21 22:23:27,643 epoch 2 - iter 42/73 - loss 0.43557178 - samples/sec: 14.50 - lr: 0.020000\n",
+      "2021-09-21 22:23:27,982 epoch 2 - iter 49/73 - loss 0.44820086 - samples/sec: 20.71 - lr: 0.020000\n",
+      "2021-09-21 22:23:28,354 epoch 2 - iter 56/73 - loss 0.49490749 - samples/sec: 18.84 - lr: 0.020000\n",
+      "2021-09-21 22:23:28,698 epoch 2 - iter 63/73 - loss 0.47067864 - samples/sec: 20.34 - lr: 0.020000\n",
+      "2021-09-21 22:23:29,097 epoch 2 - iter 70/73 - loss 0.45121679 - samples/sec: 17.57 - lr: 0.020000\n",
+      "2021-09-21 22:23:29,288 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:23:29,288 EPOCH 2 done: loss 0.4343 - lr 0.0200000\n",
+      "2021-09-21 22:23:29,768 DEV : loss 0.47719916701316833 - score 0.375\n",
+      "2021-09-21 22:23:29,770 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 22:23:34,328 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:23:34,696 epoch 3 - iter 7/73 - loss 0.13946222 - samples/sec: 19.92 - lr: 0.020000\n",
+      "2021-09-21 22:23:35,188 epoch 3 - iter 14/73 - loss 0.10106360 - samples/sec: 14.25 - lr: 0.020000\n",
+      "2021-09-21 22:23:35,580 epoch 3 - iter 21/73 - loss 0.16625739 - samples/sec: 17.87 - lr: 0.020000\n",
+      "2021-09-21 22:23:35,953 epoch 3 - iter 28/73 - loss 0.14618301 - samples/sec: 18.84 - lr: 0.020000\n",
+      "2021-09-21 22:23:36,479 epoch 3 - iter 35/73 - loss 0.14052648 - samples/sec: 13.31 - lr: 0.020000\n",
+      "2021-09-21 22:23:36,839 epoch 3 - iter 42/73 - loss 0.16471865 - samples/sec: 19.47 - lr: 0.020000\n",
+      "2021-09-21 22:23:37,176 epoch 3 - iter 49/73 - loss 0.17669273 - samples/sec: 20.80 - lr: 0.020000\n",
+      "2021-09-21 22:23:37,493 epoch 3 - iter 56/73 - loss 0.19882782 - samples/sec: 22.10 - lr: 0.020000\n",
+      "2021-09-21 22:23:37,929 epoch 3 - iter 63/73 - loss 0.18980466 - samples/sec: 16.11 - lr: 0.020000\n",
+      "2021-09-21 22:23:38,312 epoch 3 - iter 70/73 - loss 0.20616470 - samples/sec: 18.26 - lr: 0.020000\n",
+      "2021-09-21 22:23:38,455 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:23:38,456 EPOCH 3 done: loss 0.2472 - lr 0.0200000\n",
+      "2021-09-21 22:23:39,078 DEV : loss 0.546220064163208 - score 0.375\n",
+      "2021-09-21 22:23:39,080 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:23:39,082 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:23:39,441 epoch 4 - iter 7/73 - loss 0.37943507 - samples/sec: 20.27 - lr: 0.020000\n",
+      "2021-09-21 22:23:39,761 epoch 4 - iter 14/73 - loss 0.19949404 - samples/sec: 21.92 - lr: 0.020000\n",
+      "2021-09-21 22:23:40,215 epoch 4 - iter 21/73 - loss 0.24504172 - samples/sec: 15.46 - lr: 0.020000\n",
+      "2021-09-21 22:23:40,524 epoch 4 - iter 28/73 - loss 0.18589568 - samples/sec: 22.70 - lr: 0.020000\n",
+      "2021-09-21 22:23:40,967 epoch 4 - iter 35/73 - loss 0.26775985 - samples/sec: 15.81 - lr: 0.020000\n",
+      "2021-09-21 22:23:41,318 epoch 4 - iter 42/73 - loss 0.29100273 - samples/sec: 20.00 - lr: 0.020000\n",
+      "2021-09-21 22:23:41,646 epoch 4 - iter 49/73 - loss 0.26676620 - samples/sec: 21.33 - lr: 0.020000\n",
+      "2021-09-21 22:23:42,056 epoch 4 - iter 56/73 - loss 0.26814153 - samples/sec: 17.12 - lr: 0.020000\n",
+      "2021-09-21 22:23:42,515 epoch 4 - iter 63/73 - loss 0.25246750 - samples/sec: 15.27 - lr: 0.020000\n",
+      "2021-09-21 22:23:43,008 epoch 4 - iter 70/73 - loss 0.24040854 - samples/sec: 14.21 - lr: 0.020000\n",
+      "2021-09-21 22:23:43,175 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:23:43,175 EPOCH 4 done: loss 0.2307 - lr 0.0200000\n",
+      "2021-09-21 22:23:43,652 DEV : loss 0.4873404800891876 - score 0.5\n",
+      "2021-09-21 22:23:43,654 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:10:53,607 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:10:54,117 epoch 2 - iter 7/73 - loss 0.49984439 - samples/sec: 14.18 - lr: 0.020000\n",
-      "2021-09-08 02:10:54,590 epoch 2 - iter 14/73 - loss 0.67336303 - samples/sec: 14.83 - lr: 0.020000\n",
-      "2021-09-08 02:10:54,926 epoch 2 - iter 21/73 - loss 0.58838933 - samples/sec: 20.86 - lr: 0.020000\n",
-      "2021-09-08 02:10:55,286 epoch 2 - iter 28/73 - loss 0.62721412 - samples/sec: 19.46 - lr: 0.020000\n",
-      "2021-09-08 02:10:55,668 epoch 2 - iter 35/73 - loss 0.56889311 - samples/sec: 18.37 - lr: 0.020000\n",
-      "2021-09-08 02:10:56,187 epoch 2 - iter 42/73 - loss 0.52804077 - samples/sec: 13.49 - lr: 0.020000\n",
-      "2021-09-08 02:10:56,570 epoch 2 - iter 49/73 - loss 0.51197601 - samples/sec: 18.30 - lr: 0.020000\n",
-      "2021-09-08 02:10:56,941 epoch 2 - iter 56/73 - loss 0.50403193 - samples/sec: 18.90 - lr: 0.020000\n",
-      "2021-09-08 02:10:57,415 epoch 2 - iter 63/73 - loss 0.48375467 - samples/sec: 14.78 - lr: 0.020000\n",
-      "2021-09-08 02:10:57,875 epoch 2 - iter 70/73 - loss 0.47100698 - samples/sec: 15.24 - lr: 0.020000\n",
-      "2021-09-08 02:10:58,051 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:10:58,051 EPOCH 2 done: loss 0.4703 - lr 0.0200000\n",
-      "2021-09-08 02:10:58,257 DEV : loss 0.3392651379108429 - score 0.625\n",
-      "2021-09-08 02:10:58,258 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:23:47,711 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:23:48,071 epoch 5 - iter 7/73 - loss 0.00863220 - samples/sec: 20.39 - lr: 0.020000\n",
+      "2021-09-21 22:23:48,488 epoch 5 - iter 14/73 - loss 0.00538804 - samples/sec: 16.80 - lr: 0.020000\n",
+      "2021-09-21 22:23:48,896 epoch 5 - iter 21/73 - loss 0.02149492 - samples/sec: 17.21 - lr: 0.020000\n",
+      "2021-09-21 22:23:49,237 epoch 5 - iter 28/73 - loss 0.01633451 - samples/sec: 20.57 - lr: 0.020000\n",
+      "2021-09-21 22:23:49,610 epoch 5 - iter 35/73 - loss 0.08870501 - samples/sec: 18.76 - lr: 0.020000\n",
+      "2021-09-21 22:23:50,027 epoch 5 - iter 42/73 - loss 0.07492725 - samples/sec: 16.82 - lr: 0.020000\n",
+      "2021-09-21 22:23:50,354 epoch 5 - iter 49/73 - loss 0.06441015 - samples/sec: 21.48 - lr: 0.020000\n",
+      "2021-09-21 22:23:50,721 epoch 5 - iter 56/73 - loss 0.05645361 - samples/sec: 19.11 - lr: 0.020000\n",
+      "2021-09-21 22:23:51,224 epoch 5 - iter 63/73 - loss 0.12378614 - samples/sec: 13.91 - lr: 0.020000\n",
+      "2021-09-21 22:23:51,600 epoch 5 - iter 70/73 - loss 0.13584369 - samples/sec: 18.68 - lr: 0.020000\n",
+      "2021-09-21 22:23:51,794 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:23:51,795 EPOCH 5 done: loss 0.1304 - lr 0.0200000\n",
+      "2021-09-21 22:23:52,426 DEV : loss 0.3410467505455017 - score 0.625\n",
+      "2021-09-21 22:23:52,428 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:11:02,825 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:11:03,217 epoch 3 - iter 7/73 - loss 0.27122019 - samples/sec: 18.60 - lr: 0.020000\n",
-      "2021-09-08 02:11:03,625 epoch 3 - iter 14/73 - loss 0.26504042 - samples/sec: 17.20 - lr: 0.020000\n",
-      "2021-09-08 02:11:03,957 epoch 3 - iter 21/73 - loss 0.28782828 - samples/sec: 21.13 - lr: 0.020000\n",
-      "2021-09-08 02:11:04,444 epoch 3 - iter 28/73 - loss 0.32498976 - samples/sec: 14.38 - lr: 0.020000\n",
-      "2021-09-08 02:11:04,951 epoch 3 - iter 35/73 - loss 0.29430609 - samples/sec: 13.82 - lr: 0.020000\n",
-      "2021-09-08 02:11:05,377 epoch 3 - iter 42/73 - loss 0.24603099 - samples/sec: 16.45 - lr: 0.020000\n",
-      "2021-09-08 02:11:05,787 epoch 3 - iter 49/73 - loss 0.23121673 - samples/sec: 17.12 - lr: 0.020000\n",
-      "2021-09-08 02:11:06,168 epoch 3 - iter 56/73 - loss 0.26220269 - samples/sec: 18.37 - lr: 0.020000\n",
-      "2021-09-08 02:11:06,553 epoch 3 - iter 63/73 - loss 0.24061952 - samples/sec: 18.23 - lr: 0.020000\n",
-      "2021-09-08 02:11:07,025 epoch 3 - iter 70/73 - loss 0.23017688 - samples/sec: 14.83 - lr: 0.020000\n",
-      "2021-09-08 02:11:07,161 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:11:07,161 EPOCH 3 done: loss 0.2207 - lr 0.0200000\n",
-      "2021-09-08 02:11:07,364 DEV : loss 0.8107700943946838 - score 0.5\n",
-      "2021-09-08 02:11:07,365 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:11:07,367 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:11:07,748 epoch 4 - iter 7/73 - loss 0.43299687 - samples/sec: 19.12 - lr: 0.020000\n",
-      "2021-09-08 02:11:08,239 epoch 4 - iter 14/73 - loss 0.26339278 - samples/sec: 14.27 - lr: 0.020000\n",
-      "2021-09-08 02:11:08,625 epoch 4 - iter 21/73 - loss 0.22457328 - samples/sec: 18.15 - lr: 0.020000\n",
-      "2021-09-08 02:11:09,013 epoch 4 - iter 28/73 - loss 0.26383247 - samples/sec: 18.06 - lr: 0.020000\n",
-      "2021-09-08 02:11:09,433 epoch 4 - iter 35/73 - loss 0.21478237 - samples/sec: 16.71 - lr: 0.020000\n",
-      "2021-09-08 02:11:09,814 epoch 4 - iter 42/73 - loss 0.25263855 - samples/sec: 18.38 - lr: 0.020000\n",
-      "2021-09-08 02:11:10,137 epoch 4 - iter 49/73 - loss 0.30970732 - samples/sec: 21.75 - lr: 0.020000\n",
-      "2021-09-08 02:11:10,638 epoch 4 - iter 56/73 - loss 0.29470938 - samples/sec: 13.98 - lr: 0.020000\n",
-      "2021-09-08 02:11:11,115 epoch 4 - iter 63/73 - loss 0.26239545 - samples/sec: 14.69 - lr: 0.020000\n",
-      "2021-09-08 02:11:11,449 epoch 4 - iter 70/73 - loss 0.29160054 - samples/sec: 21.02 - lr: 0.020000\n",
-      "2021-09-08 02:11:11,688 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:11:11,689 EPOCH 4 done: loss 0.3010 - lr 0.0200000\n",
-      "2021-09-08 02:11:11,893 DEV : loss 0.3616025745868683 - score 0.5\n",
-      "2021-09-08 02:11:11,894 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:11:11,896 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:11:12,358 epoch 5 - iter 7/73 - loss 0.04100475 - samples/sec: 15.64 - lr: 0.020000\n",
-      "2021-09-08 02:11:12,756 epoch 5 - iter 14/73 - loss 0.07524351 - samples/sec: 17.61 - lr: 0.020000\n",
-      "2021-09-08 02:11:13,309 epoch 5 - iter 21/73 - loss 0.12535382 - samples/sec: 12.66 - lr: 0.020000\n",
-      "2021-09-08 02:11:13,925 epoch 5 - iter 28/73 - loss 0.09820822 - samples/sec: 11.37 - lr: 0.020000\n",
-      "2021-09-08 02:11:14,305 epoch 5 - iter 35/73 - loss 0.08084282 - samples/sec: 18.47 - lr: 0.020000\n",
-      "2021-09-08 02:11:14,729 epoch 5 - iter 42/73 - loss 0.10148260 - samples/sec: 16.54 - lr: 0.020000\n",
-      "2021-09-08 02:11:15,088 epoch 5 - iter 49/73 - loss 0.17076536 - samples/sec: 19.55 - lr: 0.020000\n",
-      "2021-09-08 02:11:15,517 epoch 5 - iter 56/73 - loss 0.17975466 - samples/sec: 16.33 - lr: 0.020000\n",
-      "2021-09-08 02:11:15,860 epoch 5 - iter 63/73 - loss 0.16567850 - samples/sec: 20.48 - lr: 0.020000\n",
-      "2021-09-08 02:11:16,242 epoch 5 - iter 70/73 - loss 0.15011494 - samples/sec: 18.36 - lr: 0.020000\n",
-      "2021-09-08 02:11:16,381 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 22:23:56,300 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:23:56,771 epoch 6 - iter 7/73 - loss 0.25156961 - samples/sec: 15.42 - lr: 0.020000\n",
+      "2021-09-21 22:23:57,171 epoch 6 - iter 14/73 - loss 0.12766548 - samples/sec: 17.50 - lr: 0.020000\n",
+      "2021-09-21 22:23:57,648 epoch 6 - iter 21/73 - loss 0.08624626 - samples/sec: 14.72 - lr: 0.020000\n",
+      "2021-09-21 22:23:58,078 epoch 6 - iter 28/73 - loss 0.09008733 - samples/sec: 16.28 - lr: 0.020000\n",
+      "2021-09-21 22:23:58,417 epoch 6 - iter 35/73 - loss 0.07337510 - samples/sec: 20.67 - lr: 0.020000\n",
+      "2021-09-21 22:23:58,811 epoch 6 - iter 42/73 - loss 0.10234807 - samples/sec: 17.83 - lr: 0.020000\n",
+      "2021-09-21 22:23:59,178 epoch 6 - iter 49/73 - loss 0.09817538 - samples/sec: 19.10 - lr: 0.020000\n",
+      "2021-09-21 22:23:59,575 epoch 6 - iter 56/73 - loss 0.08664043 - samples/sec: 17.64 - lr: 0.020000\n",
+      "2021-09-21 22:23:59,902 epoch 6 - iter 63/73 - loss 0.12357284 - samples/sec: 21.46 - lr: 0.020000\n",
+      "2021-09-21 22:24:00,263 epoch 6 - iter 70/73 - loss 0.12365065 - samples/sec: 19.41 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:11:16,381 EPOCH 5 done: loss 0.1605 - lr 0.0200000\n",
-      "2021-09-08 02:11:16,590 DEV : loss 0.40416526794433594 - score 0.5\n",
-      "2021-09-08 02:11:16,591 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 02:11:16,607 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:11:17,099 epoch 6 - iter 7/73 - loss 0.00430496 - samples/sec: 14.69 - lr: 0.020000\n",
-      "2021-09-08 02:11:17,466 epoch 6 - iter 14/73 - loss 0.00404168 - samples/sec: 19.10 - lr: 0.020000\n",
-      "2021-09-08 02:11:17,851 epoch 6 - iter 21/73 - loss 0.00511784 - samples/sec: 18.21 - lr: 0.020000\n",
-      "2021-09-08 02:11:18,231 epoch 6 - iter 28/73 - loss 0.00414192 - samples/sec: 18.43 - lr: 0.020000\n",
-      "2021-09-08 02:11:18,663 epoch 6 - iter 35/73 - loss 0.02967942 - samples/sec: 16.24 - lr: 0.020000\n",
-      "2021-09-08 02:11:19,133 epoch 6 - iter 42/73 - loss 0.02820404 - samples/sec: 14.89 - lr: 0.020000\n",
-      "2021-09-08 02:11:19,488 epoch 6 - iter 49/73 - loss 0.02436646 - samples/sec: 19.76 - lr: 0.020000\n",
-      "2021-09-08 02:11:19,902 epoch 6 - iter 56/73 - loss 0.03207761 - samples/sec: 16.94 - lr: 0.020000\n",
-      "2021-09-08 02:11:20,395 epoch 6 - iter 63/73 - loss 0.06066096 - samples/sec: 14.21 - lr: 0.020000\n",
-      "2021-09-08 02:11:20,722 epoch 6 - iter 70/73 - loss 0.05536391 - samples/sec: 21.47 - lr: 0.020000\n",
-      "2021-09-08 02:11:20,897 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:11:20,897 EPOCH 6 done: loss 0.0532 - lr 0.0200000\n",
-      "2021-09-08 02:11:21,102 DEV : loss 0.8147181868553162 - score 0.5\n",
-      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 02:11:21,103 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 02:11:21,106 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:11:21,503 epoch 7 - iter 7/73 - loss 0.00057325 - samples/sec: 18.28 - lr: 0.010000\n",
-      "2021-09-08 02:11:21,897 epoch 7 - iter 14/73 - loss 0.06880820 - samples/sec: 17.83 - lr: 0.010000\n",
-      "2021-09-08 02:11:22,250 epoch 7 - iter 21/73 - loss 0.08716593 - samples/sec: 19.84 - lr: 0.010000\n",
-      "2021-09-08 02:11:22,612 epoch 7 - iter 28/73 - loss 0.06561027 - samples/sec: 19.35 - lr: 0.010000\n",
-      "2021-09-08 02:11:23,033 epoch 7 - iter 35/73 - loss 0.05406560 - samples/sec: 16.68 - lr: 0.010000\n",
-      "2021-09-08 02:11:23,545 epoch 7 - iter 42/73 - loss 0.08804063 - samples/sec: 13.67 - lr: 0.010000\n",
-      "2021-09-08 02:11:23,880 epoch 7 - iter 49/73 - loss 0.07572777 - samples/sec: 20.97 - lr: 0.010000\n",
-      "2021-09-08 02:11:24,395 epoch 7 - iter 56/73 - loss 0.06836450 - samples/sec: 13.60 - lr: 0.010000\n",
-      "2021-09-08 02:11:24,732 epoch 7 - iter 63/73 - loss 0.06287074 - samples/sec: 20.82 - lr: 0.010000\n",
-      "2021-09-08 02:11:25,205 epoch 7 - iter 70/73 - loss 0.09711331 - samples/sec: 14.82 - lr: 0.010000\n",
-      "2021-09-08 02:11:25,408 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:11:25,408 EPOCH 7 done: loss 0.0932 - lr 0.0100000\n",
-      "2021-09-08 02:11:25,707 DEV : loss 0.8091738224029541 - score 0.5\n",
-      "2021-09-08 02:11:25,708 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:11:25,713 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:11:26,078 epoch 8 - iter 7/73 - loss 0.04779542 - samples/sec: 20.01 - lr: 0.010000\n",
-      "2021-09-08 02:11:26,542 epoch 8 - iter 14/73 - loss 0.03919148 - samples/sec: 15.11 - lr: 0.010000\n",
-      "2021-09-08 02:11:27,044 epoch 8 - iter 21/73 - loss 0.02647840 - samples/sec: 13.96 - lr: 0.010000\n",
-      "2021-09-08 02:11:27,399 epoch 8 - iter 28/73 - loss 0.10634374 - samples/sec: 19.76 - lr: 0.010000\n",
-      "2021-09-08 02:11:27,848 epoch 8 - iter 35/73 - loss 0.08536292 - samples/sec: 15.62 - lr: 0.010000\n",
-      "2021-09-08 02:11:28,207 epoch 8 - iter 42/73 - loss 0.07971199 - samples/sec: 19.50 - lr: 0.010000\n",
-      "2021-09-08 02:11:28,586 epoch 8 - iter 49/73 - loss 0.06853857 - samples/sec: 18.53 - lr: 0.010000\n",
-      "2021-09-08 02:11:29,017 epoch 8 - iter 56/73 - loss 0.06013625 - samples/sec: 16.23 - lr: 0.010000\n",
-      "2021-09-08 02:11:29,367 epoch 8 - iter 63/73 - loss 0.05378603 - samples/sec: 20.04 - lr: 0.010000\n",
-      "2021-09-08 02:11:29,834 epoch 8 - iter 70/73 - loss 0.04853954 - samples/sec: 15.00 - lr: 0.010000\n",
-      "2021-09-08 02:11:29,990 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:11:29,991 EPOCH 8 done: loss 0.0466 - lr 0.0100000\n",
-      "2021-09-08 02:11:30,196 DEV : loss 0.7244435548782349 - score 0.375\n",
-      "2021-09-08 02:11:30,197 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:11:30,203 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:11:30,576 epoch 9 - iter 7/73 - loss 0.00052133 - samples/sec: 19.49 - lr: 0.010000\n",
-      "2021-09-08 02:11:31,190 epoch 9 - iter 14/73 - loss 0.00070014 - samples/sec: 11.42 - lr: 0.010000\n",
-      "2021-09-08 02:11:31,662 epoch 9 - iter 21/73 - loss 0.00064790 - samples/sec: 14.84 - lr: 0.010000\n",
-      "2021-09-08 02:11:32,124 epoch 9 - iter 28/73 - loss 0.00057917 - samples/sec: 15.19 - lr: 0.010000\n",
-      "2021-09-08 02:11:32,635 epoch 9 - iter 35/73 - loss 0.00090260 - samples/sec: 13.70 - lr: 0.010000\n",
-      "2021-09-08 02:11:32,993 epoch 9 - iter 42/73 - loss 0.00455103 - samples/sec: 19.62 - lr: 0.010000\n",
-      "2021-09-08 02:11:33,357 epoch 9 - iter 49/73 - loss 0.00410325 - samples/sec: 19.24 - lr: 0.010000\n",
-      "2021-09-08 02:11:33,700 epoch 9 - iter 56/73 - loss 0.00376036 - samples/sec: 20.48 - lr: 0.010000\n",
-      "2021-09-08 02:11:34,105 epoch 9 - iter 63/73 - loss 0.00340170 - samples/sec: 17.28 - lr: 0.010000\n",
-      "2021-09-08 02:11:34,513 epoch 9 - iter 70/73 - loss 0.00329731 - samples/sec: 17.22 - lr: 0.010000\n",
-      "2021-09-08 02:11:34,646 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:11:34,646 EPOCH 9 done: loss 0.0032 - lr 0.0100000\n",
-      "2021-09-08 02:11:34,849 DEV : loss 0.7897816896438599 - score 0.375\n",
-      "2021-09-08 02:11:34,850 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 02:11:34,852 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:11:35,294 epoch 10 - iter 7/73 - loss 0.00062350 - samples/sec: 16.38 - lr: 0.010000\n",
-      "2021-09-08 02:11:35,671 epoch 10 - iter 14/73 - loss 0.00046583 - samples/sec: 18.60 - lr: 0.010000\n",
-      "2021-09-08 02:11:36,085 epoch 10 - iter 21/73 - loss 0.00070759 - samples/sec: 16.91 - lr: 0.010000\n",
-      "2021-09-08 02:11:36,444 epoch 10 - iter 28/73 - loss 0.00074447 - samples/sec: 19.55 - lr: 0.010000\n",
-      "2021-09-08 02:11:36,857 epoch 10 - iter 35/73 - loss 0.00070526 - samples/sec: 16.96 - lr: 0.010000\n",
-      "2021-09-08 02:11:37,323 epoch 10 - iter 42/73 - loss 0.00065256 - samples/sec: 15.05 - lr: 0.010000\n",
-      "2021-09-08 02:11:37,774 epoch 10 - iter 49/73 - loss 0.00157546 - samples/sec: 15.52 - lr: 0.010000\n",
-      "2021-09-08 02:11:38,101 epoch 10 - iter 56/73 - loss 0.00143505 - samples/sec: 21.49 - lr: 0.010000\n",
-      "2021-09-08 02:11:38,431 epoch 10 - iter 63/73 - loss 0.00132006 - samples/sec: 21.20 - lr: 0.010000\n",
-      "2021-09-08 02:11:38,864 epoch 10 - iter 70/73 - loss 0.00122154 - samples/sec: 16.21 - lr: 0.010000\n",
-      "2021-09-08 02:11:39,108 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:11:39,108 EPOCH 10 done: loss 0.0012 - lr 0.0100000\n",
-      "2021-09-08 02:11:39,310 DEV : loss 0.8150217533111572 - score 0.375\n",
-      "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 02:11:39,311 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 02:11:43,408 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:11:43,409 Testing using best model ...\n",
-      "2021-09-08 02:11:43,410 loading file None/best-model.pt\n",
+      "2021-09-21 22:24:00,393 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:24:00,393 EPOCH 6 done: loss 0.1186 - lr 0.0200000\n",
+      "2021-09-21 22:24:00,874 DEV : loss 0.35328391194343567 - score 0.625\n",
+      "2021-09-21 22:24:00,876 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:24:00,878 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:24:01,434 epoch 7 - iter 7/73 - loss 0.00371233 - samples/sec: 12.93 - lr: 0.020000\n",
+      "2021-09-21 22:24:01,758 epoch 7 - iter 14/73 - loss 0.05299431 - samples/sec: 21.67 - lr: 0.020000\n",
+      "2021-09-21 22:24:02,196 epoch 7 - iter 21/73 - loss 0.05851414 - samples/sec: 16.02 - lr: 0.020000\n",
+      "2021-09-21 22:24:02,591 epoch 7 - iter 28/73 - loss 0.04724238 - samples/sec: 17.79 - lr: 0.020000\n",
+      "2021-09-21 22:24:03,056 epoch 7 - iter 35/73 - loss 0.03809984 - samples/sec: 15.05 - lr: 0.020000\n",
+      "2021-09-21 22:24:03,390 epoch 7 - iter 42/73 - loss 0.03186726 - samples/sec: 20.99 - lr: 0.020000\n",
+      "2021-09-21 22:24:03,779 epoch 7 - iter 49/73 - loss 0.04853930 - samples/sec: 18.05 - lr: 0.020000\n",
+      "2021-09-21 22:24:04,113 epoch 7 - iter 56/73 - loss 0.04277693 - samples/sec: 20.96 - lr: 0.020000\n",
+      "2021-09-21 22:24:04,491 epoch 7 - iter 63/73 - loss 0.03828381 - samples/sec: 18.54 - lr: 0.020000\n",
+      "2021-09-21 22:24:04,803 epoch 7 - iter 70/73 - loss 0.05592362 - samples/sec: 22.54 - lr: 0.020000\n",
+      "2021-09-21 22:24:04,944 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:24:04,945 EPOCH 7 done: loss 0.0537 - lr 0.0200000\n",
+      "2021-09-21 22:24:05,594 DEV : loss 0.4248224198818207 - score 0.625\n",
+      "2021-09-21 22:24:05,596 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 22:24:05,598 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:24:05,957 epoch 8 - iter 7/73 - loss 0.13616064 - samples/sec: 20.39 - lr: 0.020000\n",
+      "2021-09-21 22:24:06,313 epoch 8 - iter 14/73 - loss 0.06836942 - samples/sec: 19.68 - lr: 0.020000\n",
+      "2021-09-21 22:24:06,634 epoch 8 - iter 21/73 - loss 0.04571751 - samples/sec: 21.84 - lr: 0.020000\n",
+      "2021-09-21 22:24:07,083 epoch 8 - iter 28/73 - loss 0.03440597 - samples/sec: 15.64 - lr: 0.020000\n",
+      "2021-09-21 22:24:07,425 epoch 8 - iter 35/73 - loss 0.02797685 - samples/sec: 20.48 - lr: 0.020000\n",
+      "2021-09-21 22:24:07,873 epoch 8 - iter 42/73 - loss 0.04276378 - samples/sec: 15.64 - lr: 0.020000\n",
+      "2021-09-21 22:24:08,245 epoch 8 - iter 49/73 - loss 0.03670402 - samples/sec: 18.84 - lr: 0.020000\n",
+      "2021-09-21 22:24:08,645 epoch 8 - iter 56/73 - loss 0.03220276 - samples/sec: 17.54 - lr: 0.020000\n",
+      "2021-09-21 22:24:09,049 epoch 8 - iter 63/73 - loss 0.02867935 - samples/sec: 17.33 - lr: 0.020000\n",
+      "2021-09-21 22:24:09,444 epoch 8 - iter 70/73 - loss 0.02587010 - samples/sec: 17.75 - lr: 0.020000\n",
+      "2021-09-21 22:24:09,638 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:24:09,639 EPOCH 8 done: loss 0.0308 - lr 0.0200000\n",
+      "2021-09-21 22:24:10,116 DEV : loss 0.6946208477020264 - score 0.5\n",
+      "2021-09-21 22:24:10,118 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 22:24:10,132 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:24:10,501 epoch 9 - iter 7/73 - loss 0.00083523 - samples/sec: 19.77 - lr: 0.020000\n",
+      "2021-09-21 22:24:10,807 epoch 9 - iter 14/73 - loss 0.00065608 - samples/sec: 22.90 - lr: 0.020000\n",
+      "2021-09-21 22:24:11,241 epoch 9 - iter 21/73 - loss 0.00053959 - samples/sec: 16.16 - lr: 0.020000\n",
+      "2021-09-21 22:24:11,602 epoch 9 - iter 28/73 - loss 0.00051275 - samples/sec: 19.41 - lr: 0.020000\n",
+      "2021-09-21 22:24:11,955 epoch 9 - iter 35/73 - loss 0.00046283 - samples/sec: 19.84 - lr: 0.020000\n",
+      "2021-09-21 22:24:12,340 epoch 9 - iter 42/73 - loss 0.00048104 - samples/sec: 18.20 - lr: 0.020000\n",
+      "2021-09-21 22:24:12,763 epoch 9 - iter 49/73 - loss 0.00053616 - samples/sec: 16.57 - lr: 0.020000\n",
+      "2021-09-21 22:24:13,094 epoch 9 - iter 56/73 - loss 0.00052151 - samples/sec: 21.18 - lr: 0.020000\n",
+      "2021-09-21 22:24:13,604 epoch 9 - iter 63/73 - loss 0.01507016 - samples/sec: 13.75 - lr: 0.020000\n",
+      "2021-09-21 22:24:13,925 epoch 9 - iter 70/73 - loss 0.01359105 - samples/sec: 21.81 - lr: 0.020000\n",
+      "2021-09-21 22:24:14,165 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:24:14,165 EPOCH 9 done: loss 0.0130 - lr 0.0200000\n",
+      "2021-09-21 22:24:14,686 DEV : loss 0.5982265472412109 - score 0.625\n",
+      "Epoch     9: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 22:24:14,688 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 22:24:14,691 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:24:15,037 epoch 10 - iter 7/73 - loss 0.00022988 - samples/sec: 21.14 - lr: 0.010000\n",
+      "2021-09-21 22:24:15,453 epoch 10 - iter 14/73 - loss 0.00022494 - samples/sec: 16.85 - lr: 0.010000\n",
+      "2021-09-21 22:24:15,865 epoch 10 - iter 21/73 - loss 0.00027628 - samples/sec: 17.01 - lr: 0.010000\n",
+      "2021-09-21 22:24:16,191 epoch 10 - iter 28/73 - loss 0.00047301 - samples/sec: 21.53 - lr: 0.010000\n",
+      "2021-09-21 22:24:16,553 epoch 10 - iter 35/73 - loss 0.00042877 - samples/sec: 19.37 - lr: 0.010000\n",
+      "2021-09-21 22:24:17,015 epoch 10 - iter 42/73 - loss 0.00041074 - samples/sec: 15.14 - lr: 0.010000\n",
+      "2021-09-21 22:24:17,406 epoch 10 - iter 49/73 - loss 0.00042865 - samples/sec: 17.93 - lr: 0.010000\n",
+      "2021-09-21 22:24:17,798 epoch 10 - iter 56/73 - loss 0.00764540 - samples/sec: 17.90 - lr: 0.010000\n",
+      "2021-09-21 22:24:18,352 epoch 10 - iter 63/73 - loss 0.00681753 - samples/sec: 12.65 - lr: 0.010000\n",
+      "2021-09-21 22:24:18,737 epoch 10 - iter 70/73 - loss 0.00623006 - samples/sec: 18.24 - lr: 0.010000\n",
+      "2021-09-21 22:24:18,866 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:24:18,866 EPOCH 10 done: loss 0.0060 - lr 0.0100000\n",
+      "2021-09-21 22:24:19,370 DEV : loss 0.6289634108543396 - score 0.625\n",
+      "2021-09-21 22:24:19,371 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:24:23,496 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:24:23,497 Testing using best model ...\n",
+      "2021-09-21 22:24:23,498 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:11:48,693 \t0.0\n",
-      "2021-09-08 02:11:48,694 \n",
+      "2021-09-21 22:24:27,814 \t0.7778\n",
+      "2021-09-21 22:24:27,815 \n",
       "Results:\n",
-      "- F-score (micro) 0.0\n",
-      "- F-score (macro) 0.0\n",
-      "- Accuracy 0.0\n",
+      "- F-score (micro) 0.7778\n",
+      "- F-score (macro) 0.5926\n",
+      "- Accuracy 0.7778\n",
       "\n",
       "By class:\n",
-      "                                               precision    recall  f1-score   support\n",
+      "                                                                                  precision    recall  f1-score   support\n",
       "\n",
-      "                    this text is about travel     0.0000    0.0000    0.0000         0\n",
-      "                this text is about technology     0.0000    0.0000    0.0000         0\n",
-      "                  this text is about wellness     0.0000    0.0000    0.0000         0\n",
-      "                     this text is about women     0.0000    0.0000    0.0000         0\n",
-      "                   this text is about parents     0.0000    0.0000    0.0000         0\n",
-      "                  this text is about business     0.0000    0.0000    0.0000         0\n",
-      "                  this text is about weddings     0.0000    0.0000    0.0000         0\n",
-      "                   this text is about fashion     0.0000    0.0000    0.0000         0\n",
-      "this text is about entertainmen,the attention     0.0000    0.0000    0.0000         0\n",
-      "                   this text is about science     0.0000    0.0000    0.0000         0\n",
-      "                   this text is about divorce     0.0000    0.0000    0.0000         0\n",
-      "                     this text is about crime     0.0000    0.0000    0.0000         0\n",
-      "                  this text is about religion     0.0000    0.0000    0.0000         0\n",
-      "                    this text is about sports     0.0000    0.0000    0.0000         0\n",
-      "                  this text is about politics     0.0000    0.0000    0.0000         0\n",
-      "                    this text is about comedy     0.0000    0.0000    0.0000         0\n",
+      "                                                   a social unit living together     1.0000    1.0000    1.0000         1\n",
+      "                                     a particular branch of scientific knowledge     0.0000    0.0000    0.0000         1\n",
+      "                      an activity that is diverting and that holds the attention     1.0000    1.0000    1.0000         1\n",
+      "                             a machine for performing calculations automatically     0.5000    1.0000    0.6667         1\n",
+      "                                  knowledge acquired by learning and instruction     1.0000    1.0000    1.0000         1\n",
+      "                 an active diversion requiring physical exertion and competition     0.5000    1.0000    0.6667         1\n",
+      "an extended social group having a distinctive cultural and economic organization     1.0000    1.0000    1.0000         2\n",
+      "                          the commercial activity of providing funds and capital     0.0000    0.0000    0.0000         1\n",
+      "                     the study of government of states and other political units     0.0000    0.0000    0.0000         0\n",
       "\n",
-      "                                    micro avg     0.0000    0.0000    0.0000         0\n",
-      "                                    macro avg     0.0000    0.0000    0.0000         0\n",
-      "                                 weighted avg     0.0000    0.0000    0.0000         0\n",
-      "                                  samples avg     0.0000    0.0000    0.0000         0\n",
+      "                                                                       micro avg     0.7778    0.7778    0.7778         9\n",
+      "                                                                       macro avg     0.5556    0.6667    0.5926         9\n",
+      "                                                                    weighted avg     0.6667    0.7778    0.7037         9\n",
+      "                                                                     samples avg     0.7778    0.7778    0.7778         9\n",
       "\n"
      ]
     },
@@ -6397,26 +6416,26 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:11:48,694 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:12:04,683 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
+      "2021-09-21 22:24:27,815 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:24:43,666 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:12:08,637 Computing label dictionary. Progress:\n"
+      "2021-09-21 22:24:47,686 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 82/82 [00:00<00:00, 14671.65it/s]"
+      "100%|██████████| 82/82 [00:00<00:00, 14359.86it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:12:08,644 [b'a social unit living together', b'a particular branch of scientific knowledge', b'an activity that is diverting and that holds the attention', b'a machine for performing calculations automatically', b'knowledge acquired by learning and instruction', b'an active diversion requiring physical exertion and competition', b'an extended social group having a distinctive cultural and economic organization', b'the commercial activity of providing funds and capital', b'the study of government of states and other political units']\n",
-      "2021-09-08 02:12:08,655 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:12:08,657 Model: \"TARSClassifier(\n",
+      "2021-09-21 22:24:47,694 [b'a social unit living together', b'a particular branch of scientific knowledge', b'an activity that is diverting and that holds the attention', b'a machine for performing calculations automatically', b'knowledge acquired by learning and instruction', b'an active diversion requiring physical exertion and competition', b'an extended social group having a distinctive cultural and economic organization', b'the commercial activity of providing funds and capital', b'the study of government of states and other political units']\n",
+      "2021-09-21 22:24:47,704 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:24:47,706 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -6729,25 +6748,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:12:08,657 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:12:08,658 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
-      "2021-09-08 02:12:08,658 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:12:08,658 Parameters:\n",
-      "2021-09-08 02:12:08,659  - learning_rate: \"0.02\"\n",
-      "2021-09-08 02:12:08,659  - mini_batch_size: \"1\"\n",
-      "2021-09-08 02:12:08,659  - patience: \"3\"\n",
-      "2021-09-08 02:12:08,660  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 02:12:08,660  - max_epochs: \"10\"\n",
-      "2021-09-08 02:12:08,660  - shuffle: \"True\"\n",
-      "2021-09-08 02:12:08,660  - train_with_dev: \"False\"\n",
-      "2021-09-08 02:12:08,661  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 02:12:08,661 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:12:08,661 Model training base path: \"None\"\n",
-      "2021-09-08 02:12:08,662 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:12:08,662 Device: cuda:1\n",
-      "2021-09-08 02:12:08,662 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:12:08,663 Embeddings storage mode: cpu\n",
-      "2021-09-08 02:12:08,669 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 22:24:47,706 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:24:47,707 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
+      "2021-09-21 22:24:47,707 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:24:47,707 Parameters:\n",
+      "2021-09-21 22:24:47,708  - learning_rate: \"0.02\"\n",
+      "2021-09-21 22:24:47,708  - mini_batch_size: \"1\"\n",
+      "2021-09-21 22:24:47,708  - patience: \"3\"\n",
+      "2021-09-21 22:24:47,708  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 22:24:47,709  - max_epochs: \"10\"\n",
+      "2021-09-21 22:24:47,709  - shuffle: \"True\"\n",
+      "2021-09-21 22:24:47,709  - train_with_dev: \"False\"\n",
+      "2021-09-21 22:24:47,710  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 22:24:47,710 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:24:47,710 Model training base path: \"None\"\n",
+      "2021-09-21 22:24:47,710 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:24:47,711 Device: cuda:0\n",
+      "2021-09-21 22:24:47,711 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:24:47,711 Embeddings storage mode: cpu\n",
+      "2021-09-21 22:24:47,718 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -6761,220 +6780,220 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:12:09,223 epoch 1 - iter 7/73 - loss 0.85486794 - samples/sec: 13.02 - lr: 0.020000\n",
-      "2021-09-08 02:12:09,634 epoch 1 - iter 14/73 - loss 0.48395649 - samples/sec: 17.06 - lr: 0.020000\n",
-      "2021-09-08 02:12:10,036 epoch 1 - iter 21/73 - loss 0.47890111 - samples/sec: 17.43 - lr: 0.020000\n",
-      "2021-09-08 02:12:10,399 epoch 1 - iter 28/73 - loss 0.45720029 - samples/sec: 19.30 - lr: 0.020000\n",
-      "2021-09-08 02:12:10,807 epoch 1 - iter 35/73 - loss 0.41005548 - samples/sec: 17.19 - lr: 0.020000\n",
-      "2021-09-08 02:12:11,161 epoch 1 - iter 42/73 - loss 0.42607031 - samples/sec: 19.79 - lr: 0.020000\n",
-      "2021-09-08 02:12:11,479 epoch 1 - iter 49/73 - loss 0.45691179 - samples/sec: 22.04 - lr: 0.020000\n",
-      "2021-09-08 02:12:12,130 epoch 1 - iter 56/73 - loss 0.42773051 - samples/sec: 10.77 - lr: 0.020000\n",
-      "2021-09-08 02:12:12,486 epoch 1 - iter 63/73 - loss 0.42311764 - samples/sec: 19.70 - lr: 0.020000\n",
-      "2021-09-08 02:12:12,935 epoch 1 - iter 70/73 - loss 0.41709722 - samples/sec: 15.62 - lr: 0.020000\n",
-      "2021-09-08 02:12:13,078 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:12:13,079 EPOCH 1 done: loss 0.4141 - lr 0.0200000\n",
-      "2021-09-08 02:12:13,471 DEV : loss 0.2998962998390198 - score 0.5\n",
-      "2021-09-08 02:12:13,473 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:24:48,212 epoch 1 - iter 7/73 - loss 0.80298648 - samples/sec: 14.61 - lr: 0.020000\n",
+      "2021-09-21 22:24:48,578 epoch 1 - iter 14/73 - loss 0.65073503 - samples/sec: 19.14 - lr: 0.020000\n",
+      "2021-09-21 22:24:49,060 epoch 1 - iter 21/73 - loss 0.55236590 - samples/sec: 14.56 - lr: 0.020000\n",
+      "2021-09-21 22:24:49,389 epoch 1 - iter 28/73 - loss 0.55364660 - samples/sec: 21.29 - lr: 0.020000\n",
+      "2021-09-21 22:24:49,877 epoch 1 - iter 35/73 - loss 0.61890564 - samples/sec: 14.35 - lr: 0.020000\n",
+      "2021-09-21 22:24:50,232 epoch 1 - iter 42/73 - loss 0.55097125 - samples/sec: 19.79 - lr: 0.020000\n",
+      "2021-09-21 22:24:50,730 epoch 1 - iter 49/73 - loss 0.57942198 - samples/sec: 14.07 - lr: 0.020000\n",
+      "2021-09-21 22:24:51,208 epoch 1 - iter 56/73 - loss 0.57648224 - samples/sec: 14.66 - lr: 0.020000\n",
+      "2021-09-21 22:24:51,540 epoch 1 - iter 63/73 - loss 0.56292899 - samples/sec: 21.17 - lr: 0.020000\n",
+      "2021-09-21 22:24:52,077 epoch 1 - iter 70/73 - loss 0.55007230 - samples/sec: 13.04 - lr: 0.020000\n",
+      "2021-09-21 22:24:52,257 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:24:52,258 EPOCH 1 done: loss 0.5284 - lr 0.0200000\n",
+      "2021-09-21 22:24:52,666 DEV : loss 0.9673811793327332 - score 0.25\n",
+      "2021-09-21 22:24:52,667 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:12:17,493 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:12:17,925 epoch 2 - iter 7/73 - loss 0.17145948 - samples/sec: 16.80 - lr: 0.020000\n",
-      "2021-09-08 02:12:18,249 epoch 2 - iter 14/73 - loss 0.21516568 - samples/sec: 21.71 - lr: 0.020000\n",
-      "2021-09-08 02:12:18,626 epoch 2 - iter 21/73 - loss 0.25996086 - samples/sec: 18.56 - lr: 0.020000\n",
-      "2021-09-08 02:12:19,002 epoch 2 - iter 28/73 - loss 0.24859693 - samples/sec: 18.68 - lr: 0.020000\n",
-      "2021-09-08 02:12:19,468 epoch 2 - iter 35/73 - loss 0.21555181 - samples/sec: 15.03 - lr: 0.020000\n",
-      "2021-09-08 02:12:19,795 epoch 2 - iter 42/73 - loss 0.27103076 - samples/sec: 21.47 - lr: 0.020000\n",
-      "2021-09-08 02:12:20,170 epoch 2 - iter 49/73 - loss 0.24985095 - samples/sec: 18.71 - lr: 0.020000\n",
-      "2021-09-08 02:12:20,561 epoch 2 - iter 56/73 - loss 0.26802088 - samples/sec: 17.90 - lr: 0.020000\n",
-      "2021-09-08 02:12:20,951 epoch 2 - iter 63/73 - loss 0.25467100 - samples/sec: 18.00 - lr: 0.020000\n",
-      "2021-09-08 02:12:21,503 epoch 2 - iter 70/73 - loss 0.28298734 - samples/sec: 12.69 - lr: 0.020000\n",
-      "2021-09-08 02:12:21,754 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:12:21,754 EPOCH 2 done: loss 0.2810 - lr 0.0200000\n",
-      "2021-09-08 02:12:22,259 DEV : loss 0.23667696118354797 - score 0.75\n",
-      "2021-09-08 02:12:22,261 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:24:56,627 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:24:57,129 epoch 2 - iter 7/73 - loss 1.01542991 - samples/sec: 14.40 - lr: 0.020000\n",
+      "2021-09-21 22:24:57,527 epoch 2 - iter 14/73 - loss 0.65199436 - samples/sec: 17.63 - lr: 0.020000\n",
+      "2021-09-21 22:24:57,856 epoch 2 - iter 21/73 - loss 0.53459325 - samples/sec: 21.35 - lr: 0.020000\n",
+      "2021-09-21 22:24:58,280 epoch 2 - iter 28/73 - loss 0.50610974 - samples/sec: 16.51 - lr: 0.020000\n",
+      "2021-09-21 22:24:58,715 epoch 2 - iter 35/73 - loss 0.49570361 - samples/sec: 16.12 - lr: 0.020000\n",
+      "2021-09-21 22:24:59,095 epoch 2 - iter 42/73 - loss 0.44366313 - samples/sec: 18.47 - lr: 0.020000\n",
+      "2021-09-21 22:24:59,665 epoch 2 - iter 49/73 - loss 0.45032928 - samples/sec: 12.28 - lr: 0.020000\n",
+      "2021-09-21 22:25:00,084 epoch 2 - iter 56/73 - loss 0.45412731 - samples/sec: 16.74 - lr: 0.020000\n",
+      "2021-09-21 22:25:00,502 epoch 2 - iter 63/73 - loss 0.43750720 - samples/sec: 16.79 - lr: 0.020000\n",
+      "2021-09-21 22:25:01,016 epoch 2 - iter 70/73 - loss 0.41740631 - samples/sec: 13.63 - lr: 0.020000\n",
+      "2021-09-21 22:25:01,165 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:25:01,166 EPOCH 2 done: loss 0.4530 - lr 0.0200000\n",
+      "2021-09-21 22:25:01,575 DEV : loss 0.5287306904792786 - score 0.5\n",
+      "2021-09-21 22:25:01,576 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:12:28,210 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:12:28,702 epoch 3 - iter 7/73 - loss 0.31412417 - samples/sec: 14.74 - lr: 0.020000\n",
-      "2021-09-08 02:12:29,036 epoch 3 - iter 14/73 - loss 0.17796552 - samples/sec: 21.02 - lr: 0.020000\n",
-      "2021-09-08 02:12:29,539 epoch 3 - iter 21/73 - loss 0.12050749 - samples/sec: 13.94 - lr: 0.020000\n",
-      "2021-09-08 02:12:29,965 epoch 3 - iter 28/73 - loss 0.23193865 - samples/sec: 16.45 - lr: 0.020000\n",
-      "2021-09-08 02:12:30,298 epoch 3 - iter 35/73 - loss 0.20681162 - samples/sec: 21.09 - lr: 0.020000\n",
-      "2021-09-08 02:12:30,763 epoch 3 - iter 42/73 - loss 0.19694468 - samples/sec: 15.08 - lr: 0.020000\n",
-      "2021-09-08 02:12:31,126 epoch 3 - iter 49/73 - loss 0.23178752 - samples/sec: 19.32 - lr: 0.020000\n",
-      "2021-09-08 02:12:31,652 epoch 3 - iter 56/73 - loss 0.21468105 - samples/sec: 13.31 - lr: 0.020000\n",
-      "2021-09-08 02:12:32,059 epoch 3 - iter 63/73 - loss 0.19219782 - samples/sec: 17.24 - lr: 0.020000\n",
-      "2021-09-08 02:12:32,452 epoch 3 - iter 70/73 - loss 0.17322987 - samples/sec: 17.83 - lr: 0.020000\n",
-      "2021-09-08 02:12:32,608 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:12:32,609 EPOCH 3 done: loss 0.1662 - lr 0.0200000\n",
-      "2021-09-08 02:12:33,085 DEV : loss 0.22451446950435638 - score 0.875\n",
-      "2021-09-08 02:12:33,086 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:25:05,781 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:25:06,290 epoch 3 - iter 7/73 - loss 0.12174178 - samples/sec: 14.22 - lr: 0.020000\n",
+      "2021-09-21 22:25:06,694 epoch 3 - iter 14/73 - loss 0.10081506 - samples/sec: 17.36 - lr: 0.020000\n",
+      "2021-09-21 22:25:07,111 epoch 3 - iter 21/73 - loss 0.13145550 - samples/sec: 16.80 - lr: 0.020000\n",
+      "2021-09-21 22:25:07,432 epoch 3 - iter 28/73 - loss 0.15754848 - samples/sec: 21.85 - lr: 0.020000\n",
+      "2021-09-21 22:25:07,920 epoch 3 - iter 35/73 - loss 0.13020711 - samples/sec: 14.37 - lr: 0.020000\n",
+      "2021-09-21 22:25:08,489 epoch 3 - iter 42/73 - loss 0.10962566 - samples/sec: 12.32 - lr: 0.020000\n",
+      "2021-09-21 22:25:08,848 epoch 3 - iter 49/73 - loss 0.14179068 - samples/sec: 19.52 - lr: 0.020000\n",
+      "2021-09-21 22:25:09,293 epoch 3 - iter 56/73 - loss 0.21777403 - samples/sec: 15.76 - lr: 0.020000\n",
+      "2021-09-21 22:25:09,688 epoch 3 - iter 63/73 - loss 0.23047887 - samples/sec: 17.76 - lr: 0.020000\n",
+      "2021-09-21 22:25:10,219 epoch 3 - iter 70/73 - loss 0.20776704 - samples/sec: 13.19 - lr: 0.020000\n",
+      "2021-09-21 22:25:10,388 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:25:10,388 EPOCH 3 done: loss 0.2243 - lr 0.0200000\n",
+      "2021-09-21 22:25:10,808 DEV : loss 0.45543983578681946 - score 0.5\n",
+      "2021-09-21 22:25:10,810 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:12:40,994 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:12:41,425 epoch 4 - iter 7/73 - loss 0.13183985 - samples/sec: 16.88 - lr: 0.020000\n",
-      "2021-09-08 02:12:41,851 epoch 4 - iter 14/73 - loss 0.07355849 - samples/sec: 16.47 - lr: 0.020000\n",
-      "2021-09-08 02:12:42,174 epoch 4 - iter 21/73 - loss 0.06655040 - samples/sec: 21.72 - lr: 0.020000\n",
-      "2021-09-08 02:12:42,580 epoch 4 - iter 28/73 - loss 0.05197807 - samples/sec: 17.25 - lr: 0.020000\n",
-      "2021-09-08 02:12:43,005 epoch 4 - iter 35/73 - loss 0.10524953 - samples/sec: 16.50 - lr: 0.020000\n",
-      "2021-09-08 02:12:43,422 epoch 4 - iter 42/73 - loss 0.15912978 - samples/sec: 16.82 - lr: 0.020000\n",
-      "2021-09-08 02:12:43,794 epoch 4 - iter 49/73 - loss 0.13671356 - samples/sec: 18.87 - lr: 0.020000\n",
-      "2021-09-08 02:12:44,318 epoch 4 - iter 56/73 - loss 0.14253143 - samples/sec: 13.37 - lr: 0.020000\n",
-      "2021-09-08 02:12:44,652 epoch 4 - iter 63/73 - loss 0.14352590 - samples/sec: 20.98 - lr: 0.020000\n",
-      "2021-09-08 02:12:45,000 epoch 4 - iter 70/73 - loss 0.19520867 - samples/sec: 20.17 - lr: 0.020000\n",
-      "2021-09-08 02:12:45,175 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:12:45,175 EPOCH 4 done: loss 0.1872 - lr 0.0200000\n",
-      "2021-09-08 02:12:45,562 DEV : loss 0.25716066360473633 - score 0.75\n",
-      "2021-09-08 02:12:45,564 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:12:45,565 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:12:45,917 epoch 5 - iter 7/73 - loss 0.06188044 - samples/sec: 20.80 - lr: 0.020000\n",
-      "2021-09-08 02:12:46,327 epoch 5 - iter 14/73 - loss 0.03512147 - samples/sec: 17.08 - lr: 0.020000\n",
-      "2021-09-08 02:12:46,664 epoch 5 - iter 21/73 - loss 0.08962453 - samples/sec: 20.82 - lr: 0.020000\n",
-      "2021-09-08 02:12:47,015 epoch 5 - iter 28/73 - loss 0.06757563 - samples/sec: 19.97 - lr: 0.020000\n",
-      "2021-09-08 02:12:47,423 epoch 5 - iter 35/73 - loss 0.11127869 - samples/sec: 17.16 - lr: 0.020000\n",
-      "2021-09-08 02:12:47,744 epoch 5 - iter 42/73 - loss 0.09354886 - samples/sec: 21.83 - lr: 0.020000\n",
-      "2021-09-08 02:12:48,259 epoch 5 - iter 49/73 - loss 0.09404904 - samples/sec: 13.61 - lr: 0.020000\n",
-      "2021-09-08 02:12:48,580 epoch 5 - iter 56/73 - loss 0.08277129 - samples/sec: 21.88 - lr: 0.020000\n",
-      "2021-09-08 02:12:48,960 epoch 5 - iter 63/73 - loss 0.07426192 - samples/sec: 18.43 - lr: 0.020000\n",
-      "2021-09-08 02:12:49,547 epoch 5 - iter 70/73 - loss 0.07250900 - samples/sec: 11.94 - lr: 0.020000\n",
-      "2021-09-08 02:12:49,705 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:12:49,705 EPOCH 5 done: loss 0.0696 - lr 0.0200000\n",
-      "2021-09-08 02:12:50,234 DEV : loss 0.3309642970561981 - score 0.75\n",
-      "2021-09-08 02:12:50,236 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:12:50,237 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:12:50,759 epoch 6 - iter 7/73 - loss 0.03003965 - samples/sec: 13.81 - lr: 0.020000\n",
-      "2021-09-08 02:12:51,213 epoch 6 - iter 14/73 - loss 0.04732226 - samples/sec: 15.43 - lr: 0.020000\n",
-      "2021-09-08 02:12:51,526 epoch 6 - iter 21/73 - loss 0.03235083 - samples/sec: 22.42 - lr: 0.020000\n",
-      "2021-09-08 02:12:51,948 epoch 6 - iter 28/73 - loss 0.02929453 - samples/sec: 16.60 - lr: 0.020000\n",
-      "2021-09-08 02:12:52,284 epoch 6 - iter 35/73 - loss 0.02352758 - samples/sec: 20.89 - lr: 0.020000\n",
-      "2021-09-08 02:12:52,693 epoch 6 - iter 42/73 - loss 0.02017546 - samples/sec: 17.12 - lr: 0.020000\n",
-      "2021-09-08 02:12:53,065 epoch 6 - iter 49/73 - loss 0.01757904 - samples/sec: 18.87 - lr: 0.020000\n",
-      "2021-09-08 02:12:53,436 epoch 6 - iter 56/73 - loss 0.05157986 - samples/sec: 18.89 - lr: 0.020000\n",
-      "2021-09-08 02:12:53,898 epoch 6 - iter 63/73 - loss 0.04596899 - samples/sec: 15.16 - lr: 0.020000\n",
-      "2021-09-08 02:12:54,236 epoch 6 - iter 70/73 - loss 0.04333656 - samples/sec: 20.77 - lr: 0.020000\n",
-      "2021-09-08 02:12:54,385 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 22:25:14,803 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:25:15,146 epoch 4 - iter 7/73 - loss 0.06655467 - samples/sec: 21.46 - lr: 0.020000\n",
+      "2021-09-21 22:25:15,663 epoch 4 - iter 14/73 - loss 0.04544672 - samples/sec: 13.56 - lr: 0.020000\n",
+      "2021-09-21 22:25:16,154 epoch 4 - iter 21/73 - loss 0.05631074 - samples/sec: 14.28 - lr: 0.020000\n",
+      "2021-09-21 22:25:16,513 epoch 4 - iter 28/73 - loss 0.04446956 - samples/sec: 19.53 - lr: 0.020000\n",
+      "2021-09-21 22:25:16,979 epoch 4 - iter 35/73 - loss 0.21695774 - samples/sec: 15.04 - lr: 0.020000\n",
+      "2021-09-21 22:25:17,568 epoch 4 - iter 42/73 - loss 0.19874600 - samples/sec: 11.90 - lr: 0.020000\n",
+      "2021-09-21 22:25:17,879 epoch 4 - iter 49/73 - loss 0.19881534 - samples/sec: 22.50 - lr: 0.020000\n",
+      "2021-09-21 22:25:18,366 epoch 4 - iter 56/73 - loss 0.20416286 - samples/sec: 14.40 - lr: 0.020000\n",
+      "2021-09-21 22:25:18,671 epoch 4 - iter 63/73 - loss 0.18177012 - samples/sec: 23.00 - lr: 0.020000\n",
+      "2021-09-21 22:25:19,079 epoch 4 - iter 70/73 - loss 0.18144070 - samples/sec: 17.18 - lr: 0.020000\n",
+      "2021-09-21 22:25:19,237 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:25:19,237 EPOCH 4 done: loss 0.1740 - lr 0.0200000\n",
+      "2021-09-21 22:25:19,647 DEV : loss 0.5847054123878479 - score 0.5\n",
+      "2021-09-21 22:25:19,648 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:25:19,653 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:25:20,219 epoch 5 - iter 7/73 - loss 0.00402711 - samples/sec: 12.69 - lr: 0.020000\n",
+      "2021-09-21 22:25:20,612 epoch 5 - iter 14/73 - loss 0.00281650 - samples/sec: 17.84 - lr: 0.020000\n",
+      "2021-09-21 22:25:21,011 epoch 5 - iter 21/73 - loss 0.05415983 - samples/sec: 17.57 - lr: 0.020000\n",
+      "2021-09-21 22:25:21,505 epoch 5 - iter 28/73 - loss 0.12903875 - samples/sec: 14.19 - lr: 0.020000\n",
+      "2021-09-21 22:25:21,828 epoch 5 - iter 35/73 - loss 0.11803030 - samples/sec: 21.70 - lr: 0.020000\n",
+      "2021-09-21 22:25:22,190 epoch 5 - iter 42/73 - loss 0.13644787 - samples/sec: 19.37 - lr: 0.020000\n",
+      "2021-09-21 22:25:22,501 epoch 5 - iter 49/73 - loss 0.14411160 - samples/sec: 22.56 - lr: 0.020000\n",
+      "2021-09-21 22:25:22,915 epoch 5 - iter 56/73 - loss 0.16882635 - samples/sec: 16.92 - lr: 0.020000\n",
+      "2021-09-21 22:25:23,274 epoch 5 - iter 63/73 - loss 0.15018534 - samples/sec: 19.55 - lr: 0.020000\n",
+      "2021-09-21 22:25:23,785 epoch 5 - iter 70/73 - loss 0.14307091 - samples/sec: 13.71 - lr: 0.020000\n",
+      "2021-09-21 22:25:24,062 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:25:24,062 EPOCH 5 done: loss 0.1375 - lr 0.0200000\n",
+      "2021-09-21 22:25:24,469 DEV : loss 0.7009537816047668 - score 0.5\n",
+      "2021-09-21 22:25:24,471 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 22:25:24,473 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:25:24,848 epoch 6 - iter 7/73 - loss 0.11220025 - samples/sec: 19.36 - lr: 0.020000\n",
+      "2021-09-21 22:25:25,395 epoch 6 - iter 14/73 - loss 0.05752125 - samples/sec: 12.83 - lr: 0.020000\n",
+      "2021-09-21 22:25:25,943 epoch 6 - iter 21/73 - loss 0.08641465 - samples/sec: 12.77 - lr: 0.020000\n",
+      "2021-09-21 22:25:26,277 epoch 6 - iter 28/73 - loss 0.06493183 - samples/sec: 21.03 - lr: 0.020000\n",
+      "2021-09-21 22:25:26,867 epoch 6 - iter 35/73 - loss 0.05282192 - samples/sec: 11.87 - lr: 0.020000\n",
+      "2021-09-21 22:25:27,187 epoch 6 - iter 42/73 - loss 0.05629742 - samples/sec: 21.92 - lr: 0.020000\n",
+      "2021-09-21 22:25:27,554 epoch 6 - iter 49/73 - loss 0.04902449 - samples/sec: 19.13 - lr: 0.020000\n",
+      "2021-09-21 22:25:28,018 epoch 6 - iter 56/73 - loss 0.10755866 - samples/sec: 15.11 - lr: 0.020000\n",
+      "2021-09-21 22:25:28,341 epoch 6 - iter 63/73 - loss 0.09597804 - samples/sec: 21.71 - lr: 0.020000\n",
+      "2021-09-21 22:25:28,785 epoch 6 - iter 70/73 - loss 0.08640799 - samples/sec: 15.77 - lr: 0.020000\n",
+      "2021-09-21 22:25:29,022 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:12:54,385 EPOCH 6 done: loss 0.0417 - lr 0.0200000\n",
-      "2021-09-08 02:12:54,789 DEV : loss 0.3893643021583557 - score 0.625\n",
-      "2021-09-08 02:12:54,790 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 02:12:54,793 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:12:55,173 epoch 7 - iter 7/73 - loss 0.00249283 - samples/sec: 19.18 - lr: 0.020000\n",
-      "2021-09-08 02:12:55,602 epoch 7 - iter 14/73 - loss 0.00716229 - samples/sec: 16.32 - lr: 0.020000\n",
-      "2021-09-08 02:12:55,953 epoch 7 - iter 21/73 - loss 0.00501694 - samples/sec: 19.97 - lr: 0.020000\n",
-      "2021-09-08 02:12:56,280 epoch 7 - iter 28/73 - loss 0.00383060 - samples/sec: 21.42 - lr: 0.020000\n",
-      "2021-09-08 02:12:56,789 epoch 7 - iter 35/73 - loss 0.00315411 - samples/sec: 13.79 - lr: 0.020000\n",
-      "2021-09-08 02:12:57,216 epoch 7 - iter 42/73 - loss 0.01173379 - samples/sec: 16.39 - lr: 0.020000\n",
-      "2021-09-08 02:12:57,544 epoch 7 - iter 49/73 - loss 0.04177518 - samples/sec: 21.38 - lr: 0.020000\n",
-      "2021-09-08 02:12:57,964 epoch 7 - iter 56/73 - loss 0.03701073 - samples/sec: 16.71 - lr: 0.020000\n",
-      "2021-09-08 02:12:58,280 epoch 7 - iter 63/73 - loss 0.05369773 - samples/sec: 22.14 - lr: 0.020000\n",
-      "2021-09-08 02:12:58,780 epoch 7 - iter 70/73 - loss 0.04843732 - samples/sec: 14.03 - lr: 0.020000\n",
-      "2021-09-08 02:12:58,928 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:12:58,929 EPOCH 7 done: loss 0.0465 - lr 0.0200000\n",
-      "2021-09-08 02:12:59,457 DEV : loss 0.3338218927383423 - score 0.75\n",
+      "2021-09-21 22:25:29,022 EPOCH 6 done: loss 0.0829 - lr 0.0200000\n",
+      "2021-09-21 22:25:29,431 DEV : loss 0.5682312846183777 - score 0.5\n",
+      "2021-09-21 22:25:29,432 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 22:25:29,434 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:25:29,888 epoch 7 - iter 7/73 - loss 0.00042164 - samples/sec: 15.91 - lr: 0.020000\n",
+      "2021-09-21 22:25:30,370 epoch 7 - iter 14/73 - loss 0.00036298 - samples/sec: 14.54 - lr: 0.020000\n",
+      "2021-09-21 22:25:30,725 epoch 7 - iter 21/73 - loss 0.00165891 - samples/sec: 19.77 - lr: 0.020000\n",
+      "2021-09-21 22:25:31,095 epoch 7 - iter 28/73 - loss 0.00395584 - samples/sec: 18.95 - lr: 0.020000\n",
+      "2021-09-21 22:25:31,519 epoch 7 - iter 35/73 - loss 0.00325145 - samples/sec: 16.54 - lr: 0.020000\n",
+      "2021-09-21 22:25:32,115 epoch 7 - iter 42/73 - loss 0.00275191 - samples/sec: 11.76 - lr: 0.020000\n",
+      "2021-09-21 22:25:32,562 epoch 7 - iter 49/73 - loss 0.00424117 - samples/sec: 15.66 - lr: 0.020000\n",
+      "2021-09-21 22:25:32,875 epoch 7 - iter 56/73 - loss 0.01662471 - samples/sec: 22.45 - lr: 0.020000\n",
+      "2021-09-21 22:25:33,361 epoch 7 - iter 63/73 - loss 0.01524415 - samples/sec: 14.40 - lr: 0.020000\n",
+      "2021-09-21 22:25:33,666 epoch 7 - iter 70/73 - loss 0.01374817 - samples/sec: 23.00 - lr: 0.020000\n",
+      "2021-09-21 22:25:33,802 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:25:33,802 EPOCH 7 done: loss 0.0132 - lr 0.0200000\n",
+      "2021-09-21 22:25:34,223 DEV : loss 0.6408505439758301 - score 0.5\n",
       "Epoch     7: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 02:12:59,458 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 02:12:59,471 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:12:59,836 epoch 8 - iter 7/73 - loss 0.00055739 - samples/sec: 19.99 - lr: 0.010000\n",
-      "2021-09-08 02:13:00,369 epoch 8 - iter 14/73 - loss 0.00047158 - samples/sec: 13.15 - lr: 0.010000\n",
-      "2021-09-08 02:13:00,765 epoch 8 - iter 21/73 - loss 0.00046120 - samples/sec: 17.70 - lr: 0.010000\n",
-      "2021-09-08 02:13:01,092 epoch 8 - iter 28/73 - loss 0.00045876 - samples/sec: 21.47 - lr: 0.010000\n",
-      "2021-09-08 02:13:01,465 epoch 8 - iter 35/73 - loss 0.04718652 - samples/sec: 18.80 - lr: 0.010000\n",
-      "2021-09-08 02:13:01,909 epoch 8 - iter 42/73 - loss 0.03941284 - samples/sec: 15.77 - lr: 0.010000\n",
-      "2021-09-08 02:13:02,261 epoch 8 - iter 49/73 - loss 0.05151143 - samples/sec: 19.97 - lr: 0.010000\n",
-      "2021-09-08 02:13:02,725 epoch 8 - iter 56/73 - loss 0.04644972 - samples/sec: 15.08 - lr: 0.010000\n",
-      "2021-09-08 02:13:03,122 epoch 8 - iter 63/73 - loss 0.04136450 - samples/sec: 17.66 - lr: 0.010000\n",
-      "2021-09-08 02:13:03,445 epoch 8 - iter 70/73 - loss 0.03731054 - samples/sec: 21.73 - lr: 0.010000\n",
-      "2021-09-08 02:13:03,595 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:13:03,595 EPOCH 8 done: loss 0.0358 - lr 0.0100000\n",
-      "2021-09-08 02:13:03,988 DEV : loss 0.35711658000946045 - score 0.75\n",
-      "2021-09-08 02:13:03,989 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:13:04,064 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:13:04,440 epoch 9 - iter 7/73 - loss 0.00037410 - samples/sec: 19.33 - lr: 0.010000\n",
-      "2021-09-08 02:13:04,792 epoch 9 - iter 14/73 - loss 0.00064975 - samples/sec: 19.96 - lr: 0.010000\n",
-      "2021-09-08 02:13:05,164 epoch 9 - iter 21/73 - loss 0.00060416 - samples/sec: 18.85 - lr: 0.010000\n",
-      "2021-09-08 02:13:05,599 epoch 9 - iter 28/73 - loss 0.03328331 - samples/sec: 16.09 - lr: 0.010000\n",
-      "2021-09-08 02:13:06,017 epoch 9 - iter 35/73 - loss 0.02670447 - samples/sec: 16.78 - lr: 0.010000\n",
-      "2021-09-08 02:13:06,367 epoch 9 - iter 42/73 - loss 0.02231966 - samples/sec: 20.04 - lr: 0.010000\n",
-      "2021-09-08 02:13:06,767 epoch 9 - iter 49/73 - loss 0.06095083 - samples/sec: 17.51 - lr: 0.010000\n",
-      "2021-09-08 02:13:07,202 epoch 9 - iter 56/73 - loss 0.05338670 - samples/sec: 16.13 - lr: 0.010000\n",
-      "2021-09-08 02:13:07,553 epoch 9 - iter 63/73 - loss 0.04749092 - samples/sec: 19.97 - lr: 0.010000\n",
-      "2021-09-08 02:13:07,945 epoch 9 - iter 70/73 - loss 0.04285059 - samples/sec: 17.87 - lr: 0.010000\n",
-      "2021-09-08 02:13:08,186 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:13:08,187 EPOCH 9 done: loss 0.0411 - lr 0.0100000\n",
-      "2021-09-08 02:13:08,736 DEV : loss 0.27408596873283386 - score 0.75\n",
-      "2021-09-08 02:13:08,737 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:13:08,741 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:13:09,137 epoch 10 - iter 7/73 - loss 0.00061523 - samples/sec: 18.35 - lr: 0.010000\n",
-      "2021-09-08 02:13:09,536 epoch 10 - iter 14/73 - loss 0.08434459 - samples/sec: 17.57 - lr: 0.010000\n",
-      "2021-09-08 02:13:09,865 epoch 10 - iter 21/73 - loss 0.05666429 - samples/sec: 21.37 - lr: 0.010000\n",
-      "2021-09-08 02:13:10,341 epoch 10 - iter 28/73 - loss 0.04260383 - samples/sec: 14.70 - lr: 0.010000\n",
-      "2021-09-08 02:13:10,722 epoch 10 - iter 35/73 - loss 0.03415877 - samples/sec: 18.40 - lr: 0.010000\n",
-      "2021-09-08 02:13:11,061 epoch 10 - iter 42/73 - loss 0.02866532 - samples/sec: 20.72 - lr: 0.010000\n",
-      "2021-09-08 02:13:11,487 epoch 10 - iter 49/73 - loss 0.02460907 - samples/sec: 16.46 - lr: 0.010000\n",
-      "2021-09-08 02:13:11,909 epoch 10 - iter 56/73 - loss 0.02489023 - samples/sec: 16.61 - lr: 0.010000\n",
-      "2021-09-08 02:13:12,323 epoch 10 - iter 63/73 - loss 0.02216590 - samples/sec: 16.93 - lr: 0.010000\n",
-      "2021-09-08 02:13:12,688 epoch 10 - iter 70/73 - loss 0.01998675 - samples/sec: 19.22 - lr: 0.010000\n",
-      "2021-09-08 02:13:12,871 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:13:12,871 EPOCH 10 done: loss 0.0324 - lr 0.0100000\n",
-      "2021-09-08 02:13:13,259 DEV : loss 0.25899678468704224 - score 0.75\n",
-      "2021-09-08 02:13:13,260 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 02:13:17,459 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:13:17,460 Testing using best model ...\n",
-      "2021-09-08 02:13:17,461 loading file None/best-model.pt\n",
+      "2021-09-21 22:25:34,225 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 22:25:34,227 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:25:34,734 epoch 8 - iter 7/73 - loss 0.00122361 - samples/sec: 14.19 - lr: 0.010000\n",
+      "2021-09-21 22:25:35,074 epoch 8 - iter 14/73 - loss 0.00148807 - samples/sec: 20.63 - lr: 0.010000\n",
+      "2021-09-21 22:25:35,498 epoch 8 - iter 21/73 - loss 0.04400328 - samples/sec: 19.40 - lr: 0.010000\n",
+      "2021-09-21 22:25:36,016 epoch 8 - iter 28/73 - loss 0.03304315 - samples/sec: 13.54 - lr: 0.010000\n",
+      "2021-09-21 22:25:36,331 epoch 8 - iter 35/73 - loss 0.02654141 - samples/sec: 22.29 - lr: 0.010000\n",
+      "2021-09-21 22:25:36,776 epoch 8 - iter 42/73 - loss 0.02219744 - samples/sec: 15.73 - lr: 0.010000\n",
+      "2021-09-21 22:25:37,125 epoch 8 - iter 49/73 - loss 0.01906381 - samples/sec: 20.08 - lr: 0.010000\n",
+      "2021-09-21 22:25:37,661 epoch 8 - iter 56/73 - loss 0.01671568 - samples/sec: 13.09 - lr: 0.010000\n",
+      "2021-09-21 22:25:38,097 epoch 8 - iter 63/73 - loss 0.01490239 - samples/sec: 16.07 - lr: 0.010000\n",
+      "2021-09-21 22:25:38,408 epoch 8 - iter 70/73 - loss 0.01343588 - samples/sec: 22.58 - lr: 0.010000\n",
+      "2021-09-21 22:25:38,634 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:25:38,635 EPOCH 8 done: loss 0.0129 - lr 0.0100000\n",
+      "2021-09-21 22:25:39,203 DEV : loss 0.669003963470459 - score 0.5\n",
+      "2021-09-21 22:25:39,204 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:25:39,206 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:25:39,685 epoch 9 - iter 7/73 - loss 0.00058471 - samples/sec: 15.07 - lr: 0.010000\n",
+      "2021-09-21 22:25:40,158 epoch 9 - iter 14/73 - loss 0.00054814 - samples/sec: 14.82 - lr: 0.010000\n",
+      "2021-09-21 22:25:40,528 epoch 9 - iter 21/73 - loss 0.00042740 - samples/sec: 18.94 - lr: 0.010000\n",
+      "2021-09-21 22:25:40,936 epoch 9 - iter 28/73 - loss 0.01385755 - samples/sec: 17.18 - lr: 0.010000\n",
+      "2021-09-21 22:25:41,250 epoch 9 - iter 35/73 - loss 0.01119588 - samples/sec: 22.33 - lr: 0.010000\n",
+      "2021-09-21 22:25:41,730 epoch 9 - iter 42/73 - loss 0.01644099 - samples/sec: 14.58 - lr: 0.010000\n",
+      "2021-09-21 22:25:42,144 epoch 9 - iter 49/73 - loss 0.01411481 - samples/sec: 16.97 - lr: 0.010000\n",
+      "2021-09-21 22:25:42,604 epoch 9 - iter 56/73 - loss 0.01236957 - samples/sec: 15.22 - lr: 0.010000\n",
+      "2021-09-21 22:25:42,978 epoch 9 - iter 63/73 - loss 0.01101608 - samples/sec: 18.73 - lr: 0.010000\n",
+      "2021-09-21 22:25:43,331 epoch 9 - iter 70/73 - loss 0.00993632 - samples/sec: 19.90 - lr: 0.010000\n",
+      "2021-09-21 22:25:43,564 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:25:43,564 EPOCH 9 done: loss 0.0095 - lr 0.0100000\n",
+      "2021-09-21 22:25:43,981 DEV : loss 0.7560954689979553 - score 0.375\n",
+      "2021-09-21 22:25:43,982 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 22:25:43,984 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:25:44,306 epoch 10 - iter 7/73 - loss 0.08560292 - samples/sec: 22.75 - lr: 0.010000\n",
+      "2021-09-21 22:25:44,798 epoch 10 - iter 14/73 - loss 0.04290090 - samples/sec: 14.25 - lr: 0.010000\n",
+      "2021-09-21 22:25:45,148 epoch 10 - iter 21/73 - loss 0.02866162 - samples/sec: 20.02 - lr: 0.010000\n",
+      "2021-09-21 22:25:45,533 epoch 10 - iter 28/73 - loss 0.02152868 - samples/sec: 18.22 - lr: 0.010000\n",
+      "2021-09-21 22:25:45,927 epoch 10 - iter 35/73 - loss 0.01763232 - samples/sec: 17.79 - lr: 0.010000\n",
+      "2021-09-21 22:25:46,519 epoch 10 - iter 42/73 - loss 0.01478495 - samples/sec: 11.84 - lr: 0.010000\n",
+      "2021-09-21 22:25:46,976 epoch 10 - iter 49/73 - loss 0.01269418 - samples/sec: 15.36 - lr: 0.010000\n",
+      "2021-09-21 22:25:47,383 epoch 10 - iter 56/73 - loss 0.01114060 - samples/sec: 17.20 - lr: 0.010000\n",
+      "2021-09-21 22:25:47,809 epoch 10 - iter 63/73 - loss 0.00992763 - samples/sec: 16.49 - lr: 0.010000\n",
+      "2021-09-21 22:25:48,170 epoch 10 - iter 70/73 - loss 0.00896836 - samples/sec: 19.42 - lr: 0.010000\n",
+      "2021-09-21 22:25:48,359 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:25:48,360 EPOCH 10 done: loss 0.0086 - lr 0.0100000\n",
+      "2021-09-21 22:25:48,811 DEV : loss 0.7598840594291687 - score 0.375\n",
+      "2021-09-21 22:25:48,812 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 22:25:52,678 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:25:52,679 Testing using best model ...\n",
+      "2021-09-21 22:25:52,681 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:13:22,451 \t0.6667\n",
-      "2021-09-08 02:13:22,452 \n",
+      "2021-09-21 22:25:56,898 \t0.6667\n",
+      "2021-09-21 22:25:56,899 \n",
       "Results:\n",
       "- F-score (micro) 0.6667\n",
-      "- F-score (macro) 0.4444\n",
+      "- F-score (macro) 0.537\n",
       "- Accuracy 0.6667\n",
       "\n",
       "By class:\n",
       "                                                                                  precision    recall  f1-score   support\n",
       "\n",
-      "                                                   a social unit living together     1.0000    1.0000    1.0000         2\n",
-      "                                     a particular branch of scientific knowledge     1.0000    1.0000    1.0000         2\n",
-      "                      an activity that is diverting and that holds the attention     1.0000    1.0000    1.0000         1\n",
-      "                             a machine for performing calculations automatically     0.0000    0.0000    0.0000         0\n",
-      "                                  knowledge acquired by learning and instruction     0.0000    0.0000    0.0000         0\n",
-      "                 an active diversion requiring physical exertion and competition     0.0000    0.0000    0.0000         0\n",
-      "an extended social group having a distinctive cultural and economic organization     0.0000    0.0000    0.0000         0\n",
-      "                          the commercial activity of providing funds and capital     0.0000    0.0000    0.0000         3\n",
-      "                     the study of government of states and other political units     1.0000    1.0000    1.0000         1\n",
+      "                                                   a social unit living together     0.5000    1.0000    0.6667         1\n",
+      "                                     a particular branch of scientific knowledge     1.0000    1.0000    1.0000         1\n",
+      "                      an activity that is diverting and that holds the attention     0.0000    0.0000    0.0000         0\n",
+      "                             a machine for performing calculations automatically     1.0000    0.5000    0.6667         2\n",
+      "                                  knowledge acquired by learning and instruction     0.0000    0.0000    0.0000         1\n",
+      "                 an active diversion requiring physical exertion and competition     1.0000    1.0000    1.0000         1\n",
+      "an extended social group having a distinctive cultural and economic organization     1.0000    1.0000    1.0000         1\n",
+      "                          the commercial activity of providing funds and capital     0.5000    0.5000    0.5000         2\n",
+      "                     the study of government of states and other political units     0.0000    0.0000    0.0000         0\n",
       "\n",
       "                                                                       micro avg     0.6667    0.6667    0.6667         9\n",
-      "                                                                       macro avg     0.4444    0.4444    0.4444         9\n",
-      "                                                                    weighted avg     0.6667    0.6667    0.6667         9\n",
+      "                                                                       macro avg     0.5556    0.5556    0.5370         9\n",
+      "                                                                    weighted avg     0.7222    0.6667    0.6667         9\n",
       "                                                                     samples avg     0.6667    0.6667    0.6667         9\n",
       "\n",
-      "2021-09-08 02:13:22,452 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 22:25:56,899 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:26:12,845 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:13:38,428 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:13:42,364 Computing label dictionary. Progress:\n"
+      "2021-09-21 22:26:16,835 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 82/82 [00:00<00:00, 15453.49it/s]"
+      "100%|██████████| 82/82 [00:00<00:00, 16843.77it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:13:42,371 [b'a social unit living together', b'a particular branch of scientific knowledge', b'an activity that is diverting and that holds the attention', b'a machine for performing calculations automatically', b'knowledge acquired by learning and instruction', b'an active diversion requiring physical exertion and competition', b'an extended social group having a distinctive cultural and economic organization', b'the commercial activity of providing funds and capital', b'the study of government of states and other political units']\n",
-      "2021-09-08 02:13:42,380 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:13:42,381 Model: \"TARSClassifier(\n",
+      "2021-09-21 22:26:16,841 [b'a social unit living together', b'a particular branch of scientific knowledge', b'an activity that is diverting and that holds the attention', b'a machine for performing calculations automatically', b'knowledge acquired by learning and instruction', b'an active diversion requiring physical exertion and competition', b'an extended social group having a distinctive cultural and economic organization', b'the commercial activity of providing funds and capital', b'the study of government of states and other political units']\n",
+      "2021-09-21 22:26:16,855 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:26:16,857 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -7287,25 +7306,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:13:42,382 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:13:42,382 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
-      "2021-09-08 02:13:42,383 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:13:42,383 Parameters:\n",
-      "2021-09-08 02:13:42,383  - learning_rate: \"0.02\"\n",
-      "2021-09-08 02:13:42,384  - mini_batch_size: \"1\"\n",
-      "2021-09-08 02:13:42,384  - patience: \"3\"\n",
-      "2021-09-08 02:13:42,384  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 02:13:42,384  - max_epochs: \"10\"\n",
-      "2021-09-08 02:13:42,385  - shuffle: \"True\"\n",
-      "2021-09-08 02:13:42,385  - train_with_dev: \"False\"\n",
-      "2021-09-08 02:13:42,385  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 02:13:42,386 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:13:42,386 Model training base path: \"None\"\n",
-      "2021-09-08 02:13:42,386 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:13:42,387 Device: cuda:1\n",
-      "2021-09-08 02:13:42,387 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:13:42,387 Embeddings storage mode: cpu\n",
-      "2021-09-08 02:13:42,395 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 22:26:16,857 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:26:16,858 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
+      "2021-09-21 22:26:16,858 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:26:16,858 Parameters:\n",
+      "2021-09-21 22:26:16,858  - learning_rate: \"0.02\"\n",
+      "2021-09-21 22:26:16,859  - mini_batch_size: \"1\"\n",
+      "2021-09-21 22:26:16,859  - patience: \"3\"\n",
+      "2021-09-21 22:26:16,859  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 22:26:16,860  - max_epochs: \"10\"\n",
+      "2021-09-21 22:26:16,860  - shuffle: \"True\"\n",
+      "2021-09-21 22:26:16,860  - train_with_dev: \"False\"\n",
+      "2021-09-21 22:26:16,861  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 22:26:16,861 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:26:16,861 Model training base path: \"None\"\n",
+      "2021-09-21 22:26:16,861 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:26:16,862 Device: cuda:0\n",
+      "2021-09-21 22:26:16,862 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:26:16,862 Embeddings storage mode: cpu\n",
+      "2021-09-21 22:26:16,880 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -7319,220 +7338,221 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:13:42,803 epoch 1 - iter 7/73 - loss 0.77188661 - samples/sec: 17.81 - lr: 0.020000\n",
-      "2021-09-08 02:13:43,321 epoch 1 - iter 14/73 - loss 0.62646590 - samples/sec: 13.52 - lr: 0.020000\n",
-      "2021-09-08 02:13:43,663 epoch 1 - iter 21/73 - loss 0.60988956 - samples/sec: 20.52 - lr: 0.020000\n",
-      "2021-09-08 02:13:44,059 epoch 1 - iter 28/73 - loss 0.52932853 - samples/sec: 17.73 - lr: 0.020000\n",
-      "2021-09-08 02:13:44,433 epoch 1 - iter 35/73 - loss 0.55072869 - samples/sec: 18.75 - lr: 0.020000\n",
-      "2021-09-08 02:13:44,826 epoch 1 - iter 42/73 - loss 0.58721882 - samples/sec: 17.84 - lr: 0.020000\n",
-      "2021-09-08 02:13:45,182 epoch 1 - iter 49/73 - loss 0.56372598 - samples/sec: 19.65 - lr: 0.020000\n",
-      "2021-09-08 02:13:45,785 epoch 1 - iter 56/73 - loss 0.55197762 - samples/sec: 11.63 - lr: 0.020000\n",
-      "2021-09-08 02:13:46,173 epoch 1 - iter 63/73 - loss 0.52011191 - samples/sec: 18.07 - lr: 0.020000\n",
-      "2021-09-08 02:13:46,612 epoch 1 - iter 70/73 - loss 0.54979037 - samples/sec: 15.97 - lr: 0.020000\n",
-      "2021-09-08 02:13:46,753 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:13:46,754 EPOCH 1 done: loss 0.5466 - lr 0.0200000\n",
-      "2021-09-08 02:13:47,163 DEV : loss 0.25647759437561035 - score 0.875\n",
-      "2021-09-08 02:13:47,164 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:26:17,224 epoch 1 - iter 7/73 - loss 0.62172561 - samples/sec: 21.26 - lr: 0.020000\n",
+      "2021-09-21 22:26:17,587 epoch 1 - iter 14/73 - loss 0.37872671 - samples/sec: 19.36 - lr: 0.020000\n",
+      "2021-09-21 22:26:17,952 epoch 1 - iter 21/73 - loss 0.45174125 - samples/sec: 19.19 - lr: 0.020000\n",
+      "2021-09-21 22:26:18,459 epoch 1 - iter 28/73 - loss 0.54947023 - samples/sec: 13.81 - lr: 0.020000\n",
+      "2021-09-21 22:26:19,009 epoch 1 - iter 35/73 - loss 0.52992294 - samples/sec: 12.75 - lr: 0.020000\n",
+      "2021-09-21 22:26:19,332 epoch 1 - iter 42/73 - loss 0.54443864 - samples/sec: 21.70 - lr: 0.020000\n",
+      "2021-09-21 22:26:19,652 epoch 1 - iter 49/73 - loss 0.53769213 - samples/sec: 21.95 - lr: 0.020000\n",
+      "2021-09-21 22:26:20,115 epoch 1 - iter 56/73 - loss 0.53891513 - samples/sec: 15.13 - lr: 0.020000\n",
+      "2021-09-21 22:26:20,443 epoch 1 - iter 63/73 - loss 0.55316483 - samples/sec: 21.39 - lr: 0.020000\n",
+      "2021-09-21 22:26:20,896 epoch 1 - iter 70/73 - loss 0.57741006 - samples/sec: 15.48 - lr: 0.020000\n",
+      "2021-09-21 22:26:21,073 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:26:21,074 EPOCH 1 done: loss 0.5675 - lr 0.0200000\n",
+      "2021-09-21 22:26:21,402 DEV : loss 0.3099135160446167 - score 0.625\n",
+      "2021-09-21 22:26:21,403 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 22:26:25,484 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:26:25,901 epoch 2 - iter 7/73 - loss 0.30276791 - samples/sec: 17.46 - lr: 0.020000\n",
+      "2021-09-21 22:26:26,267 epoch 2 - iter 14/73 - loss 0.34690471 - samples/sec: 19.20 - lr: 0.020000\n",
+      "2021-09-21 22:26:26,691 epoch 2 - iter 21/73 - loss 0.32208057 - samples/sec: 16.54 - lr: 0.020000\n",
+      "2021-09-21 22:26:27,186 epoch 2 - iter 28/73 - loss 0.44154286 - samples/sec: 14.15 - lr: 0.020000\n",
+      "2021-09-21 22:26:27,553 epoch 2 - iter 35/73 - loss 0.45738525 - samples/sec: 19.12 - lr: 0.020000\n",
+      "2021-09-21 22:26:27,932 epoch 2 - iter 42/73 - loss 0.41775602 - samples/sec: 18.49 - lr: 0.020000\n",
+      "2021-09-21 22:26:28,458 epoch 2 - iter 49/73 - loss 0.45140987 - samples/sec: 13.34 - lr: 0.020000\n",
+      "2021-09-21 22:26:28,781 epoch 2 - iter 56/73 - loss 0.45459070 - samples/sec: 21.70 - lr: 0.020000\n",
+      "2021-09-21 22:26:29,097 epoch 2 - iter 63/73 - loss 0.43251725 - samples/sec: 22.21 - lr: 0.020000\n",
+      "2021-09-21 22:26:29,463 epoch 2 - iter 70/73 - loss 0.41542186 - samples/sec: 19.16 - lr: 0.020000\n",
+      "2021-09-21 22:26:29,686 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:26:29,686 EPOCH 2 done: loss 0.4288 - lr 0.0200000\n",
+      "2021-09-21 22:26:30,014 DEV : loss 0.08170302212238312 - score 1.0\n",
+      "2021-09-21 22:26:30,015 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:13:50,870 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:13:51,261 epoch 2 - iter 7/73 - loss 0.60634942 - samples/sec: 18.68 - lr: 0.020000\n",
-      "2021-09-08 02:13:51,615 epoch 2 - iter 14/73 - loss 0.36650668 - samples/sec: 19.83 - lr: 0.020000\n",
-      "2021-09-08 02:13:52,027 epoch 2 - iter 21/73 - loss 0.44482675 - samples/sec: 17.01 - lr: 0.020000\n",
-      "2021-09-08 02:13:52,405 epoch 2 - iter 28/73 - loss 0.43669028 - samples/sec: 18.52 - lr: 0.020000\n",
-      "2021-09-08 02:13:52,854 epoch 2 - iter 35/73 - loss 0.44953559 - samples/sec: 15.62 - lr: 0.020000\n",
-      "2021-09-08 02:13:53,322 epoch 2 - iter 42/73 - loss 0.41989012 - samples/sec: 14.99 - lr: 0.020000\n",
-      "2021-09-08 02:13:53,694 epoch 2 - iter 49/73 - loss 0.40314036 - samples/sec: 18.84 - lr: 0.020000\n",
-      "2021-09-08 02:13:54,079 epoch 2 - iter 56/73 - loss 0.36062613 - samples/sec: 18.20 - lr: 0.020000\n",
-      "2021-09-08 02:13:54,458 epoch 2 - iter 63/73 - loss 0.39888010 - samples/sec: 18.53 - lr: 0.020000\n",
-      "2021-09-08 02:13:54,904 epoch 2 - iter 70/73 - loss 0.40960161 - samples/sec: 15.71 - lr: 0.020000\n",
-      "2021-09-08 02:13:55,134 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:13:55,135 EPOCH 2 done: loss 0.4139 - lr 0.0200000\n",
-      "2021-09-08 02:13:55,540 DEV : loss 0.15823771059513092 - score 0.875\n",
-      "2021-09-08 02:13:55,542 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:26:34,086 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:26:34,558 epoch 3 - iter 7/73 - loss 0.13287896 - samples/sec: 15.37 - lr: 0.020000\n",
+      "2021-09-21 22:26:34,917 epoch 3 - iter 14/73 - loss 0.07410886 - samples/sec: 19.51 - lr: 0.020000\n",
+      "2021-09-21 22:26:35,265 epoch 3 - iter 21/73 - loss 0.11615447 - samples/sec: 20.16 - lr: 0.020000\n",
+      "2021-09-21 22:26:35,676 epoch 3 - iter 28/73 - loss 0.16209792 - samples/sec: 17.09 - lr: 0.020000\n",
+      "2021-09-21 22:26:36,207 epoch 3 - iter 35/73 - loss 0.19210953 - samples/sec: 13.19 - lr: 0.020000\n",
+      "2021-09-21 22:26:36,590 epoch 3 - iter 42/73 - loss 0.23878374 - samples/sec: 18.31 - lr: 0.020000\n",
+      "2021-09-21 22:26:36,934 epoch 3 - iter 49/73 - loss 0.26899451 - samples/sec: 20.35 - lr: 0.020000\n",
+      "2021-09-21 22:26:37,401 epoch 3 - iter 56/73 - loss 0.28510590 - samples/sec: 15.02 - lr: 0.020000\n",
+      "2021-09-21 22:26:37,823 epoch 3 - iter 63/73 - loss 0.33678057 - samples/sec: 16.61 - lr: 0.020000\n",
+      "2021-09-21 22:26:38,234 epoch 3 - iter 70/73 - loss 0.38900353 - samples/sec: 17.08 - lr: 0.020000\n",
+      "2021-09-21 22:26:38,410 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:26:38,410 EPOCH 3 done: loss 0.3838 - lr 0.0200000\n",
+      "2021-09-21 22:26:38,739 DEV : loss 0.10670742392539978 - score 0.875\n",
+      "2021-09-21 22:26:38,740 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:26:38,742 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:26:39,089 epoch 4 - iter 7/73 - loss 0.21266287 - samples/sec: 21.09 - lr: 0.020000\n",
+      "2021-09-21 22:26:39,697 epoch 4 - iter 14/73 - loss 0.21243102 - samples/sec: 11.52 - lr: 0.020000\n",
+      "2021-09-21 22:26:40,119 epoch 4 - iter 21/73 - loss 0.35548748 - samples/sec: 16.62 - lr: 0.020000\n",
+      "2021-09-21 22:26:40,486 epoch 4 - iter 28/73 - loss 0.35256207 - samples/sec: 19.09 - lr: 0.020000\n",
+      "2021-09-21 22:26:40,814 epoch 4 - iter 35/73 - loss 0.31936158 - samples/sec: 21.41 - lr: 0.020000\n",
+      "2021-09-21 22:26:41,143 epoch 4 - iter 42/73 - loss 0.32686679 - samples/sec: 21.32 - lr: 0.020000\n",
+      "2021-09-21 22:26:41,471 epoch 4 - iter 49/73 - loss 0.29360048 - samples/sec: 21.36 - lr: 0.020000\n",
+      "2021-09-21 22:26:42,024 epoch 4 - iter 56/73 - loss 0.31249700 - samples/sec: 12.68 - lr: 0.020000\n",
+      "2021-09-21 22:26:42,376 epoch 4 - iter 63/73 - loss 0.27800867 - samples/sec: 19.90 - lr: 0.020000\n",
+      "2021-09-21 22:26:42,690 epoch 4 - iter 70/73 - loss 0.25226821 - samples/sec: 22.35 - lr: 0.020000\n",
+      "2021-09-21 22:26:42,873 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:26:42,873 EPOCH 4 done: loss 0.2444 - lr 0.0200000\n",
+      "2021-09-21 22:26:43,203 DEV : loss 0.02066049724817276 - score 1.0\n",
+      "2021-09-21 22:26:43,204 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:13:59,636 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:13:59,966 epoch 3 - iter 7/73 - loss 0.29597618 - samples/sec: 22.29 - lr: 0.020000\n",
-      "2021-09-08 02:14:00,402 epoch 3 - iter 14/73 - loss 0.17274657 - samples/sec: 16.10 - lr: 0.020000\n",
-      "2021-09-08 02:14:00,726 epoch 3 - iter 21/73 - loss 0.23329914 - samples/sec: 21.62 - lr: 0.020000\n",
-      "2021-09-08 02:14:01,171 epoch 3 - iter 28/73 - loss 0.20700830 - samples/sec: 15.75 - lr: 0.020000\n",
-      "2021-09-08 02:14:01,577 epoch 3 - iter 35/73 - loss 0.38215327 - samples/sec: 17.24 - lr: 0.020000\n",
-      "2021-09-08 02:14:02,066 epoch 3 - iter 42/73 - loss 0.41748152 - samples/sec: 14.33 - lr: 0.020000\n",
-      "2021-09-08 02:14:02,444 epoch 3 - iter 49/73 - loss 0.37106745 - samples/sec: 18.57 - lr: 0.020000\n",
-      "2021-09-08 02:14:02,899 epoch 3 - iter 56/73 - loss 0.40574566 - samples/sec: 15.39 - lr: 0.020000\n",
-      "2021-09-08 02:14:03,250 epoch 3 - iter 63/73 - loss 0.36215582 - samples/sec: 20.01 - lr: 0.020000\n",
-      "2021-09-08 02:14:03,624 epoch 3 - iter 70/73 - loss 0.33423174 - samples/sec: 18.74 - lr: 0.020000\n",
-      "2021-09-08 02:14:03,813 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:14:03,814 EPOCH 3 done: loss 0.3356 - lr 0.0200000\n",
-      "2021-09-08 02:14:04,221 DEV : loss 0.17281299829483032 - score 0.875\n",
-      "2021-09-08 02:14:04,222 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:14:04,224 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:14:04,620 epoch 4 - iter 7/73 - loss 0.38417618 - samples/sec: 18.37 - lr: 0.020000\n",
-      "2021-09-08 02:14:05,046 epoch 4 - iter 14/73 - loss 0.29274204 - samples/sec: 16.46 - lr: 0.020000\n",
-      "2021-09-08 02:14:05,438 epoch 4 - iter 21/73 - loss 0.20152545 - samples/sec: 17.88 - lr: 0.020000\n",
-      "2021-09-08 02:14:05,834 epoch 4 - iter 28/73 - loss 0.26511624 - samples/sec: 17.67 - lr: 0.020000\n",
-      "2021-09-08 02:14:06,219 epoch 4 - iter 35/73 - loss 0.22671670 - samples/sec: 18.23 - lr: 0.020000\n",
-      "2021-09-08 02:14:06,653 epoch 4 - iter 42/73 - loss 0.21810377 - samples/sec: 16.13 - lr: 0.020000\n",
-      "2021-09-08 02:14:06,978 epoch 4 - iter 49/73 - loss 0.19221879 - samples/sec: 21.61 - lr: 0.020000\n",
-      "2021-09-08 02:14:07,380 epoch 4 - iter 56/73 - loss 0.23336485 - samples/sec: 17.44 - lr: 0.020000\n",
-      "2021-09-08 02:14:07,742 epoch 4 - iter 63/73 - loss 0.20788427 - samples/sec: 19.38 - lr: 0.020000\n",
-      "2021-09-08 02:14:08,177 epoch 4 - iter 70/73 - loss 0.18765715 - samples/sec: 16.10 - lr: 0.020000\n",
-      "2021-09-08 02:14:08,363 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:14:08,363 EPOCH 4 done: loss 0.1935 - lr 0.0200000\n",
-      "2021-09-08 02:14:08,935 DEV : loss 0.22376418113708496 - score 0.625\n",
-      "2021-09-08 02:14:08,936 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:14:08,938 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:14:09,342 epoch 5 - iter 7/73 - loss 0.18825968 - samples/sec: 18.04 - lr: 0.020000\n",
-      "2021-09-08 02:14:09,672 epoch 5 - iter 14/73 - loss 0.09629358 - samples/sec: 21.23 - lr: 0.020000\n",
-      "2021-09-08 02:14:10,106 epoch 5 - iter 21/73 - loss 0.17399019 - samples/sec: 16.14 - lr: 0.020000\n",
-      "2021-09-08 02:14:10,466 epoch 5 - iter 28/73 - loss 0.19003524 - samples/sec: 19.46 - lr: 0.020000\n",
-      "2021-09-08 02:14:10,807 epoch 5 - iter 35/73 - loss 0.20380011 - samples/sec: 20.60 - lr: 0.020000\n",
-      "2021-09-08 02:14:11,261 epoch 5 - iter 42/73 - loss 0.17075012 - samples/sec: 15.43 - lr: 0.020000\n",
-      "2021-09-08 02:14:11,710 epoch 5 - iter 49/73 - loss 0.14900716 - samples/sec: 15.59 - lr: 0.020000\n",
-      "2021-09-08 02:14:12,133 epoch 5 - iter 56/73 - loss 0.15727964 - samples/sec: 16.57 - lr: 0.020000\n",
-      "2021-09-08 02:14:12,453 epoch 5 - iter 63/73 - loss 0.13999023 - samples/sec: 21.94 - lr: 0.020000\n",
-      "2021-09-08 02:14:12,880 epoch 5 - iter 70/73 - loss 0.14047243 - samples/sec: 16.42 - lr: 0.020000\n",
-      "2021-09-08 02:14:13,073 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:14:13,074 EPOCH 5 done: loss 0.1355 - lr 0.0200000\n",
-      "2021-09-08 02:14:13,480 DEV : loss 0.33425047993659973 - score 0.625\n",
-      "2021-09-08 02:14:13,481 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 02:14:13,483 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:14:13,908 epoch 6 - iter 7/73 - loss 0.00114725 - samples/sec: 17.03 - lr: 0.020000\n",
-      "2021-09-08 02:14:14,341 epoch 6 - iter 14/73 - loss 0.00086399 - samples/sec: 16.21 - lr: 0.020000\n",
-      "2021-09-08 02:14:14,713 epoch 6 - iter 21/73 - loss 0.00134668 - samples/sec: 18.84 - lr: 0.020000\n",
-      "2021-09-08 02:14:15,080 epoch 6 - iter 28/73 - loss 0.01153515 - samples/sec: 19.13 - lr: 0.020000\n",
-      "2021-09-08 02:14:15,564 epoch 6 - iter 35/73 - loss 0.00938470 - samples/sec: 14.47 - lr: 0.020000\n",
-      "2021-09-08 02:14:15,992 epoch 6 - iter 42/73 - loss 0.03649025 - samples/sec: 16.37 - lr: 0.020000\n",
-      "2021-09-08 02:14:16,309 epoch 6 - iter 49/73 - loss 0.03522307 - samples/sec: 22.11 - lr: 0.020000\n",
-      "2021-09-08 02:14:16,686 epoch 6 - iter 56/73 - loss 0.03552474 - samples/sec: 18.63 - lr: 0.020000\n",
-      "2021-09-08 02:14:16,995 epoch 6 - iter 63/73 - loss 0.03175473 - samples/sec: 22.66 - lr: 0.020000\n",
-      "2021-09-08 02:14:17,447 epoch 6 - iter 70/73 - loss 0.03467478 - samples/sec: 15.50 - lr: 0.020000\n",
-      "2021-09-08 02:14:17,586 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 22:26:47,219 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:26:47,728 epoch 5 - iter 7/73 - loss 0.23118894 - samples/sec: 14.21 - lr: 0.020000\n",
+      "2021-09-21 22:26:48,270 epoch 5 - iter 14/73 - loss 0.23716582 - samples/sec: 12.94 - lr: 0.020000\n",
+      "2021-09-21 22:26:48,583 epoch 5 - iter 21/73 - loss 0.19469759 - samples/sec: 22.36 - lr: 0.020000\n",
+      "2021-09-21 22:26:48,958 epoch 5 - iter 28/73 - loss 0.17040045 - samples/sec: 18.70 - lr: 0.020000\n",
+      "2021-09-21 22:26:49,277 epoch 5 - iter 35/73 - loss 0.27621989 - samples/sec: 22.01 - lr: 0.020000\n",
+      "2021-09-21 22:26:49,654 epoch 5 - iter 42/73 - loss 0.29557642 - samples/sec: 18.57 - lr: 0.020000\n",
+      "2021-09-21 22:26:49,968 epoch 5 - iter 49/73 - loss 0.25394499 - samples/sec: 22.35 - lr: 0.020000\n",
+      "2021-09-21 22:26:50,346 epoch 5 - iter 56/73 - loss 0.22243760 - samples/sec: 18.53 - lr: 0.020000\n",
+      "2021-09-21 22:26:50,797 epoch 5 - iter 63/73 - loss 0.19985301 - samples/sec: 15.56 - lr: 0.020000\n",
+      "2021-09-21 22:26:51,189 epoch 5 - iter 70/73 - loss 0.18315930 - samples/sec: 17.88 - lr: 0.020000\n",
+      "2021-09-21 22:26:51,324 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:26:51,325 EPOCH 5 done: loss 0.1924 - lr 0.0200000\n",
+      "2021-09-21 22:26:51,652 DEV : loss 0.16275203227996826 - score 1.0\n",
+      "2021-09-21 22:26:51,653 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:26:51,655 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:26:52,057 epoch 6 - iter 7/73 - loss 0.10025098 - samples/sec: 18.08 - lr: 0.020000\n",
+      "2021-09-21 22:26:52,497 epoch 6 - iter 14/73 - loss 0.17143087 - samples/sec: 15.91 - lr: 0.020000\n",
+      "2021-09-21 22:26:52,877 epoch 6 - iter 21/73 - loss 0.11550316 - samples/sec: 18.46 - lr: 0.020000\n",
+      "2021-09-21 22:26:53,199 epoch 6 - iter 28/73 - loss 0.08699674 - samples/sec: 21.77 - lr: 0.020000\n",
+      "2021-09-21 22:26:53,513 epoch 6 - iter 35/73 - loss 0.06988826 - samples/sec: 22.31 - lr: 0.020000\n",
+      "2021-09-21 22:26:53,996 epoch 6 - iter 42/73 - loss 0.07953229 - samples/sec: 14.52 - lr: 0.020000\n",
+      "2021-09-21 22:26:54,365 epoch 6 - iter 49/73 - loss 0.08128187 - samples/sec: 18.99 - lr: 0.020000\n",
+      "2021-09-21 22:26:54,858 epoch 6 - iter 56/73 - loss 0.12060848 - samples/sec: 14.22 - lr: 0.020000\n",
+      "2021-09-21 22:26:55,260 epoch 6 - iter 63/73 - loss 0.13187780 - samples/sec: 17.45 - lr: 0.020000\n",
+      "2021-09-21 22:26:55,574 epoch 6 - iter 70/73 - loss 0.15365875 - samples/sec: 22.30 - lr: 0.020000\n",
+      "2021-09-21 22:26:55,711 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:14:17,587 EPOCH 6 done: loss 0.0333 - lr 0.0200000\n",
-      "2021-09-08 02:14:17,990 DEV : loss 0.2453528791666031 - score 0.75\n",
-      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 02:14:17,991 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 02:14:17,993 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:14:18,433 epoch 7 - iter 7/73 - loss 0.13297663 - samples/sec: 16.46 - lr: 0.010000\n",
-      "2021-09-08 02:14:18,919 epoch 7 - iter 14/73 - loss 0.06674250 - samples/sec: 14.43 - lr: 0.010000\n",
-      "2021-09-08 02:14:19,313 epoch 7 - iter 21/73 - loss 0.07030612 - samples/sec: 17.80 - lr: 0.010000\n",
-      "2021-09-08 02:14:19,641 epoch 7 - iter 28/73 - loss 0.05286467 - samples/sec: 21.35 - lr: 0.010000\n",
-      "2021-09-08 02:14:20,013 epoch 7 - iter 35/73 - loss 0.04382496 - samples/sec: 18.82 - lr: 0.010000\n",
-      "2021-09-08 02:14:20,449 epoch 7 - iter 42/73 - loss 0.07919003 - samples/sec: 16.08 - lr: 0.010000\n",
-      "2021-09-08 02:14:20,758 epoch 7 - iter 49/73 - loss 0.06793746 - samples/sec: 22.71 - lr: 0.010000\n",
-      "2021-09-08 02:14:21,129 epoch 7 - iter 56/73 - loss 0.05951465 - samples/sec: 18.91 - lr: 0.010000\n",
-      "2021-09-08 02:14:21,510 epoch 7 - iter 63/73 - loss 0.05298771 - samples/sec: 18.38 - lr: 0.010000\n",
-      "2021-09-08 02:14:21,854 epoch 7 - iter 70/73 - loss 0.04796597 - samples/sec: 20.39 - lr: 0.010000\n",
-      "2021-09-08 02:14:22,079 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:14:22,079 EPOCH 7 done: loss 0.0460 - lr 0.0100000\n",
-      "2021-09-08 02:14:22,486 DEV : loss 0.3072162866592407 - score 0.75\n",
-      "2021-09-08 02:14:22,487 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:14:22,489 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:14:22,893 epoch 8 - iter 7/73 - loss 0.00075164 - samples/sec: 17.99 - lr: 0.010000\n",
-      "2021-09-08 02:14:23,225 epoch 8 - iter 14/73 - loss 0.00060492 - samples/sec: 21.11 - lr: 0.010000\n",
-      "2021-09-08 02:14:23,595 epoch 8 - iter 21/73 - loss 0.00054626 - samples/sec: 18.97 - lr: 0.010000\n",
-      "2021-09-08 02:14:24,060 epoch 8 - iter 28/73 - loss 0.00274833 - samples/sec: 15.05 - lr: 0.010000\n",
-      "2021-09-08 02:14:24,402 epoch 8 - iter 35/73 - loss 0.00235872 - samples/sec: 20.49 - lr: 0.010000\n",
-      "2021-09-08 02:14:24,812 epoch 8 - iter 42/73 - loss 0.00204510 - samples/sec: 17.10 - lr: 0.010000\n",
-      "2021-09-08 02:14:25,259 epoch 8 - iter 49/73 - loss 0.00184026 - samples/sec: 15.70 - lr: 0.010000\n",
-      "2021-09-08 02:14:25,669 epoch 8 - iter 56/73 - loss 0.00166426 - samples/sec: 17.11 - lr: 0.010000\n",
-      "2021-09-08 02:14:26,045 epoch 8 - iter 63/73 - loss 0.00160053 - samples/sec: 18.63 - lr: 0.010000\n",
-      "2021-09-08 02:14:26,538 epoch 8 - iter 70/73 - loss 0.00146909 - samples/sec: 14.22 - lr: 0.010000\n",
-      "2021-09-08 02:14:26,719 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:14:26,719 EPOCH 8 done: loss 0.0014 - lr 0.0100000\n",
-      "2021-09-08 02:14:27,155 DEV : loss 0.2889506220817566 - score 0.75\n",
-      "2021-09-08 02:14:27,156 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:14:27,160 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:14:27,542 epoch 9 - iter 7/73 - loss 0.00022183 - samples/sec: 19.08 - lr: 0.010000\n",
-      "2021-09-08 02:14:27,898 epoch 9 - iter 14/73 - loss 0.00033703 - samples/sec: 19.69 - lr: 0.010000\n",
-      "2021-09-08 02:14:28,259 epoch 9 - iter 21/73 - loss 0.00219736 - samples/sec: 19.41 - lr: 0.010000\n",
-      "2021-09-08 02:14:28,647 epoch 9 - iter 28/73 - loss 0.00174114 - samples/sec: 18.05 - lr: 0.010000\n",
-      "2021-09-08 02:14:29,014 epoch 9 - iter 35/73 - loss 0.00147123 - samples/sec: 19.12 - lr: 0.010000\n",
-      "2021-09-08 02:14:29,402 epoch 9 - iter 42/73 - loss 0.00129289 - samples/sec: 18.07 - lr: 0.010000\n",
-      "2021-09-08 02:14:29,871 epoch 9 - iter 49/73 - loss 0.00117810 - samples/sec: 14.94 - lr: 0.010000\n",
-      "2021-09-08 02:14:30,239 epoch 9 - iter 56/73 - loss 0.00108426 - samples/sec: 19.04 - lr: 0.010000\n",
-      "2021-09-08 02:14:30,583 epoch 9 - iter 63/73 - loss 0.00109160 - samples/sec: 20.40 - lr: 0.010000\n",
-      "2021-09-08 02:14:31,019 epoch 9 - iter 70/73 - loss 0.03465675 - samples/sec: 16.07 - lr: 0.010000\n",
-      "2021-09-08 02:14:31,248 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:14:31,248 EPOCH 9 done: loss 0.0338 - lr 0.0100000\n",
-      "2021-09-08 02:14:31,656 DEV : loss 0.2613746225833893 - score 0.625\n",
-      "2021-09-08 02:14:31,657 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 02:14:31,659 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:14:32,060 epoch 10 - iter 7/73 - loss 0.00042089 - samples/sec: 18.13 - lr: 0.010000\n",
-      "2021-09-08 02:14:32,473 epoch 10 - iter 14/73 - loss 0.00073222 - samples/sec: 16.98 - lr: 0.010000\n",
-      "2021-09-08 02:14:32,802 epoch 10 - iter 21/73 - loss 0.00055524 - samples/sec: 21.32 - lr: 0.010000\n",
-      "2021-09-08 02:14:33,229 epoch 10 - iter 28/73 - loss 0.00200722 - samples/sec: 16.41 - lr: 0.010000\n",
-      "2021-09-08 02:14:33,633 epoch 10 - iter 35/73 - loss 0.00169282 - samples/sec: 17.35 - lr: 0.010000\n",
-      "2021-09-08 02:14:33,965 epoch 10 - iter 42/73 - loss 0.00147880 - samples/sec: 21.09 - lr: 0.010000\n",
-      "2021-09-08 02:14:34,296 epoch 10 - iter 49/73 - loss 0.00133507 - samples/sec: 21.19 - lr: 0.010000\n",
-      "2021-09-08 02:14:34,723 epoch 10 - iter 56/73 - loss 0.00392554 - samples/sec: 16.42 - lr: 0.010000\n",
-      "2021-09-08 02:14:35,082 epoch 10 - iter 63/73 - loss 0.00351043 - samples/sec: 19.48 - lr: 0.010000\n",
-      "2021-09-08 02:14:35,524 epoch 10 - iter 70/73 - loss 0.00324189 - samples/sec: 15.88 - lr: 0.010000\n",
-      "2021-09-08 02:14:35,739 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:14:35,740 EPOCH 10 done: loss 0.0031 - lr 0.0100000\n",
-      "2021-09-08 02:14:36,148 DEV : loss 0.1821303367614746 - score 0.75\n",
-      "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 02:14:36,150 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 02:14:40,293 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:14:40,294 Testing using best model ...\n",
-      "2021-09-08 02:14:40,295 loading file None/best-model.pt\n",
+      "2021-09-21 22:26:55,712 EPOCH 6 done: loss 0.1474 - lr 0.0200000\n",
+      "2021-09-21 22:26:56,192 DEV : loss 0.049512527883052826 - score 1.0\n",
+      "2021-09-21 22:26:56,194 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 22:26:56,195 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:26:56,606 epoch 7 - iter 7/73 - loss 0.00166957 - samples/sec: 17.68 - lr: 0.020000\n",
+      "2021-09-21 22:26:56,927 epoch 7 - iter 14/73 - loss 0.11094420 - samples/sec: 21.88 - lr: 0.020000\n",
+      "2021-09-21 22:26:57,245 epoch 7 - iter 21/73 - loss 0.07659113 - samples/sec: 22.02 - lr: 0.020000\n",
+      "2021-09-21 22:26:57,698 epoch 7 - iter 28/73 - loss 0.09533827 - samples/sec: 15.49 - lr: 0.020000\n",
+      "2021-09-21 22:26:58,100 epoch 7 - iter 35/73 - loss 0.07912689 - samples/sec: 17.44 - lr: 0.020000\n",
+      "2021-09-21 22:26:58,414 epoch 7 - iter 42/73 - loss 0.11928954 - samples/sec: 22.30 - lr: 0.020000\n",
+      "2021-09-21 22:26:58,803 epoch 7 - iter 49/73 - loss 0.11137392 - samples/sec: 18.04 - lr: 0.020000\n",
+      "2021-09-21 22:26:59,346 epoch 7 - iter 56/73 - loss 0.10718275 - samples/sec: 12.89 - lr: 0.020000\n",
+      "2021-09-21 22:26:59,816 epoch 7 - iter 63/73 - loss 0.09584691 - samples/sec: 14.92 - lr: 0.020000\n",
+      "2021-09-21 22:27:00,128 epoch 7 - iter 70/73 - loss 0.09616635 - samples/sec: 22.51 - lr: 0.020000\n",
+      "2021-09-21 22:27:00,267 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:27:00,268 EPOCH 7 done: loss 0.0923 - lr 0.0200000\n",
+      "2021-09-21 22:27:00,596 DEV : loss 0.052083808928728104 - score 1.0\n",
+      "2021-09-21 22:27:00,597 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 22:27:00,600 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:27:00,967 epoch 8 - iter 7/73 - loss 0.00062461 - samples/sec: 19.82 - lr: 0.020000\n",
+      "2021-09-21 22:27:01,318 epoch 8 - iter 14/73 - loss 0.00126375 - samples/sec: 19.98 - lr: 0.020000\n",
+      "2021-09-21 22:27:01,763 epoch 8 - iter 21/73 - loss 0.00110227 - samples/sec: 15.76 - lr: 0.020000\n",
+      "2021-09-21 22:27:02,088 epoch 8 - iter 28/73 - loss 0.01034140 - samples/sec: 21.60 - lr: 0.020000\n",
+      "2021-09-21 22:27:02,537 epoch 8 - iter 35/73 - loss 0.00867864 - samples/sec: 15.64 - lr: 0.020000\n",
+      "2021-09-21 22:27:02,906 epoch 8 - iter 42/73 - loss 0.01411081 - samples/sec: 19.00 - lr: 0.020000\n",
+      "2021-09-21 22:27:03,280 epoch 8 - iter 49/73 - loss 0.01250695 - samples/sec: 18.76 - lr: 0.020000\n",
+      "2021-09-21 22:27:03,768 epoch 8 - iter 56/73 - loss 0.01311767 - samples/sec: 14.34 - lr: 0.020000\n",
+      "2021-09-21 22:27:04,197 epoch 8 - iter 63/73 - loss 0.01193209 - samples/sec: 16.35 - lr: 0.020000\n",
+      "2021-09-21 22:27:04,515 epoch 8 - iter 70/73 - loss 0.01086869 - samples/sec: 22.11 - lr: 0.020000\n",
+      "2021-09-21 22:27:04,654 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:27:04,654 EPOCH 8 done: loss 0.0168 - lr 0.0200000\n",
+      "2021-09-21 22:27:04,981 DEV : loss 0.017617369070649147 - score 1.0\n",
+      "2021-09-21 22:27:04,982 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 22:27:09,053 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:27:09,393 epoch 9 - iter 7/73 - loss 0.23464599 - samples/sec: 21.60 - lr: 0.020000\n",
+      "2021-09-21 22:27:09,906 epoch 9 - iter 14/73 - loss 0.12011892 - samples/sec: 13.68 - lr: 0.020000\n",
+      "2021-09-21 22:27:10,276 epoch 9 - iter 21/73 - loss 0.08043441 - samples/sec: 18.92 - lr: 0.020000\n",
+      "2021-09-21 22:27:10,602 epoch 9 - iter 28/73 - loss 0.06042557 - samples/sec: 21.56 - lr: 0.020000\n",
+      "2021-09-21 22:27:10,920 epoch 9 - iter 35/73 - loss 0.04873448 - samples/sec: 22.02 - lr: 0.020000\n",
+      "2021-09-21 22:27:11,274 epoch 9 - iter 42/73 - loss 0.06275389 - samples/sec: 19.81 - lr: 0.020000\n",
+      "2021-09-21 22:27:11,701 epoch 9 - iter 49/73 - loss 0.05389090 - samples/sec: 16.41 - lr: 0.020000\n",
+      "2021-09-21 22:27:12,192 epoch 9 - iter 56/73 - loss 0.04758216 - samples/sec: 14.29 - lr: 0.020000\n",
+      "2021-09-21 22:27:12,581 epoch 9 - iter 63/73 - loss 0.05891841 - samples/sec: 18.01 - lr: 0.020000\n",
+      "2021-09-21 22:27:12,922 epoch 9 - iter 70/73 - loss 0.05405421 - samples/sec: 20.57 - lr: 0.020000\n",
+      "2021-09-21 22:27:13,099 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:27:13,100 EPOCH 9 done: loss 0.0518 - lr 0.0200000\n",
+      "2021-09-21 22:27:13,426 DEV : loss 0.0014969678595662117 - score 1.0\n",
+      "2021-09-21 22:27:13,427 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 22:27:17,027 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:27:17,541 epoch 10 - iter 7/73 - loss 0.00014025 - samples/sec: 14.07 - lr: 0.020000\n",
+      "2021-09-21 22:27:17,960 epoch 10 - iter 14/73 - loss 0.00020988 - samples/sec: 16.75 - lr: 0.020000\n",
+      "2021-09-21 22:27:18,267 epoch 10 - iter 21/73 - loss 0.00026040 - samples/sec: 22.83 - lr: 0.020000\n",
+      "2021-09-21 22:27:18,600 epoch 10 - iter 28/73 - loss 0.00023776 - samples/sec: 21.07 - lr: 0.020000\n",
+      "2021-09-21 22:27:18,953 epoch 10 - iter 35/73 - loss 0.01730197 - samples/sec: 19.84 - lr: 0.020000\n",
+      "2021-09-21 22:27:19,283 epoch 10 - iter 42/73 - loss 0.06678268 - samples/sec: 21.27 - lr: 0.020000\n",
+      "2021-09-21 22:27:19,673 epoch 10 - iter 49/73 - loss 0.05761016 - samples/sec: 17.94 - lr: 0.020000\n",
+      "2021-09-21 22:27:20,069 epoch 10 - iter 56/73 - loss 0.08587816 - samples/sec: 17.72 - lr: 0.020000\n",
+      "2021-09-21 22:27:20,506 epoch 10 - iter 63/73 - loss 0.08790050 - samples/sec: 16.04 - lr: 0.020000\n",
+      "2021-09-21 22:27:21,015 epoch 10 - iter 70/73 - loss 0.07919103 - samples/sec: 13.77 - lr: 0.020000\n",
+      "2021-09-21 22:27:21,230 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:27:21,231 EPOCH 10 done: loss 0.0759 - lr 0.0200000\n",
+      "2021-09-21 22:27:21,560 DEV : loss 0.004282773472368717 - score 1.0\n",
+      "2021-09-21 22:27:21,561 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:27:25,665 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:27:25,666 Testing using best model ...\n",
+      "2021-09-21 22:27:25,668 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:14:45,559 \t0.7778\n",
-      "2021-09-08 02:14:45,559 \n",
+      "2021-09-21 22:27:30,016 \t0.6667\n",
+      "2021-09-21 22:27:30,016 \n",
       "Results:\n",
-      "- F-score (micro) 0.7778\n",
-      "- F-score (macro) 0.3545\n",
-      "- Accuracy 0.7778\n",
+      "- F-score (micro) 0.6667\n",
+      "- F-score (macro) 0.4815\n",
+      "- Accuracy 0.6667\n",
       "\n",
       "By class:\n",
       "                                                                                  precision    recall  f1-score   support\n",
       "\n",
-      "                                                   a social unit living together     0.0000    0.0000    0.0000         0\n",
-      "                                     a particular branch of scientific knowledge     0.0000    0.0000    0.0000         0\n",
-      "                      an activity that is diverting and that holds the attention     0.7500    1.0000    0.8571         3\n",
-      "                             a machine for performing calculations automatically     0.5000    1.0000    0.6667         1\n",
-      "                                  knowledge acquired by learning and instruction     0.0000    0.0000    0.0000         1\n",
-      "                 an active diversion requiring physical exertion and competition     1.0000    1.0000    1.0000         2\n",
+      "                                                   a social unit living together     1.0000    0.5000    0.6667         4\n",
+      "                                     a particular branch of scientific knowledge     1.0000    1.0000    1.0000         1\n",
+      "                      an activity that is diverting and that holds the attention     1.0000    1.0000    1.0000         1\n",
+      "                             a machine for performing calculations automatically     0.0000    0.0000    0.0000         0\n",
+      "                                  knowledge acquired by learning and instruction     0.0000    0.0000    0.0000         0\n",
+      "                 an active diversion requiring physical exertion and competition     1.0000    1.0000    1.0000         1\n",
       "an extended social group having a distinctive cultural and economic organization     0.0000    0.0000    0.0000         0\n",
-      "                          the commercial activity of providing funds and capital     1.0000    0.5000    0.6667         2\n",
-      "                     the study of government of states and other political units     0.0000    0.0000    0.0000         0\n",
+      "                          the commercial activity of providing funds and capital     0.0000    0.0000    0.0000         0\n",
+      "                     the study of government of states and other political units     1.0000    0.5000    0.6667         2\n",
       "\n",
-      "                                                                       micro avg     0.7778    0.7778    0.7778         9\n",
-      "                                                                       macro avg     0.3611    0.3889    0.3545         9\n",
-      "                                                                    weighted avg     0.7500    0.7778    0.7302         9\n",
-      "                                                                     samples avg     0.7778    0.7778    0.7778         9\n",
+      "                                                                       micro avg     0.6667    0.6667    0.6667         9\n",
+      "                                                                       macro avg     0.5556    0.4444    0.4815         9\n",
+      "                                                                    weighted avg     1.0000    0.6667    0.7778         9\n",
+      "                                                                     samples avg     0.6667    0.6667    0.6667         9\n",
       "\n",
-      "2021-09-08 02:14:45,559 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 22:27:30,016 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:27:45,910 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:15:01,452 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:15:05,371 Computing label dictionary. Progress:\n"
+      "2021-09-21 22:27:49,794 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 82/82 [00:00<00:00, 13075.81it/s]"
+      "100%|██████████| 82/82 [00:00<00:00, 16111.53it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:15:05,379 [b'a social unit living together', b'a particular branch of scientific knowledge', b'an activity that is diverting and that holds the attention', b'a machine for performing calculations automatically', b'knowledge acquired by learning and instruction', b'an active diversion requiring physical exertion and competition', b'an extended social group having a distinctive cultural and economic organization', b'the commercial activity of providing funds and capital', b'the study of government of states and other political units']\n",
-      "2021-09-08 02:15:05,390 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:15:05,392 Model: \"TARSClassifier(\n",
+      "2021-09-21 22:27:49,801 [b'a social unit living together', b'a particular branch of scientific knowledge', b'an activity that is diverting and that holds the attention', b'a machine for performing calculations automatically', b'knowledge acquired by learning and instruction', b'an active diversion requiring physical exertion and competition', b'an extended social group having a distinctive cultural and economic organization', b'the commercial activity of providing funds and capital', b'the study of government of states and other political units']\n",
+      "2021-09-21 22:27:49,811 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:27:49,813 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -7845,25 +7865,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:15:05,393 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:15:05,393 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
-      "2021-09-08 02:15:05,393 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:15:05,394 Parameters:\n",
-      "2021-09-08 02:15:05,394  - learning_rate: \"0.02\"\n",
-      "2021-09-08 02:15:05,394  - mini_batch_size: \"1\"\n",
-      "2021-09-08 02:15:05,395  - patience: \"3\"\n",
-      "2021-09-08 02:15:05,395  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 02:15:05,395  - max_epochs: \"10\"\n",
-      "2021-09-08 02:15:05,396  - shuffle: \"True\"\n",
-      "2021-09-08 02:15:05,396  - train_with_dev: \"False\"\n",
-      "2021-09-08 02:15:05,396  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 02:15:05,396 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:15:05,397 Model training base path: \"None\"\n",
-      "2021-09-08 02:15:05,397 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:15:05,397 Device: cuda:1\n",
-      "2021-09-08 02:15:05,398 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:15:05,398 Embeddings storage mode: cpu\n",
-      "2021-09-08 02:15:05,404 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 22:27:49,814 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:27:49,814 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
+      "2021-09-21 22:27:49,815 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:27:49,815 Parameters:\n",
+      "2021-09-21 22:27:49,815  - learning_rate: \"0.02\"\n",
+      "2021-09-21 22:27:49,815  - mini_batch_size: \"1\"\n",
+      "2021-09-21 22:27:49,816  - patience: \"3\"\n",
+      "2021-09-21 22:27:49,816  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 22:27:49,816  - max_epochs: \"10\"\n",
+      "2021-09-21 22:27:49,817  - shuffle: \"True\"\n",
+      "2021-09-21 22:27:49,817  - train_with_dev: \"False\"\n",
+      "2021-09-21 22:27:49,817  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 22:27:49,817 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:27:49,818 Model training base path: \"None\"\n",
+      "2021-09-21 22:27:49,818 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:27:49,818 Device: cuda:0\n",
+      "2021-09-21 22:27:49,819 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:27:49,819 Embeddings storage mode: cpu\n",
+      "2021-09-21 22:27:49,825 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -7877,221 +7897,220 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:15:05,864 epoch 1 - iter 7/73 - loss 0.46587296 - samples/sec: 15.74 - lr: 0.020000\n",
-      "2021-09-08 02:15:06,378 epoch 1 - iter 14/73 - loss 0.25346189 - samples/sec: 13.66 - lr: 0.020000\n",
-      "2021-09-08 02:15:06,846 epoch 1 - iter 21/73 - loss 0.27196079 - samples/sec: 14.96 - lr: 0.020000\n",
-      "2021-09-08 02:15:07,232 epoch 1 - iter 28/73 - loss 0.21649399 - samples/sec: 18.17 - lr: 0.020000\n",
-      "2021-09-08 02:15:07,629 epoch 1 - iter 35/73 - loss 0.28548811 - samples/sec: 17.64 - lr: 0.020000\n",
-      "2021-09-08 02:15:07,975 epoch 1 - iter 42/73 - loss 0.35767750 - samples/sec: 20.29 - lr: 0.020000\n",
-      "2021-09-08 02:15:08,343 epoch 1 - iter 49/73 - loss 0.37769211 - samples/sec: 19.04 - lr: 0.020000\n",
-      "2021-09-08 02:15:08,876 epoch 1 - iter 56/73 - loss 0.40594764 - samples/sec: 13.16 - lr: 0.020000\n",
-      "2021-09-08 02:15:09,215 epoch 1 - iter 63/73 - loss 0.47623651 - samples/sec: 20.69 - lr: 0.020000\n",
-      "2021-09-08 02:15:09,576 epoch 1 - iter 70/73 - loss 0.48002893 - samples/sec: 19.39 - lr: 0.020000\n",
-      "2021-09-08 02:15:09,741 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:15:09,741 EPOCH 1 done: loss 0.4749 - lr 0.0200000\n",
-      "2021-09-08 02:15:10,024 DEV : loss 0.6221079230308533 - score 0.125\n",
-      "2021-09-08 02:15:10,025 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 02:15:14,403 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:15:14,852 epoch 2 - iter 7/73 - loss 0.70887283 - samples/sec: 16.19 - lr: 0.020000\n",
-      "2021-09-08 02:15:15,212 epoch 2 - iter 14/73 - loss 0.62225043 - samples/sec: 19.53 - lr: 0.020000\n",
-      "2021-09-08 02:15:15,731 epoch 2 - iter 21/73 - loss 0.46528692 - samples/sec: 13.49 - lr: 0.020000\n",
-      "2021-09-08 02:15:16,110 epoch 2 - iter 28/73 - loss 0.41872364 - samples/sec: 18.52 - lr: 0.020000\n",
-      "2021-09-08 02:15:16,557 epoch 2 - iter 35/73 - loss 0.45550705 - samples/sec: 15.66 - lr: 0.020000\n",
-      "2021-09-08 02:15:17,016 epoch 2 - iter 42/73 - loss 0.41869861 - samples/sec: 15.29 - lr: 0.020000\n",
-      "2021-09-08 02:15:17,443 epoch 2 - iter 49/73 - loss 0.44559968 - samples/sec: 16.40 - lr: 0.020000\n",
-      "2021-09-08 02:15:17,846 epoch 2 - iter 56/73 - loss 0.44236854 - samples/sec: 17.42 - lr: 0.020000\n",
-      "2021-09-08 02:15:18,382 epoch 2 - iter 63/73 - loss 0.41439276 - samples/sec: 13.06 - lr: 0.020000\n",
-      "2021-09-08 02:15:18,757 epoch 2 - iter 70/73 - loss 0.40445584 - samples/sec: 18.75 - lr: 0.020000\n",
-      "2021-09-08 02:15:18,902 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:15:18,902 EPOCH 2 done: loss 0.4039 - lr 0.0200000\n",
-      "2021-09-08 02:15:19,188 DEV : loss 0.4696251451969147 - score 0.5\n",
-      "2021-09-08 02:15:19,189 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:27:50,342 epoch 1 - iter 7/73 - loss 0.83825085 - samples/sec: 13.95 - lr: 0.020000\n",
+      "2021-09-21 22:27:50,693 epoch 1 - iter 14/73 - loss 0.46492921 - samples/sec: 20.02 - lr: 0.020000\n",
+      "2021-09-21 22:27:51,038 epoch 1 - iter 21/73 - loss 0.54995204 - samples/sec: 20.31 - lr: 0.020000\n",
+      "2021-09-21 22:27:51,431 epoch 1 - iter 28/73 - loss 0.51211986 - samples/sec: 17.83 - lr: 0.020000\n",
+      "2021-09-21 22:27:51,758 epoch 1 - iter 35/73 - loss 0.56996149 - samples/sec: 21.44 - lr: 0.020000\n",
+      "2021-09-21 22:27:52,130 epoch 1 - iter 42/73 - loss 0.60109184 - samples/sec: 18.86 - lr: 0.020000\n",
+      "2021-09-21 22:27:52,495 epoch 1 - iter 49/73 - loss 0.60929875 - samples/sec: 19.18 - lr: 0.020000\n",
+      "2021-09-21 22:27:52,975 epoch 1 - iter 56/73 - loss 0.59986171 - samples/sec: 14.63 - lr: 0.020000\n",
+      "2021-09-21 22:27:53,325 epoch 1 - iter 63/73 - loss 0.59163780 - samples/sec: 19.99 - lr: 0.020000\n",
+      "2021-09-21 22:27:53,948 epoch 1 - iter 70/73 - loss 0.57718609 - samples/sec: 11.26 - lr: 0.020000\n",
+      "2021-09-21 22:27:54,162 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:27:54,163 EPOCH 1 done: loss 0.5539 - lr 0.0200000\n",
+      "2021-09-21 22:27:54,540 DEV : loss 0.2413215935230255 - score 0.875\n",
+      "2021-09-21 22:27:54,541 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:15:23,305 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:15:23,784 epoch 3 - iter 7/73 - loss 0.00510265 - samples/sec: 15.13 - lr: 0.020000\n",
-      "2021-09-08 02:15:24,183 epoch 3 - iter 14/73 - loss 0.13125455 - samples/sec: 17.59 - lr: 0.020000\n",
-      "2021-09-08 02:15:24,601 epoch 3 - iter 21/73 - loss 0.09008453 - samples/sec: 16.75 - lr: 0.020000\n",
-      "2021-09-08 02:15:24,992 epoch 3 - iter 28/73 - loss 0.14123405 - samples/sec: 17.94 - lr: 0.020000\n",
-      "2021-09-08 02:15:25,354 epoch 3 - iter 35/73 - loss 0.11331543 - samples/sec: 19.35 - lr: 0.020000\n",
-      "2021-09-08 02:15:25,769 epoch 3 - iter 42/73 - loss 0.20668094 - samples/sec: 16.91 - lr: 0.020000\n",
-      "2021-09-08 02:15:26,214 epoch 3 - iter 49/73 - loss 0.29642101 - samples/sec: 15.75 - lr: 0.020000\n",
-      "2021-09-08 02:15:26,595 epoch 3 - iter 56/73 - loss 0.26409914 - samples/sec: 18.37 - lr: 0.020000\n",
-      "2021-09-08 02:15:26,945 epoch 3 - iter 63/73 - loss 0.26819683 - samples/sec: 20.01 - lr: 0.020000\n",
-      "2021-09-08 02:15:27,257 epoch 3 - iter 70/73 - loss 0.28282197 - samples/sec: 22.53 - lr: 0.020000\n",
-      "2021-09-08 02:15:27,579 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:15:27,580 EPOCH 3 done: loss 0.2731 - lr 0.0200000\n",
-      "2021-09-08 02:15:27,878 DEV : loss 0.5648899674415588 - score 0.5\n",
-      "2021-09-08 02:15:27,879 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:15:27,882 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:15:28,288 epoch 4 - iter 7/73 - loss 0.34032176 - samples/sec: 17.88 - lr: 0.020000\n",
-      "2021-09-08 02:15:28,775 epoch 4 - iter 14/73 - loss 0.19462340 - samples/sec: 14.41 - lr: 0.020000\n",
-      "2021-09-08 02:15:29,181 epoch 4 - iter 21/73 - loss 0.19874278 - samples/sec: 17.23 - lr: 0.020000\n",
-      "2021-09-08 02:15:29,576 epoch 4 - iter 28/73 - loss 0.15138015 - samples/sec: 17.76 - lr: 0.020000\n",
-      "2021-09-08 02:15:29,969 epoch 4 - iter 35/73 - loss 0.18929633 - samples/sec: 17.84 - lr: 0.020000\n",
-      "2021-09-08 02:15:30,326 epoch 4 - iter 42/73 - loss 0.16982634 - samples/sec: 19.62 - lr: 0.020000\n",
-      "2021-09-08 02:15:30,728 epoch 4 - iter 49/73 - loss 0.20752287 - samples/sec: 17.46 - lr: 0.020000\n",
-      "2021-09-08 02:15:31,164 epoch 4 - iter 56/73 - loss 0.23274917 - samples/sec: 16.05 - lr: 0.020000\n",
-      "2021-09-08 02:15:31,579 epoch 4 - iter 63/73 - loss 0.21014705 - samples/sec: 16.92 - lr: 0.020000\n",
-      "2021-09-08 02:15:31,971 epoch 4 - iter 70/73 - loss 0.20875932 - samples/sec: 17.85 - lr: 0.020000\n",
-      "2021-09-08 02:15:32,131 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:15:32,131 EPOCH 4 done: loss 0.2007 - lr 0.0200000\n",
-      "2021-09-08 02:15:32,412 DEV : loss 0.45354682207107544 - score 0.625\n",
-      "2021-09-08 02:15:32,413 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:27:58,490 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:27:58,843 epoch 2 - iter 7/73 - loss 0.02806031 - samples/sec: 20.78 - lr: 0.020000\n",
+      "2021-09-21 22:27:59,240 epoch 2 - iter 14/73 - loss 0.25646356 - samples/sec: 17.68 - lr: 0.020000\n",
+      "2021-09-21 22:27:59,600 epoch 2 - iter 21/73 - loss 0.32829844 - samples/sec: 19.50 - lr: 0.020000\n",
+      "2021-09-21 22:28:00,020 epoch 2 - iter 28/73 - loss 0.31097635 - samples/sec: 16.70 - lr: 0.020000\n",
+      "2021-09-21 22:28:00,479 epoch 2 - iter 35/73 - loss 0.29649363 - samples/sec: 15.26 - lr: 0.020000\n",
+      "2021-09-21 22:28:00,938 epoch 2 - iter 42/73 - loss 0.34901375 - samples/sec: 15.27 - lr: 0.020000\n",
+      "2021-09-21 22:28:01,372 epoch 2 - iter 49/73 - loss 0.30109588 - samples/sec: 16.18 - lr: 0.020000\n",
+      "2021-09-21 22:28:01,756 epoch 2 - iter 56/73 - loss 0.29627800 - samples/sec: 18.25 - lr: 0.020000\n",
+      "2021-09-21 22:28:02,093 epoch 2 - iter 63/73 - loss 0.39286717 - samples/sec: 20.82 - lr: 0.020000\n",
+      "2021-09-21 22:28:02,531 epoch 2 - iter 70/73 - loss 0.35884134 - samples/sec: 16.02 - lr: 0.020000\n",
+      "2021-09-21 22:28:02,765 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:28:02,765 EPOCH 2 done: loss 0.3443 - lr 0.0200000\n",
+      "2021-09-21 22:28:03,141 DEV : loss 0.04436252638697624 - score 1.0\n",
+      "2021-09-21 22:28:03,142 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:15:36,515 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:15:36,990 epoch 5 - iter 7/73 - loss 0.35691205 - samples/sec: 15.24 - lr: 0.020000\n",
-      "2021-09-08 02:15:37,402 epoch 5 - iter 14/73 - loss 0.20471928 - samples/sec: 17.04 - lr: 0.020000\n",
-      "2021-09-08 02:15:37,785 epoch 5 - iter 21/73 - loss 0.13759816 - samples/sec: 18.31 - lr: 0.020000\n",
-      "2021-09-08 02:15:38,110 epoch 5 - iter 28/73 - loss 0.12658865 - samples/sec: 21.57 - lr: 0.020000\n",
-      "2021-09-08 02:15:38,482 epoch 5 - iter 35/73 - loss 0.10255703 - samples/sec: 18.85 - lr: 0.020000\n",
-      "2021-09-08 02:15:38,908 epoch 5 - iter 42/73 - loss 0.08672061 - samples/sec: 16.44 - lr: 0.020000\n",
-      "2021-09-08 02:15:39,424 epoch 5 - iter 49/73 - loss 0.08134627 - samples/sec: 13.57 - lr: 0.020000\n",
-      "2021-09-08 02:15:39,786 epoch 5 - iter 56/73 - loss 0.11303924 - samples/sec: 19.38 - lr: 0.020000\n",
-      "2021-09-08 02:15:40,138 epoch 5 - iter 63/73 - loss 0.10114135 - samples/sec: 19.89 - lr: 0.020000\n",
-      "2021-09-08 02:15:40,506 epoch 5 - iter 70/73 - loss 0.09129911 - samples/sec: 19.07 - lr: 0.020000\n",
-      "2021-09-08 02:15:40,752 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:15:40,753 EPOCH 5 done: loss 0.0876 - lr 0.0200000\n",
-      "2021-09-08 02:15:41,037 DEV : loss 0.706353485584259 - score 0.5\n",
-      "2021-09-08 02:15:41,038 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:15:41,040 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:15:41,418 epoch 6 - iter 7/73 - loss 0.01955481 - samples/sec: 19.23 - lr: 0.020000\n",
-      "2021-09-08 02:15:41,892 epoch 6 - iter 14/73 - loss 0.01185962 - samples/sec: 14.80 - lr: 0.020000\n",
-      "2021-09-08 02:15:42,281 epoch 6 - iter 21/73 - loss 0.00838548 - samples/sec: 18.01 - lr: 0.020000\n",
-      "2021-09-08 02:15:42,852 epoch 6 - iter 28/73 - loss 0.00639484 - samples/sec: 12.27 - lr: 0.020000\n",
-      "2021-09-08 02:15:43,174 epoch 6 - iter 35/73 - loss 0.00526143 - samples/sec: 21.78 - lr: 0.020000\n",
-      "2021-09-08 02:15:43,550 epoch 6 - iter 42/73 - loss 0.04350100 - samples/sec: 18.66 - lr: 0.020000\n",
-      "2021-09-08 02:15:43,985 epoch 6 - iter 49/73 - loss 0.03741192 - samples/sec: 16.11 - lr: 0.020000\n",
-      "2021-09-08 02:15:44,419 epoch 6 - iter 56/73 - loss 0.03289793 - samples/sec: 16.14 - lr: 0.020000\n",
-      "2021-09-08 02:15:44,869 epoch 6 - iter 63/73 - loss 0.02928307 - samples/sec: 15.60 - lr: 0.020000\n",
-      "2021-09-08 02:15:45,199 epoch 6 - iter 70/73 - loss 0.02648749 - samples/sec: 21.22 - lr: 0.020000\n",
-      "2021-09-08 02:15:45,384 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 22:28:07,070 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:28:07,522 epoch 3 - iter 7/73 - loss 0.00600797 - samples/sec: 16.06 - lr: 0.020000\n",
+      "2021-09-21 22:28:07,996 epoch 3 - iter 14/73 - loss 0.09651881 - samples/sec: 14.80 - lr: 0.020000\n",
+      "2021-09-21 22:28:08,443 epoch 3 - iter 21/73 - loss 0.06684855 - samples/sec: 15.67 - lr: 0.020000\n",
+      "2021-09-21 22:28:08,825 epoch 3 - iter 28/73 - loss 0.08945545 - samples/sec: 18.34 - lr: 0.020000\n",
+      "2021-09-21 22:28:09,232 epoch 3 - iter 35/73 - loss 0.12072525 - samples/sec: 17.25 - lr: 0.020000\n",
+      "2021-09-21 22:28:09,641 epoch 3 - iter 42/73 - loss 0.10163651 - samples/sec: 17.15 - lr: 0.020000\n",
+      "2021-09-21 22:28:10,071 epoch 3 - iter 49/73 - loss 0.13447177 - samples/sec: 16.29 - lr: 0.020000\n",
+      "2021-09-21 22:28:10,402 epoch 3 - iter 56/73 - loss 0.13926765 - samples/sec: 21.20 - lr: 0.020000\n",
+      "2021-09-21 22:28:10,732 epoch 3 - iter 63/73 - loss 0.12717362 - samples/sec: 21.24 - lr: 0.020000\n",
+      "2021-09-21 22:28:11,272 epoch 3 - iter 70/73 - loss 0.11547712 - samples/sec: 12.99 - lr: 0.020000\n",
+      "2021-09-21 22:28:11,469 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:28:11,469 EPOCH 3 done: loss 0.1372 - lr 0.0200000\n",
+      "2021-09-21 22:28:11,851 DEV : loss 0.10065292567014694 - score 0.875\n",
+      "2021-09-21 22:28:11,852 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:28:11,854 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:28:12,358 epoch 4 - iter 7/73 - loss 0.22676316 - samples/sec: 14.30 - lr: 0.020000\n",
+      "2021-09-21 22:28:12,878 epoch 4 - iter 14/73 - loss 0.18428374 - samples/sec: 13.47 - lr: 0.020000\n",
+      "2021-09-21 22:28:13,197 epoch 4 - iter 21/73 - loss 0.27189282 - samples/sec: 21.97 - lr: 0.020000\n",
+      "2021-09-21 22:28:13,562 epoch 4 - iter 28/73 - loss 0.21155558 - samples/sec: 19.19 - lr: 0.020000\n",
+      "2021-09-21 22:28:13,938 epoch 4 - iter 35/73 - loss 0.17238077 - samples/sec: 18.67 - lr: 0.020000\n",
+      "2021-09-21 22:28:14,333 epoch 4 - iter 42/73 - loss 0.18667865 - samples/sec: 17.76 - lr: 0.020000\n",
+      "2021-09-21 22:28:14,727 epoch 4 - iter 49/73 - loss 0.17492821 - samples/sec: 17.78 - lr: 0.020000\n",
+      "2021-09-21 22:28:15,032 epoch 4 - iter 56/73 - loss 0.15325771 - samples/sec: 23.03 - lr: 0.020000\n",
+      "2021-09-21 22:28:15,438 epoch 4 - iter 63/73 - loss 0.13634125 - samples/sec: 17.24 - lr: 0.020000\n",
+      "2021-09-21 22:28:15,926 epoch 4 - iter 70/73 - loss 0.14607676 - samples/sec: 14.36 - lr: 0.020000\n",
+      "2021-09-21 22:28:16,069 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:28:16,070 EPOCH 4 done: loss 0.1401 - lr 0.0200000\n",
+      "2021-09-21 22:28:16,447 DEV : loss 0.03417786955833435 - score 0.875\n",
+      "2021-09-21 22:28:16,448 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 22:28:16,450 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:28:17,005 epoch 5 - iter 7/73 - loss 0.00112714 - samples/sec: 12.94 - lr: 0.020000\n",
+      "2021-09-21 22:28:17,408 epoch 5 - iter 14/73 - loss 0.03907056 - samples/sec: 17.41 - lr: 0.020000\n",
+      "2021-09-21 22:28:17,742 epoch 5 - iter 21/73 - loss 0.02640136 - samples/sec: 21.02 - lr: 0.020000\n",
+      "2021-09-21 22:28:18,224 epoch 5 - iter 28/73 - loss 0.03101922 - samples/sec: 14.53 - lr: 0.020000\n",
+      "2021-09-21 22:28:18,553 epoch 5 - iter 35/73 - loss 0.04139827 - samples/sec: 21.32 - lr: 0.020000\n",
+      "2021-09-21 22:28:18,883 epoch 5 - iter 42/73 - loss 0.05163584 - samples/sec: 21.29 - lr: 0.020000\n",
+      "2021-09-21 22:28:19,417 epoch 5 - iter 49/73 - loss 0.05084267 - samples/sec: 13.11 - lr: 0.020000\n",
+      "2021-09-21 22:28:19,803 epoch 5 - iter 56/73 - loss 0.04575600 - samples/sec: 18.18 - lr: 0.020000\n",
+      "2021-09-21 22:28:20,133 epoch 5 - iter 63/73 - loss 0.05664461 - samples/sec: 21.26 - lr: 0.020000\n",
+      "2021-09-21 22:28:20,516 epoch 5 - iter 70/73 - loss 0.06005591 - samples/sec: 18.31 - lr: 0.020000\n",
+      "2021-09-21 22:28:20,652 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:28:20,653 EPOCH 5 done: loss 0.0576 - lr 0.0200000\n",
+      "2021-09-21 22:28:21,029 DEV : loss 0.08708903193473816 - score 0.875\n",
+      "2021-09-21 22:28:21,030 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 22:28:21,032 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:28:21,429 epoch 6 - iter 7/73 - loss 0.00065165 - samples/sec: 18.31 - lr: 0.020000\n",
+      "2021-09-21 22:28:21,786 epoch 6 - iter 14/73 - loss 0.01331010 - samples/sec: 19.64 - lr: 0.020000\n",
+      "2021-09-21 22:28:22,164 epoch 6 - iter 21/73 - loss 0.01817851 - samples/sec: 18.52 - lr: 0.020000\n",
+      "2021-09-21 22:28:22,522 epoch 6 - iter 28/73 - loss 0.01390163 - samples/sec: 19.58 - lr: 0.020000\n",
+      "2021-09-21 22:28:22,944 epoch 6 - iter 35/73 - loss 0.01120384 - samples/sec: 16.61 - lr: 0.020000\n",
+      "2021-09-21 22:28:23,348 epoch 6 - iter 42/73 - loss 0.00941123 - samples/sec: 17.36 - lr: 0.020000\n",
+      "2021-09-21 22:28:23,779 epoch 6 - iter 49/73 - loss 0.00821564 - samples/sec: 16.28 - lr: 0.020000\n",
+      "2021-09-21 22:28:24,191 epoch 6 - iter 56/73 - loss 0.00784350 - samples/sec: 17.03 - lr: 0.020000\n",
+      "2021-09-21 22:28:24,498 epoch 6 - iter 63/73 - loss 0.00700521 - samples/sec: 22.79 - lr: 0.020000\n",
+      "2021-09-21 22:28:24,932 epoch 6 - iter 70/73 - loss 0.00660986 - samples/sec: 16.15 - lr: 0.020000\n",
+      "2021-09-21 22:28:25,205 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:15:45,384 EPOCH 6 done: loss 0.0254 - lr 0.0200000\n",
-      "2021-09-08 02:15:45,678 DEV : loss 0.6044124364852905 - score 0.625\n",
-      "2021-09-08 02:15:45,680 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:15:45,683 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:15:46,073 epoch 7 - iter 7/73 - loss 0.00026147 - samples/sec: 18.64 - lr: 0.020000\n",
-      "2021-09-08 02:15:46,449 epoch 7 - iter 14/73 - loss 0.00036676 - samples/sec: 18.66 - lr: 0.020000\n",
-      "2021-09-08 02:15:46,807 epoch 7 - iter 21/73 - loss 0.00035268 - samples/sec: 19.56 - lr: 0.020000\n",
-      "2021-09-08 02:15:47,179 epoch 7 - iter 28/73 - loss 0.09370104 - samples/sec: 18.87 - lr: 0.020000\n",
-      "2021-09-08 02:15:47,544 epoch 7 - iter 35/73 - loss 0.15139530 - samples/sec: 19.16 - lr: 0.020000\n",
-      "2021-09-08 02:15:48,063 epoch 7 - iter 42/73 - loss 0.12655041 - samples/sec: 13.50 - lr: 0.020000\n",
-      "2021-09-08 02:15:48,486 epoch 7 - iter 49/73 - loss 0.11951388 - samples/sec: 16.59 - lr: 0.020000\n",
-      "2021-09-08 02:15:48,900 epoch 7 - iter 56/73 - loss 0.17127190 - samples/sec: 16.92 - lr: 0.020000\n",
-      "2021-09-08 02:15:49,214 epoch 7 - iter 63/73 - loss 0.16002375 - samples/sec: 22.34 - lr: 0.020000\n",
-      "2021-09-08 02:15:49,665 epoch 7 - iter 70/73 - loss 0.14418931 - samples/sec: 15.52 - lr: 0.020000\n",
-      "2021-09-08 02:15:49,886 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:15:49,886 EPOCH 7 done: loss 0.1427 - lr 0.0200000\n",
-      "2021-09-08 02:15:50,170 DEV : loss 0.5153984427452087 - score 0.625\n",
-      "2021-09-08 02:15:50,171 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 02:15:50,246 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:15:50,743 epoch 8 - iter 7/73 - loss 0.00131913 - samples/sec: 14.52 - lr: 0.020000\n",
-      "2021-09-08 02:15:51,051 epoch 8 - iter 14/73 - loss 0.06035429 - samples/sec: 22.75 - lr: 0.020000\n",
-      "2021-09-08 02:15:51,409 epoch 8 - iter 21/73 - loss 0.04042363 - samples/sec: 19.60 - lr: 0.020000\n",
-      "2021-09-08 02:15:51,880 epoch 8 - iter 28/73 - loss 0.03169376 - samples/sec: 14.90 - lr: 0.020000\n",
-      "2021-09-08 02:15:52,246 epoch 8 - iter 35/73 - loss 0.02625481 - samples/sec: 19.12 - lr: 0.020000\n",
-      "2021-09-08 02:15:52,639 epoch 8 - iter 42/73 - loss 0.02204723 - samples/sec: 17.87 - lr: 0.020000\n",
-      "2021-09-08 02:15:53,099 epoch 8 - iter 49/73 - loss 0.01899220 - samples/sec: 15.22 - lr: 0.020000\n",
-      "2021-09-08 02:15:53,546 epoch 8 - iter 56/73 - loss 0.01664652 - samples/sec: 15.69 - lr: 0.020000\n",
-      "2021-09-08 02:15:53,947 epoch 8 - iter 63/73 - loss 0.01617567 - samples/sec: 17.47 - lr: 0.020000\n",
-      "2021-09-08 02:15:54,263 epoch 8 - iter 70/73 - loss 0.01498163 - samples/sec: 22.20 - lr: 0.020000\n",
-      "2021-09-08 02:15:54,435 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:15:54,435 EPOCH 8 done: loss 0.0356 - lr 0.0200000\n",
-      "2021-09-08 02:15:54,729 DEV : loss 0.5890305042266846 - score 0.625\n",
-      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 02:15:54,730 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 02:15:54,733 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:15:55,116 epoch 9 - iter 7/73 - loss 0.00017051 - samples/sec: 18.99 - lr: 0.010000\n",
-      "2021-09-08 02:15:55,534 epoch 9 - iter 14/73 - loss 0.00033697 - samples/sec: 16.77 - lr: 0.010000\n",
-      "2021-09-08 02:15:55,937 epoch 9 - iter 21/73 - loss 0.00079170 - samples/sec: 17.40 - lr: 0.010000\n",
-      "2021-09-08 02:15:56,368 epoch 9 - iter 28/73 - loss 0.00071886 - samples/sec: 16.26 - lr: 0.010000\n",
-      "2021-09-08 02:15:56,693 epoch 9 - iter 35/73 - loss 0.01270254 - samples/sec: 21.54 - lr: 0.010000\n",
-      "2021-09-08 02:15:57,106 epoch 9 - iter 42/73 - loss 0.01067407 - samples/sec: 16.98 - lr: 0.010000\n",
-      "2021-09-08 02:15:57,576 epoch 9 - iter 49/73 - loss 0.00920799 - samples/sec: 14.90 - lr: 0.010000\n",
-      "2021-09-08 02:15:57,941 epoch 9 - iter 56/73 - loss 0.00812039 - samples/sec: 19.24 - lr: 0.010000\n",
-      "2021-09-08 02:15:58,407 epoch 9 - iter 63/73 - loss 0.00725273 - samples/sec: 15.02 - lr: 0.010000\n",
-      "2021-09-08 02:15:58,770 epoch 9 - iter 70/73 - loss 0.00660560 - samples/sec: 19.32 - lr: 0.010000\n",
-      "2021-09-08 02:15:58,903 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:15:58,904 EPOCH 9 done: loss 0.0063 - lr 0.0100000\n",
-      "2021-09-08 02:15:59,207 DEV : loss 0.4371097981929779 - score 0.75\n",
-      "2021-09-08 02:15:59,209 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 02:16:03,048 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:16:03,688 epoch 10 - iter 7/73 - loss 0.00017940 - samples/sec: 11.29 - lr: 0.010000\n",
-      "2021-09-08 02:16:04,054 epoch 10 - iter 14/73 - loss 0.00039325 - samples/sec: 19.17 - lr: 0.010000\n",
-      "2021-09-08 02:16:04,515 epoch 10 - iter 21/73 - loss 0.00031669 - samples/sec: 15.19 - lr: 0.010000\n",
-      "2021-09-08 02:16:05,004 epoch 10 - iter 28/73 - loss 0.00032746 - samples/sec: 14.34 - lr: 0.010000\n",
-      "2021-09-08 02:16:05,486 epoch 10 - iter 35/73 - loss 0.00039377 - samples/sec: 14.55 - lr: 0.010000\n",
-      "2021-09-08 02:16:05,919 epoch 10 - iter 42/73 - loss 0.00046710 - samples/sec: 16.19 - lr: 0.010000\n",
-      "2021-09-08 02:16:06,381 epoch 10 - iter 49/73 - loss 0.00047493 - samples/sec: 15.16 - lr: 0.010000\n",
-      "2021-09-08 02:16:06,818 epoch 10 - iter 56/73 - loss 0.00044634 - samples/sec: 16.06 - lr: 0.010000\n",
-      "2021-09-08 02:16:07,284 epoch 10 - iter 63/73 - loss 0.00042810 - samples/sec: 15.03 - lr: 0.010000\n",
-      "2021-09-08 02:16:07,695 epoch 10 - iter 70/73 - loss 0.00042101 - samples/sec: 17.04 - lr: 0.010000\n",
-      "2021-09-08 02:16:07,905 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:16:07,905 EPOCH 10 done: loss 0.0004 - lr 0.0100000\n",
-      "2021-09-08 02:16:08,197 DEV : loss 0.44877535104751587 - score 0.75\n",
-      "2021-09-08 02:16:08,199 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:16:11,949 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:16:11,950 Testing using best model ...\n",
-      "2021-09-08 02:16:11,952 loading file None/best-model.pt\n",
+      "2021-09-21 22:28:25,205 EPOCH 6 done: loss 0.0064 - lr 0.0200000\n",
+      "2021-09-21 22:28:25,735 DEV : loss 0.07299904525279999 - score 0.875\n",
+      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 22:28:25,737 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 22:28:25,739 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:28:26,186 epoch 7 - iter 7/73 - loss 0.01215794 - samples/sec: 16.16 - lr: 0.010000\n",
+      "2021-09-21 22:28:26,561 epoch 7 - iter 14/73 - loss 0.00635323 - samples/sec: 18.68 - lr: 0.010000\n",
+      "2021-09-21 22:28:26,888 epoch 7 - iter 21/73 - loss 0.00435538 - samples/sec: 21.45 - lr: 0.010000\n",
+      "2021-09-21 22:28:27,307 epoch 7 - iter 28/73 - loss 0.00339839 - samples/sec: 16.74 - lr: 0.010000\n",
+      "2021-09-21 22:28:27,739 epoch 7 - iter 35/73 - loss 0.05573687 - samples/sec: 16.23 - lr: 0.010000\n",
+      "2021-09-21 22:28:28,051 epoch 7 - iter 42/73 - loss 0.04653017 - samples/sec: 22.50 - lr: 0.010000\n",
+      "2021-09-21 22:28:28,438 epoch 7 - iter 49/73 - loss 0.03992728 - samples/sec: 18.09 - lr: 0.010000\n",
+      "2021-09-21 22:28:28,789 epoch 7 - iter 56/73 - loss 0.03499356 - samples/sec: 19.97 - lr: 0.010000\n",
+      "2021-09-21 22:28:29,104 epoch 7 - iter 63/73 - loss 0.03112947 - samples/sec: 22.33 - lr: 0.010000\n",
+      "2021-09-21 22:28:29,731 epoch 7 - iter 70/73 - loss 0.02806192 - samples/sec: 11.16 - lr: 0.010000\n",
+      "2021-09-21 22:28:29,923 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:28:29,923 EPOCH 7 done: loss 0.0269 - lr 0.0100000\n",
+      "2021-09-21 22:28:30,300 DEV : loss 0.08553502708673477 - score 0.875\n",
+      "2021-09-21 22:28:30,302 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:28:30,304 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:28:30,696 epoch 8 - iter 7/73 - loss 0.00039780 - samples/sec: 18.51 - lr: 0.010000\n",
+      "2021-09-21 22:28:31,063 epoch 8 - iter 14/73 - loss 0.05517516 - samples/sec: 19.10 - lr: 0.010000\n",
+      "2021-09-21 22:28:31,570 epoch 8 - iter 21/73 - loss 0.03731257 - samples/sec: 13.82 - lr: 0.010000\n",
+      "2021-09-21 22:28:31,934 epoch 8 - iter 28/73 - loss 0.02839835 - samples/sec: 19.30 - lr: 0.010000\n",
+      "2021-09-21 22:28:32,280 epoch 8 - iter 35/73 - loss 0.02275953 - samples/sec: 20.23 - lr: 0.010000\n",
+      "2021-09-21 22:28:32,644 epoch 8 - iter 42/73 - loss 0.01909008 - samples/sec: 19.26 - lr: 0.010000\n",
+      "2021-09-21 22:28:33,109 epoch 8 - iter 49/73 - loss 0.01639665 - samples/sec: 15.08 - lr: 0.010000\n",
+      "2021-09-21 22:28:33,459 epoch 8 - iter 56/73 - loss 0.01439469 - samples/sec: 20.04 - lr: 0.010000\n",
+      "2021-09-21 22:28:33,901 epoch 8 - iter 63/73 - loss 0.01284899 - samples/sec: 15.87 - lr: 0.010000\n",
+      "2021-09-21 22:28:34,342 epoch 8 - iter 70/73 - loss 0.01158668 - samples/sec: 15.90 - lr: 0.010000\n",
+      "2021-09-21 22:28:34,475 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:28:34,475 EPOCH 8 done: loss 0.0111 - lr 0.0100000\n",
+      "2021-09-21 22:28:35,616 DEV : loss 0.06789565831422806 - score 1.0\n",
+      "2021-09-21 22:28:35,617 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 22:28:35,619 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:28:35,991 epoch 9 - iter 7/73 - loss 0.00033717 - samples/sec: 19.58 - lr: 0.010000\n",
+      "2021-09-21 22:28:36,397 epoch 9 - iter 14/73 - loss 0.00031655 - samples/sec: 17.29 - lr: 0.010000\n",
+      "2021-09-21 22:28:36,740 epoch 9 - iter 21/73 - loss 0.00041494 - samples/sec: 20.45 - lr: 0.010000\n",
+      "2021-09-21 22:28:37,053 epoch 9 - iter 28/73 - loss 0.00036017 - samples/sec: 22.39 - lr: 0.010000\n",
+      "2021-09-21 22:28:37,529 epoch 9 - iter 35/73 - loss 0.00032198 - samples/sec: 14.72 - lr: 0.010000\n",
+      "2021-09-21 22:28:37,903 epoch 9 - iter 42/73 - loss 0.00038251 - samples/sec: 18.72 - lr: 0.010000\n",
+      "2021-09-21 22:28:38,442 epoch 9 - iter 49/73 - loss 0.00036627 - samples/sec: 13.00 - lr: 0.010000\n",
+      "2021-09-21 22:28:38,792 epoch 9 - iter 56/73 - loss 0.00034151 - samples/sec: 20.06 - lr: 0.010000\n",
+      "2021-09-21 22:28:39,186 epoch 9 - iter 63/73 - loss 0.00055057 - samples/sec: 17.81 - lr: 0.010000\n",
+      "2021-09-21 22:28:39,640 epoch 9 - iter 70/73 - loss 0.00051287 - samples/sec: 15.42 - lr: 0.010000\n",
+      "2021-09-21 22:28:39,785 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:28:39,786 EPOCH 9 done: loss 0.0005 - lr 0.0100000\n",
+      "2021-09-21 22:28:40,318 DEV : loss 0.08898292481899261 - score 0.875\n",
+      "2021-09-21 22:28:40,319 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 22:28:40,342 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:28:40,744 epoch 10 - iter 7/73 - loss 0.00021928 - samples/sec: 18.04 - lr: 0.010000\n",
+      "2021-09-21 22:28:41,167 epoch 10 - iter 14/73 - loss 0.00027327 - samples/sec: 16.58 - lr: 0.010000\n",
+      "2021-09-21 22:28:41,524 epoch 10 - iter 21/73 - loss 0.00034615 - samples/sec: 19.66 - lr: 0.010000\n",
+      "2021-09-21 22:28:41,848 epoch 10 - iter 28/73 - loss 0.00031959 - samples/sec: 21.63 - lr: 0.010000\n",
+      "2021-09-21 22:28:42,240 epoch 10 - iter 35/73 - loss 0.04996147 - samples/sec: 17.86 - lr: 0.010000\n",
+      "2021-09-21 22:28:42,697 epoch 10 - iter 42/73 - loss 0.04167447 - samples/sec: 15.37 - lr: 0.010000\n",
+      "2021-09-21 22:28:43,137 epoch 10 - iter 49/73 - loss 0.03579112 - samples/sec: 15.90 - lr: 0.010000\n",
+      "2021-09-21 22:28:43,501 epoch 10 - iter 56/73 - loss 0.03134773 - samples/sec: 19.29 - lr: 0.010000\n",
+      "2021-09-21 22:28:43,872 epoch 10 - iter 63/73 - loss 0.02788735 - samples/sec: 18.89 - lr: 0.010000\n",
+      "2021-09-21 22:28:44,382 epoch 10 - iter 70/73 - loss 0.02513030 - samples/sec: 13.74 - lr: 0.010000\n",
+      "2021-09-21 22:28:44,530 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:28:44,531 EPOCH 10 done: loss 0.0241 - lr 0.0100000\n",
+      "2021-09-21 22:28:44,910 DEV : loss 0.10449357330799103 - score 0.875\n",
+      "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 22:28:44,911 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 22:28:48,876 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:28:48,877 Testing using best model ...\n",
+      "2021-09-21 22:28:48,878 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:16:17,471 \t0.7778\n",
-      "2021-09-08 02:16:17,472 \n",
+      "2021-09-21 22:28:52,979 \t0.8889\n",
+      "2021-09-21 22:28:52,980 \n",
       "Results:\n",
-      "- F-score (micro) 0.7778\n",
-      "- F-score (macro) 0.5556\n",
-      "- Accuracy 0.7778\n",
+      "- F-score (micro) 0.8889\n",
+      "- F-score (macro) 0.5397\n",
+      "- Accuracy 0.8889\n",
       "\n",
       "By class:\n",
       "                                                                                  precision    recall  f1-score   support\n",
       "\n",
       "                                                   a social unit living together     0.0000    0.0000    0.0000         0\n",
       "                                     a particular branch of scientific knowledge     1.0000    1.0000    1.0000         1\n",
-      "                      an activity that is diverting and that holds the attention     0.0000    0.0000    0.0000         1\n",
+      "                      an activity that is diverting and that holds the attention     1.0000    1.0000    1.0000         2\n",
       "                             a machine for performing calculations automatically     0.0000    0.0000    0.0000         0\n",
-      "                                  knowledge acquired by learning and instruction     0.0000    0.0000    0.0000         1\n",
+      "                                  knowledge acquired by learning and instruction     1.0000    0.7500    0.8571         4\n",
       "                 an active diversion requiring physical exertion and competition     1.0000    1.0000    1.0000         1\n",
-      "an extended social group having a distinctive cultural and economic organization     1.0000    1.0000    1.0000         1\n",
+      "an extended social group having a distinctive cultural and economic organization     0.0000    0.0000    0.0000         0\n",
       "                          the commercial activity of providing funds and capital     1.0000    1.0000    1.0000         1\n",
-      "                     the study of government of states and other political units     1.0000    1.0000    1.0000         3\n",
+      "                     the study of government of states and other political units     0.0000    0.0000    0.0000         0\n",
       "\n",
-      "                                                                       micro avg     0.7778    0.7778    0.7778         9\n",
-      "                                                                       macro avg     0.5556    0.5556    0.5556         9\n",
-      "                                                                    weighted avg     0.7778    0.7778    0.7778         9\n",
-      "                                                                     samples avg     0.7778    0.7778    0.7778         9\n",
+      "                                                                       micro avg     0.8889    0.8889    0.8889         9\n",
+      "                                                                       macro avg     0.5556    0.5278    0.5397         9\n",
+      "                                                                    weighted avg     1.0000    0.8889    0.9365         9\n",
+      "                                                                     samples avg     0.8889    0.8889    0.8889         9\n",
       "\n",
-      "2021-09-08 02:16:17,472 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 22:28:52,980 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:16:33,691 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
+      "2021-09-21 22:29:08,902 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:16:37,619 Computing label dictionary. Progress:\n"
+      "2021-09-21 22:29:12,793 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 82/82 [00:00<00:00, 17556.56it/s]"
+      "100%|██████████| 82/82 [00:00<00:00, 20883.66it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:16:37,625 [b'a social unit living together', b'a particular branch of scientific knowledge', b'an activity that is diverting and that holds the attention', b'a machine for performing calculations automatically', b'knowledge acquired by learning and instruction', b'an active diversion requiring physical exertion and competition', b'an extended social group having a distinctive cultural and economic organization', b'the commercial activity of providing funds and capital', b'the study of government of states and other political units']\n",
-      "2021-09-08 02:16:37,641 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:16:37,643 Model: \"TARSClassifier(\n",
+      "2021-09-21 22:29:12,798 [b'a social unit living together', b'a particular branch of scientific knowledge', b'an activity that is diverting and that holds the attention', b'a machine for performing calculations automatically', b'knowledge acquired by learning and instruction', b'an active diversion requiring physical exertion and competition', b'an extended social group having a distinctive cultural and economic organization', b'the commercial activity of providing funds and capital', b'the study of government of states and other political units']\n",
+      "2021-09-21 22:29:12,810 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:29:12,811 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -8404,25 +8423,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:16:37,643 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:16:37,643 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
-      "2021-09-08 02:16:37,644 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:16:37,644 Parameters:\n",
-      "2021-09-08 02:16:37,644  - learning_rate: \"0.02\"\n",
-      "2021-09-08 02:16:37,645  - mini_batch_size: \"1\"\n",
-      "2021-09-08 02:16:37,645  - patience: \"3\"\n",
-      "2021-09-08 02:16:37,645  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 02:16:37,645  - max_epochs: \"10\"\n",
-      "2021-09-08 02:16:37,646  - shuffle: \"True\"\n",
-      "2021-09-08 02:16:37,646  - train_with_dev: \"False\"\n",
-      "2021-09-08 02:16:37,646  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 02:16:37,647 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:16:37,647 Model training base path: \"None\"\n",
-      "2021-09-08 02:16:37,647 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:16:37,647 Device: cuda:1\n",
-      "2021-09-08 02:16:37,648 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:16:37,648 Embeddings storage mode: cpu\n",
-      "2021-09-08 02:16:37,655 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 22:29:12,812 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:29:12,812 Corpus: \"Corpus: 73 train + 8 dev + 9 test sentences\"\n",
+      "2021-09-21 22:29:12,813 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:29:12,813 Parameters:\n",
+      "2021-09-21 22:29:12,813  - learning_rate: \"0.02\"\n",
+      "2021-09-21 22:29:12,814  - mini_batch_size: \"1\"\n",
+      "2021-09-21 22:29:12,814  - patience: \"3\"\n",
+      "2021-09-21 22:29:12,814  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 22:29:12,814  - max_epochs: \"10\"\n",
+      "2021-09-21 22:29:12,815  - shuffle: \"True\"\n",
+      "2021-09-21 22:29:12,815  - train_with_dev: \"False\"\n",
+      "2021-09-21 22:29:12,815  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 22:29:12,816 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:29:12,816 Model training base path: \"None\"\n",
+      "2021-09-21 22:29:12,816 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:29:12,816 Device: cuda:0\n",
+      "2021-09-21 22:29:12,817 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:29:12,817 Embeddings storage mode: cpu\n",
+      "2021-09-21 22:29:12,824 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -8436,203 +8455,197 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:16:38,091 epoch 1 - iter 7/73 - loss 0.76350584 - samples/sec: 16.76 - lr: 0.020000\n",
-      "2021-09-08 02:16:38,581 epoch 1 - iter 14/73 - loss 0.62434794 - samples/sec: 14.30 - lr: 0.020000\n",
-      "2021-09-08 02:16:38,977 epoch 1 - iter 21/73 - loss 0.49249623 - samples/sec: 17.72 - lr: 0.020000\n",
-      "2021-09-08 02:16:39,450 epoch 1 - iter 28/73 - loss 0.61129797 - samples/sec: 14.81 - lr: 0.020000\n",
-      "2021-09-08 02:16:39,928 epoch 1 - iter 35/73 - loss 0.59218327 - samples/sec: 14.67 - lr: 0.020000\n",
-      "2021-09-08 02:16:40,342 epoch 1 - iter 42/73 - loss 0.63695672 - samples/sec: 16.92 - lr: 0.020000\n",
-      "2021-09-08 02:16:40,754 epoch 1 - iter 49/73 - loss 0.59833873 - samples/sec: 17.01 - lr: 0.020000\n",
-      "2021-09-08 02:16:41,308 epoch 1 - iter 56/73 - loss 0.59154015 - samples/sec: 12.66 - lr: 0.020000\n",
-      "2021-09-08 02:16:41,754 epoch 1 - iter 63/73 - loss 0.58836874 - samples/sec: 15.71 - lr: 0.020000\n",
-      "2021-09-08 02:16:42,232 epoch 1 - iter 70/73 - loss 0.59925292 - samples/sec: 14.67 - lr: 0.020000\n",
-      "2021-09-08 02:16:42,437 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:16:42,438 EPOCH 1 done: loss 0.5778 - lr 0.0200000\n",
-      "2021-09-08 02:16:42,661 DEV : loss 0.3173583149909973 - score 0.625\n",
-      "2021-09-08 02:16:42,662 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:29:13,201 epoch 1 - iter 7/73 - loss 0.80528855 - samples/sec: 19.38 - lr: 0.020000\n",
+      "2021-09-21 22:29:13,647 epoch 1 - iter 14/73 - loss 0.66555588 - samples/sec: 15.71 - lr: 0.020000\n",
+      "2021-09-21 22:29:14,029 epoch 1 - iter 21/73 - loss 0.50273740 - samples/sec: 18.34 - lr: 0.020000\n",
+      "2021-09-21 22:29:14,404 epoch 1 - iter 28/73 - loss 0.47222224 - samples/sec: 18.71 - lr: 0.020000\n",
+      "2021-09-21 22:29:14,729 epoch 1 - iter 35/73 - loss 0.51515631 - samples/sec: 21.57 - lr: 0.020000\n",
+      "2021-09-21 22:29:15,071 epoch 1 - iter 42/73 - loss 0.47488175 - samples/sec: 20.51 - lr: 0.020000\n",
+      "2021-09-21 22:29:15,379 epoch 1 - iter 49/73 - loss 0.47649557 - samples/sec: 22.71 - lr: 0.020000\n",
+      "2021-09-21 22:29:15,797 epoch 1 - iter 56/73 - loss 0.50458030 - samples/sec: 16.80 - lr: 0.020000\n",
+      "2021-09-21 22:29:16,146 epoch 1 - iter 63/73 - loss 0.52118054 - samples/sec: 20.06 - lr: 0.020000\n",
+      "2021-09-21 22:29:16,512 epoch 1 - iter 70/73 - loss 0.49835274 - samples/sec: 19.20 - lr: 0.020000\n",
+      "2021-09-21 22:29:16,712 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:29:16,713 EPOCH 1 done: loss 0.4790 - lr 0.0200000\n",
+      "2021-09-21 22:29:17,085 DEV : loss 0.9469681978225708 - score 0.25\n",
+      "2021-09-21 22:29:17,086 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 22:29:21,469 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:29:21,817 epoch 2 - iter 7/73 - loss 0.86356037 - samples/sec: 21.16 - lr: 0.020000\n",
+      "2021-09-21 22:29:22,205 epoch 2 - iter 14/73 - loss 0.69302806 - samples/sec: 18.08 - lr: 0.020000\n",
+      "2021-09-21 22:29:22,598 epoch 2 - iter 21/73 - loss 0.67474923 - samples/sec: 17.81 - lr: 0.020000\n",
+      "2021-09-21 22:29:23,003 epoch 2 - iter 28/73 - loss 0.66704956 - samples/sec: 17.30 - lr: 0.020000\n",
+      "2021-09-21 22:29:23,400 epoch 2 - iter 35/73 - loss 0.60009302 - samples/sec: 17.67 - lr: 0.020000\n",
+      "2021-09-21 22:29:23,719 epoch 2 - iter 42/73 - loss 0.58270121 - samples/sec: 22.01 - lr: 0.020000\n",
+      "2021-09-21 22:29:24,090 epoch 2 - iter 49/73 - loss 0.54467110 - samples/sec: 18.89 - lr: 0.020000\n",
+      "2021-09-21 22:29:24,448 epoch 2 - iter 56/73 - loss 0.50095722 - samples/sec: 19.59 - lr: 0.020000\n",
+      "2021-09-21 22:29:24,870 epoch 2 - iter 63/73 - loss 0.52502212 - samples/sec: 16.62 - lr: 0.020000\n",
+      "2021-09-21 22:29:25,191 epoch 2 - iter 70/73 - loss 0.48348298 - samples/sec: 21.81 - lr: 0.020000\n",
+      "2021-09-21 22:29:25,370 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:29:25,370 EPOCH 2 done: loss 0.4857 - lr 0.0200000\n",
+      "2021-09-21 22:29:25,782 DEV : loss 0.36500146985054016 - score 0.5\n",
+      "2021-09-21 22:29:25,783 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:16:46,949 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:16:47,370 epoch 2 - iter 7/73 - loss 0.71696332 - samples/sec: 17.40 - lr: 0.020000\n",
-      "2021-09-08 02:16:47,830 epoch 2 - iter 14/73 - loss 0.59523832 - samples/sec: 15.25 - lr: 0.020000\n",
-      "2021-09-08 02:16:48,278 epoch 2 - iter 21/73 - loss 0.51185901 - samples/sec: 15.64 - lr: 0.020000\n",
-      "2021-09-08 02:16:48,660 epoch 2 - iter 28/73 - loss 0.48048414 - samples/sec: 18.36 - lr: 0.020000\n",
-      "2021-09-08 02:16:49,096 epoch 2 - iter 35/73 - loss 0.44563411 - samples/sec: 16.08 - lr: 0.020000\n",
-      "2021-09-08 02:16:49,624 epoch 2 - iter 42/73 - loss 0.41017571 - samples/sec: 13.26 - lr: 0.020000\n",
-      "2021-09-08 02:16:50,054 epoch 2 - iter 49/73 - loss 0.40395344 - samples/sec: 16.31 - lr: 0.020000\n",
-      "2021-09-08 02:16:50,489 epoch 2 - iter 56/73 - loss 0.38713715 - samples/sec: 16.13 - lr: 0.020000\n",
-      "2021-09-08 02:16:50,996 epoch 2 - iter 63/73 - loss 0.39887875 - samples/sec: 13.82 - lr: 0.020000\n",
-      "2021-09-08 02:16:51,428 epoch 2 - iter 70/73 - loss 0.41481856 - samples/sec: 16.23 - lr: 0.020000\n",
-      "2021-09-08 02:16:51,702 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:16:51,703 EPOCH 2 done: loss 0.4322 - lr 0.0200000\n",
-      "2021-09-08 02:16:51,930 DEV : loss 0.3056710958480835 - score 0.625\n",
-      "2021-09-08 02:16:51,931 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:29:29,685 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:29:30,177 epoch 3 - iter 7/73 - loss 0.24610930 - samples/sec: 14.72 - lr: 0.020000\n",
+      "2021-09-21 22:29:30,513 epoch 3 - iter 14/73 - loss 0.20010839 - samples/sec: 20.85 - lr: 0.020000\n",
+      "2021-09-21 22:29:30,836 epoch 3 - iter 21/73 - loss 0.18035646 - samples/sec: 21.73 - lr: 0.020000\n",
+      "2021-09-21 22:29:31,162 epoch 3 - iter 28/73 - loss 0.32631091 - samples/sec: 21.52 - lr: 0.020000\n",
+      "2021-09-21 22:29:31,503 epoch 3 - iter 35/73 - loss 0.26381555 - samples/sec: 20.52 - lr: 0.020000\n",
+      "2021-09-21 22:29:31,950 epoch 3 - iter 42/73 - loss 0.29653273 - samples/sec: 15.71 - lr: 0.020000\n",
+      "2021-09-21 22:29:32,288 epoch 3 - iter 49/73 - loss 0.30775334 - samples/sec: 20.71 - lr: 0.020000\n",
+      "2021-09-21 22:29:32,676 epoch 3 - iter 56/73 - loss 0.30939347 - samples/sec: 18.10 - lr: 0.020000\n",
+      "2021-09-21 22:29:33,080 epoch 3 - iter 63/73 - loss 0.30370226 - samples/sec: 17.35 - lr: 0.020000\n",
+      "2021-09-21 22:29:33,394 epoch 3 - iter 70/73 - loss 0.30688802 - samples/sec: 22.31 - lr: 0.020000\n",
+      "2021-09-21 22:29:33,527 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:29:33,527 EPOCH 3 done: loss 0.2948 - lr 0.0200000\n",
+      "2021-09-21 22:29:33,898 DEV : loss 0.34013327956199646 - score 0.625\n",
+      "2021-09-21 22:29:33,899 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:16:56,178 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:16:56,595 epoch 3 - iter 7/73 - loss 0.29761147 - samples/sec: 17.63 - lr: 0.020000\n",
-      "2021-09-08 02:16:57,252 epoch 3 - iter 14/73 - loss 0.26141401 - samples/sec: 10.66 - lr: 0.020000\n",
-      "2021-09-08 02:16:57,735 epoch 3 - iter 21/73 - loss 0.30548466 - samples/sec: 14.50 - lr: 0.020000\n",
-      "2021-09-08 02:16:58,132 epoch 3 - iter 28/73 - loss 0.23658994 - samples/sec: 17.67 - lr: 0.020000\n",
-      "2021-09-08 02:16:58,522 epoch 3 - iter 35/73 - loss 0.23242934 - samples/sec: 17.97 - lr: 0.020000\n",
-      "2021-09-08 02:16:58,994 epoch 3 - iter 42/73 - loss 0.24364890 - samples/sec: 14.84 - lr: 0.020000\n",
-      "2021-09-08 02:16:59,420 epoch 3 - iter 49/73 - loss 0.22422199 - samples/sec: 16.47 - lr: 0.020000\n",
-      "2021-09-08 02:16:59,919 epoch 3 - iter 56/73 - loss 0.21767984 - samples/sec: 14.05 - lr: 0.020000\n",
-      "2021-09-08 02:17:00,384 epoch 3 - iter 63/73 - loss 0.23266035 - samples/sec: 15.07 - lr: 0.020000\n",
-      "2021-09-08 02:17:00,774 epoch 3 - iter 70/73 - loss 0.24224093 - samples/sec: 17.99 - lr: 0.020000\n",
-      "2021-09-08 02:17:00,991 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:17:00,991 EPOCH 3 done: loss 0.2528 - lr 0.0200000\n",
-      "2021-09-08 02:17:01,216 DEV : loss 0.39015164971351624 - score 0.375\n",
-      "2021-09-08 02:17:01,216 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:17:01,218 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:17:01,687 epoch 4 - iter 7/73 - loss 0.32868951 - samples/sec: 15.52 - lr: 0.020000\n",
-      "2021-09-08 02:17:02,119 epoch 4 - iter 14/73 - loss 0.40497574 - samples/sec: 16.23 - lr: 0.020000\n",
-      "2021-09-08 02:17:02,548 epoch 4 - iter 21/73 - loss 0.29030918 - samples/sec: 16.35 - lr: 0.020000\n",
-      "2021-09-08 02:17:03,024 epoch 4 - iter 28/73 - loss 0.25434148 - samples/sec: 14.70 - lr: 0.020000\n",
-      "2021-09-08 02:17:03,504 epoch 4 - iter 35/73 - loss 0.22224446 - samples/sec: 14.60 - lr: 0.020000\n",
-      "2021-09-08 02:17:03,950 epoch 4 - iter 42/73 - loss 0.20429832 - samples/sec: 15.73 - lr: 0.020000\n",
-      "2021-09-08 02:17:04,377 epoch 4 - iter 49/73 - loss 0.17618108 - samples/sec: 16.41 - lr: 0.020000\n",
-      "2021-09-08 02:17:04,794 epoch 4 - iter 56/73 - loss 0.17137784 - samples/sec: 16.82 - lr: 0.020000\n",
-      "2021-09-08 02:17:05,216 epoch 4 - iter 63/73 - loss 0.16344196 - samples/sec: 16.60 - lr: 0.020000\n",
-      "2021-09-08 02:17:05,683 epoch 4 - iter 70/73 - loss 0.14814955 - samples/sec: 15.03 - lr: 0.020000\n",
-      "2021-09-08 02:17:05,852 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:17:05,853 EPOCH 4 done: loss 0.1427 - lr 0.0200000\n",
-      "2021-09-08 02:17:06,075 DEV : loss 0.38390466570854187 - score 0.875\n",
-      "2021-09-08 02:17:06,076 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:29:37,850 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:29:38,301 epoch 4 - iter 7/73 - loss 0.44907272 - samples/sec: 16.10 - lr: 0.020000\n",
+      "2021-09-21 22:29:38,684 epoch 4 - iter 14/73 - loss 0.23017069 - samples/sec: 18.34 - lr: 0.020000\n",
+      "2021-09-21 22:29:39,009 epoch 4 - iter 21/73 - loss 0.24603308 - samples/sec: 21.55 - lr: 0.020000\n",
+      "2021-09-21 22:29:39,313 epoch 4 - iter 28/73 - loss 0.18518483 - samples/sec: 23.06 - lr: 0.020000\n",
+      "2021-09-21 22:29:39,708 epoch 4 - iter 35/73 - loss 0.20872335 - samples/sec: 17.78 - lr: 0.020000\n",
+      "2021-09-21 22:29:40,103 epoch 4 - iter 42/73 - loss 0.18966705 - samples/sec: 17.73 - lr: 0.020000\n",
+      "2021-09-21 22:29:40,483 epoch 4 - iter 49/73 - loss 0.20853461 - samples/sec: 18.45 - lr: 0.020000\n",
+      "2021-09-21 22:29:40,841 epoch 4 - iter 56/73 - loss 0.18411064 - samples/sec: 19.58 - lr: 0.020000\n",
+      "2021-09-21 22:29:41,207 epoch 4 - iter 63/73 - loss 0.19702175 - samples/sec: 19.18 - lr: 0.020000\n",
+      "2021-09-21 22:29:41,514 epoch 4 - iter 70/73 - loss 0.18018315 - samples/sec: 22.80 - lr: 0.020000\n",
+      "2021-09-21 22:29:41,658 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:29:41,659 EPOCH 4 done: loss 0.1729 - lr 0.0200000\n",
+      "2021-09-21 22:29:42,132 DEV : loss 0.40710267424583435 - score 0.75\n",
+      "2021-09-21 22:29:42,133 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:17:10,104 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:17:10,583 epoch 5 - iter 7/73 - loss 0.09229815 - samples/sec: 15.11 - lr: 0.020000\n",
-      "2021-09-08 02:17:10,924 epoch 5 - iter 14/73 - loss 0.04830378 - samples/sec: 20.59 - lr: 0.020000\n",
-      "2021-09-08 02:17:11,299 epoch 5 - iter 21/73 - loss 0.03563001 - samples/sec: 18.69 - lr: 0.020000\n",
-      "2021-09-08 02:17:11,717 epoch 5 - iter 28/73 - loss 0.04371169 - samples/sec: 16.78 - lr: 0.020000\n",
-      "2021-09-08 02:17:12,145 epoch 5 - iter 35/73 - loss 0.15491106 - samples/sec: 16.39 - lr: 0.020000\n",
-      "2021-09-08 02:17:12,474 epoch 5 - iter 42/73 - loss 0.13409420 - samples/sec: 21.26 - lr: 0.020000\n",
-      "2021-09-08 02:17:12,791 epoch 5 - iter 49/73 - loss 0.11683304 - samples/sec: 22.11 - lr: 0.020000\n",
-      "2021-09-08 02:17:13,250 epoch 5 - iter 56/73 - loss 0.12189231 - samples/sec: 15.27 - lr: 0.020000\n",
-      "2021-09-08 02:17:13,585 epoch 5 - iter 63/73 - loss 0.10847719 - samples/sec: 20.99 - lr: 0.020000\n",
-      "2021-09-08 02:17:14,025 epoch 5 - iter 70/73 - loss 0.09871577 - samples/sec: 15.93 - lr: 0.020000\n",
-      "2021-09-08 02:17:14,216 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:17:14,217 EPOCH 5 done: loss 0.0962 - lr 0.0200000\n",
-      "2021-09-08 02:17:14,435 DEV : loss 0.32247892022132874 - score 0.875\n",
-      "2021-09-08 02:17:14,436 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:29:47,075 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:29:47,453 epoch 5 - iter 7/73 - loss 0.05687793 - samples/sec: 19.35 - lr: 0.020000\n",
+      "2021-09-21 22:29:47,812 epoch 5 - iter 14/73 - loss 0.13542200 - samples/sec: 19.51 - lr: 0.020000\n",
+      "2021-09-21 22:29:48,170 epoch 5 - iter 21/73 - loss 0.17943188 - samples/sec: 19.58 - lr: 0.020000\n",
+      "2021-09-21 22:29:48,727 epoch 5 - iter 28/73 - loss 0.13574622 - samples/sec: 12.59 - lr: 0.020000\n",
+      "2021-09-21 22:29:49,046 epoch 5 - iter 35/73 - loss 0.10885603 - samples/sec: 21.99 - lr: 0.020000\n",
+      "2021-09-21 22:29:49,508 epoch 5 - iter 42/73 - loss 0.09131569 - samples/sec: 15.16 - lr: 0.020000\n",
+      "2021-09-21 22:29:49,854 epoch 5 - iter 49/73 - loss 0.10533228 - samples/sec: 20.32 - lr: 0.020000\n",
+      "2021-09-21 22:29:50,205 epoch 5 - iter 56/73 - loss 0.10525162 - samples/sec: 19.95 - lr: 0.020000\n",
+      "2021-09-21 22:29:50,542 epoch 5 - iter 63/73 - loss 0.09369621 - samples/sec: 20.84 - lr: 0.020000\n",
+      "2021-09-21 22:29:50,895 epoch 5 - iter 70/73 - loss 0.08835629 - samples/sec: 19.84 - lr: 0.020000\n",
+      "2021-09-21 22:29:51,043 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:29:51,043 EPOCH 5 done: loss 0.1025 - lr 0.0200000\n",
+      "2021-09-21 22:29:51,428 DEV : loss 0.2980607748031616 - score 0.75\n",
+      "2021-09-21 22:29:51,429 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:17:18,210 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:17:18,643 epoch 6 - iter 7/73 - loss 0.01520061 - samples/sec: 16.83 - lr: 0.020000\n",
-      "2021-09-08 02:17:19,019 epoch 6 - iter 14/73 - loss 0.00936662 - samples/sec: 18.64 - lr: 0.020000\n",
-      "2021-09-08 02:17:19,514 epoch 6 - iter 21/73 - loss 0.02297107 - samples/sec: 14.15 - lr: 0.020000\n",
-      "2021-09-08 02:17:19,863 epoch 6 - iter 28/73 - loss 0.01745004 - samples/sec: 20.11 - lr: 0.020000\n",
-      "2021-09-08 02:17:20,184 epoch 6 - iter 35/73 - loss 0.01499158 - samples/sec: 21.85 - lr: 0.020000\n",
-      "2021-09-08 02:17:20,598 epoch 6 - iter 42/73 - loss 0.01278387 - samples/sec: 16.91 - lr: 0.020000\n",
-      "2021-09-08 02:17:21,032 epoch 6 - iter 49/73 - loss 0.01257560 - samples/sec: 16.15 - lr: 0.020000\n",
-      "2021-09-08 02:17:21,431 epoch 6 - iter 56/73 - loss 0.01224220 - samples/sec: 17.58 - lr: 0.020000\n",
-      "2021-09-08 02:17:21,808 epoch 6 - iter 63/73 - loss 0.01118609 - samples/sec: 18.58 - lr: 0.020000\n",
-      "2021-09-08 02:17:22,138 epoch 6 - iter 70/73 - loss 0.01021485 - samples/sec: 21.26 - lr: 0.020000\n"
+      "2021-09-21 22:29:55,338 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:29:55,668 epoch 6 - iter 7/73 - loss 0.02121571 - samples/sec: 22.33 - lr: 0.020000\n",
+      "2021-09-21 22:29:55,982 epoch 6 - iter 14/73 - loss 0.14000310 - samples/sec: 22.39 - lr: 0.020000\n",
+      "2021-09-21 22:29:56,433 epoch 6 - iter 21/73 - loss 0.09419836 - samples/sec: 15.52 - lr: 0.020000\n",
+      "2021-09-21 22:29:56,841 epoch 6 - iter 28/73 - loss 0.10696820 - samples/sec: 17.22 - lr: 0.020000\n",
+      "2021-09-21 22:29:57,168 epoch 6 - iter 35/73 - loss 0.08573295 - samples/sec: 21.45 - lr: 0.020000\n",
+      "2021-09-21 22:29:57,524 epoch 6 - iter 42/73 - loss 0.11397475 - samples/sec: 19.68 - lr: 0.020000\n",
+      "2021-09-21 22:29:57,884 epoch 6 - iter 49/73 - loss 0.12090770 - samples/sec: 19.47 - lr: 0.020000\n",
+      "2021-09-21 22:29:58,235 epoch 6 - iter 56/73 - loss 0.10607405 - samples/sec: 19.98 - lr: 0.020000\n",
+      "2021-09-21 22:29:58,563 epoch 6 - iter 63/73 - loss 0.09455249 - samples/sec: 21.38 - lr: 0.020000\n",
+      "2021-09-21 22:29:58,983 epoch 6 - iter 70/73 - loss 0.08514191 - samples/sec: 16.71 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:17:22,313 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:17:22,314 EPOCH 6 done: loss 0.0123 - lr 0.0200000\n",
-      "2021-09-08 02:17:22,531 DEV : loss 0.5563281774520874 - score 0.625\n",
-      "2021-09-08 02:17:22,532 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:17:22,533 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:17:22,866 epoch 7 - iter 7/73 - loss 0.09590859 - samples/sec: 22.02 - lr: 0.020000\n",
-      "2021-09-08 02:17:23,319 epoch 7 - iter 14/73 - loss 0.04820397 - samples/sec: 15.49 - lr: 0.020000\n",
-      "2021-09-08 02:17:23,787 epoch 7 - iter 21/73 - loss 0.03220584 - samples/sec: 14.96 - lr: 0.020000\n",
-      "2021-09-08 02:17:24,161 epoch 7 - iter 28/73 - loss 0.02443362 - samples/sec: 18.74 - lr: 0.020000\n",
-      "2021-09-08 02:17:24,599 epoch 7 - iter 35/73 - loss 0.01964652 - samples/sec: 16.03 - lr: 0.020000\n",
-      "2021-09-08 02:17:24,974 epoch 7 - iter 42/73 - loss 0.01639869 - samples/sec: 18.68 - lr: 0.020000\n",
-      "2021-09-08 02:17:25,321 epoch 7 - iter 49/73 - loss 0.01412561 - samples/sec: 20.18 - lr: 0.020000\n",
-      "2021-09-08 02:17:25,786 epoch 7 - iter 56/73 - loss 0.01239637 - samples/sec: 15.08 - lr: 0.020000\n",
-      "2021-09-08 02:17:26,137 epoch 7 - iter 63/73 - loss 0.04696450 - samples/sec: 19.99 - lr: 0.020000\n",
-      "2021-09-08 02:17:26,465 epoch 7 - iter 70/73 - loss 0.04231974 - samples/sec: 21.36 - lr: 0.020000\n",
-      "2021-09-08 02:17:26,600 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:17:26,600 EPOCH 7 done: loss 0.0406 - lr 0.0200000\n",
-      "2021-09-08 02:17:26,817 DEV : loss 0.4344191551208496 - score 0.625\n",
-      "2021-09-08 02:17:26,818 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:17:26,821 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:17:27,154 epoch 8 - iter 7/73 - loss 0.00028587 - samples/sec: 22.01 - lr: 0.020000\n",
-      "2021-09-08 02:17:27,522 epoch 8 - iter 14/73 - loss 0.00344869 - samples/sec: 19.06 - lr: 0.020000\n",
-      "2021-09-08 02:17:28,038 epoch 8 - iter 21/73 - loss 0.00240881 - samples/sec: 13.56 - lr: 0.020000\n",
-      "2021-09-08 02:17:28,426 epoch 8 - iter 28/73 - loss 0.00186483 - samples/sec: 18.09 - lr: 0.020000\n",
-      "2021-09-08 02:17:28,830 epoch 8 - iter 35/73 - loss 0.08733293 - samples/sec: 17.32 - lr: 0.020000\n",
-      "2021-09-08 02:17:29,217 epoch 8 - iter 42/73 - loss 0.07283988 - samples/sec: 18.14 - lr: 0.020000\n",
-      "2021-09-08 02:17:29,577 epoch 8 - iter 49/73 - loss 0.06245961 - samples/sec: 19.45 - lr: 0.020000\n",
-      "2021-09-08 02:17:29,944 epoch 8 - iter 56/73 - loss 0.05468672 - samples/sec: 19.12 - lr: 0.020000\n",
-      "2021-09-08 02:17:30,356 epoch 8 - iter 63/73 - loss 0.04866434 - samples/sec: 16.99 - lr: 0.020000\n",
-      "2021-09-08 02:17:30,926 epoch 8 - iter 70/73 - loss 0.04384992 - samples/sec: 12.29 - lr: 0.020000\n",
-      "2021-09-08 02:17:31,059 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:17:31,059 EPOCH 8 done: loss 0.0421 - lr 0.0200000\n",
-      "2021-09-08 02:17:31,297 DEV : loss 0.3270512819290161 - score 0.75\n",
-      "2021-09-08 02:17:31,298 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 02:17:31,316 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:17:31,795 epoch 9 - iter 7/73 - loss 0.03120351 - samples/sec: 15.09 - lr: 0.020000\n",
-      "2021-09-08 02:17:32,115 epoch 9 - iter 14/73 - loss 0.01583401 - samples/sec: 21.90 - lr: 0.020000\n",
-      "2021-09-08 02:17:32,493 epoch 9 - iter 21/73 - loss 0.01064857 - samples/sec: 18.57 - lr: 0.020000\n",
-      "2021-09-08 02:17:32,927 epoch 9 - iter 28/73 - loss 0.00885021 - samples/sec: 16.14 - lr: 0.020000\n",
-      "2021-09-08 02:17:33,350 epoch 9 - iter 35/73 - loss 0.00715459 - samples/sec: 16.59 - lr: 0.020000\n",
-      "2021-09-08 02:17:33,700 epoch 9 - iter 42/73 - loss 0.00658762 - samples/sec: 20.01 - lr: 0.020000\n",
-      "2021-09-08 02:17:34,154 epoch 9 - iter 49/73 - loss 0.01676815 - samples/sec: 15.44 - lr: 0.020000\n",
-      "2021-09-08 02:17:34,492 epoch 9 - iter 56/73 - loss 0.02205994 - samples/sec: 20.75 - lr: 0.020000\n",
-      "2021-09-08 02:17:34,873 epoch 9 - iter 63/73 - loss 0.01966635 - samples/sec: 18.37 - lr: 0.020000\n",
-      "2021-09-08 02:17:35,202 epoch 9 - iter 70/73 - loss 0.01771412 - samples/sec: 21.35 - lr: 0.020000\n",
-      "2021-09-08 02:17:35,388 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:17:35,388 EPOCH 9 done: loss 0.0170 - lr 0.0200000\n",
-      "2021-09-08 02:17:35,607 DEV : loss 0.3679582476615906 - score 0.875\n",
+      "2021-09-21 22:29:59,118 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:29:59,119 EPOCH 6 done: loss 0.1065 - lr 0.0200000\n",
+      "2021-09-21 22:29:59,491 DEV : loss 0.362339586019516 - score 0.75\n",
+      "2021-09-21 22:29:59,493 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:29:59,495 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:29:59,816 epoch 7 - iter 7/73 - loss 0.00150425 - samples/sec: 22.89 - lr: 0.020000\n",
+      "2021-09-21 22:30:00,252 epoch 7 - iter 14/73 - loss 0.00123503 - samples/sec: 16.08 - lr: 0.020000\n",
+      "2021-09-21 22:30:00,602 epoch 7 - iter 21/73 - loss 0.00103982 - samples/sec: 19.99 - lr: 0.020000\n",
+      "2021-09-21 22:30:00,980 epoch 7 - iter 28/73 - loss 0.09803852 - samples/sec: 18.54 - lr: 0.020000\n",
+      "2021-09-21 22:30:01,397 epoch 7 - iter 35/73 - loss 0.07857161 - samples/sec: 16.84 - lr: 0.020000\n",
+      "2021-09-21 22:30:01,810 epoch 7 - iter 42/73 - loss 0.06702617 - samples/sec: 16.95 - lr: 0.020000\n",
+      "2021-09-21 22:30:02,134 epoch 7 - iter 49/73 - loss 0.05773292 - samples/sec: 21.69 - lr: 0.020000\n",
+      "2021-09-21 22:30:02,464 epoch 7 - iter 56/73 - loss 0.07277667 - samples/sec: 21.26 - lr: 0.020000\n",
+      "2021-09-21 22:30:02,801 epoch 7 - iter 63/73 - loss 0.06479601 - samples/sec: 20.79 - lr: 0.020000\n",
+      "2021-09-21 22:30:03,136 epoch 7 - iter 70/73 - loss 0.05886708 - samples/sec: 20.98 - lr: 0.020000\n",
+      "2021-09-21 22:30:03,270 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:30:03,271 EPOCH 7 done: loss 0.0565 - lr 0.0200000\n",
+      "2021-09-21 22:30:03,642 DEV : loss 0.6335262656211853 - score 0.75\n",
+      "2021-09-21 22:30:03,643 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 22:30:03,645 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:30:04,013 epoch 8 - iter 7/73 - loss 0.00043618 - samples/sec: 19.81 - lr: 0.020000\n",
+      "2021-09-21 22:30:04,343 epoch 8 - iter 14/73 - loss 0.00198629 - samples/sec: 21.27 - lr: 0.020000\n",
+      "2021-09-21 22:30:04,813 epoch 8 - iter 21/73 - loss 0.03501525 - samples/sec: 14.89 - lr: 0.020000\n",
+      "2021-09-21 22:30:05,158 epoch 8 - iter 28/73 - loss 0.02663027 - samples/sec: 20.34 - lr: 0.020000\n",
+      "2021-09-21 22:30:05,571 epoch 8 - iter 35/73 - loss 0.07079924 - samples/sec: 16.99 - lr: 0.020000\n",
+      "2021-09-21 22:30:05,919 epoch 8 - iter 42/73 - loss 0.05903289 - samples/sec: 20.16 - lr: 0.020000\n",
+      "2021-09-21 22:30:06,249 epoch 8 - iter 49/73 - loss 0.09984093 - samples/sec: 21.24 - lr: 0.020000\n",
+      "2021-09-21 22:30:06,639 epoch 8 - iter 56/73 - loss 0.09961785 - samples/sec: 17.98 - lr: 0.020000\n",
+      "2021-09-21 22:30:06,973 epoch 8 - iter 63/73 - loss 0.08906856 - samples/sec: 20.97 - lr: 0.020000\n",
+      "2021-09-21 22:30:07,298 epoch 8 - iter 70/73 - loss 0.08215197 - samples/sec: 21.59 - lr: 0.020000\n",
+      "2021-09-21 22:30:07,427 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:30:07,428 EPOCH 8 done: loss 0.0788 - lr 0.0200000\n",
+      "2021-09-21 22:30:07,799 DEV : loss 0.6507712602615356 - score 0.625\n",
+      "2021-09-21 22:30:07,800 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 22:30:07,802 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:30:08,126 epoch 9 - iter 7/73 - loss 0.00121455 - samples/sec: 22.65 - lr: 0.020000\n",
+      "2021-09-21 22:30:08,563 epoch 9 - iter 14/73 - loss 0.01067160 - samples/sec: 16.05 - lr: 0.020000\n",
+      "2021-09-21 22:30:08,926 epoch 9 - iter 21/73 - loss 0.00751681 - samples/sec: 19.27 - lr: 0.020000\n",
+      "2021-09-21 22:30:09,290 epoch 9 - iter 28/73 - loss 0.00735826 - samples/sec: 19.30 - lr: 0.020000\n",
+      "2021-09-21 22:30:09,654 epoch 9 - iter 35/73 - loss 0.00600076 - samples/sec: 19.24 - lr: 0.020000\n",
+      "2021-09-21 22:30:10,042 epoch 9 - iter 42/73 - loss 0.01364645 - samples/sec: 18.11 - lr: 0.020000\n",
+      "2021-09-21 22:30:10,443 epoch 9 - iter 49/73 - loss 0.01386246 - samples/sec: 17.48 - lr: 0.020000\n",
+      "2021-09-21 22:30:10,761 epoch 9 - iter 56/73 - loss 0.05235639 - samples/sec: 22.05 - lr: 0.020000\n",
+      "2021-09-21 22:30:11,080 epoch 9 - iter 63/73 - loss 0.04813017 - samples/sec: 21.99 - lr: 0.020000\n",
+      "2021-09-21 22:30:11,441 epoch 9 - iter 70/73 - loss 0.04335237 - samples/sec: 19.41 - lr: 0.020000\n",
+      "2021-09-21 22:30:11,571 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:30:11,572 EPOCH 9 done: loss 0.0416 - lr 0.0200000\n",
+      "2021-09-21 22:30:12,102 DEV : loss 0.5646776556968689 - score 0.625\n",
       "Epoch     9: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 02:17:35,608 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 02:17:35,610 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:17:35,981 epoch 10 - iter 7/73 - loss 0.00017395 - samples/sec: 19.63 - lr: 0.010000\n",
-      "2021-09-08 02:17:36,354 epoch 10 - iter 14/73 - loss 0.00021143 - samples/sec: 18.79 - lr: 0.010000\n",
-      "2021-09-08 02:17:36,713 epoch 10 - iter 21/73 - loss 0.00022728 - samples/sec: 19.54 - lr: 0.010000\n",
-      "2021-09-08 02:17:37,070 epoch 10 - iter 28/73 - loss 0.00019798 - samples/sec: 19.64 - lr: 0.010000\n",
-      "2021-09-08 02:17:37,418 epoch 10 - iter 35/73 - loss 0.00018273 - samples/sec: 20.16 - lr: 0.010000\n",
-      "2021-09-08 02:17:37,835 epoch 10 - iter 42/73 - loss 0.02577811 - samples/sec: 16.80 - lr: 0.010000\n",
-      "2021-09-08 02:17:38,155 epoch 10 - iter 49/73 - loss 0.04859765 - samples/sec: 21.88 - lr: 0.010000\n",
-      "2021-09-08 02:17:38,685 epoch 10 - iter 56/73 - loss 0.04259337 - samples/sec: 13.23 - lr: 0.010000\n",
-      "2021-09-08 02:17:39,055 epoch 10 - iter 63/73 - loss 0.05267802 - samples/sec: 18.96 - lr: 0.010000\n",
-      "2021-09-08 02:17:39,468 epoch 10 - iter 70/73 - loss 0.04780906 - samples/sec: 16.95 - lr: 0.010000\n",
-      "2021-09-08 02:17:39,674 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:17:39,674 EPOCH 10 done: loss 0.0459 - lr 0.0100000\n",
-      "2021-09-08 02:17:39,892 DEV : loss 0.34520256519317627 - score 0.875\n",
-      "2021-09-08 02:17:39,893 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:17:43,737 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:17:43,737 Testing using best model ...\n",
-      "2021-09-08 02:17:43,739 loading file None/best-model.pt\n",
+      "2021-09-21 22:30:12,103 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 22:30:12,105 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:30:12,476 epoch 10 - iter 7/73 - loss 0.00035312 - samples/sec: 19.72 - lr: 0.010000\n",
+      "2021-09-21 22:30:12,820 epoch 10 - iter 14/73 - loss 0.00024647 - samples/sec: 20.37 - lr: 0.010000\n",
+      "2021-09-21 22:30:13,177 epoch 10 - iter 21/73 - loss 0.02646152 - samples/sec: 19.62 - lr: 0.010000\n",
+      "2021-09-21 22:30:13,635 epoch 10 - iter 28/73 - loss 0.01998651 - samples/sec: 15.32 - lr: 0.010000\n",
+      "2021-09-21 22:30:13,987 epoch 10 - iter 35/73 - loss 0.01602424 - samples/sec: 19.88 - lr: 0.010000\n",
+      "2021-09-21 22:30:14,318 epoch 10 - iter 42/73 - loss 0.01338276 - samples/sec: 21.21 - lr: 0.010000\n",
+      "2021-09-21 22:30:14,637 epoch 10 - iter 49/73 - loss 0.01150333 - samples/sec: 22.01 - lr: 0.010000\n",
+      "2021-09-21 22:30:15,006 epoch 10 - iter 56/73 - loss 0.01010566 - samples/sec: 18.99 - lr: 0.010000\n",
+      "2021-09-21 22:30:15,376 epoch 10 - iter 63/73 - loss 0.00906344 - samples/sec: 18.94 - lr: 0.010000\n",
+      "2021-09-21 22:30:15,724 epoch 10 - iter 70/73 - loss 0.00818618 - samples/sec: 20.16 - lr: 0.010000\n",
+      "2021-09-21 22:30:15,858 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:30:15,858 EPOCH 10 done: loss 0.0079 - lr 0.0100000\n",
+      "2021-09-21 22:30:16,231 DEV : loss 0.5914800763130188 - score 0.625\n",
+      "2021-09-21 22:30:16,232 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:30:20,364 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:30:20,365 Testing using best model ...\n",
+      "2021-09-21 22:30:20,367 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:17:48,980 \t0.7778\n",
-      "2021-09-08 02:17:48,981 \n",
+      "2021-09-21 22:30:24,631 \t0.4444\n",
+      "2021-09-21 22:30:24,632 \n",
       "Results:\n",
-      "- F-score (micro) 0.7778\n",
-      "- F-score (macro) 0.5185\n",
-      "- Accuracy 0.7778\n",
+      "- F-score (micro) 0.4444\n",
+      "- F-score (macro) 0.2593\n",
+      "- Accuracy 0.4444\n",
       "\n",
       "By class:\n",
       "                                                                                  precision    recall  f1-score   support\n",
       "\n",
-      "                                                   a social unit living together     0.5000    1.0000    0.6667         1\n",
-      "                                     a particular branch of scientific knowledge     1.0000    1.0000    1.0000         1\n",
-      "                      an activity that is diverting and that holds the attention     0.5000    1.0000    0.6667         1\n",
-      "                             a machine for performing calculations automatically     0.0000    0.0000    0.0000         0\n",
-      "                                  knowledge acquired by learning and instruction     0.0000    0.0000    0.0000         0\n",
+      "                                                   a social unit living together     0.0000    0.0000    0.0000         0\n",
+      "                                     a particular branch of scientific knowledge     0.0000    0.0000    0.0000         0\n",
+      "                      an activity that is diverting and that holds the attention     0.0000    0.0000    0.0000         2\n",
+      "                             a machine for performing calculations automatically     1.0000    0.5000    0.6667         2\n",
+      "                                  knowledge acquired by learning and instruction     0.0000    0.0000    0.0000         2\n",
       "                 an active diversion requiring physical exertion and competition     1.0000    1.0000    1.0000         2\n",
-      "an extended social group having a distinctive cultural and economic organization     1.0000    0.5000    0.6667         2\n",
-      "                          the commercial activity of providing funds and capital     1.0000    0.5000    0.6667         2\n",
+      "an extended social group having a distinctive cultural and economic organization     0.0000    0.0000    0.0000         0\n",
+      "                          the commercial activity of providing funds and capital     0.5000    1.0000    0.6667         1\n",
       "                     the study of government of states and other political units     0.0000    0.0000    0.0000         0\n",
       "\n",
-      "                                                                       micro avg     0.7778    0.7778    0.7778         9\n",
-      "                                                                       macro avg     0.5556    0.5556    0.5185         9\n",
-      "                                                                    weighted avg     0.8889    0.7778    0.7778         9\n",
-      "                                                                     samples avg     0.7778    0.7778    0.7778         9\n",
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 02:17:48,981 ----------------------------------------------------------------------------------------------------\n",
-      "Accuracy Durchschnitt: 0.6161559888579387\n"
+      "                                                                       micro avg     0.4444    0.4444    0.4444         9\n",
+      "                                                                       macro avg     0.2778    0.2778    0.2593         9\n",
+      "                                                                    weighted avg     0.5000    0.4444    0.4444         9\n",
+      "                                                                     samples avg     0.4444    0.4444    0.4444         9\n",
+      "\n",
+      "2021-09-21 22:30:24,632 ----------------------------------------------------------------------------------------------------\n"
      ]
     }
    ],
@@ -8713,6 +8726,17 @@
    "id": "a310936c",
    "metadata": {},
    "outputs": [],
+   "source": [
+    "print(avg_acc_list)\n",
+    "print(statistics.pstdev(avg_acc_list))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8f62717b",
+   "metadata": {},
+   "outputs": [],
    "source": []
   }
  ],
diff --git a/fewshot/topic_yin_fewshot.ipynb b/fewshot/topic_yin_fewshot.ipynb
index 661f05d..75b3842 100644
--- a/fewshot/topic_yin_fewshot.ipynb
+++ b/fewshot/topic_yin_fewshot.ipynb
@@ -70,25 +70,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:53:33,165 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
+      "2021-09-21 21:51:40,028 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:53:42,663 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:51:46,844 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 146/146 [00:00<00:00, 84592.95it/s]"
+      "100%|██████████| 146/146 [00:00<00:00, 86713.17it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:53:42,667 [b'travel', b'technology', b'wellness', b'women', b'parents', b'business', b'weddings', b'fashion', b'entertainment', b'science', b'divorce', b'crime', b'religion', b'sports', b'politics', b'comedy']\n",
-      "2021-09-08 01:53:42,670 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:42,672 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:51:46,848 [b'travel', b'technology', b'wellness', b'women', b'parents', b'business', b'weddings', b'fashion', b'entertainment', b'science', b'divorce', b'crime', b'religion', b'sports', b'politics', b'comedy']\n",
+      "2021-09-21 21:51:46,851 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:51:46,853 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -401,25 +401,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:53:42,673 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:42,673 Corpus: \"Corpus: 130 train + 14 dev + 16 test sentences\"\n",
-      "2021-09-08 01:53:42,674 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:42,674 Parameters:\n",
-      "2021-09-08 01:53:42,674  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:53:42,675  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:53:42,676  - patience: \"3\"\n",
-      "2021-09-08 01:53:42,677  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:53:42,677  - max_epochs: \"10\"\n",
-      "2021-09-08 01:53:42,677  - shuffle: \"True\"\n",
-      "2021-09-08 01:53:42,678  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:53:42,678  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:53:42,678 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:42,679 Model training base path: \"temp\"\n",
-      "2021-09-08 01:53:42,679 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:42,680 Device: cuda:0\n",
-      "2021-09-08 01:53:42,680 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:42,680 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:53:42,687 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:51:46,853 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:51:46,854 Corpus: \"Corpus: 130 train + 14 dev + 16 test sentences\"\n",
+      "2021-09-21 21:51:46,854 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:51:46,854 Parameters:\n",
+      "2021-09-21 21:51:46,855  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:51:46,856  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:51:46,856  - patience: \"3\"\n",
+      "2021-09-21 21:51:46,857  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:51:46,857  - max_epochs: \"10\"\n",
+      "2021-09-21 21:51:46,858  - shuffle: \"True\"\n",
+      "2021-09-21 21:51:46,858  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:51:46,858  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:51:46,859 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:51:46,859 Model training base path: \"temp\"\n",
+      "2021-09-21 21:51:46,860 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:51:46,860 Device: cuda:0\n",
+      "2021-09-21 21:51:46,860 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:51:46,861 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:51:46,869 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -433,221 +433,222 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:53:43,460 epoch 1 - iter 13/130 - loss 0.45809358 - samples/sec: 17.87 - lr: 0.020000\n",
-      "2021-09-08 01:53:44,212 epoch 1 - iter 26/130 - loss 0.47946470 - samples/sec: 17.30 - lr: 0.020000\n",
-      "2021-09-08 01:53:44,972 epoch 1 - iter 39/130 - loss 0.48413356 - samples/sec: 17.14 - lr: 0.020000\n",
-      "2021-09-08 01:53:45,671 epoch 1 - iter 52/130 - loss 0.52272945 - samples/sec: 18.63 - lr: 0.020000\n",
-      "2021-09-08 01:53:46,425 epoch 1 - iter 65/130 - loss 0.58134841 - samples/sec: 17.31 - lr: 0.020000\n",
-      "2021-09-08 01:53:47,170 epoch 1 - iter 78/130 - loss 0.58698411 - samples/sec: 17.48 - lr: 0.020000\n",
-      "2021-09-08 01:53:47,923 epoch 1 - iter 91/130 - loss 0.55892048 - samples/sec: 17.28 - lr: 0.020000\n",
-      "2021-09-08 01:53:48,660 epoch 1 - iter 104/130 - loss 0.55417945 - samples/sec: 17.69 - lr: 0.020000\n",
-      "2021-09-08 01:53:49,356 epoch 1 - iter 117/130 - loss 0.51843301 - samples/sec: 18.71 - lr: 0.020000\n",
-      "2021-09-08 01:53:50,089 epoch 1 - iter 130/130 - loss 0.53041054 - samples/sec: 17.75 - lr: 0.020000\n",
-      "2021-09-08 01:53:50,091 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:50,091 EPOCH 1 done: loss 0.5304 - lr 0.0200000\n",
-      "2021-09-08 01:53:54,714 DEV : loss 0.9369557499885559 - score 0.0714\n",
-      "2021-09-08 01:53:54,715 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:51:47,683 epoch 1 - iter 13/130 - loss 0.34294790 - samples/sec: 17.10 - lr: 0.020000\n",
+      "2021-09-21 21:51:48,448 epoch 1 - iter 26/130 - loss 0.56482434 - samples/sec: 17.00 - lr: 0.020000\n",
+      "2021-09-21 21:51:49,204 epoch 1 - iter 39/130 - loss 0.57544339 - samples/sec: 17.22 - lr: 0.020000\n",
+      "2021-09-21 21:51:49,987 epoch 1 - iter 52/130 - loss 0.57605702 - samples/sec: 16.63 - lr: 0.020000\n",
+      "2021-09-21 21:51:50,707 epoch 1 - iter 65/130 - loss 0.50824043 - samples/sec: 18.08 - lr: 0.020000\n",
+      "2021-09-21 21:51:51,452 epoch 1 - iter 78/130 - loss 0.50077724 - samples/sec: 17.45 - lr: 0.020000\n",
+      "2021-09-21 21:51:52,235 epoch 1 - iter 91/130 - loss 0.50416269 - samples/sec: 16.64 - lr: 0.020000\n",
+      "2021-09-21 21:51:53,003 epoch 1 - iter 104/130 - loss 0.49602172 - samples/sec: 16.93 - lr: 0.020000\n",
+      "2021-09-21 21:51:53,694 epoch 1 - iter 117/130 - loss 0.48647267 - samples/sec: 18.85 - lr: 0.020000\n",
+      "2021-09-21 21:51:54,303 epoch 1 - iter 130/130 - loss 0.52257483 - samples/sec: 21.37 - lr: 0.020000\n",
+      "2021-09-21 21:51:54,305 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:51:54,305 EPOCH 1 done: loss 0.5226 - lr 0.0200000\n",
+      "2021-09-21 21:51:54,621 DEV : loss 0.8774684071540833 - score 0.1429\n",
+      "2021-09-21 21:51:54,622 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:53:58,967 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:53:59,879 epoch 2 - iter 13/130 - loss 0.83319832 - samples/sec: 14.99 - lr: 0.020000\n",
-      "2021-09-08 01:54:00,605 epoch 2 - iter 26/130 - loss 0.68107237 - samples/sec: 17.93 - lr: 0.020000\n",
-      "2021-09-08 01:54:01,454 epoch 2 - iter 39/130 - loss 0.70436053 - samples/sec: 15.36 - lr: 0.020000\n",
-      "2021-09-08 01:54:02,294 epoch 2 - iter 52/130 - loss 0.63170937 - samples/sec: 15.49 - lr: 0.020000\n",
-      "2021-09-08 01:54:03,103 epoch 2 - iter 65/130 - loss 0.61944248 - samples/sec: 16.09 - lr: 0.020000\n",
-      "2021-09-08 01:54:03,931 epoch 2 - iter 78/130 - loss 0.57815339 - samples/sec: 15.72 - lr: 0.020000\n",
-      "2021-09-08 01:54:04,835 epoch 2 - iter 91/130 - loss 0.60341361 - samples/sec: 14.40 - lr: 0.020000\n",
-      "2021-09-08 01:54:05,714 epoch 2 - iter 104/130 - loss 0.61360724 - samples/sec: 14.80 - lr: 0.020000\n",
-      "2021-09-08 01:54:06,589 epoch 2 - iter 117/130 - loss 0.59652112 - samples/sec: 14.88 - lr: 0.020000\n",
-      "2021-09-08 01:54:07,443 epoch 2 - iter 130/130 - loss 0.61001689 - samples/sec: 15.23 - lr: 0.020000\n",
-      "2021-09-08 01:54:07,444 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:54:07,445 EPOCH 2 done: loss 0.6100 - lr 0.0200000\n",
-      "2021-09-08 01:54:08,087 DEV : loss 0.26250728964805603 - score 0.1429\n",
-      "2021-09-08 01:54:08,088 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:51:59,971 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:52:00,797 epoch 2 - iter 13/130 - loss 0.44856550 - samples/sec: 16.61 - lr: 0.020000\n",
+      "2021-09-21 21:52:01,595 epoch 2 - iter 26/130 - loss 0.56472912 - samples/sec: 16.31 - lr: 0.020000\n",
+      "2021-09-21 21:52:02,353 epoch 2 - iter 39/130 - loss 0.50383420 - samples/sec: 17.19 - lr: 0.020000\n",
+      "2021-09-21 21:52:03,128 epoch 2 - iter 52/130 - loss 0.50759545 - samples/sec: 16.79 - lr: 0.020000\n",
+      "2021-09-21 21:52:03,905 epoch 2 - iter 65/130 - loss 0.58865919 - samples/sec: 16.75 - lr: 0.020000\n",
+      "2021-09-21 21:52:04,632 epoch 2 - iter 78/130 - loss 0.61711755 - samples/sec: 17.90 - lr: 0.020000\n",
+      "2021-09-21 21:52:05,246 epoch 2 - iter 91/130 - loss 0.61819162 - samples/sec: 21.22 - lr: 0.020000\n",
+      "2021-09-21 21:52:05,837 epoch 2 - iter 104/130 - loss 0.60936269 - samples/sec: 22.03 - lr: 0.020000\n",
+      "2021-09-21 21:52:06,417 epoch 2 - iter 117/130 - loss 0.58906295 - samples/sec: 22.44 - lr: 0.020000\n",
+      "2021-09-21 21:52:07,000 epoch 2 - iter 130/130 - loss 0.59662496 - samples/sec: 22.33 - lr: 0.020000\n",
+      "2021-09-21 21:52:07,002 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:52:07,002 EPOCH 2 done: loss 0.5966 - lr 0.0200000\n",
+      "2021-09-21 21:52:07,319 DEV : loss 0.36961156129837036 - score 0.4286\n",
+      "2021-09-21 21:52:07,320 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:54:13,607 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:54:14,519 epoch 3 - iter 13/130 - loss 0.76826722 - samples/sec: 15.72 - lr: 0.020000\n",
-      "2021-09-08 01:54:15,308 epoch 3 - iter 26/130 - loss 0.73988193 - samples/sec: 16.50 - lr: 0.020000\n",
-      "2021-09-08 01:54:16,106 epoch 3 - iter 39/130 - loss 0.71772902 - samples/sec: 16.32 - lr: 0.020000\n",
-      "2021-09-08 01:54:16,912 epoch 3 - iter 52/130 - loss 0.70453699 - samples/sec: 16.13 - lr: 0.020000\n",
-      "2021-09-08 01:54:17,713 epoch 3 - iter 65/130 - loss 0.69090397 - samples/sec: 16.25 - lr: 0.020000\n",
-      "2021-09-08 01:54:18,556 epoch 3 - iter 78/130 - loss 0.69136371 - samples/sec: 15.45 - lr: 0.020000\n",
-      "2021-09-08 01:54:19,366 epoch 3 - iter 91/130 - loss 0.69281803 - samples/sec: 16.06 - lr: 0.020000\n",
-      "2021-09-08 01:54:20,189 epoch 3 - iter 104/130 - loss 0.69260296 - samples/sec: 15.81 - lr: 0.020000\n",
-      "2021-09-08 01:54:20,988 epoch 3 - iter 117/130 - loss 0.68974229 - samples/sec: 16.30 - lr: 0.020000\n",
-      "2021-09-08 01:54:21,786 epoch 3 - iter 130/130 - loss 0.68447354 - samples/sec: 16.30 - lr: 0.020000\n",
-      "2021-09-08 01:54:21,787 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:54:21,788 EPOCH 3 done: loss 0.6845 - lr 0.0200000\n",
-      "2021-09-08 01:54:22,519 DEV : loss 0.4542766809463501 - score 0.0\n",
-      "2021-09-08 01:54:22,520 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:54:22,522 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:54:23,412 epoch 4 - iter 13/130 - loss 0.59622955 - samples/sec: 15.12 - lr: 0.020000\n",
-      "2021-09-08 01:54:24,243 epoch 4 - iter 26/130 - loss 0.63212631 - samples/sec: 15.66 - lr: 0.020000\n",
-      "2021-09-08 01:54:25,085 epoch 4 - iter 39/130 - loss 0.63495855 - samples/sec: 15.46 - lr: 0.020000\n",
-      "2021-09-08 01:54:25,854 epoch 4 - iter 52/130 - loss 0.63403367 - samples/sec: 16.93 - lr: 0.020000\n",
-      "2021-09-08 01:54:26,658 epoch 4 - iter 65/130 - loss 0.63553110 - samples/sec: 16.17 - lr: 0.020000\n",
-      "2021-09-08 01:54:27,451 epoch 4 - iter 78/130 - loss 0.64435921 - samples/sec: 16.40 - lr: 0.020000\n",
-      "2021-09-08 01:54:28,261 epoch 4 - iter 91/130 - loss 0.64800922 - samples/sec: 16.08 - lr: 0.020000\n",
-      "2021-09-08 01:54:29,046 epoch 4 - iter 104/130 - loss 0.64395137 - samples/sec: 16.57 - lr: 0.020000\n",
-      "2021-09-08 01:54:29,852 epoch 4 - iter 117/130 - loss 0.63968908 - samples/sec: 16.15 - lr: 0.020000\n",
-      "2021-09-08 01:54:30,676 epoch 4 - iter 130/130 - loss 0.64377654 - samples/sec: 15.80 - lr: 0.020000\n",
-      "2021-09-08 01:54:30,677 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:54:30,677 EPOCH 4 done: loss 0.6438 - lr 0.0200000\n",
-      "2021-09-08 01:54:31,404 DEV : loss 0.412414014339447 - score 0.0\n",
-      "2021-09-08 01:54:31,405 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:54:31,407 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:54:32,278 epoch 5 - iter 13/130 - loss 0.65688561 - samples/sec: 15.58 - lr: 0.020000\n",
-      "2021-09-08 01:54:33,094 epoch 5 - iter 26/130 - loss 0.64882181 - samples/sec: 15.96 - lr: 0.020000\n",
-      "2021-09-08 01:54:33,906 epoch 5 - iter 39/130 - loss 0.66503746 - samples/sec: 16.02 - lr: 0.020000\n",
-      "2021-09-08 01:54:34,668 epoch 5 - iter 52/130 - loss 0.65983117 - samples/sec: 17.08 - lr: 0.020000\n",
-      "2021-09-08 01:54:35,487 epoch 5 - iter 65/130 - loss 0.65027077 - samples/sec: 15.88 - lr: 0.020000\n",
-      "2021-09-08 01:54:36,326 epoch 5 - iter 78/130 - loss 0.65115151 - samples/sec: 15.52 - lr: 0.020000\n",
-      "2021-09-08 01:54:37,145 epoch 5 - iter 91/130 - loss 0.65865346 - samples/sec: 15.88 - lr: 0.020000\n",
-      "2021-09-08 01:54:37,929 epoch 5 - iter 104/130 - loss 0.65711614 - samples/sec: 16.62 - lr: 0.020000\n",
-      "2021-09-08 01:54:38,752 epoch 5 - iter 117/130 - loss 0.65588141 - samples/sec: 15.81 - lr: 0.020000\n",
-      "2021-09-08 01:54:39,543 epoch 5 - iter 130/130 - loss 0.65995461 - samples/sec: 16.44 - lr: 0.020000\n",
-      "2021-09-08 01:54:39,545 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:54:39,545 EPOCH 5 done: loss 0.6600 - lr 0.0200000\n",
-      "2021-09-08 01:54:40,167 DEV : loss 0.4930815100669861 - score 0.0\n",
-      "2021-09-08 01:54:40,167 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:54:40,170 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:54:41,000 epoch 6 - iter 13/130 - loss 0.64921407 - samples/sec: 16.93 - lr: 0.020000\n",
-      "2021-09-08 01:54:41,857 epoch 6 - iter 26/130 - loss 0.64659384 - samples/sec: 15.19 - lr: 0.020000\n",
-      "2021-09-08 01:54:42,673 epoch 6 - iter 39/130 - loss 0.65152175 - samples/sec: 15.94 - lr: 0.020000\n",
-      "2021-09-08 01:54:43,474 epoch 6 - iter 52/130 - loss 0.64375147 - samples/sec: 16.24 - lr: 0.020000\n",
-      "2021-09-08 01:54:44,323 epoch 6 - iter 65/130 - loss 0.64517983 - samples/sec: 15.34 - lr: 0.020000\n",
-      "2021-09-08 01:54:45,115 epoch 6 - iter 78/130 - loss 0.64222726 - samples/sec: 16.42 - lr: 0.020000\n",
-      "2021-09-08 01:54:45,832 epoch 6 - iter 91/130 - loss 0.64832553 - samples/sec: 18.14 - lr: 0.020000\n",
-      "2021-09-08 01:54:46,521 epoch 6 - iter 104/130 - loss 0.64697528 - samples/sec: 18.91 - lr: 0.020000\n",
-      "2021-09-08 01:54:47,226 epoch 6 - iter 117/130 - loss 0.64522854 - samples/sec: 18.45 - lr: 0.020000\n",
-      "2021-09-08 01:54:47,894 epoch 6 - iter 130/130 - loss 0.64771028 - samples/sec: 19.49 - lr: 0.020000\n"
+      "2021-09-21 21:52:11,298 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:52:12,109 epoch 3 - iter 13/130 - loss 0.55105000 - samples/sec: 17.56 - lr: 0.020000\n",
+      "2021-09-21 21:52:12,917 epoch 3 - iter 26/130 - loss 0.61969934 - samples/sec: 16.10 - lr: 0.020000\n",
+      "2021-09-21 21:52:13,680 epoch 3 - iter 39/130 - loss 0.64820016 - samples/sec: 17.06 - lr: 0.020000\n",
+      "2021-09-21 21:52:14,418 epoch 3 - iter 52/130 - loss 0.52809819 - samples/sec: 17.63 - lr: 0.020000\n",
+      "2021-09-21 21:52:15,122 epoch 3 - iter 65/130 - loss 0.52457289 - samples/sec: 18.49 - lr: 0.020000\n",
+      "2021-09-21 21:52:15,888 epoch 3 - iter 78/130 - loss 0.60477521 - samples/sec: 16.99 - lr: 0.020000\n",
+      "2021-09-21 21:52:16,633 epoch 3 - iter 91/130 - loss 0.60268446 - samples/sec: 17.46 - lr: 0.020000\n",
+      "2021-09-21 21:52:17,363 epoch 3 - iter 104/130 - loss 0.56647757 - samples/sec: 17.85 - lr: 0.020000\n",
+      "2021-09-21 21:52:18,103 epoch 3 - iter 117/130 - loss 0.55370249 - samples/sec: 17.57 - lr: 0.020000\n",
+      "2021-09-21 21:52:18,885 epoch 3 - iter 130/130 - loss 0.53568162 - samples/sec: 16.64 - lr: 0.020000\n",
+      "2021-09-21 21:52:18,887 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:52:18,888 EPOCH 3 done: loss 0.5357 - lr 0.0200000\n",
+      "2021-09-21 21:52:19,443 DEV : loss 0.5312498211860657 - score 0.5714\n",
+      "2021-09-21 21:52:19,444 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:52:23,030 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:52:23,857 epoch 4 - iter 13/130 - loss 0.31022312 - samples/sec: 16.50 - lr: 0.020000\n",
+      "2021-09-21 21:52:24,573 epoch 4 - iter 26/130 - loss 0.36264437 - samples/sec: 18.18 - lr: 0.020000\n",
+      "2021-09-21 21:52:25,351 epoch 4 - iter 39/130 - loss 0.40893870 - samples/sec: 16.74 - lr: 0.020000\n",
+      "2021-09-21 21:52:26,141 epoch 4 - iter 52/130 - loss 0.46452305 - samples/sec: 16.47 - lr: 0.020000\n",
+      "2021-09-21 21:52:26,871 epoch 4 - iter 65/130 - loss 0.43017819 - samples/sec: 17.81 - lr: 0.020000\n",
+      "2021-09-21 21:52:27,661 epoch 4 - iter 78/130 - loss 0.44081512 - samples/sec: 16.48 - lr: 0.020000\n",
+      "2021-09-21 21:52:28,391 epoch 4 - iter 91/130 - loss 0.43033770 - samples/sec: 17.81 - lr: 0.020000\n",
+      "2021-09-21 21:52:29,120 epoch 4 - iter 104/130 - loss 0.42325729 - samples/sec: 17.84 - lr: 0.020000\n",
+      "2021-09-21 21:52:29,877 epoch 4 - iter 117/130 - loss 0.40070702 - samples/sec: 17.20 - lr: 0.020000\n",
+      "2021-09-21 21:52:30,650 epoch 4 - iter 130/130 - loss 0.41783963 - samples/sec: 16.83 - lr: 0.020000\n",
+      "2021-09-21 21:52:30,651 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:52:30,652 EPOCH 4 done: loss 0.4178 - lr 0.0200000\n",
+      "2021-09-21 21:52:31,171 DEV : loss 0.8587847948074341 - score 0.4286\n",
+      "2021-09-21 21:52:31,172 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:52:31,174 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:52:31,960 epoch 5 - iter 13/130 - loss 0.44500467 - samples/sec: 17.14 - lr: 0.020000\n",
+      "2021-09-21 21:52:32,705 epoch 5 - iter 26/130 - loss 0.34939085 - samples/sec: 17.46 - lr: 0.020000\n",
+      "2021-09-21 21:52:33,456 epoch 5 - iter 39/130 - loss 0.27015275 - samples/sec: 17.33 - lr: 0.020000\n",
+      "2021-09-21 21:52:34,217 epoch 5 - iter 52/130 - loss 0.27614060 - samples/sec: 17.10 - lr: 0.020000\n",
+      "2021-09-21 21:52:34,921 epoch 5 - iter 65/130 - loss 0.33011139 - samples/sec: 18.49 - lr: 0.020000\n",
+      "2021-09-21 21:52:35,684 epoch 5 - iter 78/130 - loss 0.37432607 - samples/sec: 17.04 - lr: 0.020000\n",
+      "2021-09-21 21:52:36,417 epoch 5 - iter 91/130 - loss 0.34171975 - samples/sec: 17.77 - lr: 0.020000\n",
+      "2021-09-21 21:52:37,179 epoch 5 - iter 104/130 - loss 0.31252040 - samples/sec: 17.06 - lr: 0.020000\n",
+      "2021-09-21 21:52:37,939 epoch 5 - iter 117/130 - loss 0.31373158 - samples/sec: 17.12 - lr: 0.020000\n",
+      "2021-09-21 21:52:38,700 epoch 5 - iter 130/130 - loss 0.34656515 - samples/sec: 17.10 - lr: 0.020000\n",
+      "2021-09-21 21:52:38,702 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:52:38,702 EPOCH 5 done: loss 0.3466 - lr 0.0200000\n",
+      "2021-09-21 21:52:39,204 DEV : loss 0.4189074635505676 - score 0.4286\n",
+      "2021-09-21 21:52:39,205 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:52:39,207 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:52:40,030 epoch 6 - iter 13/130 - loss 0.18548258 - samples/sec: 17.10 - lr: 0.020000\n",
+      "2021-09-21 21:52:40,729 epoch 6 - iter 26/130 - loss 0.09577605 - samples/sec: 18.62 - lr: 0.020000\n",
+      "2021-09-21 21:52:41,506 epoch 6 - iter 39/130 - loss 0.11277104 - samples/sec: 16.73 - lr: 0.020000\n",
+      "2021-09-21 21:52:42,214 epoch 6 - iter 52/130 - loss 0.14120155 - samples/sec: 18.40 - lr: 0.020000\n",
+      "2021-09-21 21:52:42,937 epoch 6 - iter 65/130 - loss 0.13839793 - samples/sec: 18.00 - lr: 0.020000\n",
+      "2021-09-21 21:52:43,693 epoch 6 - iter 78/130 - loss 0.14242255 - samples/sec: 17.20 - lr: 0.020000\n",
+      "2021-09-21 21:52:44,460 epoch 6 - iter 91/130 - loss 0.13899336 - samples/sec: 16.97 - lr: 0.020000\n",
+      "2021-09-21 21:52:45,244 epoch 6 - iter 104/130 - loss 0.15308196 - samples/sec: 16.59 - lr: 0.020000\n",
+      "2021-09-21 21:52:45,990 epoch 6 - iter 117/130 - loss 0.15625435 - samples/sec: 17.45 - lr: 0.020000\n",
+      "2021-09-21 21:52:46,763 epoch 6 - iter 130/130 - loss 0.14766869 - samples/sec: 16.83 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:54:47,896 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:54:47,896 EPOCH 6 done: loss 0.6477 - lr 0.0200000\n",
-      "2021-09-08 01:54:48,239 DEV : loss 0.4723300337791443 - score 0.0\n",
-      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:54:48,240 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:54:48,242 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:54:48,968 epoch 7 - iter 13/130 - loss 0.64038821 - samples/sec: 18.84 - lr: 0.010000\n",
-      "2021-09-08 01:54:49,640 epoch 7 - iter 26/130 - loss 0.64599978 - samples/sec: 19.38 - lr: 0.010000\n",
-      "2021-09-08 01:54:50,341 epoch 7 - iter 39/130 - loss 0.64681309 - samples/sec: 18.55 - lr: 0.010000\n",
-      "2021-09-08 01:54:51,016 epoch 7 - iter 52/130 - loss 0.64116716 - samples/sec: 19.29 - lr: 0.010000\n",
-      "2021-09-08 01:54:51,665 epoch 7 - iter 65/130 - loss 0.64204327 - samples/sec: 20.05 - lr: 0.010000\n",
-      "2021-09-08 01:54:52,457 epoch 7 - iter 78/130 - loss 0.64301784 - samples/sec: 16.44 - lr: 0.010000\n",
-      "2021-09-08 01:54:53,233 epoch 7 - iter 91/130 - loss 0.64653176 - samples/sec: 16.78 - lr: 0.010000\n",
-      "2021-09-08 01:54:54,057 epoch 7 - iter 104/130 - loss 0.64648233 - samples/sec: 15.78 - lr: 0.010000\n",
-      "2021-09-08 01:54:54,848 epoch 7 - iter 117/130 - loss 0.64566464 - samples/sec: 16.46 - lr: 0.010000\n",
-      "2021-09-08 01:54:55,617 epoch 7 - iter 130/130 - loss 0.64673841 - samples/sec: 16.93 - lr: 0.010000\n",
-      "2021-09-08 01:54:55,618 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:54:55,618 EPOCH 7 done: loss 0.6467 - lr 0.0100000\n",
-      "2021-09-08 01:54:56,091 DEV : loss 0.4228614270687103 - score 0.0714\n",
-      "2021-09-08 01:54:56,092 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:54:56,187 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:54:56,924 epoch 8 - iter 13/130 - loss 0.62613513 - samples/sec: 18.28 - lr: 0.010000\n",
-      "2021-09-08 01:54:57,708 epoch 8 - iter 26/130 - loss 0.63329043 - samples/sec: 16.61 - lr: 0.010000\n",
-      "2021-09-08 01:54:58,526 epoch 8 - iter 39/130 - loss 0.64245955 - samples/sec: 15.92 - lr: 0.010000\n",
-      "2021-09-08 01:54:59,320 epoch 8 - iter 52/130 - loss 0.64565338 - samples/sec: 16.40 - lr: 0.010000\n",
-      "2021-09-08 01:55:00,131 epoch 8 - iter 65/130 - loss 0.64880211 - samples/sec: 16.03 - lr: 0.010000\n",
-      "2021-09-08 01:55:00,950 epoch 8 - iter 78/130 - loss 0.65021581 - samples/sec: 15.89 - lr: 0.010000\n",
-      "2021-09-08 01:55:01,831 epoch 8 - iter 91/130 - loss 0.64770995 - samples/sec: 14.77 - lr: 0.010000\n",
-      "2021-09-08 01:55:02,643 epoch 8 - iter 104/130 - loss 0.64817247 - samples/sec: 16.05 - lr: 0.010000\n",
-      "2021-09-08 01:55:03,556 epoch 8 - iter 117/130 - loss 0.64799221 - samples/sec: 14.26 - lr: 0.010000\n",
-      "2021-09-08 01:55:04,426 epoch 8 - iter 130/130 - loss 0.64572951 - samples/sec: 14.97 - lr: 0.010000\n",
-      "2021-09-08 01:55:04,427 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:04,428 EPOCH 8 done: loss 0.6457 - lr 0.0100000\n",
-      "2021-09-08 01:55:05,062 DEV : loss 0.439302921295166 - score 0.0\n",
-      "2021-09-08 01:55:05,063 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:55:05,065 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:06,005 epoch 9 - iter 13/130 - loss 0.64821663 - samples/sec: 15.10 - lr: 0.010000\n",
-      "2021-09-08 01:55:06,903 epoch 9 - iter 26/130 - loss 0.64413511 - samples/sec: 14.50 - lr: 0.010000\n",
-      "2021-09-08 01:55:07,697 epoch 9 - iter 39/130 - loss 0.63985371 - samples/sec: 16.39 - lr: 0.010000\n",
-      "2021-09-08 01:55:08,506 epoch 9 - iter 52/130 - loss 0.64221208 - samples/sec: 16.09 - lr: 0.010000\n",
-      "2021-09-08 01:55:09,425 epoch 9 - iter 65/130 - loss 0.64264104 - samples/sec: 14.14 - lr: 0.010000\n",
-      "2021-09-08 01:55:10,244 epoch 9 - iter 78/130 - loss 0.64529467 - samples/sec: 15.91 - lr: 0.010000\n",
-      "2021-09-08 01:55:11,103 epoch 9 - iter 91/130 - loss 0.64656940 - samples/sec: 15.15 - lr: 0.010000\n",
-      "2021-09-08 01:55:11,896 epoch 9 - iter 104/130 - loss 0.64632276 - samples/sec: 16.43 - lr: 0.010000\n",
-      "2021-09-08 01:55:12,629 epoch 9 - iter 117/130 - loss 0.64354387 - samples/sec: 17.76 - lr: 0.010000\n",
-      "2021-09-08 01:55:13,376 epoch 9 - iter 130/130 - loss 0.64456750 - samples/sec: 17.42 - lr: 0.010000\n",
-      "2021-09-08 01:55:13,377 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:13,378 EPOCH 9 done: loss 0.6446 - lr 0.0100000\n",
-      "2021-09-08 01:55:13,831 DEV : loss 0.4613458216190338 - score 0.0\n",
-      "2021-09-08 01:55:13,832 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:55:13,902 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:14,673 epoch 10 - iter 13/130 - loss 0.65381892 - samples/sec: 17.37 - lr: 0.010000\n",
-      "2021-09-08 01:55:15,446 epoch 10 - iter 26/130 - loss 0.64849163 - samples/sec: 16.85 - lr: 0.010000\n",
-      "2021-09-08 01:55:16,222 epoch 10 - iter 39/130 - loss 0.64265872 - samples/sec: 16.76 - lr: 0.010000\n",
-      "2021-09-08 01:55:17,104 epoch 10 - iter 52/130 - loss 0.64155843 - samples/sec: 14.77 - lr: 0.010000\n",
-      "2021-09-08 01:55:17,998 epoch 10 - iter 65/130 - loss 0.64294444 - samples/sec: 14.56 - lr: 0.010000\n",
-      "2021-09-08 01:55:18,809 epoch 10 - iter 78/130 - loss 0.64442576 - samples/sec: 16.07 - lr: 0.010000\n",
-      "2021-09-08 01:55:19,652 epoch 10 - iter 91/130 - loss 0.64537707 - samples/sec: 15.43 - lr: 0.010000\n",
-      "2021-09-08 01:55:20,473 epoch 10 - iter 104/130 - loss 0.64739941 - samples/sec: 15.86 - lr: 0.010000\n",
-      "2021-09-08 01:55:21,370 epoch 10 - iter 117/130 - loss 0.64626216 - samples/sec: 14.52 - lr: 0.010000\n",
-      "2021-09-08 01:55:22,205 epoch 10 - iter 130/130 - loss 0.64624265 - samples/sec: 15.58 - lr: 0.010000\n",
-      "2021-09-08 01:55:22,206 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:22,207 EPOCH 10 done: loss 0.6462 - lr 0.0100000\n",
-      "2021-09-08 01:55:23,037 DEV : loss 0.42555826902389526 - score 0.0714\n",
-      "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 01:55:23,038 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:55:27,179 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:27,180 Testing using best model ...\n",
-      "2021-09-08 01:55:27,183 loading file temp/best-model.pt\n",
+      "2021-09-21 21:52:46,764 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:52:46,765 EPOCH 6 done: loss 0.1477 - lr 0.0200000\n",
+      "2021-09-21 21:52:47,273 DEV : loss 0.5064550042152405 - score 0.6429\n",
+      "2021-09-21 21:52:47,273 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:52:51,169 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:52:51,748 epoch 7 - iter 13/130 - loss 0.00506172 - samples/sec: 23.15 - lr: 0.020000\n",
+      "2021-09-21 21:52:52,316 epoch 7 - iter 26/130 - loss 0.16879745 - samples/sec: 22.88 - lr: 0.020000\n",
+      "2021-09-21 21:52:52,881 epoch 7 - iter 39/130 - loss 0.14608274 - samples/sec: 23.05 - lr: 0.020000\n",
+      "2021-09-21 21:52:53,439 epoch 7 - iter 52/130 - loss 0.14880566 - samples/sec: 23.35 - lr: 0.020000\n",
+      "2021-09-21 21:52:54,020 epoch 7 - iter 65/130 - loss 0.18430014 - samples/sec: 22.41 - lr: 0.020000\n",
+      "2021-09-21 21:52:54,622 epoch 7 - iter 78/130 - loss 0.21084596 - samples/sec: 21.62 - lr: 0.020000\n",
+      "2021-09-21 21:52:55,187 epoch 7 - iter 91/130 - loss 0.18294913 - samples/sec: 23.05 - lr: 0.020000\n",
+      "2021-09-21 21:52:55,740 epoch 7 - iter 104/130 - loss 0.17172976 - samples/sec: 23.53 - lr: 0.020000\n",
+      "2021-09-21 21:52:56,307 epoch 7 - iter 117/130 - loss 0.21626556 - samples/sec: 22.97 - lr: 0.020000\n",
+      "2021-09-21 21:52:56,864 epoch 7 - iter 130/130 - loss 0.21291071 - samples/sec: 23.37 - lr: 0.020000\n",
+      "2021-09-21 21:52:56,865 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:52:56,865 EPOCH 7 done: loss 0.2129 - lr 0.0200000\n",
+      "2021-09-21 21:52:57,085 DEV : loss 0.2645382881164551 - score 0.4286\n",
+      "2021-09-21 21:52:57,085 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:52:57,087 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:52:57,683 epoch 8 - iter 13/130 - loss 0.08265044 - samples/sec: 22.39 - lr: 0.020000\n",
+      "2021-09-21 21:52:58,278 epoch 8 - iter 26/130 - loss 0.04581792 - samples/sec: 21.89 - lr: 0.020000\n",
+      "2021-09-21 21:52:58,946 epoch 8 - iter 39/130 - loss 0.13182043 - samples/sec: 19.47 - lr: 0.020000\n",
+      "2021-09-21 21:52:59,726 epoch 8 - iter 52/130 - loss 0.18217212 - samples/sec: 16.69 - lr: 0.020000\n",
+      "2021-09-21 21:53:00,498 epoch 8 - iter 65/130 - loss 0.19092547 - samples/sec: 16.86 - lr: 0.020000\n",
+      "2021-09-21 21:53:01,240 epoch 8 - iter 78/130 - loss 0.24430823 - samples/sec: 17.54 - lr: 0.020000\n",
+      "2021-09-21 21:53:01,994 epoch 8 - iter 91/130 - loss 0.21609039 - samples/sec: 17.28 - lr: 0.020000\n",
+      "2021-09-21 21:53:02,788 epoch 8 - iter 104/130 - loss 0.23041547 - samples/sec: 16.38 - lr: 0.020000\n",
+      "2021-09-21 21:53:03,573 epoch 8 - iter 117/130 - loss 0.22698162 - samples/sec: 16.60 - lr: 0.020000\n",
+      "2021-09-21 21:53:04,358 epoch 8 - iter 130/130 - loss 0.22356260 - samples/sec: 16.57 - lr: 0.020000\n",
+      "2021-09-21 21:53:04,360 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:53:04,360 EPOCH 8 done: loss 0.2236 - lr 0.0200000\n",
+      "2021-09-21 21:53:04,747 DEV : loss 0.29425638914108276 - score 0.6429\n",
+      "2021-09-21 21:53:04,749 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:53:08,427 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:53:09,207 epoch 9 - iter 13/130 - loss 0.12153100 - samples/sec: 17.46 - lr: 0.020000\n",
+      "2021-09-21 21:53:09,975 epoch 9 - iter 26/130 - loss 0.17795578 - samples/sec: 16.95 - lr: 0.020000\n",
+      "2021-09-21 21:53:10,741 epoch 9 - iter 39/130 - loss 0.20156588 - samples/sec: 16.99 - lr: 0.020000\n",
+      "2021-09-21 21:53:11,509 epoch 9 - iter 52/130 - loss 0.19382884 - samples/sec: 16.94 - lr: 0.020000\n",
+      "2021-09-21 21:53:12,231 epoch 9 - iter 65/130 - loss 0.16745240 - samples/sec: 18.03 - lr: 0.020000\n",
+      "2021-09-21 21:53:13,003 epoch 9 - iter 78/130 - loss 0.15432852 - samples/sec: 16.85 - lr: 0.020000\n",
+      "2021-09-21 21:53:13,762 epoch 9 - iter 91/130 - loss 0.13253847 - samples/sec: 17.14 - lr: 0.020000\n",
+      "2021-09-21 21:53:14,517 epoch 9 - iter 104/130 - loss 0.14947038 - samples/sec: 17.23 - lr: 0.020000\n",
+      "2021-09-21 21:53:15,251 epoch 9 - iter 117/130 - loss 0.14182414 - samples/sec: 17.73 - lr: 0.020000\n",
+      "2021-09-21 21:53:16,032 epoch 9 - iter 130/130 - loss 0.14701749 - samples/sec: 16.67 - lr: 0.020000\n",
+      "2021-09-21 21:53:16,033 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:53:16,034 EPOCH 9 done: loss 0.1470 - lr 0.0200000\n",
+      "2021-09-21 21:53:16,354 DEV : loss 0.4198043644428253 - score 0.6429\n",
+      "2021-09-21 21:53:16,355 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:53:16,357 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:53:16,948 epoch 10 - iter 13/130 - loss 0.24284759 - samples/sec: 22.57 - lr: 0.020000\n",
+      "2021-09-21 21:53:17,527 epoch 10 - iter 26/130 - loss 0.37449973 - samples/sec: 22.49 - lr: 0.020000\n",
+      "2021-09-21 21:53:18,086 epoch 10 - iter 39/130 - loss 0.25251312 - samples/sec: 23.30 - lr: 0.020000\n",
+      "2021-09-21 21:53:18,651 epoch 10 - iter 52/130 - loss 0.20331658 - samples/sec: 23.07 - lr: 0.020000\n",
+      "2021-09-21 21:53:19,202 epoch 10 - iter 65/130 - loss 0.16632899 - samples/sec: 23.63 - lr: 0.020000\n",
+      "2021-09-21 21:53:19,759 epoch 10 - iter 78/130 - loss 0.17364063 - samples/sec: 23.34 - lr: 0.020000\n",
+      "2021-09-21 21:53:20,346 epoch 10 - iter 91/130 - loss 0.15461125 - samples/sec: 22.19 - lr: 0.020000\n",
+      "2021-09-21 21:53:20,943 epoch 10 - iter 104/130 - loss 0.15088862 - samples/sec: 21.81 - lr: 0.020000\n",
+      "2021-09-21 21:53:21,622 epoch 10 - iter 117/130 - loss 0.15218466 - samples/sec: 19.17 - lr: 0.020000\n",
+      "2021-09-21 21:53:22,365 epoch 10 - iter 130/130 - loss 0.15358702 - samples/sec: 17.52 - lr: 0.020000\n",
+      "2021-09-21 21:53:22,366 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:53:22,367 EPOCH 10 done: loss 0.1536 - lr 0.0200000\n",
+      "2021-09-21 21:53:23,010 DEV : loss 0.394258588552475 - score 0.5\n",
+      "2021-09-21 21:53:23,010 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:53:27,688 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:53:27,689 Testing using best model ...\n",
+      "2021-09-21 21:53:27,690 loading file temp/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:55:33,248 \t0.0625\n",
-      "2021-09-08 01:55:33,249 \n",
+      "2021-09-21 21:53:34,773 \t0.625\n",
+      "2021-09-21 21:53:34,773 \n",
       "Results:\n",
-      "- F-score (micro) 0.0625\n",
-      "- F-score (macro) 0.0312\n",
-      "- Accuracy 0.0625\n",
+      "- F-score (micro) 0.625\n",
+      "- F-score (macro) 0.4687\n",
+      "- Accuracy 0.625\n",
       "\n",
       "By class:\n",
       "               precision    recall  f1-score   support\n",
       "\n",
-      "       travel     0.0000    0.0000    0.0000         0\n",
-      "   technology     0.0000    0.0000    0.0000         1\n",
-      "     wellness     0.0000    0.0000    0.0000         0\n",
-      "        women     0.0000    0.0000    0.0000         1\n",
-      "      parents     0.0000    0.0000    0.0000         3\n",
-      "     business     0.0000    0.0000    0.0000         1\n",
-      "     weddings     0.0000    0.0000    0.0000         0\n",
-      "      fashion     0.0000    0.0000    0.0000         0\n",
+      "       travel     1.0000    1.0000    1.0000         2\n",
+      "   technology     0.0000    0.0000    0.0000         0\n",
+      "     wellness     1.0000    0.5000    0.6667         2\n",
+      "        women     0.0000    0.0000    0.0000         0\n",
+      "      parents     0.3333    1.0000    0.5000         1\n",
+      "     business     0.0000    0.0000    0.0000         0\n",
+      "     weddings     0.0000    0.0000    0.0000         1\n",
+      "      fashion     1.0000    1.0000    1.0000         1\n",
       "entertainment     0.0000    0.0000    0.0000         1\n",
-      "      science     1.0000    0.3333    0.5000         3\n",
-      "      divorce     0.0000    0.0000    0.0000         1\n",
-      "        crime     0.0000    0.0000    0.0000         0\n",
-      "     religion     0.0000    0.0000    0.0000         2\n",
-      "       sports     0.0000    0.0000    0.0000         1\n",
-      "     politics     0.0000    0.0000    0.0000         2\n",
-      "       comedy     0.0000    0.0000    0.0000         0\n",
+      "      science     1.0000    1.0000    1.0000         1\n",
+      "      divorce     0.0000    0.0000    0.0000         2\n",
+      "        crime     0.5000    1.0000    0.6667         1\n",
+      "     religion     1.0000    1.0000    1.0000         1\n",
+      "       sports     1.0000    1.0000    1.0000         1\n",
+      "     politics     0.5000    1.0000    0.6667         1\n",
+      "       comedy     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "    micro avg     0.0625    0.0625    0.0625        16\n",
-      "    macro avg     0.0625    0.0208    0.0312        16\n",
-      " weighted avg     0.1875    0.0625    0.0938        16\n",
-      "  samples avg     0.0625    0.0625    0.0625        16\n",
+      "    micro avg     0.6250    0.6250    0.6250        16\n",
+      "    macro avg     0.4583    0.5312    0.4687        16\n",
+      " weighted avg     0.5833    0.6250    0.5729        16\n",
+      "  samples avg     0.6250    0.6250    0.6250        16\n",
       "\n",
-      "2021-09-08 01:55:33,249 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:50,429 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
+      "2021-09-21 21:53:34,774 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:53:48,009 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:55:54,971 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:53:52,284 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 146/146 [00:00<00:00, 76853.46it/s]"
+      "100%|██████████| 146/146 [00:00<00:00, 79964.53it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:55:54,975 [b'travel', b'technology', b'wellness', b'women', b'parents', b'business', b'weddings', b'fashion', b'entertainment', b'science', b'divorce', b'crime', b'religion', b'sports', b'politics', b'comedy']\n",
-      "2021-09-08 01:55:54,989 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:54,991 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:53:52,287 [b'travel', b'technology', b'wellness', b'women', b'parents', b'business', b'weddings', b'fashion', b'entertainment', b'science', b'divorce', b'crime', b'religion', b'sports', b'politics', b'comedy']\n",
+      "2021-09-21 21:53:52,304 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:53:52,306 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -960,25 +961,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:55:54,992 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:54,992 Corpus: \"Corpus: 130 train + 14 dev + 16 test sentences\"\n",
-      "2021-09-08 01:55:54,993 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:54,993 Parameters:\n",
-      "2021-09-08 01:55:54,993  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:55:54,993  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:55:54,994  - patience: \"3\"\n",
-      "2021-09-08 01:55:54,994  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:55:54,994  - max_epochs: \"10\"\n",
-      "2021-09-08 01:55:54,995  - shuffle: \"True\"\n",
-      "2021-09-08 01:55:54,995  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:55:54,995  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:55:54,995 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:54,996 Model training base path: \"temp\"\n",
-      "2021-09-08 01:55:54,996 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:54,996 Device: cuda:0\n",
-      "2021-09-08 01:55:54,997 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:55:54,997 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:55:55,003 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:53:52,307 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:53:52,307 Corpus: \"Corpus: 130 train + 14 dev + 16 test sentences\"\n",
+      "2021-09-21 21:53:52,307 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:53:52,308 Parameters:\n",
+      "2021-09-21 21:53:52,308  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:53:52,308  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:53:52,309  - patience: \"3\"\n",
+      "2021-09-21 21:53:52,309  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:53:52,309  - max_epochs: \"10\"\n",
+      "2021-09-21 21:53:52,309  - shuffle: \"True\"\n",
+      "2021-09-21 21:53:52,310  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:53:52,310  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:53:52,310 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:53:52,311 Model training base path: \"temp\"\n",
+      "2021-09-21 21:53:52,311 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:53:52,311 Device: cuda:0\n",
+      "2021-09-21 21:53:52,311 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:53:52,312 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:53:52,325 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -992,221 +993,221 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:55:55,917 epoch 1 - iter 13/130 - loss 0.38150090 - samples/sec: 14.98 - lr: 0.020000\n",
-      "2021-09-08 01:55:56,820 epoch 1 - iter 26/130 - loss 0.46058325 - samples/sec: 14.40 - lr: 0.020000\n",
-      "2021-09-08 01:55:57,714 epoch 1 - iter 39/130 - loss 0.47753961 - samples/sec: 14.56 - lr: 0.020000\n",
-      "2021-09-08 01:55:58,575 epoch 1 - iter 52/130 - loss 0.53185610 - samples/sec: 15.10 - lr: 0.020000\n",
-      "2021-09-08 01:55:59,457 epoch 1 - iter 65/130 - loss 0.56261491 - samples/sec: 14.76 - lr: 0.020000\n",
-      "2021-09-08 01:56:00,311 epoch 1 - iter 78/130 - loss 0.51825787 - samples/sec: 15.24 - lr: 0.020000\n",
-      "2021-09-08 01:56:01,173 epoch 1 - iter 91/130 - loss 0.50962885 - samples/sec: 15.10 - lr: 0.020000\n",
-      "2021-09-08 01:56:02,040 epoch 1 - iter 104/130 - loss 0.52625475 - samples/sec: 15.00 - lr: 0.020000\n",
-      "2021-09-08 01:56:02,914 epoch 1 - iter 117/130 - loss 0.51940327 - samples/sec: 14.90 - lr: 0.020000\n",
-      "2021-09-08 01:56:03,795 epoch 1 - iter 130/130 - loss 0.50717702 - samples/sec: 14.77 - lr: 0.020000\n",
-      "2021-09-08 01:56:03,796 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:03,797 EPOCH 1 done: loss 0.5072 - lr 0.0200000\n",
-      "2021-09-08 01:56:04,401 DEV : loss 0.5178500413894653 - score 0.2857\n",
-      "2021-09-08 01:56:04,402 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:53:53,145 epoch 1 - iter 13/130 - loss 0.24628818 - samples/sec: 16.80 - lr: 0.020000\n",
+      "2021-09-21 21:53:53,896 epoch 1 - iter 26/130 - loss 0.36281720 - samples/sec: 17.34 - lr: 0.020000\n",
+      "2021-09-21 21:53:54,667 epoch 1 - iter 39/130 - loss 0.40988124 - samples/sec: 16.88 - lr: 0.020000\n",
+      "2021-09-21 21:53:55,297 epoch 1 - iter 52/130 - loss 0.49261875 - samples/sec: 20.67 - lr: 0.020000\n",
+      "2021-09-21 21:53:55,881 epoch 1 - iter 65/130 - loss 0.53742265 - samples/sec: 22.26 - lr: 0.020000\n",
+      "2021-09-21 21:53:56,445 epoch 1 - iter 78/130 - loss 0.56314224 - samples/sec: 23.10 - lr: 0.020000\n",
+      "2021-09-21 21:53:57,011 epoch 1 - iter 91/130 - loss 0.58228900 - samples/sec: 23.00 - lr: 0.020000\n",
+      "2021-09-21 21:53:57,566 epoch 1 - iter 104/130 - loss 0.58918620 - samples/sec: 23.46 - lr: 0.020000\n",
+      "2021-09-21 21:53:58,122 epoch 1 - iter 117/130 - loss 0.59855063 - samples/sec: 23.42 - lr: 0.020000\n",
+      "2021-09-21 21:53:58,675 epoch 1 - iter 130/130 - loss 0.60628960 - samples/sec: 23.52 - lr: 0.020000\n",
+      "2021-09-21 21:53:58,677 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:53:58,677 EPOCH 1 done: loss 0.6063 - lr 0.0200000\n",
+      "2021-09-21 21:53:59,122 DEV : loss 0.6516722440719604 - score 0.0714\n",
+      "2021-09-21 21:53:59,122 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:56:08,369 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:09,364 epoch 2 - iter 13/130 - loss 0.53458196 - samples/sec: 14.89 - lr: 0.020000\n",
-      "2021-09-08 01:56:10,196 epoch 2 - iter 26/130 - loss 0.47810452 - samples/sec: 15.64 - lr: 0.020000\n",
-      "2021-09-08 01:56:11,071 epoch 2 - iter 39/130 - loss 0.45312227 - samples/sec: 14.86 - lr: 0.020000\n",
-      "2021-09-08 01:56:11,966 epoch 2 - iter 52/130 - loss 0.53594224 - samples/sec: 14.54 - lr: 0.020000\n",
-      "2021-09-08 01:56:12,827 epoch 2 - iter 65/130 - loss 0.52328119 - samples/sec: 15.10 - lr: 0.020000\n",
-      "2021-09-08 01:56:13,727 epoch 2 - iter 78/130 - loss 0.55265977 - samples/sec: 14.47 - lr: 0.020000\n",
-      "2021-09-08 01:56:14,610 epoch 2 - iter 91/130 - loss 0.50546175 - samples/sec: 14.73 - lr: 0.020000\n",
-      "2021-09-08 01:56:15,433 epoch 2 - iter 104/130 - loss 0.54062106 - samples/sec: 15.80 - lr: 0.020000\n",
-      "2021-09-08 01:56:16,271 epoch 2 - iter 117/130 - loss 0.54157401 - samples/sec: 15.53 - lr: 0.020000\n",
-      "2021-09-08 01:56:17,144 epoch 2 - iter 130/130 - loss 0.55140843 - samples/sec: 14.90 - lr: 0.020000\n",
-      "2021-09-08 01:56:17,146 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:17,146 EPOCH 2 done: loss 0.5514 - lr 0.0200000\n",
-      "2021-09-08 01:56:17,836 DEV : loss 0.27413880825042725 - score 0.5\n",
-      "2021-09-08 01:56:17,837 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:54:02,764 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:54:03,530 epoch 2 - iter 13/130 - loss 0.64543771 - samples/sec: 18.57 - lr: 0.020000\n",
+      "2021-09-21 21:54:04,205 epoch 2 - iter 26/130 - loss 0.65489988 - samples/sec: 19.28 - lr: 0.020000\n",
+      "2021-09-21 21:54:04,898 epoch 2 - iter 39/130 - loss 0.65323253 - samples/sec: 18.80 - lr: 0.020000\n",
+      "2021-09-21 21:54:05,483 epoch 2 - iter 52/130 - loss 0.64862326 - samples/sec: 22.26 - lr: 0.020000\n",
+      "2021-09-21 21:54:06,039 epoch 2 - iter 65/130 - loss 0.65002908 - samples/sec: 23.42 - lr: 0.020000\n",
+      "2021-09-21 21:54:06,592 epoch 2 - iter 78/130 - loss 0.65176258 - samples/sec: 23.51 - lr: 0.020000\n",
+      "2021-09-21 21:54:07,147 epoch 2 - iter 91/130 - loss 0.65265971 - samples/sec: 23.48 - lr: 0.020000\n",
+      "2021-09-21 21:54:07,724 epoch 2 - iter 104/130 - loss 0.64999876 - samples/sec: 22.57 - lr: 0.020000\n",
+      "2021-09-21 21:54:08,285 epoch 2 - iter 117/130 - loss 0.64895622 - samples/sec: 23.21 - lr: 0.020000\n",
+      "2021-09-21 21:54:08,937 epoch 2 - iter 130/130 - loss 0.64791225 - samples/sec: 19.96 - lr: 0.020000\n",
+      "2021-09-21 21:54:08,939 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:54:08,939 EPOCH 2 done: loss 0.6479 - lr 0.0200000\n",
+      "2021-09-21 21:54:09,495 DEV : loss 0.3890973925590515 - score 0.0\n",
+      "2021-09-21 21:54:09,495 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:54:09,497 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:54:10,301 epoch 3 - iter 13/130 - loss 0.67350821 - samples/sec: 17.22 - lr: 0.020000\n",
+      "2021-09-21 21:54:11,033 epoch 3 - iter 26/130 - loss 0.65502633 - samples/sec: 17.79 - lr: 0.020000\n",
+      "2021-09-21 21:54:11,751 epoch 3 - iter 39/130 - loss 0.65493635 - samples/sec: 18.12 - lr: 0.020000\n",
+      "2021-09-21 21:54:12,511 epoch 3 - iter 52/130 - loss 0.65800391 - samples/sec: 17.13 - lr: 0.020000\n",
+      "2021-09-21 21:54:13,258 epoch 3 - iter 65/130 - loss 0.65359510 - samples/sec: 17.42 - lr: 0.020000\n",
+      "2021-09-21 21:54:13,995 epoch 3 - iter 78/130 - loss 0.65176397 - samples/sec: 17.65 - lr: 0.020000\n",
+      "2021-09-21 21:54:14,742 epoch 3 - iter 91/130 - loss 0.65168586 - samples/sec: 17.44 - lr: 0.020000\n",
+      "2021-09-21 21:54:15,469 epoch 3 - iter 104/130 - loss 0.65000332 - samples/sec: 17.89 - lr: 0.020000\n",
+      "2021-09-21 21:54:16,197 epoch 3 - iter 117/130 - loss 0.65043368 - samples/sec: 17.90 - lr: 0.020000\n",
+      "2021-09-21 21:54:16,949 epoch 3 - iter 130/130 - loss 0.65230198 - samples/sec: 17.30 - lr: 0.020000\n",
+      "2021-09-21 21:54:16,950 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:54:16,951 EPOCH 3 done: loss 0.6523 - lr 0.0200000\n",
+      "2021-09-21 21:54:17,472 DEV : loss 0.42122089862823486 - score 0.0\n",
+      "2021-09-21 21:54:17,473 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:54:17,474 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:54:18,236 epoch 4 - iter 13/130 - loss 0.63999520 - samples/sec: 17.92 - lr: 0.020000\n",
+      "2021-09-21 21:54:18,992 epoch 4 - iter 26/130 - loss 0.64884588 - samples/sec: 17.21 - lr: 0.020000\n",
+      "2021-09-21 21:54:19,721 epoch 4 - iter 39/130 - loss 0.64696637 - samples/sec: 17.84 - lr: 0.020000\n",
+      "2021-09-21 21:54:20,477 epoch 4 - iter 52/130 - loss 0.65102197 - samples/sec: 17.22 - lr: 0.020000\n",
+      "2021-09-21 21:54:21,216 epoch 4 - iter 65/130 - loss 0.65200561 - samples/sec: 17.62 - lr: 0.020000\n",
+      "2021-09-21 21:54:21,953 epoch 4 - iter 78/130 - loss 0.64841426 - samples/sec: 17.64 - lr: 0.020000\n",
+      "2021-09-21 21:54:22,653 epoch 4 - iter 91/130 - loss 0.65240951 - samples/sec: 18.60 - lr: 0.020000\n",
+      "2021-09-21 21:54:23,363 epoch 4 - iter 104/130 - loss 0.65279223 - samples/sec: 18.34 - lr: 0.020000\n",
+      "2021-09-21 21:54:24,088 epoch 4 - iter 117/130 - loss 0.65049012 - samples/sec: 17.95 - lr: 0.020000\n",
+      "2021-09-21 21:54:24,844 epoch 4 - iter 130/130 - loss 0.64886713 - samples/sec: 17.22 - lr: 0.020000\n",
+      "2021-09-21 21:54:24,845 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:54:24,845 EPOCH 4 done: loss 0.6489 - lr 0.0200000\n",
+      "2021-09-21 21:54:25,294 DEV : loss 0.4986637532711029 - score 0.0714\n",
+      "2021-09-21 21:54:25,296 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:56:22,095 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:23,027 epoch 3 - iter 13/130 - loss 0.36650944 - samples/sec: 15.87 - lr: 0.020000\n",
-      "2021-09-08 01:56:23,857 epoch 3 - iter 26/130 - loss 0.34507127 - samples/sec: 15.68 - lr: 0.020000\n",
-      "2021-09-08 01:56:24,701 epoch 3 - iter 39/130 - loss 0.35422811 - samples/sec: 15.42 - lr: 0.020000\n",
-      "2021-09-08 01:56:25,641 epoch 3 - iter 52/130 - loss 0.42982121 - samples/sec: 13.85 - lr: 0.020000\n",
-      "2021-09-08 01:56:26,490 epoch 3 - iter 65/130 - loss 0.39938655 - samples/sec: 15.32 - lr: 0.020000\n",
-      "2021-09-08 01:56:27,370 epoch 3 - iter 78/130 - loss 0.39692565 - samples/sec: 14.80 - lr: 0.020000\n",
-      "2021-09-08 01:56:28,203 epoch 3 - iter 91/130 - loss 0.41199515 - samples/sec: 15.63 - lr: 0.020000\n",
-      "2021-09-08 01:56:29,063 epoch 3 - iter 104/130 - loss 0.40903569 - samples/sec: 15.12 - lr: 0.020000\n",
-      "2021-09-08 01:56:29,919 epoch 3 - iter 117/130 - loss 0.39724761 - samples/sec: 15.20 - lr: 0.020000\n",
-      "2021-09-08 01:56:30,791 epoch 3 - iter 130/130 - loss 0.40590027 - samples/sec: 14.93 - lr: 0.020000\n",
-      "2021-09-08 01:56:30,792 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:30,793 EPOCH 3 done: loss 0.4059 - lr 0.0200000\n",
-      "2021-09-08 01:56:31,345 DEV : loss 0.3536321520805359 - score 0.3571\n",
-      "2021-09-08 01:56:31,346 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:56:31,347 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:32,252 epoch 4 - iter 13/130 - loss 0.51348665 - samples/sec: 15.16 - lr: 0.020000\n",
-      "2021-09-08 01:56:33,111 epoch 4 - iter 26/130 - loss 0.36977512 - samples/sec: 15.15 - lr: 0.020000\n",
-      "2021-09-08 01:56:33,994 epoch 4 - iter 39/130 - loss 0.39726575 - samples/sec: 14.74 - lr: 0.020000\n",
-      "2021-09-08 01:56:34,813 epoch 4 - iter 52/130 - loss 0.40041727 - samples/sec: 15.88 - lr: 0.020000\n",
-      "2021-09-08 01:56:35,536 epoch 4 - iter 65/130 - loss 0.37016790 - samples/sec: 18.02 - lr: 0.020000\n",
-      "2021-09-08 01:56:36,240 epoch 4 - iter 78/130 - loss 0.40136242 - samples/sec: 18.50 - lr: 0.020000\n",
-      "2021-09-08 01:56:36,941 epoch 4 - iter 91/130 - loss 0.40839876 - samples/sec: 18.59 - lr: 0.020000\n",
-      "2021-09-08 01:56:37,683 epoch 4 - iter 104/130 - loss 0.36976575 - samples/sec: 17.56 - lr: 0.020000\n",
-      "2021-09-08 01:56:38,353 epoch 4 - iter 117/130 - loss 0.35481621 - samples/sec: 19.42 - lr: 0.020000\n",
-      "2021-09-08 01:56:39,031 epoch 4 - iter 130/130 - loss 0.34272142 - samples/sec: 19.22 - lr: 0.020000\n",
-      "2021-09-08 01:56:39,032 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:39,033 EPOCH 4 done: loss 0.3427 - lr 0.0200000\n",
-      "2021-09-08 01:56:39,422 DEV : loss 0.5439569354057312 - score 0.3571\n",
-      "2021-09-08 01:56:39,423 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:56:39,426 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:40,125 epoch 5 - iter 13/130 - loss 0.37191463 - samples/sec: 19.27 - lr: 0.020000\n",
-      "2021-09-08 01:56:40,803 epoch 5 - iter 26/130 - loss 0.43339222 - samples/sec: 19.21 - lr: 0.020000\n",
-      "2021-09-08 01:56:41,642 epoch 5 - iter 39/130 - loss 0.37378697 - samples/sec: 15.52 - lr: 0.020000\n",
-      "2021-09-08 01:56:42,555 epoch 5 - iter 52/130 - loss 0.32749113 - samples/sec: 14.26 - lr: 0.020000\n",
-      "2021-09-08 01:56:43,396 epoch 5 - iter 65/130 - loss 0.30403671 - samples/sec: 15.47 - lr: 0.020000\n",
-      "2021-09-08 01:56:44,317 epoch 5 - iter 78/130 - loss 0.26547909 - samples/sec: 14.13 - lr: 0.020000\n",
-      "2021-09-08 01:56:45,202 epoch 5 - iter 91/130 - loss 0.24448041 - samples/sec: 14.72 - lr: 0.020000\n",
-      "2021-09-08 01:56:45,990 epoch 5 - iter 104/130 - loss 0.28957849 - samples/sec: 16.51 - lr: 0.020000\n",
-      "2021-09-08 01:56:46,766 epoch 5 - iter 117/130 - loss 0.30145985 - samples/sec: 16.79 - lr: 0.020000\n",
-      "2021-09-08 01:56:47,481 epoch 5 - iter 130/130 - loss 0.29728445 - samples/sec: 18.20 - lr: 0.020000\n",
-      "2021-09-08 01:56:47,483 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:47,483 EPOCH 5 done: loss 0.2973 - lr 0.0200000\n",
-      "2021-09-08 01:56:48,028 DEV : loss 0.34933143854141235 - score 0.5\n",
-      "2021-09-08 01:56:48,029 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:56:48,103 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:48,858 epoch 6 - iter 13/130 - loss 0.23903912 - samples/sec: 17.69 - lr: 0.020000\n",
-      "2021-09-08 01:56:49,744 epoch 6 - iter 26/130 - loss 0.35732137 - samples/sec: 14.69 - lr: 0.020000\n",
-      "2021-09-08 01:56:50,664 epoch 6 - iter 39/130 - loss 0.28790771 - samples/sec: 14.13 - lr: 0.020000\n",
-      "2021-09-08 01:56:51,503 epoch 6 - iter 52/130 - loss 0.27726789 - samples/sec: 15.52 - lr: 0.020000\n",
-      "2021-09-08 01:56:52,352 epoch 6 - iter 65/130 - loss 0.24944913 - samples/sec: 15.34 - lr: 0.020000\n",
-      "2021-09-08 01:56:53,189 epoch 6 - iter 78/130 - loss 0.21218734 - samples/sec: 15.55 - lr: 0.020000\n",
-      "2021-09-08 01:56:54,023 epoch 6 - iter 91/130 - loss 0.25380534 - samples/sec: 15.60 - lr: 0.020000\n",
-      "2021-09-08 01:56:54,851 epoch 6 - iter 104/130 - loss 0.24772583 - samples/sec: 15.72 - lr: 0.020000\n",
-      "2021-09-08 01:56:55,713 epoch 6 - iter 117/130 - loss 0.26118122 - samples/sec: 15.10 - lr: 0.020000\n",
-      "2021-09-08 01:56:56,446 epoch 6 - iter 130/130 - loss 0.26803965 - samples/sec: 17.78 - lr: 0.020000\n"
+      "2021-09-21 21:54:29,391 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:54:30,176 epoch 5 - iter 13/130 - loss 0.65675879 - samples/sec: 16.99 - lr: 0.020000\n",
+      "2021-09-21 21:54:30,911 epoch 5 - iter 26/130 - loss 0.64608885 - samples/sec: 17.73 - lr: 0.020000\n",
+      "2021-09-21 21:54:31,645 epoch 5 - iter 39/130 - loss 0.63574619 - samples/sec: 17.72 - lr: 0.020000\n",
+      "2021-09-21 21:54:32,379 epoch 5 - iter 52/130 - loss 0.63232711 - samples/sec: 17.73 - lr: 0.020000\n",
+      "2021-09-21 21:54:33,125 epoch 5 - iter 65/130 - loss 0.63440924 - samples/sec: 17.45 - lr: 0.020000\n",
+      "2021-09-21 21:54:33,842 epoch 5 - iter 78/130 - loss 0.63769307 - samples/sec: 18.16 - lr: 0.020000\n",
+      "2021-09-21 21:54:34,571 epoch 5 - iter 91/130 - loss 0.64523412 - samples/sec: 17.84 - lr: 0.020000\n",
+      "2021-09-21 21:54:35,307 epoch 5 - iter 104/130 - loss 0.64404965 - samples/sec: 17.67 - lr: 0.020000\n",
+      "2021-09-21 21:54:36,067 epoch 5 - iter 117/130 - loss 0.64693005 - samples/sec: 17.13 - lr: 0.020000\n",
+      "2021-09-21 21:54:36,819 epoch 5 - iter 130/130 - loss 0.65244149 - samples/sec: 17.31 - lr: 0.020000\n",
+      "2021-09-21 21:54:36,820 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:54:36,821 EPOCH 5 done: loss 0.6524 - lr 0.0200000\n",
+      "2021-09-21 21:54:37,368 DEV : loss 0.5821923613548279 - score 0.0714\n",
+      "2021-09-21 21:54:37,371 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:54:37,373 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:54:38,185 epoch 6 - iter 13/130 - loss 0.63939520 - samples/sec: 18.29 - lr: 0.020000\n",
+      "2021-09-21 21:54:38,939 epoch 6 - iter 26/130 - loss 0.65247038 - samples/sec: 17.26 - lr: 0.020000\n",
+      "2021-09-21 21:54:39,659 epoch 6 - iter 39/130 - loss 0.65062942 - samples/sec: 18.10 - lr: 0.020000\n",
+      "2021-09-21 21:54:40,401 epoch 6 - iter 52/130 - loss 0.65594174 - samples/sec: 17.54 - lr: 0.020000\n",
+      "2021-09-21 21:54:41,157 epoch 6 - iter 65/130 - loss 0.65594446 - samples/sec: 17.21 - lr: 0.020000\n",
+      "2021-09-21 21:54:41,853 epoch 6 - iter 78/130 - loss 0.66243070 - samples/sec: 18.69 - lr: 0.020000\n",
+      "2021-09-21 21:54:42,612 epoch 6 - iter 91/130 - loss 0.66064149 - samples/sec: 17.14 - lr: 0.020000\n",
+      "2021-09-21 21:54:43,346 epoch 6 - iter 104/130 - loss 0.65704942 - samples/sec: 17.74 - lr: 0.020000\n",
+      "2021-09-21 21:54:44,014 epoch 6 - iter 117/130 - loss 0.65613501 - samples/sec: 19.50 - lr: 0.020000\n",
+      "2021-09-21 21:54:44,591 epoch 6 - iter 130/130 - loss 0.65661331 - samples/sec: 22.58 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:56:56,447 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:56,448 EPOCH 6 done: loss 0.2680 - lr 0.0200000\n",
-      "2021-09-08 01:56:56,996 DEV : loss 0.5489241480827332 - score 0.5\n",
-      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 01:56:56,997 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 01:56:57,079 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:56:57,792 epoch 7 - iter 13/130 - loss 0.15454581 - samples/sec: 18.86 - lr: 0.010000\n",
-      "2021-09-08 01:56:58,563 epoch 7 - iter 26/130 - loss 0.25327361 - samples/sec: 16.88 - lr: 0.010000\n",
-      "2021-09-08 01:56:59,259 epoch 7 - iter 39/130 - loss 0.18495838 - samples/sec: 18.70 - lr: 0.010000\n",
-      "2021-09-08 01:56:59,985 epoch 7 - iter 52/130 - loss 0.24167900 - samples/sec: 17.93 - lr: 0.010000\n",
-      "2021-09-08 01:57:00,734 epoch 7 - iter 65/130 - loss 0.20099509 - samples/sec: 17.39 - lr: 0.010000\n",
-      "2021-09-08 01:57:01,602 epoch 7 - iter 78/130 - loss 0.18474355 - samples/sec: 15.00 - lr: 0.010000\n",
-      "2021-09-08 01:57:02,512 epoch 7 - iter 91/130 - loss 0.22380145 - samples/sec: 14.31 - lr: 0.010000\n",
-      "2021-09-08 01:57:03,353 epoch 7 - iter 104/130 - loss 0.22731564 - samples/sec: 15.48 - lr: 0.010000\n",
-      "2021-09-08 01:57:04,194 epoch 7 - iter 117/130 - loss 0.21097839 - samples/sec: 15.48 - lr: 0.010000\n",
-      "2021-09-08 01:57:05,016 epoch 7 - iter 130/130 - loss 0.21771137 - samples/sec: 15.83 - lr: 0.010000\n",
-      "2021-09-08 01:57:05,017 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:57:05,018 EPOCH 7 done: loss 0.2177 - lr 0.0100000\n",
-      "2021-09-08 01:57:05,688 DEV : loss 0.2649989128112793 - score 0.5\n",
-      "2021-09-08 01:57:05,690 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:54:44,592 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:54:44,592 EPOCH 6 done: loss 0.6566 - lr 0.0200000\n",
+      "2021-09-21 21:54:44,915 DEV : loss 0.4960748255252838 - score 0.0\n",
+      "2021-09-21 21:54:44,916 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:54:44,993 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:54:45,581 epoch 7 - iter 13/130 - loss 0.66060771 - samples/sec: 22.72 - lr: 0.020000\n",
+      "2021-09-21 21:54:46,164 epoch 7 - iter 26/130 - loss 0.65895698 - samples/sec: 22.32 - lr: 0.020000\n",
+      "2021-09-21 21:54:46,728 epoch 7 - iter 39/130 - loss 0.65365759 - samples/sec: 23.10 - lr: 0.020000\n",
+      "2021-09-21 21:54:47,283 epoch 7 - iter 52/130 - loss 0.65030328 - samples/sec: 23.45 - lr: 0.020000\n",
+      "2021-09-21 21:54:47,835 epoch 7 - iter 65/130 - loss 0.64898109 - samples/sec: 23.60 - lr: 0.020000\n",
+      "2021-09-21 21:54:48,406 epoch 7 - iter 78/130 - loss 0.64696283 - samples/sec: 22.79 - lr: 0.020000\n",
+      "2021-09-21 21:54:48,997 epoch 7 - iter 91/130 - loss 0.64471270 - samples/sec: 22.06 - lr: 0.020000\n",
+      "2021-09-21 21:54:49,550 epoch 7 - iter 104/130 - loss 0.64528274 - samples/sec: 23.56 - lr: 0.020000\n",
+      "2021-09-21 21:54:50,101 epoch 7 - iter 117/130 - loss 0.64706837 - samples/sec: 23.63 - lr: 0.020000\n",
+      "2021-09-21 21:54:50,660 epoch 7 - iter 130/130 - loss 0.64674404 - samples/sec: 23.28 - lr: 0.020000\n",
+      "2021-09-21 21:54:50,661 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:54:50,662 EPOCH 7 done: loss 0.6467 - lr 0.0200000\n",
+      "2021-09-21 21:54:50,888 DEV : loss 0.4350014328956604 - score 0.0\n",
+      "2021-09-21 21:54:50,889 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:54:50,891 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:54:51,459 epoch 8 - iter 13/130 - loss 0.64317918 - samples/sec: 23.52 - lr: 0.020000\n",
+      "2021-09-21 21:54:52,028 epoch 8 - iter 26/130 - loss 0.64319580 - samples/sec: 22.90 - lr: 0.020000\n",
+      "2021-09-21 21:54:52,633 epoch 8 - iter 39/130 - loss 0.63680315 - samples/sec: 21.51 - lr: 0.020000\n",
+      "2021-09-21 21:54:53,365 epoch 8 - iter 52/130 - loss 0.64096765 - samples/sec: 17.77 - lr: 0.020000\n",
+      "2021-09-21 21:54:54,126 epoch 8 - iter 65/130 - loss 0.63307284 - samples/sec: 17.11 - lr: 0.020000\n",
+      "2021-09-21 21:54:54,865 epoch 8 - iter 78/130 - loss 0.63431691 - samples/sec: 17.62 - lr: 0.020000\n",
+      "2021-09-21 21:54:55,626 epoch 8 - iter 91/130 - loss 0.63287073 - samples/sec: 17.09 - lr: 0.020000\n",
+      "2021-09-21 21:54:56,323 epoch 8 - iter 104/130 - loss 0.63638417 - samples/sec: 18.67 - lr: 0.020000\n",
+      "2021-09-21 21:54:57,079 epoch 8 - iter 117/130 - loss 0.63698249 - samples/sec: 17.22 - lr: 0.020000\n",
+      "2021-09-21 21:54:57,875 epoch 8 - iter 130/130 - loss 0.63797310 - samples/sec: 16.35 - lr: 0.020000\n",
+      "2021-09-21 21:54:57,876 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:54:57,876 EPOCH 8 done: loss 0.6380 - lr 0.0200000\n",
+      "2021-09-21 21:54:58,250 DEV : loss 0.425857275724411 - score 0.0\n",
+      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:54:58,250 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:54:58,252 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:54:59,042 epoch 9 - iter 13/130 - loss 0.64764197 - samples/sec: 17.22 - lr: 0.010000\n",
+      "2021-09-21 21:54:59,804 epoch 9 - iter 26/130 - loss 0.64813234 - samples/sec: 17.09 - lr: 0.010000\n",
+      "2021-09-21 21:55:00,547 epoch 9 - iter 39/130 - loss 0.64708229 - samples/sec: 17.51 - lr: 0.010000\n",
+      "2021-09-21 21:55:01,299 epoch 9 - iter 52/130 - loss 0.65411572 - samples/sec: 17.31 - lr: 0.010000\n",
+      "2021-09-21 21:55:01,998 epoch 9 - iter 65/130 - loss 0.65184345 - samples/sec: 18.61 - lr: 0.010000\n",
+      "2021-09-21 21:55:02,759 epoch 9 - iter 78/130 - loss 0.64936636 - samples/sec: 17.12 - lr: 0.010000\n",
+      "2021-09-21 21:55:03,523 epoch 9 - iter 91/130 - loss 0.64680084 - samples/sec: 17.02 - lr: 0.010000\n",
+      "2021-09-21 21:55:04,271 epoch 9 - iter 104/130 - loss 0.64236670 - samples/sec: 17.41 - lr: 0.010000\n",
+      "2021-09-21 21:55:04,978 epoch 9 - iter 117/130 - loss 0.64378388 - samples/sec: 18.41 - lr: 0.010000\n",
+      "2021-09-21 21:55:05,739 epoch 9 - iter 130/130 - loss 0.64388642 - samples/sec: 17.10 - lr: 0.010000\n",
+      "2021-09-21 21:55:05,741 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:55:05,741 EPOCH 9 done: loss 0.6439 - lr 0.0100000\n",
+      "2021-09-21 21:55:06,293 DEV : loss 0.4575541317462921 - score 0.0\n",
+      "2021-09-21 21:55:06,294 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:55:06,295 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:55:07,104 epoch 10 - iter 13/130 - loss 0.62990179 - samples/sec: 17.55 - lr: 0.010000\n",
+      "2021-09-21 21:55:07,807 epoch 10 - iter 26/130 - loss 0.63852710 - samples/sec: 18.53 - lr: 0.010000\n",
+      "2021-09-21 21:55:08,568 epoch 10 - iter 39/130 - loss 0.63249245 - samples/sec: 17.09 - lr: 0.010000\n",
+      "2021-09-21 21:55:09,350 epoch 10 - iter 52/130 - loss 0.63737696 - samples/sec: 16.66 - lr: 0.010000\n",
+      "2021-09-21 21:55:10,124 epoch 10 - iter 65/130 - loss 0.63784414 - samples/sec: 16.81 - lr: 0.010000\n",
+      "2021-09-21 21:55:10,848 epoch 10 - iter 78/130 - loss 0.64032805 - samples/sec: 17.98 - lr: 0.010000\n",
+      "2021-09-21 21:55:11,622 epoch 10 - iter 91/130 - loss 0.64261580 - samples/sec: 16.80 - lr: 0.010000\n",
+      "2021-09-21 21:55:12,274 epoch 10 - iter 104/130 - loss 0.63991546 - samples/sec: 19.97 - lr: 0.010000\n",
+      "2021-09-21 21:55:12,859 epoch 10 - iter 117/130 - loss 0.64061865 - samples/sec: 22.26 - lr: 0.010000\n",
+      "2021-09-21 21:55:13,412 epoch 10 - iter 130/130 - loss 0.64136735 - samples/sec: 23.52 - lr: 0.010000\n",
+      "2021-09-21 21:55:13,414 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:55:13,414 EPOCH 10 done: loss 0.6414 - lr 0.0100000\n",
+      "2021-09-21 21:55:13,639 DEV : loss 0.4205753207206726 - score 0.0714\n",
+      "2021-09-21 21:55:13,640 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:57:09,534 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:57:10,438 epoch 8 - iter 13/130 - loss 0.00369553 - samples/sec: 16.35 - lr: 0.010000\n",
-      "2021-09-08 01:57:11,263 epoch 8 - iter 26/130 - loss 0.07031792 - samples/sec: 15.79 - lr: 0.010000\n",
-      "2021-09-08 01:57:12,066 epoch 8 - iter 39/130 - loss 0.13088456 - samples/sec: 16.21 - lr: 0.010000\n",
-      "2021-09-08 01:57:12,909 epoch 8 - iter 52/130 - loss 0.16678084 - samples/sec: 15.43 - lr: 0.010000\n",
-      "2021-09-08 01:57:13,683 epoch 8 - iter 65/130 - loss 0.17133584 - samples/sec: 16.82 - lr: 0.010000\n",
-      "2021-09-08 01:57:14,487 epoch 8 - iter 78/130 - loss 0.17134617 - samples/sec: 16.20 - lr: 0.010000\n",
-      "2021-09-08 01:57:15,342 epoch 8 - iter 91/130 - loss 0.14883411 - samples/sec: 15.22 - lr: 0.010000\n",
-      "2021-09-08 01:57:16,154 epoch 8 - iter 104/130 - loss 0.13737327 - samples/sec: 16.02 - lr: 0.010000\n",
-      "2021-09-08 01:57:16,911 epoch 8 - iter 117/130 - loss 0.12633596 - samples/sec: 17.20 - lr: 0.010000\n",
-      "2021-09-08 01:57:17,715 epoch 8 - iter 130/130 - loss 0.12623994 - samples/sec: 16.20 - lr: 0.010000\n",
-      "2021-09-08 01:57:17,716 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:57:17,716 EPOCH 8 done: loss 0.1262 - lr 0.0100000\n",
-      "2021-09-08 01:57:18,520 DEV : loss 0.33416613936424255 - score 0.5\n",
-      "2021-09-08 01:57:18,522 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:57:18,524 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:57:19,414 epoch 9 - iter 13/130 - loss 0.14181622 - samples/sec: 15.29 - lr: 0.010000\n",
-      "2021-09-08 01:57:20,224 epoch 9 - iter 26/130 - loss 0.08226552 - samples/sec: 16.06 - lr: 0.010000\n",
-      "2021-09-08 01:57:21,036 epoch 9 - iter 39/130 - loss 0.05588183 - samples/sec: 16.03 - lr: 0.010000\n",
-      "2021-09-08 01:57:21,818 epoch 9 - iter 52/130 - loss 0.13677907 - samples/sec: 16.65 - lr: 0.010000\n",
-      "2021-09-08 01:57:22,685 epoch 9 - iter 65/130 - loss 0.13243359 - samples/sec: 15.01 - lr: 0.010000\n",
-      "2021-09-08 01:57:23,553 epoch 9 - iter 78/130 - loss 0.11062878 - samples/sec: 14.99 - lr: 0.010000\n",
-      "2021-09-08 01:57:24,275 epoch 9 - iter 91/130 - loss 0.09496962 - samples/sec: 18.03 - lr: 0.010000\n",
-      "2021-09-08 01:57:24,962 epoch 9 - iter 104/130 - loss 0.09385377 - samples/sec: 18.96 - lr: 0.010000\n",
-      "2021-09-08 01:57:25,637 epoch 9 - iter 117/130 - loss 0.08357531 - samples/sec: 19.27 - lr: 0.010000\n",
-      "2021-09-08 01:57:26,312 epoch 9 - iter 130/130 - loss 0.09007005 - samples/sec: 19.31 - lr: 0.010000\n",
-      "2021-09-08 01:57:26,313 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:57:26,314 EPOCH 9 done: loss 0.0901 - lr 0.0100000\n",
-      "2021-09-08 01:57:26,844 DEV : loss 0.31117913126945496 - score 0.5\n",
-      "2021-09-08 01:57:26,846 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 01:57:26,976 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:57:27,702 epoch 10 - iter 13/130 - loss 0.07845821 - samples/sec: 19.91 - lr: 0.010000\n",
-      "2021-09-08 01:57:28,374 epoch 10 - iter 26/130 - loss 0.04101148 - samples/sec: 19.36 - lr: 0.010000\n",
-      "2021-09-08 01:57:29,047 epoch 10 - iter 39/130 - loss 0.02783589 - samples/sec: 19.48 - lr: 0.010000\n",
-      "2021-09-08 01:57:29,718 epoch 10 - iter 52/130 - loss 0.02161513 - samples/sec: 19.40 - lr: 0.010000\n",
-      "2021-09-08 01:57:30,370 epoch 10 - iter 65/130 - loss 0.01760254 - samples/sec: 19.96 - lr: 0.010000\n",
-      "2021-09-08 01:57:31,040 epoch 10 - iter 78/130 - loss 0.02521965 - samples/sec: 19.43 - lr: 0.010000\n",
-      "2021-09-08 01:57:31,721 epoch 10 - iter 91/130 - loss 0.03436447 - samples/sec: 19.12 - lr: 0.010000\n",
-      "2021-09-08 01:57:32,399 epoch 10 - iter 104/130 - loss 0.03025948 - samples/sec: 19.19 - lr: 0.010000\n",
-      "2021-09-08 01:57:33,056 epoch 10 - iter 117/130 - loss 0.02700145 - samples/sec: 19.81 - lr: 0.010000\n",
-      "2021-09-08 01:57:33,722 epoch 10 - iter 130/130 - loss 0.02437859 - samples/sec: 19.55 - lr: 0.010000\n",
-      "2021-09-08 01:57:33,723 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:57:33,724 EPOCH 10 done: loss 0.0244 - lr 0.0100000\n",
-      "2021-09-08 01:57:34,129 DEV : loss 0.42395052313804626 - score 0.5\n",
-      "2021-09-08 01:57:34,130 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 01:57:37,797 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:57:37,798 Testing using best model ...\n",
-      "2021-09-08 01:57:37,799 loading file temp/best-model.pt\n",
+      "2021-09-21 21:55:25,313 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:55:25,314 Testing using best model ...\n",
+      "2021-09-21 21:55:25,315 loading file temp/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:57:43,827 \t0.5625\n",
-      "2021-09-08 01:57:43,828 \n",
+      "2021-09-21 21:55:32,615 \t0.0625\n",
+      "2021-09-21 21:55:32,615 \n",
       "Results:\n",
-      "- F-score (micro) 0.5625\n",
-      "- F-score (macro) 0.3833\n",
-      "- Accuracy 0.5625\n",
+      "- F-score (micro) 0.0625\n",
+      "- F-score (macro) 0.0625\n",
+      "- Accuracy 0.0625\n",
       "\n",
       "By class:\n",
       "               precision    recall  f1-score   support\n",
       "\n",
-      "       travel     1.0000    0.6667    0.8000         3\n",
-      "   technology     0.0000    0.0000    0.0000         2\n",
-      "     wellness     1.0000    1.0000    1.0000         1\n",
-      "        women     0.0000    0.0000    0.0000         1\n",
-      "      parents     0.0000    0.0000    0.0000         0\n",
+      "       travel     0.0000    0.0000    0.0000         1\n",
+      "   technology     1.0000    1.0000    1.0000         1\n",
+      "     wellness     0.0000    0.0000    0.0000         1\n",
+      "        women     0.0000    0.0000    0.0000         0\n",
+      "      parents     0.0000    0.0000    0.0000         2\n",
       "     business     0.0000    0.0000    0.0000         1\n",
       "     weddings     0.0000    0.0000    0.0000         1\n",
-      "      fashion     1.0000    1.0000    1.0000         1\n",
-      "entertainment     0.5000    1.0000    0.6667         1\n",
-      "      science     1.0000    1.0000    1.0000         1\n",
+      "      fashion     0.0000    0.0000    0.0000         0\n",
+      "entertainment     0.0000    0.0000    0.0000         2\n",
+      "      science     0.0000    0.0000    0.0000         3\n",
       "      divorce     0.0000    0.0000    0.0000         0\n",
-      "        crime     0.0000    0.0000    0.0000         0\n",
-      "     religion     0.0000    0.0000    0.0000         1\n",
-      "       sports     1.0000    1.0000    1.0000         2\n",
-      "     politics     0.5000    1.0000    0.6667         1\n",
-      "       comedy     0.0000    0.0000    0.0000         0\n",
+      "        crime     0.0000    0.0000    0.0000         1\n",
+      "     religion     0.0000    0.0000    0.0000         0\n",
+      "       sports     0.0000    0.0000    0.0000         2\n",
+      "     politics     0.0000    0.0000    0.0000         0\n",
+      "       comedy     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "    micro avg     0.5625    0.5625    0.5625        16\n",
-      "    macro avg     0.3750    0.4167    0.3833        16\n",
-      " weighted avg     0.5625    0.5625    0.5458        16\n",
-      "  samples avg     0.5625    0.5625    0.5625        16\n",
+      "    micro avg     0.0625    0.0625    0.0625        16\n",
+      "    macro avg     0.0625    0.0625    0.0625        16\n",
+      " weighted avg     0.0625    0.0625    0.0625        16\n",
+      "  samples avg     0.0625    0.0625    0.0625        16\n",
       "\n",
-      "2021-09-08 01:57:43,828 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:04,544 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
+      "2021-09-21 21:55:32,616 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:55:46,002 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 01:58:09,135 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:55:50,186 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 146/146 [00:00<00:00, 62163.07it/s]"
+      "100%|██████████| 146/146 [00:00<00:00, 76766.75it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:58:09,140 [b'travel', b'technology', b'wellness', b'women', b'parents', b'business', b'weddings', b'fashion', b'entertainment', b'science', b'divorce', b'crime', b'religion', b'sports', b'politics', b'comedy']\n",
-      "2021-09-08 01:58:09,162 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:09,164 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:55:50,190 [b'travel', b'technology', b'wellness', b'women', b'parents', b'business', b'weddings', b'fashion', b'entertainment', b'science', b'divorce', b'crime', b'religion', b'sports', b'politics', b'comedy']\n",
+      "2021-09-21 21:55:50,198 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:55:50,200 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -1519,25 +1520,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:58:09,165 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:09,165 Corpus: \"Corpus: 130 train + 14 dev + 16 test sentences\"\n",
-      "2021-09-08 01:58:09,165 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:09,166 Parameters:\n",
-      "2021-09-08 01:58:09,166  - learning_rate: \"0.02\"\n",
-      "2021-09-08 01:58:09,166  - mini_batch_size: \"1\"\n",
-      "2021-09-08 01:58:09,167  - patience: \"3\"\n",
-      "2021-09-08 01:58:09,167  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 01:58:09,167  - max_epochs: \"10\"\n",
-      "2021-09-08 01:58:09,168  - shuffle: \"True\"\n",
-      "2021-09-08 01:58:09,168  - train_with_dev: \"False\"\n",
-      "2021-09-08 01:58:09,168  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 01:58:09,169 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:09,169 Model training base path: \"temp\"\n",
-      "2021-09-08 01:58:09,169 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:09,170 Device: cuda:0\n",
-      "2021-09-08 01:58:09,170 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:09,170 Embeddings storage mode: cpu\n",
-      "2021-09-08 01:58:09,175 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 21:55:50,201 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:55:50,201 Corpus: \"Corpus: 130 train + 14 dev + 16 test sentences\"\n",
+      "2021-09-21 21:55:50,201 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:55:50,202 Parameters:\n",
+      "2021-09-21 21:55:50,202  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:55:50,202  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:55:50,203  - patience: \"3\"\n",
+      "2021-09-21 21:55:50,203  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:55:50,203  - max_epochs: \"10\"\n",
+      "2021-09-21 21:55:50,203  - shuffle: \"True\"\n",
+      "2021-09-21 21:55:50,204  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:55:50,204  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:55:50,204 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:55:50,205 Model training base path: \"temp\"\n",
+      "2021-09-21 21:55:50,205 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:55:50,205 Device: cuda:0\n",
+      "2021-09-21 21:55:50,205 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:55:50,206 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:55:50,212 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -1551,223 +1552,221 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:58:10,042 epoch 1 - iter 13/130 - loss 0.47223170 - samples/sec: 15.61 - lr: 0.020000\n",
-      "2021-09-08 01:58:11,781 epoch 1 - iter 26/130 - loss 0.46791237 - samples/sec: 15.47 - lr: 0.020000\n",
-      "2021-09-08 01:58:12,768 epoch 1 - iter 39/130 - loss 0.52898462 - samples/sec: 13.18 - lr: 0.020000\n",
-      "2021-09-08 01:58:13,744 epoch 1 - iter 52/130 - loss 0.52284584 - samples/sec: 13.34 - lr: 0.020000\n",
-      "2021-09-08 01:58:14,620 epoch 1 - iter 65/130 - loss 0.47994511 - samples/sec: 14.86 - lr: 0.020000\n",
-      "2021-09-08 01:58:15,437 epoch 1 - iter 78/130 - loss 0.50649056 - samples/sec: 15.92 - lr: 0.020000\n",
-      "2021-09-08 01:58:16,277 epoch 1 - iter 91/130 - loss 0.52483222 - samples/sec: 15.49 - lr: 0.020000\n",
-      "2021-09-08 01:58:17,139 epoch 1 - iter 104/130 - loss 0.51633034 - samples/sec: 15.09 - lr: 0.020000\n",
-      "2021-09-08 01:58:17,898 epoch 1 - iter 117/130 - loss 0.47784472 - samples/sec: 17.15 - lr: 0.020000\n",
-      "2021-09-08 01:58:18,735 epoch 1 - iter 130/130 - loss 0.48904128 - samples/sec: 15.55 - lr: 0.020000\n",
-      "2021-09-08 01:58:18,736 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:18,737 EPOCH 1 done: loss 0.4890 - lr 0.0200000\n",
-      "2021-09-08 01:58:19,388 DEV : loss 0.5790066719055176 - score 0.1429\n",
-      "2021-09-08 01:58:19,389 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:58:23,364 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:24,092 epoch 2 - iter 13/130 - loss 0.42781853 - samples/sec: 18.41 - lr: 0.020000\n",
-      "2021-09-08 01:58:24,801 epoch 2 - iter 26/130 - loss 0.47161743 - samples/sec: 18.35 - lr: 0.020000\n",
-      "2021-09-08 01:58:25,521 epoch 2 - iter 39/130 - loss 0.49119389 - samples/sec: 18.08 - lr: 0.020000\n",
-      "2021-09-08 01:58:26,290 epoch 2 - iter 52/130 - loss 0.52562166 - samples/sec: 16.91 - lr: 0.020000\n",
-      "2021-09-08 01:58:27,037 epoch 2 - iter 65/130 - loss 0.52071308 - samples/sec: 17.45 - lr: 0.020000\n",
-      "2021-09-08 01:58:27,775 epoch 2 - iter 78/130 - loss 0.52963366 - samples/sec: 17.63 - lr: 0.020000\n",
-      "2021-09-08 01:58:28,529 epoch 2 - iter 91/130 - loss 0.51237993 - samples/sec: 17.27 - lr: 0.020000\n",
-      "2021-09-08 01:58:29,413 epoch 2 - iter 104/130 - loss 0.52132732 - samples/sec: 14.72 - lr: 0.020000\n",
-      "2021-09-08 01:58:30,222 epoch 2 - iter 117/130 - loss 0.52112492 - samples/sec: 16.10 - lr: 0.020000\n",
-      "2021-09-08 01:58:31,077 epoch 2 - iter 130/130 - loss 0.51788234 - samples/sec: 15.21 - lr: 0.020000\n",
-      "2021-09-08 01:58:31,079 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:31,079 EPOCH 2 done: loss 0.5179 - lr 0.0200000\n",
-      "2021-09-08 01:58:31,983 DEV : loss 0.5900394320487976 - score 0.5\n",
-      "2021-09-08 01:58:31,984 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:55:50,989 epoch 1 - iter 13/130 - loss 0.27619992 - samples/sec: 17.55 - lr: 0.020000\n",
+      "2021-09-21 21:55:51,806 epoch 1 - iter 26/130 - loss 0.46878192 - samples/sec: 15.94 - lr: 0.020000\n",
+      "2021-09-21 21:55:52,555 epoch 1 - iter 39/130 - loss 0.50576793 - samples/sec: 17.38 - lr: 0.020000\n",
+      "2021-09-21 21:55:53,350 epoch 1 - iter 52/130 - loss 0.54749182 - samples/sec: 16.37 - lr: 0.020000\n",
+      "2021-09-21 21:55:54,119 epoch 1 - iter 65/130 - loss 0.57403176 - samples/sec: 16.91 - lr: 0.020000\n",
+      "2021-09-21 21:55:54,928 epoch 1 - iter 78/130 - loss 0.57940387 - samples/sec: 16.10 - lr: 0.020000\n",
+      "2021-09-21 21:55:55,747 epoch 1 - iter 91/130 - loss 0.57857387 - samples/sec: 15.89 - lr: 0.020000\n",
+      "2021-09-21 21:55:56,473 epoch 1 - iter 104/130 - loss 0.57722400 - samples/sec: 17.93 - lr: 0.020000\n",
+      "2021-09-21 21:55:57,243 epoch 1 - iter 117/130 - loss 0.55922275 - samples/sec: 16.89 - lr: 0.020000\n",
+      "2021-09-21 21:55:58,018 epoch 1 - iter 130/130 - loss 0.58231052 - samples/sec: 16.80 - lr: 0.020000\n",
+      "2021-09-21 21:55:58,019 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:55:58,019 EPOCH 1 done: loss 0.5823 - lr 0.0200000\n",
+      "2021-09-21 21:55:58,564 DEV : loss 0.3864644467830658 - score 0.5\n",
+      "2021-09-21 21:55:58,565 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:58:39,273 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:40,171 epoch 3 - iter 13/130 - loss 0.28015912 - samples/sec: 15.00 - lr: 0.020000\n",
-      "2021-09-08 01:58:41,041 epoch 3 - iter 26/130 - loss 0.38238546 - samples/sec: 14.96 - lr: 0.020000\n",
-      "2021-09-08 01:58:41,895 epoch 3 - iter 39/130 - loss 0.35878980 - samples/sec: 15.24 - lr: 0.020000\n",
-      "2021-09-08 01:58:42,738 epoch 3 - iter 52/130 - loss 0.41835927 - samples/sec: 15.43 - lr: 0.020000\n",
-      "2021-09-08 01:58:43,545 epoch 3 - iter 65/130 - loss 0.40666729 - samples/sec: 16.13 - lr: 0.020000\n",
-      "2021-09-08 01:58:44,301 epoch 3 - iter 78/130 - loss 0.41612541 - samples/sec: 17.22 - lr: 0.020000\n",
-      "2021-09-08 01:58:45,017 epoch 3 - iter 91/130 - loss 0.39616349 - samples/sec: 18.19 - lr: 0.020000\n",
-      "2021-09-08 01:58:45,736 epoch 3 - iter 104/130 - loss 0.42040358 - samples/sec: 18.12 - lr: 0.020000\n",
-      "2021-09-08 01:58:46,459 epoch 3 - iter 117/130 - loss 0.42806543 - samples/sec: 17.99 - lr: 0.020000\n",
-      "2021-09-08 01:58:47,179 epoch 3 - iter 130/130 - loss 0.41611176 - samples/sec: 18.09 - lr: 0.020000\n",
-      "2021-09-08 01:58:47,180 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:47,181 EPOCH 3 done: loss 0.4161 - lr 0.0200000\n",
-      "2021-09-08 01:58:47,667 DEV : loss 0.2645077109336853 - score 0.5\n",
-      "2021-09-08 01:58:47,668 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:56:04,634 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:56:05,244 epoch 2 - iter 13/130 - loss 0.52630924 - samples/sec: 21.93 - lr: 0.020000\n",
+      "2021-09-21 21:56:06,028 epoch 2 - iter 26/130 - loss 0.51962152 - samples/sec: 16.61 - lr: 0.020000\n",
+      "2021-09-21 21:56:06,786 epoch 2 - iter 39/130 - loss 0.49086292 - samples/sec: 17.16 - lr: 0.020000\n",
+      "2021-09-21 21:56:07,574 epoch 2 - iter 52/130 - loss 0.44562695 - samples/sec: 16.51 - lr: 0.020000\n",
+      "2021-09-21 21:56:08,360 epoch 2 - iter 65/130 - loss 0.43143241 - samples/sec: 16.56 - lr: 0.020000\n",
+      "2021-09-21 21:56:09,105 epoch 2 - iter 78/130 - loss 0.48112257 - samples/sec: 17.47 - lr: 0.020000\n",
+      "2021-09-21 21:56:09,888 epoch 2 - iter 91/130 - loss 0.51303722 - samples/sec: 16.63 - lr: 0.020000\n",
+      "2021-09-21 21:56:10,627 epoch 2 - iter 104/130 - loss 0.51863900 - samples/sec: 17.61 - lr: 0.020000\n",
+      "2021-09-21 21:56:11,421 epoch 2 - iter 117/130 - loss 0.50389600 - samples/sec: 16.39 - lr: 0.020000\n",
+      "2021-09-21 21:56:12,206 epoch 2 - iter 130/130 - loss 0.49206937 - samples/sec: 16.58 - lr: 0.020000\n",
+      "2021-09-21 21:56:12,207 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:56:12,208 EPOCH 2 done: loss 0.4921 - lr 0.0200000\n",
+      "2021-09-21 21:56:12,575 DEV : loss 0.33002591133117676 - score 0.4286\n",
+      "2021-09-21 21:56:12,576 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:56:12,578 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:56:13,217 epoch 3 - iter 13/130 - loss 0.47124341 - samples/sec: 20.92 - lr: 0.020000\n",
+      "2021-09-21 21:56:13,800 epoch 3 - iter 26/130 - loss 0.47352017 - samples/sec: 22.33 - lr: 0.020000\n",
+      "2021-09-21 21:56:14,390 epoch 3 - iter 39/130 - loss 0.49863545 - samples/sec: 22.05 - lr: 0.020000\n",
+      "2021-09-21 21:56:14,964 epoch 3 - iter 52/130 - loss 0.44986732 - samples/sec: 22.68 - lr: 0.020000\n",
+      "2021-09-21 21:56:15,536 epoch 3 - iter 65/130 - loss 0.39555000 - samples/sec: 22.76 - lr: 0.020000\n",
+      "2021-09-21 21:56:16,110 epoch 3 - iter 78/130 - loss 0.40065393 - samples/sec: 22.67 - lr: 0.020000\n",
+      "2021-09-21 21:56:16,810 epoch 3 - iter 91/130 - loss 0.35914229 - samples/sec: 18.59 - lr: 0.020000\n",
+      "2021-09-21 21:56:17,572 epoch 3 - iter 104/130 - loss 0.36296571 - samples/sec: 17.08 - lr: 0.020000\n",
+      "2021-09-21 21:56:18,340 epoch 3 - iter 117/130 - loss 0.38375539 - samples/sec: 16.94 - lr: 0.020000\n",
+      "2021-09-21 21:56:19,121 epoch 3 - iter 130/130 - loss 0.41646949 - samples/sec: 16.68 - lr: 0.020000\n",
+      "2021-09-21 21:56:19,122 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:56:19,122 EPOCH 3 done: loss 0.4165 - lr 0.0200000\n",
+      "2021-09-21 21:56:19,703 DEV : loss 0.296977698802948 - score 0.6429\n",
+      "2021-09-21 21:56:19,703 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:58:51,489 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:52,396 epoch 4 - iter 13/130 - loss 0.40644522 - samples/sec: 15.54 - lr: 0.020000\n",
-      "2021-09-08 01:58:53,259 epoch 4 - iter 26/130 - loss 0.31644501 - samples/sec: 15.07 - lr: 0.020000\n",
-      "2021-09-08 01:58:54,165 epoch 4 - iter 39/130 - loss 0.28753173 - samples/sec: 14.37 - lr: 0.020000\n",
-      "2021-09-08 01:58:54,974 epoch 4 - iter 52/130 - loss 0.30633431 - samples/sec: 16.09 - lr: 0.020000\n",
-      "2021-09-08 01:58:55,825 epoch 4 - iter 65/130 - loss 0.27644892 - samples/sec: 15.28 - lr: 0.020000\n",
-      "2021-09-08 01:58:56,618 epoch 4 - iter 78/130 - loss 0.30064175 - samples/sec: 16.41 - lr: 0.020000\n",
-      "2021-09-08 01:58:57,411 epoch 4 - iter 91/130 - loss 0.35652739 - samples/sec: 16.41 - lr: 0.020000\n",
-      "2021-09-08 01:58:58,243 epoch 4 - iter 104/130 - loss 0.34091727 - samples/sec: 15.65 - lr: 0.020000\n",
-      "2021-09-08 01:58:59,065 epoch 4 - iter 117/130 - loss 0.36094642 - samples/sec: 15.82 - lr: 0.020000\n",
-      "2021-09-08 01:58:59,924 epoch 4 - iter 130/130 - loss 0.35037730 - samples/sec: 15.14 - lr: 0.020000\n",
-      "2021-09-08 01:58:59,926 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:58:59,926 EPOCH 4 done: loss 0.3504 - lr 0.0200000\n",
-      "2021-09-08 01:59:00,625 DEV : loss 0.4651038646697998 - score 0.5714\n",
-      "2021-09-08 01:59:00,626 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:56:23,630 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:56:24,443 epoch 4 - iter 13/130 - loss 0.43642710 - samples/sec: 16.73 - lr: 0.020000\n",
+      "2021-09-21 21:56:25,198 epoch 4 - iter 26/130 - loss 0.42834395 - samples/sec: 17.24 - lr: 0.020000\n",
+      "2021-09-21 21:56:25,951 epoch 4 - iter 39/130 - loss 0.33290219 - samples/sec: 17.28 - lr: 0.020000\n",
+      "2021-09-21 21:56:26,737 epoch 4 - iter 52/130 - loss 0.35496326 - samples/sec: 16.56 - lr: 0.020000\n",
+      "2021-09-21 21:56:27,506 epoch 4 - iter 65/130 - loss 0.34118172 - samples/sec: 16.92 - lr: 0.020000\n",
+      "2021-09-21 21:56:28,256 epoch 4 - iter 78/130 - loss 0.35422843 - samples/sec: 17.37 - lr: 0.020000\n",
+      "2021-09-21 21:56:29,017 epoch 4 - iter 91/130 - loss 0.34200724 - samples/sec: 17.09 - lr: 0.020000\n",
+      "2021-09-21 21:56:29,733 epoch 4 - iter 104/130 - loss 0.30614290 - samples/sec: 18.18 - lr: 0.020000\n",
+      "2021-09-21 21:56:30,348 epoch 4 - iter 117/130 - loss 0.31547152 - samples/sec: 21.18 - lr: 0.020000\n",
+      "2021-09-21 21:56:30,914 epoch 4 - iter 130/130 - loss 0.29922431 - samples/sec: 22.99 - lr: 0.020000\n",
+      "2021-09-21 21:56:30,916 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:56:30,916 EPOCH 4 done: loss 0.2992 - lr 0.0200000\n",
+      "2021-09-21 21:56:31,239 DEV : loss 0.5536136031150818 - score 0.6429\n",
+      "2021-09-21 21:56:31,240 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:56:31,318 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:56:31,901 epoch 5 - iter 13/130 - loss 0.14376446 - samples/sec: 22.93 - lr: 0.020000\n",
+      "2021-09-21 21:56:32,475 epoch 5 - iter 26/130 - loss 0.14134295 - samples/sec: 22.67 - lr: 0.020000\n",
+      "2021-09-21 21:56:33,042 epoch 5 - iter 39/130 - loss 0.17025624 - samples/sec: 22.98 - lr: 0.020000\n",
+      "2021-09-21 21:56:33,613 epoch 5 - iter 52/130 - loss 0.23221743 - samples/sec: 22.79 - lr: 0.020000\n",
+      "2021-09-21 21:56:34,390 epoch 5 - iter 65/130 - loss 0.32648297 - samples/sec: 16.76 - lr: 0.020000\n",
+      "2021-09-21 21:56:35,116 epoch 5 - iter 78/130 - loss 0.32009999 - samples/sec: 17.92 - lr: 0.020000\n",
+      "2021-09-21 21:56:35,874 epoch 5 - iter 91/130 - loss 0.33223812 - samples/sec: 17.16 - lr: 0.020000\n",
+      "2021-09-21 21:56:36,623 epoch 5 - iter 104/130 - loss 0.33153932 - samples/sec: 17.39 - lr: 0.020000\n",
+      "2021-09-21 21:56:37,363 epoch 5 - iter 117/130 - loss 0.32571181 - samples/sec: 17.59 - lr: 0.020000\n",
+      "2021-09-21 21:56:38,121 epoch 5 - iter 130/130 - loss 0.31808908 - samples/sec: 17.15 - lr: 0.020000\n",
+      "2021-09-21 21:56:38,123 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:56:38,123 EPOCH 5 done: loss 0.3181 - lr 0.0200000\n",
+      "2021-09-21 21:56:38,762 DEV : loss 0.3951915204524994 - score 0.7857\n",
+      "2021-09-21 21:56:38,763 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:59:07,691 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:59:08,747 epoch 5 - iter 13/130 - loss 0.46650173 - samples/sec: 14.77 - lr: 0.020000\n",
-      "2021-09-08 01:59:09,631 epoch 5 - iter 26/130 - loss 0.26891917 - samples/sec: 14.72 - lr: 0.020000\n",
-      "2021-09-08 01:59:10,518 epoch 5 - iter 39/130 - loss 0.30803023 - samples/sec: 14.67 - lr: 0.020000\n",
-      "2021-09-08 01:59:11,304 epoch 5 - iter 52/130 - loss 0.28004577 - samples/sec: 16.57 - lr: 0.020000\n",
-      "2021-09-08 01:59:12,058 epoch 5 - iter 65/130 - loss 0.25841200 - samples/sec: 17.25 - lr: 0.020000\n",
-      "2021-09-08 01:59:12,871 epoch 5 - iter 78/130 - loss 0.32978316 - samples/sec: 16.01 - lr: 0.020000\n",
-      "2021-09-08 01:59:13,645 epoch 5 - iter 91/130 - loss 0.31816671 - samples/sec: 16.82 - lr: 0.020000\n",
-      "2021-09-08 01:59:14,437 epoch 5 - iter 104/130 - loss 0.35033273 - samples/sec: 16.43 - lr: 0.020000\n",
-      "2021-09-08 01:59:15,326 epoch 5 - iter 117/130 - loss 0.34167069 - samples/sec: 14.64 - lr: 0.020000\n",
-      "2021-09-08 01:59:16,201 epoch 5 - iter 130/130 - loss 0.32512566 - samples/sec: 14.89 - lr: 0.020000\n",
-      "2021-09-08 01:59:16,202 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:59:16,203 EPOCH 5 done: loss 0.3251 - lr 0.0200000\n",
-      "2021-09-08 01:59:16,972 DEV : loss 0.3155031204223633 - score 0.5714\n",
-      "2021-09-08 01:59:16,975 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 01:59:21,129 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:59:22,139 epoch 6 - iter 13/130 - loss 0.39447320 - samples/sec: 13.79 - lr: 0.020000\n",
-      "2021-09-08 01:59:23,008 epoch 6 - iter 26/130 - loss 0.36426835 - samples/sec: 14.99 - lr: 0.020000\n",
-      "2021-09-08 01:59:23,950 epoch 6 - iter 39/130 - loss 0.34483340 - samples/sec: 13.81 - lr: 0.020000\n",
-      "2021-09-08 01:59:24,867 epoch 6 - iter 52/130 - loss 0.30251599 - samples/sec: 14.19 - lr: 0.020000\n",
-      "2021-09-08 01:59:25,801 epoch 6 - iter 65/130 - loss 0.27676949 - samples/sec: 13.92 - lr: 0.020000\n",
-      "2021-09-08 01:59:26,730 epoch 6 - iter 78/130 - loss 0.29500805 - samples/sec: 14.00 - lr: 0.020000\n",
-      "2021-09-08 01:59:27,636 epoch 6 - iter 91/130 - loss 0.28277542 - samples/sec: 14.36 - lr: 0.020000\n",
-      "2021-09-08 01:59:28,493 epoch 6 - iter 104/130 - loss 0.28154935 - samples/sec: 15.19 - lr: 0.020000\n",
-      "2021-09-08 01:59:29,288 epoch 6 - iter 117/130 - loss 0.29695270 - samples/sec: 16.37 - lr: 0.020000\n"
+      "2021-09-21 21:56:42,513 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:56:43,292 epoch 6 - iter 13/130 - loss 0.11201750 - samples/sec: 17.63 - lr: 0.020000\n",
+      "2021-09-21 21:56:44,050 epoch 6 - iter 26/130 - loss 0.06515095 - samples/sec: 17.18 - lr: 0.020000\n",
+      "2021-09-21 21:56:44,787 epoch 6 - iter 39/130 - loss 0.16358710 - samples/sec: 17.67 - lr: 0.020000\n",
+      "2021-09-21 21:56:45,533 epoch 6 - iter 52/130 - loss 0.20938636 - samples/sec: 17.45 - lr: 0.020000\n",
+      "2021-09-21 21:56:46,303 epoch 6 - iter 65/130 - loss 0.22137399 - samples/sec: 16.90 - lr: 0.020000\n",
+      "2021-09-21 21:56:46,926 epoch 6 - iter 78/130 - loss 0.21011490 - samples/sec: 20.89 - lr: 0.020000\n",
+      "2021-09-21 21:56:47,500 epoch 6 - iter 91/130 - loss 0.22998739 - samples/sec: 22.69 - lr: 0.020000\n",
+      "2021-09-21 21:56:48,068 epoch 6 - iter 104/130 - loss 0.23817614 - samples/sec: 22.90 - lr: 0.020000\n",
+      "2021-09-21 21:56:48,633 epoch 6 - iter 117/130 - loss 0.22642934 - samples/sec: 23.04 - lr: 0.020000\n",
+      "2021-09-21 21:56:49,194 epoch 6 - iter 130/130 - loss 0.22350237 - samples/sec: 23.23 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 01:59:30,074 epoch 6 - iter 130/130 - loss 0.29795202 - samples/sec: 16.56 - lr: 0.020000\n",
-      "2021-09-08 01:59:30,075 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:59:30,076 EPOCH 6 done: loss 0.2980 - lr 0.0200000\n",
-      "2021-09-08 01:59:30,554 DEV : loss 0.3705907464027405 - score 0.4286\n",
-      "2021-09-08 01:59:30,555 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:59:30,628 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:59:31,427 epoch 7 - iter 13/130 - loss 0.32424576 - samples/sec: 16.98 - lr: 0.020000\n",
-      "2021-09-08 01:59:32,224 epoch 7 - iter 26/130 - loss 0.23441085 - samples/sec: 16.34 - lr: 0.020000\n",
-      "2021-09-08 01:59:32,995 epoch 7 - iter 39/130 - loss 0.32927566 - samples/sec: 16.88 - lr: 0.020000\n",
-      "2021-09-08 01:59:33,760 epoch 7 - iter 52/130 - loss 0.26100020 - samples/sec: 17.01 - lr: 0.020000\n",
-      "2021-09-08 01:59:34,544 epoch 7 - iter 65/130 - loss 0.33704582 - samples/sec: 16.59 - lr: 0.020000\n",
-      "2021-09-08 01:59:35,329 epoch 7 - iter 78/130 - loss 0.31821041 - samples/sec: 16.57 - lr: 0.020000\n",
-      "2021-09-08 01:59:36,091 epoch 7 - iter 91/130 - loss 0.31424534 - samples/sec: 17.09 - lr: 0.020000\n",
-      "2021-09-08 01:59:36,855 epoch 7 - iter 104/130 - loss 0.29048952 - samples/sec: 17.02 - lr: 0.020000\n",
-      "2021-09-08 01:59:37,679 epoch 7 - iter 117/130 - loss 0.28097452 - samples/sec: 15.80 - lr: 0.020000\n",
-      "2021-09-08 01:59:38,562 epoch 7 - iter 130/130 - loss 0.27122753 - samples/sec: 14.72 - lr: 0.020000\n",
-      "2021-09-08 01:59:38,564 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:59:38,564 EPOCH 7 done: loss 0.2712 - lr 0.0200000\n",
-      "2021-09-08 01:59:39,147 DEV : loss 0.301655113697052 - score 0.6429\n",
-      "2021-09-08 01:59:39,148 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:56:49,195 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:56:49,195 EPOCH 6 done: loss 0.2235 - lr 0.0200000\n",
+      "2021-09-21 21:56:49,542 DEV : loss 0.5244264006614685 - score 0.6429\n",
+      "2021-09-21 21:56:49,543 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:56:49,597 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:56:50,169 epoch 7 - iter 13/130 - loss 0.08960725 - samples/sec: 23.38 - lr: 0.020000\n",
+      "2021-09-21 21:56:50,730 epoch 7 - iter 26/130 - loss 0.11843437 - samples/sec: 23.22 - lr: 0.020000\n",
+      "2021-09-21 21:56:51,324 epoch 7 - iter 39/130 - loss 0.15088588 - samples/sec: 21.90 - lr: 0.020000\n",
+      "2021-09-21 21:56:51,886 epoch 7 - iter 52/130 - loss 0.12094817 - samples/sec: 23.19 - lr: 0.020000\n",
+      "2021-09-21 21:56:52,441 epoch 7 - iter 65/130 - loss 0.10708219 - samples/sec: 23.47 - lr: 0.020000\n",
+      "2021-09-21 21:56:52,998 epoch 7 - iter 78/130 - loss 0.14632350 - samples/sec: 23.35 - lr: 0.020000\n",
+      "2021-09-21 21:56:53,554 epoch 7 - iter 91/130 - loss 0.15541822 - samples/sec: 23.42 - lr: 0.020000\n",
+      "2021-09-21 21:56:54,108 epoch 7 - iter 104/130 - loss 0.13780906 - samples/sec: 23.46 - lr: 0.020000\n",
+      "2021-09-21 21:56:54,669 epoch 7 - iter 117/130 - loss 0.13614239 - samples/sec: 23.24 - lr: 0.020000\n",
+      "2021-09-21 21:56:55,248 epoch 7 - iter 130/130 - loss 0.12608254 - samples/sec: 22.48 - lr: 0.020000\n",
+      "2021-09-21 21:56:55,249 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:56:55,250 EPOCH 7 done: loss 0.1261 - lr 0.0200000\n",
+      "2021-09-21 21:56:55,770 DEV : loss 0.5020474195480347 - score 0.2143\n",
+      "2021-09-21 21:56:55,771 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:56:55,773 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:56:56,570 epoch 8 - iter 13/130 - loss 0.01446790 - samples/sec: 17.69 - lr: 0.020000\n",
+      "2021-09-21 21:56:57,358 epoch 8 - iter 26/130 - loss 0.15558701 - samples/sec: 16.51 - lr: 0.020000\n",
+      "2021-09-21 21:56:58,114 epoch 8 - iter 39/130 - loss 0.19037508 - samples/sec: 17.22 - lr: 0.020000\n",
+      "2021-09-21 21:56:58,849 epoch 8 - iter 52/130 - loss 0.21524668 - samples/sec: 17.71 - lr: 0.020000\n",
+      "2021-09-21 21:56:59,590 epoch 8 - iter 65/130 - loss 0.21348175 - samples/sec: 17.55 - lr: 0.020000\n",
+      "2021-09-21 21:57:00,366 epoch 8 - iter 78/130 - loss 0.21423107 - samples/sec: 16.78 - lr: 0.020000\n",
+      "2021-09-21 21:57:01,144 epoch 8 - iter 91/130 - loss 0.18672531 - samples/sec: 16.73 - lr: 0.020000\n",
+      "2021-09-21 21:57:01,913 epoch 8 - iter 104/130 - loss 0.16589483 - samples/sec: 16.91 - lr: 0.020000\n",
+      "2021-09-21 21:57:02,635 epoch 8 - iter 117/130 - loss 0.16746970 - samples/sec: 18.02 - lr: 0.020000\n",
+      "2021-09-21 21:57:03,391 epoch 8 - iter 130/130 - loss 0.20282560 - samples/sec: 17.23 - lr: 0.020000\n",
+      "2021-09-21 21:57:03,392 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:57:03,393 EPOCH 8 done: loss 0.2028 - lr 0.0200000\n",
+      "2021-09-21 21:57:03,956 DEV : loss 0.3395189344882965 - score 0.6429\n",
+      "2021-09-21 21:57:03,957 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:57:03,959 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:57:04,720 epoch 9 - iter 13/130 - loss 0.02272726 - samples/sec: 17.96 - lr: 0.020000\n",
+      "2021-09-21 21:57:05,451 epoch 9 - iter 26/130 - loss 0.01619947 - samples/sec: 17.80 - lr: 0.020000\n",
+      "2021-09-21 21:57:06,213 epoch 9 - iter 39/130 - loss 0.01575415 - samples/sec: 17.07 - lr: 0.020000\n",
+      "2021-09-21 21:57:06,983 epoch 9 - iter 52/130 - loss 0.05239984 - samples/sec: 16.90 - lr: 0.020000\n",
+      "2021-09-21 21:57:07,752 epoch 9 - iter 65/130 - loss 0.04753308 - samples/sec: 16.93 - lr: 0.020000\n",
+      "2021-09-21 21:57:08,466 epoch 9 - iter 78/130 - loss 0.06874864 - samples/sec: 18.23 - lr: 0.020000\n",
+      "2021-09-21 21:57:09,233 epoch 9 - iter 91/130 - loss 0.08443972 - samples/sec: 16.96 - lr: 0.020000\n",
+      "2021-09-21 21:57:09,979 epoch 9 - iter 104/130 - loss 0.09637389 - samples/sec: 17.43 - lr: 0.020000\n",
+      "2021-09-21 21:57:10,740 epoch 9 - iter 117/130 - loss 0.08584700 - samples/sec: 17.10 - lr: 0.020000\n",
+      "2021-09-21 21:57:11,474 epoch 9 - iter 130/130 - loss 0.08082538 - samples/sec: 17.73 - lr: 0.020000\n",
+      "2021-09-21 21:57:11,476 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:57:11,476 EPOCH 9 done: loss 0.0808 - lr 0.0200000\n",
+      "2021-09-21 21:57:11,969 DEV : loss 0.22154539823532104 - score 0.7857\n",
+      "2021-09-21 21:57:11,972 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 01:59:43,219 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:59:44,177 epoch 8 - iter 13/130 - loss 0.27341075 - samples/sec: 14.45 - lr: 0.020000\n",
-      "2021-09-08 01:59:45,093 epoch 8 - iter 26/130 - loss 0.17936635 - samples/sec: 14.20 - lr: 0.020000\n",
-      "2021-09-08 01:59:46,010 epoch 8 - iter 39/130 - loss 0.17202129 - samples/sec: 14.20 - lr: 0.020000\n",
-      "2021-09-08 01:59:46,799 epoch 8 - iter 52/130 - loss 0.12948365 - samples/sec: 16.48 - lr: 0.020000\n",
-      "2021-09-08 01:59:47,619 epoch 8 - iter 65/130 - loss 0.18304417 - samples/sec: 15.88 - lr: 0.020000\n",
-      "2021-09-08 01:59:48,364 epoch 8 - iter 78/130 - loss 0.19111961 - samples/sec: 17.48 - lr: 0.020000\n",
-      "2021-09-08 01:59:49,135 epoch 8 - iter 91/130 - loss 0.18390473 - samples/sec: 16.88 - lr: 0.020000\n",
-      "2021-09-08 01:59:49,948 epoch 8 - iter 104/130 - loss 0.19011535 - samples/sec: 16.00 - lr: 0.020000\n",
-      "2021-09-08 01:59:50,785 epoch 8 - iter 117/130 - loss 0.18259621 - samples/sec: 15.54 - lr: 0.020000\n",
-      "2021-09-08 01:59:51,607 epoch 8 - iter 130/130 - loss 0.19664998 - samples/sec: 15.83 - lr: 0.020000\n",
-      "2021-09-08 01:59:51,609 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:59:51,609 EPOCH 8 done: loss 0.1966 - lr 0.0200000\n",
-      "2021-09-08 01:59:52,467 DEV : loss 0.3359539210796356 - score 0.6429\n",
-      "2021-09-08 01:59:52,470 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 01:59:52,548 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 01:59:53,421 epoch 9 - iter 13/130 - loss 0.00858567 - samples/sec: 16.99 - lr: 0.020000\n",
-      "2021-09-08 01:59:54,230 epoch 9 - iter 26/130 - loss 0.08883274 - samples/sec: 16.09 - lr: 0.020000\n",
-      "2021-09-08 01:59:55,034 epoch 9 - iter 39/130 - loss 0.12418614 - samples/sec: 16.18 - lr: 0.020000\n",
-      "2021-09-08 01:59:55,850 epoch 9 - iter 52/130 - loss 0.12395502 - samples/sec: 15.95 - lr: 0.020000\n",
-      "2021-09-08 01:59:56,679 epoch 9 - iter 65/130 - loss 0.10579151 - samples/sec: 15.69 - lr: 0.020000\n",
-      "2021-09-08 01:59:57,420 epoch 9 - iter 78/130 - loss 0.13158788 - samples/sec: 17.58 - lr: 0.020000\n",
-      "2021-09-08 01:59:58,237 epoch 9 - iter 91/130 - loss 0.13668555 - samples/sec: 15.92 - lr: 0.020000\n",
-      "2021-09-08 01:59:58,987 epoch 9 - iter 104/130 - loss 0.12100186 - samples/sec: 17.36 - lr: 0.020000\n",
-      "2021-09-08 01:59:59,777 epoch 9 - iter 117/130 - loss 0.10929191 - samples/sec: 16.47 - lr: 0.020000\n",
-      "2021-09-08 02:00:00,618 epoch 9 - iter 130/130 - loss 0.13627629 - samples/sec: 15.47 - lr: 0.020000\n",
-      "2021-09-08 02:00:00,619 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:00,620 EPOCH 9 done: loss 0.1363 - lr 0.0200000\n",
-      "2021-09-08 02:00:01,253 DEV : loss 0.38016214966773987 - score 0.6429\n",
-      "2021-09-08 02:00:01,254 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:00:01,275 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:02,098 epoch 10 - iter 13/130 - loss 0.00305245 - samples/sec: 16.64 - lr: 0.020000\n",
-      "2021-09-08 02:00:02,907 epoch 10 - iter 26/130 - loss 0.09613927 - samples/sec: 16.09 - lr: 0.020000\n",
-      "2021-09-08 02:00:03,710 epoch 10 - iter 39/130 - loss 0.09964079 - samples/sec: 16.21 - lr: 0.020000\n",
-      "2021-09-08 02:00:04,511 epoch 10 - iter 52/130 - loss 0.07520215 - samples/sec: 16.24 - lr: 0.020000\n",
-      "2021-09-08 02:00:05,313 epoch 10 - iter 65/130 - loss 0.07385261 - samples/sec: 16.23 - lr: 0.020000\n",
-      "2021-09-08 02:00:06,137 epoch 10 - iter 78/130 - loss 0.08904016 - samples/sec: 15.80 - lr: 0.020000\n",
-      "2021-09-08 02:00:06,933 epoch 10 - iter 91/130 - loss 0.11266125 - samples/sec: 16.34 - lr: 0.020000\n",
-      "2021-09-08 02:00:07,728 epoch 10 - iter 104/130 - loss 0.09913345 - samples/sec: 16.38 - lr: 0.020000\n",
-      "2021-09-08 02:00:08,505 epoch 10 - iter 117/130 - loss 0.09030018 - samples/sec: 16.75 - lr: 0.020000\n",
-      "2021-09-08 02:00:09,277 epoch 10 - iter 130/130 - loss 0.08133894 - samples/sec: 16.86 - lr: 0.020000\n",
-      "2021-09-08 02:00:09,278 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:09,279 EPOCH 10 done: loss 0.0813 - lr 0.0200000\n",
-      "2021-09-08 02:00:09,965 DEV : loss 0.4268724024295807 - score 0.6429\n",
-      "2021-09-08 02:00:09,967 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 02:00:18,660 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:18,661 Testing using best model ...\n",
-      "2021-09-08 02:00:18,663 loading file temp/best-model.pt\n",
+      "2021-09-21 21:57:16,107 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:57:16,679 epoch 10 - iter 13/130 - loss 0.05242831 - samples/sec: 23.43 - lr: 0.020000\n",
+      "2021-09-21 21:57:17,234 epoch 10 - iter 26/130 - loss 0.11023076 - samples/sec: 23.46 - lr: 0.020000\n",
+      "2021-09-21 21:57:17,784 epoch 10 - iter 39/130 - loss 0.10247857 - samples/sec: 23.67 - lr: 0.020000\n",
+      "2021-09-21 21:57:18,345 epoch 10 - iter 52/130 - loss 0.07969378 - samples/sec: 23.18 - lr: 0.020000\n",
+      "2021-09-21 21:57:18,946 epoch 10 - iter 65/130 - loss 0.09205530 - samples/sec: 21.66 - lr: 0.020000\n",
+      "2021-09-21 21:57:19,700 epoch 10 - iter 78/130 - loss 0.12658877 - samples/sec: 17.28 - lr: 0.020000\n",
+      "2021-09-21 21:57:20,453 epoch 10 - iter 91/130 - loss 0.13320767 - samples/sec: 17.28 - lr: 0.020000\n",
+      "2021-09-21 21:57:21,194 epoch 10 - iter 104/130 - loss 0.14753991 - samples/sec: 17.56 - lr: 0.020000\n",
+      "2021-09-21 21:57:21,953 epoch 10 - iter 117/130 - loss 0.14647148 - samples/sec: 17.13 - lr: 0.020000\n",
+      "2021-09-21 21:57:22,648 epoch 10 - iter 130/130 - loss 0.14069406 - samples/sec: 18.75 - lr: 0.020000\n",
+      "2021-09-21 21:57:22,649 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:57:22,649 EPOCH 10 done: loss 0.1407 - lr 0.0200000\n",
+      "2021-09-21 21:57:23,097 DEV : loss 0.3281102180480957 - score 0.7857\n",
+      "2021-09-21 21:57:23,100 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:57:27,654 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:57:27,655 Testing using best model ...\n",
+      "2021-09-21 21:57:27,679 loading file temp/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:00:24,644 \t0.625\n",
-      "2021-09-08 02:00:24,645 \n",
+      "2021-09-21 21:57:35,094 \t0.4375\n",
+      "2021-09-21 21:57:35,095 \n",
       "Results:\n",
-      "- F-score (micro) 0.625\n",
-      "- F-score (macro) 0.4271\n",
-      "- Accuracy 0.625\n",
+      "- F-score (micro) 0.4375\n",
+      "- F-score (macro) 0.2958\n",
+      "- Accuracy 0.4375\n",
       "\n",
       "By class:\n",
       "               precision    recall  f1-score   support\n",
       "\n",
-      "       travel     1.0000    1.0000    1.0000         2\n",
-      "   technology     0.0000    0.0000    0.0000         2\n",
+      "       travel     1.0000    1.0000    1.0000         1\n",
+      "   technology     1.0000    1.0000    1.0000         1\n",
       "     wellness     0.0000    0.0000    0.0000         0\n",
-      "        women     0.0000    0.0000    0.0000         2\n",
-      "      parents     0.5000    1.0000    0.6667         1\n",
-      "     business     1.0000    1.0000    1.0000         1\n",
-      "     weddings     1.0000    1.0000    1.0000         2\n",
+      "        women     0.5000    0.5000    0.5000         2\n",
+      "      parents     0.0000    0.0000    0.0000         1\n",
+      "     business     0.0000    0.0000    0.0000         0\n",
+      "     weddings     0.5000    1.0000    0.6667         1\n",
       "      fashion     0.0000    0.0000    0.0000         0\n",
-      "entertainment     1.0000    1.0000    1.0000         1\n",
-      "      science     1.0000    0.3333    0.5000         3\n",
-      "      divorce     0.0000    0.0000    0.0000         0\n",
-      "        crime     0.5000    1.0000    0.6667         1\n",
-      "     religion     1.0000    1.0000    1.0000         1\n",
-      "       sports     0.0000    0.0000    0.0000         0\n",
-      "     politics     0.0000    0.0000    0.0000         0\n",
+      "entertainment     0.0000    0.0000    0.0000         1\n",
+      "      science     0.0000    0.0000    0.0000         0\n",
+      "      divorce     0.5000    0.3333    0.4000         3\n",
+      "        crime     1.0000    0.3333    0.5000         3\n",
+      "     religion     0.0000    0.0000    0.0000         1\n",
+      "       sports     0.5000    1.0000    0.6667         1\n",
+      "     politics     0.0000    0.0000    0.0000         1\n",
       "       comedy     0.0000    0.0000    0.0000         0\n",
       "\n",
-      "    micro avg     0.6250    0.6250    0.6250        16\n",
-      "    macro avg     0.4375    0.4583    0.4271        16\n",
-      " weighted avg     0.6875    0.6250    0.6146        16\n",
-      "  samples avg     0.6250    0.6250    0.6250        16\n",
+      "    micro avg     0.4375    0.4375    0.4375        16\n",
+      "    macro avg     0.3125    0.3229    0.2958        16\n",
+      " weighted avg     0.5312    0.4375    0.4396        16\n",
+      "  samples avg     0.4375    0.4375    0.4375        16\n",
       "\n",
-      "2021-09-08 02:00:24,645 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:41,602 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
+      "2021-09-21 21:57:35,095 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:57:54,309 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:00:50,968 Computing label dictionary. Progress:\n"
+      "2021-09-21 21:57:58,339 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 146/146 [00:00<00:00, 59464.79it/s]"
+      "100%|██████████| 146/146 [00:00<00:00, 77368.08it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:00:50,973 [b'travel', b'technology', b'wellness', b'women', b'parents', b'business', b'weddings', b'fashion', b'entertainment', b'science', b'divorce', b'crime', b'religion', b'sports', b'politics', b'comedy']\n",
-      "2021-09-08 02:00:51,153 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:51,156 Model: \"TARSClassifier(\n",
+      "2021-09-21 21:57:58,342 [b'travel', b'technology', b'wellness', b'women', b'parents', b'business', b'weddings', b'fashion', b'entertainment', b'science', b'divorce', b'crime', b'religion', b'sports', b'politics', b'comedy']\n",
+      "2021-09-21 21:57:58,351 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:57:58,353 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -2080,24 +2079,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:00:51,156 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:51,157 Corpus: \"Corpus: 130 train + 14 dev + 16 test sentences\"\n",
-      "2021-09-08 02:00:51,157 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:51,157 Parameters:\n",
-      "2021-09-08 02:00:51,158  - learning_rate: \"0.02\"\n",
-      "2021-09-08 02:00:51,158  - mini_batch_size: \"1\"\n",
-      "2021-09-08 02:00:51,158  - patience: \"3\"\n",
-      "2021-09-08 02:00:51,159  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 02:00:51,159  - max_epochs: \"10\"\n",
-      "2021-09-08 02:00:51,159  - shuffle: \"True\"\n",
-      "2021-09-08 02:00:51,160  - train_with_dev: \"False\"\n",
-      "2021-09-08 02:00:51,160  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 02:00:51,160 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:51,161 Model training base path: \"temp\"\n",
-      "2021-09-08 02:00:51,161 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:51,161 Device: cuda:0\n",
-      "2021-09-08 02:00:51,162 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:51,162 Embeddings storage mode: cpu\n"
+      "2021-09-21 21:57:58,353 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:57:58,354 Corpus: \"Corpus: 130 train + 14 dev + 16 test sentences\"\n",
+      "2021-09-21 21:57:58,354 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:57:58,354 Parameters:\n",
+      "2021-09-21 21:57:58,355  - learning_rate: \"0.02\"\n",
+      "2021-09-21 21:57:58,355  - mini_batch_size: \"1\"\n",
+      "2021-09-21 21:57:58,355  - patience: \"3\"\n",
+      "2021-09-21 21:57:58,355  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 21:57:58,356  - max_epochs: \"10\"\n",
+      "2021-09-21 21:57:58,356  - shuffle: \"True\"\n",
+      "2021-09-21 21:57:58,356  - train_with_dev: \"False\"\n",
+      "2021-09-21 21:57:58,357  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 21:57:58,357 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:57:58,357 Model training base path: \"temp\"\n",
+      "2021-09-21 21:57:58,357 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:57:58,358 Device: cuda:0\n",
+      "2021-09-21 21:57:58,358 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:57:58,358 Embeddings storage mode: cpu\n",
+      "2021-09-21 21:57:58,365 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -2111,223 +2111,223 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:00:51,257 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:52,189 epoch 1 - iter 13/130 - loss 0.14271832 - samples/sec: 15.10 - lr: 0.020000\n",
-      "2021-09-08 02:00:52,987 epoch 1 - iter 26/130 - loss 0.33942892 - samples/sec: 16.31 - lr: 0.020000\n",
-      "2021-09-08 02:00:53,893 epoch 1 - iter 39/130 - loss 0.47286472 - samples/sec: 14.36 - lr: 0.020000\n",
-      "2021-09-08 02:00:54,719 epoch 1 - iter 52/130 - loss 0.49516272 - samples/sec: 15.76 - lr: 0.020000\n",
-      "2021-09-08 02:00:55,602 epoch 1 - iter 65/130 - loss 0.54104735 - samples/sec: 14.74 - lr: 0.020000\n",
-      "2021-09-08 02:00:56,463 epoch 1 - iter 78/130 - loss 0.50684127 - samples/sec: 15.13 - lr: 0.020000\n",
-      "2021-09-08 02:00:57,400 epoch 1 - iter 91/130 - loss 0.53001670 - samples/sec: 13.88 - lr: 0.020000\n",
-      "2021-09-08 02:00:58,229 epoch 1 - iter 104/130 - loss 0.50853935 - samples/sec: 15.71 - lr: 0.020000\n",
-      "2021-09-08 02:00:59,049 epoch 1 - iter 117/130 - loss 0.47813965 - samples/sec: 15.87 - lr: 0.020000\n",
-      "2021-09-08 02:00:59,947 epoch 1 - iter 130/130 - loss 0.51765700 - samples/sec: 14.49 - lr: 0.020000\n",
-      "2021-09-08 02:00:59,949 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:00:59,950 EPOCH 1 done: loss 0.5177 - lr 0.0200000\n",
-      "2021-09-08 02:01:00,686 DEV : loss 0.8710112571716309 - score 0.1429\n",
-      "2021-09-08 02:01:00,687 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:57:59,190 epoch 1 - iter 13/130 - loss 0.40891346 - samples/sec: 16.40 - lr: 0.020000\n",
+      "2021-09-21 21:57:59,967 epoch 1 - iter 26/130 - loss 0.49615028 - samples/sec: 16.74 - lr: 0.020000\n",
+      "2021-09-21 21:58:00,736 epoch 1 - iter 39/130 - loss 0.49964706 - samples/sec: 16.94 - lr: 0.020000\n",
+      "2021-09-21 21:58:01,533 epoch 1 - iter 52/130 - loss 0.54487857 - samples/sec: 16.32 - lr: 0.020000\n",
+      "2021-09-21 21:58:02,301 epoch 1 - iter 65/130 - loss 0.51225905 - samples/sec: 16.96 - lr: 0.020000\n",
+      "2021-09-21 21:58:03,049 epoch 1 - iter 78/130 - loss 0.53773513 - samples/sec: 17.39 - lr: 0.020000\n",
+      "2021-09-21 21:58:03,814 epoch 1 - iter 91/130 - loss 0.51071553 - samples/sec: 17.02 - lr: 0.020000\n",
+      "2021-09-21 21:58:04,613 epoch 1 - iter 104/130 - loss 0.48054014 - samples/sec: 16.29 - lr: 0.020000\n",
+      "2021-09-21 21:58:05,384 epoch 1 - iter 117/130 - loss 0.45300990 - samples/sec: 16.88 - lr: 0.020000\n",
+      "2021-09-21 21:58:06,156 epoch 1 - iter 130/130 - loss 0.48210327 - samples/sec: 16.86 - lr: 0.020000\n",
+      "2021-09-21 21:58:06,158 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:58:06,158 EPOCH 1 done: loss 0.4821 - lr 0.0200000\n",
+      "2021-09-21 21:58:06,645 DEV : loss 0.5155091285705566 - score 0.1429\n",
+      "2021-09-21 21:58:06,646 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:58:10,617 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:58:11,394 epoch 2 - iter 13/130 - loss 0.46120554 - samples/sec: 17.39 - lr: 0.020000\n",
+      "2021-09-21 21:58:12,141 epoch 2 - iter 26/130 - loss 0.50572919 - samples/sec: 17.44 - lr: 0.020000\n",
+      "2021-09-21 21:58:12,914 epoch 2 - iter 39/130 - loss 0.48209815 - samples/sec: 16.83 - lr: 0.020000\n",
+      "2021-09-21 21:58:13,722 epoch 2 - iter 52/130 - loss 0.49984157 - samples/sec: 16.12 - lr: 0.020000\n",
+      "2021-09-21 21:58:14,472 epoch 2 - iter 65/130 - loss 0.51503224 - samples/sec: 17.34 - lr: 0.020000\n",
+      "2021-09-21 21:58:15,265 epoch 2 - iter 78/130 - loss 0.50587469 - samples/sec: 16.41 - lr: 0.020000\n",
+      "2021-09-21 21:58:16,025 epoch 2 - iter 91/130 - loss 0.49139260 - samples/sec: 17.14 - lr: 0.020000\n",
+      "2021-09-21 21:58:16,818 epoch 2 - iter 104/130 - loss 0.49508789 - samples/sec: 16.40 - lr: 0.020000\n",
+      "2021-09-21 21:58:17,597 epoch 2 - iter 117/130 - loss 0.50853830 - samples/sec: 16.72 - lr: 0.020000\n",
+      "2021-09-21 21:58:18,392 epoch 2 - iter 130/130 - loss 0.50729368 - samples/sec: 16.36 - lr: 0.020000\n",
+      "2021-09-21 21:58:18,394 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:58:18,394 EPOCH 2 done: loss 0.5073 - lr 0.0200000\n",
+      "2021-09-21 21:58:18,988 DEV : loss 0.37122708559036255 - score 0.2857\n",
+      "2021-09-21 21:58:18,989 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:01:07,303 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:01:08,180 epoch 2 - iter 13/130 - loss 0.80971419 - samples/sec: 15.59 - lr: 0.020000\n",
-      "2021-09-08 02:01:09,051 epoch 2 - iter 26/130 - loss 0.58134563 - samples/sec: 14.95 - lr: 0.020000\n",
-      "2021-09-08 02:01:09,916 epoch 2 - iter 39/130 - loss 0.53295940 - samples/sec: 15.04 - lr: 0.020000\n",
-      "2021-09-08 02:01:10,780 epoch 2 - iter 52/130 - loss 0.52237447 - samples/sec: 15.07 - lr: 0.020000\n",
-      "2021-09-08 02:01:11,601 epoch 2 - iter 65/130 - loss 0.52500101 - samples/sec: 15.86 - lr: 0.020000\n",
-      "2021-09-08 02:01:12,440 epoch 2 - iter 78/130 - loss 0.54148372 - samples/sec: 15.52 - lr: 0.020000\n",
-      "2021-09-08 02:01:13,316 epoch 2 - iter 91/130 - loss 0.53579400 - samples/sec: 14.86 - lr: 0.020000\n",
-      "2021-09-08 02:01:14,148 epoch 2 - iter 104/130 - loss 0.51081602 - samples/sec: 15.64 - lr: 0.020000\n",
-      "2021-09-08 02:01:14,846 epoch 2 - iter 117/130 - loss 0.51028266 - samples/sec: 18.66 - lr: 0.020000\n",
-      "2021-09-08 02:01:15,563 epoch 2 - iter 130/130 - loss 0.52084422 - samples/sec: 18.16 - lr: 0.020000\n",
-      "2021-09-08 02:01:15,564 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:01:15,564 EPOCH 2 done: loss 0.5208 - lr 0.0200000\n",
-      "2021-09-08 02:01:16,027 DEV : loss 0.5375024080276489 - score 0.2857\n",
-      "2021-09-08 02:01:16,028 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:58:23,400 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:58:24,257 epoch 3 - iter 13/130 - loss 0.31732065 - samples/sec: 16.54 - lr: 0.020000\n",
+      "2021-09-21 21:58:25,008 epoch 3 - iter 26/130 - loss 0.35350036 - samples/sec: 17.33 - lr: 0.020000\n",
+      "2021-09-21 21:58:25,756 epoch 3 - iter 39/130 - loss 0.34503552 - samples/sec: 17.39 - lr: 0.020000\n",
+      "2021-09-21 21:58:26,525 epoch 3 - iter 52/130 - loss 0.38628663 - samples/sec: 16.93 - lr: 0.020000\n",
+      "2021-09-21 21:58:27,300 epoch 3 - iter 65/130 - loss 0.36604547 - samples/sec: 16.80 - lr: 0.020000\n",
+      "2021-09-21 21:58:28,044 epoch 3 - iter 78/130 - loss 0.33577092 - samples/sec: 17.49 - lr: 0.020000\n",
+      "2021-09-21 21:58:28,786 epoch 3 - iter 91/130 - loss 0.36767016 - samples/sec: 17.53 - lr: 0.020000\n",
+      "2021-09-21 21:58:29,418 epoch 3 - iter 104/130 - loss 0.36492766 - samples/sec: 20.62 - lr: 0.020000\n",
+      "2021-09-21 21:58:30,017 epoch 3 - iter 117/130 - loss 0.37350373 - samples/sec: 21.73 - lr: 0.020000\n",
+      "2021-09-21 21:58:30,597 epoch 3 - iter 130/130 - loss 0.39608329 - samples/sec: 22.44 - lr: 0.020000\n",
+      "2021-09-21 21:58:30,598 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:58:30,599 EPOCH 3 done: loss 0.3961 - lr 0.0200000\n",
+      "2021-09-21 21:58:30,910 DEV : loss 0.3447147309780121 - score 0.2857\n",
+      "2021-09-21 21:58:30,911 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:01:29,419 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:01:30,279 epoch 3 - iter 13/130 - loss 0.36231480 - samples/sec: 16.24 - lr: 0.020000\n",
-      "2021-09-08 02:01:31,184 epoch 3 - iter 26/130 - loss 0.36182601 - samples/sec: 14.37 - lr: 0.020000\n",
-      "2021-09-08 02:01:31,990 epoch 3 - iter 39/130 - loss 0.38497900 - samples/sec: 16.16 - lr: 0.020000\n",
-      "2021-09-08 02:01:32,833 epoch 3 - iter 52/130 - loss 0.40192448 - samples/sec: 15.44 - lr: 0.020000\n",
-      "2021-09-08 02:01:33,583 epoch 3 - iter 65/130 - loss 0.41874301 - samples/sec: 17.35 - lr: 0.020000\n",
-      "2021-09-08 02:01:34,301 epoch 3 - iter 78/130 - loss 0.46737396 - samples/sec: 18.13 - lr: 0.020000\n",
-      "2021-09-08 02:01:35,008 epoch 3 - iter 91/130 - loss 0.44448379 - samples/sec: 18.40 - lr: 0.020000\n",
-      "2021-09-08 02:01:35,737 epoch 3 - iter 104/130 - loss 0.45138788 - samples/sec: 17.87 - lr: 0.020000\n",
-      "2021-09-08 02:01:36,401 epoch 3 - iter 117/130 - loss 0.41787215 - samples/sec: 19.63 - lr: 0.020000\n",
-      "2021-09-08 02:01:37,093 epoch 3 - iter 130/130 - loss 0.42025068 - samples/sec: 18.79 - lr: 0.020000\n",
-      "2021-09-08 02:01:37,095 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:01:37,095 EPOCH 3 done: loss 0.4203 - lr 0.0200000\n",
-      "2021-09-08 02:01:38,394 DEV : loss 0.34286579489707947 - score 0.4286\n",
-      "2021-09-08 02:01:38,395 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 21:58:36,340 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:58:36,936 epoch 4 - iter 13/130 - loss 0.19582642 - samples/sec: 22.45 - lr: 0.020000\n",
+      "2021-09-21 21:58:37,505 epoch 4 - iter 26/130 - loss 0.29194559 - samples/sec: 22.88 - lr: 0.020000\n",
+      "2021-09-21 21:58:38,063 epoch 4 - iter 39/130 - loss 0.22217711 - samples/sec: 23.32 - lr: 0.020000\n",
+      "2021-09-21 21:58:38,619 epoch 4 - iter 52/130 - loss 0.20944308 - samples/sec: 23.39 - lr: 0.020000\n",
+      "2021-09-21 21:58:39,235 epoch 4 - iter 65/130 - loss 0.27538631 - samples/sec: 21.12 - lr: 0.020000\n",
+      "2021-09-21 21:58:39,859 epoch 4 - iter 78/130 - loss 0.28781718 - samples/sec: 20.88 - lr: 0.020000\n",
+      "2021-09-21 21:58:40,587 epoch 4 - iter 91/130 - loss 0.26386672 - samples/sec: 17.86 - lr: 0.020000\n",
+      "2021-09-21 21:58:41,361 epoch 4 - iter 104/130 - loss 0.26148568 - samples/sec: 16.83 - lr: 0.020000\n",
+      "2021-09-21 21:58:42,063 epoch 4 - iter 117/130 - loss 0.23752832 - samples/sec: 18.53 - lr: 0.020000\n",
+      "2021-09-21 21:58:42,832 epoch 4 - iter 130/130 - loss 0.26636196 - samples/sec: 16.93 - lr: 0.020000\n",
+      "2021-09-21 21:58:42,833 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:58:42,833 EPOCH 4 done: loss 0.2664 - lr 0.0200000\n",
+      "2021-09-21 21:58:43,255 DEV : loss 0.8952703475952148 - score 0.3571\n",
+      "2021-09-21 21:58:43,256 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:01:44,843 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:01:45,561 epoch 4 - iter 13/130 - loss 0.35793217 - samples/sec: 19.09 - lr: 0.020000\n",
-      "2021-09-08 02:01:46,239 epoch 4 - iter 26/130 - loss 0.24890183 - samples/sec: 19.20 - lr: 0.020000\n",
-      "2021-09-08 02:01:46,984 epoch 4 - iter 39/130 - loss 0.35474159 - samples/sec: 17.48 - lr: 0.020000\n",
-      "2021-09-08 02:01:47,660 epoch 4 - iter 52/130 - loss 0.33363081 - samples/sec: 19.27 - lr: 0.020000\n",
-      "2021-09-08 02:01:48,355 epoch 4 - iter 65/130 - loss 0.31386591 - samples/sec: 18.71 - lr: 0.020000\n",
-      "2021-09-08 02:01:49,047 epoch 4 - iter 78/130 - loss 0.28861032 - samples/sec: 18.81 - lr: 0.020000\n",
-      "2021-09-08 02:01:49,755 epoch 4 - iter 91/130 - loss 0.32636555 - samples/sec: 18.38 - lr: 0.020000\n",
-      "2021-09-08 02:01:50,461 epoch 4 - iter 104/130 - loss 0.33447305 - samples/sec: 18.43 - lr: 0.020000\n",
-      "2021-09-08 02:01:51,151 epoch 4 - iter 117/130 - loss 0.34881000 - samples/sec: 18.86 - lr: 0.020000\n",
-      "2021-09-08 02:01:51,818 epoch 4 - iter 130/130 - loss 0.33798325 - samples/sec: 19.52 - lr: 0.020000\n",
-      "2021-09-08 02:01:51,820 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:01:51,820 EPOCH 4 done: loss 0.3380 - lr 0.0200000\n",
-      "2021-09-08 02:01:55,351 DEV : loss 0.4174199402332306 - score 0.2857\n",
-      "2021-09-08 02:01:55,352 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:01:55,400 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:01:56,163 epoch 5 - iter 13/130 - loss 0.25853085 - samples/sec: 18.08 - lr: 0.020000\n",
-      "2021-09-08 02:01:56,890 epoch 5 - iter 26/130 - loss 0.35371021 - samples/sec: 17.91 - lr: 0.020000\n",
-      "2021-09-08 02:01:57,617 epoch 5 - iter 39/130 - loss 0.32333790 - samples/sec: 17.90 - lr: 0.020000\n",
-      "2021-09-08 02:01:58,342 epoch 5 - iter 52/130 - loss 0.33870937 - samples/sec: 17.97 - lr: 0.020000\n",
-      "2021-09-08 02:01:59,079 epoch 5 - iter 65/130 - loss 0.33986906 - samples/sec: 17.66 - lr: 0.020000\n",
-      "2021-09-08 02:01:59,811 epoch 5 - iter 78/130 - loss 0.33834609 - samples/sec: 17.80 - lr: 0.020000\n",
-      "2021-09-08 02:02:00,609 epoch 5 - iter 91/130 - loss 0.29691820 - samples/sec: 16.30 - lr: 0.020000\n",
-      "2021-09-08 02:02:01,399 epoch 5 - iter 104/130 - loss 0.31309019 - samples/sec: 16.47 - lr: 0.020000\n",
-      "2021-09-08 02:02:02,220 epoch 5 - iter 117/130 - loss 0.31896398 - samples/sec: 15.86 - lr: 0.020000\n",
-      "2021-09-08 02:02:03,068 epoch 5 - iter 130/130 - loss 0.32404054 - samples/sec: 15.34 - lr: 0.020000\n",
-      "2021-09-08 02:02:03,069 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:02:03,070 EPOCH 5 done: loss 0.3240 - lr 0.0200000\n",
-      "2021-09-08 02:02:03,716 DEV : loss 0.5074160099029541 - score 0.3571\n",
-      "2021-09-08 02:02:03,717 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:02:03,720 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:02:04,667 epoch 6 - iter 13/130 - loss 0.10886725 - samples/sec: 15.51 - lr: 0.020000\n",
-      "2021-09-08 02:02:05,493 epoch 6 - iter 26/130 - loss 0.11615497 - samples/sec: 15.76 - lr: 0.020000\n",
-      "2021-09-08 02:02:06,309 epoch 6 - iter 39/130 - loss 0.18967263 - samples/sec: 15.96 - lr: 0.020000\n",
-      "2021-09-08 02:02:07,129 epoch 6 - iter 52/130 - loss 0.16935461 - samples/sec: 15.87 - lr: 0.020000\n",
-      "2021-09-08 02:02:07,906 epoch 6 - iter 65/130 - loss 0.20824970 - samples/sec: 16.75 - lr: 0.020000\n",
-      "2021-09-08 02:02:08,678 epoch 6 - iter 78/130 - loss 0.25746982 - samples/sec: 16.84 - lr: 0.020000\n",
-      "2021-09-08 02:02:09,483 epoch 6 - iter 91/130 - loss 0.22410025 - samples/sec: 16.18 - lr: 0.020000\n",
-      "2021-09-08 02:02:10,287 epoch 6 - iter 104/130 - loss 0.22577341 - samples/sec: 16.17 - lr: 0.020000\n",
-      "2021-09-08 02:02:11,111 epoch 6 - iter 117/130 - loss 0.24377767 - samples/sec: 15.80 - lr: 0.020000\n"
+      "2021-09-21 21:58:47,420 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:58:48,248 epoch 5 - iter 13/130 - loss 0.26575700 - samples/sec: 16.03 - lr: 0.020000\n",
+      "2021-09-21 21:58:49,020 epoch 5 - iter 26/130 - loss 0.17260128 - samples/sec: 16.85 - lr: 0.020000\n",
+      "2021-09-21 21:58:49,768 epoch 5 - iter 39/130 - loss 0.26459593 - samples/sec: 17.41 - lr: 0.020000\n",
+      "2021-09-21 21:58:50,549 epoch 5 - iter 52/130 - loss 0.24556282 - samples/sec: 16.65 - lr: 0.020000\n",
+      "2021-09-21 21:58:51,332 epoch 5 - iter 65/130 - loss 0.29444777 - samples/sec: 16.62 - lr: 0.020000\n",
+      "2021-09-21 21:58:52,104 epoch 5 - iter 78/130 - loss 0.29198106 - samples/sec: 16.85 - lr: 0.020000\n",
+      "2021-09-21 21:58:52,825 epoch 5 - iter 91/130 - loss 0.30296622 - samples/sec: 18.07 - lr: 0.020000\n",
+      "2021-09-21 21:58:53,606 epoch 5 - iter 104/130 - loss 0.27622598 - samples/sec: 16.66 - lr: 0.020000\n",
+      "2021-09-21 21:58:54,393 epoch 5 - iter 117/130 - loss 0.27258284 - samples/sec: 16.52 - lr: 0.020000\n",
+      "2021-09-21 21:58:55,166 epoch 5 - iter 130/130 - loss 0.28910478 - samples/sec: 16.84 - lr: 0.020000\n",
+      "2021-09-21 21:58:55,167 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:58:55,168 EPOCH 5 done: loss 0.2891 - lr 0.0200000\n",
+      "2021-09-21 21:58:55,682 DEV : loss 0.3758607506752014 - score 0.4286\n",
+      "2021-09-21 21:58:55,683 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 21:58:59,596 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:59:00,181 epoch 6 - iter 13/130 - loss 0.20508497 - samples/sec: 22.88 - lr: 0.020000\n",
+      "2021-09-21 21:59:00,740 epoch 6 - iter 26/130 - loss 0.29690255 - samples/sec: 23.31 - lr: 0.020000\n",
+      "2021-09-21 21:59:01,313 epoch 6 - iter 39/130 - loss 0.21983888 - samples/sec: 22.72 - lr: 0.020000\n",
+      "2021-09-21 21:59:01,895 epoch 6 - iter 52/130 - loss 0.16535377 - samples/sec: 22.36 - lr: 0.020000\n",
+      "2021-09-21 21:59:02,581 epoch 6 - iter 65/130 - loss 0.15711960 - samples/sec: 18.98 - lr: 0.020000\n",
+      "2021-09-21 21:59:03,322 epoch 6 - iter 78/130 - loss 0.13540444 - samples/sec: 17.56 - lr: 0.020000\n",
+      "2021-09-21 21:59:04,061 epoch 6 - iter 91/130 - loss 0.19373650 - samples/sec: 17.62 - lr: 0.020000\n",
+      "2021-09-21 21:59:04,787 epoch 6 - iter 104/130 - loss 0.19369809 - samples/sec: 17.93 - lr: 0.020000\n",
+      "2021-09-21 21:59:05,550 epoch 6 - iter 117/130 - loss 0.17747969 - samples/sec: 17.06 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:02:11,936 epoch 6 - iter 130/130 - loss 0.27255580 - samples/sec: 15.77 - lr: 0.020000\n",
-      "2021-09-08 02:02:11,937 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:02:11,938 EPOCH 6 done: loss 0.2726 - lr 0.0200000\n",
-      "2021-09-08 02:02:12,559 DEV : loss 0.3732897639274597 - score 0.5714\n",
-      "2021-09-08 02:02:12,560 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 02:02:23,275 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:02:24,164 epoch 7 - iter 13/130 - loss 0.12767872 - samples/sec: 16.46 - lr: 0.020000\n",
-      "2021-09-08 02:02:24,979 epoch 7 - iter 26/130 - loss 0.23954494 - samples/sec: 15.97 - lr: 0.020000\n",
-      "2021-09-08 02:02:25,793 epoch 7 - iter 39/130 - loss 0.19121930 - samples/sec: 15.99 - lr: 0.020000\n",
-      "2021-09-08 02:02:27,102 epoch 7 - iter 52/130 - loss 0.15010241 - samples/sec: 15.57 - lr: 0.020000\n",
-      "2021-09-08 02:02:27,903 epoch 7 - iter 65/130 - loss 0.15652816 - samples/sec: 16.25 - lr: 0.020000\n",
-      "2021-09-08 02:02:28,750 epoch 7 - iter 78/130 - loss 0.19071101 - samples/sec: 15.36 - lr: 0.020000\n",
-      "2021-09-08 02:02:29,604 epoch 7 - iter 91/130 - loss 0.18427457 - samples/sec: 15.25 - lr: 0.020000\n",
-      "2021-09-08 02:02:30,423 epoch 7 - iter 104/130 - loss 0.17143540 - samples/sec: 15.89 - lr: 0.020000\n",
-      "2021-09-08 02:02:31,217 epoch 7 - iter 117/130 - loss 0.15484701 - samples/sec: 16.39 - lr: 0.020000\n",
-      "2021-09-08 02:02:32,077 epoch 7 - iter 130/130 - loss 0.15582169 - samples/sec: 15.13 - lr: 0.020000\n",
-      "2021-09-08 02:02:32,078 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:02:32,078 EPOCH 7 done: loss 0.1558 - lr 0.0200000\n",
-      "2021-09-08 02:02:32,544 DEV : loss 0.58050537109375 - score 0.3571\n",
-      "2021-09-08 02:02:32,545 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:02:32,562 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:02:33,305 epoch 8 - iter 13/130 - loss 0.34509569 - samples/sec: 18.05 - lr: 0.020000\n",
-      "2021-09-08 02:02:34,073 epoch 8 - iter 26/130 - loss 0.22837349 - samples/sec: 16.96 - lr: 0.020000\n",
-      "2021-09-08 02:02:34,819 epoch 8 - iter 39/130 - loss 0.16468042 - samples/sec: 17.45 - lr: 0.020000\n",
-      "2021-09-08 02:02:35,554 epoch 8 - iter 52/130 - loss 0.17657290 - samples/sec: 17.72 - lr: 0.020000\n",
-      "2021-09-08 02:02:36,257 epoch 8 - iter 65/130 - loss 0.14635217 - samples/sec: 18.51 - lr: 0.020000\n",
-      "2021-09-08 02:02:37,036 epoch 8 - iter 78/130 - loss 0.15604993 - samples/sec: 16.71 - lr: 0.020000\n",
-      "2021-09-08 02:02:37,851 epoch 8 - iter 91/130 - loss 0.13528128 - samples/sec: 15.96 - lr: 0.020000\n",
-      "2021-09-08 02:02:38,690 epoch 8 - iter 104/130 - loss 0.16357110 - samples/sec: 15.51 - lr: 0.020000\n",
-      "2021-09-08 02:02:39,476 epoch 8 - iter 117/130 - loss 0.14558743 - samples/sec: 16.56 - lr: 0.020000\n",
-      "2021-09-08 02:02:40,280 epoch 8 - iter 130/130 - loss 0.13435154 - samples/sec: 16.19 - lr: 0.020000\n",
-      "2021-09-08 02:02:40,282 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:02:40,282 EPOCH 8 done: loss 0.1344 - lr 0.0200000\n",
-      "2021-09-08 02:02:41,159 DEV : loss 0.5856802463531494 - score 0.3571\n",
-      "2021-09-08 02:02:41,160 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:02:41,161 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:02:42,044 epoch 9 - iter 13/130 - loss 0.08717399 - samples/sec: 15.37 - lr: 0.020000\n",
-      "2021-09-08 02:02:42,840 epoch 9 - iter 26/130 - loss 0.07673129 - samples/sec: 16.34 - lr: 0.020000\n",
-      "2021-09-08 02:02:43,641 epoch 9 - iter 39/130 - loss 0.10753420 - samples/sec: 16.25 - lr: 0.020000\n",
-      "2021-09-08 02:02:44,448 epoch 9 - iter 52/130 - loss 0.12746431 - samples/sec: 16.12 - lr: 0.020000\n",
-      "2021-09-08 02:02:45,299 epoch 9 - iter 65/130 - loss 0.12239677 - samples/sec: 15.30 - lr: 0.020000\n",
-      "2021-09-08 02:02:46,110 epoch 9 - iter 78/130 - loss 0.10229150 - samples/sec: 16.05 - lr: 0.020000\n",
-      "2021-09-08 02:02:46,957 epoch 9 - iter 91/130 - loss 0.10876850 - samples/sec: 15.36 - lr: 0.020000\n",
-      "2021-09-08 02:02:47,781 epoch 9 - iter 104/130 - loss 0.09532410 - samples/sec: 15.80 - lr: 0.020000\n",
-      "2021-09-08 02:02:48,632 epoch 9 - iter 117/130 - loss 0.09530811 - samples/sec: 15.29 - lr: 0.020000\n",
-      "2021-09-08 02:02:49,322 epoch 9 - iter 130/130 - loss 0.08943617 - samples/sec: 18.87 - lr: 0.020000\n",
-      "2021-09-08 02:02:49,323 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:02:49,323 EPOCH 9 done: loss 0.0894 - lr 0.0200000\n",
-      "2021-09-08 02:02:49,589 DEV : loss 0.4602685272693634 - score 0.3571\n",
-      "2021-09-08 02:02:49,590 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 02:02:49,594 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:02:50,296 epoch 10 - iter 13/130 - loss 0.36187607 - samples/sec: 19.01 - lr: 0.020000\n",
-      "2021-09-08 02:02:50,976 epoch 10 - iter 26/130 - loss 0.19625445 - samples/sec: 19.15 - lr: 0.020000\n",
-      "2021-09-08 02:02:51,678 epoch 10 - iter 39/130 - loss 0.13155203 - samples/sec: 18.54 - lr: 0.020000\n",
-      "2021-09-08 02:02:52,382 epoch 10 - iter 52/130 - loss 0.11776242 - samples/sec: 18.50 - lr: 0.020000\n",
-      "2021-09-08 02:02:53,056 epoch 10 - iter 65/130 - loss 0.09452722 - samples/sec: 19.30 - lr: 0.020000\n",
-      "2021-09-08 02:02:53,734 epoch 10 - iter 78/130 - loss 0.10169555 - samples/sec: 19.19 - lr: 0.020000\n",
-      "2021-09-08 02:02:54,315 epoch 10 - iter 91/130 - loss 0.14175124 - samples/sec: 22.40 - lr: 0.020000\n",
-      "2021-09-08 02:02:54,897 epoch 10 - iter 104/130 - loss 0.12712243 - samples/sec: 22.36 - lr: 0.020000\n",
-      "2021-09-08 02:02:55,470 epoch 10 - iter 117/130 - loss 0.11414806 - samples/sec: 22.75 - lr: 0.020000\n",
-      "2021-09-08 02:02:56,093 epoch 10 - iter 130/130 - loss 0.12007109 - samples/sec: 20.87 - lr: 0.020000\n",
-      "2021-09-08 02:02:56,095 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:02:56,095 EPOCH 10 done: loss 0.1201 - lr 0.0200000\n",
-      "2021-09-08 02:02:56,441 DEV : loss 0.4172811210155487 - score 0.5\n",
-      "Epoch    10: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 02:02:56,442 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 02:03:00,505 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:03:00,505 Testing using best model ...\n",
-      "2021-09-08 02:03:00,507 loading file temp/best-model.pt\n",
+      "2021-09-21 21:59:06,302 epoch 6 - iter 130/130 - loss 0.18079552 - samples/sec: 17.31 - lr: 0.020000\n",
+      "2021-09-21 21:59:06,303 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:59:06,303 EPOCH 6 done: loss 0.1808 - lr 0.0200000\n",
+      "2021-09-21 21:59:06,798 DEV : loss 0.49628275632858276 - score 0.3571\n",
+      "2021-09-21 21:59:06,799 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:59:06,801 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:59:07,570 epoch 7 - iter 13/130 - loss 0.26138799 - samples/sec: 17.44 - lr: 0.020000\n",
+      "2021-09-21 21:59:08,306 epoch 7 - iter 26/130 - loss 0.29975162 - samples/sec: 17.68 - lr: 0.020000\n",
+      "2021-09-21 21:59:08,930 epoch 7 - iter 39/130 - loss 0.20183647 - samples/sec: 20.87 - lr: 0.020000\n",
+      "2021-09-21 21:59:09,507 epoch 7 - iter 52/130 - loss 0.17367584 - samples/sec: 22.55 - lr: 0.020000\n",
+      "2021-09-21 21:59:10,063 epoch 7 - iter 65/130 - loss 0.15650595 - samples/sec: 23.45 - lr: 0.020000\n",
+      "2021-09-21 21:59:10,622 epoch 7 - iter 78/130 - loss 0.18873194 - samples/sec: 23.28 - lr: 0.020000\n",
+      "2021-09-21 21:59:11,185 epoch 7 - iter 91/130 - loss 0.21068106 - samples/sec: 23.13 - lr: 0.020000\n",
+      "2021-09-21 21:59:11,751 epoch 7 - iter 104/130 - loss 0.22582632 - samples/sec: 22.99 - lr: 0.020000\n",
+      "2021-09-21 21:59:12,299 epoch 7 - iter 117/130 - loss 0.20099448 - samples/sec: 23.74 - lr: 0.020000\n",
+      "2021-09-21 21:59:12,951 epoch 7 - iter 130/130 - loss 0.23923328 - samples/sec: 19.96 - lr: 0.020000\n",
+      "2021-09-21 21:59:12,952 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:59:12,953 EPOCH 7 done: loss 0.2392 - lr 0.0200000\n",
+      "2021-09-21 21:59:13,584 DEV : loss 0.334597647190094 - score 0.2857\n",
+      "2021-09-21 21:59:13,584 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 21:59:13,589 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:59:14,437 epoch 8 - iter 13/130 - loss 0.26733248 - samples/sec: 17.74 - lr: 0.020000\n",
+      "2021-09-21 21:59:15,254 epoch 8 - iter 26/130 - loss 0.19627381 - samples/sec: 15.93 - lr: 0.020000\n",
+      "2021-09-21 21:59:15,981 epoch 8 - iter 39/130 - loss 0.15116576 - samples/sec: 17.90 - lr: 0.020000\n",
+      "2021-09-21 21:59:16,746 epoch 8 - iter 52/130 - loss 0.11541853 - samples/sec: 17.01 - lr: 0.020000\n",
+      "2021-09-21 21:59:17,472 epoch 8 - iter 65/130 - loss 0.12699912 - samples/sec: 17.93 - lr: 0.020000\n",
+      "2021-09-21 21:59:18,216 epoch 8 - iter 78/130 - loss 0.13349641 - samples/sec: 17.50 - lr: 0.020000\n",
+      "2021-09-21 21:59:18,900 epoch 8 - iter 91/130 - loss 0.14288631 - samples/sec: 19.01 - lr: 0.020000\n",
+      "2021-09-21 21:59:19,486 epoch 8 - iter 104/130 - loss 0.14419618 - samples/sec: 22.22 - lr: 0.020000\n",
+      "2021-09-21 21:59:20,056 epoch 8 - iter 117/130 - loss 0.16369160 - samples/sec: 22.88 - lr: 0.020000\n",
+      "2021-09-21 21:59:20,618 epoch 8 - iter 130/130 - loss 0.16972494 - samples/sec: 23.16 - lr: 0.020000\n",
+      "2021-09-21 21:59:20,619 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:59:20,619 EPOCH 8 done: loss 0.1697 - lr 0.0200000\n",
+      "2021-09-21 21:59:20,943 DEV : loss 0.5022491216659546 - score 0.2143\n",
+      "2021-09-21 21:59:20,944 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 21:59:21,023 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:59:21,599 epoch 9 - iter 13/130 - loss 0.13161620 - samples/sec: 23.21 - lr: 0.020000\n",
+      "2021-09-21 21:59:22,158 epoch 9 - iter 26/130 - loss 0.16121341 - samples/sec: 23.30 - lr: 0.020000\n",
+      "2021-09-21 21:59:22,715 epoch 9 - iter 39/130 - loss 0.13972165 - samples/sec: 23.36 - lr: 0.020000\n",
+      "2021-09-21 21:59:23,418 epoch 9 - iter 52/130 - loss 0.11637928 - samples/sec: 18.52 - lr: 0.020000\n",
+      "2021-09-21 21:59:24,151 epoch 9 - iter 65/130 - loss 0.11571922 - samples/sec: 17.75 - lr: 0.020000\n",
+      "2021-09-21 21:59:24,905 epoch 9 - iter 78/130 - loss 0.09663347 - samples/sec: 17.26 - lr: 0.020000\n",
+      "2021-09-21 21:59:25,630 epoch 9 - iter 91/130 - loss 0.09943428 - samples/sec: 17.94 - lr: 0.020000\n",
+      "2021-09-21 21:59:26,336 epoch 9 - iter 104/130 - loss 0.11398706 - samples/sec: 18.45 - lr: 0.020000\n",
+      "2021-09-21 21:59:27,092 epoch 9 - iter 117/130 - loss 0.10157024 - samples/sec: 17.19 - lr: 0.020000\n",
+      "2021-09-21 21:59:27,853 epoch 9 - iter 130/130 - loss 0.11784224 - samples/sec: 17.12 - lr: 0.020000\n",
+      "2021-09-21 21:59:27,854 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:59:27,854 EPOCH 9 done: loss 0.1178 - lr 0.0200000\n",
+      "2021-09-21 21:59:28,458 DEV : loss 0.4832569360733032 - score 0.4286\n",
+      "Epoch     9: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 21:59:28,460 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 21:59:28,463 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:59:29,193 epoch 10 - iter 13/130 - loss 0.00593617 - samples/sec: 18.96 - lr: 0.010000\n",
+      "2021-09-21 21:59:29,771 epoch 10 - iter 26/130 - loss 0.00513153 - samples/sec: 22.54 - lr: 0.010000\n",
+      "2021-09-21 21:59:30,326 epoch 10 - iter 39/130 - loss 0.03666930 - samples/sec: 23.42 - lr: 0.010000\n",
+      "2021-09-21 21:59:30,877 epoch 10 - iter 52/130 - loss 0.02771943 - samples/sec: 23.65 - lr: 0.010000\n",
+      "2021-09-21 21:59:31,436 epoch 10 - iter 65/130 - loss 0.04508710 - samples/sec: 23.29 - lr: 0.010000\n",
+      "2021-09-21 21:59:31,989 epoch 10 - iter 78/130 - loss 0.05527940 - samples/sec: 23.54 - lr: 0.010000\n",
+      "2021-09-21 21:59:32,543 epoch 10 - iter 91/130 - loss 0.04809228 - samples/sec: 23.51 - lr: 0.010000\n",
+      "2021-09-21 21:59:33,134 epoch 10 - iter 104/130 - loss 0.04237724 - samples/sec: 21.99 - lr: 0.010000\n",
+      "2021-09-21 21:59:33,878 epoch 10 - iter 117/130 - loss 0.05193258 - samples/sec: 17.50 - lr: 0.010000\n",
+      "2021-09-21 21:59:34,616 epoch 10 - iter 130/130 - loss 0.04739100 - samples/sec: 17.63 - lr: 0.010000\n",
+      "2021-09-21 21:59:34,617 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:59:34,617 EPOCH 10 done: loss 0.0474 - lr 0.0100000\n",
+      "2021-09-21 21:59:35,082 DEV : loss 0.5470106601715088 - score 0.3571\n",
+      "2021-09-21 21:59:35,082 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 21:59:38,813 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:59:38,813 Testing using best model ...\n",
+      "2021-09-21 21:59:38,815 loading file temp/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:03:05,568 \t0.625\n",
-      "2021-09-08 02:03:05,569 \n",
+      "2021-09-21 21:59:45,694 \t0.625\n",
+      "2021-09-21 21:59:45,695 \n",
       "Results:\n",
       "- F-score (micro) 0.625\n",
-      "- F-score (macro) 0.2583\n",
+      "- F-score (macro) 0.4271\n",
       "- Accuracy 0.625\n",
       "\n",
       "By class:\n",
       "               precision    recall  f1-score   support\n",
       "\n",
-      "       travel     0.0000    0.0000    0.0000         1\n",
-      "   technology     1.0000    1.0000    1.0000         2\n",
-      "     wellness     1.0000    0.5000    0.6667         4\n",
+      "       travel     0.0000    0.0000    0.0000         3\n",
+      "   technology     0.3333    1.0000    0.5000         1\n",
+      "     wellness     1.0000    1.0000    1.0000         1\n",
       "        women     0.0000    0.0000    0.0000         0\n",
-      "      parents     0.6667    1.0000    0.8000         2\n",
-      "     business     0.0000    0.0000    0.0000         1\n",
+      "      parents     1.0000    1.0000    1.0000         1\n",
+      "     business     0.0000    0.0000    0.0000         0\n",
       "     weddings     0.0000    0.0000    0.0000         0\n",
-      "      fashion     0.0000    0.0000    0.0000         1\n",
+      "      fashion     1.0000    0.5000    0.6667         2\n",
       "entertainment     0.0000    0.0000    0.0000         0\n",
-      "      science     0.0000    0.0000    0.0000         0\n",
-      "      divorce     0.0000    0.0000    0.0000         0\n",
-      "        crime     0.0000    0.0000    0.0000         0\n",
-      "     religion     1.0000    0.5000    0.6667         2\n",
+      "      science     1.0000    0.5000    0.6667         4\n",
+      "      divorce     1.0000    1.0000    1.0000         2\n",
+      "        crime     1.0000    1.0000    1.0000         1\n",
+      "     religion     0.0000    0.0000    0.0000         0\n",
       "       sports     0.0000    0.0000    0.0000         0\n",
-      "     politics     1.0000    1.0000    1.0000         3\n",
+      "     politics     1.0000    1.0000    1.0000         1\n",
       "       comedy     0.0000    0.0000    0.0000         0\n",
       "\n",
       "    micro avg     0.6250    0.6250    0.6250        16\n",
-      "    macro avg     0.2917    0.2500    0.2583        16\n",
-      " weighted avg     0.7708    0.6250    0.6625        16\n",
+      "    macro avg     0.4583    0.4375    0.4271        16\n",
+      " weighted avg     0.7708    0.6250    0.6562        16\n",
       "  samples avg     0.6250    0.6250    0.6250        16\n",
       "\n",
-      "2021-09-08 02:03:05,569 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:03:13,629 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
+      "2021-09-21 21:59:45,695 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 21:59:58,256 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:03:18,583 Computing label dictionary. Progress:\n"
+      "2021-09-21 22:00:02,268 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 146/146 [00:00<00:00, 63828.27it/s]"
+      "100%|██████████| 146/146 [00:00<00:00, 73975.40it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:03:18,587 [b'travel', b'technology', b'wellness', b'women', b'parents', b'business', b'weddings', b'fashion', b'entertainment', b'science', b'divorce', b'crime', b'religion', b'sports', b'politics', b'comedy']\n",
-      "2021-09-08 02:03:18,602 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:03:18,604 Model: \"TARSClassifier(\n",
+      "2021-09-21 22:00:02,271 [b'travel', b'technology', b'wellness', b'women', b'parents', b'business', b'weddings', b'fashion', b'entertainment', b'science', b'divorce', b'crime', b'religion', b'sports', b'politics', b'comedy']\n",
+      "2021-09-21 22:00:02,280 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:00:02,282 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -2640,25 +2640,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:03:18,604 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:03:18,605 Corpus: \"Corpus: 130 train + 14 dev + 16 test sentences\"\n",
-      "2021-09-08 02:03:18,605 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:03:18,605 Parameters:\n",
-      "2021-09-08 02:03:18,606  - learning_rate: \"0.02\"\n",
-      "2021-09-08 02:03:18,606  - mini_batch_size: \"1\"\n",
-      "2021-09-08 02:03:18,606  - patience: \"3\"\n",
-      "2021-09-08 02:03:18,606  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 02:03:18,607  - max_epochs: \"10\"\n",
-      "2021-09-08 02:03:18,607  - shuffle: \"True\"\n",
-      "2021-09-08 02:03:18,607  - train_with_dev: \"False\"\n",
-      "2021-09-08 02:03:18,608  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 02:03:18,608 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:03:18,608 Model training base path: \"temp\"\n",
-      "2021-09-08 02:03:18,608 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:03:18,609 Device: cuda:0\n",
-      "2021-09-08 02:03:18,609 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:03:18,609 Embeddings storage mode: cpu\n",
-      "2021-09-08 02:03:18,616 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 22:00:02,282 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:00:02,282 Corpus: \"Corpus: 130 train + 14 dev + 16 test sentences\"\n",
+      "2021-09-21 22:00:02,283 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:00:02,283 Parameters:\n",
+      "2021-09-21 22:00:02,283  - learning_rate: \"0.02\"\n",
+      "2021-09-21 22:00:02,284  - mini_batch_size: \"1\"\n",
+      "2021-09-21 22:00:02,284  - patience: \"3\"\n",
+      "2021-09-21 22:00:02,284  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 22:00:02,284  - max_epochs: \"10\"\n",
+      "2021-09-21 22:00:02,285  - shuffle: \"True\"\n",
+      "2021-09-21 22:00:02,285  - train_with_dev: \"False\"\n",
+      "2021-09-21 22:00:02,285  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 22:00:02,286 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:00:02,286 Model training base path: \"temp\"\n",
+      "2021-09-21 22:00:02,286 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:00:02,286 Device: cuda:0\n",
+      "2021-09-21 22:00:02,287 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:00:02,287 Embeddings storage mode: cpu\n",
+      "2021-09-21 22:00:02,293 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -2672,204 +2672,204 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:03:19,229 epoch 1 - iter 13/130 - loss 0.28199148 - samples/sec: 21.81 - lr: 0.020000\n",
-      "2021-09-08 02:03:19,824 epoch 1 - iter 26/130 - loss 0.55938970 - samples/sec: 21.86 - lr: 0.020000\n",
-      "2021-09-08 02:03:20,430 epoch 1 - iter 39/130 - loss 0.52856871 - samples/sec: 21.49 - lr: 0.020000\n",
-      "2021-09-08 02:03:21,036 epoch 1 - iter 52/130 - loss 0.56681684 - samples/sec: 21.47 - lr: 0.020000\n",
-      "2021-09-08 02:03:21,648 epoch 1 - iter 65/130 - loss 0.54965487 - samples/sec: 21.27 - lr: 0.020000\n",
-      "2021-09-08 02:03:22,241 epoch 1 - iter 78/130 - loss 0.49001832 - samples/sec: 21.94 - lr: 0.020000\n",
-      "2021-09-08 02:03:22,848 epoch 1 - iter 91/130 - loss 0.47878630 - samples/sec: 21.45 - lr: 0.020000\n",
-      "2021-09-08 02:03:23,441 epoch 1 - iter 104/130 - loss 0.46418273 - samples/sec: 21.96 - lr: 0.020000\n",
-      "2021-09-08 02:03:24,034 epoch 1 - iter 117/130 - loss 0.45508101 - samples/sec: 21.97 - lr: 0.020000\n",
-      "2021-09-08 02:03:24,628 epoch 1 - iter 130/130 - loss 0.47671831 - samples/sec: 21.88 - lr: 0.020000\n",
-      "2021-09-08 02:03:24,630 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:03:24,630 EPOCH 1 done: loss 0.4767 - lr 0.0200000\n",
-      "2021-09-08 02:03:24,861 DEV : loss 0.6513312458992004 - score 0.2143\n",
-      "2021-09-08 02:03:24,862 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:00:03,075 epoch 1 - iter 13/130 - loss 0.51875671 - samples/sec: 16.99 - lr: 0.020000\n",
+      "2021-09-21 22:00:03,843 epoch 1 - iter 26/130 - loss 0.56872759 - samples/sec: 16.94 - lr: 0.020000\n",
+      "2021-09-21 22:00:04,602 epoch 1 - iter 39/130 - loss 0.43457799 - samples/sec: 17.14 - lr: 0.020000\n",
+      "2021-09-21 22:00:05,371 epoch 1 - iter 52/130 - loss 0.53410531 - samples/sec: 16.92 - lr: 0.020000\n",
+      "2021-09-21 22:00:06,099 epoch 1 - iter 65/130 - loss 0.51586055 - samples/sec: 17.88 - lr: 0.020000\n",
+      "2021-09-21 22:00:06,891 epoch 1 - iter 78/130 - loss 0.56503059 - samples/sec: 16.44 - lr: 0.020000\n",
+      "2021-09-21 22:00:07,653 epoch 1 - iter 91/130 - loss 0.56368279 - samples/sec: 17.08 - lr: 0.020000\n",
+      "2021-09-21 22:00:08,420 epoch 1 - iter 104/130 - loss 0.56560413 - samples/sec: 16.97 - lr: 0.020000\n",
+      "2021-09-21 22:00:09,176 epoch 1 - iter 117/130 - loss 0.56379803 - samples/sec: 17.20 - lr: 0.020000\n",
+      "2021-09-21 22:00:09,951 epoch 1 - iter 130/130 - loss 0.58035798 - samples/sec: 16.81 - lr: 0.020000\n",
+      "2021-09-21 22:00:09,952 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:00:09,952 EPOCH 1 done: loss 0.5804 - lr 0.0200000\n",
+      "2021-09-21 22:00:10,439 DEV : loss 0.46049273014068604 - score 0.4286\n",
+      "2021-09-21 22:00:10,440 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 22:00:15,622 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:00:16,451 epoch 2 - iter 13/130 - loss 0.53440465 - samples/sec: 16.79 - lr: 0.020000\n",
+      "2021-09-21 22:00:17,218 epoch 2 - iter 26/130 - loss 0.59651395 - samples/sec: 16.98 - lr: 0.020000\n",
+      "2021-09-21 22:00:17,998 epoch 2 - iter 39/130 - loss 0.60826574 - samples/sec: 16.68 - lr: 0.020000\n",
+      "2021-09-21 22:00:18,753 epoch 2 - iter 52/130 - loss 0.56841020 - samples/sec: 17.24 - lr: 0.020000\n",
+      "2021-09-21 22:00:19,545 epoch 2 - iter 65/130 - loss 0.55090592 - samples/sec: 16.43 - lr: 0.020000\n",
+      "2021-09-21 22:00:20,311 epoch 2 - iter 78/130 - loss 0.51725994 - samples/sec: 16.99 - lr: 0.020000\n",
+      "2021-09-21 22:00:21,070 epoch 2 - iter 91/130 - loss 0.54995708 - samples/sec: 17.15 - lr: 0.020000\n",
+      "2021-09-21 22:00:21,828 epoch 2 - iter 104/130 - loss 0.51228918 - samples/sec: 17.17 - lr: 0.020000\n",
+      "2021-09-21 22:00:22,474 epoch 2 - iter 117/130 - loss 0.53415197 - samples/sec: 20.17 - lr: 0.020000\n",
+      "2021-09-21 22:00:23,082 epoch 2 - iter 130/130 - loss 0.57117206 - samples/sec: 21.40 - lr: 0.020000\n",
+      "2021-09-21 22:00:23,083 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:00:23,084 EPOCH 2 done: loss 0.5712 - lr 0.0200000\n",
+      "2021-09-21 22:00:23,394 DEV : loss 0.37266501784324646 - score 0.6429\n",
+      "2021-09-21 22:00:23,395 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:03:29,926 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:03:30,550 epoch 2 - iter 13/130 - loss 0.72617368 - samples/sec: 21.44 - lr: 0.020000\n",
-      "2021-09-08 02:03:31,146 epoch 2 - iter 26/130 - loss 0.65755066 - samples/sec: 21.84 - lr: 0.020000\n",
-      "2021-09-08 02:03:31,758 epoch 2 - iter 39/130 - loss 0.63791537 - samples/sec: 21.28 - lr: 0.020000\n",
-      "2021-09-08 02:03:32,363 epoch 2 - iter 52/130 - loss 0.60379382 - samples/sec: 21.48 - lr: 0.020000\n",
-      "2021-09-08 02:03:32,955 epoch 2 - iter 65/130 - loss 0.59109274 - samples/sec: 21.99 - lr: 0.020000\n",
-      "2021-09-08 02:03:33,547 epoch 2 - iter 78/130 - loss 0.54389626 - samples/sec: 21.98 - lr: 0.020000\n",
-      "2021-09-08 02:03:34,136 epoch 2 - iter 91/130 - loss 0.54850721 - samples/sec: 22.11 - lr: 0.020000\n",
-      "2021-09-08 02:03:34,737 epoch 2 - iter 104/130 - loss 0.54891397 - samples/sec: 21.66 - lr: 0.020000\n",
-      "2021-09-08 02:03:35,335 epoch 2 - iter 117/130 - loss 0.53001819 - samples/sec: 21.77 - lr: 0.020000\n",
-      "2021-09-08 02:03:35,965 epoch 2 - iter 130/130 - loss 0.51512000 - samples/sec: 20.66 - lr: 0.020000\n",
-      "2021-09-08 02:03:35,967 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:03:35,967 EPOCH 2 done: loss 0.5151 - lr 0.0200000\n",
-      "2021-09-08 02:03:36,221 DEV : loss 0.4992019236087799 - score 0.4286\n",
-      "2021-09-08 02:03:36,222 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:00:33,023 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:00:33,760 epoch 3 - iter 13/130 - loss 0.38724433 - samples/sec: 18.10 - lr: 0.020000\n",
+      "2021-09-21 22:00:34,500 epoch 3 - iter 26/130 - loss 0.32746836 - samples/sec: 17.58 - lr: 0.020000\n",
+      "2021-09-21 22:00:35,305 epoch 3 - iter 39/130 - loss 0.31586870 - samples/sec: 16.17 - lr: 0.020000\n",
+      "2021-09-21 22:00:36,086 epoch 3 - iter 52/130 - loss 0.41923340 - samples/sec: 16.66 - lr: 0.020000\n",
+      "2021-09-21 22:00:36,862 epoch 3 - iter 65/130 - loss 0.39785212 - samples/sec: 16.78 - lr: 0.020000\n",
+      "2021-09-21 22:00:37,745 epoch 3 - iter 78/130 - loss 0.42310796 - samples/sec: 14.74 - lr: 0.020000\n",
+      "2021-09-21 22:00:38,562 epoch 3 - iter 91/130 - loss 0.43958815 - samples/sec: 15.92 - lr: 0.020000\n",
+      "2021-09-21 22:00:39,336 epoch 3 - iter 104/130 - loss 0.42954294 - samples/sec: 16.80 - lr: 0.020000\n",
+      "2021-09-21 22:00:40,095 epoch 3 - iter 117/130 - loss 0.43373956 - samples/sec: 17.16 - lr: 0.020000\n",
+      "2021-09-21 22:00:40,842 epoch 3 - iter 130/130 - loss 0.40417297 - samples/sec: 17.41 - lr: 0.020000\n",
+      "2021-09-21 22:00:40,843 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:00:40,844 EPOCH 3 done: loss 0.4042 - lr 0.0200000\n",
+      "2021-09-21 22:00:41,350 DEV : loss 0.19366015493869781 - score 0.7857\n",
+      "2021-09-21 22:00:41,351 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:03:40,482 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:03:41,108 epoch 3 - iter 13/130 - loss 0.79034996 - samples/sec: 21.41 - lr: 0.020000\n",
-      "2021-09-08 02:03:41,709 epoch 3 - iter 26/130 - loss 0.60346365 - samples/sec: 21.65 - lr: 0.020000\n",
-      "2021-09-08 02:03:42,302 epoch 3 - iter 39/130 - loss 0.52815981 - samples/sec: 21.97 - lr: 0.020000\n",
-      "2021-09-08 02:03:42,905 epoch 3 - iter 52/130 - loss 0.52981575 - samples/sec: 21.59 - lr: 0.020000\n",
-      "2021-09-08 02:03:43,500 epoch 3 - iter 65/130 - loss 0.49298165 - samples/sec: 21.85 - lr: 0.020000\n",
-      "2021-09-08 02:03:44,103 epoch 3 - iter 78/130 - loss 0.48864108 - samples/sec: 21.59 - lr: 0.020000\n",
-      "2021-09-08 02:03:44,700 epoch 3 - iter 91/130 - loss 0.49150005 - samples/sec: 21.80 - lr: 0.020000\n",
-      "2021-09-08 02:03:45,305 epoch 3 - iter 104/130 - loss 0.48004358 - samples/sec: 21.53 - lr: 0.020000\n",
-      "2021-09-08 02:03:45,894 epoch 3 - iter 117/130 - loss 0.46032786 - samples/sec: 22.06 - lr: 0.020000\n",
-      "2021-09-08 02:03:46,488 epoch 3 - iter 130/130 - loss 0.48333672 - samples/sec: 21.91 - lr: 0.020000\n",
-      "2021-09-08 02:03:46,490 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:03:46,490 EPOCH 3 done: loss 0.4833 - lr 0.0200000\n",
-      "2021-09-08 02:03:46,735 DEV : loss 0.2559770345687866 - score 0.5714\n",
-      "2021-09-08 02:03:46,736 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:00:50,481 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:00:51,078 epoch 4 - iter 13/130 - loss 0.48801055 - samples/sec: 22.42 - lr: 0.020000\n",
+      "2021-09-21 22:00:51,676 epoch 4 - iter 26/130 - loss 0.41699814 - samples/sec: 21.78 - lr: 0.020000\n",
+      "2021-09-21 22:00:52,247 epoch 4 - iter 39/130 - loss 0.39055698 - samples/sec: 22.79 - lr: 0.020000\n",
+      "2021-09-21 22:00:52,815 epoch 4 - iter 52/130 - loss 0.36516336 - samples/sec: 22.90 - lr: 0.020000\n",
+      "2021-09-21 22:00:53,396 epoch 4 - iter 65/130 - loss 0.38107946 - samples/sec: 22.42 - lr: 0.020000\n",
+      "2021-09-21 22:00:53,957 epoch 4 - iter 78/130 - loss 0.34595641 - samples/sec: 23.19 - lr: 0.020000\n",
+      "2021-09-21 22:00:54,537 epoch 4 - iter 91/130 - loss 0.35549465 - samples/sec: 22.43 - lr: 0.020000\n",
+      "2021-09-21 22:00:55,136 epoch 4 - iter 104/130 - loss 0.33381810 - samples/sec: 21.74 - lr: 0.020000\n",
+      "2021-09-21 22:00:55,900 epoch 4 - iter 117/130 - loss 0.32808093 - samples/sec: 17.05 - lr: 0.020000\n",
+      "2021-09-21 22:00:56,673 epoch 4 - iter 130/130 - loss 0.30799153 - samples/sec: 16.83 - lr: 0.020000\n",
+      "2021-09-21 22:00:56,674 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:00:56,675 EPOCH 4 done: loss 0.3080 - lr 0.0200000\n",
+      "2021-09-21 22:00:57,216 DEV : loss 0.36171871423721313 - score 0.9286\n",
+      "2021-09-21 22:00:57,217 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:03:50,949 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:03:51,581 epoch 4 - iter 13/130 - loss 0.46673518 - samples/sec: 21.17 - lr: 0.020000\n",
-      "2021-09-08 02:03:52,174 epoch 4 - iter 26/130 - loss 0.35507469 - samples/sec: 21.95 - lr: 0.020000\n",
-      "2021-09-08 02:03:52,762 epoch 4 - iter 39/130 - loss 0.35890968 - samples/sec: 22.15 - lr: 0.020000\n",
-      "2021-09-08 02:03:53,347 epoch 4 - iter 52/130 - loss 0.28758643 - samples/sec: 22.22 - lr: 0.020000\n",
-      "2021-09-08 02:03:53,949 epoch 4 - iter 65/130 - loss 0.30315745 - samples/sec: 21.65 - lr: 0.020000\n",
-      "2021-09-08 02:03:54,550 epoch 4 - iter 78/130 - loss 0.34164220 - samples/sec: 21.62 - lr: 0.020000\n",
-      "2021-09-08 02:03:55,146 epoch 4 - iter 91/130 - loss 0.32672655 - samples/sec: 21.87 - lr: 0.020000\n",
-      "2021-09-08 02:03:55,741 epoch 4 - iter 104/130 - loss 0.33641643 - samples/sec: 21.85 - lr: 0.020000\n",
-      "2021-09-08 02:03:56,333 epoch 4 - iter 117/130 - loss 0.30787326 - samples/sec: 22.00 - lr: 0.020000\n",
-      "2021-09-08 02:03:56,934 epoch 4 - iter 130/130 - loss 0.29815390 - samples/sec: 21.67 - lr: 0.020000\n",
-      "2021-09-08 02:03:56,935 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:03:56,936 EPOCH 4 done: loss 0.2982 - lr 0.0200000\n",
-      "2021-09-08 02:03:57,385 DEV : loss 0.32965287566185 - score 0.5714\n",
-      "2021-09-08 02:03:57,386 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:03:57,391 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:03:57,996 epoch 5 - iter 13/130 - loss 0.21041557 - samples/sec: 22.05 - lr: 0.020000\n",
-      "2021-09-08 02:03:58,573 epoch 5 - iter 26/130 - loss 0.28808381 - samples/sec: 22.57 - lr: 0.020000\n",
-      "2021-09-08 02:03:59,153 epoch 5 - iter 39/130 - loss 0.31764521 - samples/sec: 22.45 - lr: 0.020000\n",
-      "2021-09-08 02:03:59,744 epoch 5 - iter 52/130 - loss 0.28680595 - samples/sec: 22.01 - lr: 0.020000\n",
-      "2021-09-08 02:04:00,350 epoch 5 - iter 65/130 - loss 0.33042608 - samples/sec: 21.48 - lr: 0.020000\n",
-      "2021-09-08 02:04:00,934 epoch 5 - iter 78/130 - loss 0.32500984 - samples/sec: 22.29 - lr: 0.020000\n",
-      "2021-09-08 02:04:01,518 epoch 5 - iter 91/130 - loss 0.31700272 - samples/sec: 22.33 - lr: 0.020000\n",
-      "2021-09-08 02:04:02,102 epoch 5 - iter 104/130 - loss 0.31849201 - samples/sec: 22.26 - lr: 0.020000\n",
-      "2021-09-08 02:04:02,693 epoch 5 - iter 117/130 - loss 0.33159072 - samples/sec: 22.06 - lr: 0.020000\n",
-      "2021-09-08 02:04:03,281 epoch 5 - iter 130/130 - loss 0.31774205 - samples/sec: 22.11 - lr: 0.020000\n",
-      "2021-09-08 02:04:03,283 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:04:03,283 EPOCH 5 done: loss 0.3177 - lr 0.0200000\n",
-      "2021-09-08 02:04:03,512 DEV : loss 0.3248217701911926 - score 0.5\n",
-      "2021-09-08 02:04:03,512 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:04:03,515 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:04:04,125 epoch 6 - iter 13/130 - loss 0.25569571 - samples/sec: 21.89 - lr: 0.020000\n",
-      "2021-09-08 02:04:04,710 epoch 6 - iter 26/130 - loss 0.29913721 - samples/sec: 22.26 - lr: 0.020000\n",
-      "2021-09-08 02:04:05,293 epoch 6 - iter 39/130 - loss 0.26298832 - samples/sec: 22.31 - lr: 0.020000\n",
-      "2021-09-08 02:04:05,925 epoch 6 - iter 52/130 - loss 0.26162237 - samples/sec: 20.61 - lr: 0.020000\n",
-      "2021-09-08 02:04:06,508 epoch 6 - iter 65/130 - loss 0.24264931 - samples/sec: 22.32 - lr: 0.020000\n",
-      "2021-09-08 02:04:07,098 epoch 6 - iter 78/130 - loss 0.28172132 - samples/sec: 22.08 - lr: 0.020000\n",
-      "2021-09-08 02:04:07,676 epoch 6 - iter 91/130 - loss 0.26323578 - samples/sec: 22.54 - lr: 0.020000\n",
-      "2021-09-08 02:04:08,258 epoch 6 - iter 104/130 - loss 0.26330414 - samples/sec: 22.35 - lr: 0.020000\n",
-      "2021-09-08 02:04:08,842 epoch 6 - iter 117/130 - loss 0.24004806 - samples/sec: 22.28 - lr: 0.020000\n",
-      "2021-09-08 02:04:09,421 epoch 6 - iter 130/130 - loss 0.22320420 - samples/sec: 22.51 - lr: 0.020000\n"
+      "2021-09-21 22:01:01,263 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:01:01,891 epoch 5 - iter 13/130 - loss 0.29475853 - samples/sec: 21.28 - lr: 0.020000\n",
+      "2021-09-21 22:01:02,499 epoch 5 - iter 26/130 - loss 0.25176042 - samples/sec: 21.44 - lr: 0.020000\n",
+      "2021-09-21 22:01:03,067 epoch 5 - iter 39/130 - loss 0.22614000 - samples/sec: 22.93 - lr: 0.020000\n",
+      "2021-09-21 22:01:03,637 epoch 5 - iter 52/130 - loss 0.25690210 - samples/sec: 22.84 - lr: 0.020000\n",
+      "2021-09-21 22:01:04,200 epoch 5 - iter 65/130 - loss 0.21177037 - samples/sec: 23.11 - lr: 0.020000\n",
+      "2021-09-21 22:01:04,762 epoch 5 - iter 78/130 - loss 0.22109417 - samples/sec: 23.15 - lr: 0.020000\n",
+      "2021-09-21 22:01:05,326 epoch 5 - iter 91/130 - loss 0.24616724 - samples/sec: 23.08 - lr: 0.020000\n",
+      "2021-09-21 22:01:05,974 epoch 5 - iter 104/130 - loss 0.23264551 - samples/sec: 20.09 - lr: 0.020000\n",
+      "2021-09-21 22:01:06,734 epoch 5 - iter 117/130 - loss 0.26257448 - samples/sec: 17.11 - lr: 0.020000\n",
+      "2021-09-21 22:01:07,476 epoch 5 - iter 130/130 - loss 0.24678780 - samples/sec: 17.54 - lr: 0.020000\n",
+      "2021-09-21 22:01:07,478 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:01:07,478 EPOCH 5 done: loss 0.2468 - lr 0.0200000\n",
+      "2021-09-21 22:01:07,978 DEV : loss 0.33842459321022034 - score 0.7143\n",
+      "2021-09-21 22:01:07,979 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:01:07,981 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:01:08,769 epoch 6 - iter 13/130 - loss 0.10385766 - samples/sec: 17.10 - lr: 0.020000\n",
+      "2021-09-21 22:01:09,489 epoch 6 - iter 26/130 - loss 0.13483504 - samples/sec: 18.07 - lr: 0.020000\n",
+      "2021-09-21 22:01:10,233 epoch 6 - iter 39/130 - loss 0.09657467 - samples/sec: 17.50 - lr: 0.020000\n",
+      "2021-09-21 22:01:10,966 epoch 6 - iter 52/130 - loss 0.10302717 - samples/sec: 17.74 - lr: 0.020000\n",
+      "2021-09-21 22:01:11,721 epoch 6 - iter 65/130 - loss 0.12364355 - samples/sec: 17.25 - lr: 0.020000\n",
+      "2021-09-21 22:01:12,452 epoch 6 - iter 78/130 - loss 0.15783966 - samples/sec: 17.79 - lr: 0.020000\n",
+      "2021-09-21 22:01:13,057 epoch 6 - iter 91/130 - loss 0.17805071 - samples/sec: 21.52 - lr: 0.020000\n",
+      "2021-09-21 22:01:13,624 epoch 6 - iter 104/130 - loss 0.18481847 - samples/sec: 22.97 - lr: 0.020000\n",
+      "2021-09-21 22:01:14,187 epoch 6 - iter 117/130 - loss 0.18121049 - samples/sec: 23.12 - lr: 0.020000\n",
+      "2021-09-21 22:01:14,751 epoch 6 - iter 130/130 - loss 0.16866427 - samples/sec: 23.09 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:04:09,422 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:04:09,422 EPOCH 6 done: loss 0.2232 - lr 0.0200000\n",
-      "2021-09-08 02:04:09,650 DEV : loss 0.3365236818790436 - score 0.7143\n",
-      "2021-09-08 02:04:09,651 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 02:04:13,613 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:04:14,209 epoch 7 - iter 13/130 - loss 0.01483150 - samples/sec: 22.50 - lr: 0.020000\n",
-      "2021-09-08 02:04:14,801 epoch 7 - iter 26/130 - loss 0.00798788 - samples/sec: 21.98 - lr: 0.020000\n",
-      "2021-09-08 02:04:15,406 epoch 7 - iter 39/130 - loss 0.10434307 - samples/sec: 21.54 - lr: 0.020000\n",
-      "2021-09-08 02:04:15,982 epoch 7 - iter 52/130 - loss 0.11227265 - samples/sec: 22.60 - lr: 0.020000\n",
-      "2021-09-08 02:04:16,553 epoch 7 - iter 65/130 - loss 0.12121161 - samples/sec: 22.77 - lr: 0.020000\n",
-      "2021-09-08 02:04:17,138 epoch 7 - iter 78/130 - loss 0.17127582 - samples/sec: 22.27 - lr: 0.020000\n",
-      "2021-09-08 02:04:17,720 epoch 7 - iter 91/130 - loss 0.20526955 - samples/sec: 22.35 - lr: 0.020000\n",
-      "2021-09-08 02:04:18,300 epoch 7 - iter 104/130 - loss 0.18081980 - samples/sec: 22.45 - lr: 0.020000\n",
-      "2021-09-08 02:04:18,876 epoch 7 - iter 117/130 - loss 0.17562964 - samples/sec: 22.60 - lr: 0.020000\n",
-      "2021-09-08 02:04:19,459 epoch 7 - iter 130/130 - loss 0.17425550 - samples/sec: 22.31 - lr: 0.020000\n",
-      "2021-09-08 02:04:19,461 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:04:19,461 EPOCH 7 done: loss 0.1743 - lr 0.0200000\n",
-      "2021-09-08 02:04:19,689 DEV : loss 0.33321183919906616 - score 0.7143\n",
-      "2021-09-08 02:04:19,690 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 02:04:23,583 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:04:24,230 epoch 8 - iter 13/130 - loss 0.26166361 - samples/sec: 20.67 - lr: 0.020000\n",
-      "2021-09-08 02:04:24,805 epoch 8 - iter 26/130 - loss 0.13340877 - samples/sec: 22.64 - lr: 0.020000\n",
-      "2021-09-08 02:04:25,379 epoch 8 - iter 39/130 - loss 0.14828190 - samples/sec: 22.67 - lr: 0.020000\n",
-      "2021-09-08 02:04:25,949 epoch 8 - iter 52/130 - loss 0.11261174 - samples/sec: 22.84 - lr: 0.020000\n",
-      "2021-09-08 02:04:26,521 epoch 8 - iter 65/130 - loss 0.11722348 - samples/sec: 22.75 - lr: 0.020000\n",
-      "2021-09-08 02:04:27,091 epoch 8 - iter 78/130 - loss 0.09805423 - samples/sec: 22.83 - lr: 0.020000\n",
-      "2021-09-08 02:04:27,661 epoch 8 - iter 91/130 - loss 0.08929045 - samples/sec: 22.82 - lr: 0.020000\n",
-      "2021-09-08 02:04:28,238 epoch 8 - iter 104/130 - loss 0.11286499 - samples/sec: 22.56 - lr: 0.020000\n",
-      "2021-09-08 02:04:28,817 epoch 8 - iter 117/130 - loss 0.12340807 - samples/sec: 22.50 - lr: 0.020000\n",
-      "2021-09-08 02:04:29,393 epoch 8 - iter 130/130 - loss 0.11318996 - samples/sec: 22.61 - lr: 0.020000\n",
-      "2021-09-08 02:04:29,394 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:04:29,394 EPOCH 8 done: loss 0.1132 - lr 0.0200000\n",
-      "2021-09-08 02:04:29,622 DEV : loss 0.4573519825935364 - score 0.5714\n",
-      "2021-09-08 02:04:29,622 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:04:29,624 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:04:30,216 epoch 9 - iter 13/130 - loss 0.26394138 - samples/sec: 22.56 - lr: 0.020000\n",
-      "2021-09-08 02:04:30,789 epoch 9 - iter 26/130 - loss 0.13848985 - samples/sec: 22.72 - lr: 0.020000\n",
-      "2021-09-08 02:04:31,365 epoch 9 - iter 39/130 - loss 0.09393139 - samples/sec: 22.60 - lr: 0.020000\n",
-      "2021-09-08 02:04:31,943 epoch 9 - iter 52/130 - loss 0.11014643 - samples/sec: 22.55 - lr: 0.020000\n",
-      "2021-09-08 02:04:32,529 epoch 9 - iter 65/130 - loss 0.15974048 - samples/sec: 22.20 - lr: 0.020000\n",
-      "2021-09-08 02:04:33,116 epoch 9 - iter 78/130 - loss 0.19055740 - samples/sec: 22.20 - lr: 0.020000\n",
-      "2021-09-08 02:04:33,688 epoch 9 - iter 91/130 - loss 0.16653790 - samples/sec: 22.75 - lr: 0.020000\n",
-      "2021-09-08 02:04:34,270 epoch 9 - iter 104/130 - loss 0.18616202 - samples/sec: 22.36 - lr: 0.020000\n",
-      "2021-09-08 02:04:34,845 epoch 9 - iter 117/130 - loss 0.17787597 - samples/sec: 22.64 - lr: 0.020000\n",
-      "2021-09-08 02:04:35,420 epoch 9 - iter 130/130 - loss 0.17073105 - samples/sec: 22.65 - lr: 0.020000\n",
-      "2021-09-08 02:04:35,421 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:04:35,421 EPOCH 9 done: loss 0.1707 - lr 0.0200000\n",
-      "2021-09-08 02:04:35,649 DEV : loss 0.36868026852607727 - score 0.5714\n",
-      "2021-09-08 02:04:35,650 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:04:35,652 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:04:36,238 epoch 10 - iter 13/130 - loss 0.00191204 - samples/sec: 22.79 - lr: 0.020000\n",
-      "2021-09-08 02:04:36,825 epoch 10 - iter 26/130 - loss 0.16439050 - samples/sec: 22.18 - lr: 0.020000\n",
-      "2021-09-08 02:04:37,396 epoch 10 - iter 39/130 - loss 0.11125623 - samples/sec: 22.81 - lr: 0.020000\n",
-      "2021-09-08 02:04:37,974 epoch 10 - iter 52/130 - loss 0.13126839 - samples/sec: 22.52 - lr: 0.020000\n",
-      "2021-09-08 02:04:38,557 epoch 10 - iter 65/130 - loss 0.11991161 - samples/sec: 22.35 - lr: 0.020000\n",
-      "2021-09-08 02:04:39,126 epoch 10 - iter 78/130 - loss 0.10015868 - samples/sec: 22.88 - lr: 0.020000\n",
-      "2021-09-08 02:04:39,707 epoch 10 - iter 91/130 - loss 0.17087615 - samples/sec: 22.38 - lr: 0.020000\n",
-      "2021-09-08 02:04:40,289 epoch 10 - iter 104/130 - loss 0.16102608 - samples/sec: 22.39 - lr: 0.020000\n",
-      "2021-09-08 02:04:40,860 epoch 10 - iter 117/130 - loss 0.15129600 - samples/sec: 22.80 - lr: 0.020000\n",
-      "2021-09-08 02:04:41,437 epoch 10 - iter 130/130 - loss 0.15549254 - samples/sec: 22.55 - lr: 0.020000\n",
-      "2021-09-08 02:04:41,438 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:04:41,439 EPOCH 10 done: loss 0.1555 - lr 0.0200000\n",
-      "2021-09-08 02:04:41,666 DEV : loss 0.3267565667629242 - score 0.6429\n",
-      "2021-09-08 02:04:41,667 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 02:04:45,826 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:04:45,826 Testing using best model ...\n",
-      "2021-09-08 02:04:45,828 loading file temp/best-model.pt\n",
+      "2021-09-21 22:01:14,752 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:01:14,752 EPOCH 6 done: loss 0.1687 - lr 0.0200000\n",
+      "2021-09-21 22:01:15,094 DEV : loss 0.1563355028629303 - score 0.7857\n",
+      "2021-09-21 22:01:15,095 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 22:01:15,182 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:01:15,759 epoch 7 - iter 13/130 - loss 0.16086762 - samples/sec: 23.20 - lr: 0.020000\n",
+      "2021-09-21 22:01:16,321 epoch 7 - iter 26/130 - loss 0.27013639 - samples/sec: 23.13 - lr: 0.020000\n",
+      "2021-09-21 22:01:16,875 epoch 7 - iter 39/130 - loss 0.18069246 - samples/sec: 23.49 - lr: 0.020000\n",
+      "2021-09-21 22:01:17,553 epoch 7 - iter 52/130 - loss 0.14635342 - samples/sec: 19.21 - lr: 0.020000\n",
+      "2021-09-21 22:01:18,272 epoch 7 - iter 65/130 - loss 0.14038876 - samples/sec: 18.10 - lr: 0.020000\n",
+      "2021-09-21 22:01:18,963 epoch 7 - iter 78/130 - loss 0.14541413 - samples/sec: 18.84 - lr: 0.020000\n",
+      "2021-09-21 22:01:19,684 epoch 7 - iter 91/130 - loss 0.12492675 - samples/sec: 18.05 - lr: 0.020000\n",
+      "2021-09-21 22:01:20,402 epoch 7 - iter 104/130 - loss 0.13790597 - samples/sec: 18.12 - lr: 0.020000\n",
+      "2021-09-21 22:01:21,150 epoch 7 - iter 117/130 - loss 0.12337445 - samples/sec: 17.41 - lr: 0.020000\n",
+      "2021-09-21 22:01:21,900 epoch 7 - iter 130/130 - loss 0.14808746 - samples/sec: 17.34 - lr: 0.020000\n",
+      "2021-09-21 22:01:21,901 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:01:21,902 EPOCH 7 done: loss 0.1481 - lr 0.0200000\n",
+      "2021-09-21 22:01:22,569 DEV : loss 0.08317986130714417 - score 0.7857\n",
+      "2021-09-21 22:01:22,569 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 22:01:22,571 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:01:23,365 epoch 8 - iter 13/130 - loss 0.00494305 - samples/sec: 17.29 - lr: 0.020000\n",
+      "2021-09-21 22:01:24,098 epoch 8 - iter 26/130 - loss 0.00471542 - samples/sec: 17.73 - lr: 0.020000\n",
+      "2021-09-21 22:01:24,833 epoch 8 - iter 39/130 - loss 0.00368439 - samples/sec: 17.73 - lr: 0.020000\n",
+      "2021-09-21 22:01:25,575 epoch 8 - iter 52/130 - loss 0.04570622 - samples/sec: 17.54 - lr: 0.020000\n",
+      "2021-09-21 22:01:26,328 epoch 8 - iter 65/130 - loss 0.03664223 - samples/sec: 17.28 - lr: 0.020000\n",
+      "2021-09-21 22:01:27,057 epoch 8 - iter 78/130 - loss 0.03059573 - samples/sec: 17.85 - lr: 0.020000\n",
+      "2021-09-21 22:01:27,826 epoch 8 - iter 91/130 - loss 0.06111803 - samples/sec: 16.91 - lr: 0.020000\n",
+      "2021-09-21 22:01:28,572 epoch 8 - iter 104/130 - loss 0.08645114 - samples/sec: 17.45 - lr: 0.020000\n",
+      "2021-09-21 22:01:29,344 epoch 8 - iter 117/130 - loss 0.07765715 - samples/sec: 16.86 - lr: 0.020000\n",
+      "2021-09-21 22:01:30,073 epoch 8 - iter 130/130 - loss 0.06998196 - samples/sec: 17.86 - lr: 0.020000\n",
+      "2021-09-21 22:01:30,074 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:01:30,075 EPOCH 8 done: loss 0.0700 - lr 0.0200000\n",
+      "2021-09-21 22:01:30,370 DEV : loss 0.12956547737121582 - score 0.7857\n",
+      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 22:01:30,371 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 22:01:30,376 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:01:30,981 epoch 9 - iter 13/130 - loss 0.00081394 - samples/sec: 22.13 - lr: 0.010000\n",
+      "2021-09-21 22:01:31,540 epoch 9 - iter 26/130 - loss 0.00071508 - samples/sec: 23.27 - lr: 0.010000\n",
+      "2021-09-21 22:01:32,101 epoch 9 - iter 39/130 - loss 0.10739364 - samples/sec: 23.23 - lr: 0.010000\n",
+      "2021-09-21 22:01:32,663 epoch 9 - iter 52/130 - loss 0.08218647 - samples/sec: 23.17 - lr: 0.010000\n",
+      "2021-09-21 22:01:33,220 epoch 9 - iter 65/130 - loss 0.06620520 - samples/sec: 23.37 - lr: 0.010000\n",
+      "2021-09-21 22:01:33,778 epoch 9 - iter 78/130 - loss 0.05977476 - samples/sec: 23.31 - lr: 0.010000\n",
+      "2021-09-21 22:01:34,337 epoch 9 - iter 91/130 - loss 0.07326312 - samples/sec: 23.32 - lr: 0.010000\n",
+      "2021-09-21 22:01:34,890 epoch 9 - iter 104/130 - loss 0.06444897 - samples/sec: 23.53 - lr: 0.010000\n",
+      "2021-09-21 22:01:35,451 epoch 9 - iter 117/130 - loss 0.08776192 - samples/sec: 23.21 - lr: 0.010000\n",
+      "2021-09-21 22:01:36,012 epoch 9 - iter 130/130 - loss 0.10156408 - samples/sec: 23.21 - lr: 0.010000\n",
+      "2021-09-21 22:01:36,013 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:01:36,013 EPOCH 9 done: loss 0.1016 - lr 0.0100000\n",
+      "2021-09-21 22:01:39,263 DEV : loss 0.13153468072414398 - score 0.7857\n",
+      "2021-09-21 22:01:39,264 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:01:39,286 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:01:40,067 epoch 10 - iter 13/130 - loss 0.14509452 - samples/sec: 17.55 - lr: 0.010000\n",
+      "2021-09-21 22:01:40,780 epoch 10 - iter 26/130 - loss 0.07345577 - samples/sec: 18.27 - lr: 0.010000\n",
+      "2021-09-21 22:01:41,540 epoch 10 - iter 39/130 - loss 0.21009995 - samples/sec: 17.12 - lr: 0.010000\n",
+      "2021-09-21 22:01:42,298 epoch 10 - iter 52/130 - loss 0.16301835 - samples/sec: 17.15 - lr: 0.010000\n",
+      "2021-09-21 22:01:43,054 epoch 10 - iter 65/130 - loss 0.15325908 - samples/sec: 17.23 - lr: 0.010000\n",
+      "2021-09-21 22:01:43,789 epoch 10 - iter 78/130 - loss 0.13095529 - samples/sec: 17.70 - lr: 0.010000\n",
+      "2021-09-21 22:01:44,562 epoch 10 - iter 91/130 - loss 0.11567747 - samples/sec: 16.84 - lr: 0.010000\n",
+      "2021-09-21 22:01:45,289 epoch 10 - iter 104/130 - loss 0.10148997 - samples/sec: 17.90 - lr: 0.010000\n",
+      "2021-09-21 22:01:46,001 epoch 10 - iter 117/130 - loss 0.10920730 - samples/sec: 18.29 - lr: 0.010000\n",
+      "2021-09-21 22:01:46,752 epoch 10 - iter 130/130 - loss 0.10666351 - samples/sec: 17.33 - lr: 0.010000\n",
+      "2021-09-21 22:01:46,753 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:01:46,754 EPOCH 10 done: loss 0.1067 - lr 0.0100000\n",
+      "2021-09-21 22:01:47,316 DEV : loss 0.10798890888690948 - score 0.7857\n",
+      "2021-09-21 22:01:47,316 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 22:01:51,668 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:01:51,669 Testing using best model ...\n",
+      "2021-09-21 22:01:51,670 loading file temp/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:04:50,754 \t0.5625\n",
-      "2021-09-08 02:04:50,755 \n",
+      "2021-09-21 22:01:58,681 \t0.4375\n",
+      "2021-09-21 22:01:58,682 \n",
       "Results:\n",
-      "- F-score (micro) 0.5625\n",
-      "- F-score (macro) 0.3562\n",
-      "- Accuracy 0.5625\n",
+      "- F-score (micro) 0.4375\n",
+      "- F-score (macro) 0.2688\n",
+      "- Accuracy 0.4375\n",
       "\n",
       "By class:\n",
       "               precision    recall  f1-score   support\n",
       "\n",
       "       travel     0.0000    0.0000    0.0000         0\n",
-      "   technology     0.0000    0.0000    0.0000         0\n",
-      "     wellness     1.0000    1.0000    1.0000         1\n",
-      "        women     0.2500    1.0000    0.4000         1\n",
-      "      parents     1.0000    0.3333    0.5000         3\n",
-      "     business     1.0000    1.0000    1.0000         2\n",
-      "     weddings     1.0000    1.0000    1.0000         1\n",
-      "      fashion     0.0000    0.0000    0.0000         1\n",
-      "entertainment     0.0000    0.0000    0.0000         0\n",
-      "      science     1.0000    0.6667    0.8000         3\n",
-      "      divorce     0.0000    0.0000    0.0000         1\n",
-      "        crime     0.0000    0.0000    0.0000         1\n",
+      "   technology     0.0000    0.0000    0.0000         1\n",
+      "     wellness     1.0000    0.6667    0.8000         3\n",
+      "        women     0.3333    1.0000    0.5000         1\n",
+      "      parents     1.0000    0.5000    0.6667         2\n",
+      "     business     0.0000    0.0000    0.0000         0\n",
+      "     weddings     0.0000    0.0000    0.0000         0\n",
+      "      fashion     0.0000    0.0000    0.0000         0\n",
+      "entertainment     1.0000    0.5000    0.6667         2\n",
+      "      science     0.0000    0.0000    0.0000         2\n",
+      "      divorce     0.5000    1.0000    0.6667         1\n",
+      "        crime     0.0000    0.0000    0.0000         0\n",
       "     religion     0.0000    0.0000    0.0000         1\n",
       "       sports     0.0000    0.0000    0.0000         0\n",
       "     politics     1.0000    1.0000    1.0000         1\n",
-      "       comedy     0.0000    0.0000    0.0000         0\n",
+      "       comedy     0.0000    0.0000    0.0000         2\n",
       "\n",
-      "    micro avg     0.5625    0.5625    0.5625        16\n",
-      "    macro avg     0.3906    0.3750    0.3563        16\n",
-      " weighted avg     0.7031    0.5625    0.5813        16\n",
-      "  samples avg     0.5625    0.5625    0.5625        16\n",
+      "    micro avg     0.4375    0.4375    0.4375        16\n",
+      "    macro avg     0.3021    0.2917    0.2687        16\n",
+      " weighted avg     0.5521    0.4375    0.4521        16\n",
+      "  samples avg     0.4375    0.4375    0.4375        16\n",
       "\n",
-      "2021-09-08 02:04:50,755 ----------------------------------------------------------------------------------------------------\n",
-      "Accuracy Durchschnitt: 0.5193798449612403\n"
+      "2021-09-21 22:01:58,682 ----------------------------------------------------------------------------------------------------\n",
+      "Accuracy Durchschnitt: 0.5189922480620155\n"
      ]
     }
    ],
@@ -2954,11 +2954,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "id": "0c4025f0",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.624031007751938, 0.1065891472868217, 0.625968992248062, 0.6124031007751938, 0.625968992248062]\n",
+      "0.20626346916702856\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(avg_acc_list)\n",
+    "print(statistics.pstdev(avg_acc_list))"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -2970,7 +2982,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 6,
    "id": "ef4da272",
    "metadata": {},
    "outputs": [
@@ -2978,25 +2990,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:04:58,574 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
+      "2021-09-21 22:02:17,890 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:05:02,592 Computing label dictionary. Progress:\n"
+      "2021-09-21 22:02:21,919 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 146/146 [00:00<00:00, 74879.97it/s]"
+      "100%|██████████| 146/146 [00:00<00:00, 76260.07it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:05:02,596 [b'this text is about travel', b'this text is about technology', b'this text is about wellness', b'this text is about women', b'this text is about parents', b'this text is about business', b'this text is about weddings', b'this text is about fashion', b'this text is about entertainmen,the attention', b'this text is about science', b'this text is about divorce', b'this text is about crime', b'this text is about religion', b'this text is about sports', b'this text is about politics', b'this text is about comedy']\n",
-      "2021-09-08 02:05:02,607 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:05:02,609 Model: \"TARSClassifier(\n",
+      "2021-09-21 22:02:21,923 [b'this text is about travel', b'this text is about technology', b'this text is about wellness', b'this text is about women', b'this text is about parents', b'this text is about business', b'this text is about weddings', b'this text is about fashion', b'this text is about entertainmen,the attention', b'this text is about science', b'this text is about divorce', b'this text is about crime', b'this text is about religion', b'this text is about sports', b'this text is about politics', b'this text is about comedy']\n",
+      "2021-09-21 22:02:22,073 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:02:22,074 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -3309,25 +3321,24 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:05:02,610 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:05:02,610 Corpus: \"Corpus: 130 train + 14 dev + 16 test sentences\"\n",
-      "2021-09-08 02:05:02,610 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:05:02,610 Parameters:\n",
-      "2021-09-08 02:05:02,611  - learning_rate: \"0.02\"\n",
-      "2021-09-08 02:05:02,611  - mini_batch_size: \"1\"\n",
-      "2021-09-08 02:05:02,611  - patience: \"3\"\n",
-      "2021-09-08 02:05:02,612  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 02:05:02,612  - max_epochs: \"10\"\n",
-      "2021-09-08 02:05:02,612  - shuffle: \"True\"\n",
-      "2021-09-08 02:05:02,612  - train_with_dev: \"False\"\n",
-      "2021-09-08 02:05:02,613  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 02:05:02,613 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:05:02,613 Model training base path: \"None\"\n",
-      "2021-09-08 02:05:02,613 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:05:02,614 Device: cuda:0\n",
-      "2021-09-08 02:05:02,614 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:05:02,614 Embeddings storage mode: cpu\n",
-      "2021-09-08 02:05:02,621 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 22:02:22,075 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:02:22,075 Corpus: \"Corpus: 130 train + 14 dev + 16 test sentences\"\n",
+      "2021-09-21 22:02:22,076 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:02:22,076 Parameters:\n",
+      "2021-09-21 22:02:22,076  - learning_rate: \"0.02\"\n",
+      "2021-09-21 22:02:22,076  - mini_batch_size: \"1\"\n",
+      "2021-09-21 22:02:22,077  - patience: \"3\"\n",
+      "2021-09-21 22:02:22,077  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 22:02:22,077  - max_epochs: \"10\"\n",
+      "2021-09-21 22:02:22,078  - shuffle: \"True\"\n",
+      "2021-09-21 22:02:22,078  - train_with_dev: \"False\"\n",
+      "2021-09-21 22:02:22,078  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 22:02:22,078 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:02:22,079 Model training base path: \"None\"\n",
+      "2021-09-21 22:02:22,079 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:02:22,079 Device: cuda:0\n",
+      "2021-09-21 22:02:22,079 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:02:22,080 Embeddings storage mode: cpu\n"
      ]
     },
     {
@@ -3341,229 +3352,224 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:05:03,263 epoch 1 - iter 13/130 - loss 0.41487520 - samples/sec: 20.79 - lr: 0.020000\n",
-      "2021-09-08 02:05:03,880 epoch 1 - iter 26/130 - loss 0.50843304 - samples/sec: 21.11 - lr: 0.020000\n",
-      "2021-09-08 02:05:04,478 epoch 1 - iter 39/130 - loss 0.48194652 - samples/sec: 21.77 - lr: 0.020000\n",
-      "2021-09-08 02:05:05,090 epoch 1 - iter 52/130 - loss 0.58268334 - samples/sec: 21.27 - lr: 0.020000\n",
-      "2021-09-08 02:05:05,698 epoch 1 - iter 65/130 - loss 0.55880719 - samples/sec: 21.41 - lr: 0.020000\n",
-      "2021-09-08 02:05:06,310 epoch 1 - iter 78/130 - loss 0.58294465 - samples/sec: 21.25 - lr: 0.020000\n",
-      "2021-09-08 02:05:06,923 epoch 1 - iter 91/130 - loss 0.59326913 - samples/sec: 21.24 - lr: 0.020000\n",
-      "2021-09-08 02:05:07,520 epoch 1 - iter 104/130 - loss 0.54657312 - samples/sec: 21.82 - lr: 0.020000\n",
-      "2021-09-08 02:05:08,135 epoch 1 - iter 117/130 - loss 0.57346379 - samples/sec: 21.16 - lr: 0.020000\n",
-      "2021-09-08 02:05:08,756 epoch 1 - iter 130/130 - loss 0.57023971 - samples/sec: 20.96 - lr: 0.020000\n",
-      "2021-09-08 02:05:08,757 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:05:08,758 EPOCH 1 done: loss 0.5702 - lr 0.0200000\n",
-      "2021-09-08 02:05:09,058 DEV : loss 0.3864195644855499 - score 0.0714\n",
-      "2021-09-08 02:05:09,059 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:02:22,266 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:02:22,847 epoch 1 - iter 13/130 - loss 0.17900863 - samples/sec: 23.06 - lr: 0.020000\n",
+      "2021-09-21 22:02:23,438 epoch 1 - iter 26/130 - loss 0.33109816 - samples/sec: 22.04 - lr: 0.020000\n",
+      "2021-09-21 22:02:24,016 epoch 1 - iter 39/130 - loss 0.36710402 - samples/sec: 22.50 - lr: 0.020000\n",
+      "2021-09-21 22:02:24,599 epoch 1 - iter 52/130 - loss 0.38986902 - samples/sec: 22.34 - lr: 0.020000\n",
+      "2021-09-21 22:02:25,187 epoch 1 - iter 65/130 - loss 0.45804023 - samples/sec: 22.14 - lr: 0.020000\n",
+      "2021-09-21 22:02:25,781 epoch 1 - iter 78/130 - loss 0.54614003 - samples/sec: 21.90 - lr: 0.020000\n",
+      "2021-09-21 22:02:26,400 epoch 1 - iter 91/130 - loss 0.55910596 - samples/sec: 21.04 - lr: 0.020000\n",
+      "2021-09-21 22:02:27,025 epoch 1 - iter 104/130 - loss 0.55336136 - samples/sec: 20.84 - lr: 0.020000\n",
+      "2021-09-21 22:02:27,595 epoch 1 - iter 117/130 - loss 0.50341110 - samples/sec: 22.83 - lr: 0.020000\n",
+      "2021-09-21 22:02:28,165 epoch 1 - iter 130/130 - loss 0.48832272 - samples/sec: 22.85 - lr: 0.020000\n",
+      "2021-09-21 22:02:28,166 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:02:28,167 EPOCH 1 done: loss 0.4883 - lr 0.0200000\n",
+      "2021-09-21 22:02:28,407 DEV : loss 0.2825157344341278 - score 0.3571\n",
+      "2021-09-21 22:02:28,408 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:05:13,170 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:05:13,811 epoch 2 - iter 13/130 - loss 0.72014232 - samples/sec: 20.89 - lr: 0.020000\n",
-      "2021-09-08 02:05:14,426 epoch 2 - iter 26/130 - loss 0.69198077 - samples/sec: 21.17 - lr: 0.020000\n",
-      "2021-09-08 02:05:15,038 epoch 2 - iter 39/130 - loss 0.64506401 - samples/sec: 21.26 - lr: 0.020000\n",
-      "2021-09-08 02:05:15,649 epoch 2 - iter 52/130 - loss 0.58365976 - samples/sec: 21.32 - lr: 0.020000\n",
-      "2021-09-08 02:05:16,254 epoch 2 - iter 65/130 - loss 0.54082019 - samples/sec: 21.51 - lr: 0.020000\n",
-      "2021-09-08 02:05:16,866 epoch 2 - iter 78/130 - loss 0.53718093 - samples/sec: 21.27 - lr: 0.020000\n",
-      "2021-09-08 02:05:17,481 epoch 2 - iter 91/130 - loss 0.55759869 - samples/sec: 21.17 - lr: 0.020000\n",
-      "2021-09-08 02:05:18,084 epoch 2 - iter 104/130 - loss 0.53899549 - samples/sec: 21.59 - lr: 0.020000\n",
-      "2021-09-08 02:05:18,686 epoch 2 - iter 117/130 - loss 0.52589381 - samples/sec: 21.63 - lr: 0.020000\n",
-      "2021-09-08 02:05:19,297 epoch 2 - iter 130/130 - loss 0.54969732 - samples/sec: 21.31 - lr: 0.020000\n",
-      "2021-09-08 02:05:19,298 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:05:19,299 EPOCH 2 done: loss 0.5497 - lr 0.0200000\n",
-      "2021-09-08 02:05:19,541 DEV : loss 0.3812975585460663 - score 0.5\n",
-      "2021-09-08 02:05:19,542 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:02:33,386 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:02:34,259 epoch 2 - iter 13/130 - loss 0.70131416 - samples/sec: 15.20 - lr: 0.020000\n",
+      "2021-09-21 22:02:35,044 epoch 2 - iter 26/130 - loss 0.64901324 - samples/sec: 16.59 - lr: 0.020000\n",
+      "2021-09-21 22:02:35,852 epoch 2 - iter 39/130 - loss 0.65736400 - samples/sec: 16.12 - lr: 0.020000\n",
+      "2021-09-21 22:02:36,653 epoch 2 - iter 52/130 - loss 0.57009044 - samples/sec: 16.25 - lr: 0.020000\n",
+      "2021-09-21 22:02:37,431 epoch 2 - iter 65/130 - loss 0.57796108 - samples/sec: 16.71 - lr: 0.020000\n",
+      "2021-09-21 22:02:38,210 epoch 2 - iter 78/130 - loss 0.54130171 - samples/sec: 16.72 - lr: 0.020000\n",
+      "2021-09-21 22:02:38,967 epoch 2 - iter 91/130 - loss 0.56264340 - samples/sec: 17.18 - lr: 0.020000\n",
+      "2021-09-21 22:02:39,759 epoch 2 - iter 104/130 - loss 0.55149360 - samples/sec: 16.43 - lr: 0.020000\n",
+      "2021-09-21 22:02:40,580 epoch 2 - iter 117/130 - loss 0.54676841 - samples/sec: 15.85 - lr: 0.020000\n",
+      "2021-09-21 22:02:41,402 epoch 2 - iter 130/130 - loss 0.56859854 - samples/sec: 15.83 - lr: 0.020000\n",
+      "2021-09-21 22:02:41,404 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:02:41,404 EPOCH 2 done: loss 0.5686 - lr 0.0200000\n",
+      "2021-09-21 22:02:41,792 DEV : loss 0.34875163435935974 - score 0.5\n",
+      "2021-09-21 22:02:41,793 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:05:23,453 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:05:24,073 epoch 3 - iter 13/130 - loss 0.29605471 - samples/sec: 21.61 - lr: 0.020000\n",
-      "2021-09-08 02:05:24,660 epoch 3 - iter 26/130 - loss 0.27763861 - samples/sec: 22.17 - lr: 0.020000\n",
-      "2021-09-08 02:05:25,275 epoch 3 - iter 39/130 - loss 0.35612240 - samples/sec: 21.16 - lr: 0.020000\n",
-      "2021-09-08 02:05:25,876 epoch 3 - iter 52/130 - loss 0.33843728 - samples/sec: 21.67 - lr: 0.020000\n",
-      "2021-09-08 02:05:26,477 epoch 3 - iter 65/130 - loss 0.41915355 - samples/sec: 21.68 - lr: 0.020000\n",
-      "2021-09-08 02:05:27,072 epoch 3 - iter 78/130 - loss 0.40003034 - samples/sec: 21.86 - lr: 0.020000\n",
-      "2021-09-08 02:05:27,675 epoch 3 - iter 91/130 - loss 0.43696080 - samples/sec: 21.59 - lr: 0.020000\n",
-      "2021-09-08 02:05:28,268 epoch 3 - iter 104/130 - loss 0.41001500 - samples/sec: 21.96 - lr: 0.020000\n",
-      "2021-09-08 02:05:28,857 epoch 3 - iter 117/130 - loss 0.40338964 - samples/sec: 22.13 - lr: 0.020000\n",
-      "2021-09-08 02:05:29,478 epoch 3 - iter 130/130 - loss 0.45866334 - samples/sec: 20.96 - lr: 0.020000\n",
-      "2021-09-08 02:05:29,480 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:05:29,481 EPOCH 3 done: loss 0.4587 - lr 0.0200000\n",
-      "2021-09-08 02:05:29,738 DEV : loss 0.4008079171180725 - score 0.3571\n",
-      "2021-09-08 02:05:29,739 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:05:29,741 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:05:30,346 epoch 4 - iter 13/130 - loss 0.38227268 - samples/sec: 22.09 - lr: 0.020000\n",
-      "2021-09-08 02:05:30,947 epoch 4 - iter 26/130 - loss 0.40883921 - samples/sec: 21.65 - lr: 0.020000\n",
-      "2021-09-08 02:05:31,549 epoch 4 - iter 39/130 - loss 0.47833734 - samples/sec: 21.63 - lr: 0.020000\n",
-      "2021-09-08 02:05:32,142 epoch 4 - iter 52/130 - loss 0.37637943 - samples/sec: 21.97 - lr: 0.020000\n",
-      "2021-09-08 02:05:32,730 epoch 4 - iter 65/130 - loss 0.35322313 - samples/sec: 22.13 - lr: 0.020000\n",
-      "2021-09-08 02:05:33,354 epoch 4 - iter 78/130 - loss 0.40402179 - samples/sec: 20.85 - lr: 0.020000\n",
-      "2021-09-08 02:05:33,959 epoch 4 - iter 91/130 - loss 0.45118319 - samples/sec: 21.55 - lr: 0.020000\n",
-      "2021-09-08 02:05:34,546 epoch 4 - iter 104/130 - loss 0.43255285 - samples/sec: 22.18 - lr: 0.020000\n",
-      "2021-09-08 02:05:35,146 epoch 4 - iter 117/130 - loss 0.45094402 - samples/sec: 21.67 - lr: 0.020000\n",
-      "2021-09-08 02:05:35,734 epoch 4 - iter 130/130 - loss 0.43509061 - samples/sec: 22.14 - lr: 0.020000\n",
-      "2021-09-08 02:05:35,736 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:05:35,736 EPOCH 4 done: loss 0.4351 - lr 0.0200000\n",
-      "2021-09-08 02:05:35,989 DEV : loss 0.3794110417366028 - score 0.3571\n",
-      "2021-09-08 02:05:35,990 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:05:36,001 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:05:36,599 epoch 5 - iter 13/130 - loss 0.09470102 - samples/sec: 22.37 - lr: 0.020000\n",
-      "2021-09-08 02:05:37,187 epoch 5 - iter 26/130 - loss 0.18084245 - samples/sec: 22.13 - lr: 0.020000\n",
-      "2021-09-08 02:05:37,784 epoch 5 - iter 39/130 - loss 0.32731089 - samples/sec: 21.80 - lr: 0.020000\n",
-      "2021-09-08 02:05:38,369 epoch 5 - iter 52/130 - loss 0.28321619 - samples/sec: 22.27 - lr: 0.020000\n",
-      "2021-09-08 02:05:38,950 epoch 5 - iter 65/130 - loss 0.32751836 - samples/sec: 22.43 - lr: 0.020000\n",
-      "2021-09-08 02:05:39,557 epoch 5 - iter 78/130 - loss 0.32630177 - samples/sec: 21.43 - lr: 0.020000\n",
-      "2021-09-08 02:05:40,175 epoch 5 - iter 91/130 - loss 0.34372083 - samples/sec: 21.06 - lr: 0.020000\n",
-      "2021-09-08 02:05:40,773 epoch 5 - iter 104/130 - loss 0.39104060 - samples/sec: 21.80 - lr: 0.020000\n",
-      "2021-09-08 02:05:41,348 epoch 5 - iter 117/130 - loss 0.35537361 - samples/sec: 22.61 - lr: 0.020000\n",
-      "2021-09-08 02:05:41,932 epoch 5 - iter 130/130 - loss 0.35366997 - samples/sec: 22.29 - lr: 0.020000\n",
-      "2021-09-08 02:05:41,934 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:05:41,934 EPOCH 5 done: loss 0.3537 - lr 0.0200000\n",
-      "2021-09-08 02:05:42,281 DEV : loss 0.35816675424575806 - score 0.5\n",
-      "2021-09-08 02:05:42,282 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:02:45,873 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:02:46,693 epoch 3 - iter 13/130 - loss 0.42205354 - samples/sec: 16.43 - lr: 0.020000\n",
+      "2021-09-21 22:02:47,464 epoch 3 - iter 26/130 - loss 0.50138528 - samples/sec: 16.89 - lr: 0.020000\n",
+      "2021-09-21 22:02:48,239 epoch 3 - iter 39/130 - loss 0.46494911 - samples/sec: 16.78 - lr: 0.020000\n",
+      "2021-09-21 22:02:49,053 epoch 3 - iter 52/130 - loss 0.42529252 - samples/sec: 15.99 - lr: 0.020000\n",
+      "2021-09-21 22:02:49,818 epoch 3 - iter 65/130 - loss 0.45947144 - samples/sec: 17.01 - lr: 0.020000\n",
+      "2021-09-21 22:02:50,577 epoch 3 - iter 78/130 - loss 0.43816925 - samples/sec: 17.14 - lr: 0.020000\n",
+      "2021-09-21 22:02:51,211 epoch 3 - iter 91/130 - loss 0.42788910 - samples/sec: 20.55 - lr: 0.020000\n",
+      "2021-09-21 22:02:51,810 epoch 3 - iter 104/130 - loss 0.43586036 - samples/sec: 21.74 - lr: 0.020000\n",
+      "2021-09-21 22:02:52,376 epoch 3 - iter 117/130 - loss 0.41536733 - samples/sec: 23.00 - lr: 0.020000\n",
+      "2021-09-21 22:02:52,957 epoch 3 - iter 130/130 - loss 0.45477600 - samples/sec: 22.39 - lr: 0.020000\n",
+      "2021-09-21 22:02:52,958 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:02:52,959 EPOCH 3 done: loss 0.4548 - lr 0.0200000\n",
+      "2021-09-21 22:02:53,194 DEV : loss 0.3010939061641693 - score 0.3571\n",
+      "2021-09-21 22:02:53,195 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:02:53,197 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:02:53,783 epoch 4 - iter 13/130 - loss 0.34204282 - samples/sec: 22.83 - lr: 0.020000\n",
+      "2021-09-21 22:02:54,348 epoch 4 - iter 26/130 - loss 0.34197190 - samples/sec: 23.03 - lr: 0.020000\n",
+      "2021-09-21 22:02:54,918 epoch 4 - iter 39/130 - loss 0.35412804 - samples/sec: 22.82 - lr: 0.020000\n",
+      "2021-09-21 22:02:55,514 epoch 4 - iter 52/130 - loss 0.29987307 - samples/sec: 21.84 - lr: 0.020000\n",
+      "2021-09-21 22:02:56,296 epoch 4 - iter 65/130 - loss 0.33967568 - samples/sec: 16.64 - lr: 0.020000\n",
+      "2021-09-21 22:02:57,082 epoch 4 - iter 78/130 - loss 0.35077712 - samples/sec: 16.56 - lr: 0.020000\n",
+      "2021-09-21 22:02:57,820 epoch 4 - iter 91/130 - loss 0.37657425 - samples/sec: 17.62 - lr: 0.020000\n",
+      "2021-09-21 22:02:58,599 epoch 4 - iter 104/130 - loss 0.40686041 - samples/sec: 16.69 - lr: 0.020000\n",
+      "2021-09-21 22:02:59,392 epoch 4 - iter 117/130 - loss 0.38748709 - samples/sec: 16.42 - lr: 0.020000\n",
+      "2021-09-21 22:03:00,147 epoch 4 - iter 130/130 - loss 0.40114917 - samples/sec: 17.24 - lr: 0.020000\n",
+      "2021-09-21 22:03:00,148 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:03:00,148 EPOCH 4 done: loss 0.4011 - lr 0.0200000\n",
+      "2021-09-21 22:03:00,793 DEV : loss 0.2834978699684143 - score 0.5714\n",
+      "2021-09-21 22:03:00,794 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:05:47,359 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:05:47,965 epoch 6 - iter 13/130 - loss 0.34561151 - samples/sec: 22.16 - lr: 0.020000\n",
-      "2021-09-08 02:05:48,547 epoch 6 - iter 26/130 - loss 0.29881839 - samples/sec: 22.34 - lr: 0.020000\n",
-      "2021-09-08 02:05:49,170 epoch 6 - iter 39/130 - loss 0.26627585 - samples/sec: 20.91 - lr: 0.020000\n",
-      "2021-09-08 02:05:49,758 epoch 6 - iter 52/130 - loss 0.21524717 - samples/sec: 22.15 - lr: 0.020000\n",
-      "2021-09-08 02:05:50,326 epoch 6 - iter 65/130 - loss 0.17309723 - samples/sec: 22.91 - lr: 0.020000\n",
-      "2021-09-08 02:05:50,910 epoch 6 - iter 78/130 - loss 0.20186494 - samples/sec: 22.28 - lr: 0.020000\n",
-      "2021-09-08 02:05:51,489 epoch 6 - iter 91/130 - loss 0.19709357 - samples/sec: 22.49 - lr: 0.020000\n",
-      "2021-09-08 02:05:52,061 epoch 6 - iter 104/130 - loss 0.18859661 - samples/sec: 22.77 - lr: 0.020000\n",
-      "2021-09-08 02:05:52,655 epoch 6 - iter 117/130 - loss 0.24052258 - samples/sec: 21.89 - lr: 0.020000\n",
-      "2021-09-08 02:05:53,255 epoch 6 - iter 130/130 - loss 0.25096752 - samples/sec: 21.71 - lr: 0.020000\n"
+      "2021-09-21 22:03:04,282 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:03:04,918 epoch 5 - iter 13/130 - loss 0.14011328 - samples/sec: 21.05 - lr: 0.020000\n",
+      "2021-09-21 22:03:05,493 epoch 5 - iter 26/130 - loss 0.26675630 - samples/sec: 22.63 - lr: 0.020000\n",
+      "2021-09-21 22:03:06,093 epoch 5 - iter 39/130 - loss 0.24199715 - samples/sec: 21.70 - lr: 0.020000\n",
+      "2021-09-21 22:03:06,697 epoch 5 - iter 52/130 - loss 0.33109802 - samples/sec: 21.54 - lr: 0.020000\n",
+      "2021-09-21 22:03:07,295 epoch 5 - iter 65/130 - loss 0.30727347 - samples/sec: 21.78 - lr: 0.020000\n",
+      "2021-09-21 22:03:07,868 epoch 5 - iter 78/130 - loss 0.27957428 - samples/sec: 22.72 - lr: 0.020000\n",
+      "2021-09-21 22:03:08,435 epoch 5 - iter 91/130 - loss 0.27246460 - samples/sec: 22.94 - lr: 0.020000\n",
+      "2021-09-21 22:03:08,998 epoch 5 - iter 104/130 - loss 0.26893884 - samples/sec: 23.12 - lr: 0.020000\n",
+      "2021-09-21 22:03:09,642 epoch 5 - iter 117/130 - loss 0.29953485 - samples/sec: 20.23 - lr: 0.020000\n",
+      "2021-09-21 22:03:10,394 epoch 5 - iter 130/130 - loss 0.30784021 - samples/sec: 17.31 - lr: 0.020000\n",
+      "2021-09-21 22:03:10,395 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:03:10,395 EPOCH 5 done: loss 0.3078 - lr 0.0200000\n",
+      "2021-09-21 22:03:11,120 DEV : loss 0.7631169557571411 - score 0.7143\n",
+      "2021-09-21 22:03:11,120 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 22:03:15,194 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:03:15,837 epoch 6 - iter 13/130 - loss 0.36604682 - samples/sec: 20.78 - lr: 0.020000\n",
+      "2021-09-21 22:03:16,394 epoch 6 - iter 26/130 - loss 0.22545535 - samples/sec: 23.39 - lr: 0.020000\n",
+      "2021-09-21 22:03:16,957 epoch 6 - iter 39/130 - loss 0.24717104 - samples/sec: 23.11 - lr: 0.020000\n",
+      "2021-09-21 22:03:17,515 epoch 6 - iter 52/130 - loss 0.23832955 - samples/sec: 23.33 - lr: 0.020000\n",
+      "2021-09-21 22:03:18,077 epoch 6 - iter 65/130 - loss 0.23258404 - samples/sec: 23.13 - lr: 0.020000\n",
+      "2021-09-21 22:03:18,647 epoch 6 - iter 78/130 - loss 0.35015774 - samples/sec: 22.87 - lr: 0.020000\n",
+      "2021-09-21 22:03:19,209 epoch 6 - iter 91/130 - loss 0.33581662 - samples/sec: 23.15 - lr: 0.020000\n",
+      "2021-09-21 22:03:19,949 epoch 6 - iter 104/130 - loss 0.32116992 - samples/sec: 17.57 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:05:53,256 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:05:53,256 EPOCH 6 done: loss 0.2510 - lr 0.0200000\n",
-      "2021-09-08 02:05:53,966 DEV : loss 0.21321454644203186 - score 0.5714\n",
-      "2021-09-08 02:05:53,967 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 02:05:58,735 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:05:59,337 epoch 7 - iter 13/130 - loss 0.07916351 - samples/sec: 22.29 - lr: 0.020000\n",
-      "2021-09-08 02:05:59,969 epoch 7 - iter 26/130 - loss 0.19497309 - samples/sec: 20.63 - lr: 0.020000\n",
-      "2021-09-08 02:06:00,547 epoch 7 - iter 39/130 - loss 0.18903019 - samples/sec: 22.51 - lr: 0.020000\n",
-      "2021-09-08 02:06:01,135 epoch 7 - iter 52/130 - loss 0.17787588 - samples/sec: 22.12 - lr: 0.020000\n",
-      "2021-09-08 02:06:01,707 epoch 7 - iter 65/130 - loss 0.16939779 - samples/sec: 22.76 - lr: 0.020000\n",
-      "2021-09-08 02:06:02,294 epoch 7 - iter 78/130 - loss 0.16284203 - samples/sec: 22.20 - lr: 0.020000\n",
-      "2021-09-08 02:06:02,874 epoch 7 - iter 91/130 - loss 0.15378637 - samples/sec: 22.41 - lr: 0.020000\n",
-      "2021-09-08 02:06:03,456 epoch 7 - iter 104/130 - loss 0.14420563 - samples/sec: 22.40 - lr: 0.020000\n",
-      "2021-09-08 02:06:04,030 epoch 7 - iter 117/130 - loss 0.14903550 - samples/sec: 22.66 - lr: 0.020000\n",
-      "2021-09-08 02:06:04,614 epoch 7 - iter 130/130 - loss 0.15804350 - samples/sec: 22.30 - lr: 0.020000\n",
-      "2021-09-08 02:06:04,615 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:06:04,615 EPOCH 7 done: loss 0.1580 - lr 0.0200000\n",
-      "2021-09-08 02:06:04,859 DEV : loss 0.38201647996902466 - score 0.5\n",
-      "2021-09-08 02:06:04,860 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:06:04,862 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:06:05,458 epoch 8 - iter 13/130 - loss 0.26806871 - samples/sec: 22.42 - lr: 0.020000\n",
-      "2021-09-08 02:06:06,031 epoch 8 - iter 26/130 - loss 0.20994467 - samples/sec: 22.73 - lr: 0.020000\n",
-      "2021-09-08 02:06:06,602 epoch 8 - iter 39/130 - loss 0.14170333 - samples/sec: 22.81 - lr: 0.020000\n",
-      "2021-09-08 02:06:07,175 epoch 8 - iter 52/130 - loss 0.15128897 - samples/sec: 22.70 - lr: 0.020000\n",
-      "2021-09-08 02:06:07,749 epoch 8 - iter 65/130 - loss 0.18970192 - samples/sec: 22.69 - lr: 0.020000\n",
-      "2021-09-08 02:06:08,330 epoch 8 - iter 78/130 - loss 0.16556636 - samples/sec: 22.40 - lr: 0.020000\n",
-      "2021-09-08 02:06:08,906 epoch 8 - iter 91/130 - loss 0.15037642 - samples/sec: 22.63 - lr: 0.020000\n",
-      "2021-09-08 02:06:09,476 epoch 8 - iter 104/130 - loss 0.13592650 - samples/sec: 22.82 - lr: 0.020000\n",
-      "2021-09-08 02:06:10,055 epoch 8 - iter 117/130 - loss 0.13962764 - samples/sec: 22.49 - lr: 0.020000\n",
-      "2021-09-08 02:06:10,635 epoch 8 - iter 130/130 - loss 0.16893824 - samples/sec: 22.46 - lr: 0.020000\n",
-      "2021-09-08 02:06:10,636 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:06:10,636 EPOCH 8 done: loss 0.1689 - lr 0.0200000\n",
-      "2021-09-08 02:06:10,899 DEV : loss 0.4729023277759552 - score 0.6429\n",
-      "2021-09-08 02:06:10,900 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:03:20,707 epoch 6 - iter 117/130 - loss 0.32715957 - samples/sec: 17.16 - lr: 0.020000\n",
+      "2021-09-21 22:03:21,457 epoch 6 - iter 130/130 - loss 0.33703997 - samples/sec: 17.34 - lr: 0.020000\n",
+      "2021-09-21 22:03:21,458 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:03:21,459 EPOCH 6 done: loss 0.3370 - lr 0.0200000\n",
+      "2021-09-21 22:03:22,058 DEV : loss 0.2528707981109619 - score 0.7143\n",
+      "2021-09-21 22:03:22,061 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:06:14,719 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:06:15,317 epoch 9 - iter 13/130 - loss 0.22345695 - samples/sec: 22.44 - lr: 0.020000\n",
-      "2021-09-08 02:06:15,893 epoch 9 - iter 26/130 - loss 0.19900370 - samples/sec: 22.60 - lr: 0.020000\n",
-      "2021-09-08 02:06:16,464 epoch 9 - iter 39/130 - loss 0.18268345 - samples/sec: 22.79 - lr: 0.020000\n",
-      "2021-09-08 02:06:17,050 epoch 9 - iter 52/130 - loss 0.25191674 - samples/sec: 22.20 - lr: 0.020000\n",
-      "2021-09-08 02:06:17,621 epoch 9 - iter 65/130 - loss 0.21848548 - samples/sec: 22.81 - lr: 0.020000\n",
-      "2021-09-08 02:06:18,195 epoch 9 - iter 78/130 - loss 0.20125023 - samples/sec: 22.68 - lr: 0.020000\n",
-      "2021-09-08 02:06:18,783 epoch 9 - iter 91/130 - loss 0.21346577 - samples/sec: 22.13 - lr: 0.020000\n",
-      "2021-09-08 02:06:19,362 epoch 9 - iter 104/130 - loss 0.18825836 - samples/sec: 22.48 - lr: 0.020000\n",
-      "2021-09-08 02:06:19,947 epoch 9 - iter 117/130 - loss 0.23058280 - samples/sec: 22.25 - lr: 0.020000\n",
-      "2021-09-08 02:06:20,538 epoch 9 - iter 130/130 - loss 0.22443755 - samples/sec: 22.03 - lr: 0.020000\n",
-      "2021-09-08 02:06:20,540 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:06:20,540 EPOCH 9 done: loss 0.2244 - lr 0.0200000\n",
-      "2021-09-08 02:06:20,785 DEV : loss 0.2306065559387207 - score 0.6429\n",
-      "2021-09-08 02:06:20,786 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:03:26,190 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:03:26,819 epoch 7 - iter 13/130 - loss 0.25809959 - samples/sec: 21.27 - lr: 0.020000\n",
+      "2021-09-21 22:03:27,382 epoch 7 - iter 26/130 - loss 0.13635795 - samples/sec: 23.10 - lr: 0.020000\n",
+      "2021-09-21 22:03:27,946 epoch 7 - iter 39/130 - loss 0.13937728 - samples/sec: 23.12 - lr: 0.020000\n",
+      "2021-09-21 22:03:28,515 epoch 7 - iter 52/130 - loss 0.14740054 - samples/sec: 22.84 - lr: 0.020000\n",
+      "2021-09-21 22:03:29,080 epoch 7 - iter 65/130 - loss 0.18284009 - samples/sec: 23.05 - lr: 0.020000\n",
+      "2021-09-21 22:03:29,656 epoch 7 - iter 78/130 - loss 0.24087987 - samples/sec: 22.61 - lr: 0.020000\n",
+      "2021-09-21 22:03:30,207 epoch 7 - iter 91/130 - loss 0.20790041 - samples/sec: 23.61 - lr: 0.020000\n",
+      "2021-09-21 22:03:30,776 epoch 7 - iter 104/130 - loss 0.22577707 - samples/sec: 22.91 - lr: 0.020000\n",
+      "2021-09-21 22:03:31,453 epoch 7 - iter 117/130 - loss 0.26315515 - samples/sec: 19.21 - lr: 0.020000\n",
+      "2021-09-21 22:03:32,214 epoch 7 - iter 130/130 - loss 0.25855023 - samples/sec: 17.10 - lr: 0.020000\n",
+      "2021-09-21 22:03:32,215 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:03:32,215 EPOCH 7 done: loss 0.2586 - lr 0.0200000\n",
+      "2021-09-21 22:03:32,800 DEV : loss 0.5245214700698853 - score 0.7143\n",
+      "2021-09-21 22:03:32,800 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:03:32,803 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:03:33,627 epoch 8 - iter 13/130 - loss 0.11353826 - samples/sec: 17.90 - lr: 0.020000\n",
+      "2021-09-21 22:03:34,363 epoch 8 - iter 26/130 - loss 0.12132864 - samples/sec: 17.68 - lr: 0.020000\n",
+      "2021-09-21 22:03:35,106 epoch 8 - iter 39/130 - loss 0.08250011 - samples/sec: 17.51 - lr: 0.020000\n",
+      "2021-09-21 22:03:35,827 epoch 8 - iter 52/130 - loss 0.06887638 - samples/sec: 18.04 - lr: 0.020000\n",
+      "2021-09-21 22:03:36,544 epoch 8 - iter 65/130 - loss 0.05732713 - samples/sec: 18.14 - lr: 0.020000\n",
+      "2021-09-21 22:03:37,305 epoch 8 - iter 78/130 - loss 0.11326432 - samples/sec: 17.10 - lr: 0.020000\n",
+      "2021-09-21 22:03:38,055 epoch 8 - iter 91/130 - loss 0.12787179 - samples/sec: 17.35 - lr: 0.020000\n",
+      "2021-09-21 22:03:38,756 epoch 8 - iter 104/130 - loss 0.14000914 - samples/sec: 18.56 - lr: 0.020000\n",
+      "2021-09-21 22:03:39,521 epoch 8 - iter 117/130 - loss 0.13558110 - samples/sec: 17.02 - lr: 0.020000\n",
+      "2021-09-21 22:03:40,265 epoch 8 - iter 130/130 - loss 0.15448080 - samples/sec: 17.50 - lr: 0.020000\n",
+      "2021-09-21 22:03:40,266 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:03:40,266 EPOCH 8 done: loss 0.1545 - lr 0.0200000\n",
+      "2021-09-21 22:03:40,810 DEV : loss 0.1527969241142273 - score 0.7143\n",
+      "2021-09-21 22:03:40,811 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:06:24,974 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:06:25,561 epoch 10 - iter 13/130 - loss 0.00293211 - samples/sec: 22.82 - lr: 0.020000\n",
-      "2021-09-08 02:06:26,139 epoch 10 - iter 26/130 - loss 0.05296357 - samples/sec: 22.52 - lr: 0.020000\n",
-      "2021-09-08 02:06:26,718 epoch 10 - iter 39/130 - loss 0.10859675 - samples/sec: 22.50 - lr: 0.020000\n",
-      "2021-09-08 02:06:27,295 epoch 10 - iter 52/130 - loss 0.09047462 - samples/sec: 22.57 - lr: 0.020000\n",
-      "2021-09-08 02:06:27,882 epoch 10 - iter 65/130 - loss 0.14005040 - samples/sec: 22.18 - lr: 0.020000\n",
-      "2021-09-08 02:06:28,458 epoch 10 - iter 78/130 - loss 0.13510954 - samples/sec: 22.62 - lr: 0.020000\n",
-      "2021-09-08 02:06:29,027 epoch 10 - iter 91/130 - loss 0.11785045 - samples/sec: 22.85 - lr: 0.020000\n",
-      "2021-09-08 02:06:29,601 epoch 10 - iter 104/130 - loss 0.12792993 - samples/sec: 22.68 - lr: 0.020000\n",
-      "2021-09-08 02:06:30,180 epoch 10 - iter 117/130 - loss 0.11816518 - samples/sec: 22.50 - lr: 0.020000\n",
-      "2021-09-08 02:06:30,747 epoch 10 - iter 130/130 - loss 0.10643178 - samples/sec: 22.95 - lr: 0.020000\n",
-      "2021-09-08 02:06:30,748 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:06:30,749 EPOCH 10 done: loss 0.1064 - lr 0.0200000\n",
-      "2021-09-08 02:06:30,994 DEV : loss 0.3376254737377167 - score 0.5\n",
-      "2021-09-08 02:06:30,995 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:06:35,169 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:06:35,169 Testing using best model ...\n",
-      "2021-09-08 02:06:35,171 loading file None/best-model.pt\n",
+      "2021-09-21 22:03:44,908 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:03:45,534 epoch 9 - iter 13/130 - loss 0.01234132 - samples/sec: 21.40 - lr: 0.020000\n",
+      "2021-09-21 22:03:46,103 epoch 9 - iter 26/130 - loss 0.20334153 - samples/sec: 22.86 - lr: 0.020000\n",
+      "2021-09-21 22:03:46,666 epoch 9 - iter 39/130 - loss 0.13723078 - samples/sec: 23.14 - lr: 0.020000\n",
+      "2021-09-21 22:03:47,230 epoch 9 - iter 52/130 - loss 0.14631280 - samples/sec: 23.07 - lr: 0.020000\n",
+      "2021-09-21 22:03:47,790 epoch 9 - iter 65/130 - loss 0.15681150 - samples/sec: 23.26 - lr: 0.020000\n",
+      "2021-09-21 22:03:48,361 epoch 9 - iter 78/130 - loss 0.20840077 - samples/sec: 22.81 - lr: 0.020000\n",
+      "2021-09-21 22:03:48,998 epoch 9 - iter 91/130 - loss 0.18036530 - samples/sec: 20.43 - lr: 0.020000\n",
+      "2021-09-21 22:03:49,736 epoch 9 - iter 104/130 - loss 0.15852646 - samples/sec: 17.63 - lr: 0.020000\n",
+      "2021-09-21 22:03:50,495 epoch 9 - iter 117/130 - loss 0.15914764 - samples/sec: 17.15 - lr: 0.020000\n",
+      "2021-09-21 22:03:51,256 epoch 9 - iter 130/130 - loss 0.17355029 - samples/sec: 17.09 - lr: 0.020000\n",
+      "2021-09-21 22:03:51,257 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:03:51,258 EPOCH 9 done: loss 0.1736 - lr 0.0200000\n",
+      "2021-09-21 22:03:51,951 DEV : loss 0.20614656805992126 - score 0.6429\n",
+      "2021-09-21 22:03:51,952 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:03:51,954 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:03:52,731 epoch 10 - iter 13/130 - loss 0.09904307 - samples/sec: 17.85 - lr: 0.020000\n",
+      "2021-09-21 22:03:53,476 epoch 10 - iter 26/130 - loss 0.10452721 - samples/sec: 17.45 - lr: 0.020000\n",
+      "2021-09-21 22:03:54,233 epoch 10 - iter 39/130 - loss 0.07342700 - samples/sec: 17.20 - lr: 0.020000\n",
+      "2021-09-21 22:03:54,933 epoch 10 - iter 52/130 - loss 0.05583007 - samples/sec: 18.59 - lr: 0.020000\n",
+      "2021-09-21 22:03:55,607 epoch 10 - iter 65/130 - loss 0.06791020 - samples/sec: 19.31 - lr: 0.020000\n",
+      "2021-09-21 22:03:56,199 epoch 10 - iter 78/130 - loss 0.10186242 - samples/sec: 21.97 - lr: 0.020000\n",
+      "2021-09-21 22:03:56,766 epoch 10 - iter 91/130 - loss 0.15669527 - samples/sec: 22.98 - lr: 0.020000\n",
+      "2021-09-21 22:03:57,321 epoch 10 - iter 104/130 - loss 0.15128091 - samples/sec: 23.47 - lr: 0.020000\n",
+      "2021-09-21 22:03:57,880 epoch 10 - iter 117/130 - loss 0.14585652 - samples/sec: 23.27 - lr: 0.020000\n",
+      "2021-09-21 22:03:58,433 epoch 10 - iter 130/130 - loss 0.13194071 - samples/sec: 23.55 - lr: 0.020000\n",
+      "2021-09-21 22:03:58,434 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:03:58,435 EPOCH 10 done: loss 0.1319 - lr 0.0200000\n",
+      "2021-09-21 22:03:59,302 DEV : loss 0.360601544380188 - score 0.5714\n",
+      "2021-09-21 22:03:59,303 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 22:04:02,950 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:04:02,951 Testing using best model ...\n",
+      "2021-09-21 22:04:02,952 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:06:40,277 \t0.6875\n",
-      "2021-09-08 02:06:40,277 \n",
+      "2021-09-21 22:04:08,774 \t0.0\n",
+      "2021-09-21 22:04:08,775 \n",
       "Results:\n",
-      "- F-score (micro) 0.6875\n",
-      "- F-score (macro) 0.5149\n",
-      "- Accuracy 0.6875\n",
+      "- F-score (micro) 0.0\n",
+      "- F-score (macro) 0.0\n",
+      "- Accuracy 0.0\n",
       "\n",
       "By class:\n",
-      "                                               precision    recall  f1-score   support\n",
+      "                                           precision    recall  f1-score   support\n",
       "\n",
-      "                    this text is about travel     1.0000    1.0000    1.0000         1\n",
-      "                this text is about technology     1.0000    1.0000    1.0000         1\n",
-      "                  this text is about wellness     1.0000    1.0000    1.0000         1\n",
-      "                     this text is about women     0.0000    0.0000    0.0000         2\n",
-      "                   this text is about parents     0.0000    0.0000    0.0000         0\n",
-      "                  this text is about business     0.4000    1.0000    0.5714         2\n",
-      "                  this text is about weddings     0.0000    0.0000    0.0000         0\n",
-      "                   this text is about fashion     0.0000    0.0000    0.0000         0\n",
-      "this text is about entertainmen,the attention     0.0000    0.0000    0.0000         1\n",
-      "                   this text is about science     0.0000    0.0000    0.0000         0\n",
-      "                   this text is about divorce     1.0000    1.0000    1.0000         2\n",
-      "                     this text is about crime     1.0000    1.0000    1.0000         1\n",
-      "                  this text is about religion     1.0000    1.0000    1.0000         1\n",
-      "                    this text is about sports     0.0000    0.0000    0.0000         2\n",
-      "                  this text is about politics     1.0000    1.0000    1.0000         1\n",
-      "                    this text is about comedy     0.5000    1.0000    0.6667         1\n",
+      "This text is about Family & Relationships     0.0000    0.0000    0.0000         0\n",
+      "                This text is about Health     0.0000    0.0000    0.0000         0\n",
+      " This text is about Science & Mathematics     0.0000    0.0000    0.0000         0\n",
+      " This text is about Entertainment & Music     0.0000    0.0000    0.0000         0\n",
+      "  This text is about Computers & Internet     0.0000    0.0000    0.0000         0\n",
+      " This text is about Education & Reference     0.0000    0.0000    0.0000         0\n",
+      "                This text is about Sports     0.0000    0.0000    0.0000         0\n",
+      "     This text is about Society & Culture     0.0000    0.0000    0.0000         0\n",
+      "    This text is about Business & Finance     0.0000    0.0000    0.0000         0\n",
+      " This text is about Politics & Government     0.0000    0.0000    0.0000         0\n",
       "\n",
-      "                                    micro avg     0.6875    0.6875    0.6875        16\n",
-      "                                    macro avg     0.4938    0.5625    0.5149        16\n",
-      "                                 weighted avg     0.5813    0.6875    0.6131        16\n",
-      "                                  samples avg     0.6875    0.6875    0.6875        16\n",
-      "\n"
+      "                                micro avg     0.0000    0.0000    0.0000         0\n",
+      "                                macro avg     0.0000    0.0000    0.0000         0\n",
+      "                             weighted avg     0.0000    0.0000    0.0000         0\n",
+      "                              samples avg     0.0000    0.0000    0.0000         0\n",
+      "\n",
+      "2021-09-21 22:04:08,775 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:04:27,786 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:06:40,278 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:06:50,047 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:06:54,099 Computing label dictionary. Progress:\n"
+      "2021-09-21 22:04:32,231 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 146/146 [00:00<00:00, 56486.34it/s]"
+      "100%|██████████| 146/146 [00:00<00:00, 74999.19it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:06:54,104 [b'this text is about travel', b'this text is about technology', b'this text is about wellness', b'this text is about women', b'this text is about parents', b'this text is about business', b'this text is about weddings', b'this text is about fashion', b'this text is about entertainmen,the attention', b'this text is about science', b'this text is about divorce', b'this text is about crime', b'this text is about religion', b'this text is about sports', b'this text is about politics', b'this text is about comedy']\n",
-      "2021-09-08 02:06:54,113 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:06:54,115 Model: \"TARSClassifier(\n",
+      "2021-09-21 22:04:32,235 [b'this text is about travel', b'this text is about technology', b'this text is about wellness', b'this text is about women', b'this text is about parents', b'this text is about business', b'this text is about weddings', b'this text is about fashion', b'this text is about entertainmen,the attention', b'this text is about science', b'this text is about divorce', b'this text is about crime', b'this text is about religion', b'this text is about sports', b'this text is about politics', b'this text is about comedy']\n",
+      "2021-09-21 22:04:32,250 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:04:32,252 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -3876,25 +3882,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:06:54,115 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:06:54,116 Corpus: \"Corpus: 130 train + 14 dev + 16 test sentences\"\n",
-      "2021-09-08 02:06:54,116 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:06:54,116 Parameters:\n",
-      "2021-09-08 02:06:54,116  - learning_rate: \"0.02\"\n",
-      "2021-09-08 02:06:54,117  - mini_batch_size: \"1\"\n",
-      "2021-09-08 02:06:54,117  - patience: \"3\"\n",
-      "2021-09-08 02:06:54,117  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 02:06:54,118  - max_epochs: \"10\"\n",
-      "2021-09-08 02:06:54,118  - shuffle: \"True\"\n",
-      "2021-09-08 02:06:54,118  - train_with_dev: \"False\"\n",
-      "2021-09-08 02:06:54,118  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 02:06:54,119 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:06:54,119 Model training base path: \"None\"\n",
-      "2021-09-08 02:06:54,119 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:06:54,120 Device: cuda:0\n",
-      "2021-09-08 02:06:54,120 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:06:54,120 Embeddings storage mode: cpu\n",
-      "2021-09-08 02:06:54,126 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 22:04:32,252 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:04:32,253 Corpus: \"Corpus: 130 train + 14 dev + 16 test sentences\"\n",
+      "2021-09-21 22:04:32,253 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:04:32,253 Parameters:\n",
+      "2021-09-21 22:04:32,254  - learning_rate: \"0.02\"\n",
+      "2021-09-21 22:04:32,254  - mini_batch_size: \"1\"\n",
+      "2021-09-21 22:04:32,254  - patience: \"3\"\n",
+      "2021-09-21 22:04:32,254  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 22:04:32,255  - max_epochs: \"10\"\n",
+      "2021-09-21 22:04:32,255  - shuffle: \"True\"\n",
+      "2021-09-21 22:04:32,255  - train_with_dev: \"False\"\n",
+      "2021-09-21 22:04:32,255  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 22:04:32,256 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:04:32,256 Model training base path: \"None\"\n",
+      "2021-09-21 22:04:32,256 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:04:32,257 Device: cuda:0\n",
+      "2021-09-21 22:04:32,257 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:04:32,257 Embeddings storage mode: cpu\n",
+      "2021-09-21 22:04:32,266 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -3908,230 +3914,216 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:06:54,862 epoch 1 - iter 13/130 - loss 0.39319662 - samples/sec: 18.19 - lr: 0.020000\n",
-      "2021-09-08 02:06:55,575 epoch 1 - iter 26/130 - loss 0.45275245 - samples/sec: 18.26 - lr: 0.020000\n",
-      "2021-09-08 02:06:56,306 epoch 1 - iter 39/130 - loss 0.46132220 - samples/sec: 17.79 - lr: 0.020000\n",
-      "2021-09-08 02:06:57,024 epoch 1 - iter 52/130 - loss 0.43880780 - samples/sec: 18.13 - lr: 0.020000\n",
-      "2021-09-08 02:06:57,749 epoch 1 - iter 65/130 - loss 0.49984313 - samples/sec: 17.95 - lr: 0.020000\n",
-      "2021-09-08 02:06:58,478 epoch 1 - iter 78/130 - loss 0.51229565 - samples/sec: 17.86 - lr: 0.020000\n",
-      "2021-09-08 02:06:59,210 epoch 1 - iter 91/130 - loss 0.51017884 - samples/sec: 17.78 - lr: 0.020000\n",
-      "2021-09-08 02:06:59,925 epoch 1 - iter 104/130 - loss 0.48155317 - samples/sec: 18.21 - lr: 0.020000\n",
-      "2021-09-08 02:07:00,630 epoch 1 - iter 117/130 - loss 0.48406822 - samples/sec: 18.45 - lr: 0.020000\n",
-      "2021-09-08 02:07:01,348 epoch 1 - iter 130/130 - loss 0.48231021 - samples/sec: 18.14 - lr: 0.020000\n",
-      "2021-09-08 02:07:01,349 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:07:01,350 EPOCH 1 done: loss 0.4823 - lr 0.0200000\n",
-      "2021-09-08 02:07:01,620 DEV : loss 0.5065745711326599 - score 0.4286\n",
-      "2021-09-08 02:07:01,621 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 02:07:05,809 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:07:06,537 epoch 2 - iter 13/130 - loss 0.62475167 - samples/sec: 18.40 - lr: 0.020000\n",
-      "2021-09-08 02:07:07,249 epoch 2 - iter 26/130 - loss 0.64429140 - samples/sec: 18.28 - lr: 0.020000\n",
-      "2021-09-08 02:07:07,964 epoch 2 - iter 39/130 - loss 0.55462461 - samples/sec: 18.21 - lr: 0.020000\n",
-      "2021-09-08 02:07:08,689 epoch 2 - iter 52/130 - loss 0.58944160 - samples/sec: 17.94 - lr: 0.020000\n",
-      "2021-09-08 02:07:09,421 epoch 2 - iter 65/130 - loss 0.55926882 - samples/sec: 17.77 - lr: 0.020000\n",
-      "2021-09-08 02:07:10,161 epoch 2 - iter 78/130 - loss 0.55653041 - samples/sec: 17.60 - lr: 0.020000\n",
-      "2021-09-08 02:07:10,870 epoch 2 - iter 91/130 - loss 0.54188222 - samples/sec: 18.34 - lr: 0.020000\n",
-      "2021-09-08 02:07:11,582 epoch 2 - iter 104/130 - loss 0.51329956 - samples/sec: 18.30 - lr: 0.020000\n",
-      "2021-09-08 02:07:12,299 epoch 2 - iter 117/130 - loss 0.49859679 - samples/sec: 18.14 - lr: 0.020000\n",
-      "2021-09-08 02:07:13,017 epoch 2 - iter 130/130 - loss 0.48770454 - samples/sec: 18.14 - lr: 0.020000\n",
-      "2021-09-08 02:07:13,018 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:07:13,018 EPOCH 2 done: loss 0.4877 - lr 0.0200000\n",
-      "2021-09-08 02:07:13,323 DEV : loss 0.3723393976688385 - score 0.4286\n",
-      "2021-09-08 02:07:13,324 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:04:32,839 epoch 1 - iter 13/130 - loss 0.19329682 - samples/sec: 23.37 - lr: 0.020000\n",
+      "2021-09-21 22:04:33,415 epoch 1 - iter 26/130 - loss 0.28447288 - samples/sec: 22.59 - lr: 0.020000\n",
+      "2021-09-21 22:04:33,991 epoch 1 - iter 39/130 - loss 0.39311656 - samples/sec: 22.60 - lr: 0.020000\n",
+      "2021-09-21 22:04:34,578 epoch 1 - iter 52/130 - loss 0.45428986 - samples/sec: 22.18 - lr: 0.020000\n",
+      "2021-09-21 22:04:35,230 epoch 1 - iter 65/130 - loss 0.52056000 - samples/sec: 19.97 - lr: 0.020000\n",
+      "2021-09-21 22:04:35,961 epoch 1 - iter 78/130 - loss 0.51445633 - samples/sec: 17.79 - lr: 0.020000\n",
+      "2021-09-21 22:04:36,771 epoch 1 - iter 91/130 - loss 0.54139279 - samples/sec: 16.06 - lr: 0.020000\n",
+      "2021-09-21 22:04:37,572 epoch 1 - iter 104/130 - loss 0.57544371 - samples/sec: 16.26 - lr: 0.020000\n",
+      "2021-09-21 22:04:38,370 epoch 1 - iter 117/130 - loss 0.58980555 - samples/sec: 16.30 - lr: 0.020000\n",
+      "2021-09-21 22:04:39,128 epoch 1 - iter 130/130 - loss 0.60257387 - samples/sec: 17.17 - lr: 0.020000\n",
+      "2021-09-21 22:04:39,129 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:04:39,129 EPOCH 1 done: loss 0.6026 - lr 0.0200000\n",
+      "2021-09-21 22:04:39,585 DEV : loss 0.2729204595088959 - score 0.0\n",
+      "2021-09-21 22:04:39,586 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:07:17,778 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:07:18,550 epoch 3 - iter 13/130 - loss 0.29803297 - samples/sec: 17.33 - lr: 0.020000\n",
-      "2021-09-08 02:07:19,258 epoch 3 - iter 26/130 - loss 0.28220620 - samples/sec: 18.39 - lr: 0.020000\n",
-      "2021-09-08 02:07:19,959 epoch 3 - iter 39/130 - loss 0.37300223 - samples/sec: 18.57 - lr: 0.020000\n",
-      "2021-09-08 02:07:20,664 epoch 3 - iter 52/130 - loss 0.41979490 - samples/sec: 18.44 - lr: 0.020000\n",
-      "2021-09-08 02:07:21,377 epoch 3 - iter 65/130 - loss 0.48981571 - samples/sec: 18.26 - lr: 0.020000\n",
-      "2021-09-08 02:07:22,082 epoch 3 - iter 78/130 - loss 0.44893148 - samples/sec: 18.46 - lr: 0.020000\n",
-      "2021-09-08 02:07:22,770 epoch 3 - iter 91/130 - loss 0.40973356 - samples/sec: 18.94 - lr: 0.020000\n",
-      "2021-09-08 02:07:23,468 epoch 3 - iter 104/130 - loss 0.41026460 - samples/sec: 18.63 - lr: 0.020000\n",
-      "2021-09-08 02:07:24,188 epoch 3 - iter 117/130 - loss 0.43019825 - samples/sec: 18.08 - lr: 0.020000\n",
-      "2021-09-08 02:07:24,900 epoch 3 - iter 130/130 - loss 0.42624528 - samples/sec: 18.28 - lr: 0.020000\n",
-      "2021-09-08 02:07:24,902 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:07:24,902 EPOCH 3 done: loss 0.4262 - lr 0.0200000\n",
-      "2021-09-08 02:07:25,176 DEV : loss 0.41176730394363403 - score 0.5\n",
-      "2021-09-08 02:07:25,177 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:04:43,289 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:04:44,134 epoch 2 - iter 13/130 - loss 0.64249010 - samples/sec: 16.77 - lr: 0.020000\n",
+      "2021-09-21 22:04:44,850 epoch 2 - iter 26/130 - loss 0.64856973 - samples/sec: 18.17 - lr: 0.020000\n",
+      "2021-09-21 22:04:45,649 epoch 2 - iter 39/130 - loss 0.65286363 - samples/sec: 16.28 - lr: 0.020000\n",
+      "2021-09-21 22:04:46,417 epoch 2 - iter 52/130 - loss 0.64982061 - samples/sec: 16.96 - lr: 0.020000\n",
+      "2021-09-21 22:04:47,152 epoch 2 - iter 65/130 - loss 0.65509927 - samples/sec: 17.71 - lr: 0.020000\n",
+      "2021-09-21 22:04:47,935 epoch 2 - iter 78/130 - loss 0.66083514 - samples/sec: 16.62 - lr: 0.020000\n",
+      "2021-09-21 22:04:48,719 epoch 2 - iter 91/130 - loss 0.65279340 - samples/sec: 16.58 - lr: 0.020000\n",
+      "2021-09-21 22:04:49,477 epoch 2 - iter 104/130 - loss 0.65677810 - samples/sec: 17.17 - lr: 0.020000\n",
+      "2021-09-21 22:04:50,212 epoch 2 - iter 117/130 - loss 0.65502816 - samples/sec: 17.71 - lr: 0.020000\n",
+      "2021-09-21 22:04:51,023 epoch 2 - iter 130/130 - loss 0.65199970 - samples/sec: 16.04 - lr: 0.020000\n",
+      "2021-09-21 22:04:51,024 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:04:51,025 EPOCH 2 done: loss 0.6520 - lr 0.0200000\n",
+      "2021-09-21 22:04:51,547 DEV : loss 0.41488364338874817 - score 0.0\n",
+      "2021-09-21 22:04:51,548 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:04:51,552 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:04:52,350 epoch 3 - iter 13/130 - loss 0.67567746 - samples/sec: 16.62 - lr: 0.020000\n",
+      "2021-09-21 22:04:53,072 epoch 3 - iter 26/130 - loss 0.66076358 - samples/sec: 18.04 - lr: 0.020000\n",
+      "2021-09-21 22:04:53,842 epoch 3 - iter 39/130 - loss 0.66330311 - samples/sec: 16.90 - lr: 0.020000\n",
+      "2021-09-21 22:04:54,626 epoch 3 - iter 52/130 - loss 0.67504068 - samples/sec: 16.59 - lr: 0.020000\n",
+      "2021-09-21 22:04:55,401 epoch 3 - iter 65/130 - loss 0.66615202 - samples/sec: 16.78 - lr: 0.020000\n",
+      "2021-09-21 22:04:56,144 epoch 3 - iter 78/130 - loss 0.66699892 - samples/sec: 17.52 - lr: 0.020000\n",
+      "2021-09-21 22:04:56,771 epoch 3 - iter 91/130 - loss 0.66328918 - samples/sec: 20.76 - lr: 0.020000\n",
+      "2021-09-21 22:04:57,344 epoch 3 - iter 104/130 - loss 0.66344533 - samples/sec: 22.71 - lr: 0.020000\n",
+      "2021-09-21 22:04:57,899 epoch 3 - iter 117/130 - loss 0.66228068 - samples/sec: 23.49 - lr: 0.020000\n",
+      "2021-09-21 22:04:58,447 epoch 3 - iter 130/130 - loss 0.66374190 - samples/sec: 23.74 - lr: 0.020000\n",
+      "2021-09-21 22:04:58,448 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:04:58,448 EPOCH 3 done: loss 0.6637 - lr 0.0200000\n",
+      "2021-09-21 22:04:58,691 DEV : loss 0.45258110761642456 - score 0.0714\n",
+      "2021-09-21 22:04:58,692 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:07:29,236 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:07:29,996 epoch 4 - iter 13/130 - loss 0.25072193 - samples/sec: 17.58 - lr: 0.020000\n",
-      "2021-09-08 02:07:30,686 epoch 4 - iter 26/130 - loss 0.24643347 - samples/sec: 18.87 - lr: 0.020000\n",
-      "2021-09-08 02:07:31,396 epoch 4 - iter 39/130 - loss 0.38453003 - samples/sec: 18.34 - lr: 0.020000\n",
-      "2021-09-08 02:07:32,090 epoch 4 - iter 52/130 - loss 0.30586102 - samples/sec: 18.74 - lr: 0.020000\n",
-      "2021-09-08 02:07:32,796 epoch 4 - iter 65/130 - loss 0.34059671 - samples/sec: 18.44 - lr: 0.020000\n",
-      "2021-09-08 02:07:33,501 epoch 4 - iter 78/130 - loss 0.34107049 - samples/sec: 18.47 - lr: 0.020000\n",
-      "2021-09-08 02:07:34,185 epoch 4 - iter 91/130 - loss 0.29416272 - samples/sec: 19.04 - lr: 0.020000\n",
-      "2021-09-08 02:07:34,882 epoch 4 - iter 104/130 - loss 0.27588740 - samples/sec: 18.68 - lr: 0.020000\n",
-      "2021-09-08 02:07:35,569 epoch 4 - iter 117/130 - loss 0.27007469 - samples/sec: 18.94 - lr: 0.020000\n",
-      "2021-09-08 02:07:36,262 epoch 4 - iter 130/130 - loss 0.25522782 - samples/sec: 18.77 - lr: 0.020000\n",
-      "2021-09-08 02:07:36,264 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:07:36,264 EPOCH 4 done: loss 0.2552 - lr 0.0200000\n",
-      "2021-09-08 02:07:36,559 DEV : loss 0.3413907289505005 - score 0.6429\n",
-      "2021-09-08 02:07:36,559 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:05:02,873 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:05:03,614 epoch 4 - iter 13/130 - loss 0.65690065 - samples/sec: 18.28 - lr: 0.020000\n",
+      "2021-09-21 22:05:04,381 epoch 4 - iter 26/130 - loss 0.64966208 - samples/sec: 16.98 - lr: 0.020000\n",
+      "2021-09-21 22:05:05,125 epoch 4 - iter 39/130 - loss 0.64785347 - samples/sec: 17.49 - lr: 0.020000\n",
+      "2021-09-21 22:05:05,882 epoch 4 - iter 52/130 - loss 0.64696014 - samples/sec: 17.19 - lr: 0.020000\n",
+      "2021-09-21 22:05:06,630 epoch 4 - iter 65/130 - loss 0.64587017 - samples/sec: 17.41 - lr: 0.020000\n",
+      "2021-09-21 22:05:07,385 epoch 4 - iter 78/130 - loss 0.64585690 - samples/sec: 17.23 - lr: 0.020000\n",
+      "2021-09-21 22:05:08,162 epoch 4 - iter 91/130 - loss 0.64522563 - samples/sec: 16.75 - lr: 0.020000\n",
+      "2021-09-21 22:05:08,763 epoch 4 - iter 104/130 - loss 0.64454879 - samples/sec: 21.67 - lr: 0.020000\n",
+      "2021-09-21 22:05:09,322 epoch 4 - iter 117/130 - loss 0.64458690 - samples/sec: 23.31 - lr: 0.020000\n",
+      "2021-09-21 22:05:09,875 epoch 4 - iter 130/130 - loss 0.64433837 - samples/sec: 23.53 - lr: 0.020000\n",
+      "2021-09-21 22:05:09,877 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:05:09,877 EPOCH 4 done: loss 0.6443 - lr 0.0200000\n",
+      "2021-09-21 22:05:10,209 DEV : loss 0.4521152079105377 - score 0.0714\n",
+      "2021-09-21 22:05:10,210 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:07:41,001 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:07:41,627 epoch 5 - iter 13/130 - loss 0.32467606 - samples/sec: 21.39 - lr: 0.020000\n",
-      "2021-09-08 02:07:42,217 epoch 5 - iter 26/130 - loss 0.32395553 - samples/sec: 22.08 - lr: 0.020000\n",
-      "2021-09-08 02:07:42,809 epoch 5 - iter 39/130 - loss 0.33010371 - samples/sec: 21.97 - lr: 0.020000\n",
-      "2021-09-08 02:07:43,410 epoch 5 - iter 52/130 - loss 0.29823754 - samples/sec: 21.68 - lr: 0.020000\n",
-      "2021-09-08 02:07:43,993 epoch 5 - iter 65/130 - loss 0.30936453 - samples/sec: 22.32 - lr: 0.020000\n",
-      "2021-09-08 02:07:44,583 epoch 5 - iter 78/130 - loss 0.31820429 - samples/sec: 22.07 - lr: 0.020000\n",
-      "2021-09-08 02:07:45,166 epoch 5 - iter 91/130 - loss 0.31034357 - samples/sec: 22.32 - lr: 0.020000\n",
-      "2021-09-08 02:07:45,750 epoch 5 - iter 104/130 - loss 0.29990263 - samples/sec: 22.31 - lr: 0.020000\n",
-      "2021-09-08 02:07:46,339 epoch 5 - iter 117/130 - loss 0.29579581 - samples/sec: 22.08 - lr: 0.020000\n",
-      "2021-09-08 02:07:46,919 epoch 5 - iter 130/130 - loss 0.27426495 - samples/sec: 22.46 - lr: 0.020000\n",
-      "2021-09-08 02:07:46,920 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:07:46,921 EPOCH 5 done: loss 0.2743 - lr 0.0200000\n",
-      "2021-09-08 02:07:47,183 DEV : loss 0.2320578396320343 - score 0.6429\n",
-      "2021-09-08 02:07:47,183 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:05:15,687 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:05:16,457 epoch 5 - iter 13/130 - loss 0.64282539 - samples/sec: 17.68 - lr: 0.020000\n",
+      "2021-09-21 22:05:17,204 epoch 5 - iter 26/130 - loss 0.62589841 - samples/sec: 17.41 - lr: 0.020000\n",
+      "2021-09-21 22:05:17,838 epoch 5 - iter 39/130 - loss 0.64713188 - samples/sec: 20.56 - lr: 0.020000\n",
+      "2021-09-21 22:05:18,412 epoch 5 - iter 52/130 - loss 0.64022055 - samples/sec: 22.69 - lr: 0.020000\n",
+      "2021-09-21 22:05:18,964 epoch 5 - iter 65/130 - loss 0.64226105 - samples/sec: 23.58 - lr: 0.020000\n",
+      "2021-09-21 22:05:19,519 epoch 5 - iter 78/130 - loss 0.64322500 - samples/sec: 23.45 - lr: 0.020000\n",
+      "2021-09-21 22:05:20,069 epoch 5 - iter 91/130 - loss 0.64452488 - samples/sec: 23.66 - lr: 0.020000\n",
+      "2021-09-21 22:05:20,624 epoch 5 - iter 104/130 - loss 0.64367973 - samples/sec: 23.47 - lr: 0.020000\n",
+      "2021-09-21 22:05:21,172 epoch 5 - iter 117/130 - loss 0.64267717 - samples/sec: 23.73 - lr: 0.020000\n",
+      "2021-09-21 22:05:21,832 epoch 5 - iter 130/130 - loss 0.64210925 - samples/sec: 19.73 - lr: 0.020000\n",
+      "2021-09-21 22:05:21,833 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:05:21,834 EPOCH 5 done: loss 0.6421 - lr 0.0200000\n",
+      "2021-09-21 22:05:22,411 DEV : loss 0.48075300455093384 - score 0.1429\n",
+      "2021-09-21 22:05:22,412 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:07:51,736 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:07:52,367 epoch 6 - iter 13/130 - loss 0.00404345 - samples/sec: 21.23 - lr: 0.020000\n",
-      "2021-09-08 02:07:52,967 epoch 6 - iter 26/130 - loss 0.29856729 - samples/sec: 21.71 - lr: 0.020000\n",
-      "2021-09-08 02:07:53,545 epoch 6 - iter 39/130 - loss 0.21874470 - samples/sec: 22.51 - lr: 0.020000\n",
-      "2021-09-08 02:07:54,118 epoch 6 - iter 52/130 - loss 0.16539513 - samples/sec: 22.73 - lr: 0.020000\n",
-      "2021-09-08 02:07:54,700 epoch 6 - iter 65/130 - loss 0.22891682 - samples/sec: 22.38 - lr: 0.020000\n",
-      "2021-09-08 02:07:55,280 epoch 6 - iter 78/130 - loss 0.19515494 - samples/sec: 22.42 - lr: 0.020000\n",
-      "2021-09-08 02:07:55,859 epoch 6 - iter 91/130 - loss 0.20842263 - samples/sec: 22.51 - lr: 0.020000\n",
-      "2021-09-08 02:07:56,439 epoch 6 - iter 104/130 - loss 0.22098910 - samples/sec: 22.43 - lr: 0.020000\n",
-      "2021-09-08 02:07:57,018 epoch 6 - iter 117/130 - loss 0.21712525 - samples/sec: 22.47 - lr: 0.020000\n"
+      "2021-09-21 22:05:26,424 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:05:27,161 epoch 6 - iter 13/130 - loss 0.65868612 - samples/sec: 19.93 - lr: 0.020000\n",
+      "2021-09-21 22:05:27,735 epoch 6 - iter 26/130 - loss 0.65710965 - samples/sec: 22.70 - lr: 0.020000\n",
+      "2021-09-21 22:05:28,288 epoch 6 - iter 39/130 - loss 0.65981379 - samples/sec: 23.56 - lr: 0.020000\n",
+      "2021-09-21 22:05:28,842 epoch 6 - iter 52/130 - loss 0.65345958 - samples/sec: 23.47 - lr: 0.020000\n",
+      "2021-09-21 22:05:29,393 epoch 6 - iter 65/130 - loss 0.64809946 - samples/sec: 23.65 - lr: 0.020000\n",
+      "2021-09-21 22:05:29,946 epoch 6 - iter 78/130 - loss 0.65053032 - samples/sec: 23.52 - lr: 0.020000\n",
+      "2021-09-21 22:05:30,495 epoch 6 - iter 91/130 - loss 0.64854624 - samples/sec: 23.73 - lr: 0.020000\n",
+      "2021-09-21 22:05:31,144 epoch 6 - iter 104/130 - loss 0.64579012 - samples/sec: 20.05 - lr: 0.020000\n",
+      "2021-09-21 22:05:31,843 epoch 6 - iter 117/130 - loss 0.64904411 - samples/sec: 18.62 - lr: 0.020000\n",
+      "2021-09-21 22:05:32,605 epoch 6 - iter 130/130 - loss 0.64883593 - samples/sec: 17.08 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:07:57,593 epoch 6 - iter 130/130 - loss 0.21018566 - samples/sec: 22.64 - lr: 0.020000\n",
-      "2021-09-08 02:07:57,594 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:07:57,595 EPOCH 6 done: loss 0.2102 - lr 0.0200000\n",
-      "2021-09-08 02:07:57,881 DEV : loss 0.19677582383155823 - score 0.7143\n",
-      "2021-09-08 02:07:57,882 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 02:08:02,027 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:08:02,624 epoch 7 - iter 13/130 - loss 0.16997636 - samples/sec: 22.47 - lr: 0.020000\n",
-      "2021-09-08 02:08:03,203 epoch 7 - iter 26/130 - loss 0.13018066 - samples/sec: 22.50 - lr: 0.020000\n",
-      "2021-09-08 02:08:03,789 epoch 7 - iter 39/130 - loss 0.13537350 - samples/sec: 22.21 - lr: 0.020000\n",
-      "2021-09-08 02:08:04,363 epoch 7 - iter 52/130 - loss 0.15842892 - samples/sec: 22.70 - lr: 0.020000\n",
-      "2021-09-08 02:08:04,936 epoch 7 - iter 65/130 - loss 0.14426628 - samples/sec: 22.71 - lr: 0.020000\n",
-      "2021-09-08 02:08:05,517 epoch 7 - iter 78/130 - loss 0.19207195 - samples/sec: 22.42 - lr: 0.020000\n",
-      "2021-09-08 02:08:06,093 epoch 7 - iter 91/130 - loss 0.19569779 - samples/sec: 22.60 - lr: 0.020000\n",
-      "2021-09-08 02:08:06,675 epoch 7 - iter 104/130 - loss 0.19296469 - samples/sec: 22.34 - lr: 0.020000\n",
-      "2021-09-08 02:08:07,267 epoch 7 - iter 117/130 - loss 0.20533630 - samples/sec: 22.02 - lr: 0.020000\n",
-      "2021-09-08 02:08:07,843 epoch 7 - iter 130/130 - loss 0.20092870 - samples/sec: 22.60 - lr: 0.020000\n",
-      "2021-09-08 02:08:07,844 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:08:07,844 EPOCH 7 done: loss 0.2009 - lr 0.0200000\n",
-      "2021-09-08 02:08:08,088 DEV : loss 0.23970584571361542 - score 0.7143\n",
-      "2021-09-08 02:08:08,089 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:08:08,103 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:08:08,700 epoch 8 - iter 13/130 - loss 0.07151137 - samples/sec: 22.37 - lr: 0.020000\n",
-      "2021-09-08 02:08:09,275 epoch 8 - iter 26/130 - loss 0.10881063 - samples/sec: 22.64 - lr: 0.020000\n",
-      "2021-09-08 02:08:09,859 epoch 8 - iter 39/130 - loss 0.19857828 - samples/sec: 22.32 - lr: 0.020000\n",
-      "2021-09-08 02:08:10,442 epoch 8 - iter 52/130 - loss 0.16244224 - samples/sec: 22.31 - lr: 0.020000\n",
-      "2021-09-08 02:08:11,022 epoch 8 - iter 65/130 - loss 0.13120014 - samples/sec: 22.46 - lr: 0.020000\n",
-      "2021-09-08 02:08:11,602 epoch 8 - iter 78/130 - loss 0.11238422 - samples/sec: 22.44 - lr: 0.020000\n",
-      "2021-09-08 02:08:12,184 epoch 8 - iter 91/130 - loss 0.15899088 - samples/sec: 22.36 - lr: 0.020000\n",
-      "2021-09-08 02:08:12,768 epoch 8 - iter 104/130 - loss 0.17734709 - samples/sec: 22.29 - lr: 0.020000\n",
-      "2021-09-08 02:08:13,354 epoch 8 - iter 117/130 - loss 0.18774106 - samples/sec: 22.21 - lr: 0.020000\n",
-      "2021-09-08 02:08:13,927 epoch 8 - iter 130/130 - loss 0.17016633 - samples/sec: 22.70 - lr: 0.020000\n",
-      "2021-09-08 02:08:13,929 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:08:13,929 EPOCH 8 done: loss 0.1702 - lr 0.0200000\n",
-      "2021-09-08 02:08:14,172 DEV : loss 0.19434350728988647 - score 0.6429\n",
-      "2021-09-08 02:08:14,173 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:08:14,189 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:08:14,787 epoch 9 - iter 13/130 - loss 0.17379640 - samples/sec: 22.36 - lr: 0.020000\n",
-      "2021-09-08 02:08:15,362 epoch 9 - iter 26/130 - loss 0.10358966 - samples/sec: 22.64 - lr: 0.020000\n",
-      "2021-09-08 02:08:15,943 epoch 9 - iter 39/130 - loss 0.09435559 - samples/sec: 22.39 - lr: 0.020000\n",
-      "2021-09-08 02:08:16,520 epoch 9 - iter 52/130 - loss 0.08224227 - samples/sec: 22.56 - lr: 0.020000\n",
-      "2021-09-08 02:08:17,096 epoch 9 - iter 65/130 - loss 0.06688739 - samples/sec: 22.61 - lr: 0.020000\n",
-      "2021-09-08 02:08:17,677 epoch 9 - iter 78/130 - loss 0.07153681 - samples/sec: 22.42 - lr: 0.020000\n",
-      "2021-09-08 02:08:18,258 epoch 9 - iter 91/130 - loss 0.09552339 - samples/sec: 22.41 - lr: 0.020000\n",
-      "2021-09-08 02:08:18,830 epoch 9 - iter 104/130 - loss 0.08376562 - samples/sec: 22.76 - lr: 0.020000\n",
-      "2021-09-08 02:08:19,401 epoch 9 - iter 117/130 - loss 0.07451919 - samples/sec: 22.82 - lr: 0.020000\n",
-      "2021-09-08 02:08:19,975 epoch 9 - iter 130/130 - loss 0.06712093 - samples/sec: 22.68 - lr: 0.020000\n",
-      "2021-09-08 02:08:19,976 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:08:19,976 EPOCH 9 done: loss 0.0671 - lr 0.0200000\n",
-      "2021-09-08 02:08:20,230 DEV : loss 0.14138492941856384 - score 0.7857\n",
-      "2021-09-08 02:08:20,231 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 02:08:24,234 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:08:24,823 epoch 10 - iter 13/130 - loss 0.00136721 - samples/sec: 22.79 - lr: 0.020000\n",
-      "2021-09-08 02:08:25,394 epoch 10 - iter 26/130 - loss 0.00266659 - samples/sec: 22.77 - lr: 0.020000\n",
-      "2021-09-08 02:08:25,964 epoch 10 - iter 39/130 - loss 0.00219098 - samples/sec: 22.84 - lr: 0.020000\n",
-      "2021-09-08 02:08:26,534 epoch 10 - iter 52/130 - loss 0.00172466 - samples/sec: 22.87 - lr: 0.020000\n",
-      "2021-09-08 02:08:27,113 epoch 10 - iter 65/130 - loss 0.08018905 - samples/sec: 22.46 - lr: 0.020000\n",
-      "2021-09-08 02:08:27,699 epoch 10 - iter 78/130 - loss 0.10526438 - samples/sec: 22.23 - lr: 0.020000\n",
-      "2021-09-08 02:08:28,292 epoch 10 - iter 91/130 - loss 0.09035436 - samples/sec: 21.94 - lr: 0.020000\n",
-      "2021-09-08 02:08:28,870 epoch 10 - iter 104/130 - loss 0.10976612 - samples/sec: 22.53 - lr: 0.020000\n",
-      "2021-09-08 02:08:29,451 epoch 10 - iter 117/130 - loss 0.10771724 - samples/sec: 22.41 - lr: 0.020000\n",
-      "2021-09-08 02:08:30,028 epoch 10 - iter 130/130 - loss 0.09722488 - samples/sec: 22.57 - lr: 0.020000\n",
-      "2021-09-08 02:08:30,029 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:08:30,029 EPOCH 10 done: loss 0.0972 - lr 0.0200000\n",
-      "2021-09-08 02:08:30,293 DEV : loss 0.23276130855083466 - score 0.5714\n",
-      "2021-09-08 02:08:30,294 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:08:34,033 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:08:34,034 Testing using best model ...\n",
-      "2021-09-08 02:08:34,035 loading file None/best-model.pt\n",
+      "2021-09-21 22:05:32,606 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:05:32,606 EPOCH 6 done: loss 0.6488 - lr 0.0200000\n",
+      "2021-09-21 22:05:33,138 DEV : loss 0.3960593342781067 - score 0.0714\n",
+      "2021-09-21 22:05:33,138 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:05:33,140 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:05:33,935 epoch 7 - iter 13/130 - loss 0.65633893 - samples/sec: 17.43 - lr: 0.020000\n",
+      "2021-09-21 22:05:34,707 epoch 7 - iter 26/130 - loss 0.65397556 - samples/sec: 16.86 - lr: 0.020000\n",
+      "2021-09-21 22:05:35,449 epoch 7 - iter 39/130 - loss 0.64965818 - samples/sec: 17.53 - lr: 0.020000\n",
+      "2021-09-21 22:05:36,196 epoch 7 - iter 52/130 - loss 0.64840743 - samples/sec: 17.42 - lr: 0.020000\n",
+      "2021-09-21 22:05:36,947 epoch 7 - iter 65/130 - loss 0.64558457 - samples/sec: 17.33 - lr: 0.020000\n",
+      "2021-09-21 22:05:37,662 epoch 7 - iter 78/130 - loss 0.64418153 - samples/sec: 18.21 - lr: 0.020000\n",
+      "2021-09-21 22:05:38,265 epoch 7 - iter 91/130 - loss 0.64611135 - samples/sec: 21.59 - lr: 0.020000\n",
+      "2021-09-21 22:05:38,829 epoch 7 - iter 104/130 - loss 0.64331394 - samples/sec: 23.07 - lr: 0.020000\n",
+      "2021-09-21 22:05:39,384 epoch 7 - iter 117/130 - loss 0.64610109 - samples/sec: 23.46 - lr: 0.020000\n",
+      "2021-09-21 22:05:39,937 epoch 7 - iter 130/130 - loss 0.64458861 - samples/sec: 23.55 - lr: 0.020000\n",
+      "2021-09-21 22:05:39,938 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:05:39,939 EPOCH 7 done: loss 0.6446 - lr 0.0200000\n",
+      "2021-09-21 22:05:40,277 DEV : loss 0.4040239751338959 - score 0.0714\n",
+      "2021-09-21 22:05:40,278 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 22:05:40,355 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:05:40,925 epoch 8 - iter 13/130 - loss 0.66281938 - samples/sec: 23.52 - lr: 0.020000\n",
+      "2021-09-21 22:05:41,474 epoch 8 - iter 26/130 - loss 0.64241169 - samples/sec: 23.69 - lr: 0.020000\n",
+      "2021-09-21 22:05:42,069 epoch 8 - iter 39/130 - loss 0.64515527 - samples/sec: 21.89 - lr: 0.020000\n",
+      "2021-09-21 22:05:42,835 epoch 8 - iter 52/130 - loss 0.64821558 - samples/sec: 16.99 - lr: 0.020000\n",
+      "2021-09-21 22:05:43,593 epoch 8 - iter 65/130 - loss 0.64770694 - samples/sec: 17.16 - lr: 0.020000\n",
+      "2021-09-21 22:05:44,307 epoch 8 - iter 78/130 - loss 0.65165531 - samples/sec: 18.24 - lr: 0.020000\n",
+      "2021-09-21 22:05:45,034 epoch 8 - iter 91/130 - loss 0.65012045 - samples/sec: 17.89 - lr: 0.020000\n",
+      "2021-09-21 22:05:45,780 epoch 8 - iter 104/130 - loss 0.64661252 - samples/sec: 17.43 - lr: 0.020000\n",
+      "2021-09-21 22:05:46,515 epoch 8 - iter 117/130 - loss 0.64780803 - samples/sec: 17.71 - lr: 0.020000\n",
+      "2021-09-21 22:05:47,284 epoch 8 - iter 130/130 - loss 0.64530253 - samples/sec: 16.93 - lr: 0.020000\n",
+      "2021-09-21 22:05:47,285 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:05:47,286 EPOCH 8 done: loss 0.6453 - lr 0.0200000\n",
+      "2021-09-21 22:05:47,953 DEV : loss 0.4391530156135559 - score 0.0714\n",
+      "2021-09-21 22:05:47,954 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 22:05:47,956 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:05:48,740 epoch 9 - iter 13/130 - loss 0.67801028 - samples/sec: 17.42 - lr: 0.020000\n",
+      "2021-09-21 22:05:49,495 epoch 9 - iter 26/130 - loss 0.66199005 - samples/sec: 17.25 - lr: 0.020000\n",
+      "2021-09-21 22:05:50,226 epoch 9 - iter 39/130 - loss 0.65577308 - samples/sec: 17.79 - lr: 0.020000\n",
+      "2021-09-21 22:05:50,989 epoch 9 - iter 52/130 - loss 0.66062577 - samples/sec: 17.07 - lr: 0.020000\n",
+      "2021-09-21 22:05:51,755 epoch 9 - iter 65/130 - loss 0.65769792 - samples/sec: 16.97 - lr: 0.020000\n",
+      "2021-09-21 22:05:52,464 epoch 9 - iter 78/130 - loss 0.65501093 - samples/sec: 18.36 - lr: 0.020000\n",
+      "2021-09-21 22:05:53,190 epoch 9 - iter 91/130 - loss 0.65096350 - samples/sec: 17.92 - lr: 0.020000\n",
+      "2021-09-21 22:05:53,918 epoch 9 - iter 104/130 - loss 0.64928789 - samples/sec: 17.89 - lr: 0.020000\n",
+      "2021-09-21 22:05:54,675 epoch 9 - iter 117/130 - loss 0.64989846 - samples/sec: 17.18 - lr: 0.020000\n",
+      "2021-09-21 22:05:55,437 epoch 9 - iter 130/130 - loss 0.65042637 - samples/sec: 17.08 - lr: 0.020000\n",
+      "2021-09-21 22:05:55,438 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:05:55,439 EPOCH 9 done: loss 0.6504 - lr 0.0200000\n",
+      "2021-09-21 22:05:55,961 DEV : loss 0.4027636647224426 - score 0.0714\n",
+      "Epoch     9: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 22:05:55,962 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 22:05:55,964 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:05:56,793 epoch 10 - iter 13/130 - loss 0.66317090 - samples/sec: 16.47 - lr: 0.010000\n",
+      "2021-09-21 22:05:57,546 epoch 10 - iter 26/130 - loss 0.66939545 - samples/sec: 17.30 - lr: 0.010000\n",
+      "2021-09-21 22:05:58,287 epoch 10 - iter 39/130 - loss 0.65782442 - samples/sec: 17.55 - lr: 0.010000\n",
+      "2021-09-21 22:05:59,023 epoch 10 - iter 52/130 - loss 0.64972055 - samples/sec: 17.68 - lr: 0.010000\n",
+      "2021-09-21 22:05:59,797 epoch 10 - iter 65/130 - loss 0.64925053 - samples/sec: 16.80 - lr: 0.010000\n",
+      "2021-09-21 22:06:00,529 epoch 10 - iter 78/130 - loss 0.65064408 - samples/sec: 17.78 - lr: 0.010000\n",
+      "2021-09-21 22:06:01,237 epoch 10 - iter 91/130 - loss 0.65054826 - samples/sec: 18.37 - lr: 0.010000\n",
+      "2021-09-21 22:06:02,021 epoch 10 - iter 104/130 - loss 0.64878038 - samples/sec: 16.60 - lr: 0.010000\n",
+      "2021-09-21 22:06:02,766 epoch 10 - iter 117/130 - loss 0.64692776 - samples/sec: 17.49 - lr: 0.010000\n",
+      "2021-09-21 22:06:03,493 epoch 10 - iter 130/130 - loss 0.64738401 - samples/sec: 17.89 - lr: 0.010000\n",
+      "2021-09-21 22:06:03,495 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:06:03,495 EPOCH 10 done: loss 0.6474 - lr 0.0100000\n",
+      "2021-09-21 22:06:03,957 DEV : loss 0.47889021039009094 - score 0.0714\n",
+      "2021-09-21 22:06:03,958 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:06:08,066 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:06:08,067 Testing using best model ...\n",
+      "2021-09-21 22:06:08,068 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:08:38,244 \t0.3125\n",
-      "2021-09-08 02:08:38,245 \n",
+      "2021-09-21 22:06:13,677 \t0.0\n",
+      "2021-09-21 22:06:13,678 \n",
       "Results:\n",
-      "- F-score (micro) 0.3125\n",
-      "- F-score (macro) 0.1979\n",
-      "- Accuracy 0.3125\n",
+      "- F-score (micro) 0.0\n",
+      "- F-score (macro) 0.0\n",
+      "- Accuracy 0.0\n",
       "\n",
       "By class:\n",
-      "                                               precision    recall  f1-score   support\n",
+      "                                           precision    recall  f1-score   support\n",
       "\n",
-      "                    this text is about travel     1.0000    1.0000    1.0000         1\n",
-      "                this text is about technology     1.0000    1.0000    1.0000         2\n",
-      "                  this text is about wellness     1.0000    0.5000    0.6667         2\n",
-      "                     this text is about women     0.0000    0.0000    0.0000         0\n",
-      "                   this text is about parents     0.0000    0.0000    0.0000         0\n",
-      "                  this text is about business     0.0000    0.0000    0.0000         0\n",
-      "                  this text is about weddings     0.0000    0.0000    0.0000         0\n",
-      "                   this text is about fashion     1.0000    0.3333    0.5000         3\n",
-      "this text is about entertainmen,the attention     0.0000    0.0000    0.0000         1\n",
-      "                   this text is about science     0.0000    0.0000    0.0000         0\n",
-      "                   this text is about divorce     0.0000    0.0000    0.0000         0\n",
-      "                     this text is about crime     0.0000    0.0000    0.0000         2\n",
-      "                  this text is about religion     0.0000    0.0000    0.0000         1\n",
-      "                    this text is about sports     0.0000    0.0000    0.0000         1\n",
-      "                  this text is about politics     0.0000    0.0000    0.0000         0\n",
-      "                    this text is about comedy     0.0000    0.0000    0.0000         3\n",
+      "This text is about Family & Relationships     0.0000    0.0000    0.0000         0\n",
+      "                This text is about Health     0.0000    0.0000    0.0000         0\n",
+      " This text is about Science & Mathematics     0.0000    0.0000    0.0000         0\n",
+      " This text is about Entertainment & Music     0.0000    0.0000    0.0000         0\n",
+      "  This text is about Computers & Internet     0.0000    0.0000    0.0000         0\n",
+      " This text is about Education & Reference     0.0000    0.0000    0.0000         0\n",
+      "                This text is about Sports     0.0000    0.0000    0.0000         0\n",
+      "     This text is about Society & Culture     0.0000    0.0000    0.0000         0\n",
+      "    This text is about Business & Finance     0.0000    0.0000    0.0000         0\n",
+      " This text is about Politics & Government     0.0000    0.0000    0.0000         0\n",
       "\n",
-      "                                    micro avg     0.3125    0.3125    0.3125        16\n",
-      "                                    macro avg     0.2500    0.1771    0.1979        16\n",
-      "                                 weighted avg     0.5000    0.3125    0.3646        16\n",
-      "                                  samples avg     0.3125    0.3125    0.3125        16\n",
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 02:08:38,245 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:08:46,889 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
+      "                                micro avg     0.0000    0.0000    0.0000         0\n",
+      "                                macro avg     0.0000    0.0000    0.0000         0\n",
+      "                             weighted avg     0.0000    0.0000    0.0000         0\n",
+      "                              samples avg     0.0000    0.0000    0.0000         0\n",
+      "\n",
+      "2021-09-21 22:06:13,678 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:06:25,478 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:08:50,797 Computing label dictionary. Progress:\n"
+      "2021-09-21 22:06:29,576 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 146/146 [00:00<00:00, 72043.34it/s]"
+      "100%|██████████| 146/146 [00:00<00:00, 51589.59it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:08:50,800 [b'this text is about travel', b'this text is about technology', b'this text is about wellness', b'this text is about women', b'this text is about parents', b'this text is about business', b'this text is about weddings', b'this text is about fashion', b'this text is about entertainmen,the attention', b'this text is about science', b'this text is about divorce', b'this text is about crime', b'this text is about religion', b'this text is about sports', b'this text is about politics', b'this text is about comedy']\n",
-      "2021-09-08 02:08:50,813 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:08:50,815 Model: \"TARSClassifier(\n",
+      "2021-09-21 22:06:29,581 [b'this text is about travel', b'this text is about technology', b'this text is about wellness', b'this text is about women', b'this text is about parents', b'this text is about business', b'this text is about weddings', b'this text is about fashion', b'this text is about entertainmen,the attention', b'this text is about science', b'this text is about divorce', b'this text is about crime', b'this text is about religion', b'this text is about sports', b'this text is about politics', b'this text is about comedy']\n",
+      "2021-09-21 22:06:29,591 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:06:29,592 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -4444,25 +4436,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:08:50,815 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:08:50,816 Corpus: \"Corpus: 130 train + 14 dev + 16 test sentences\"\n",
-      "2021-09-08 02:08:50,816 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:08:50,816 Parameters:\n",
-      "2021-09-08 02:08:50,817  - learning_rate: \"0.02\"\n",
-      "2021-09-08 02:08:50,817  - mini_batch_size: \"1\"\n",
-      "2021-09-08 02:08:50,817  - patience: \"3\"\n",
-      "2021-09-08 02:08:50,817  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 02:08:50,818  - max_epochs: \"10\"\n",
-      "2021-09-08 02:08:50,818  - shuffle: \"True\"\n",
-      "2021-09-08 02:08:50,818  - train_with_dev: \"False\"\n",
-      "2021-09-08 02:08:50,819  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 02:08:50,819 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:08:50,819 Model training base path: \"None\"\n",
-      "2021-09-08 02:08:50,819 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:08:50,820 Device: cuda:0\n",
-      "2021-09-08 02:08:50,820 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:08:50,820 Embeddings storage mode: cpu\n",
-      "2021-09-08 02:08:50,829 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 22:06:29,593 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:06:29,593 Corpus: \"Corpus: 130 train + 14 dev + 16 test sentences\"\n",
+      "2021-09-21 22:06:29,594 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:06:29,594 Parameters:\n",
+      "2021-09-21 22:06:29,594  - learning_rate: \"0.02\"\n",
+      "2021-09-21 22:06:29,595  - mini_batch_size: \"1\"\n",
+      "2021-09-21 22:06:29,595  - patience: \"3\"\n",
+      "2021-09-21 22:06:29,595  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 22:06:29,595  - max_epochs: \"10\"\n",
+      "2021-09-21 22:06:29,596  - shuffle: \"True\"\n",
+      "2021-09-21 22:06:29,596  - train_with_dev: \"False\"\n",
+      "2021-09-21 22:06:29,596  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 22:06:29,596 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:06:29,597 Model training base path: \"None\"\n",
+      "2021-09-21 22:06:29,597 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:06:29,597 Device: cuda:0\n",
+      "2021-09-21 22:06:29,598 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:06:29,598 Embeddings storage mode: cpu\n",
+      "2021-09-21 22:06:29,604 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -4476,172 +4468,171 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:08:51,440 epoch 1 - iter 13/130 - loss 0.39538078 - samples/sec: 21.89 - lr: 0.020000\n",
-      "2021-09-08 02:08:52,044 epoch 1 - iter 26/130 - loss 0.39874898 - samples/sec: 21.54 - lr: 0.020000\n",
-      "2021-09-08 02:08:52,677 epoch 1 - iter 39/130 - loss 0.56148330 - samples/sec: 20.57 - lr: 0.020000\n",
-      "2021-09-08 02:08:53,283 epoch 1 - iter 52/130 - loss 0.51275075 - samples/sec: 21.49 - lr: 0.020000\n",
-      "2021-09-08 02:08:53,999 epoch 1 - iter 65/130 - loss 0.56618813 - samples/sec: 18.16 - lr: 0.020000\n",
-      "2021-09-08 02:08:54,605 epoch 1 - iter 78/130 - loss 0.54304906 - samples/sec: 21.49 - lr: 0.020000\n",
-      "2021-09-08 02:08:55,204 epoch 1 - iter 91/130 - loss 0.53513819 - samples/sec: 21.75 - lr: 0.020000\n",
-      "2021-09-08 02:08:55,809 epoch 1 - iter 104/130 - loss 0.51814577 - samples/sec: 21.50 - lr: 0.020000\n",
-      "2021-09-08 02:08:56,405 epoch 1 - iter 117/130 - loss 0.50723337 - samples/sec: 21.84 - lr: 0.020000\n",
-      "2021-09-08 02:08:57,018 epoch 1 - iter 130/130 - loss 0.54180259 - samples/sec: 21.24 - lr: 0.020000\n",
-      "2021-09-08 02:08:57,019 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:08:57,019 EPOCH 1 done: loss 0.5418 - lr 0.0200000\n",
-      "2021-09-08 02:08:57,260 DEV : loss 0.33372488617897034 - score 0.2143\n",
-      "2021-09-08 02:08:57,261 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:06:30,525 epoch 1 - iter 13/130 - loss 0.39319429 - samples/sec: 15.07 - lr: 0.020000\n",
+      "2021-09-21 22:06:31,312 epoch 1 - iter 26/130 - loss 0.45285058 - samples/sec: 16.54 - lr: 0.020000\n",
+      "2021-09-21 22:06:32,167 epoch 1 - iter 39/130 - loss 0.50120592 - samples/sec: 15.22 - lr: 0.020000\n",
+      "2021-09-21 22:06:32,962 epoch 1 - iter 52/130 - loss 0.49370743 - samples/sec: 16.36 - lr: 0.020000\n",
+      "2021-09-21 22:06:33,723 epoch 1 - iter 65/130 - loss 0.58014509 - samples/sec: 17.09 - lr: 0.020000\n",
+      "2021-09-21 22:06:34,552 epoch 1 - iter 78/130 - loss 0.62619404 - samples/sec: 15.70 - lr: 0.020000\n",
+      "2021-09-21 22:06:35,351 epoch 1 - iter 91/130 - loss 0.64561309 - samples/sec: 16.28 - lr: 0.020000\n",
+      "2021-09-21 22:06:36,128 epoch 1 - iter 104/130 - loss 0.65792280 - samples/sec: 16.76 - lr: 0.020000\n",
+      "2021-09-21 22:06:36,849 epoch 1 - iter 117/130 - loss 0.65771310 - samples/sec: 18.03 - lr: 0.020000\n",
+      "2021-09-21 22:06:37,635 epoch 1 - iter 130/130 - loss 0.66710277 - samples/sec: 16.57 - lr: 0.020000\n",
+      "2021-09-21 22:06:37,636 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:06:37,636 EPOCH 1 done: loss 0.6671 - lr 0.0200000\n",
+      "2021-09-21 22:06:38,197 DEV : loss 0.25207313895225525 - score 0.1429\n",
+      "2021-09-21 22:06:38,198 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:09:01,327 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:09:01,995 epoch 2 - iter 13/130 - loss 0.51882681 - samples/sec: 20.04 - lr: 0.020000\n",
-      "2021-09-08 02:09:02,601 epoch 2 - iter 26/130 - loss 0.55887020 - samples/sec: 21.48 - lr: 0.020000\n",
-      "2021-09-08 02:09:03,209 epoch 2 - iter 39/130 - loss 0.57330276 - samples/sec: 21.40 - lr: 0.020000\n",
-      "2021-09-08 02:09:03,819 epoch 2 - iter 52/130 - loss 0.58813570 - samples/sec: 21.37 - lr: 0.020000\n",
-      "2021-09-08 02:09:04,421 epoch 2 - iter 65/130 - loss 0.56435544 - samples/sec: 21.60 - lr: 0.020000\n",
-      "2021-09-08 02:09:05,026 epoch 2 - iter 78/130 - loss 0.65477560 - samples/sec: 21.51 - lr: 0.020000\n",
-      "2021-09-08 02:09:05,639 epoch 2 - iter 91/130 - loss 0.64606663 - samples/sec: 21.26 - lr: 0.020000\n",
-      "2021-09-08 02:09:06,245 epoch 2 - iter 104/130 - loss 0.65225460 - samples/sec: 21.47 - lr: 0.020000\n",
-      "2021-09-08 02:09:06,857 epoch 2 - iter 117/130 - loss 0.65271133 - samples/sec: 21.26 - lr: 0.020000\n",
-      "2021-09-08 02:09:07,471 epoch 2 - iter 130/130 - loss 0.64194311 - samples/sec: 21.18 - lr: 0.020000\n",
-      "2021-09-08 02:09:07,473 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:09:07,473 EPOCH 2 done: loss 0.6419 - lr 0.0200000\n",
-      "2021-09-08 02:09:07,716 DEV : loss 0.443383127450943 - score 0.5714\n",
-      "2021-09-08 02:09:07,717 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:06:42,436 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:06:43,223 epoch 2 - iter 13/130 - loss 0.67169091 - samples/sec: 17.25 - lr: 0.020000\n",
+      "2021-09-21 22:06:44,000 epoch 2 - iter 26/130 - loss 0.67694767 - samples/sec: 16.74 - lr: 0.020000\n",
+      "2021-09-21 22:06:44,728 epoch 2 - iter 39/130 - loss 0.68286199 - samples/sec: 17.88 - lr: 0.020000\n",
+      "2021-09-21 22:06:45,522 epoch 2 - iter 52/130 - loss 0.67651221 - samples/sec: 16.38 - lr: 0.020000\n",
+      "2021-09-21 22:06:46,331 epoch 2 - iter 65/130 - loss 0.67957318 - samples/sec: 16.08 - lr: 0.020000\n",
+      "2021-09-21 22:06:47,146 epoch 2 - iter 78/130 - loss 0.65304104 - samples/sec: 15.98 - lr: 0.020000\n",
+      "2021-09-21 22:06:47,934 epoch 2 - iter 91/130 - loss 0.65534409 - samples/sec: 16.51 - lr: 0.020000\n",
+      "2021-09-21 22:06:48,590 epoch 2 - iter 104/130 - loss 0.64506989 - samples/sec: 19.82 - lr: 0.020000\n",
+      "2021-09-21 22:06:49,201 epoch 2 - iter 117/130 - loss 0.62603905 - samples/sec: 21.31 - lr: 0.020000\n",
+      "2021-09-21 22:06:49,782 epoch 2 - iter 130/130 - loss 0.61143266 - samples/sec: 22.40 - lr: 0.020000\n",
+      "2021-09-21 22:06:49,784 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:06:49,784 EPOCH 2 done: loss 0.6114 - lr 0.0200000\n",
+      "2021-09-21 22:06:50,026 DEV : loss 0.3177500367164612 - score 0.6429\n",
+      "2021-09-21 22:06:50,027 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:09:11,927 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:09:12,590 epoch 3 - iter 13/130 - loss 0.26330006 - samples/sec: 20.16 - lr: 0.020000\n",
-      "2021-09-08 02:09:13,196 epoch 3 - iter 26/130 - loss 0.38003290 - samples/sec: 21.49 - lr: 0.020000\n",
-      "2021-09-08 02:09:13,808 epoch 3 - iter 39/130 - loss 0.47787375 - samples/sec: 21.27 - lr: 0.020000\n",
-      "2021-09-08 02:09:14,417 epoch 3 - iter 52/130 - loss 0.49931193 - samples/sec: 21.37 - lr: 0.020000\n",
-      "2021-09-08 02:09:15,028 epoch 3 - iter 65/130 - loss 0.48865574 - samples/sec: 21.30 - lr: 0.020000\n",
-      "2021-09-08 02:09:15,634 epoch 3 - iter 78/130 - loss 0.53760184 - samples/sec: 21.49 - lr: 0.020000\n",
-      "2021-09-08 02:09:16,218 epoch 3 - iter 91/130 - loss 0.49526001 - samples/sec: 22.28 - lr: 0.020000\n",
-      "2021-09-08 02:09:16,801 epoch 3 - iter 104/130 - loss 0.49841278 - samples/sec: 22.32 - lr: 0.020000\n",
-      "2021-09-08 02:09:17,391 epoch 3 - iter 117/130 - loss 0.50637101 - samples/sec: 22.08 - lr: 0.020000\n",
-      "2021-09-08 02:09:17,994 epoch 3 - iter 130/130 - loss 0.53047821 - samples/sec: 21.57 - lr: 0.020000\n",
-      "2021-09-08 02:09:17,995 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:09:17,996 EPOCH 3 done: loss 0.5305 - lr 0.0200000\n",
-      "2021-09-08 02:09:18,233 DEV : loss 0.3345269560813904 - score 0.6429\n",
-      "2021-09-08 02:09:18,234 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 02:09:22,334 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:09:22,995 epoch 4 - iter 13/130 - loss 0.29733358 - samples/sec: 20.23 - lr: 0.020000\n",
-      "2021-09-08 02:09:23,582 epoch 4 - iter 26/130 - loss 0.33668001 - samples/sec: 22.17 - lr: 0.020000\n",
-      "2021-09-08 02:09:24,169 epoch 4 - iter 39/130 - loss 0.28440891 - samples/sec: 22.20 - lr: 0.020000\n",
-      "2021-09-08 02:09:24,770 epoch 4 - iter 52/130 - loss 0.38895876 - samples/sec: 21.65 - lr: 0.020000\n",
-      "2021-09-08 02:09:25,363 epoch 4 - iter 65/130 - loss 0.34937303 - samples/sec: 21.94 - lr: 0.020000\n",
-      "2021-09-08 02:09:25,944 epoch 4 - iter 78/130 - loss 0.33542800 - samples/sec: 22.39 - lr: 0.020000\n",
-      "2021-09-08 02:09:26,539 epoch 4 - iter 91/130 - loss 0.33745183 - samples/sec: 21.89 - lr: 0.020000\n",
-      "2021-09-08 02:09:27,160 epoch 4 - iter 104/130 - loss 0.36530001 - samples/sec: 20.95 - lr: 0.020000\n",
-      "2021-09-08 02:09:27,762 epoch 4 - iter 117/130 - loss 0.39146654 - samples/sec: 21.62 - lr: 0.020000\n",
-      "2021-09-08 02:09:28,351 epoch 4 - iter 130/130 - loss 0.36776860 - samples/sec: 22.11 - lr: 0.020000\n",
-      "2021-09-08 02:09:28,352 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:09:28,352 EPOCH 4 done: loss 0.3678 - lr 0.0200000\n",
-      "2021-09-08 02:09:28,588 DEV : loss 0.2878572642803192 - score 0.4286\n",
-      "2021-09-08 02:09:28,589 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:09:28,597 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:09:29,194 epoch 5 - iter 13/130 - loss 0.19014483 - samples/sec: 22.40 - lr: 0.020000\n",
-      "2021-09-08 02:09:29,786 epoch 5 - iter 26/130 - loss 0.34915906 - samples/sec: 22.00 - lr: 0.020000\n",
-      "2021-09-08 02:09:30,383 epoch 5 - iter 39/130 - loss 0.37620647 - samples/sec: 21.81 - lr: 0.020000\n",
-      "2021-09-08 02:09:30,979 epoch 5 - iter 52/130 - loss 0.37289295 - samples/sec: 21.83 - lr: 0.020000\n",
-      "2021-09-08 02:09:31,571 epoch 5 - iter 65/130 - loss 0.35513338 - samples/sec: 22.00 - lr: 0.020000\n",
-      "2021-09-08 02:09:32,152 epoch 5 - iter 78/130 - loss 0.33021030 - samples/sec: 22.39 - lr: 0.020000\n",
-      "2021-09-08 02:09:32,744 epoch 5 - iter 91/130 - loss 0.32919387 - samples/sec: 22.01 - lr: 0.020000\n",
-      "2021-09-08 02:09:33,340 epoch 5 - iter 104/130 - loss 0.34094657 - samples/sec: 21.84 - lr: 0.020000\n",
-      "2021-09-08 02:09:33,929 epoch 5 - iter 117/130 - loss 0.31952292 - samples/sec: 22.12 - lr: 0.020000\n",
-      "2021-09-08 02:09:34,525 epoch 5 - iter 130/130 - loss 0.33360925 - samples/sec: 21.81 - lr: 0.020000\n",
-      "2021-09-08 02:09:34,527 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:09:34,527 EPOCH 5 done: loss 0.3336 - lr 0.0200000\n",
-      "2021-09-08 02:09:34,762 DEV : loss 0.3400193750858307 - score 0.5714\n",
-      "2021-09-08 02:09:34,763 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:09:34,764 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:09:35,371 epoch 6 - iter 13/130 - loss 0.38321327 - samples/sec: 22.02 - lr: 0.020000\n",
-      "2021-09-08 02:09:35,946 epoch 6 - iter 26/130 - loss 0.24848228 - samples/sec: 22.65 - lr: 0.020000\n",
-      "2021-09-08 02:09:36,517 epoch 6 - iter 39/130 - loss 0.21182515 - samples/sec: 22.81 - lr: 0.020000\n",
-      "2021-09-08 02:09:37,103 epoch 6 - iter 52/130 - loss 0.26334760 - samples/sec: 22.20 - lr: 0.020000\n",
-      "2021-09-08 02:09:37,692 epoch 6 - iter 65/130 - loss 0.24556152 - samples/sec: 22.10 - lr: 0.020000\n",
-      "2021-09-08 02:09:38,279 epoch 6 - iter 78/130 - loss 0.25929081 - samples/sec: 22.19 - lr: 0.020000\n",
-      "2021-09-08 02:09:38,858 epoch 6 - iter 91/130 - loss 0.25807259 - samples/sec: 22.51 - lr: 0.020000\n",
-      "2021-09-08 02:09:39,442 epoch 6 - iter 104/130 - loss 0.28286673 - samples/sec: 22.26 - lr: 0.020000\n",
-      "2021-09-08 02:09:40,027 epoch 6 - iter 117/130 - loss 0.29354880 - samples/sec: 22.25 - lr: 0.020000\n",
-      "2021-09-08 02:09:40,623 epoch 6 - iter 130/130 - loss 0.29804902 - samples/sec: 21.85 - lr: 0.020000\n"
+      "2021-09-21 22:06:54,700 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:06:55,492 epoch 3 - iter 13/130 - loss 0.43094232 - samples/sec: 17.52 - lr: 0.020000\n",
+      "2021-09-21 22:06:56,274 epoch 3 - iter 26/130 - loss 0.49945418 - samples/sec: 16.65 - lr: 0.020000\n",
+      "2021-09-21 22:06:57,022 epoch 3 - iter 39/130 - loss 0.48548122 - samples/sec: 17.41 - lr: 0.020000\n",
+      "2021-09-21 22:06:57,755 epoch 3 - iter 52/130 - loss 0.43850488 - samples/sec: 17.75 - lr: 0.020000\n",
+      "2021-09-21 22:06:58,528 epoch 3 - iter 65/130 - loss 0.47998968 - samples/sec: 16.82 - lr: 0.020000\n",
+      "2021-09-21 22:06:59,319 epoch 3 - iter 78/130 - loss 0.50891852 - samples/sec: 16.45 - lr: 0.020000\n",
+      "2021-09-21 22:07:00,056 epoch 3 - iter 91/130 - loss 0.48729360 - samples/sec: 17.66 - lr: 0.020000\n",
+      "2021-09-21 22:07:00,668 epoch 3 - iter 104/130 - loss 0.47235050 - samples/sec: 21.30 - lr: 0.020000\n",
+      "2021-09-21 22:07:01,236 epoch 3 - iter 117/130 - loss 0.48497977 - samples/sec: 22.91 - lr: 0.020000\n",
+      "2021-09-21 22:07:01,844 epoch 3 - iter 130/130 - loss 0.51010035 - samples/sec: 21.40 - lr: 0.020000\n",
+      "2021-09-21 22:07:01,845 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:07:01,846 EPOCH 3 done: loss 0.5101 - lr 0.0200000\n",
+      "2021-09-21 22:07:02,196 DEV : loss 0.45100510120391846 - score 0.5\n",
+      "2021-09-21 22:07:02,197 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:07:02,292 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:07:02,914 epoch 4 - iter 13/130 - loss 0.38033825 - samples/sec: 21.46 - lr: 0.020000\n",
+      "2021-09-21 22:07:03,516 epoch 4 - iter 26/130 - loss 0.45031515 - samples/sec: 21.63 - lr: 0.020000\n",
+      "2021-09-21 22:07:04,086 epoch 4 - iter 39/130 - loss 0.39558548 - samples/sec: 22.83 - lr: 0.020000\n",
+      "2021-09-21 22:07:04,666 epoch 4 - iter 52/130 - loss 0.51424723 - samples/sec: 22.41 - lr: 0.020000\n",
+      "2021-09-21 22:07:05,362 epoch 4 - iter 65/130 - loss 0.49045755 - samples/sec: 18.69 - lr: 0.020000\n",
+      "2021-09-21 22:07:06,118 epoch 4 - iter 78/130 - loss 0.50969123 - samples/sec: 17.21 - lr: 0.020000\n",
+      "2021-09-21 22:07:06,887 epoch 4 - iter 91/130 - loss 0.48348530 - samples/sec: 16.93 - lr: 0.020000\n",
+      "2021-09-21 22:07:07,644 epoch 4 - iter 104/130 - loss 0.46786222 - samples/sec: 17.18 - lr: 0.020000\n",
+      "2021-09-21 22:07:08,405 epoch 4 - iter 117/130 - loss 0.48818712 - samples/sec: 17.10 - lr: 0.020000\n",
+      "2021-09-21 22:07:09,190 epoch 4 - iter 130/130 - loss 0.49065987 - samples/sec: 16.58 - lr: 0.020000\n",
+      "2021-09-21 22:07:09,191 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:07:09,191 EPOCH 4 done: loss 0.4907 - lr 0.0200000\n",
+      "2021-09-21 22:07:09,852 DEV : loss 0.4377276301383972 - score 0.5\n",
+      "2021-09-21 22:07:09,853 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 22:07:09,856 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:07:10,670 epoch 5 - iter 13/130 - loss 0.09641648 - samples/sec: 16.86 - lr: 0.020000\n",
+      "2021-09-21 22:07:11,457 epoch 5 - iter 26/130 - loss 0.19032883 - samples/sec: 16.54 - lr: 0.020000\n",
+      "2021-09-21 22:07:12,089 epoch 5 - iter 39/130 - loss 0.20068514 - samples/sec: 20.59 - lr: 0.020000\n",
+      "2021-09-21 22:07:12,693 epoch 5 - iter 52/130 - loss 0.21154658 - samples/sec: 21.55 - lr: 0.020000\n",
+      "2021-09-21 22:07:13,280 epoch 5 - iter 65/130 - loss 0.25546111 - samples/sec: 22.19 - lr: 0.020000\n",
+      "2021-09-21 22:07:13,883 epoch 5 - iter 78/130 - loss 0.28406274 - samples/sec: 21.58 - lr: 0.020000\n",
+      "2021-09-21 22:07:14,479 epoch 5 - iter 91/130 - loss 0.28612890 - samples/sec: 21.85 - lr: 0.020000\n",
+      "2021-09-21 22:07:15,088 epoch 5 - iter 104/130 - loss 0.32049155 - samples/sec: 21.36 - lr: 0.020000\n",
+      "2021-09-21 22:07:15,658 epoch 5 - iter 117/130 - loss 0.29904199 - samples/sec: 22.83 - lr: 0.020000\n",
+      "2021-09-21 22:07:16,257 epoch 5 - iter 130/130 - loss 0.30592491 - samples/sec: 21.76 - lr: 0.020000\n",
+      "2021-09-21 22:07:16,258 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:07:16,258 EPOCH 5 done: loss 0.3059 - lr 0.0200000\n",
+      "2021-09-21 22:07:16,884 DEV : loss 0.4804399907588959 - score 0.5714\n",
+      "2021-09-21 22:07:16,884 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 22:07:16,886 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:07:17,704 epoch 6 - iter 13/130 - loss 0.10854813 - samples/sec: 16.82 - lr: 0.020000\n",
+      "2021-09-21 22:07:18,477 epoch 6 - iter 26/130 - loss 0.16310384 - samples/sec: 16.85 - lr: 0.020000\n",
+      "2021-09-21 22:07:19,236 epoch 6 - iter 39/130 - loss 0.14759325 - samples/sec: 17.14 - lr: 0.020000\n",
+      "2021-09-21 22:07:19,989 epoch 6 - iter 52/130 - loss 0.16419707 - samples/sec: 17.27 - lr: 0.020000\n",
+      "2021-09-21 22:07:20,786 epoch 6 - iter 65/130 - loss 0.22593673 - samples/sec: 16.33 - lr: 0.020000\n",
+      "2021-09-21 22:07:21,533 epoch 6 - iter 78/130 - loss 0.20977843 - samples/sec: 17.43 - lr: 0.020000\n",
+      "2021-09-21 22:07:22,280 epoch 6 - iter 91/130 - loss 0.19530065 - samples/sec: 17.42 - lr: 0.020000\n",
+      "2021-09-21 22:07:23,008 epoch 6 - iter 104/130 - loss 0.19087042 - samples/sec: 17.86 - lr: 0.020000\n",
+      "2021-09-21 22:07:23,621 epoch 6 - iter 117/130 - loss 0.23339728 - samples/sec: 21.25 - lr: 0.020000\n",
+      "2021-09-21 22:07:24,206 epoch 6 - iter 130/130 - loss 0.25997068 - samples/sec: 22.25 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:09:40,624 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:09:40,625 EPOCH 6 done: loss 0.2980 - lr 0.0200000\n",
-      "2021-09-08 02:09:40,867 DEV : loss 0.19337493181228638 - score 0.6429\n",
-      "2021-09-08 02:09:40,868 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 02:09:44,640 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:09:45,288 epoch 7 - iter 13/130 - loss 0.16229262 - samples/sec: 20.65 - lr: 0.020000\n",
-      "2021-09-08 02:09:45,872 epoch 7 - iter 26/130 - loss 0.15663956 - samples/sec: 22.28 - lr: 0.020000\n",
-      "2021-09-08 02:09:46,449 epoch 7 - iter 39/130 - loss 0.12357518 - samples/sec: 22.56 - lr: 0.020000\n",
-      "2021-09-08 02:09:47,026 epoch 7 - iter 52/130 - loss 0.16827165 - samples/sec: 22.56 - lr: 0.020000\n",
-      "2021-09-08 02:09:47,605 epoch 7 - iter 65/130 - loss 0.16972106 - samples/sec: 22.49 - lr: 0.020000\n",
-      "2021-09-08 02:09:48,191 epoch 7 - iter 78/130 - loss 0.20480885 - samples/sec: 22.19 - lr: 0.020000\n",
-      "2021-09-08 02:09:48,769 epoch 7 - iter 91/130 - loss 0.20817291 - samples/sec: 22.53 - lr: 0.020000\n",
-      "2021-09-08 02:09:49,350 epoch 7 - iter 104/130 - loss 0.19634963 - samples/sec: 22.41 - lr: 0.020000\n",
-      "2021-09-08 02:09:49,929 epoch 7 - iter 117/130 - loss 0.20138104 - samples/sec: 22.50 - lr: 0.020000\n",
-      "2021-09-08 02:09:50,505 epoch 7 - iter 130/130 - loss 0.19503355 - samples/sec: 22.59 - lr: 0.020000\n",
-      "2021-09-08 02:09:50,506 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:09:50,506 EPOCH 7 done: loss 0.1950 - lr 0.0200000\n",
-      "2021-09-08 02:09:50,741 DEV : loss 0.5275031328201294 - score 0.5\n",
-      "2021-09-08 02:09:50,742 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:09:50,744 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:09:51,334 epoch 8 - iter 13/130 - loss 0.01140115 - samples/sec: 22.66 - lr: 0.020000\n",
-      "2021-09-08 02:09:51,902 epoch 8 - iter 26/130 - loss 0.00682892 - samples/sec: 22.93 - lr: 0.020000\n",
-      "2021-09-08 02:09:52,469 epoch 8 - iter 39/130 - loss 0.00505148 - samples/sec: 22.96 - lr: 0.020000\n",
-      "2021-09-08 02:09:53,053 epoch 8 - iter 52/130 - loss 0.05060713 - samples/sec: 22.30 - lr: 0.020000\n",
-      "2021-09-08 02:09:53,627 epoch 8 - iter 65/130 - loss 0.06866864 - samples/sec: 22.66 - lr: 0.020000\n",
-      "2021-09-08 02:09:54,244 epoch 8 - iter 78/130 - loss 0.07713652 - samples/sec: 21.12 - lr: 0.020000\n",
-      "2021-09-08 02:09:54,842 epoch 8 - iter 91/130 - loss 0.07828041 - samples/sec: 21.76 - lr: 0.020000\n",
-      "2021-09-08 02:09:55,429 epoch 8 - iter 104/130 - loss 0.11972469 - samples/sec: 22.20 - lr: 0.020000\n",
-      "2021-09-08 02:09:55,996 epoch 8 - iter 117/130 - loss 0.10673832 - samples/sec: 22.96 - lr: 0.020000\n",
-      "2021-09-08 02:09:56,573 epoch 8 - iter 130/130 - loss 0.11335332 - samples/sec: 22.53 - lr: 0.020000\n",
-      "2021-09-08 02:09:56,575 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:09:56,575 EPOCH 8 done: loss 0.1134 - lr 0.0200000\n",
-      "2021-09-08 02:09:56,854 DEV : loss 0.41993120312690735 - score 0.5\n",
-      "2021-09-08 02:09:56,855 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:09:57,639 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:09:58,238 epoch 9 - iter 13/130 - loss 0.35969097 - samples/sec: 22.35 - lr: 0.020000\n",
-      "2021-09-08 02:09:58,823 epoch 9 - iter 26/130 - loss 0.27705063 - samples/sec: 22.27 - lr: 0.020000\n",
-      "2021-09-08 02:09:59,398 epoch 9 - iter 39/130 - loss 0.19744297 - samples/sec: 22.64 - lr: 0.020000\n",
-      "2021-09-08 02:09:59,979 epoch 9 - iter 52/130 - loss 0.18583465 - samples/sec: 22.41 - lr: 0.020000\n",
-      "2021-09-08 02:10:00,564 epoch 9 - iter 65/130 - loss 0.24258717 - samples/sec: 22.24 - lr: 0.020000\n",
-      "2021-09-08 02:10:01,143 epoch 9 - iter 78/130 - loss 0.23021798 - samples/sec: 22.48 - lr: 0.020000\n",
-      "2021-09-08 02:10:01,726 epoch 9 - iter 91/130 - loss 0.21154528 - samples/sec: 22.33 - lr: 0.020000\n",
-      "2021-09-08 02:10:02,302 epoch 9 - iter 104/130 - loss 0.20432095 - samples/sec: 22.62 - lr: 0.020000\n",
-      "2021-09-08 02:10:02,883 epoch 9 - iter 117/130 - loss 0.20160633 - samples/sec: 22.40 - lr: 0.020000\n",
-      "2021-09-08 02:10:03,457 epoch 9 - iter 130/130 - loss 0.19441111 - samples/sec: 22.66 - lr: 0.020000\n",
-      "2021-09-08 02:10:03,458 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:10:03,459 EPOCH 9 done: loss 0.1944 - lr 0.0200000\n",
-      "2021-09-08 02:10:03,696 DEV : loss 0.3747665584087372 - score 0.5714\n",
-      "2021-09-08 02:10:03,696 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 02:10:03,698 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:10:04,288 epoch 10 - iter 13/130 - loss 0.02755771 - samples/sec: 22.69 - lr: 0.020000\n",
-      "2021-09-08 02:10:04,877 epoch 10 - iter 26/130 - loss 0.18194618 - samples/sec: 22.11 - lr: 0.020000\n",
-      "2021-09-08 02:10:05,451 epoch 10 - iter 39/130 - loss 0.16081427 - samples/sec: 22.68 - lr: 0.020000\n",
-      "2021-09-08 02:10:06,021 epoch 10 - iter 52/130 - loss 0.14414106 - samples/sec: 22.84 - lr: 0.020000\n",
-      "2021-09-08 02:10:06,600 epoch 10 - iter 65/130 - loss 0.16706079 - samples/sec: 22.48 - lr: 0.020000\n",
-      "2021-09-08 02:10:07,172 epoch 10 - iter 78/130 - loss 0.16606159 - samples/sec: 22.76 - lr: 0.020000\n",
-      "2021-09-08 02:10:07,748 epoch 10 - iter 91/130 - loss 0.16194036 - samples/sec: 22.58 - lr: 0.020000\n",
-      "2021-09-08 02:10:08,319 epoch 10 - iter 104/130 - loss 0.14980335 - samples/sec: 22.81 - lr: 0.020000\n",
-      "2021-09-08 02:10:08,893 epoch 10 - iter 117/130 - loss 0.14416399 - samples/sec: 22.69 - lr: 0.020000\n",
-      "2021-09-08 02:10:09,465 epoch 10 - iter 130/130 - loss 0.13005466 - samples/sec: 22.74 - lr: 0.020000\n",
-      "2021-09-08 02:10:09,466 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:10:09,467 EPOCH 10 done: loss 0.1301 - lr 0.0200000\n",
-      "2021-09-08 02:10:09,702 DEV : loss 0.4095085859298706 - score 0.5\n",
-      "Epoch    10: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 02:10:09,703 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 02:10:13,998 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:10:13,999 Testing using best model ...\n",
-      "2021-09-08 02:10:14,000 loading file None/best-model.pt\n",
+      "2021-09-21 22:07:24,207 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:07:24,208 EPOCH 6 done: loss 0.2600 - lr 0.0200000\n",
+      "2021-09-21 22:07:24,539 DEV : loss 0.24911977350711823 - score 0.5714\n",
+      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 22:07:24,540 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 22:07:24,610 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:07:25,200 epoch 7 - iter 13/130 - loss 0.14681505 - samples/sec: 22.69 - lr: 0.010000\n",
+      "2021-09-21 22:07:25,760 epoch 7 - iter 26/130 - loss 0.11945253 - samples/sec: 23.24 - lr: 0.010000\n",
+      "2021-09-21 22:07:26,322 epoch 7 - iter 39/130 - loss 0.15872982 - samples/sec: 23.13 - lr: 0.010000\n",
+      "2021-09-21 22:07:26,882 epoch 7 - iter 52/130 - loss 0.23046598 - samples/sec: 23.26 - lr: 0.010000\n",
+      "2021-09-21 22:07:27,592 epoch 7 - iter 65/130 - loss 0.23459755 - samples/sec: 18.31 - lr: 0.010000\n",
+      "2021-09-21 22:07:28,337 epoch 7 - iter 78/130 - loss 0.19722883 - samples/sec: 17.47 - lr: 0.010000\n",
+      "2021-09-21 22:07:29,107 epoch 7 - iter 91/130 - loss 0.17393576 - samples/sec: 16.90 - lr: 0.010000\n",
+      "2021-09-21 22:07:29,882 epoch 7 - iter 104/130 - loss 0.24298068 - samples/sec: 16.80 - lr: 0.010000\n",
+      "2021-09-21 22:07:30,662 epoch 7 - iter 117/130 - loss 0.23801781 - samples/sec: 16.66 - lr: 0.010000\n",
+      "2021-09-21 22:07:31,399 epoch 7 - iter 130/130 - loss 0.24206072 - samples/sec: 17.66 - lr: 0.010000\n",
+      "2021-09-21 22:07:31,400 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:07:31,401 EPOCH 7 done: loss 0.2421 - lr 0.0100000\n",
+      "2021-09-21 22:07:32,039 DEV : loss 0.22677654027938843 - score 0.5\n",
+      "2021-09-21 22:07:32,040 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:07:32,042 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:07:32,846 epoch 8 - iter 13/130 - loss 0.08315505 - samples/sec: 16.98 - lr: 0.010000\n",
+      "2021-09-21 22:07:33,586 epoch 8 - iter 26/130 - loss 0.04332755 - samples/sec: 17.56 - lr: 0.010000\n",
+      "2021-09-21 22:07:34,364 epoch 8 - iter 39/130 - loss 0.03275574 - samples/sec: 16.72 - lr: 0.010000\n",
+      "2021-09-21 22:07:35,133 epoch 8 - iter 52/130 - loss 0.02639256 - samples/sec: 16.92 - lr: 0.010000\n",
+      "2021-09-21 22:07:35,898 epoch 8 - iter 65/130 - loss 0.08308553 - samples/sec: 17.01 - lr: 0.010000\n",
+      "2021-09-21 22:07:36,652 epoch 8 - iter 78/130 - loss 0.10614051 - samples/sec: 17.26 - lr: 0.010000\n",
+      "2021-09-21 22:07:37,418 epoch 8 - iter 91/130 - loss 0.13469228 - samples/sec: 16.98 - lr: 0.010000\n",
+      "2021-09-21 22:07:38,146 epoch 8 - iter 104/130 - loss 0.12345053 - samples/sec: 17.89 - lr: 0.010000\n",
+      "2021-09-21 22:07:38,909 epoch 8 - iter 117/130 - loss 0.12969694 - samples/sec: 17.05 - lr: 0.010000\n",
+      "2021-09-21 22:07:39,649 epoch 8 - iter 130/130 - loss 0.14866016 - samples/sec: 17.58 - lr: 0.010000\n",
+      "2021-09-21 22:07:39,650 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:07:39,650 EPOCH 8 done: loss 0.1487 - lr 0.0100000\n",
+      "2021-09-21 22:07:40,205 DEV : loss 0.3324142098426819 - score 0.5\n",
+      "2021-09-21 22:07:40,206 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 22:07:40,208 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:07:40,914 epoch 9 - iter 13/130 - loss 0.20799971 - samples/sec: 18.86 - lr: 0.010000\n",
+      "2021-09-21 22:07:41,510 epoch 9 - iter 26/130 - loss 0.18277819 - samples/sec: 21.86 - lr: 0.010000\n",
+      "2021-09-21 22:07:42,067 epoch 9 - iter 39/130 - loss 0.12409308 - samples/sec: 23.34 - lr: 0.010000\n",
+      "2021-09-21 22:07:42,639 epoch 9 - iter 52/130 - loss 0.15594736 - samples/sec: 22.77 - lr: 0.010000\n",
+      "2021-09-21 22:07:43,197 epoch 9 - iter 65/130 - loss 0.14957093 - samples/sec: 23.30 - lr: 0.010000\n",
+      "2021-09-21 22:07:43,755 epoch 9 - iter 78/130 - loss 0.13435955 - samples/sec: 23.33 - lr: 0.010000\n",
+      "2021-09-21 22:07:44,327 epoch 9 - iter 91/130 - loss 0.11569113 - samples/sec: 22.75 - lr: 0.010000\n",
+      "2021-09-21 22:07:45,064 epoch 9 - iter 104/130 - loss 0.10153118 - samples/sec: 17.66 - lr: 0.010000\n",
+      "2021-09-21 22:07:45,819 epoch 9 - iter 117/130 - loss 0.09189942 - samples/sec: 17.24 - lr: 0.010000\n",
+      "2021-09-21 22:07:46,575 epoch 9 - iter 130/130 - loss 0.10412182 - samples/sec: 17.20 - lr: 0.010000\n",
+      "2021-09-21 22:07:46,576 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:07:46,577 EPOCH 9 done: loss 0.1041 - lr 0.0100000\n",
+      "2021-09-21 22:07:47,221 DEV : loss 0.3260226845741272 - score 0.5714\n",
+      "2021-09-21 22:07:47,222 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 22:07:47,224 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:07:48,023 epoch 10 - iter 13/130 - loss 0.28668364 - samples/sec: 16.81 - lr: 0.010000\n",
+      "2021-09-21 22:07:48,766 epoch 10 - iter 26/130 - loss 0.26112361 - samples/sec: 17.53 - lr: 0.010000\n",
+      "2021-09-21 22:07:49,531 epoch 10 - iter 39/130 - loss 0.18961903 - samples/sec: 17.01 - lr: 0.010000\n",
+      "2021-09-21 22:07:50,261 epoch 10 - iter 52/130 - loss 0.18085833 - samples/sec: 17.81 - lr: 0.010000\n",
+      "2021-09-21 22:07:51,011 epoch 10 - iter 65/130 - loss 0.18033480 - samples/sec: 17.37 - lr: 0.010000\n",
+      "2021-09-21 22:07:51,769 epoch 10 - iter 78/130 - loss 0.16333836 - samples/sec: 17.15 - lr: 0.010000\n",
+      "2021-09-21 22:07:52,500 epoch 10 - iter 91/130 - loss 0.15297553 - samples/sec: 17.81 - lr: 0.010000\n",
+      "2021-09-21 22:07:53,246 epoch 10 - iter 104/130 - loss 0.15485911 - samples/sec: 17.45 - lr: 0.010000\n",
+      "2021-09-21 22:07:53,991 epoch 10 - iter 117/130 - loss 0.13803970 - samples/sec: 17.45 - lr: 0.010000\n",
+      "2021-09-21 22:07:54,726 epoch 10 - iter 130/130 - loss 0.15148580 - samples/sec: 17.72 - lr: 0.010000\n",
+      "2021-09-21 22:07:54,727 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:07:54,727 EPOCH 10 done: loss 0.1515 - lr 0.0100000\n",
+      "2021-09-21 22:07:55,317 DEV : loss 0.3007037043571472 - score 0.5714\n",
+      "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 22:07:55,317 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 22:07:59,610 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:07:59,611 Testing using best model ...\n",
+      "2021-09-21 22:08:01,128 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:10:22,575 \t0.0\n",
-      "2021-09-08 02:10:22,575 \n",
+      "2021-09-21 22:08:05,747 \t0.0\n",
+      "2021-09-21 22:08:05,747 \n",
       "Results:\n",
       "- F-score (micro) 0.0\n",
       "- F-score (macro) 0.0\n",
@@ -4666,26 +4657,26 @@
       "                             weighted avg     0.0000    0.0000    0.0000         0\n",
       "                              samples avg     0.0000    0.0000    0.0000         0\n",
       "\n",
-      "2021-09-08 02:10:22,576 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:10:31,104 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
+      "2021-09-21 22:08:05,748 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:08:21,160 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:10:34,924 Computing label dictionary. Progress:\n"
+      "2021-09-21 22:08:25,211 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 146/146 [00:00<00:00, 76212.62it/s]"
+      "100%|██████████| 146/146 [00:00<00:00, 73655.09it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:10:34,927 [b'this text is about travel', b'this text is about technology', b'this text is about wellness', b'this text is about women', b'this text is about parents', b'this text is about business', b'this text is about weddings', b'this text is about fashion', b'this text is about entertainmen,the attention', b'this text is about science', b'this text is about divorce', b'this text is about crime', b'this text is about religion', b'this text is about sports', b'this text is about politics', b'this text is about comedy']\n",
-      "2021-09-08 02:10:34,936 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:10:34,938 Model: \"TARSClassifier(\n",
+      "2021-09-21 22:08:25,215 [b'this text is about travel', b'this text is about technology', b'this text is about wellness', b'this text is about women', b'this text is about parents', b'this text is about business', b'this text is about weddings', b'this text is about fashion', b'this text is about entertainmen,the attention', b'this text is about science', b'this text is about divorce', b'this text is about crime', b'this text is about religion', b'this text is about sports', b'this text is about politics', b'this text is about comedy']\n",
+      "2021-09-21 22:08:25,357 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:08:25,359 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -4998,25 +4989,24 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:10:34,938 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:10:34,938 Corpus: \"Corpus: 130 train + 14 dev + 16 test sentences\"\n",
-      "2021-09-08 02:10:34,939 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:10:34,939 Parameters:\n",
-      "2021-09-08 02:10:34,939  - learning_rate: \"0.02\"\n",
-      "2021-09-08 02:10:34,940  - mini_batch_size: \"1\"\n",
-      "2021-09-08 02:10:34,940  - patience: \"3\"\n",
-      "2021-09-08 02:10:34,940  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 02:10:34,940  - max_epochs: \"10\"\n",
-      "2021-09-08 02:10:34,941  - shuffle: \"True\"\n",
-      "2021-09-08 02:10:34,941  - train_with_dev: \"False\"\n",
-      "2021-09-08 02:10:34,941  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 02:10:34,942 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:10:34,942 Model training base path: \"None\"\n",
-      "2021-09-08 02:10:34,942 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:10:34,942 Device: cuda:0\n",
-      "2021-09-08 02:10:34,943 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:10:34,943 Embeddings storage mode: cpu\n",
-      "2021-09-08 02:10:34,949 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 22:08:25,360 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:08:25,360 Corpus: \"Corpus: 130 train + 14 dev + 16 test sentences\"\n",
+      "2021-09-21 22:08:25,361 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:08:25,361 Parameters:\n",
+      "2021-09-21 22:08:25,361  - learning_rate: \"0.02\"\n",
+      "2021-09-21 22:08:25,362  - mini_batch_size: \"1\"\n",
+      "2021-09-21 22:08:25,362  - patience: \"3\"\n",
+      "2021-09-21 22:08:25,362  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 22:08:25,362  - max_epochs: \"10\"\n",
+      "2021-09-21 22:08:25,363  - shuffle: \"True\"\n",
+      "2021-09-21 22:08:25,363  - train_with_dev: \"False\"\n",
+      "2021-09-21 22:08:25,363  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 22:08:25,363 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:08:25,364 Model training base path: \"None\"\n",
+      "2021-09-21 22:08:25,364 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:08:25,364 Device: cuda:0\n",
+      "2021-09-21 22:08:25,365 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:08:25,365 Embeddings storage mode: cpu\n"
      ]
     },
     {
@@ -5030,200 +5020,202 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:10:35,565 epoch 1 - iter 13/130 - loss 0.34075801 - samples/sec: 21.72 - lr: 0.020000\n",
-      "2021-09-08 02:10:36,161 epoch 1 - iter 26/130 - loss 0.30507073 - samples/sec: 21.84 - lr: 0.020000\n",
-      "2021-09-08 02:10:36,762 epoch 1 - iter 39/130 - loss 0.40979186 - samples/sec: 21.64 - lr: 0.020000\n",
-      "2021-09-08 02:10:37,367 epoch 1 - iter 52/130 - loss 0.48755251 - samples/sec: 21.54 - lr: 0.020000\n",
-      "2021-09-08 02:10:37,979 epoch 1 - iter 65/130 - loss 0.48729082 - samples/sec: 21.25 - lr: 0.020000\n",
-      "2021-09-08 02:10:38,586 epoch 1 - iter 78/130 - loss 0.48864165 - samples/sec: 21.44 - lr: 0.020000\n",
-      "2021-09-08 02:10:39,192 epoch 1 - iter 91/130 - loss 0.51747006 - samples/sec: 21.49 - lr: 0.020000\n",
-      "2021-09-08 02:10:39,813 epoch 1 - iter 104/130 - loss 0.50719482 - samples/sec: 20.98 - lr: 0.020000\n",
-      "2021-09-08 02:10:40,402 epoch 1 - iter 117/130 - loss 0.51528135 - samples/sec: 22.07 - lr: 0.020000\n",
-      "2021-09-08 02:10:41,008 epoch 1 - iter 130/130 - loss 0.51860196 - samples/sec: 21.49 - lr: 0.020000\n",
-      "2021-09-08 02:10:41,009 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:10:41,009 EPOCH 1 done: loss 0.5186 - lr 0.0200000\n",
-      "2021-09-08 02:10:41,271 DEV : loss 0.500018835067749 - score 0.2857\n",
-      "2021-09-08 02:10:41,271 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:08:25,529 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:08:26,126 epoch 1 - iter 13/130 - loss 0.33808452 - samples/sec: 22.41 - lr: 0.020000\n",
+      "2021-09-21 22:08:26,708 epoch 1 - iter 26/130 - loss 0.51875405 - samples/sec: 22.34 - lr: 0.020000\n",
+      "2021-09-21 22:08:27,305 epoch 1 - iter 39/130 - loss 0.65384818 - samples/sec: 21.82 - lr: 0.020000\n",
+      "2021-09-21 22:08:27,929 epoch 1 - iter 52/130 - loss 0.66666921 - samples/sec: 20.86 - lr: 0.020000\n",
+      "2021-09-21 22:08:28,521 epoch 1 - iter 65/130 - loss 0.65078115 - samples/sec: 22.00 - lr: 0.020000\n",
+      "2021-09-21 22:08:29,106 epoch 1 - iter 78/130 - loss 0.59811596 - samples/sec: 22.25 - lr: 0.020000\n",
+      "2021-09-21 22:08:29,691 epoch 1 - iter 91/130 - loss 0.58879223 - samples/sec: 22.22 - lr: 0.020000\n",
+      "2021-09-21 22:08:30,265 epoch 1 - iter 104/130 - loss 0.55484778 - samples/sec: 22.70 - lr: 0.020000\n",
+      "2021-09-21 22:08:30,835 epoch 1 - iter 117/130 - loss 0.54923456 - samples/sec: 22.81 - lr: 0.020000\n",
+      "2021-09-21 22:08:31,437 epoch 1 - iter 130/130 - loss 0.57236821 - samples/sec: 21.63 - lr: 0.020000\n",
+      "2021-09-21 22:08:31,438 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:08:31,439 EPOCH 1 done: loss 0.5724 - lr 0.0200000\n",
+      "2021-09-21 22:08:31,692 DEV : loss 0.4598809778690338 - score 0.0714\n",
+      "2021-09-21 22:08:31,693 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 22:08:35,691 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:08:36,587 epoch 2 - iter 13/130 - loss 0.42163959 - samples/sec: 15.97 - lr: 0.020000\n",
+      "2021-09-21 22:08:37,352 epoch 2 - iter 26/130 - loss 0.54620036 - samples/sec: 17.03 - lr: 0.020000\n",
+      "2021-09-21 22:08:38,160 epoch 2 - iter 39/130 - loss 0.53555710 - samples/sec: 16.10 - lr: 0.020000\n",
+      "2021-09-21 22:08:38,951 epoch 2 - iter 52/130 - loss 0.60508307 - samples/sec: 16.45 - lr: 0.020000\n",
+      "2021-09-21 22:08:39,745 epoch 2 - iter 65/130 - loss 0.62186698 - samples/sec: 16.39 - lr: 0.020000\n",
+      "2021-09-21 22:08:40,571 epoch 2 - iter 78/130 - loss 0.62830725 - samples/sec: 15.76 - lr: 0.020000\n",
+      "2021-09-21 22:08:41,358 epoch 2 - iter 91/130 - loss 0.64402848 - samples/sec: 16.53 - lr: 0.020000\n",
+      "2021-09-21 22:08:42,212 epoch 2 - iter 104/130 - loss 0.65125762 - samples/sec: 15.24 - lr: 0.020000\n",
+      "2021-09-21 22:08:43,005 epoch 2 - iter 117/130 - loss 0.65375605 - samples/sec: 16.40 - lr: 0.020000\n",
+      "2021-09-21 22:08:43,826 epoch 2 - iter 130/130 - loss 0.66756647 - samples/sec: 15.86 - lr: 0.020000\n",
+      "2021-09-21 22:08:43,827 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:08:43,828 EPOCH 2 done: loss 0.6676 - lr 0.0200000\n",
+      "2021-09-21 22:08:44,316 DEV : loss 0.6772164106369019 - score 0.0714\n",
+      "2021-09-21 22:08:44,318 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:08:44,320 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:08:45,163 epoch 3 - iter 13/130 - loss 0.67977717 - samples/sec: 15.92 - lr: 0.020000\n",
+      "2021-09-21 22:08:45,962 epoch 3 - iter 26/130 - loss 0.67426543 - samples/sec: 16.30 - lr: 0.020000\n",
+      "2021-09-21 22:08:46,820 epoch 3 - iter 39/130 - loss 0.69016283 - samples/sec: 15.16 - lr: 0.020000\n",
+      "2021-09-21 22:08:47,566 epoch 3 - iter 52/130 - loss 0.68731618 - samples/sec: 17.44 - lr: 0.020000\n",
+      "2021-09-21 22:08:48,339 epoch 3 - iter 65/130 - loss 0.68356811 - samples/sec: 16.83 - lr: 0.020000\n",
+      "2021-09-21 22:08:49,127 epoch 3 - iter 78/130 - loss 0.67488316 - samples/sec: 16.51 - lr: 0.020000\n",
+      "2021-09-21 22:08:49,886 epoch 3 - iter 91/130 - loss 0.66683118 - samples/sec: 17.15 - lr: 0.020000\n",
+      "2021-09-21 22:08:50,630 epoch 3 - iter 104/130 - loss 0.66862209 - samples/sec: 17.49 - lr: 0.020000\n",
+      "2021-09-21 22:08:51,398 epoch 3 - iter 117/130 - loss 0.66425962 - samples/sec: 16.95 - lr: 0.020000\n",
+      "2021-09-21 22:08:52,152 epoch 3 - iter 130/130 - loss 0.66043509 - samples/sec: 17.26 - lr: 0.020000\n",
+      "2021-09-21 22:08:52,153 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:08:52,153 EPOCH 3 done: loss 0.6604 - lr 0.0200000\n",
+      "2021-09-21 22:08:52,613 DEV : loss 0.4579773545265198 - score 0.0\n",
+      "2021-09-21 22:08:52,613 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 22:08:52,615 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:08:53,282 epoch 4 - iter 13/130 - loss 0.67454791 - samples/sec: 20.67 - lr: 0.020000\n",
+      "2021-09-21 22:08:53,857 epoch 4 - iter 26/130 - loss 0.65663803 - samples/sec: 22.62 - lr: 0.020000\n",
+      "2021-09-21 22:08:54,417 epoch 4 - iter 39/130 - loss 0.65110187 - samples/sec: 23.27 - lr: 0.020000\n",
+      "2021-09-21 22:08:54,970 epoch 4 - iter 52/130 - loss 0.64022957 - samples/sec: 23.54 - lr: 0.020000\n",
+      "2021-09-21 22:08:55,523 epoch 4 - iter 65/130 - loss 0.64283085 - samples/sec: 23.56 - lr: 0.020000\n",
+      "2021-09-21 22:08:56,076 epoch 4 - iter 78/130 - loss 0.64715315 - samples/sec: 23.54 - lr: 0.020000\n",
+      "2021-09-21 22:08:56,631 epoch 4 - iter 91/130 - loss 0.64870066 - samples/sec: 23.45 - lr: 0.020000\n",
+      "2021-09-21 22:08:57,188 epoch 4 - iter 104/130 - loss 0.64765605 - samples/sec: 23.37 - lr: 0.020000\n",
+      "2021-09-21 22:08:57,818 epoch 4 - iter 117/130 - loss 0.64823180 - samples/sec: 20.66 - lr: 0.020000\n",
+      "2021-09-21 22:08:58,562 epoch 4 - iter 130/130 - loss 0.64675162 - samples/sec: 17.49 - lr: 0.020000\n",
+      "2021-09-21 22:08:58,563 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:08:58,564 EPOCH 4 done: loss 0.6468 - lr 0.0200000\n",
+      "2021-09-21 22:08:59,155 DEV : loss 0.4564749300479889 - score 0.0714\n",
+      "2021-09-21 22:08:59,157 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:10:45,097 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:10:45,723 epoch 2 - iter 13/130 - loss 0.56727447 - samples/sec: 21.37 - lr: 0.020000\n",
-      "2021-09-08 02:10:46,322 epoch 2 - iter 26/130 - loss 0.60289146 - samples/sec: 21.74 - lr: 0.020000\n",
-      "2021-09-08 02:10:46,929 epoch 2 - iter 39/130 - loss 0.64815470 - samples/sec: 21.44 - lr: 0.020000\n",
-      "2021-09-08 02:10:47,534 epoch 2 - iter 52/130 - loss 0.60392134 - samples/sec: 21.51 - lr: 0.020000\n",
-      "2021-09-08 02:10:48,138 epoch 2 - iter 65/130 - loss 0.59664238 - samples/sec: 21.57 - lr: 0.020000\n",
-      "2021-09-08 02:10:48,736 epoch 2 - iter 78/130 - loss 0.57656626 - samples/sec: 21.76 - lr: 0.020000\n",
-      "2021-09-08 02:10:49,336 epoch 2 - iter 91/130 - loss 0.60377242 - samples/sec: 21.67 - lr: 0.020000\n",
-      "2021-09-08 02:10:49,976 epoch 2 - iter 104/130 - loss 0.59988414 - samples/sec: 20.35 - lr: 0.020000\n",
-      "2021-09-08 02:10:50,587 epoch 2 - iter 117/130 - loss 0.57378108 - samples/sec: 21.32 - lr: 0.020000\n",
-      "2021-09-08 02:10:51,186 epoch 2 - iter 130/130 - loss 0.57752525 - samples/sec: 21.70 - lr: 0.020000\n",
-      "2021-09-08 02:10:51,187 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:10:51,188 EPOCH 2 done: loss 0.5775 - lr 0.0200000\n",
-      "2021-09-08 02:10:51,573 DEV : loss 0.3265090882778168 - score 0.5\n",
-      "2021-09-08 02:10:51,573 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:09:03,113 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:09:03,755 epoch 5 - iter 13/130 - loss 0.62036299 - samples/sec: 21.81 - lr: 0.020000\n",
+      "2021-09-21 22:09:04,325 epoch 5 - iter 26/130 - loss 0.62526360 - samples/sec: 22.83 - lr: 0.020000\n",
+      "2021-09-21 22:09:04,898 epoch 5 - iter 39/130 - loss 0.64163052 - samples/sec: 22.72 - lr: 0.020000\n",
+      "2021-09-21 22:09:05,461 epoch 5 - iter 52/130 - loss 0.64353390 - samples/sec: 23.12 - lr: 0.020000\n",
+      "2021-09-21 22:09:06,030 epoch 5 - iter 65/130 - loss 0.64324735 - samples/sec: 22.91 - lr: 0.020000\n",
+      "2021-09-21 22:09:06,591 epoch 5 - iter 78/130 - loss 0.64396327 - samples/sec: 23.20 - lr: 0.020000\n",
+      "2021-09-21 22:09:07,545 epoch 5 - iter 91/130 - loss 0.64242993 - samples/sec: 19.26 - lr: 0.020000\n",
+      "2021-09-21 22:09:08,281 epoch 5 - iter 104/130 - loss 0.64343468 - samples/sec: 17.69 - lr: 0.020000\n",
+      "2021-09-21 22:09:09,025 epoch 5 - iter 117/130 - loss 0.64675960 - samples/sec: 17.48 - lr: 0.020000\n",
+      "2021-09-21 22:09:09,768 epoch 5 - iter 130/130 - loss 0.64551138 - samples/sec: 17.51 - lr: 0.020000\n",
+      "2021-09-21 22:09:09,769 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:09:09,770 EPOCH 5 done: loss 0.6455 - lr 0.0200000\n",
+      "2021-09-21 22:09:10,303 DEV : loss 0.4339454472064972 - score 0.0714\n",
+      "2021-09-21 22:09:10,305 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:10:57,903 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:10:58,535 epoch 3 - iter 13/130 - loss 0.21440873 - samples/sec: 21.18 - lr: 0.020000\n",
-      "2021-09-08 02:10:59,164 epoch 3 - iter 26/130 - loss 0.44946709 - samples/sec: 20.67 - lr: 0.020000\n",
-      "2021-09-08 02:10:59,757 epoch 3 - iter 39/130 - loss 0.51128342 - samples/sec: 21.98 - lr: 0.020000\n",
-      "2021-09-08 02:11:00,353 epoch 3 - iter 52/130 - loss 0.50270757 - samples/sec: 21.83 - lr: 0.020000\n",
-      "2021-09-08 02:11:00,939 epoch 3 - iter 65/130 - loss 0.46795223 - samples/sec: 22.20 - lr: 0.020000\n",
-      "2021-09-08 02:11:01,534 epoch 3 - iter 78/130 - loss 0.49093070 - samples/sec: 21.88 - lr: 0.020000\n",
-      "2021-09-08 02:11:02,120 epoch 3 - iter 91/130 - loss 0.47166023 - samples/sec: 22.20 - lr: 0.020000\n",
-      "2021-09-08 02:11:02,710 epoch 3 - iter 104/130 - loss 0.45440956 - samples/sec: 22.05 - lr: 0.020000\n",
-      "2021-09-08 02:11:03,301 epoch 3 - iter 117/130 - loss 0.42963830 - samples/sec: 22.01 - lr: 0.020000\n",
-      "2021-09-08 02:11:03,908 epoch 3 - iter 130/130 - loss 0.46794820 - samples/sec: 21.46 - lr: 0.020000\n",
-      "2021-09-08 02:11:03,909 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:11:03,909 EPOCH 3 done: loss 0.4679 - lr 0.0200000\n",
-      "2021-09-08 02:11:04,142 DEV : loss 0.35897931456565857 - score 0.3571\n",
-      "2021-09-08 02:11:04,143 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:11:04,145 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:11:04,756 epoch 4 - iter 13/130 - loss 0.39724553 - samples/sec: 21.87 - lr: 0.020000\n",
-      "2021-09-08 02:11:05,344 epoch 4 - iter 26/130 - loss 0.29928298 - samples/sec: 22.14 - lr: 0.020000\n",
-      "2021-09-08 02:11:05,939 epoch 4 - iter 39/130 - loss 0.42356145 - samples/sec: 21.87 - lr: 0.020000\n",
-      "2021-09-08 02:11:06,535 epoch 4 - iter 52/130 - loss 0.41316729 - samples/sec: 21.85 - lr: 0.020000\n",
-      "2021-09-08 02:11:07,111 epoch 4 - iter 65/130 - loss 0.36384378 - samples/sec: 22.60 - lr: 0.020000\n",
-      "2021-09-08 02:11:07,705 epoch 4 - iter 78/130 - loss 0.38303067 - samples/sec: 21.88 - lr: 0.020000\n",
-      "2021-09-08 02:11:08,292 epoch 4 - iter 91/130 - loss 0.37444367 - samples/sec: 22.19 - lr: 0.020000\n",
-      "2021-09-08 02:11:08,876 epoch 4 - iter 104/130 - loss 0.37446448 - samples/sec: 22.29 - lr: 0.020000\n",
-      "2021-09-08 02:11:09,464 epoch 4 - iter 117/130 - loss 0.38242920 - samples/sec: 22.11 - lr: 0.020000\n",
-      "2021-09-08 02:11:10,059 epoch 4 - iter 130/130 - loss 0.39247339 - samples/sec: 21.89 - lr: 0.020000\n",
-      "2021-09-08 02:11:10,060 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:11:10,060 EPOCH 4 done: loss 0.3925 - lr 0.0200000\n",
-      "2021-09-08 02:11:10,325 DEV : loss 0.31264781951904297 - score 0.4286\n",
-      "2021-09-08 02:11:10,326 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:11:10,329 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:11:10,923 epoch 5 - iter 13/130 - loss 0.15010265 - samples/sec: 22.52 - lr: 0.020000\n",
-      "2021-09-08 02:11:11,507 epoch 5 - iter 26/130 - loss 0.23487660 - samples/sec: 22.27 - lr: 0.020000\n",
-      "2021-09-08 02:11:12,095 epoch 5 - iter 39/130 - loss 0.27270033 - samples/sec: 22.13 - lr: 0.020000\n",
-      "2021-09-08 02:11:12,674 epoch 5 - iter 52/130 - loss 0.22807872 - samples/sec: 22.48 - lr: 0.020000\n",
-      "2021-09-08 02:11:13,256 epoch 5 - iter 65/130 - loss 0.22408939 - samples/sec: 22.39 - lr: 0.020000\n",
-      "2021-09-08 02:11:13,839 epoch 5 - iter 78/130 - loss 0.21557427 - samples/sec: 22.34 - lr: 0.020000\n",
-      "2021-09-08 02:11:14,425 epoch 5 - iter 91/130 - loss 0.24012949 - samples/sec: 22.21 - lr: 0.020000\n",
-      "2021-09-08 02:11:15,012 epoch 5 - iter 104/130 - loss 0.24137489 - samples/sec: 22.18 - lr: 0.020000\n",
-      "2021-09-08 02:11:15,587 epoch 5 - iter 117/130 - loss 0.22152521 - samples/sec: 22.62 - lr: 0.020000\n",
-      "2021-09-08 02:11:16,174 epoch 5 - iter 130/130 - loss 0.25098893 - samples/sec: 22.19 - lr: 0.020000\n",
-      "2021-09-08 02:11:16,175 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:11:16,175 EPOCH 5 done: loss 0.2510 - lr 0.0200000\n",
-      "2021-09-08 02:11:16,422 DEV : loss 0.38991227746009827 - score 0.2857\n",
-      "2021-09-08 02:11:16,422 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 02:11:16,429 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:11:17,022 epoch 6 - iter 13/130 - loss 0.17875192 - samples/sec: 22.53 - lr: 0.020000\n",
-      "2021-09-08 02:11:17,600 epoch 6 - iter 26/130 - loss 0.25454359 - samples/sec: 22.55 - lr: 0.020000\n",
-      "2021-09-08 02:11:18,172 epoch 6 - iter 39/130 - loss 0.26979991 - samples/sec: 22.74 - lr: 0.020000\n",
-      "2021-09-08 02:11:18,757 epoch 6 - iter 52/130 - loss 0.31895228 - samples/sec: 22.27 - lr: 0.020000\n",
-      "2021-09-08 02:11:19,345 epoch 6 - iter 65/130 - loss 0.28299219 - samples/sec: 22.12 - lr: 0.020000\n",
-      "2021-09-08 02:11:19,920 epoch 6 - iter 78/130 - loss 0.25982426 - samples/sec: 22.66 - lr: 0.020000\n",
-      "2021-09-08 02:11:20,495 epoch 6 - iter 91/130 - loss 0.24072829 - samples/sec: 22.62 - lr: 0.020000\n",
-      "2021-09-08 02:11:21,070 epoch 6 - iter 104/130 - loss 0.22406084 - samples/sec: 22.62 - lr: 0.020000\n",
-      "2021-09-08 02:11:21,647 epoch 6 - iter 117/130 - loss 0.21150842 - samples/sec: 22.58 - lr: 0.020000\n",
-      "2021-09-08 02:11:22,233 epoch 6 - iter 130/130 - loss 0.21605580 - samples/sec: 22.22 - lr: 0.020000\n"
+      "2021-09-21 22:09:15,932 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:09:16,521 epoch 6 - iter 13/130 - loss 0.66385617 - samples/sec: 22.78 - lr: 0.020000\n",
+      "2021-09-21 22:09:17,110 epoch 6 - iter 26/130 - loss 0.64453985 - samples/sec: 22.12 - lr: 0.020000\n",
+      "2021-09-21 22:09:17,665 epoch 6 - iter 39/130 - loss 0.64242610 - samples/sec: 23.46 - lr: 0.020000\n",
+      "2021-09-21 22:09:18,220 epoch 6 - iter 52/130 - loss 0.64698612 - samples/sec: 23.46 - lr: 0.020000\n",
+      "2021-09-21 22:09:18,777 epoch 6 - iter 65/130 - loss 0.64541256 - samples/sec: 23.36 - lr: 0.020000\n",
+      "2021-09-21 22:09:19,331 epoch 6 - iter 78/130 - loss 0.64799105 - samples/sec: 23.49 - lr: 0.020000\n",
+      "2021-09-21 22:09:19,886 epoch 6 - iter 91/130 - loss 0.65250318 - samples/sec: 23.49 - lr: 0.020000\n",
+      "2021-09-21 22:09:20,437 epoch 6 - iter 104/130 - loss 0.65264306 - samples/sec: 23.61 - lr: 0.020000\n",
+      "2021-09-21 22:09:20,989 epoch 6 - iter 117/130 - loss 0.65413905 - samples/sec: 23.57 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:11:22,234 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:11:22,234 EPOCH 6 done: loss 0.2161 - lr 0.0200000\n",
-      "2021-09-08 02:11:22,494 DEV : loss 0.4111019968986511 - score 0.5714\n",
-      "2021-09-08 02:11:22,495 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:09:21,541 epoch 6 - iter 130/130 - loss 0.65042177 - samples/sec: 23.60 - lr: 0.020000\n",
+      "2021-09-21 22:09:21,542 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:09:21,542 EPOCH 6 done: loss 0.6504 - lr 0.0200000\n",
+      "2021-09-21 22:09:24,507 DEV : loss 0.389787882566452 - score 0.0714\n",
+      "2021-09-21 22:09:24,508 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:11:26,416 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:11:27,002 epoch 7 - iter 13/130 - loss 0.00151689 - samples/sec: 22.90 - lr: 0.020000\n",
-      "2021-09-08 02:11:27,574 epoch 7 - iter 26/130 - loss 0.11115298 - samples/sec: 22.74 - lr: 0.020000\n",
-      "2021-09-08 02:11:28,146 epoch 7 - iter 39/130 - loss 0.13568533 - samples/sec: 22.75 - lr: 0.020000\n",
-      "2021-09-08 02:11:28,726 epoch 7 - iter 52/130 - loss 0.14835893 - samples/sec: 22.46 - lr: 0.020000\n",
-      "2021-09-08 02:11:29,302 epoch 7 - iter 65/130 - loss 0.17836750 - samples/sec: 22.61 - lr: 0.020000\n",
-      "2021-09-08 02:11:29,879 epoch 7 - iter 78/130 - loss 0.19820059 - samples/sec: 22.55 - lr: 0.020000\n",
-      "2021-09-08 02:11:30,451 epoch 7 - iter 91/130 - loss 0.18666089 - samples/sec: 22.73 - lr: 0.020000\n",
-      "2021-09-08 02:11:31,021 epoch 7 - iter 104/130 - loss 0.17875177 - samples/sec: 22.85 - lr: 0.020000\n",
-      "2021-09-08 02:11:31,600 epoch 7 - iter 117/130 - loss 0.18956672 - samples/sec: 22.49 - lr: 0.020000\n",
-      "2021-09-08 02:11:32,171 epoch 7 - iter 130/130 - loss 0.19669113 - samples/sec: 22.78 - lr: 0.020000\n",
-      "2021-09-08 02:11:32,172 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:11:32,173 EPOCH 7 done: loss 0.1967 - lr 0.0200000\n",
-      "2021-09-08 02:11:32,412 DEV : loss 0.3337242305278778 - score 0.5\n",
-      "2021-09-08 02:11:32,413 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:11:32,415 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:11:33,009 epoch 8 - iter 13/130 - loss 0.23785404 - samples/sec: 22.53 - lr: 0.020000\n",
-      "2021-09-08 02:11:33,592 epoch 8 - iter 26/130 - loss 0.26872675 - samples/sec: 22.29 - lr: 0.020000\n",
-      "2021-09-08 02:11:34,164 epoch 8 - iter 39/130 - loss 0.22155466 - samples/sec: 22.76 - lr: 0.020000\n",
-      "2021-09-08 02:11:34,736 epoch 8 - iter 52/130 - loss 0.22790881 - samples/sec: 22.79 - lr: 0.020000\n",
-      "2021-09-08 02:11:35,308 epoch 8 - iter 65/130 - loss 0.18275883 - samples/sec: 22.74 - lr: 0.020000\n",
-      "2021-09-08 02:11:35,894 epoch 8 - iter 78/130 - loss 0.22201836 - samples/sec: 22.21 - lr: 0.020000\n",
-      "2021-09-08 02:11:36,470 epoch 8 - iter 91/130 - loss 0.21190008 - samples/sec: 22.58 - lr: 0.020000\n",
-      "2021-09-08 02:11:37,041 epoch 8 - iter 104/130 - loss 0.18580892 - samples/sec: 22.83 - lr: 0.020000\n",
-      "2021-09-08 02:11:37,614 epoch 8 - iter 117/130 - loss 0.19569999 - samples/sec: 22.71 - lr: 0.020000\n",
-      "2021-09-08 02:11:38,186 epoch 8 - iter 130/130 - loss 0.18964348 - samples/sec: 22.75 - lr: 0.020000\n",
-      "2021-09-08 02:11:38,187 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:11:38,187 EPOCH 8 done: loss 0.1896 - lr 0.0200000\n",
-      "2021-09-08 02:11:38,421 DEV : loss 0.30859559774398804 - score 0.5\n",
-      "2021-09-08 02:11:38,422 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:11:38,425 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:11:39,011 epoch 9 - iter 13/130 - loss 0.13755000 - samples/sec: 22.83 - lr: 0.020000\n",
-      "2021-09-08 02:11:39,606 epoch 9 - iter 26/130 - loss 0.07058220 - samples/sec: 21.90 - lr: 0.020000\n",
-      "2021-09-08 02:11:40,198 epoch 9 - iter 39/130 - loss 0.04737569 - samples/sec: 21.97 - lr: 0.020000\n",
-      "2021-09-08 02:11:40,766 epoch 9 - iter 52/130 - loss 0.03689401 - samples/sec: 22.92 - lr: 0.020000\n",
-      "2021-09-08 02:11:41,339 epoch 9 - iter 65/130 - loss 0.06603347 - samples/sec: 22.74 - lr: 0.020000\n",
-      "2021-09-08 02:11:41,906 epoch 9 - iter 78/130 - loss 0.08974816 - samples/sec: 22.94 - lr: 0.020000\n",
-      "2021-09-08 02:11:42,474 epoch 9 - iter 91/130 - loss 0.08833643 - samples/sec: 22.92 - lr: 0.020000\n",
-      "2021-09-08 02:11:43,047 epoch 9 - iter 104/130 - loss 0.08420724 - samples/sec: 22.72 - lr: 0.020000\n",
-      "2021-09-08 02:11:43,622 epoch 9 - iter 117/130 - loss 0.08386438 - samples/sec: 22.62 - lr: 0.020000\n",
-      "2021-09-08 02:11:44,208 epoch 9 - iter 130/130 - loss 0.09846693 - samples/sec: 22.20 - lr: 0.020000\n",
-      "2021-09-08 02:11:44,210 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:11:44,210 EPOCH 9 done: loss 0.0985 - lr 0.0200000\n",
-      "2021-09-08 02:11:44,452 DEV : loss 0.39329954981803894 - score 0.5\n",
-      "2021-09-08 02:11:44,453 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 02:11:44,456 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:11:45,068 epoch 10 - iter 13/130 - loss 0.10328250 - samples/sec: 21.90 - lr: 0.020000\n",
-      "2021-09-08 02:11:45,639 epoch 10 - iter 26/130 - loss 0.18977309 - samples/sec: 22.76 - lr: 0.020000\n",
-      "2021-09-08 02:11:46,211 epoch 10 - iter 39/130 - loss 0.14353351 - samples/sec: 22.77 - lr: 0.020000\n",
-      "2021-09-08 02:11:46,784 epoch 10 - iter 52/130 - loss 0.17813366 - samples/sec: 22.70 - lr: 0.020000\n",
-      "2021-09-08 02:11:47,355 epoch 10 - iter 65/130 - loss 0.14453733 - samples/sec: 22.79 - lr: 0.020000\n",
-      "2021-09-08 02:11:47,947 epoch 10 - iter 78/130 - loss 0.12854652 - samples/sec: 21.99 - lr: 0.020000\n",
-      "2021-09-08 02:11:48,520 epoch 10 - iter 91/130 - loss 0.11048004 - samples/sec: 22.73 - lr: 0.020000\n",
-      "2021-09-08 02:11:49,090 epoch 10 - iter 104/130 - loss 0.09678973 - samples/sec: 22.85 - lr: 0.020000\n",
-      "2021-09-08 02:11:49,656 epoch 10 - iter 117/130 - loss 0.08607906 - samples/sec: 23.01 - lr: 0.020000\n",
-      "2021-09-08 02:11:50,234 epoch 10 - iter 130/130 - loss 0.09360191 - samples/sec: 22.52 - lr: 0.020000\n",
-      "2021-09-08 02:11:50,235 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:11:50,235 EPOCH 10 done: loss 0.0936 - lr 0.0200000\n",
-      "2021-09-08 02:11:50,471 DEV : loss 0.4766509532928467 - score 0.5\n",
-      "Epoch    10: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 02:11:50,472 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 02:11:54,656 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:11:54,657 Testing using best model ...\n",
-      "2021-09-08 02:11:54,658 loading file None/best-model.pt\n",
+      "2021-09-21 22:09:29,049 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:09:29,810 epoch 7 - iter 13/130 - loss 0.67036234 - samples/sec: 17.93 - lr: 0.020000\n",
+      "2021-09-21 22:09:30,468 epoch 7 - iter 26/130 - loss 0.65112663 - samples/sec: 19.78 - lr: 0.020000\n",
+      "2021-09-21 22:09:31,040 epoch 7 - iter 39/130 - loss 0.64996853 - samples/sec: 22.80 - lr: 0.020000\n",
+      "2021-09-21 22:09:31,596 epoch 7 - iter 52/130 - loss 0.65562327 - samples/sec: 23.39 - lr: 0.020000\n",
+      "2021-09-21 22:09:32,150 epoch 7 - iter 65/130 - loss 0.65562178 - samples/sec: 23.50 - lr: 0.020000\n",
+      "2021-09-21 22:09:32,703 epoch 7 - iter 78/130 - loss 0.65475236 - samples/sec: 23.51 - lr: 0.020000\n",
+      "2021-09-21 22:09:33,260 epoch 7 - iter 91/130 - loss 0.65484584 - samples/sec: 23.38 - lr: 0.020000\n",
+      "2021-09-21 22:09:33,812 epoch 7 - iter 104/130 - loss 0.65288943 - samples/sec: 23.60 - lr: 0.020000\n",
+      "2021-09-21 22:09:34,378 epoch 7 - iter 117/130 - loss 0.65131576 - samples/sec: 22.99 - lr: 0.020000\n",
+      "2021-09-21 22:09:35,104 epoch 7 - iter 130/130 - loss 0.64974729 - samples/sec: 17.91 - lr: 0.020000\n",
+      "2021-09-21 22:09:35,106 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:09:35,106 EPOCH 7 done: loss 0.6497 - lr 0.0200000\n",
+      "2021-09-21 22:09:35,721 DEV : loss 0.5586951971054077 - score 0.0\n",
+      "2021-09-21 22:09:35,722 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:09:35,724 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:09:36,499 epoch 8 - iter 13/130 - loss 0.67020435 - samples/sec: 17.36 - lr: 0.020000\n",
+      "2021-09-21 22:09:37,247 epoch 8 - iter 26/130 - loss 0.66289168 - samples/sec: 17.39 - lr: 0.020000\n",
+      "2021-09-21 22:09:38,013 epoch 8 - iter 39/130 - loss 0.65398946 - samples/sec: 17.00 - lr: 0.020000\n",
+      "2021-09-21 22:09:38,758 epoch 8 - iter 52/130 - loss 0.64435709 - samples/sec: 17.47 - lr: 0.020000\n",
+      "2021-09-21 22:09:39,501 epoch 8 - iter 65/130 - loss 0.64478426 - samples/sec: 17.51 - lr: 0.020000\n",
+      "2021-09-21 22:09:40,252 epoch 8 - iter 78/130 - loss 0.64051892 - samples/sec: 17.33 - lr: 0.020000\n",
+      "2021-09-21 22:09:41,021 epoch 8 - iter 91/130 - loss 0.63671171 - samples/sec: 16.93 - lr: 0.020000\n",
+      "2021-09-21 22:09:41,749 epoch 8 - iter 104/130 - loss 0.64127716 - samples/sec: 17.88 - lr: 0.020000\n",
+      "2021-09-21 22:09:42,513 epoch 8 - iter 117/130 - loss 0.63991925 - samples/sec: 17.03 - lr: 0.020000\n",
+      "2021-09-21 22:09:43,245 epoch 8 - iter 130/130 - loss 0.63859806 - samples/sec: 17.77 - lr: 0.020000\n",
+      "2021-09-21 22:09:43,247 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:09:43,247 EPOCH 8 done: loss 0.6386 - lr 0.0200000\n",
+      "2021-09-21 22:09:43,863 DEV : loss 0.4506939947605133 - score 0.0714\n",
+      "2021-09-21 22:09:43,864 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 22:09:43,866 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:09:44,643 epoch 9 - iter 13/130 - loss 0.64338876 - samples/sec: 17.61 - lr: 0.020000\n",
+      "2021-09-21 22:09:45,366 epoch 9 - iter 26/130 - loss 0.66629292 - samples/sec: 18.00 - lr: 0.020000\n",
+      "2021-09-21 22:09:46,106 epoch 9 - iter 39/130 - loss 0.66362383 - samples/sec: 17.60 - lr: 0.020000\n",
+      "2021-09-21 22:09:46,870 epoch 9 - iter 52/130 - loss 0.65960856 - samples/sec: 17.04 - lr: 0.020000\n",
+      "2021-09-21 22:09:47,620 epoch 9 - iter 65/130 - loss 0.65925449 - samples/sec: 17.33 - lr: 0.020000\n",
+      "2021-09-21 22:09:48,366 epoch 9 - iter 78/130 - loss 0.65804763 - samples/sec: 17.45 - lr: 0.020000\n",
+      "2021-09-21 22:09:49,116 epoch 9 - iter 91/130 - loss 0.65671477 - samples/sec: 17.36 - lr: 0.020000\n",
+      "2021-09-21 22:09:49,860 epoch 9 - iter 104/130 - loss 0.65433052 - samples/sec: 17.49 - lr: 0.020000\n",
+      "2021-09-21 22:09:50,574 epoch 9 - iter 117/130 - loss 0.65695013 - samples/sec: 18.21 - lr: 0.020000\n",
+      "2021-09-21 22:09:51,333 epoch 9 - iter 130/130 - loss 0.65766783 - samples/sec: 17.15 - lr: 0.020000\n",
+      "2021-09-21 22:09:51,334 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:09:51,335 EPOCH 9 done: loss 0.6577 - lr 0.0200000\n",
+      "2021-09-21 22:09:51,932 DEV : loss 0.46302592754364014 - score 0.1429\n",
+      "2021-09-21 22:09:51,932 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 22:09:56,004 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:09:56,800 epoch 10 - iter 13/130 - loss 0.65358609 - samples/sec: 17.84 - lr: 0.020000\n",
+      "2021-09-21 22:09:57,545 epoch 10 - iter 26/130 - loss 0.65655773 - samples/sec: 17.48 - lr: 0.020000\n",
+      "2021-09-21 22:09:58,312 epoch 10 - iter 39/130 - loss 0.65376800 - samples/sec: 16.96 - lr: 0.020000\n",
+      "2021-09-21 22:09:59,040 epoch 10 - iter 52/130 - loss 0.65492203 - samples/sec: 17.87 - lr: 0.020000\n",
+      "2021-09-21 22:09:59,802 epoch 10 - iter 65/130 - loss 0.65301962 - samples/sec: 17.09 - lr: 0.020000\n",
+      "2021-09-21 22:10:00,567 epoch 10 - iter 78/130 - loss 0.65575304 - samples/sec: 17.00 - lr: 0.020000\n",
+      "2021-09-21 22:10:01,303 epoch 10 - iter 91/130 - loss 0.65275368 - samples/sec: 17.68 - lr: 0.020000\n",
+      "2021-09-21 22:10:02,070 epoch 10 - iter 104/130 - loss 0.65018374 - samples/sec: 16.96 - lr: 0.020000\n",
+      "2021-09-21 22:10:02,796 epoch 10 - iter 117/130 - loss 0.64991357 - samples/sec: 17.94 - lr: 0.020000\n",
+      "2021-09-21 22:10:03,567 epoch 10 - iter 130/130 - loss 0.64833770 - samples/sec: 16.88 - lr: 0.020000\n",
+      "2021-09-21 22:10:03,568 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:10:03,568 EPOCH 10 done: loss 0.6483 - lr 0.0200000\n",
+      "2021-09-21 22:10:04,176 DEV : loss 0.42456677556037903 - score 0.0714\n",
+      "2021-09-21 22:10:04,177 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:10:08,261 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:10:08,262 Testing using best model ...\n",
+      "2021-09-21 22:10:08,263 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:11:58,826 \t0.75\n",
-      "2021-09-08 02:11:58,827 \n",
+      "2021-09-21 22:10:13,737 \t0.125\n",
+      "2021-09-21 22:10:13,738 \n",
       "Results:\n",
-      "- F-score (micro) 0.75\n",
-      "- F-score (macro) 0.3729\n",
-      "- Accuracy 0.75\n",
+      "- F-score (micro) 0.125\n",
+      "- F-score (macro) 0.025\n",
+      "- Accuracy 0.125\n",
       "\n",
       "By class:\n",
       "                                               precision    recall  f1-score   support\n",
       "\n",
-      "                    this text is about travel     0.5000    1.0000    0.6667         1\n",
-      "                this text is about technology     1.0000    1.0000    1.0000         1\n",
-      "                  this text is about wellness     0.0000    0.0000    0.0000         0\n",
-      "                     this text is about women     1.0000    0.6667    0.8000         3\n",
-      "                   this text is about parents     0.0000    0.0000    0.0000         0\n",
-      "                  this text is about business     0.0000    0.0000    0.0000         1\n",
-      "                  this text is about weddings     0.0000    0.0000    0.0000         0\n",
-      "                   this text is about fashion     1.0000    1.0000    1.0000         2\n",
-      "this text is about entertainmen,the attention     0.0000    0.0000    0.0000         0\n",
-      "                   this text is about science     0.5000    0.5000    0.5000         2\n",
-      "                   this text is about divorce     0.0000    0.0000    0.0000         1\n",
-      "                     this text is about crime     1.0000    1.0000    1.0000         2\n",
-      "                  this text is about religion     1.0000    1.0000    1.0000         3\n",
-      "                    this text is about sports     0.0000    0.0000    0.0000         0\n",
-      "                  this text is about politics     0.0000    0.0000    0.0000         0\n",
-      "                    this text is about comedy     0.0000    0.0000    0.0000         0\n",
+      "                    this text is about travel     0.0000    0.0000    0.0000         0\n",
+      "                this text is about technology     0.0000    0.0000    0.0000         1\n",
+      "                  this text is about wellness     0.0000    0.0000    0.0000         1\n",
+      "                     this text is about women     0.0000    0.0000    0.0000         1\n",
+      "                   this text is about parents     0.0000    0.0000    0.0000         1\n",
+      "                  this text is about business     0.0000    0.0000    0.0000         0\n",
+      "                  this text is about weddings     0.0000    0.0000    0.0000         1\n",
+      "                   this text is about fashion     0.0000    0.0000    0.0000         0\n",
+      "this text is about entertainmen,the attention     0.2500    1.0000    0.4000         2\n",
+      "                   this text is about science     0.0000    0.0000    0.0000         2\n",
+      "                   this text is about divorce     0.0000    0.0000    0.0000         2\n",
+      "                     this text is about crime     0.0000    0.0000    0.0000         0\n",
+      "                  this text is about religion     0.0000    0.0000    0.0000         1\n",
+      "                    this text is about sports     0.0000    0.0000    0.0000         1\n",
+      "                  this text is about politics     0.0000    0.0000    0.0000         2\n",
+      "                    this text is about comedy     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "                                    micro avg     0.7500    0.7500    0.7500        16\n",
-      "                                    macro avg     0.3750    0.3854    0.3729        16\n",
-      "                                 weighted avg     0.7812    0.7500    0.7542        16\n",
-      "                                  samples avg     0.7500    0.7500    0.7500        16\n",
+      "                                    micro avg     0.1250    0.1250    0.1250        16\n",
+      "                                    macro avg     0.0156    0.0625    0.0250        16\n",
+      "                                 weighted avg     0.0312    0.1250    0.0500        16\n",
+      "                                  samples avg     0.1250    0.1250    0.1250        16\n",
       "\n"
      ]
     },
@@ -5231,26 +5223,26 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:11:58,827 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:12:08,598 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
+      "2021-09-21 22:10:13,738 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:10:25,231 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:12:12,502 Computing label dictionary. Progress:\n"
+      "2021-09-21 22:10:29,255 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 146/146 [00:00<00:00, 71189.07it/s]"
+      "100%|██████████| 146/146 [00:00<00:00, 65725.92it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:12:12,506 [b'this text is about travel', b'this text is about technology', b'this text is about wellness', b'this text is about women', b'this text is about parents', b'this text is about business', b'this text is about weddings', b'this text is about fashion', b'this text is about entertainmen,the attention', b'this text is about science', b'this text is about divorce', b'this text is about crime', b'this text is about religion', b'this text is about sports', b'this text is about politics', b'this text is about comedy']\n",
-      "2021-09-08 02:12:12,525 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:12:12,527 Model: \"TARSClassifier(\n",
+      "2021-09-21 22:10:29,259 [b'this text is about travel', b'this text is about technology', b'this text is about wellness', b'this text is about women', b'this text is about parents', b'this text is about business', b'this text is about weddings', b'this text is about fashion', b'this text is about entertainmen,the attention', b'this text is about science', b'this text is about divorce', b'this text is about crime', b'this text is about religion', b'this text is about sports', b'this text is about politics', b'this text is about comedy']\n",
+      "2021-09-21 22:10:29,268 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:10:29,269 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -5563,25 +5555,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:12:12,528 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:12:12,528 Corpus: \"Corpus: 130 train + 14 dev + 16 test sentences\"\n",
-      "2021-09-08 02:12:12,528 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:12:12,529 Parameters:\n",
-      "2021-09-08 02:12:12,529  - learning_rate: \"0.02\"\n",
-      "2021-09-08 02:12:12,529  - mini_batch_size: \"1\"\n",
-      "2021-09-08 02:12:12,530  - patience: \"3\"\n",
-      "2021-09-08 02:12:12,530  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 02:12:12,530  - max_epochs: \"10\"\n",
-      "2021-09-08 02:12:12,530  - shuffle: \"True\"\n",
-      "2021-09-08 02:12:12,531  - train_with_dev: \"False\"\n",
-      "2021-09-08 02:12:12,531  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 02:12:12,531 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:12:12,532 Model training base path: \"None\"\n",
-      "2021-09-08 02:12:12,532 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:12:12,532 Device: cuda:0\n",
-      "2021-09-08 02:12:12,532 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:12:12,533 Embeddings storage mode: cpu\n",
-      "2021-09-08 02:12:12,538 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 22:10:29,270 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:10:29,270 Corpus: \"Corpus: 130 train + 14 dev + 16 test sentences\"\n",
+      "2021-09-21 22:10:29,271 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:10:29,271 Parameters:\n",
+      "2021-09-21 22:10:29,271  - learning_rate: \"0.02\"\n",
+      "2021-09-21 22:10:29,271  - mini_batch_size: \"1\"\n",
+      "2021-09-21 22:10:29,272  - patience: \"3\"\n",
+      "2021-09-21 22:10:29,272  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 22:10:29,272  - max_epochs: \"10\"\n",
+      "2021-09-21 22:10:29,273  - shuffle: \"True\"\n",
+      "2021-09-21 22:10:29,273  - train_with_dev: \"False\"\n",
+      "2021-09-21 22:10:29,273  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 22:10:29,273 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:10:29,274 Model training base path: \"None\"\n",
+      "2021-09-21 22:10:29,274 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:10:29,274 Device: cuda:0\n",
+      "2021-09-21 22:10:29,275 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:10:29,275 Embeddings storage mode: cpu\n",
+      "2021-09-21 22:10:29,282 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -5595,171 +5587,170 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:12:13,264 epoch 1 - iter 13/130 - loss 0.39542704 - samples/sec: 18.43 - lr: 0.020000\n",
-      "2021-09-08 02:12:14,138 epoch 1 - iter 26/130 - loss 0.52381338 - samples/sec: 14.89 - lr: 0.020000\n",
-      "2021-09-08 02:12:14,850 epoch 1 - iter 39/130 - loss 0.52884701 - samples/sec: 18.27 - lr: 0.020000\n",
-      "2021-09-08 02:12:15,573 epoch 1 - iter 52/130 - loss 0.47842344 - samples/sec: 17.99 - lr: 0.020000\n",
-      "2021-09-08 02:12:16,281 epoch 1 - iter 65/130 - loss 0.53853711 - samples/sec: 18.39 - lr: 0.020000\n",
-      "2021-09-08 02:12:17,010 epoch 1 - iter 78/130 - loss 0.54295626 - samples/sec: 17.84 - lr: 0.020000\n",
-      "2021-09-08 02:12:17,729 epoch 1 - iter 91/130 - loss 0.57832739 - samples/sec: 18.10 - lr: 0.020000\n",
-      "2021-09-08 02:12:18,455 epoch 1 - iter 104/130 - loss 0.57022092 - samples/sec: 17.93 - lr: 0.020000\n",
-      "2021-09-08 02:12:19,160 epoch 1 - iter 117/130 - loss 0.54817042 - samples/sec: 18.47 - lr: 0.020000\n",
-      "2021-09-08 02:12:19,875 epoch 1 - iter 130/130 - loss 0.55201675 - samples/sec: 18.19 - lr: 0.020000\n",
-      "2021-09-08 02:12:19,877 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:12:19,877 EPOCH 1 done: loss 0.5520 - lr 0.0200000\n",
-      "2021-09-08 02:12:20,147 DEV : loss 0.6090412139892578 - score 0.1429\n",
-      "2021-09-08 02:12:20,147 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 02:12:24,124 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:12:24,922 epoch 2 - iter 13/130 - loss 0.65877315 - samples/sec: 16.74 - lr: 0.020000\n",
-      "2021-09-08 02:12:25,651 epoch 2 - iter 26/130 - loss 0.63998156 - samples/sec: 17.85 - lr: 0.020000\n",
-      "2021-09-08 02:12:26,372 epoch 2 - iter 39/130 - loss 0.56679508 - samples/sec: 18.06 - lr: 0.020000\n",
-      "2021-09-08 02:12:27,098 epoch 2 - iter 52/130 - loss 0.61557938 - samples/sec: 17.92 - lr: 0.020000\n",
-      "2021-09-08 02:12:27,817 epoch 2 - iter 65/130 - loss 0.57710154 - samples/sec: 18.10 - lr: 0.020000\n",
-      "2021-09-08 02:12:28,548 epoch 2 - iter 78/130 - loss 0.64028858 - samples/sec: 17.78 - lr: 0.020000\n",
-      "2021-09-08 02:12:29,263 epoch 2 - iter 91/130 - loss 0.61358918 - samples/sec: 18.22 - lr: 0.020000\n",
-      "2021-09-08 02:12:29,991 epoch 2 - iter 104/130 - loss 0.61730409 - samples/sec: 17.86 - lr: 0.020000\n",
-      "2021-09-08 02:12:30,710 epoch 2 - iter 117/130 - loss 0.60500606 - samples/sec: 18.10 - lr: 0.020000\n",
-      "2021-09-08 02:12:31,442 epoch 2 - iter 130/130 - loss 0.60084809 - samples/sec: 17.78 - lr: 0.020000\n",
-      "2021-09-08 02:12:31,444 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:12:31,444 EPOCH 2 done: loss 0.6008 - lr 0.0200000\n",
-      "2021-09-08 02:12:31,715 DEV : loss 0.3442552387714386 - score 0.5\n",
-      "2021-09-08 02:12:31,716 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:10:30,119 epoch 1 - iter 13/130 - loss 0.36476676 - samples/sec: 16.60 - lr: 0.020000\n",
+      "2021-09-21 22:10:30,953 epoch 1 - iter 26/130 - loss 0.36708248 - samples/sec: 15.60 - lr: 0.020000\n",
+      "2021-09-21 22:10:31,767 epoch 1 - iter 39/130 - loss 0.50843229 - samples/sec: 15.99 - lr: 0.020000\n",
+      "2021-09-21 22:10:32,521 epoch 1 - iter 52/130 - loss 0.48989737 - samples/sec: 17.26 - lr: 0.020000\n",
+      "2021-09-21 22:10:33,326 epoch 1 - iter 65/130 - loss 0.51622877 - samples/sec: 16.15 - lr: 0.020000\n",
+      "2021-09-21 22:10:34,139 epoch 1 - iter 78/130 - loss 0.51950660 - samples/sec: 16.02 - lr: 0.020000\n",
+      "2021-09-21 22:10:34,906 epoch 1 - iter 91/130 - loss 0.55082733 - samples/sec: 16.96 - lr: 0.020000\n",
+      "2021-09-21 22:10:35,672 epoch 1 - iter 104/130 - loss 0.51522305 - samples/sec: 16.99 - lr: 0.020000\n",
+      "2021-09-21 22:10:36,477 epoch 1 - iter 117/130 - loss 0.51679781 - samples/sec: 16.16 - lr: 0.020000\n",
+      "2021-09-21 22:10:37,316 epoch 1 - iter 130/130 - loss 0.53797488 - samples/sec: 15.51 - lr: 0.020000\n",
+      "2021-09-21 22:10:37,317 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:10:37,317 EPOCH 1 done: loss 0.5380 - lr 0.0200000\n",
+      "2021-09-21 22:10:37,991 DEV : loss 0.3266531229019165 - score 0.0714\n",
+      "2021-09-21 22:10:37,992 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:12:35,915 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:12:36,701 epoch 3 - iter 13/130 - loss 0.28090777 - samples/sec: 16.99 - lr: 0.020000\n",
-      "2021-09-08 02:12:37,411 epoch 3 - iter 26/130 - loss 0.39698690 - samples/sec: 18.32 - lr: 0.020000\n",
-      "2021-09-08 02:12:38,119 epoch 3 - iter 39/130 - loss 0.46397763 - samples/sec: 18.39 - lr: 0.020000\n",
-      "2021-09-08 02:12:38,831 epoch 3 - iter 52/130 - loss 0.44100758 - samples/sec: 18.28 - lr: 0.020000\n",
-      "2021-09-08 02:12:39,544 epoch 3 - iter 65/130 - loss 0.49695367 - samples/sec: 18.25 - lr: 0.020000\n",
-      "2021-09-08 02:12:40,263 epoch 3 - iter 78/130 - loss 0.47151271 - samples/sec: 18.09 - lr: 0.020000\n",
-      "2021-09-08 02:12:40,961 epoch 3 - iter 91/130 - loss 0.43317249 - samples/sec: 18.64 - lr: 0.020000\n",
-      "2021-09-08 02:12:41,690 epoch 3 - iter 104/130 - loss 0.45515189 - samples/sec: 17.86 - lr: 0.020000\n",
-      "2021-09-08 02:12:42,395 epoch 3 - iter 117/130 - loss 0.42871990 - samples/sec: 18.46 - lr: 0.020000\n",
-      "2021-09-08 02:12:43,098 epoch 3 - iter 130/130 - loss 0.41261644 - samples/sec: 18.50 - lr: 0.020000\n",
-      "2021-09-08 02:12:43,100 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:12:43,100 EPOCH 3 done: loss 0.4126 - lr 0.0200000\n",
-      "2021-09-08 02:12:43,369 DEV : loss 0.8053849935531616 - score 0.2857\n",
-      "2021-09-08 02:12:43,370 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:12:43,372 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:12:44,083 epoch 4 - iter 13/130 - loss 0.23764843 - samples/sec: 18.78 - lr: 0.020000\n",
-      "2021-09-08 02:12:44,785 epoch 4 - iter 26/130 - loss 0.30592384 - samples/sec: 18.55 - lr: 0.020000\n",
-      "2021-09-08 02:12:45,485 epoch 4 - iter 39/130 - loss 0.35920555 - samples/sec: 18.60 - lr: 0.020000\n",
-      "2021-09-08 02:12:46,182 epoch 4 - iter 52/130 - loss 0.37541829 - samples/sec: 18.65 - lr: 0.020000\n",
-      "2021-09-08 02:12:46,896 epoch 4 - iter 65/130 - loss 0.36111128 - samples/sec: 18.24 - lr: 0.020000\n",
-      "2021-09-08 02:12:47,608 epoch 4 - iter 78/130 - loss 0.38377403 - samples/sec: 18.26 - lr: 0.020000\n",
-      "2021-09-08 02:12:48,301 epoch 4 - iter 91/130 - loss 0.37412619 - samples/sec: 18.78 - lr: 0.020000\n",
-      "2021-09-08 02:12:49,019 epoch 4 - iter 104/130 - loss 0.39389356 - samples/sec: 18.14 - lr: 0.020000\n",
-      "2021-09-08 02:12:49,726 epoch 4 - iter 117/130 - loss 0.37679325 - samples/sec: 18.39 - lr: 0.020000\n",
-      "2021-09-08 02:12:50,442 epoch 4 - iter 130/130 - loss 0.38870063 - samples/sec: 18.19 - lr: 0.020000\n",
-      "2021-09-08 02:12:50,443 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:12:50,443 EPOCH 4 done: loss 0.3887 - lr 0.0200000\n",
-      "2021-09-08 02:12:50,712 DEV : loss 0.47934478521347046 - score 0.2857\n",
-      "2021-09-08 02:12:50,713 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:12:50,715 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:12:51,434 epoch 5 - iter 13/130 - loss 0.04621922 - samples/sec: 18.59 - lr: 0.020000\n",
-      "2021-09-08 02:12:52,128 epoch 5 - iter 26/130 - loss 0.04939766 - samples/sec: 18.75 - lr: 0.020000\n",
-      "2021-09-08 02:12:52,827 epoch 5 - iter 39/130 - loss 0.16151325 - samples/sec: 18.62 - lr: 0.020000\n",
-      "2021-09-08 02:12:53,509 epoch 5 - iter 52/130 - loss 0.19906052 - samples/sec: 19.08 - lr: 0.020000\n",
-      "2021-09-08 02:12:54,206 epoch 5 - iter 65/130 - loss 0.28145738 - samples/sec: 18.69 - lr: 0.020000\n",
-      "2021-09-08 02:12:54,913 epoch 5 - iter 78/130 - loss 0.33597582 - samples/sec: 18.41 - lr: 0.020000\n",
-      "2021-09-08 02:12:55,616 epoch 5 - iter 91/130 - loss 0.35823182 - samples/sec: 18.51 - lr: 0.020000\n",
-      "2021-09-08 02:12:56,316 epoch 5 - iter 104/130 - loss 0.35606512 - samples/sec: 18.60 - lr: 0.020000\n",
-      "2021-09-08 02:12:57,007 epoch 5 - iter 117/130 - loss 0.33510102 - samples/sec: 18.83 - lr: 0.020000\n",
-      "2021-09-08 02:12:57,706 epoch 5 - iter 130/130 - loss 0.33265199 - samples/sec: 18.60 - lr: 0.020000\n",
-      "2021-09-08 02:12:57,708 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:12:57,708 EPOCH 5 done: loss 0.3327 - lr 0.0200000\n",
-      "2021-09-08 02:12:57,992 DEV : loss 0.32544660568237305 - score 0.4286\n",
-      "2021-09-08 02:12:57,993 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 02:12:57,996 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:12:58,704 epoch 6 - iter 13/130 - loss 0.19871358 - samples/sec: 18.89 - lr: 0.020000\n",
-      "2021-09-08 02:12:59,395 epoch 6 - iter 26/130 - loss 0.12812310 - samples/sec: 18.83 - lr: 0.020000\n",
-      "2021-09-08 02:13:00,080 epoch 6 - iter 39/130 - loss 0.16120524 - samples/sec: 19.00 - lr: 0.020000\n",
-      "2021-09-08 02:13:00,779 epoch 6 - iter 52/130 - loss 0.21742352 - samples/sec: 18.60 - lr: 0.020000\n",
-      "2021-09-08 02:13:01,463 epoch 6 - iter 65/130 - loss 0.19342408 - samples/sec: 19.04 - lr: 0.020000\n",
-      "2021-09-08 02:13:02,075 epoch 6 - iter 78/130 - loss 0.24365253 - samples/sec: 21.25 - lr: 0.020000\n",
-      "2021-09-08 02:13:02,671 epoch 6 - iter 91/130 - loss 0.27641597 - samples/sec: 21.84 - lr: 0.020000\n",
-      "2021-09-08 02:13:03,261 epoch 6 - iter 104/130 - loss 0.29039195 - samples/sec: 22.06 - lr: 0.020000\n",
-      "2021-09-08 02:13:03,856 epoch 6 - iter 117/130 - loss 0.26286431 - samples/sec: 21.89 - lr: 0.020000\n",
-      "2021-09-08 02:13:04,435 epoch 6 - iter 130/130 - loss 0.23788774 - samples/sec: 22.49 - lr: 0.020000\n"
+      "2021-09-21 22:10:42,034 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:10:42,865 epoch 2 - iter 13/130 - loss 0.66364731 - samples/sec: 16.59 - lr: 0.020000\n",
+      "2021-09-21 22:10:43,623 epoch 2 - iter 26/130 - loss 0.70818664 - samples/sec: 17.17 - lr: 0.020000\n",
+      "2021-09-21 22:10:44,406 epoch 2 - iter 39/130 - loss 0.71238092 - samples/sec: 16.61 - lr: 0.020000\n",
+      "2021-09-21 22:10:45,180 epoch 2 - iter 52/130 - loss 0.70405036 - samples/sec: 16.81 - lr: 0.020000\n",
+      "2021-09-21 22:10:45,958 epoch 2 - iter 65/130 - loss 0.69238351 - samples/sec: 16.74 - lr: 0.020000\n",
+      "2021-09-21 22:10:46,649 epoch 2 - iter 78/130 - loss 0.67776509 - samples/sec: 18.83 - lr: 0.020000\n",
+      "2021-09-21 22:10:47,219 epoch 2 - iter 91/130 - loss 0.67998377 - samples/sec: 22.82 - lr: 0.020000\n",
+      "2021-09-21 22:10:47,796 epoch 2 - iter 104/130 - loss 0.68075326 - samples/sec: 22.57 - lr: 0.020000\n",
+      "2021-09-21 22:10:48,347 epoch 2 - iter 117/130 - loss 0.68083404 - samples/sec: 23.63 - lr: 0.020000\n",
+      "2021-09-21 22:10:48,895 epoch 2 - iter 130/130 - loss 0.67929734 - samples/sec: 23.75 - lr: 0.020000\n",
+      "2021-09-21 22:10:48,896 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:10:48,897 EPOCH 2 done: loss 0.6793 - lr 0.0200000\n",
+      "2021-09-21 22:10:49,128 DEV : loss 0.38105446100234985 - score 0.0\n",
+      "2021-09-21 22:10:49,129 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:10:49,131 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:10:49,696 epoch 3 - iter 13/130 - loss 0.64618589 - samples/sec: 23.67 - lr: 0.020000\n",
+      "2021-09-21 22:10:50,251 epoch 3 - iter 26/130 - loss 0.64513771 - samples/sec: 23.47 - lr: 0.020000\n",
+      "2021-09-21 22:10:50,805 epoch 3 - iter 39/130 - loss 0.65458414 - samples/sec: 23.48 - lr: 0.020000\n",
+      "2021-09-21 22:10:51,383 epoch 3 - iter 52/130 - loss 0.65376503 - samples/sec: 22.52 - lr: 0.020000\n",
+      "2021-09-21 22:10:52,091 epoch 3 - iter 65/130 - loss 0.64436000 - samples/sec: 18.40 - lr: 0.020000\n",
+      "2021-09-21 22:10:52,818 epoch 3 - iter 78/130 - loss 0.64579508 - samples/sec: 17.88 - lr: 0.020000\n",
+      "2021-09-21 22:10:53,557 epoch 3 - iter 91/130 - loss 0.64831301 - samples/sec: 17.61 - lr: 0.020000\n",
+      "2021-09-21 22:10:54,308 epoch 3 - iter 104/130 - loss 0.64810701 - samples/sec: 17.34 - lr: 0.020000\n",
+      "2021-09-21 22:10:55,048 epoch 3 - iter 117/130 - loss 0.64860683 - samples/sec: 17.58 - lr: 0.020000\n",
+      "2021-09-21 22:10:55,817 epoch 3 - iter 130/130 - loss 0.64752222 - samples/sec: 16.91 - lr: 0.020000\n",
+      "2021-09-21 22:10:55,819 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:10:55,819 EPOCH 3 done: loss 0.6475 - lr 0.0200000\n",
+      "2021-09-21 22:10:56,353 DEV : loss 0.4421682357788086 - score 0.0714\n",
+      "2021-09-21 22:10:56,355 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 22:10:56,357 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:10:57,202 epoch 4 - iter 13/130 - loss 0.69749306 - samples/sec: 16.12 - lr: 0.020000\n",
+      "2021-09-21 22:10:57,852 epoch 4 - iter 26/130 - loss 0.67496524 - samples/sec: 20.01 - lr: 0.020000\n",
+      "2021-09-21 22:10:58,427 epoch 4 - iter 39/130 - loss 0.66449051 - samples/sec: 22.65 - lr: 0.020000\n",
+      "2021-09-21 22:10:58,983 epoch 4 - iter 52/130 - loss 0.65474909 - samples/sec: 23.42 - lr: 0.020000\n",
+      "2021-09-21 22:10:59,536 epoch 4 - iter 65/130 - loss 0.64956596 - samples/sec: 23.54 - lr: 0.020000\n",
+      "2021-09-21 22:11:00,113 epoch 4 - iter 78/130 - loss 0.64594841 - samples/sec: 22.60 - lr: 0.020000\n",
+      "2021-09-21 22:11:00,675 epoch 4 - iter 91/130 - loss 0.64863793 - samples/sec: 23.14 - lr: 0.020000\n",
+      "2021-09-21 22:11:01,314 epoch 4 - iter 104/130 - loss 0.64796709 - samples/sec: 20.38 - lr: 0.020000\n",
+      "2021-09-21 22:11:02,059 epoch 4 - iter 117/130 - loss 0.64751372 - samples/sec: 17.48 - lr: 0.020000\n",
+      "2021-09-21 22:11:02,835 epoch 4 - iter 130/130 - loss 0.64795061 - samples/sec: 16.77 - lr: 0.020000\n",
+      "2021-09-21 22:11:02,836 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:11:02,837 EPOCH 4 done: loss 0.6480 - lr 0.0200000\n",
+      "2021-09-21 22:11:03,438 DEV : loss 0.4234341084957123 - score 0.0\n",
+      "2021-09-21 22:11:03,439 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 22:11:03,444 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:11:04,242 epoch 5 - iter 13/130 - loss 0.66180506 - samples/sec: 17.08 - lr: 0.020000\n",
+      "2021-09-21 22:11:04,977 epoch 5 - iter 26/130 - loss 0.66263700 - samples/sec: 17.72 - lr: 0.020000\n",
+      "2021-09-21 22:11:05,698 epoch 5 - iter 39/130 - loss 0.65849738 - samples/sec: 18.04 - lr: 0.020000\n",
+      "2021-09-21 22:11:06,466 epoch 5 - iter 52/130 - loss 0.66209830 - samples/sec: 16.96 - lr: 0.020000\n",
+      "2021-09-21 22:11:07,199 epoch 5 - iter 65/130 - loss 0.65989452 - samples/sec: 17.75 - lr: 0.020000\n",
+      "2021-09-21 22:11:07,960 epoch 5 - iter 78/130 - loss 0.66143698 - samples/sec: 17.11 - lr: 0.020000\n",
+      "2021-09-21 22:11:08,698 epoch 5 - iter 91/130 - loss 0.65725082 - samples/sec: 17.62 - lr: 0.020000\n",
+      "2021-09-21 22:11:09,395 epoch 5 - iter 104/130 - loss 0.65528837 - samples/sec: 18.67 - lr: 0.020000\n",
+      "2021-09-21 22:11:10,146 epoch 5 - iter 117/130 - loss 0.65362990 - samples/sec: 17.34 - lr: 0.020000\n",
+      "2021-09-21 22:11:10,884 epoch 5 - iter 130/130 - loss 0.65479845 - samples/sec: 17.63 - lr: 0.020000\n",
+      "2021-09-21 22:11:10,886 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:11:10,886 EPOCH 5 done: loss 0.6548 - lr 0.0200000\n",
+      "2021-09-21 22:11:11,401 DEV : loss 0.4192907214164734 - score 0.0\n",
+      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 22:11:11,402 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 22:11:11,404 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:11:12,230 epoch 6 - iter 13/130 - loss 0.66364571 - samples/sec: 16.51 - lr: 0.010000\n",
+      "2021-09-21 22:11:12,987 epoch 6 - iter 26/130 - loss 0.65673702 - samples/sec: 17.20 - lr: 0.010000\n",
+      "2021-09-21 22:11:13,747 epoch 6 - iter 39/130 - loss 0.65612748 - samples/sec: 17.13 - lr: 0.010000\n",
+      "2021-09-21 22:11:14,468 epoch 6 - iter 52/130 - loss 0.64596127 - samples/sec: 18.04 - lr: 0.010000\n",
+      "2021-09-21 22:11:15,218 epoch 6 - iter 65/130 - loss 0.64271040 - samples/sec: 17.34 - lr: 0.010000\n",
+      "2021-09-21 22:11:15,958 epoch 6 - iter 78/130 - loss 0.63847419 - samples/sec: 17.61 - lr: 0.010000\n",
+      "2021-09-21 22:11:16,709 epoch 6 - iter 91/130 - loss 0.63742872 - samples/sec: 17.32 - lr: 0.010000\n",
+      "2021-09-21 22:11:17,464 epoch 6 - iter 104/130 - loss 0.63682563 - samples/sec: 17.23 - lr: 0.010000\n",
+      "2021-09-21 22:11:18,221 epoch 6 - iter 117/130 - loss 0.63892147 - samples/sec: 17.20 - lr: 0.010000\n",
+      "2021-09-21 22:11:18,974 epoch 6 - iter 130/130 - loss 0.63704422 - samples/sec: 17.28 - lr: 0.010000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:13:04,436 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:13:04,436 EPOCH 6 done: loss 0.2379 - lr 0.0200000\n",
-      "2021-09-08 02:13:04,674 DEV : loss 0.4103996157646179 - score 0.4286\n",
-      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 02:13:04,675 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 02:13:04,678 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:13:05,269 epoch 7 - iter 13/130 - loss 0.16017683 - samples/sec: 22.64 - lr: 0.010000\n",
-      "2021-09-08 02:13:05,844 epoch 7 - iter 26/130 - loss 0.12642350 - samples/sec: 22.63 - lr: 0.010000\n",
-      "2021-09-08 02:13:06,420 epoch 7 - iter 39/130 - loss 0.14862587 - samples/sec: 22.60 - lr: 0.010000\n",
-      "2021-09-08 02:13:06,988 epoch 7 - iter 52/130 - loss 0.11194959 - samples/sec: 22.88 - lr: 0.010000\n",
-      "2021-09-08 02:13:07,566 epoch 7 - iter 65/130 - loss 0.11684006 - samples/sec: 22.54 - lr: 0.010000\n",
-      "2021-09-08 02:13:08,136 epoch 7 - iter 78/130 - loss 0.09767181 - samples/sec: 22.84 - lr: 0.010000\n",
-      "2021-09-08 02:13:08,728 epoch 7 - iter 91/130 - loss 0.12696924 - samples/sec: 22.00 - lr: 0.010000\n",
-      "2021-09-08 02:13:09,311 epoch 7 - iter 104/130 - loss 0.13648024 - samples/sec: 22.31 - lr: 0.010000\n",
-      "2021-09-08 02:13:09,892 epoch 7 - iter 117/130 - loss 0.13999378 - samples/sec: 22.42 - lr: 0.010000\n",
-      "2021-09-08 02:13:10,471 epoch 7 - iter 130/130 - loss 0.14523535 - samples/sec: 22.48 - lr: 0.010000\n",
-      "2021-09-08 02:13:10,472 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:13:10,472 EPOCH 7 done: loss 0.1452 - lr 0.0100000\n",
-      "2021-09-08 02:13:10,732 DEV : loss 0.4054662585258484 - score 0.5\n",
-      "2021-09-08 02:13:10,733 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:13:10,735 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:13:11,322 epoch 8 - iter 13/130 - loss 0.00251137 - samples/sec: 22.82 - lr: 0.010000\n",
-      "2021-09-08 02:13:11,897 epoch 8 - iter 26/130 - loss 0.07023994 - samples/sec: 22.62 - lr: 0.010000\n",
-      "2021-09-08 02:13:12,478 epoch 8 - iter 39/130 - loss 0.16983606 - samples/sec: 22.41 - lr: 0.010000\n",
-      "2021-09-08 02:13:13,054 epoch 8 - iter 52/130 - loss 0.15857195 - samples/sec: 22.58 - lr: 0.010000\n",
-      "2021-09-08 02:13:13,655 epoch 8 - iter 65/130 - loss 0.13645745 - samples/sec: 21.69 - lr: 0.010000\n",
-      "2021-09-08 02:13:14,258 epoch 8 - iter 78/130 - loss 0.13418315 - samples/sec: 21.60 - lr: 0.010000\n",
-      "2021-09-08 02:13:14,830 epoch 8 - iter 91/130 - loss 0.11573869 - samples/sec: 22.76 - lr: 0.010000\n",
-      "2021-09-08 02:13:15,400 epoch 8 - iter 104/130 - loss 0.10153736 - samples/sec: 22.81 - lr: 0.010000\n",
-      "2021-09-08 02:13:15,973 epoch 8 - iter 117/130 - loss 0.09187991 - samples/sec: 22.72 - lr: 0.010000\n",
-      "2021-09-08 02:13:16,547 epoch 8 - iter 130/130 - loss 0.09745779 - samples/sec: 22.67 - lr: 0.010000\n",
-      "2021-09-08 02:13:16,548 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:13:16,548 EPOCH 8 done: loss 0.0975 - lr 0.0100000\n",
-      "2021-09-08 02:13:17,444 DEV : loss 0.502996563911438 - score 0.3571\n",
-      "2021-09-08 02:13:17,445 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:13:17,447 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:13:18,044 epoch 9 - iter 13/130 - loss 0.27218428 - samples/sec: 22.40 - lr: 0.010000\n",
-      "2021-09-08 02:13:18,642 epoch 9 - iter 26/130 - loss 0.13700894 - samples/sec: 21.77 - lr: 0.010000\n",
-      "2021-09-08 02:13:19,214 epoch 9 - iter 39/130 - loss 0.09249927 - samples/sec: 22.74 - lr: 0.010000\n",
-      "2021-09-08 02:13:19,793 epoch 9 - iter 52/130 - loss 0.11239775 - samples/sec: 22.48 - lr: 0.010000\n",
-      "2021-09-08 02:13:20,364 epoch 9 - iter 65/130 - loss 0.09973352 - samples/sec: 22.79 - lr: 0.010000\n",
-      "2021-09-08 02:13:20,942 epoch 9 - iter 78/130 - loss 0.11624354 - samples/sec: 22.54 - lr: 0.010000\n",
-      "2021-09-08 02:13:21,524 epoch 9 - iter 91/130 - loss 0.10007046 - samples/sec: 22.37 - lr: 0.010000\n",
-      "2021-09-08 02:13:22,117 epoch 9 - iter 104/130 - loss 0.08838287 - samples/sec: 21.95 - lr: 0.010000\n",
-      "2021-09-08 02:13:22,692 epoch 9 - iter 117/130 - loss 0.09453644 - samples/sec: 22.65 - lr: 0.010000\n",
-      "2021-09-08 02:13:23,267 epoch 9 - iter 130/130 - loss 0.09576977 - samples/sec: 22.63 - lr: 0.010000\n",
-      "2021-09-08 02:13:23,268 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:13:23,269 EPOCH 9 done: loss 0.0958 - lr 0.0100000\n",
-      "2021-09-08 02:13:23,507 DEV : loss 0.4516597092151642 - score 0.3571\n",
-      "2021-09-08 02:13:23,508 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 02:13:23,510 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:13:24,097 epoch 10 - iter 13/130 - loss 0.00188244 - samples/sec: 22.78 - lr: 0.010000\n",
-      "2021-09-08 02:13:24,669 epoch 10 - iter 26/130 - loss 0.00788171 - samples/sec: 22.76 - lr: 0.010000\n",
-      "2021-09-08 02:13:25,255 epoch 10 - iter 39/130 - loss 0.07976428 - samples/sec: 22.20 - lr: 0.010000\n",
-      "2021-09-08 02:13:25,825 epoch 10 - iter 52/130 - loss 0.06030168 - samples/sec: 22.85 - lr: 0.010000\n",
-      "2021-09-08 02:13:26,404 epoch 10 - iter 65/130 - loss 0.05168447 - samples/sec: 22.47 - lr: 0.010000\n",
-      "2021-09-08 02:13:26,974 epoch 10 - iter 78/130 - loss 0.04322950 - samples/sec: 22.85 - lr: 0.010000\n",
-      "2021-09-08 02:13:27,552 epoch 10 - iter 91/130 - loss 0.06879526 - samples/sec: 22.50 - lr: 0.010000\n",
-      "2021-09-08 02:13:28,128 epoch 10 - iter 104/130 - loss 0.08510047 - samples/sec: 22.59 - lr: 0.010000\n",
-      "2021-09-08 02:13:28,707 epoch 10 - iter 117/130 - loss 0.07584101 - samples/sec: 22.51 - lr: 0.010000\n",
-      "2021-09-08 02:13:29,283 epoch 10 - iter 130/130 - loss 0.07956747 - samples/sec: 22.61 - lr: 0.010000\n",
-      "2021-09-08 02:13:29,284 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:13:29,284 EPOCH 10 done: loss 0.0796 - lr 0.0100000\n",
-      "2021-09-08 02:13:29,520 DEV : loss 0.41482943296432495 - score 0.4286\n",
-      "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 02:13:29,521 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 02:13:33,703 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:13:33,704 Testing using best model ...\n",
-      "2021-09-08 02:13:33,705 loading file None/best-model.pt\n",
+      "2021-09-21 22:11:18,975 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:11:18,976 EPOCH 6 done: loss 0.6370 - lr 0.0100000\n",
+      "2021-09-21 22:11:19,474 DEV : loss 0.4613257050514221 - score 0.0\n",
+      "2021-09-21 22:11:19,475 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:11:19,477 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:11:20,259 epoch 7 - iter 13/130 - loss 0.63679948 - samples/sec: 17.24 - lr: 0.010000\n",
+      "2021-09-21 22:11:21,028 epoch 7 - iter 26/130 - loss 0.64291432 - samples/sec: 16.92 - lr: 0.010000\n",
+      "2021-09-21 22:11:21,792 epoch 7 - iter 39/130 - loss 0.64088676 - samples/sec: 17.03 - lr: 0.010000\n",
+      "2021-09-21 22:11:22,552 epoch 7 - iter 52/130 - loss 0.63694678 - samples/sec: 17.14 - lr: 0.010000\n",
+      "2021-09-21 22:11:23,281 epoch 7 - iter 65/130 - loss 0.63574478 - samples/sec: 17.84 - lr: 0.010000\n",
+      "2021-09-21 22:11:24,007 epoch 7 - iter 78/130 - loss 0.64015078 - samples/sec: 17.92 - lr: 0.010000\n",
+      "2021-09-21 22:11:24,783 epoch 7 - iter 91/130 - loss 0.63372835 - samples/sec: 16.77 - lr: 0.010000\n",
+      "2021-09-21 22:11:25,526 epoch 7 - iter 104/130 - loss 0.63386477 - samples/sec: 17.52 - lr: 0.010000\n",
+      "2021-09-21 22:11:26,126 epoch 7 - iter 117/130 - loss 0.63699266 - samples/sec: 21.70 - lr: 0.010000\n",
+      "2021-09-21 22:11:26,684 epoch 7 - iter 130/130 - loss 0.64040975 - samples/sec: 23.32 - lr: 0.010000\n",
+      "2021-09-21 22:11:26,686 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:11:26,686 EPOCH 7 done: loss 0.6404 - lr 0.0100000\n",
+      "2021-09-21 22:11:27,015 DEV : loss 0.46512478590011597 - score 0.0\n",
+      "2021-09-21 22:11:27,016 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 22:11:27,091 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:11:27,669 epoch 8 - iter 13/130 - loss 0.62414341 - samples/sec: 23.29 - lr: 0.010000\n",
+      "2021-09-21 22:11:28,226 epoch 8 - iter 26/130 - loss 0.61598126 - samples/sec: 23.36 - lr: 0.010000\n",
+      "2021-09-21 22:11:28,783 epoch 8 - iter 39/130 - loss 0.62656523 - samples/sec: 23.39 - lr: 0.010000\n",
+      "2021-09-21 22:11:29,338 epoch 8 - iter 52/130 - loss 0.64218640 - samples/sec: 23.44 - lr: 0.010000\n",
+      "2021-09-21 22:11:30,003 epoch 8 - iter 65/130 - loss 0.64750095 - samples/sec: 19.58 - lr: 0.010000\n",
+      "2021-09-21 22:11:30,745 epoch 8 - iter 78/130 - loss 0.65231478 - samples/sec: 17.52 - lr: 0.010000\n",
+      "2021-09-21 22:11:31,500 epoch 8 - iter 91/130 - loss 0.65467211 - samples/sec: 17.23 - lr: 0.010000\n",
+      "2021-09-21 22:11:32,237 epoch 8 - iter 104/130 - loss 0.65215178 - samples/sec: 17.67 - lr: 0.010000\n",
+      "2021-09-21 22:11:32,963 epoch 8 - iter 117/130 - loss 0.65071669 - samples/sec: 17.94 - lr: 0.010000\n",
+      "2021-09-21 22:11:33,714 epoch 8 - iter 130/130 - loss 0.65044260 - samples/sec: 17.32 - lr: 0.010000\n",
+      "2021-09-21 22:11:33,715 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:11:33,715 EPOCH 8 done: loss 0.6504 - lr 0.0100000\n",
+      "2021-09-21 22:11:34,274 DEV : loss 0.42444488406181335 - score 0.0714\n",
+      "2021-09-21 22:11:34,276 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 22:11:34,278 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:11:35,019 epoch 9 - iter 13/130 - loss 0.65045426 - samples/sec: 18.22 - lr: 0.010000\n",
+      "2021-09-21 22:11:35,787 epoch 9 - iter 26/130 - loss 0.64281982 - samples/sec: 16.93 - lr: 0.010000\n",
+      "2021-09-21 22:11:36,383 epoch 9 - iter 39/130 - loss 0.63422748 - samples/sec: 21.84 - lr: 0.010000\n",
+      "2021-09-21 22:11:36,969 epoch 9 - iter 52/130 - loss 0.64123433 - samples/sec: 22.21 - lr: 0.010000\n",
+      "2021-09-21 22:11:37,531 epoch 9 - iter 65/130 - loss 0.64347457 - samples/sec: 23.18 - lr: 0.010000\n",
+      "2021-09-21 22:11:38,092 epoch 9 - iter 78/130 - loss 0.64035602 - samples/sec: 23.21 - lr: 0.010000\n",
+      "2021-09-21 22:11:38,655 epoch 9 - iter 91/130 - loss 0.64182112 - samples/sec: 23.12 - lr: 0.010000\n",
+      "2021-09-21 22:11:39,215 epoch 9 - iter 104/130 - loss 0.64125262 - samples/sec: 23.26 - lr: 0.010000\n",
+      "2021-09-21 22:11:39,768 epoch 9 - iter 117/130 - loss 0.64357804 - samples/sec: 23.55 - lr: 0.010000\n",
+      "2021-09-21 22:11:40,582 epoch 9 - iter 130/130 - loss 0.64342102 - samples/sec: 17.37 - lr: 0.010000\n",
+      "2021-09-21 22:11:40,583 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:11:40,584 EPOCH 9 done: loss 0.6434 - lr 0.0100000\n",
+      "2021-09-21 22:11:41,113 DEV : loss 0.4535703659057617 - score 0.0714\n",
+      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 22:11:41,115 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 22:11:41,119 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:11:41,895 epoch 10 - iter 13/130 - loss 0.63725265 - samples/sec: 17.57 - lr: 0.005000\n",
+      "2021-09-21 22:11:42,653 epoch 10 - iter 26/130 - loss 0.63464297 - samples/sec: 17.18 - lr: 0.005000\n",
+      "2021-09-21 22:11:43,413 epoch 10 - iter 39/130 - loss 0.63410456 - samples/sec: 17.13 - lr: 0.005000\n",
+      "2021-09-21 22:11:44,136 epoch 10 - iter 52/130 - loss 0.63794264 - samples/sec: 17.99 - lr: 0.005000\n",
+      "2021-09-21 22:11:44,896 epoch 10 - iter 65/130 - loss 0.63806583 - samples/sec: 17.12 - lr: 0.005000\n",
+      "2021-09-21 22:11:45,651 epoch 10 - iter 78/130 - loss 0.63882884 - samples/sec: 17.23 - lr: 0.005000\n",
+      "2021-09-21 22:11:46,384 epoch 10 - iter 91/130 - loss 0.64087068 - samples/sec: 17.77 - lr: 0.005000\n",
+      "2021-09-21 22:11:47,150 epoch 10 - iter 104/130 - loss 0.64059345 - samples/sec: 17.00 - lr: 0.005000\n",
+      "2021-09-21 22:11:47,906 epoch 10 - iter 117/130 - loss 0.64008367 - samples/sec: 17.21 - lr: 0.005000\n",
+      "2021-09-21 22:11:48,676 epoch 10 - iter 130/130 - loss 0.63969715 - samples/sec: 16.88 - lr: 0.005000\n",
+      "2021-09-21 22:11:48,678 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:11:48,678 EPOCH 10 done: loss 0.6397 - lr 0.0050000\n",
+      "2021-09-21 22:11:49,169 DEV : loss 0.44200319051742554 - score 0.0\n",
+      "2021-09-21 22:11:49,169 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:11:53,202 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:11:53,203 Testing using best model ...\n",
+      "2021-09-21 22:11:53,204 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:13:37,885 \t0.0\n",
-      "2021-09-08 02:13:37,886 \n",
+      "2021-09-21 22:11:58,697 \t0.0\n",
+      "2021-09-21 22:11:58,698 \n",
       "Results:\n",
       "- F-score (micro) 0.0\n",
       "- F-score (macro) 0.0\n",
@@ -5789,8 +5780,8 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:13:37,886 ----------------------------------------------------------------------------------------------------\n",
-      "Accuracy Durchschnitt: 0.5886792452830188\n"
+      "2021-09-21 22:11:58,698 ----------------------------------------------------------------------------------------------------\n",
+      "Accuracy Durchschnitt: 0.26754716981132076\n"
      ]
     }
    ],
@@ -5875,11 +5866,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "id": "86988d74",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.5943396226415094, 0.07735849056603773, 0.5622641509433962, 0.052830188679245285, 0.0509433962264151]\n",
+      "0.25410380423170903\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(avg_acc_list)\n",
+    "print(statistics.pstdev(avg_acc_list))"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -5891,7 +5894,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 8,
    "id": "263ee449",
    "metadata": {},
    "outputs": [
@@ -5899,25 +5902,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:32:59,962 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
+      "2021-09-21 22:12:08,687 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:33:03,881 Computing label dictionary. Progress:\n"
+      "2021-09-21 22:12:12,878 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 146/146 [00:00<00:00, 69555.70it/s]"
+      "100%|██████████| 146/146 [00:00<00:00, 74073.83it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:33:03,885 [b'undertake a journey or trip', b'the practical application of science to commerce or industry', b'a healthy state of wellbeing free from disease', b'an adult female person (as opposed to a man)', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child a relative who plays the role of guardian', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child; a relative who plays the role of guardian', b'the social event at which the ceremony of marriage is performed', b'the latest and most admired style in clothes and cosmetics and behavior', b'an activity that is diverting and that holds the attention', b'a particular branch of scientific knowledge', b'the legal dissolution of a marriage', b'an act punishable by law; usually considered an evil act an evil act not necessarily punishable by law', b'a strong belief in a supernatural power or powers that control human destiny', b'an active diversion requiring physical exertion and competition', b'social relations involving intrigue to gain authority or power', b'light and humorous drama with a happy ending']\n",
-      "2021-09-08 02:33:03,894 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:33:03,895 Model: \"TARSClassifier(\n",
+      "2021-09-21 22:12:12,882 [b'undertake a journey or trip', b'the practical application of science to commerce or industry', b'a healthy state of wellbeing free from disease', b'an adult female person (as opposed to a man)', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child a relative who plays the role of guardian', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child; a relative who plays the role of guardian', b'the social event at which the ceremony of marriage is performed', b'the latest and most admired style in clothes and cosmetics and behavior', b'an activity that is diverting and that holds the attention', b'a particular branch of scientific knowledge', b'the legal dissolution of a marriage', b'an act punishable by law; usually considered an evil act an evil act not necessarily punishable by law', b'a strong belief in a supernatural power or powers that control human destiny', b'an active diversion requiring physical exertion and competition', b'social relations involving intrigue to gain authority or power', b'light and humorous drama with a happy ending']\n",
+      "2021-09-21 22:12:12,949 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:12:12,951 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -6230,25 +6233,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:33:03,896 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:33:03,896 Corpus: \"Corpus: 130 train + 14 dev + 16 test sentences\"\n",
-      "2021-09-08 02:33:03,897 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:33:03,897 Parameters:\n",
-      "2021-09-08 02:33:03,897  - learning_rate: \"0.02\"\n",
-      "2021-09-08 02:33:03,897  - mini_batch_size: \"1\"\n",
-      "2021-09-08 02:33:03,898  - patience: \"3\"\n",
-      "2021-09-08 02:33:03,898  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 02:33:03,898  - max_epochs: \"10\"\n",
-      "2021-09-08 02:33:03,898  - shuffle: \"True\"\n",
-      "2021-09-08 02:33:03,899  - train_with_dev: \"False\"\n",
-      "2021-09-08 02:33:03,899  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 02:33:03,899 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:33:03,900 Model training base path: \"None\"\n",
-      "2021-09-08 02:33:03,900 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:33:03,900 Device: cuda:0\n",
-      "2021-09-08 02:33:03,900 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:33:03,901 Embeddings storage mode: cpu\n",
-      "2021-09-08 02:33:03,907 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 22:12:12,952 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:12:12,952 Corpus: \"Corpus: 130 train + 14 dev + 16 test sentences\"\n",
+      "2021-09-21 22:12:12,952 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:12:12,952 Parameters:\n",
+      "2021-09-21 22:12:12,953  - learning_rate: \"0.02\"\n",
+      "2021-09-21 22:12:12,953  - mini_batch_size: \"1\"\n",
+      "2021-09-21 22:12:12,953  - patience: \"3\"\n",
+      "2021-09-21 22:12:12,954  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 22:12:12,954  - max_epochs: \"10\"\n",
+      "2021-09-21 22:12:12,954  - shuffle: \"True\"\n",
+      "2021-09-21 22:12:12,954  - train_with_dev: \"False\"\n",
+      "2021-09-21 22:12:12,955  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 22:12:12,955 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:12:12,955 Model training base path: \"None\"\n",
+      "2021-09-21 22:12:12,956 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:12:12,956 Device: cuda:0\n",
+      "2021-09-21 22:12:12,956 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:12:12,956 Embeddings storage mode: cpu\n",
+      "2021-09-21 22:12:13,023 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -6262,201 +6265,202 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:33:04,533 epoch 1 - iter 13/130 - loss 0.53307120 - samples/sec: 21.59 - lr: 0.020000\n",
-      "2021-09-08 02:33:05,141 epoch 1 - iter 26/130 - loss 0.58677911 - samples/sec: 21.41 - lr: 0.020000\n",
-      "2021-09-08 02:33:05,749 epoch 1 - iter 39/130 - loss 0.50998892 - samples/sec: 21.39 - lr: 0.020000\n",
-      "2021-09-08 02:33:06,351 epoch 1 - iter 52/130 - loss 0.54949956 - samples/sec: 21.61 - lr: 0.020000\n",
-      "2021-09-08 02:33:06,965 epoch 1 - iter 65/130 - loss 0.58776748 - samples/sec: 21.19 - lr: 0.020000\n",
-      "2021-09-08 02:33:07,562 epoch 1 - iter 78/130 - loss 0.54455884 - samples/sec: 21.83 - lr: 0.020000\n",
-      "2021-09-08 02:33:08,168 epoch 1 - iter 91/130 - loss 0.54808884 - samples/sec: 21.47 - lr: 0.020000\n",
-      "2021-09-08 02:33:08,758 epoch 1 - iter 104/130 - loss 0.52177372 - samples/sec: 22.05 - lr: 0.020000\n",
-      "2021-09-08 02:33:09,358 epoch 1 - iter 117/130 - loss 0.52809275 - samples/sec: 21.69 - lr: 0.020000\n",
-      "2021-09-08 02:33:09,971 epoch 1 - iter 130/130 - loss 0.52212012 - samples/sec: 21.26 - lr: 0.020000\n",
-      "2021-09-08 02:33:09,972 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:33:09,972 EPOCH 1 done: loss 0.5221 - lr 0.0200000\n",
-      "2021-09-08 02:33:10,287 DEV : loss 0.6690225005149841 - score 0.0714\n",
-      "2021-09-08 02:33:10,287 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:12:13,834 epoch 1 - iter 13/130 - loss 0.33742400 - samples/sec: 16.50 - lr: 0.020000\n",
+      "2021-09-21 22:12:14,648 epoch 1 - iter 26/130 - loss 0.40101722 - samples/sec: 15.97 - lr: 0.020000\n",
+      "2021-09-21 22:12:15,507 epoch 1 - iter 39/130 - loss 0.44592950 - samples/sec: 15.16 - lr: 0.020000\n",
+      "2021-09-21 22:12:16,305 epoch 1 - iter 52/130 - loss 0.47854369 - samples/sec: 16.29 - lr: 0.020000\n",
+      "2021-09-21 22:12:17,066 epoch 1 - iter 65/130 - loss 0.51912912 - samples/sec: 17.11 - lr: 0.020000\n",
+      "2021-09-21 22:12:17,929 epoch 1 - iter 78/130 - loss 0.54014279 - samples/sec: 15.08 - lr: 0.020000\n",
+      "2021-09-21 22:12:18,791 epoch 1 - iter 91/130 - loss 0.54627587 - samples/sec: 15.09 - lr: 0.020000\n",
+      "2021-09-21 22:12:19,602 epoch 1 - iter 104/130 - loss 0.53667164 - samples/sec: 16.04 - lr: 0.020000\n",
+      "2021-09-21 22:12:20,373 epoch 1 - iter 117/130 - loss 0.53259258 - samples/sec: 16.87 - lr: 0.020000\n",
+      "2021-09-21 22:12:21,274 epoch 1 - iter 130/130 - loss 0.53526030 - samples/sec: 14.45 - lr: 0.020000\n",
+      "2021-09-21 22:12:21,275 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:12:21,275 EPOCH 1 done: loss 0.5353 - lr 0.0200000\n",
+      "2021-09-21 22:12:21,974 DEV : loss 0.711533784866333 - score 0.0714\n",
+      "2021-09-21 22:12:21,975 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:33:14,153 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:33:14,794 epoch 2 - iter 13/130 - loss 0.92306317 - samples/sec: 21.08 - lr: 0.020000\n",
-      "2021-09-08 02:33:15,397 epoch 2 - iter 26/130 - loss 0.73867359 - samples/sec: 21.57 - lr: 0.020000\n",
-      "2021-09-08 02:33:16,010 epoch 2 - iter 39/130 - loss 0.71366819 - samples/sec: 21.24 - lr: 0.020000\n",
-      "2021-09-08 02:33:16,617 epoch 2 - iter 52/130 - loss 0.70171995 - samples/sec: 21.43 - lr: 0.020000\n",
-      "2021-09-08 02:33:17,225 epoch 2 - iter 65/130 - loss 0.68740219 - samples/sec: 21.42 - lr: 0.020000\n",
-      "2021-09-08 02:33:17,842 epoch 2 - iter 78/130 - loss 0.68011482 - samples/sec: 21.09 - lr: 0.020000\n",
-      "2021-09-08 02:33:18,452 epoch 2 - iter 91/130 - loss 0.69758957 - samples/sec: 21.35 - lr: 0.020000\n",
-      "2021-09-08 02:33:19,031 epoch 2 - iter 104/130 - loss 0.69362482 - samples/sec: 22.48 - lr: 0.020000\n",
-      "2021-09-08 02:33:19,637 epoch 2 - iter 117/130 - loss 0.68600384 - samples/sec: 21.46 - lr: 0.020000\n",
-      "2021-09-08 02:33:20,246 epoch 2 - iter 130/130 - loss 0.68244513 - samples/sec: 21.39 - lr: 0.020000\n",
-      "2021-09-08 02:33:20,247 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:33:20,247 EPOCH 2 done: loss 0.6824 - lr 0.0200000\n",
-      "2021-09-08 02:33:20,565 DEV : loss 0.4263591170310974 - score 0.2143\n",
-      "2021-09-08 02:33:20,565 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:12:26,181 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:12:27,124 epoch 2 - iter 13/130 - loss 0.96846276 - samples/sec: 14.85 - lr: 0.020000\n",
+      "2021-09-21 22:12:27,952 epoch 2 - iter 26/130 - loss 0.84581264 - samples/sec: 15.72 - lr: 0.020000\n",
+      "2021-09-21 22:12:28,770 epoch 2 - iter 39/130 - loss 0.81911207 - samples/sec: 15.90 - lr: 0.020000\n",
+      "2021-09-21 22:12:29,490 epoch 2 - iter 52/130 - loss 0.77675979 - samples/sec: 18.07 - lr: 0.020000\n",
+      "2021-09-21 22:12:30,111 epoch 2 - iter 65/130 - loss 0.75865088 - samples/sec: 20.98 - lr: 0.020000\n",
+      "2021-09-21 22:12:30,704 epoch 2 - iter 78/130 - loss 0.72299411 - samples/sec: 21.93 - lr: 0.020000\n",
+      "2021-09-21 22:12:31,295 epoch 2 - iter 91/130 - loss 0.72238054 - samples/sec: 22.01 - lr: 0.020000\n",
+      "2021-09-21 22:12:31,891 epoch 2 - iter 104/130 - loss 0.70224595 - samples/sec: 21.84 - lr: 0.020000\n",
+      "2021-09-21 22:12:32,480 epoch 2 - iter 117/130 - loss 0.69218556 - samples/sec: 22.10 - lr: 0.020000\n",
+      "2021-09-21 22:12:33,064 epoch 2 - iter 130/130 - loss 0.68177837 - samples/sec: 22.31 - lr: 0.020000\n",
+      "2021-09-21 22:12:33,065 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:12:33,065 EPOCH 2 done: loss 0.6818 - lr 0.0200000\n",
+      "2021-09-21 22:12:33,386 DEV : loss 0.4214078187942505 - score 0.2143\n",
+      "2021-09-21 22:12:33,387 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:33:24,618 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:33:25,260 epoch 3 - iter 13/130 - loss 0.53060470 - samples/sec: 21.07 - lr: 0.020000\n",
-      "2021-09-08 02:33:25,872 epoch 3 - iter 26/130 - loss 0.51422824 - samples/sec: 21.24 - lr: 0.020000\n",
-      "2021-09-08 02:33:26,482 epoch 3 - iter 39/130 - loss 0.52014966 - samples/sec: 21.37 - lr: 0.020000\n",
-      "2021-09-08 02:33:27,092 epoch 3 - iter 52/130 - loss 0.55221413 - samples/sec: 21.33 - lr: 0.020000\n",
-      "2021-09-08 02:33:27,697 epoch 3 - iter 65/130 - loss 0.54790859 - samples/sec: 21.50 - lr: 0.020000\n",
-      "2021-09-08 02:33:28,305 epoch 3 - iter 78/130 - loss 0.56212209 - samples/sec: 21.41 - lr: 0.020000\n",
-      "2021-09-08 02:33:28,918 epoch 3 - iter 91/130 - loss 0.57672020 - samples/sec: 21.24 - lr: 0.020000\n",
-      "2021-09-08 02:33:29,529 epoch 3 - iter 104/130 - loss 0.55644595 - samples/sec: 21.30 - lr: 0.020000\n",
-      "2021-09-08 02:33:30,135 epoch 3 - iter 117/130 - loss 0.57592510 - samples/sec: 21.48 - lr: 0.020000\n",
-      "2021-09-08 02:33:30,745 epoch 3 - iter 130/130 - loss 0.56227923 - samples/sec: 21.35 - lr: 0.020000\n",
-      "2021-09-08 02:33:30,746 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:33:30,746 EPOCH 3 done: loss 0.5623 - lr 0.0200000\n",
-      "2021-09-08 02:33:31,061 DEV : loss 0.6534147262573242 - score 0.5\n",
-      "2021-09-08 02:33:31,062 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:12:37,427 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:12:38,271 epoch 3 - iter 13/130 - loss 0.57669328 - samples/sec: 16.27 - lr: 0.020000\n",
+      "2021-09-21 22:12:39,101 epoch 3 - iter 26/130 - loss 0.58719077 - samples/sec: 15.68 - lr: 0.020000\n",
+      "2021-09-21 22:12:39,770 epoch 3 - iter 39/130 - loss 0.66294636 - samples/sec: 19.47 - lr: 0.020000\n",
+      "2021-09-21 22:12:40,385 epoch 3 - iter 52/130 - loss 0.63841927 - samples/sec: 21.18 - lr: 0.020000\n",
+      "2021-09-21 22:12:40,978 epoch 3 - iter 65/130 - loss 0.61065106 - samples/sec: 21.95 - lr: 0.020000\n",
+      "2021-09-21 22:12:41,571 epoch 3 - iter 78/130 - loss 0.59648082 - samples/sec: 21.99 - lr: 0.020000\n",
+      "2021-09-21 22:12:42,172 epoch 3 - iter 91/130 - loss 0.61284997 - samples/sec: 21.66 - lr: 0.020000\n",
+      "2021-09-21 22:12:42,767 epoch 3 - iter 104/130 - loss 0.61835609 - samples/sec: 21.87 - lr: 0.020000\n",
+      "2021-09-21 22:12:43,509 epoch 3 - iter 117/130 - loss 0.60260446 - samples/sec: 17.56 - lr: 0.020000\n",
+      "2021-09-21 22:12:44,313 epoch 3 - iter 130/130 - loss 0.59389792 - samples/sec: 16.18 - lr: 0.020000\n",
+      "2021-09-21 22:12:44,314 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:12:44,315 EPOCH 3 done: loss 0.5939 - lr 0.0200000\n",
+      "2021-09-21 22:12:44,958 DEV : loss 0.6574377417564392 - score 0.2857\n",
+      "2021-09-21 22:12:44,959 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:33:35,116 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:33:35,851 epoch 4 - iter 13/130 - loss 0.60289827 - samples/sec: 18.33 - lr: 0.020000\n",
-      "2021-09-08 02:33:36,564 epoch 4 - iter 26/130 - loss 0.45752522 - samples/sec: 18.26 - lr: 0.020000\n",
-      "2021-09-08 02:33:37,283 epoch 4 - iter 39/130 - loss 0.46035004 - samples/sec: 18.11 - lr: 0.020000\n",
-      "2021-09-08 02:33:37,995 epoch 4 - iter 52/130 - loss 0.44518540 - samples/sec: 18.26 - lr: 0.020000\n",
-      "2021-09-08 02:33:38,706 epoch 4 - iter 65/130 - loss 0.46366692 - samples/sec: 18.31 - lr: 0.020000\n",
-      "2021-09-08 02:33:39,435 epoch 4 - iter 78/130 - loss 0.45272784 - samples/sec: 17.86 - lr: 0.020000\n",
-      "2021-09-08 02:33:40,155 epoch 4 - iter 91/130 - loss 0.45173951 - samples/sec: 18.07 - lr: 0.020000\n",
-      "2021-09-08 02:33:40,865 epoch 4 - iter 104/130 - loss 0.46171763 - samples/sec: 18.32 - lr: 0.020000\n",
-      "2021-09-08 02:33:41,588 epoch 4 - iter 117/130 - loss 0.44293711 - samples/sec: 18.01 - lr: 0.020000\n",
-      "2021-09-08 02:33:42,307 epoch 4 - iter 130/130 - loss 0.48106628 - samples/sec: 18.09 - lr: 0.020000\n",
-      "2021-09-08 02:33:42,308 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:33:42,309 EPOCH 4 done: loss 0.4811 - lr 0.0200000\n",
-      "2021-09-08 02:33:42,637 DEV : loss 0.6699220538139343 - score 0.5\n",
-      "2021-09-08 02:33:42,638 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:33:42,640 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:33:43,372 epoch 5 - iter 13/130 - loss 0.38181260 - samples/sec: 18.29 - lr: 0.020000\n",
-      "2021-09-08 02:33:44,077 epoch 5 - iter 26/130 - loss 0.39083004 - samples/sec: 18.48 - lr: 0.020000\n",
-      "2021-09-08 02:33:44,793 epoch 5 - iter 39/130 - loss 0.47372590 - samples/sec: 18.18 - lr: 0.020000\n",
-      "2021-09-08 02:33:45,501 epoch 5 - iter 52/130 - loss 0.48928896 - samples/sec: 18.37 - lr: 0.020000\n",
-      "2021-09-08 02:33:46,207 epoch 5 - iter 65/130 - loss 0.50459049 - samples/sec: 18.46 - lr: 0.020000\n",
-      "2021-09-08 02:33:46,918 epoch 5 - iter 78/130 - loss 0.47792861 - samples/sec: 18.29 - lr: 0.020000\n",
-      "2021-09-08 02:33:47,637 epoch 5 - iter 91/130 - loss 0.46830205 - samples/sec: 18.12 - lr: 0.020000\n",
-      "2021-09-08 02:33:48,344 epoch 5 - iter 104/130 - loss 0.44553225 - samples/sec: 18.40 - lr: 0.020000\n",
-      "2021-09-08 02:33:49,060 epoch 5 - iter 117/130 - loss 0.43280662 - samples/sec: 18.17 - lr: 0.020000\n",
-      "2021-09-08 02:33:49,772 epoch 5 - iter 130/130 - loss 0.43448628 - samples/sec: 18.29 - lr: 0.020000\n",
-      "2021-09-08 02:33:49,773 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:33:49,774 EPOCH 5 done: loss 0.4345 - lr 0.0200000\n",
-      "2021-09-08 02:33:50,218 DEV : loss 0.5386702418327332 - score 0.2857\n",
-      "2021-09-08 02:33:50,219 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:33:50,220 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:33:50,959 epoch 6 - iter 13/130 - loss 0.57166521 - samples/sec: 18.14 - lr: 0.020000\n",
-      "2021-09-08 02:33:51,661 epoch 6 - iter 26/130 - loss 0.36024017 - samples/sec: 18.55 - lr: 0.020000\n",
-      "2021-09-08 02:33:52,367 epoch 6 - iter 39/130 - loss 0.39895760 - samples/sec: 18.42 - lr: 0.020000\n",
-      "2021-09-08 02:33:53,053 epoch 6 - iter 52/130 - loss 0.35760896 - samples/sec: 18.97 - lr: 0.020000\n",
-      "2021-09-08 02:33:53,750 epoch 6 - iter 65/130 - loss 0.35080620 - samples/sec: 18.69 - lr: 0.020000\n",
-      "2021-09-08 02:33:54,444 epoch 6 - iter 78/130 - loss 0.33156095 - samples/sec: 18.75 - lr: 0.020000\n",
-      "2021-09-08 02:33:55,136 epoch 6 - iter 91/130 - loss 0.28755549 - samples/sec: 18.80 - lr: 0.020000\n",
-      "2021-09-08 02:33:55,837 epoch 6 - iter 104/130 - loss 0.30979048 - samples/sec: 18.57 - lr: 0.020000\n",
-      "2021-09-08 02:33:56,549 epoch 6 - iter 117/130 - loss 0.31228422 - samples/sec: 18.29 - lr: 0.020000\n",
-      "2021-09-08 02:33:57,237 epoch 6 - iter 130/130 - loss 0.33404437 - samples/sec: 18.91 - lr: 0.020000\n"
+      "2021-09-21 22:12:48,948 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:12:49,604 epoch 4 - iter 13/130 - loss 0.61737801 - samples/sec: 20.51 - lr: 0.020000\n",
+      "2021-09-21 22:12:50,193 epoch 4 - iter 26/130 - loss 0.56187064 - samples/sec: 22.08 - lr: 0.020000\n",
+      "2021-09-21 22:12:50,784 epoch 4 - iter 39/130 - loss 0.55922549 - samples/sec: 22.03 - lr: 0.020000\n",
+      "2021-09-21 22:12:51,367 epoch 4 - iter 52/130 - loss 0.52664275 - samples/sec: 22.37 - lr: 0.020000\n",
+      "2021-09-21 22:12:51,966 epoch 4 - iter 65/130 - loss 0.53753397 - samples/sec: 21.72 - lr: 0.020000\n",
+      "2021-09-21 22:12:52,548 epoch 4 - iter 78/130 - loss 0.49018286 - samples/sec: 22.39 - lr: 0.020000\n",
+      "2021-09-21 22:12:53,133 epoch 4 - iter 91/130 - loss 0.50331327 - samples/sec: 22.24 - lr: 0.020000\n",
+      "2021-09-21 22:12:53,720 epoch 4 - iter 104/130 - loss 0.55882509 - samples/sec: 22.19 - lr: 0.020000\n",
+      "2021-09-21 22:12:54,338 epoch 4 - iter 117/130 - loss 0.53874861 - samples/sec: 21.08 - lr: 0.020000\n",
+      "2021-09-21 22:12:55,148 epoch 4 - iter 130/130 - loss 0.56198525 - samples/sec: 16.05 - lr: 0.020000\n",
+      "2021-09-21 22:12:55,150 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:12:55,150 EPOCH 4 done: loss 0.5620 - lr 0.0200000\n",
+      "2021-09-21 22:12:55,979 DEV : loss 0.6067566871643066 - score 0.4286\n",
+      "2021-09-21 22:12:55,980 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 22:13:00,602 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:13:01,447 epoch 5 - iter 13/130 - loss 0.11107525 - samples/sec: 15.95 - lr: 0.020000\n",
+      "2021-09-21 22:13:02,227 epoch 5 - iter 26/130 - loss 0.24434266 - samples/sec: 16.69 - lr: 0.020000\n",
+      "2021-09-21 22:13:03,026 epoch 5 - iter 39/130 - loss 0.27820493 - samples/sec: 16.30 - lr: 0.020000\n",
+      "2021-09-21 22:13:03,797 epoch 5 - iter 52/130 - loss 0.39094076 - samples/sec: 16.87 - lr: 0.020000\n",
+      "2021-09-21 22:13:04,602 epoch 5 - iter 65/130 - loss 0.45616250 - samples/sec: 16.16 - lr: 0.020000\n",
+      "2021-09-21 22:13:05,414 epoch 5 - iter 78/130 - loss 0.47139149 - samples/sec: 16.03 - lr: 0.020000\n",
+      "2021-09-21 22:13:06,207 epoch 5 - iter 91/130 - loss 0.44833503 - samples/sec: 16.42 - lr: 0.020000\n",
+      "2021-09-21 22:13:07,013 epoch 5 - iter 104/130 - loss 0.43659389 - samples/sec: 16.15 - lr: 0.020000\n",
+      "2021-09-21 22:13:07,798 epoch 5 - iter 117/130 - loss 0.47415694 - samples/sec: 16.59 - lr: 0.020000\n",
+      "2021-09-21 22:13:08,604 epoch 5 - iter 130/130 - loss 0.45318001 - samples/sec: 16.14 - lr: 0.020000\n",
+      "2021-09-21 22:13:08,606 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:13:08,606 EPOCH 5 done: loss 0.4532 - lr 0.0200000\n",
+      "2021-09-21 22:13:09,476 DEV : loss 0.4921734929084778 - score 0.2857\n",
+      "2021-09-21 22:13:09,477 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:13:09,479 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:13:10,384 epoch 6 - iter 13/130 - loss 0.28128610 - samples/sec: 16.29 - lr: 0.020000\n",
+      "2021-09-21 22:13:11,187 epoch 6 - iter 26/130 - loss 0.26003541 - samples/sec: 16.20 - lr: 0.020000\n",
+      "2021-09-21 22:13:11,994 epoch 6 - iter 39/130 - loss 0.42635217 - samples/sec: 16.12 - lr: 0.020000\n",
+      "2021-09-21 22:13:12,746 epoch 6 - iter 52/130 - loss 0.39709735 - samples/sec: 17.32 - lr: 0.020000\n",
+      "2021-09-21 22:13:13,524 epoch 6 - iter 65/130 - loss 0.39661897 - samples/sec: 16.72 - lr: 0.020000\n",
+      "2021-09-21 22:13:14,307 epoch 6 - iter 78/130 - loss 0.40955069 - samples/sec: 16.63 - lr: 0.020000\n",
+      "2021-09-21 22:13:15,049 epoch 6 - iter 91/130 - loss 0.40826772 - samples/sec: 17.53 - lr: 0.020000\n",
+      "2021-09-21 22:13:15,861 epoch 6 - iter 104/130 - loss 0.41244785 - samples/sec: 16.04 - lr: 0.020000\n",
+      "2021-09-21 22:13:16,673 epoch 6 - iter 117/130 - loss 0.42617573 - samples/sec: 16.03 - lr: 0.020000\n",
+      "2021-09-21 22:13:17,486 epoch 6 - iter 130/130 - loss 0.42600864 - samples/sec: 16.01 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:33:57,238 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:33:57,239 EPOCH 6 done: loss 0.3340 - lr 0.0200000\n",
-      "2021-09-08 02:33:57,557 DEV : loss 0.41058042645454407 - score 0.6429\n",
-      "2021-09-08 02:33:57,558 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:13:17,487 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:13:17,487 EPOCH 6 done: loss 0.4260 - lr 0.0200000\n",
+      "2021-09-21 22:13:18,248 DEV : loss 0.40349504351615906 - score 0.4286\n",
+      "2021-09-21 22:13:18,250 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 22:13:22,414 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:13:23,290 epoch 7 - iter 13/130 - loss 0.55260335 - samples/sec: 16.50 - lr: 0.020000\n",
+      "2021-09-21 22:13:24,073 epoch 7 - iter 26/130 - loss 0.32724387 - samples/sec: 16.61 - lr: 0.020000\n",
+      "2021-09-21 22:13:24,886 epoch 7 - iter 39/130 - loss 0.29576843 - samples/sec: 16.02 - lr: 0.020000\n",
+      "2021-09-21 22:13:25,675 epoch 7 - iter 52/130 - loss 0.28415370 - samples/sec: 16.49 - lr: 0.020000\n",
+      "2021-09-21 22:13:26,465 epoch 7 - iter 65/130 - loss 0.29211530 - samples/sec: 16.48 - lr: 0.020000\n",
+      "2021-09-21 22:13:27,282 epoch 7 - iter 78/130 - loss 0.30292827 - samples/sec: 15.93 - lr: 0.020000\n",
+      "2021-09-21 22:13:28,081 epoch 7 - iter 91/130 - loss 0.29764027 - samples/sec: 16.29 - lr: 0.020000\n",
+      "2021-09-21 22:13:28,859 epoch 7 - iter 104/130 - loss 0.29395423 - samples/sec: 16.73 - lr: 0.020000\n",
+      "2021-09-21 22:13:29,648 epoch 7 - iter 117/130 - loss 0.30208280 - samples/sec: 16.48 - lr: 0.020000\n",
+      "2021-09-21 22:13:30,461 epoch 7 - iter 130/130 - loss 0.28967013 - samples/sec: 16.02 - lr: 0.020000\n",
+      "2021-09-21 22:13:30,462 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:13:30,462 EPOCH 7 done: loss 0.2897 - lr 0.0200000\n",
+      "2021-09-21 22:13:31,104 DEV : loss 0.4738401472568512 - score 0.3571\n",
+      "2021-09-21 22:13:31,105 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:13:31,107 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:13:31,939 epoch 8 - iter 13/130 - loss 0.12778198 - samples/sec: 16.74 - lr: 0.020000\n",
+      "2021-09-21 22:13:32,644 epoch 8 - iter 26/130 - loss 0.07858935 - samples/sec: 18.48 - lr: 0.020000\n",
+      "2021-09-21 22:13:33,441 epoch 8 - iter 39/130 - loss 0.25502483 - samples/sec: 16.31 - lr: 0.020000\n",
+      "2021-09-21 22:13:34,252 epoch 8 - iter 52/130 - loss 0.31621476 - samples/sec: 16.06 - lr: 0.020000\n",
+      "2021-09-21 22:13:35,037 epoch 8 - iter 65/130 - loss 0.28373165 - samples/sec: 16.57 - lr: 0.020000\n",
+      "2021-09-21 22:13:35,818 epoch 8 - iter 78/130 - loss 0.26467839 - samples/sec: 16.67 - lr: 0.020000\n",
+      "2021-09-21 22:13:36,592 epoch 8 - iter 91/130 - loss 0.27269362 - samples/sec: 16.82 - lr: 0.020000\n",
+      "2021-09-21 22:13:37,364 epoch 8 - iter 104/130 - loss 0.26908497 - samples/sec: 16.85 - lr: 0.020000\n",
+      "2021-09-21 22:13:38,164 epoch 8 - iter 117/130 - loss 0.30090911 - samples/sec: 16.28 - lr: 0.020000\n",
+      "2021-09-21 22:13:38,927 epoch 8 - iter 130/130 - loss 0.27196299 - samples/sec: 17.05 - lr: 0.020000\n",
+      "2021-09-21 22:13:38,928 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:13:38,929 EPOCH 8 done: loss 0.2720 - lr 0.0200000\n",
+      "2021-09-21 22:13:39,666 DEV : loss 0.693524181842804 - score 0.3571\n",
+      "2021-09-21 22:13:39,667 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 22:13:39,669 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:13:40,509 epoch 9 - iter 13/130 - loss 0.26741341 - samples/sec: 16.41 - lr: 0.020000\n",
+      "2021-09-21 22:13:41,298 epoch 9 - iter 26/130 - loss 0.25308993 - samples/sec: 16.50 - lr: 0.020000\n",
+      "2021-09-21 22:13:42,088 epoch 9 - iter 39/130 - loss 0.20378818 - samples/sec: 16.48 - lr: 0.020000\n",
+      "2021-09-21 22:13:42,834 epoch 9 - iter 52/130 - loss 0.17788411 - samples/sec: 17.43 - lr: 0.020000\n",
+      "2021-09-21 22:13:43,601 epoch 9 - iter 65/130 - loss 0.23443158 - samples/sec: 16.98 - lr: 0.020000\n",
+      "2021-09-21 22:13:44,381 epoch 9 - iter 78/130 - loss 0.28158355 - samples/sec: 16.67 - lr: 0.020000\n",
+      "2021-09-21 22:13:45,195 epoch 9 - iter 91/130 - loss 0.28863341 - samples/sec: 15.99 - lr: 0.020000\n",
+      "2021-09-21 22:13:45,802 epoch 9 - iter 104/130 - loss 0.29002066 - samples/sec: 21.47 - lr: 0.020000\n",
+      "2021-09-21 22:13:46,381 epoch 9 - iter 117/130 - loss 0.27064375 - samples/sec: 22.47 - lr: 0.020000\n",
+      "2021-09-21 22:13:46,942 epoch 9 - iter 130/130 - loss 0.25142470 - samples/sec: 23.21 - lr: 0.020000\n",
+      "2021-09-21 22:13:46,944 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:13:46,944 EPOCH 9 done: loss 0.2514 - lr 0.0200000\n",
+      "2021-09-21 22:13:47,371 DEV : loss 0.6856667995452881 - score 0.3571\n",
+      "2021-09-21 22:13:47,372 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 22:13:47,457 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:13:48,046 epoch 10 - iter 13/130 - loss 0.08070960 - samples/sec: 22.89 - lr: 0.020000\n",
+      "2021-09-21 22:13:48,612 epoch 10 - iter 26/130 - loss 0.05116468 - samples/sec: 23.00 - lr: 0.020000\n",
+      "2021-09-21 22:13:49,170 epoch 10 - iter 39/130 - loss 0.08310681 - samples/sec: 23.35 - lr: 0.020000\n",
+      "2021-09-21 22:13:49,755 epoch 10 - iter 52/130 - loss 0.17763308 - samples/sec: 22.26 - lr: 0.020000\n",
+      "2021-09-21 22:13:50,348 epoch 10 - iter 65/130 - loss 0.14596105 - samples/sec: 21.96 - lr: 0.020000\n",
+      "2021-09-21 22:13:50,915 epoch 10 - iter 78/130 - loss 0.13825174 - samples/sec: 22.98 - lr: 0.020000\n",
+      "2021-09-21 22:13:51,470 epoch 10 - iter 91/130 - loss 0.12670297 - samples/sec: 23.42 - lr: 0.020000\n",
+      "2021-09-21 22:13:52,036 epoch 10 - iter 104/130 - loss 0.17664812 - samples/sec: 23.01 - lr: 0.020000\n",
+      "2021-09-21 22:13:52,595 epoch 10 - iter 117/130 - loss 0.16346476 - samples/sec: 23.30 - lr: 0.020000\n",
+      "2021-09-21 22:13:53,162 epoch 10 - iter 130/130 - loss 0.16888596 - samples/sec: 22.98 - lr: 0.020000\n",
+      "2021-09-21 22:13:53,163 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:13:53,164 EPOCH 10 done: loss 0.1689 - lr 0.0200000\n",
+      "2021-09-21 22:13:53,486 DEV : loss 0.35857054591178894 - score 0.5\n",
+      "2021-09-21 22:13:53,487 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:34:01,735 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:34:02,359 epoch 7 - iter 13/130 - loss 0.39025238 - samples/sec: 21.70 - lr: 0.020000\n",
-      "2021-09-08 02:34:02,961 epoch 7 - iter 26/130 - loss 0.42873092 - samples/sec: 21.62 - lr: 0.020000\n",
-      "2021-09-08 02:34:03,554 epoch 7 - iter 39/130 - loss 0.41701842 - samples/sec: 21.92 - lr: 0.020000\n",
-      "2021-09-08 02:34:04,144 epoch 7 - iter 52/130 - loss 0.38218196 - samples/sec: 22.07 - lr: 0.020000\n",
-      "2021-09-08 02:34:04,734 epoch 7 - iter 65/130 - loss 0.34140322 - samples/sec: 22.08 - lr: 0.020000\n",
-      "2021-09-08 02:34:05,317 epoch 7 - iter 78/130 - loss 0.30579723 - samples/sec: 22.32 - lr: 0.020000\n",
-      "2021-09-08 02:34:05,908 epoch 7 - iter 91/130 - loss 0.32392974 - samples/sec: 22.02 - lr: 0.020000\n",
-      "2021-09-08 02:34:06,497 epoch 7 - iter 104/130 - loss 0.35822035 - samples/sec: 22.09 - lr: 0.020000\n",
-      "2021-09-08 02:34:07,091 epoch 7 - iter 117/130 - loss 0.33879939 - samples/sec: 21.91 - lr: 0.020000\n",
-      "2021-09-08 02:34:07,685 epoch 7 - iter 130/130 - loss 0.33028197 - samples/sec: 21.91 - lr: 0.020000\n",
-      "2021-09-08 02:34:07,686 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:34:07,687 EPOCH 7 done: loss 0.3303 - lr 0.0200000\n",
-      "2021-09-08 02:34:08,003 DEV : loss 0.5072647929191589 - score 0.6429\n",
-      "2021-09-08 02:34:08,004 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:34:08,008 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:34:08,607 epoch 8 - iter 13/130 - loss 0.13766828 - samples/sec: 22.47 - lr: 0.020000\n",
-      "2021-09-08 02:34:09,199 epoch 8 - iter 26/130 - loss 0.26841270 - samples/sec: 21.99 - lr: 0.020000\n",
-      "2021-09-08 02:34:09,797 epoch 8 - iter 39/130 - loss 0.29373833 - samples/sec: 21.76 - lr: 0.020000\n",
-      "2021-09-08 02:34:10,377 epoch 8 - iter 52/130 - loss 0.26497209 - samples/sec: 22.45 - lr: 0.020000\n",
-      "2021-09-08 02:34:10,954 epoch 8 - iter 65/130 - loss 0.25494176 - samples/sec: 22.58 - lr: 0.020000\n",
-      "2021-09-08 02:34:11,541 epoch 8 - iter 78/130 - loss 0.28767464 - samples/sec: 22.15 - lr: 0.020000\n",
-      "2021-09-08 02:34:12,124 epoch 8 - iter 91/130 - loss 0.25469702 - samples/sec: 22.34 - lr: 0.020000\n",
-      "2021-09-08 02:34:12,716 epoch 8 - iter 104/130 - loss 0.27541444 - samples/sec: 21.98 - lr: 0.020000\n",
-      "2021-09-08 02:34:13,313 epoch 8 - iter 117/130 - loss 0.27467657 - samples/sec: 21.81 - lr: 0.020000\n",
-      "2021-09-08 02:34:13,887 epoch 8 - iter 130/130 - loss 0.24996148 - samples/sec: 22.67 - lr: 0.020000\n",
-      "2021-09-08 02:34:13,888 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:34:13,888 EPOCH 8 done: loss 0.2500 - lr 0.0200000\n",
-      "2021-09-08 02:34:14,205 DEV : loss 0.5568490028381348 - score 0.6429\n",
-      "2021-09-08 02:34:14,206 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:34:14,208 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:34:14,802 epoch 9 - iter 13/130 - loss 0.07555978 - samples/sec: 22.66 - lr: 0.020000\n",
-      "2021-09-08 02:34:15,384 epoch 9 - iter 26/130 - loss 0.12052924 - samples/sec: 22.37 - lr: 0.020000\n",
-      "2021-09-08 02:34:15,966 epoch 9 - iter 39/130 - loss 0.13832932 - samples/sec: 22.35 - lr: 0.020000\n",
-      "2021-09-08 02:34:16,551 epoch 9 - iter 52/130 - loss 0.10626731 - samples/sec: 22.26 - lr: 0.020000\n",
-      "2021-09-08 02:34:17,137 epoch 9 - iter 65/130 - loss 0.14280606 - samples/sec: 22.21 - lr: 0.020000\n",
-      "2021-09-08 02:34:17,710 epoch 9 - iter 78/130 - loss 0.12736411 - samples/sec: 22.70 - lr: 0.020000\n",
-      "2021-09-08 02:34:18,290 epoch 9 - iter 91/130 - loss 0.16072957 - samples/sec: 22.44 - lr: 0.020000\n",
-      "2021-09-08 02:34:18,883 epoch 9 - iter 104/130 - loss 0.19373199 - samples/sec: 21.96 - lr: 0.020000\n",
-      "2021-09-08 02:34:19,462 epoch 9 - iter 117/130 - loss 0.19642048 - samples/sec: 22.48 - lr: 0.020000\n",
-      "2021-09-08 02:34:20,041 epoch 9 - iter 130/130 - loss 0.19351328 - samples/sec: 22.48 - lr: 0.020000\n",
-      "2021-09-08 02:34:20,042 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:34:20,043 EPOCH 9 done: loss 0.1935 - lr 0.0200000\n",
-      "2021-09-08 02:34:20,368 DEV : loss 0.47939348220825195 - score 0.6429\n",
-      "2021-09-08 02:34:20,369 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 02:34:20,371 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:34:20,963 epoch 10 - iter 13/130 - loss 0.15656553 - samples/sec: 22.72 - lr: 0.020000\n",
-      "2021-09-08 02:34:21,537 epoch 10 - iter 26/130 - loss 0.13698586 - samples/sec: 22.70 - lr: 0.020000\n",
-      "2021-09-08 02:34:22,105 epoch 10 - iter 39/130 - loss 0.09255115 - samples/sec: 22.90 - lr: 0.020000\n",
-      "2021-09-08 02:34:22,678 epoch 10 - iter 52/130 - loss 0.06998418 - samples/sec: 22.74 - lr: 0.020000\n",
-      "2021-09-08 02:34:23,260 epoch 10 - iter 65/130 - loss 0.10937768 - samples/sec: 22.33 - lr: 0.020000\n",
-      "2021-09-08 02:34:23,853 epoch 10 - iter 78/130 - loss 0.20344842 - samples/sec: 21.98 - lr: 0.020000\n",
-      "2021-09-08 02:34:24,426 epoch 10 - iter 91/130 - loss 0.17586066 - samples/sec: 22.69 - lr: 0.020000\n",
-      "2021-09-08 02:34:24,998 epoch 10 - iter 104/130 - loss 0.17146105 - samples/sec: 22.75 - lr: 0.020000\n",
-      "2021-09-08 02:34:25,573 epoch 10 - iter 117/130 - loss 0.16995543 - samples/sec: 22.65 - lr: 0.020000\n",
-      "2021-09-08 02:34:26,145 epoch 10 - iter 130/130 - loss 0.15340432 - samples/sec: 22.75 - lr: 0.020000\n",
-      "2021-09-08 02:34:26,146 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:34:26,147 EPOCH 10 done: loss 0.1534 - lr 0.0200000\n",
-      "2021-09-08 02:34:26,463 DEV : loss 0.3925793468952179 - score 0.4286\n",
-      "Epoch    10: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 02:34:26,464 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 02:34:30,502 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:34:30,503 Testing using best model ...\n",
-      "2021-09-08 02:34:30,504 loading file None/best-model.pt\n",
+      "2021-09-21 22:14:01,555 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:14:01,556 Testing using best model ...\n",
+      "2021-09-21 22:14:01,557 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:34:35,502 \t0.5625\n",
-      "2021-09-08 02:34:35,502 \n",
+      "2021-09-21 22:14:07,215 \t0.6875\n",
+      "2021-09-21 22:14:07,215 \n",
       "Results:\n",
-      "- F-score (micro) 0.5625\n",
-      "- F-score (macro) 0.3312\n",
-      "- Accuracy 0.5625\n",
+      "- F-score (micro) 0.6875\n",
+      "- F-score (macro) 0.425\n",
+      "- Accuracy 0.6875\n",
       "\n",
       "By class:\n",
       "                                                                                                                                        precision    recall  f1-score   support\n",
       "\n",
-      "                                                                                                           undertake a journey or trip     0.0000    0.0000    0.0000         0\n",
+      "                                                                                                           undertake a journey or trip     0.5000    1.0000    0.6667         1\n",
       "                                                                          the practical application of science to commerce or industry     0.0000    0.0000    0.0000         1\n",
-      "                                                                                        a healthy state of wellbeing free from disease     1.0000    0.5000    0.6667         2\n",
-      "                                                                                          an adult female person (as opposed to a man)     1.0000    0.5000    0.6667         2\n",
-      " a father or mother; one who begets or one who gives birth to or nurtures and raises a child a relative who plays the role of guardian     1.0000    0.6667    0.8000         3\n",
-      "a father or mother; one who begets or one who gives birth to or nurtures and raises a child; a relative who plays the role of guardian     0.0000    0.0000    0.0000         0\n",
-      "                                                                       the social event at which the ceremony of marriage is performed     0.0000    0.0000    0.0000         0\n",
-      "                                                               the latest and most admired style in clothes and cosmetics and behavior     0.0000    0.0000    0.0000         0\n",
-      "                                                                            an activity that is diverting and that holds the attention     0.0000    0.0000    0.0000         1\n",
-      "                                                                                           a particular branch of scientific knowledge     0.0000    0.0000    0.0000         0\n",
+      "                                                                                        a healthy state of wellbeing free from disease     0.0000    0.0000    0.0000         0\n",
+      "                                                                                          an adult female person (as opposed to a man)     0.0000    0.0000    0.0000         0\n",
+      " a father or mother; one who begets or one who gives birth to or nurtures and raises a child a relative who plays the role of guardian     0.0000    0.0000    0.0000         0\n",
+      "a father or mother; one who begets or one who gives birth to or nurtures and raises a child; a relative who plays the role of guardian     0.0000    0.0000    0.0000         1\n",
+      "                                                                       the social event at which the ceremony of marriage is performed     1.0000    1.0000    1.0000         1\n",
+      "                                                               the latest and most admired style in clothes and cosmetics and behavior     1.0000    1.0000    1.0000         3\n",
+      "                                                                            an activity that is diverting and that holds the attention     0.6667    1.0000    0.8000         2\n",
+      "                                                                                           a particular branch of scientific knowledge     0.5000    1.0000    0.6667         1\n",
       "                                                                                                   the legal dissolution of a marriage     1.0000    1.0000    1.0000         1\n",
-      "                                an act punishable by law; usually considered an evil act an evil act not necessarily punishable by law     0.3333    1.0000    0.5000         1\n",
-      "                                                          a strong belief in a supernatural power or powers that control human destiny     1.0000    1.0000    1.0000         2\n",
-      "                                                                       an active diversion requiring physical exertion and competition     0.0000    0.0000    0.0000         1\n",
-      "                                                                        social relations involving intrigue to gain authority or power     0.5000    1.0000    0.6667         1\n",
-      "                                                                                          light and humorous drama with a happy ending     0.0000    0.0000    0.0000         1\n",
+      "                                an act punishable by law; usually considered an evil act an evil act not necessarily punishable by law     0.0000    0.0000    0.0000         0\n",
+      "                                                          a strong belief in a supernatural power or powers that control human destiny     1.0000    1.0000    1.0000         1\n",
+      "                                                                       an active diversion requiring physical exertion and competition     0.0000    0.0000    0.0000         0\n",
+      "                                                                        social relations involving intrigue to gain authority or power     0.0000    0.0000    0.0000         2\n",
+      "                                                                                          light and humorous drama with a happy ending     1.0000    0.5000    0.6667         2\n",
       "\n",
-      "                                                                                                                             micro avg     0.5625    0.5625    0.5625        16\n",
-      "                                                                                                                             macro avg     0.3646    0.3542    0.3312        16\n",
-      "                                                                                                                          weighted avg     0.6771    0.5625    0.5771        16\n",
-      "                                                                                                                           samples avg     0.5625    0.5625    0.5625        16\n",
+      "                                                                                                                             micro avg     0.6875    0.6875    0.6875        16\n",
+      "                                                                                                                             macro avg     0.4167    0.4688    0.4250        16\n",
+      "                                                                                                                          weighted avg     0.6458    0.6875    0.6417        16\n",
+      "                                                                                                                           samples avg     0.6875    0.6875    0.6875        16\n",
       "\n"
      ]
     },
@@ -6464,26 +6468,26 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:34:35,503 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:34:48,013 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
+      "2021-09-21 22:14:07,216 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:14:26,199 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:34:51,856 Computing label dictionary. Progress:\n"
+      "2021-09-21 22:14:30,263 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 146/146 [00:00<00:00, 72521.13it/s]"
+      "100%|██████████| 146/146 [00:00<00:00, 73743.78it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:34:51,860 [b'undertake a journey or trip', b'the practical application of science to commerce or industry', b'a healthy state of wellbeing free from disease', b'an adult female person (as opposed to a man)', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child a relative who plays the role of guardian', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child; a relative who plays the role of guardian', b'the social event at which the ceremony of marriage is performed', b'the latest and most admired style in clothes and cosmetics and behavior', b'an activity that is diverting and that holds the attention', b'a particular branch of scientific knowledge', b'the legal dissolution of a marriage', b'an act punishable by law; usually considered an evil act an evil act not necessarily punishable by law', b'a strong belief in a supernatural power or powers that control human destiny', b'an active diversion requiring physical exertion and competition', b'social relations involving intrigue to gain authority or power', b'light and humorous drama with a happy ending']\n",
-      "2021-09-08 02:34:51,871 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:34:51,873 Model: \"TARSClassifier(\n",
+      "2021-09-21 22:14:30,267 [b'undertake a journey or trip', b'the practical application of science to commerce or industry', b'a healthy state of wellbeing free from disease', b'an adult female person (as opposed to a man)', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child a relative who plays the role of guardian', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child; a relative who plays the role of guardian', b'the social event at which the ceremony of marriage is performed', b'the latest and most admired style in clothes and cosmetics and behavior', b'an activity that is diverting and that holds the attention', b'a particular branch of scientific knowledge', b'the legal dissolution of a marriage', b'an act punishable by law; usually considered an evil act an evil act not necessarily punishable by law', b'a strong belief in a supernatural power or powers that control human destiny', b'an active diversion requiring physical exertion and competition', b'social relations involving intrigue to gain authority or power', b'light and humorous drama with a happy ending']\n",
+      "2021-09-21 22:14:30,277 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:14:30,279 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -6796,25 +6800,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:34:51,873 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:34:51,873 Corpus: \"Corpus: 130 train + 14 dev + 16 test sentences\"\n",
-      "2021-09-08 02:34:51,874 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:34:51,874 Parameters:\n",
-      "2021-09-08 02:34:51,874  - learning_rate: \"0.02\"\n",
-      "2021-09-08 02:34:51,875  - mini_batch_size: \"1\"\n",
-      "2021-09-08 02:34:51,875  - patience: \"3\"\n",
-      "2021-09-08 02:34:51,875  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 02:34:51,875  - max_epochs: \"10\"\n",
-      "2021-09-08 02:34:51,876  - shuffle: \"True\"\n",
-      "2021-09-08 02:34:51,876  - train_with_dev: \"False\"\n",
-      "2021-09-08 02:34:51,876  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 02:34:51,876 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:34:51,877 Model training base path: \"None\"\n",
-      "2021-09-08 02:34:51,877 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:34:51,877 Device: cuda:0\n",
-      "2021-09-08 02:34:51,878 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:34:51,878 Embeddings storage mode: cpu\n",
-      "2021-09-08 02:34:51,912 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 22:14:30,279 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:14:30,280 Corpus: \"Corpus: 130 train + 14 dev + 16 test sentences\"\n",
+      "2021-09-21 22:14:30,280 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:14:30,280 Parameters:\n",
+      "2021-09-21 22:14:30,281  - learning_rate: \"0.02\"\n",
+      "2021-09-21 22:14:30,281  - mini_batch_size: \"1\"\n",
+      "2021-09-21 22:14:30,281  - patience: \"3\"\n",
+      "2021-09-21 22:14:30,281  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 22:14:30,282  - max_epochs: \"10\"\n",
+      "2021-09-21 22:14:30,282  - shuffle: \"True\"\n",
+      "2021-09-21 22:14:30,282  - train_with_dev: \"False\"\n",
+      "2021-09-21 22:14:30,283  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 22:14:30,283 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:14:30,283 Model training base path: \"None\"\n",
+      "2021-09-21 22:14:30,283 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:14:30,284 Device: cuda:0\n",
+      "2021-09-21 22:14:30,284 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:14:30,284 Embeddings storage mode: cpu\n",
+      "2021-09-21 22:14:30,291 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -6828,170 +6832,171 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:34:52,536 epoch 1 - iter 13/130 - loss 0.35291556 - samples/sec: 21.65 - lr: 0.020000\n",
-      "2021-09-08 02:34:53,133 epoch 1 - iter 26/130 - loss 0.67152268 - samples/sec: 21.81 - lr: 0.020000\n",
-      "2021-09-08 02:34:53,745 epoch 1 - iter 39/130 - loss 0.66547213 - samples/sec: 21.25 - lr: 0.020000\n",
-      "2021-09-08 02:34:54,338 epoch 1 - iter 52/130 - loss 0.63288813 - samples/sec: 21.96 - lr: 0.020000\n",
-      "2021-09-08 02:34:54,954 epoch 1 - iter 65/130 - loss 0.61943315 - samples/sec: 21.11 - lr: 0.020000\n",
-      "2021-09-08 02:34:55,563 epoch 1 - iter 78/130 - loss 0.60069423 - samples/sec: 21.39 - lr: 0.020000\n",
-      "2021-09-08 02:34:56,162 epoch 1 - iter 91/130 - loss 0.58503994 - samples/sec: 21.72 - lr: 0.020000\n",
-      "2021-09-08 02:34:56,775 epoch 1 - iter 104/130 - loss 0.56865752 - samples/sec: 21.23 - lr: 0.020000\n",
-      "2021-09-08 02:34:57,387 epoch 1 - iter 117/130 - loss 0.59092214 - samples/sec: 21.28 - lr: 0.020000\n",
-      "2021-09-08 02:34:58,163 epoch 1 - iter 130/130 - loss 0.58953734 - samples/sec: 16.76 - lr: 0.020000\n",
-      "2021-09-08 02:34:58,165 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:34:58,165 EPOCH 1 done: loss 0.5895 - lr 0.0200000\n",
-      "2021-09-08 02:34:58,560 DEV : loss 0.8101728558540344 - score 0.1429\n",
-      "2021-09-08 02:34:58,561 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:14:31,182 epoch 1 - iter 13/130 - loss 0.67968372 - samples/sec: 16.32 - lr: 0.020000\n",
+      "2021-09-21 22:14:31,998 epoch 1 - iter 26/130 - loss 0.63929883 - samples/sec: 15.95 - lr: 0.020000\n",
+      "2021-09-21 22:14:32,634 epoch 1 - iter 39/130 - loss 0.67183507 - samples/sec: 20.45 - lr: 0.020000\n",
+      "2021-09-21 22:14:33,240 epoch 1 - iter 52/130 - loss 0.66462239 - samples/sec: 21.51 - lr: 0.020000\n",
+      "2021-09-21 22:14:33,828 epoch 1 - iter 65/130 - loss 0.59629172 - samples/sec: 22.11 - lr: 0.020000\n",
+      "2021-09-21 22:14:34,420 epoch 1 - iter 78/130 - loss 0.58992308 - samples/sec: 22.00 - lr: 0.020000\n",
+      "2021-09-21 22:14:35,016 epoch 1 - iter 91/130 - loss 0.61375072 - samples/sec: 21.84 - lr: 0.020000\n",
+      "2021-09-21 22:14:35,599 epoch 1 - iter 104/130 - loss 0.59142622 - samples/sec: 22.34 - lr: 0.020000\n",
+      "2021-09-21 22:14:36,180 epoch 1 - iter 117/130 - loss 0.58724711 - samples/sec: 22.40 - lr: 0.020000\n",
+      "2021-09-21 22:14:36,975 epoch 1 - iter 130/130 - loss 0.59632939 - samples/sec: 16.37 - lr: 0.020000\n",
+      "2021-09-21 22:14:36,976 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:14:36,976 EPOCH 1 done: loss 0.5963 - lr 0.0200000\n",
+      "2021-09-21 22:14:37,788 DEV : loss 0.577997624874115 - score 0.0714\n",
+      "2021-09-21 22:14:37,789 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:35:02,848 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:35:03,704 epoch 2 - iter 13/130 - loss 0.81710813 - samples/sec: 15.65 - lr: 0.020000\n",
-      "2021-09-08 02:35:04,520 epoch 2 - iter 26/130 - loss 0.77518420 - samples/sec: 15.95 - lr: 0.020000\n",
-      "2021-09-08 02:35:05,353 epoch 2 - iter 39/130 - loss 0.76191155 - samples/sec: 15.63 - lr: 0.020000\n",
-      "2021-09-08 02:35:06,207 epoch 2 - iter 52/130 - loss 0.74560759 - samples/sec: 15.25 - lr: 0.020000\n",
-      "2021-09-08 02:35:07,007 epoch 2 - iter 65/130 - loss 0.73231533 - samples/sec: 16.28 - lr: 0.020000\n",
-      "2021-09-08 02:35:07,833 epoch 2 - iter 78/130 - loss 0.74007790 - samples/sec: 15.75 - lr: 0.020000\n",
-      "2021-09-08 02:35:08,605 epoch 2 - iter 91/130 - loss 0.73206886 - samples/sec: 16.86 - lr: 0.020000\n",
-      "2021-09-08 02:35:09,342 epoch 2 - iter 104/130 - loss 0.72226399 - samples/sec: 17.65 - lr: 0.020000\n",
-      "2021-09-08 02:35:10,081 epoch 2 - iter 117/130 - loss 0.71593677 - samples/sec: 17.61 - lr: 0.020000\n",
-      "2021-09-08 02:35:10,843 epoch 2 - iter 130/130 - loss 0.71182878 - samples/sec: 17.08 - lr: 0.020000\n",
-      "2021-09-08 02:35:10,844 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:35:10,844 EPOCH 2 done: loss 0.7118 - lr 0.0200000\n",
-      "2021-09-08 02:35:11,309 DEV : loss 0.42717429995536804 - score 0.0714\n",
-      "2021-09-08 02:35:11,310 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:35:11,313 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:35:12,094 epoch 3 - iter 13/130 - loss 0.66336492 - samples/sec: 18.28 - lr: 0.020000\n",
-      "2021-09-08 02:35:12,839 epoch 3 - iter 26/130 - loss 0.67418009 - samples/sec: 17.48 - lr: 0.020000\n",
-      "2021-09-08 02:35:13,579 epoch 3 - iter 39/130 - loss 0.66760164 - samples/sec: 17.57 - lr: 0.020000\n",
-      "2021-09-08 02:35:14,332 epoch 3 - iter 52/130 - loss 0.66458264 - samples/sec: 17.29 - lr: 0.020000\n",
-      "2021-09-08 02:35:15,079 epoch 3 - iter 65/130 - loss 0.66907420 - samples/sec: 17.43 - lr: 0.020000\n",
-      "2021-09-08 02:35:15,820 epoch 3 - iter 78/130 - loss 0.67260161 - samples/sec: 17.57 - lr: 0.020000\n",
-      "2021-09-08 02:35:16,564 epoch 3 - iter 91/130 - loss 0.67245411 - samples/sec: 17.49 - lr: 0.020000\n",
-      "2021-09-08 02:35:17,311 epoch 3 - iter 104/130 - loss 0.67166939 - samples/sec: 17.42 - lr: 0.020000\n",
-      "2021-09-08 02:35:18,053 epoch 3 - iter 117/130 - loss 0.67104788 - samples/sec: 17.53 - lr: 0.020000\n",
-      "2021-09-08 02:35:18,793 epoch 3 - iter 130/130 - loss 0.66916982 - samples/sec: 17.60 - lr: 0.020000\n",
-      "2021-09-08 02:35:18,794 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:35:18,794 EPOCH 3 done: loss 0.6692 - lr 0.0200000\n",
-      "2021-09-08 02:35:19,292 DEV : loss 0.6514389514923096 - score 0.0714\n",
-      "2021-09-08 02:35:19,293 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:35:19,295 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:35:20,064 epoch 4 - iter 13/130 - loss 0.66455147 - samples/sec: 17.53 - lr: 0.020000\n",
-      "2021-09-08 02:35:20,802 epoch 4 - iter 26/130 - loss 0.66564168 - samples/sec: 17.61 - lr: 0.020000\n",
-      "2021-09-08 02:35:21,563 epoch 4 - iter 39/130 - loss 0.66030383 - samples/sec: 17.10 - lr: 0.020000\n",
-      "2021-09-08 02:35:22,299 epoch 4 - iter 52/130 - loss 0.64827026 - samples/sec: 17.69 - lr: 0.020000\n",
-      "2021-09-08 02:35:23,022 epoch 4 - iter 65/130 - loss 0.65065292 - samples/sec: 18.01 - lr: 0.020000\n",
-      "2021-09-08 02:35:23,753 epoch 4 - iter 78/130 - loss 0.65829421 - samples/sec: 17.80 - lr: 0.020000\n",
-      "2021-09-08 02:35:24,473 epoch 4 - iter 91/130 - loss 0.65762722 - samples/sec: 18.06 - lr: 0.020000\n",
-      "2021-09-08 02:35:25,217 epoch 4 - iter 104/130 - loss 0.65763986 - samples/sec: 17.49 - lr: 0.020000\n",
-      "2021-09-08 02:35:25,960 epoch 4 - iter 117/130 - loss 0.66341224 - samples/sec: 17.51 - lr: 0.020000\n",
-      "2021-09-08 02:35:26,700 epoch 4 - iter 130/130 - loss 0.65837193 - samples/sec: 17.60 - lr: 0.020000\n",
-      "2021-09-08 02:35:26,701 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:35:26,701 EPOCH 4 done: loss 0.6584 - lr 0.0200000\n",
-      "2021-09-08 02:35:27,174 DEV : loss 0.27584850788116455 - score 0.0\n",
-      "2021-09-08 02:35:27,175 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 02:35:27,177 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:35:28,040 epoch 5 - iter 13/130 - loss 0.69387656 - samples/sec: 15.50 - lr: 0.020000\n",
-      "2021-09-08 02:35:28,771 epoch 5 - iter 26/130 - loss 0.65804250 - samples/sec: 17.79 - lr: 0.020000\n",
-      "2021-09-08 02:35:29,516 epoch 5 - iter 39/130 - loss 0.66591918 - samples/sec: 17.48 - lr: 0.020000\n",
-      "2021-09-08 02:35:30,234 epoch 5 - iter 52/130 - loss 0.66529385 - samples/sec: 18.12 - lr: 0.020000\n",
-      "2021-09-08 02:35:30,981 epoch 5 - iter 65/130 - loss 0.65848548 - samples/sec: 17.44 - lr: 0.020000\n",
-      "2021-09-08 02:35:31,709 epoch 5 - iter 78/130 - loss 0.65775494 - samples/sec: 17.85 - lr: 0.020000\n",
-      "2021-09-08 02:35:32,450 epoch 5 - iter 91/130 - loss 0.65619321 - samples/sec: 17.57 - lr: 0.020000\n",
-      "2021-09-08 02:35:33,187 epoch 5 - iter 104/130 - loss 0.65693001 - samples/sec: 17.65 - lr: 0.020000\n",
-      "2021-09-08 02:35:33,911 epoch 5 - iter 117/130 - loss 0.65652259 - samples/sec: 17.98 - lr: 0.020000\n",
-      "2021-09-08 02:35:34,654 epoch 5 - iter 130/130 - loss 0.65693136 - samples/sec: 17.53 - lr: 0.020000\n",
-      "2021-09-08 02:35:34,655 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:35:34,655 EPOCH 5 done: loss 0.6569 - lr 0.0200000\n",
-      "2021-09-08 02:35:35,138 DEV : loss 0.36412325501441956 - score 0.0\n",
-      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 02:35:35,139 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 02:35:35,141 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:35:35,901 epoch 6 - iter 13/130 - loss 0.67591160 - samples/sec: 17.65 - lr: 0.010000\n",
-      "2021-09-08 02:35:36,639 epoch 6 - iter 26/130 - loss 0.65148072 - samples/sec: 17.64 - lr: 0.010000\n",
-      "2021-09-08 02:35:37,378 epoch 6 - iter 39/130 - loss 0.64741068 - samples/sec: 17.61 - lr: 0.010000\n",
-      "2021-09-08 02:35:38,152 epoch 6 - iter 52/130 - loss 0.64384818 - samples/sec: 16.83 - lr: 0.010000\n",
-      "2021-09-08 02:35:38,935 epoch 6 - iter 65/130 - loss 0.64285367 - samples/sec: 16.61 - lr: 0.010000\n",
-      "2021-09-08 02:35:39,728 epoch 6 - iter 78/130 - loss 0.64383789 - samples/sec: 16.42 - lr: 0.010000\n",
-      "2021-09-08 02:35:40,460 epoch 6 - iter 91/130 - loss 0.64416585 - samples/sec: 17.78 - lr: 0.010000\n",
-      "2021-09-08 02:35:41,196 epoch 6 - iter 104/130 - loss 0.64269134 - samples/sec: 17.70 - lr: 0.010000\n",
-      "2021-09-08 02:35:41,918 epoch 6 - iter 117/130 - loss 0.64339951 - samples/sec: 18.03 - lr: 0.010000\n",
-      "2021-09-08 02:35:42,649 epoch 6 - iter 130/130 - loss 0.64594579 - samples/sec: 17.79 - lr: 0.010000\n"
+      "2021-09-21 22:14:41,937 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:14:42,790 epoch 2 - iter 13/130 - loss 0.84082836 - samples/sec: 15.79 - lr: 0.020000\n",
+      "2021-09-21 22:14:43,637 epoch 2 - iter 26/130 - loss 0.77801437 - samples/sec: 15.36 - lr: 0.020000\n",
+      "2021-09-21 22:14:44,513 epoch 2 - iter 39/130 - loss 0.73986544 - samples/sec: 14.85 - lr: 0.020000\n",
+      "2021-09-21 22:14:45,331 epoch 2 - iter 52/130 - loss 0.72221353 - samples/sec: 15.91 - lr: 0.020000\n",
+      "2021-09-21 22:14:46,093 epoch 2 - iter 65/130 - loss 0.70888850 - samples/sec: 17.08 - lr: 0.020000\n",
+      "2021-09-21 22:14:46,878 epoch 2 - iter 78/130 - loss 0.69438500 - samples/sec: 16.59 - lr: 0.020000\n",
+      "2021-09-21 22:14:47,664 epoch 2 - iter 91/130 - loss 0.69250413 - samples/sec: 16.56 - lr: 0.020000\n",
+      "2021-09-21 22:14:48,460 epoch 2 - iter 104/130 - loss 0.68835871 - samples/sec: 16.34 - lr: 0.020000\n",
+      "2021-09-21 22:14:49,199 epoch 2 - iter 117/130 - loss 0.68277522 - samples/sec: 17.61 - lr: 0.020000\n",
+      "2021-09-21 22:14:49,976 epoch 2 - iter 130/130 - loss 0.68156516 - samples/sec: 16.75 - lr: 0.020000\n",
+      "2021-09-21 22:14:49,977 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:14:49,978 EPOCH 2 done: loss 0.6816 - lr 0.0200000\n",
+      "2021-09-21 22:14:50,843 DEV : loss 0.27082157135009766 - score 0.2143\n",
+      "2021-09-21 22:14:50,844 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 22:14:54,858 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:14:55,691 epoch 3 - iter 13/130 - loss 0.68945044 - samples/sec: 16.09 - lr: 0.020000\n",
+      "2021-09-21 22:14:56,489 epoch 3 - iter 26/130 - loss 0.67988752 - samples/sec: 16.31 - lr: 0.020000\n",
+      "2021-09-21 22:14:57,286 epoch 3 - iter 39/130 - loss 0.68110682 - samples/sec: 16.33 - lr: 0.020000\n",
+      "2021-09-21 22:14:58,061 epoch 3 - iter 52/130 - loss 0.67197437 - samples/sec: 16.79 - lr: 0.020000\n",
+      "2021-09-21 22:14:58,825 epoch 3 - iter 65/130 - loss 0.66717918 - samples/sec: 17.01 - lr: 0.020000\n",
+      "2021-09-21 22:14:59,532 epoch 3 - iter 78/130 - loss 0.66344229 - samples/sec: 18.43 - lr: 0.020000\n",
+      "2021-09-21 22:15:00,296 epoch 3 - iter 91/130 - loss 0.65936880 - samples/sec: 17.03 - lr: 0.020000\n",
+      "2021-09-21 22:15:01,054 epoch 3 - iter 104/130 - loss 0.65874939 - samples/sec: 17.16 - lr: 0.020000\n",
+      "2021-09-21 22:15:01,809 epoch 3 - iter 117/130 - loss 0.65840309 - samples/sec: 17.24 - lr: 0.020000\n",
+      "2021-09-21 22:15:02,581 epoch 3 - iter 130/130 - loss 0.65585792 - samples/sec: 16.86 - lr: 0.020000\n",
+      "2021-09-21 22:15:02,582 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:15:02,583 EPOCH 3 done: loss 0.6559 - lr 0.0200000\n",
+      "2021-09-21 22:15:03,342 DEV : loss 0.5508749485015869 - score 0.0714\n",
+      "2021-09-21 22:15:03,343 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:15:03,356 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:15:04,154 epoch 4 - iter 13/130 - loss 0.68399387 - samples/sec: 16.99 - lr: 0.020000\n",
+      "2021-09-21 22:15:04,907 epoch 4 - iter 26/130 - loss 0.65688290 - samples/sec: 17.28 - lr: 0.020000\n",
+      "2021-09-21 22:15:05,653 epoch 4 - iter 39/130 - loss 0.65475945 - samples/sec: 17.44 - lr: 0.020000\n",
+      "2021-09-21 22:15:06,435 epoch 4 - iter 52/130 - loss 0.65024707 - samples/sec: 16.65 - lr: 0.020000\n",
+      "2021-09-21 22:15:07,231 epoch 4 - iter 65/130 - loss 0.65203568 - samples/sec: 16.35 - lr: 0.020000\n",
+      "2021-09-21 22:15:08,002 epoch 4 - iter 78/130 - loss 0.65405384 - samples/sec: 16.88 - lr: 0.020000\n",
+      "2021-09-21 22:15:08,797 epoch 4 - iter 91/130 - loss 0.65346549 - samples/sec: 16.36 - lr: 0.020000\n",
+      "2021-09-21 22:15:09,572 epoch 4 - iter 104/130 - loss 0.65277623 - samples/sec: 16.80 - lr: 0.020000\n",
+      "2021-09-21 22:15:10,323 epoch 4 - iter 117/130 - loss 0.65270771 - samples/sec: 17.32 - lr: 0.020000\n",
+      "2021-09-21 22:15:11,102 epoch 4 - iter 130/130 - loss 0.65210311 - samples/sec: 16.72 - lr: 0.020000\n",
+      "2021-09-21 22:15:11,103 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:15:11,103 EPOCH 4 done: loss 0.6521 - lr 0.0200000\n",
+      "2021-09-21 22:15:11,897 DEV : loss 0.4593525528907776 - score 0.0714\n",
+      "2021-09-21 22:15:11,898 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 22:15:11,900 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:15:12,681 epoch 5 - iter 13/130 - loss 0.64118784 - samples/sec: 17.43 - lr: 0.020000\n",
+      "2021-09-21 22:15:13,453 epoch 5 - iter 26/130 - loss 0.62871750 - samples/sec: 16.85 - lr: 0.020000\n",
+      "2021-09-21 22:15:14,233 epoch 5 - iter 39/130 - loss 0.62714458 - samples/sec: 16.70 - lr: 0.020000\n",
+      "2021-09-21 22:15:14,990 epoch 5 - iter 52/130 - loss 0.64246401 - samples/sec: 17.18 - lr: 0.020000\n",
+      "2021-09-21 22:15:15,762 epoch 5 - iter 65/130 - loss 0.64122816 - samples/sec: 16.86 - lr: 0.020000\n",
+      "2021-09-21 22:15:16,519 epoch 5 - iter 78/130 - loss 0.64250897 - samples/sec: 17.20 - lr: 0.020000\n",
+      "2021-09-21 22:15:17,295 epoch 5 - iter 91/130 - loss 0.64109648 - samples/sec: 16.77 - lr: 0.020000\n",
+      "2021-09-21 22:15:18,063 epoch 5 - iter 104/130 - loss 0.63873411 - samples/sec: 16.93 - lr: 0.020000\n",
+      "2021-09-21 22:15:18,768 epoch 5 - iter 117/130 - loss 0.64142592 - samples/sec: 18.46 - lr: 0.020000\n",
+      "2021-09-21 22:15:19,526 epoch 5 - iter 130/130 - loss 0.64400193 - samples/sec: 17.16 - lr: 0.020000\n",
+      "2021-09-21 22:15:19,527 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:15:19,528 EPOCH 5 done: loss 0.6440 - lr 0.0200000\n",
+      "2021-09-21 22:15:20,346 DEV : loss 0.41412869095802307 - score 0.0714\n",
+      "2021-09-21 22:15:20,347 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 22:15:20,349 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:15:21,155 epoch 6 - iter 13/130 - loss 0.63472652 - samples/sec: 17.12 - lr: 0.020000\n",
+      "2021-09-21 22:15:21,932 epoch 6 - iter 26/130 - loss 0.64570740 - samples/sec: 16.75 - lr: 0.020000\n",
+      "2021-09-21 22:15:22,708 epoch 6 - iter 39/130 - loss 0.64672578 - samples/sec: 16.77 - lr: 0.020000\n",
+      "2021-09-21 22:15:23,514 epoch 6 - iter 52/130 - loss 0.64739036 - samples/sec: 16.14 - lr: 0.020000\n",
+      "2021-09-21 22:15:24,277 epoch 6 - iter 65/130 - loss 0.65598171 - samples/sec: 17.06 - lr: 0.020000\n",
+      "2021-09-21 22:15:25,059 epoch 6 - iter 78/130 - loss 0.65296081 - samples/sec: 16.64 - lr: 0.020000\n",
+      "2021-09-21 22:15:25,852 epoch 6 - iter 91/130 - loss 0.65194995 - samples/sec: 16.40 - lr: 0.020000\n",
+      "2021-09-21 22:15:26,633 epoch 6 - iter 104/130 - loss 0.64845680 - samples/sec: 16.68 - lr: 0.020000\n",
+      "2021-09-21 22:15:27,409 epoch 6 - iter 117/130 - loss 0.64754441 - samples/sec: 16.76 - lr: 0.020000\n",
+      "2021-09-21 22:15:28,156 epoch 6 - iter 130/130 - loss 0.64862898 - samples/sec: 17.42 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:35:42,651 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:35:42,651 EPOCH 6 done: loss 0.6459 - lr 0.0100000\n",
-      "2021-09-08 02:35:43,144 DEV : loss 0.4616144597530365 - score 0.0\n",
-      "2021-09-08 02:35:43,145 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:35:43,147 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:35:43,897 epoch 7 - iter 13/130 - loss 0.65443348 - samples/sec: 17.98 - lr: 0.010000\n",
-      "2021-09-08 02:35:44,624 epoch 7 - iter 26/130 - loss 0.63584380 - samples/sec: 17.88 - lr: 0.010000\n",
-      "2021-09-08 02:35:45,360 epoch 7 - iter 39/130 - loss 0.64813770 - samples/sec: 17.69 - lr: 0.010000\n",
-      "2021-09-08 02:35:46,089 epoch 7 - iter 52/130 - loss 0.64592451 - samples/sec: 17.84 - lr: 0.010000\n",
-      "2021-09-08 02:35:46,816 epoch 7 - iter 65/130 - loss 0.63988501 - samples/sec: 17.91 - lr: 0.010000\n",
-      "2021-09-08 02:35:47,540 epoch 7 - iter 78/130 - loss 0.63816423 - samples/sec: 17.96 - lr: 0.010000\n",
-      "2021-09-08 02:35:48,269 epoch 7 - iter 91/130 - loss 0.63722461 - samples/sec: 17.87 - lr: 0.010000\n",
-      "2021-09-08 02:35:49,004 epoch 7 - iter 104/130 - loss 0.63794373 - samples/sec: 17.71 - lr: 0.010000\n",
-      "2021-09-08 02:35:49,730 epoch 7 - iter 117/130 - loss 0.63671209 - samples/sec: 17.92 - lr: 0.010000\n",
-      "2021-09-08 02:35:50,467 epoch 7 - iter 130/130 - loss 0.63745171 - samples/sec: 17.65 - lr: 0.010000\n",
-      "2021-09-08 02:35:50,468 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:35:50,469 EPOCH 7 done: loss 0.6375 - lr 0.0100000\n",
-      "2021-09-08 02:35:50,930 DEV : loss 0.4389827847480774 - score 0.0\n",
-      "2021-09-08 02:35:50,931 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:35:50,933 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:35:51,717 epoch 8 - iter 13/130 - loss 0.63942166 - samples/sec: 17.86 - lr: 0.010000\n",
-      "2021-09-08 02:35:52,450 epoch 8 - iter 26/130 - loss 0.63750424 - samples/sec: 17.75 - lr: 0.010000\n",
-      "2021-09-08 02:35:53,177 epoch 8 - iter 39/130 - loss 0.63591416 - samples/sec: 17.91 - lr: 0.010000\n",
-      "2021-09-08 02:35:53,911 epoch 8 - iter 52/130 - loss 0.63655831 - samples/sec: 17.72 - lr: 0.010000\n",
-      "2021-09-08 02:35:54,627 epoch 8 - iter 65/130 - loss 0.63572409 - samples/sec: 18.18 - lr: 0.010000\n",
-      "2021-09-08 02:35:55,365 epoch 8 - iter 78/130 - loss 0.64154341 - samples/sec: 17.64 - lr: 0.010000\n",
-      "2021-09-08 02:35:56,086 epoch 8 - iter 91/130 - loss 0.63806958 - samples/sec: 18.05 - lr: 0.010000\n",
-      "2021-09-08 02:35:56,804 epoch 8 - iter 104/130 - loss 0.63573417 - samples/sec: 18.13 - lr: 0.010000\n",
-      "2021-09-08 02:35:57,535 epoch 8 - iter 117/130 - loss 0.63684510 - samples/sec: 17.79 - lr: 0.010000\n",
-      "2021-09-08 02:35:58,258 epoch 8 - iter 130/130 - loss 0.63356675 - samples/sec: 18.02 - lr: 0.010000\n",
-      "2021-09-08 02:35:58,259 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:35:58,259 EPOCH 8 done: loss 0.6336 - lr 0.0100000\n",
-      "2021-09-08 02:35:58,737 DEV : loss 0.42103248834609985 - score 0.0\n",
-      "2021-09-08 02:35:58,738 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 02:35:58,740 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:35:59,490 epoch 9 - iter 13/130 - loss 0.63310095 - samples/sec: 17.92 - lr: 0.010000\n",
-      "2021-09-08 02:36:00,210 epoch 9 - iter 26/130 - loss 0.65104126 - samples/sec: 18.07 - lr: 0.010000\n",
-      "2021-09-08 02:36:00,927 epoch 9 - iter 39/130 - loss 0.64730823 - samples/sec: 18.16 - lr: 0.010000\n",
-      "2021-09-08 02:36:01,646 epoch 9 - iter 52/130 - loss 0.63642284 - samples/sec: 18.08 - lr: 0.010000\n",
-      "2021-09-08 02:36:02,367 epoch 9 - iter 65/130 - loss 0.63492247 - samples/sec: 18.06 - lr: 0.010000\n",
-      "2021-09-08 02:36:02,999 epoch 9 - iter 78/130 - loss 0.63142747 - samples/sec: 20.60 - lr: 0.010000\n",
-      "2021-09-08 02:36:03,613 epoch 9 - iter 91/130 - loss 0.63317925 - samples/sec: 21.21 - lr: 0.010000\n",
-      "2021-09-08 02:36:04,243 epoch 9 - iter 104/130 - loss 0.63711370 - samples/sec: 20.64 - lr: 0.010000\n",
-      "2021-09-08 02:36:04,848 epoch 9 - iter 117/130 - loss 0.64013249 - samples/sec: 21.51 - lr: 0.010000\n",
-      "2021-09-08 02:36:05,466 epoch 9 - iter 130/130 - loss 0.64142896 - samples/sec: 21.07 - lr: 0.010000\n",
-      "2021-09-08 02:36:05,467 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:36:05,468 EPOCH 9 done: loss 0.6414 - lr 0.0100000\n",
-      "2021-09-08 02:36:05,939 DEV : loss 0.3971840441226959 - score 0.0\n",
-      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 02:36:05,940 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 02:36:05,942 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:36:06,606 epoch 10 - iter 13/130 - loss 0.66073349 - samples/sec: 21.10 - lr: 0.005000\n",
-      "2021-09-08 02:36:07,226 epoch 10 - iter 26/130 - loss 0.64179616 - samples/sec: 21.00 - lr: 0.005000\n",
-      "2021-09-08 02:36:07,851 epoch 10 - iter 39/130 - loss 0.64555396 - samples/sec: 20.84 - lr: 0.005000\n",
-      "2021-09-08 02:36:08,462 epoch 10 - iter 52/130 - loss 0.65164818 - samples/sec: 21.28 - lr: 0.005000\n",
-      "2021-09-08 02:36:09,079 epoch 10 - iter 65/130 - loss 0.65182398 - samples/sec: 21.11 - lr: 0.005000\n",
-      "2021-09-08 02:36:09,694 epoch 10 - iter 78/130 - loss 0.64755390 - samples/sec: 21.15 - lr: 0.005000\n",
-      "2021-09-08 02:36:10,309 epoch 10 - iter 91/130 - loss 0.65008232 - samples/sec: 21.16 - lr: 0.005000\n",
-      "2021-09-08 02:36:10,921 epoch 10 - iter 104/130 - loss 0.64762347 - samples/sec: 21.29 - lr: 0.005000\n",
-      "2021-09-08 02:36:11,534 epoch 10 - iter 117/130 - loss 0.64906460 - samples/sec: 21.24 - lr: 0.005000\n",
-      "2021-09-08 02:36:12,136 epoch 10 - iter 130/130 - loss 0.65030347 - samples/sec: 21.61 - lr: 0.005000\n",
-      "2021-09-08 02:36:12,137 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:36:12,138 EPOCH 10 done: loss 0.6503 - lr 0.0050000\n",
-      "2021-09-08 02:36:12,588 DEV : loss 0.45291081070899963 - score 0.0\n",
-      "2021-09-08 02:36:12,589 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:36:16,568 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:36:16,568 Testing using best model ...\n",
-      "2021-09-08 02:36:16,570 loading file None/best-model.pt\n",
+      "2021-09-21 22:15:28,158 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:15:28,158 EPOCH 6 done: loss 0.6486 - lr 0.0200000\n",
+      "2021-09-21 22:15:29,021 DEV : loss 0.5670293569564819 - score 0.0714\n",
+      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 22:15:29,022 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 22:15:29,024 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:15:29,851 epoch 7 - iter 13/130 - loss 0.61820995 - samples/sec: 16.45 - lr: 0.010000\n",
+      "2021-09-21 22:15:30,634 epoch 7 - iter 26/130 - loss 0.63261315 - samples/sec: 16.63 - lr: 0.010000\n",
+      "2021-09-21 22:15:31,232 epoch 7 - iter 39/130 - loss 0.63588305 - samples/sec: 21.77 - lr: 0.010000\n",
+      "2021-09-21 22:15:31,810 epoch 7 - iter 52/130 - loss 0.63873871 - samples/sec: 22.53 - lr: 0.010000\n",
+      "2021-09-21 22:15:32,380 epoch 7 - iter 65/130 - loss 0.64087232 - samples/sec: 22.85 - lr: 0.010000\n",
+      "2021-09-21 22:15:32,952 epoch 7 - iter 78/130 - loss 0.63910860 - samples/sec: 22.77 - lr: 0.010000\n",
+      "2021-09-21 22:15:33,521 epoch 7 - iter 91/130 - loss 0.63881440 - samples/sec: 22.88 - lr: 0.010000\n",
+      "2021-09-21 22:15:34,089 epoch 7 - iter 104/130 - loss 0.64258033 - samples/sec: 22.90 - lr: 0.010000\n",
+      "2021-09-21 22:15:34,649 epoch 7 - iter 117/130 - loss 0.64365313 - samples/sec: 23.24 - lr: 0.010000\n",
+      "2021-09-21 22:15:35,232 epoch 7 - iter 130/130 - loss 0.64143901 - samples/sec: 22.34 - lr: 0.010000\n",
+      "2021-09-21 22:15:35,234 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:15:35,234 EPOCH 7 done: loss 0.6414 - lr 0.0100000\n",
+      "2021-09-21 22:15:35,592 DEV : loss 0.41936400532722473 - score 0.0714\n",
+      "2021-09-21 22:15:35,593 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:15:35,596 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:15:36,181 epoch 8 - iter 13/130 - loss 0.64585215 - samples/sec: 23.07 - lr: 0.010000\n",
+      "2021-09-21 22:15:36,744 epoch 8 - iter 26/130 - loss 0.65286716 - samples/sec: 23.12 - lr: 0.010000\n",
+      "2021-09-21 22:15:37,298 epoch 8 - iter 39/130 - loss 0.64806202 - samples/sec: 23.52 - lr: 0.010000\n",
+      "2021-09-21 22:15:37,851 epoch 8 - iter 52/130 - loss 0.64727586 - samples/sec: 23.52 - lr: 0.010000\n",
+      "2021-09-21 22:15:38,408 epoch 8 - iter 65/130 - loss 0.64533117 - samples/sec: 23.39 - lr: 0.010000\n",
+      "2021-09-21 22:15:38,967 epoch 8 - iter 78/130 - loss 0.64583747 - samples/sec: 23.28 - lr: 0.010000\n",
+      "2021-09-21 22:15:39,616 epoch 8 - iter 91/130 - loss 0.64207670 - samples/sec: 20.07 - lr: 0.010000\n",
+      "2021-09-21 22:15:40,373 epoch 8 - iter 104/130 - loss 0.64296504 - samples/sec: 17.19 - lr: 0.010000\n",
+      "2021-09-21 22:15:41,174 epoch 8 - iter 117/130 - loss 0.64342809 - samples/sec: 16.26 - lr: 0.010000\n",
+      "2021-09-21 22:15:41,941 epoch 8 - iter 130/130 - loss 0.64466890 - samples/sec: 16.96 - lr: 0.010000\n",
+      "2021-09-21 22:15:41,942 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:15:41,942 EPOCH 8 done: loss 0.6447 - lr 0.0100000\n",
+      "2021-09-21 22:15:42,642 DEV : loss 0.4468749165534973 - score 0.0714\n",
+      "2021-09-21 22:15:42,643 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 22:15:42,645 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:15:43,444 epoch 9 - iter 13/130 - loss 0.65524629 - samples/sec: 16.95 - lr: 0.010000\n",
+      "2021-09-21 22:15:44,186 epoch 9 - iter 26/130 - loss 0.64552323 - samples/sec: 17.55 - lr: 0.010000\n",
+      "2021-09-21 22:15:44,975 epoch 9 - iter 39/130 - loss 0.65251868 - samples/sec: 16.49 - lr: 0.010000\n",
+      "2021-09-21 22:15:45,765 epoch 9 - iter 52/130 - loss 0.65129813 - samples/sec: 16.46 - lr: 0.010000\n",
+      "2021-09-21 22:15:46,589 epoch 9 - iter 65/130 - loss 0.65294780 - samples/sec: 15.79 - lr: 0.010000\n",
+      "2021-09-21 22:15:47,360 epoch 9 - iter 78/130 - loss 0.65143301 - samples/sec: 16.88 - lr: 0.010000\n",
+      "2021-09-21 22:15:48,124 epoch 9 - iter 91/130 - loss 0.64754673 - samples/sec: 17.04 - lr: 0.010000\n",
+      "2021-09-21 22:15:48,897 epoch 9 - iter 104/130 - loss 0.64211076 - samples/sec: 16.82 - lr: 0.010000\n",
+      "2021-09-21 22:15:49,709 epoch 9 - iter 117/130 - loss 0.64233569 - samples/sec: 16.04 - lr: 0.010000\n",
+      "2021-09-21 22:15:50,482 epoch 9 - iter 130/130 - loss 0.64207091 - samples/sec: 16.82 - lr: 0.010000\n",
+      "2021-09-21 22:15:50,483 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:15:50,484 EPOCH 9 done: loss 0.6421 - lr 0.0100000\n",
+      "2021-09-21 22:15:51,094 DEV : loss 0.41113176941871643 - score 0.1429\n",
+      "2021-09-21 22:15:51,095 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 22:15:51,097 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:15:51,960 epoch 10 - iter 13/130 - loss 0.63973678 - samples/sec: 16.46 - lr: 0.010000\n",
+      "2021-09-21 22:15:52,756 epoch 10 - iter 26/130 - loss 0.64943762 - samples/sec: 16.35 - lr: 0.010000\n",
+      "2021-09-21 22:15:53,557 epoch 10 - iter 39/130 - loss 0.64594430 - samples/sec: 16.24 - lr: 0.010000\n",
+      "2021-09-21 22:15:54,297 epoch 10 - iter 52/130 - loss 0.64582978 - samples/sec: 17.59 - lr: 0.010000\n",
+      "2021-09-21 22:15:55,065 epoch 10 - iter 65/130 - loss 0.64832665 - samples/sec: 16.95 - lr: 0.010000\n",
+      "2021-09-21 22:15:55,845 epoch 10 - iter 78/130 - loss 0.64786513 - samples/sec: 16.67 - lr: 0.010000\n",
+      "2021-09-21 22:15:56,629 epoch 10 - iter 91/130 - loss 0.64279178 - samples/sec: 16.60 - lr: 0.010000\n",
+      "2021-09-21 22:15:57,441 epoch 10 - iter 104/130 - loss 0.64302129 - samples/sec: 16.02 - lr: 0.010000\n",
+      "2021-09-21 22:15:58,165 epoch 10 - iter 117/130 - loss 0.64195941 - samples/sec: 17.98 - lr: 0.010000\n",
+      "2021-09-21 22:15:59,075 epoch 10 - iter 130/130 - loss 0.64167888 - samples/sec: 14.30 - lr: 0.010000\n",
+      "2021-09-21 22:15:59,076 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:15:59,076 EPOCH 10 done: loss 0.6417 - lr 0.0100000\n",
+      "2021-09-21 22:15:59,835 DEV : loss 0.4487608075141907 - score 0.1429\n",
+      "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 22:15:59,835 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 22:16:03,400 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:16:03,401 Testing using best model ...\n",
+      "2021-09-21 22:16:03,403 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:36:21,705 \t0.0\n",
-      "2021-09-08 02:36:21,705 \n",
+      "2021-09-21 22:16:08,268 \t0.0\n",
+      "2021-09-21 22:16:08,269 \n",
       "Results:\n",
       "- F-score (micro) 0.0\n",
       "- F-score (macro) 0.0\n",
@@ -7001,21 +7006,21 @@
       "                                                                                                                                        precision    recall  f1-score   support\n",
       "\n",
       "                                                                                                           undertake a journey or trip     0.0000    0.0000    0.0000         1\n",
-      "                                                                          the practical application of science to commerce or industry     0.0000    0.0000    0.0000         1\n",
+      "                                                                          the practical application of science to commerce or industry     0.0000    0.0000    0.0000         0\n",
       "                                                                                        a healthy state of wellbeing free from disease     0.0000    0.0000    0.0000         1\n",
       "                                                                                          an adult female person (as opposed to a man)     0.0000    0.0000    0.0000         0\n",
-      " a father or mother; one who begets or one who gives birth to or nurtures and raises a child a relative who plays the role of guardian     0.0000    0.0000    0.0000         1\n",
+      " a father or mother; one who begets or one who gives birth to or nurtures and raises a child a relative who plays the role of guardian     0.0000    0.0000    0.0000         2\n",
       "a father or mother; one who begets or one who gives birth to or nurtures and raises a child; a relative who plays the role of guardian     0.0000    0.0000    0.0000         2\n",
-      "                                                                       the social event at which the ceremony of marriage is performed     0.0000    0.0000    0.0000         1\n",
-      "                                                               the latest and most admired style in clothes and cosmetics and behavior     0.0000    0.0000    0.0000         2\n",
-      "                                                                            an activity that is diverting and that holds the attention     0.0000    0.0000    0.0000         0\n",
-      "                                                                                           a particular branch of scientific knowledge     0.0000    0.0000    0.0000         1\n",
+      "                                                                       the social event at which the ceremony of marriage is performed     0.0000    0.0000    0.0000         2\n",
+      "                                                               the latest and most admired style in clothes and cosmetics and behavior     0.0000    0.0000    0.0000         0\n",
+      "                                                                            an activity that is diverting and that holds the attention     0.0000    0.0000    0.0000         1\n",
+      "                                                                                           a particular branch of scientific knowledge     0.0000    0.0000    0.0000         2\n",
       "                                                                                                   the legal dissolution of a marriage     0.0000    0.0000    0.0000         1\n",
       "                                an act punishable by law; usually considered an evil act an evil act not necessarily punishable by law     0.0000    0.0000    0.0000         2\n",
-      "                                                          a strong belief in a supernatural power or powers that control human destiny     0.0000    0.0000    0.0000         0\n",
+      "                                                          a strong belief in a supernatural power or powers that control human destiny     0.0000    0.0000    0.0000         1\n",
       "                                                                       an active diversion requiring physical exertion and competition     0.0000    0.0000    0.0000         1\n",
       "                                                                        social relations involving intrigue to gain authority or power     0.0000    0.0000    0.0000         0\n",
-      "                                                                                          light and humorous drama with a happy ending     0.0000    0.0000    0.0000         2\n",
+      "                                                                                          light and humorous drama with a happy ending     0.0000    0.0000    0.0000         0\n",
       "\n",
       "                                                                                                                             micro avg     0.0000    0.0000    0.0000        16\n",
       "                                                                                                                             macro avg     0.0000    0.0000    0.0000        16\n",
@@ -7028,26 +7033,26 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:36:21,705 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:36:34,169 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
+      "2021-09-21 22:16:08,269 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:16:27,131 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:36:38,246 Computing label dictionary. Progress:\n"
+      "2021-09-21 22:16:31,194 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 146/146 [00:00<00:00, 67965.41it/s]"
+      "100%|██████████| 146/146 [00:00<00:00, 74615.38it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:36:38,250 [b'undertake a journey or trip', b'the practical application of science to commerce or industry', b'a healthy state of wellbeing free from disease', b'an adult female person (as opposed to a man)', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child a relative who plays the role of guardian', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child; a relative who plays the role of guardian', b'the social event at which the ceremony of marriage is performed', b'the latest and most admired style in clothes and cosmetics and behavior', b'an activity that is diverting and that holds the attention', b'a particular branch of scientific knowledge', b'the legal dissolution of a marriage', b'an act punishable by law; usually considered an evil act an evil act not necessarily punishable by law', b'a strong belief in a supernatural power or powers that control human destiny', b'an active diversion requiring physical exertion and competition', b'social relations involving intrigue to gain authority or power', b'light and humorous drama with a happy ending']\n",
-      "2021-09-08 02:36:38,259 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:36:38,261 Model: \"TARSClassifier(\n",
+      "2021-09-21 22:16:31,198 [b'undertake a journey or trip', b'the practical application of science to commerce or industry', b'a healthy state of wellbeing free from disease', b'an adult female person (as opposed to a man)', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child a relative who plays the role of guardian', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child; a relative who plays the role of guardian', b'the social event at which the ceremony of marriage is performed', b'the latest and most admired style in clothes and cosmetics and behavior', b'an activity that is diverting and that holds the attention', b'a particular branch of scientific knowledge', b'the legal dissolution of a marriage', b'an act punishable by law; usually considered an evil act an evil act not necessarily punishable by law', b'a strong belief in a supernatural power or powers that control human destiny', b'an active diversion requiring physical exertion and competition', b'social relations involving intrigue to gain authority or power', b'light and humorous drama with a happy ending']\n",
+      "2021-09-21 22:16:31,207 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:16:31,209 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -7360,25 +7365,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:36:38,262 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:36:38,262 Corpus: \"Corpus: 130 train + 14 dev + 16 test sentences\"\n",
-      "2021-09-08 02:36:38,262 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:36:38,263 Parameters:\n",
-      "2021-09-08 02:36:38,263  - learning_rate: \"0.02\"\n",
-      "2021-09-08 02:36:38,263  - mini_batch_size: \"1\"\n",
-      "2021-09-08 02:36:38,263  - patience: \"3\"\n",
-      "2021-09-08 02:36:38,264  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 02:36:38,264  - max_epochs: \"10\"\n",
-      "2021-09-08 02:36:38,264  - shuffle: \"True\"\n",
-      "2021-09-08 02:36:38,265  - train_with_dev: \"False\"\n",
-      "2021-09-08 02:36:38,265  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 02:36:38,265 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:36:38,265 Model training base path: \"None\"\n",
-      "2021-09-08 02:36:38,266 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:36:38,266 Device: cuda:0\n",
-      "2021-09-08 02:36:38,266 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:36:38,266 Embeddings storage mode: cpu\n",
-      "2021-09-08 02:36:38,274 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 22:16:31,209 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:16:31,210 Corpus: \"Corpus: 130 train + 14 dev + 16 test sentences\"\n",
+      "2021-09-21 22:16:31,210 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:16:31,210 Parameters:\n",
+      "2021-09-21 22:16:31,211  - learning_rate: \"0.02\"\n",
+      "2021-09-21 22:16:31,211  - mini_batch_size: \"1\"\n",
+      "2021-09-21 22:16:31,211  - patience: \"3\"\n",
+      "2021-09-21 22:16:31,212  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 22:16:31,212  - max_epochs: \"10\"\n",
+      "2021-09-21 22:16:31,212  - shuffle: \"True\"\n",
+      "2021-09-21 22:16:31,212  - train_with_dev: \"False\"\n",
+      "2021-09-21 22:16:31,213  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 22:16:31,213 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:16:31,213 Model training base path: \"None\"\n",
+      "2021-09-21 22:16:31,213 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:16:31,214 Device: cuda:0\n",
+      "2021-09-21 22:16:31,214 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:16:31,214 Embeddings storage mode: cpu\n",
+      "2021-09-21 22:16:31,220 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -7392,200 +7397,201 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:36:38,896 epoch 1 - iter 13/130 - loss 0.46003193 - samples/sec: 21.70 - lr: 0.020000\n",
-      "2021-09-08 02:36:39,499 epoch 1 - iter 26/130 - loss 0.62000166 - samples/sec: 21.59 - lr: 0.020000\n",
-      "2021-09-08 02:36:40,107 epoch 1 - iter 39/130 - loss 0.59766270 - samples/sec: 21.42 - lr: 0.020000\n",
-      "2021-09-08 02:36:40,695 epoch 1 - iter 52/130 - loss 0.59855118 - samples/sec: 22.11 - lr: 0.020000\n",
-      "2021-09-08 02:36:41,301 epoch 1 - iter 65/130 - loss 0.58943898 - samples/sec: 21.49 - lr: 0.020000\n",
-      "2021-09-08 02:36:41,900 epoch 1 - iter 78/130 - loss 0.60291275 - samples/sec: 21.74 - lr: 0.020000\n",
-      "2021-09-08 02:36:42,509 epoch 1 - iter 91/130 - loss 0.62594817 - samples/sec: 21.37 - lr: 0.020000\n",
-      "2021-09-08 02:36:43,107 epoch 1 - iter 104/130 - loss 0.60080635 - samples/sec: 21.75 - lr: 0.020000\n",
-      "2021-09-08 02:36:43,708 epoch 1 - iter 117/130 - loss 0.62795275 - samples/sec: 21.64 - lr: 0.020000\n",
-      "2021-09-08 02:36:44,318 epoch 1 - iter 130/130 - loss 0.62061229 - samples/sec: 21.37 - lr: 0.020000\n",
-      "2021-09-08 02:36:44,319 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:36:44,319 EPOCH 1 done: loss 0.6206 - lr 0.0200000\n",
-      "2021-09-08 02:36:44,654 DEV : loss 0.4686543345451355 - score 0.0714\n",
-      "2021-09-08 02:36:44,654 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:16:32,118 epoch 1 - iter 13/130 - loss 0.51246792 - samples/sec: 15.75 - lr: 0.020000\n",
+      "2021-09-21 22:16:32,908 epoch 1 - iter 26/130 - loss 0.53209273 - samples/sec: 16.46 - lr: 0.020000\n",
+      "2021-09-21 22:16:33,737 epoch 1 - iter 39/130 - loss 0.59177453 - samples/sec: 15.69 - lr: 0.020000\n",
+      "2021-09-21 22:16:34,562 epoch 1 - iter 52/130 - loss 0.54101238 - samples/sec: 15.77 - lr: 0.020000\n",
+      "2021-09-21 22:16:35,371 epoch 1 - iter 65/130 - loss 0.54557719 - samples/sec: 16.09 - lr: 0.020000\n",
+      "2021-09-21 22:16:36,192 epoch 1 - iter 78/130 - loss 0.50486362 - samples/sec: 15.85 - lr: 0.020000\n",
+      "2021-09-21 22:16:36,949 epoch 1 - iter 91/130 - loss 0.49668283 - samples/sec: 17.18 - lr: 0.020000\n",
+      "2021-09-21 22:16:37,765 epoch 1 - iter 104/130 - loss 0.53381992 - samples/sec: 15.94 - lr: 0.020000\n",
+      "2021-09-21 22:16:38,538 epoch 1 - iter 117/130 - loss 0.52709474 - samples/sec: 16.85 - lr: 0.020000\n",
+      "2021-09-21 22:16:39,340 epoch 1 - iter 130/130 - loss 0.51726769 - samples/sec: 16.21 - lr: 0.020000\n",
+      "2021-09-21 22:16:39,341 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:16:39,342 EPOCH 1 done: loss 0.5173 - lr 0.0200000\n",
+      "2021-09-21 22:16:39,958 DEV : loss 0.4396098852157593 - score 0.2143\n",
+      "2021-09-21 22:16:39,959 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:36:48,338 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:36:49,064 epoch 2 - iter 13/130 - loss 0.78682399 - samples/sec: 21.33 - lr: 0.020000\n",
-      "2021-09-08 02:36:49,666 epoch 2 - iter 26/130 - loss 0.72618034 - samples/sec: 21.63 - lr: 0.020000\n",
-      "2021-09-08 02:36:50,250 epoch 2 - iter 39/130 - loss 0.71099179 - samples/sec: 22.29 - lr: 0.020000\n",
-      "2021-09-08 02:36:50,976 epoch 2 - iter 52/130 - loss 0.69710496 - samples/sec: 17.91 - lr: 0.020000\n",
-      "2021-09-08 02:36:51,831 epoch 2 - iter 65/130 - loss 0.68854490 - samples/sec: 15.22 - lr: 0.020000\n",
-      "2021-09-08 02:36:52,525 epoch 2 - iter 78/130 - loss 0.67989459 - samples/sec: 18.76 - lr: 0.020000\n",
-      "2021-09-08 02:36:53,362 epoch 2 - iter 91/130 - loss 0.67541533 - samples/sec: 15.54 - lr: 0.020000\n",
-      "2021-09-08 02:36:54,213 epoch 2 - iter 104/130 - loss 0.67437555 - samples/sec: 15.29 - lr: 0.020000\n",
-      "2021-09-08 02:36:55,046 epoch 2 - iter 117/130 - loss 0.67032493 - samples/sec: 15.61 - lr: 0.020000\n",
-      "2021-09-08 02:36:55,889 epoch 2 - iter 130/130 - loss 0.66896623 - samples/sec: 15.44 - lr: 0.020000\n",
-      "2021-09-08 02:36:55,890 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:36:55,891 EPOCH 2 done: loss 0.6690 - lr 0.0200000\n",
-      "2021-09-08 02:36:56,877 DEV : loss 0.3891511559486389 - score 0.0\n",
-      "2021-09-08 02:36:56,878 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:36:56,887 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:36:57,820 epoch 3 - iter 13/130 - loss 0.64690389 - samples/sec: 14.90 - lr: 0.020000\n",
-      "2021-09-08 02:36:58,700 epoch 3 - iter 26/130 - loss 0.65440385 - samples/sec: 14.78 - lr: 0.020000\n",
-      "2021-09-08 02:36:59,551 epoch 3 - iter 39/130 - loss 0.65435137 - samples/sec: 15.29 - lr: 0.020000\n",
-      "2021-09-08 02:37:00,391 epoch 3 - iter 52/130 - loss 0.64264612 - samples/sec: 15.49 - lr: 0.020000\n",
-      "2021-09-08 02:37:01,233 epoch 3 - iter 65/130 - loss 0.64345721 - samples/sec: 15.47 - lr: 0.020000\n",
-      "2021-09-08 02:37:02,080 epoch 3 - iter 78/130 - loss 0.64283171 - samples/sec: 15.36 - lr: 0.020000\n",
-      "2021-09-08 02:37:02,918 epoch 3 - iter 91/130 - loss 0.64415541 - samples/sec: 15.53 - lr: 0.020000\n",
-      "2021-09-08 02:37:03,750 epoch 3 - iter 104/130 - loss 0.64318885 - samples/sec: 15.65 - lr: 0.020000\n",
-      "2021-09-08 02:37:04,596 epoch 3 - iter 117/130 - loss 0.64057973 - samples/sec: 15.39 - lr: 0.020000\n",
-      "2021-09-08 02:37:05,453 epoch 3 - iter 130/130 - loss 0.64331976 - samples/sec: 15.18 - lr: 0.020000\n",
-      "2021-09-08 02:37:05,454 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:37:05,455 EPOCH 3 done: loss 0.6433 - lr 0.0200000\n",
-      "2021-09-08 02:37:06,201 DEV : loss 0.4634826183319092 - score 0.2857\n",
-      "2021-09-08 02:37:06,202 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:16:44,020 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:16:44,862 epoch 2 - iter 13/130 - loss 0.44015595 - samples/sec: 16.74 - lr: 0.020000\n",
+      "2021-09-21 22:16:45,688 epoch 2 - iter 26/130 - loss 0.60916928 - samples/sec: 15.75 - lr: 0.020000\n",
+      "2021-09-21 22:16:46,534 epoch 2 - iter 39/130 - loss 0.59633113 - samples/sec: 15.38 - lr: 0.020000\n",
+      "2021-09-21 22:16:47,325 epoch 2 - iter 52/130 - loss 0.60076216 - samples/sec: 16.44 - lr: 0.020000\n",
+      "2021-09-21 22:16:48,128 epoch 2 - iter 65/130 - loss 0.58825489 - samples/sec: 16.20 - lr: 0.020000\n",
+      "2021-09-21 22:16:48,930 epoch 2 - iter 78/130 - loss 0.59215672 - samples/sec: 16.22 - lr: 0.020000\n",
+      "2021-09-21 22:16:49,732 epoch 2 - iter 91/130 - loss 0.60564027 - samples/sec: 16.22 - lr: 0.020000\n",
+      "2021-09-21 22:16:50,497 epoch 2 - iter 104/130 - loss 0.62022666 - samples/sec: 17.01 - lr: 0.020000\n",
+      "2021-09-21 22:16:51,271 epoch 2 - iter 117/130 - loss 0.62099427 - samples/sec: 16.83 - lr: 0.020000\n",
+      "2021-09-21 22:16:51,901 epoch 2 - iter 130/130 - loss 0.61947766 - samples/sec: 20.66 - lr: 0.020000\n",
+      "2021-09-21 22:16:51,902 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:16:51,903 EPOCH 2 done: loss 0.6195 - lr 0.0200000\n",
+      "2021-09-21 22:16:52,335 DEV : loss 0.29296910762786865 - score 0.2857\n",
+      "2021-09-21 22:16:52,335 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:37:10,280 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:37:11,235 epoch 4 - iter 13/130 - loss 0.61344713 - samples/sec: 14.57 - lr: 0.020000\n",
-      "2021-09-08 02:37:12,113 epoch 4 - iter 26/130 - loss 0.61376650 - samples/sec: 14.80 - lr: 0.020000\n",
-      "2021-09-08 02:37:12,995 epoch 4 - iter 39/130 - loss 0.62481511 - samples/sec: 14.77 - lr: 0.020000\n",
-      "2021-09-08 02:37:13,859 epoch 4 - iter 52/130 - loss 0.62701459 - samples/sec: 15.05 - lr: 0.020000\n",
-      "2021-09-08 02:37:14,652 epoch 4 - iter 65/130 - loss 0.63389146 - samples/sec: 16.42 - lr: 0.020000\n",
-      "2021-09-08 02:37:15,436 epoch 4 - iter 78/130 - loss 0.62988880 - samples/sec: 16.60 - lr: 0.020000\n",
-      "2021-09-08 02:37:16,300 epoch 4 - iter 91/130 - loss 0.63393115 - samples/sec: 15.04 - lr: 0.020000\n",
-      "2021-09-08 02:37:17,169 epoch 4 - iter 104/130 - loss 0.62087276 - samples/sec: 14.97 - lr: 0.020000\n",
-      "2021-09-08 02:37:18,045 epoch 4 - iter 117/130 - loss 0.62637180 - samples/sec: 14.86 - lr: 0.020000\n",
-      "2021-09-08 02:37:18,931 epoch 4 - iter 130/130 - loss 0.62404931 - samples/sec: 14.68 - lr: 0.020000\n",
-      "2021-09-08 02:37:18,932 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:37:18,933 EPOCH 4 done: loss 0.6240 - lr 0.0200000\n",
-      "2021-09-08 02:37:19,689 DEV : loss 0.4623728096485138 - score 0.2143\n",
-      "2021-09-08 02:37:19,689 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:37:19,691 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:37:20,529 epoch 5 - iter 13/130 - loss 0.61367602 - samples/sec: 16.33 - lr: 0.020000\n",
-      "2021-09-08 02:37:21,295 epoch 5 - iter 26/130 - loss 0.58649112 - samples/sec: 16.99 - lr: 0.020000\n",
-      "2021-09-08 02:37:21,901 epoch 5 - iter 39/130 - loss 0.58811810 - samples/sec: 21.50 - lr: 0.020000\n",
-      "2021-09-08 02:37:22,513 epoch 5 - iter 52/130 - loss 0.60103309 - samples/sec: 21.27 - lr: 0.020000\n",
-      "2021-09-08 02:37:23,120 epoch 5 - iter 65/130 - loss 0.59473716 - samples/sec: 21.43 - lr: 0.020000\n",
-      "2021-09-08 02:37:23,729 epoch 5 - iter 78/130 - loss 0.59479114 - samples/sec: 21.38 - lr: 0.020000\n",
-      "2021-09-08 02:37:24,333 epoch 5 - iter 91/130 - loss 0.58207784 - samples/sec: 21.54 - lr: 0.020000\n",
-      "2021-09-08 02:37:24,939 epoch 5 - iter 104/130 - loss 0.57538551 - samples/sec: 21.50 - lr: 0.020000\n",
-      "2021-09-08 02:37:25,543 epoch 5 - iter 117/130 - loss 0.58160596 - samples/sec: 21.55 - lr: 0.020000\n",
-      "2021-09-08 02:37:26,144 epoch 5 - iter 130/130 - loss 0.57025845 - samples/sec: 21.65 - lr: 0.020000\n",
-      "2021-09-08 02:37:26,146 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:37:26,146 EPOCH 5 done: loss 0.5703 - lr 0.0200000\n",
-      "2021-09-08 02:37:26,461 DEV : loss 0.3656441867351532 - score 0.2143\n",
-      "2021-09-08 02:37:26,462 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:37:26,463 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:37:27,090 epoch 6 - iter 13/130 - loss 0.54043339 - samples/sec: 21.47 - lr: 0.020000\n",
-      "2021-09-08 02:37:27,693 epoch 6 - iter 26/130 - loss 0.50760705 - samples/sec: 21.58 - lr: 0.020000\n",
-      "2021-09-08 02:37:28,292 epoch 6 - iter 39/130 - loss 0.46219171 - samples/sec: 21.76 - lr: 0.020000\n",
-      "2021-09-08 02:37:28,887 epoch 6 - iter 52/130 - loss 0.46902660 - samples/sec: 21.86 - lr: 0.020000\n",
-      "2021-09-08 02:37:29,485 epoch 6 - iter 65/130 - loss 0.51539859 - samples/sec: 21.76 - lr: 0.020000\n",
-      "2021-09-08 02:37:30,081 epoch 6 - iter 78/130 - loss 0.50724856 - samples/sec: 21.87 - lr: 0.020000\n",
-      "2021-09-08 02:37:30,678 epoch 6 - iter 91/130 - loss 0.48713874 - samples/sec: 21.81 - lr: 0.020000\n",
-      "2021-09-08 02:37:31,283 epoch 6 - iter 104/130 - loss 0.51523302 - samples/sec: 21.51 - lr: 0.020000\n",
-      "2021-09-08 02:37:31,874 epoch 6 - iter 117/130 - loss 0.50935024 - samples/sec: 22.01 - lr: 0.020000\n",
-      "2021-09-08 02:37:32,489 epoch 6 - iter 130/130 - loss 0.51030178 - samples/sec: 21.17 - lr: 0.020000\n"
+      "2021-09-21 22:16:59,150 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:17:00,076 epoch 3 - iter 13/130 - loss 0.34903161 - samples/sec: 15.56 - lr: 0.020000\n",
+      "2021-09-21 22:17:00,876 epoch 3 - iter 26/130 - loss 0.41034911 - samples/sec: 16.26 - lr: 0.020000\n",
+      "2021-09-21 22:17:01,689 epoch 3 - iter 39/130 - loss 0.46306919 - samples/sec: 16.00 - lr: 0.020000\n",
+      "2021-09-21 22:17:02,497 epoch 3 - iter 52/130 - loss 0.46148971 - samples/sec: 16.11 - lr: 0.020000\n",
+      "2021-09-21 22:17:03,290 epoch 3 - iter 65/130 - loss 0.49370920 - samples/sec: 16.41 - lr: 0.020000\n",
+      "2021-09-21 22:17:04,095 epoch 3 - iter 78/130 - loss 0.53067796 - samples/sec: 16.17 - lr: 0.020000\n",
+      "2021-09-21 22:17:04,928 epoch 3 - iter 91/130 - loss 0.53334846 - samples/sec: 15.61 - lr: 0.020000\n",
+      "2021-09-21 22:17:05,730 epoch 3 - iter 104/130 - loss 0.54254827 - samples/sec: 16.23 - lr: 0.020000\n",
+      "2021-09-21 22:17:06,550 epoch 3 - iter 117/130 - loss 0.54039151 - samples/sec: 15.87 - lr: 0.020000\n",
+      "2021-09-21 22:17:07,345 epoch 3 - iter 130/130 - loss 0.54045091 - samples/sec: 16.36 - lr: 0.020000\n",
+      "2021-09-21 22:17:07,346 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:17:07,346 EPOCH 3 done: loss 0.5405 - lr 0.0200000\n",
+      "2021-09-21 22:17:08,125 DEV : loss 0.5640403628349304 - score 0.2857\n",
+      "2021-09-21 22:17:08,128 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:17:08,130 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:17:08,953 epoch 4 - iter 13/130 - loss 0.34952349 - samples/sec: 17.26 - lr: 0.020000\n",
+      "2021-09-21 22:17:09,800 epoch 4 - iter 26/130 - loss 0.44089914 - samples/sec: 15.36 - lr: 0.020000\n",
+      "2021-09-21 22:17:10,582 epoch 4 - iter 39/130 - loss 0.39980897 - samples/sec: 16.64 - lr: 0.020000\n",
+      "2021-09-21 22:17:11,385 epoch 4 - iter 52/130 - loss 0.41279202 - samples/sec: 16.22 - lr: 0.020000\n",
+      "2021-09-21 22:17:12,194 epoch 4 - iter 65/130 - loss 0.45276397 - samples/sec: 16.08 - lr: 0.020000\n",
+      "2021-09-21 22:17:12,961 epoch 4 - iter 78/130 - loss 0.47201276 - samples/sec: 16.97 - lr: 0.020000\n",
+      "2021-09-21 22:17:13,760 epoch 4 - iter 91/130 - loss 0.49059013 - samples/sec: 16.28 - lr: 0.020000\n",
+      "2021-09-21 22:17:14,552 epoch 4 - iter 104/130 - loss 0.47026878 - samples/sec: 16.43 - lr: 0.020000\n",
+      "2021-09-21 22:17:15,346 epoch 4 - iter 117/130 - loss 0.46457559 - samples/sec: 16.40 - lr: 0.020000\n",
+      "2021-09-21 22:17:16,156 epoch 4 - iter 130/130 - loss 0.43644603 - samples/sec: 16.06 - lr: 0.020000\n",
+      "2021-09-21 22:17:16,157 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:17:16,158 EPOCH 4 done: loss 0.4364 - lr 0.0200000\n",
+      "2021-09-21 22:17:16,838 DEV : loss 0.40249064564704895 - score 0.2857\n",
+      "2021-09-21 22:17:16,840 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 22:17:16,842 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:17:17,718 epoch 5 - iter 13/130 - loss 0.79608287 - samples/sec: 15.84 - lr: 0.020000\n",
+      "2021-09-21 22:17:18,521 epoch 5 - iter 26/130 - loss 0.73598770 - samples/sec: 16.20 - lr: 0.020000\n",
+      "2021-09-21 22:17:19,238 epoch 5 - iter 39/130 - loss 0.63493897 - samples/sec: 18.17 - lr: 0.020000\n",
+      "2021-09-21 22:17:19,844 epoch 5 - iter 52/130 - loss 0.55776075 - samples/sec: 21.49 - lr: 0.020000\n",
+      "2021-09-21 22:17:20,413 epoch 5 - iter 65/130 - loss 0.47131997 - samples/sec: 22.89 - lr: 0.020000\n",
+      "2021-09-21 22:17:20,981 epoch 5 - iter 78/130 - loss 0.44208951 - samples/sec: 22.88 - lr: 0.020000\n",
+      "2021-09-21 22:17:21,557 epoch 5 - iter 91/130 - loss 0.44354632 - samples/sec: 22.61 - lr: 0.020000\n",
+      "2021-09-21 22:17:22,136 epoch 5 - iter 104/130 - loss 0.42712491 - samples/sec: 22.49 - lr: 0.020000\n",
+      "2021-09-21 22:17:22,709 epoch 5 - iter 117/130 - loss 0.41647681 - samples/sec: 22.71 - lr: 0.020000\n",
+      "2021-09-21 22:17:23,302 epoch 5 - iter 130/130 - loss 0.44868283 - samples/sec: 21.94 - lr: 0.020000\n",
+      "2021-09-21 22:17:23,303 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:17:23,304 EPOCH 5 done: loss 0.4487 - lr 0.0200000\n",
+      "2021-09-21 22:17:23,673 DEV : loss 0.5303949117660522 - score 0.3571\n",
+      "2021-09-21 22:17:23,674 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 22:17:27,659 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:17:28,267 epoch 6 - iter 13/130 - loss 0.26211048 - samples/sec: 22.30 - lr: 0.020000\n",
+      "2021-09-21 22:17:28,829 epoch 6 - iter 26/130 - loss 0.19928729 - samples/sec: 23.13 - lr: 0.020000\n",
+      "2021-09-21 22:17:29,400 epoch 6 - iter 39/130 - loss 0.22584620 - samples/sec: 22.82 - lr: 0.020000\n",
+      "2021-09-21 22:17:29,969 epoch 6 - iter 52/130 - loss 0.24701530 - samples/sec: 22.88 - lr: 0.020000\n",
+      "2021-09-21 22:17:30,547 epoch 6 - iter 65/130 - loss 0.24469809 - samples/sec: 22.50 - lr: 0.020000\n",
+      "2021-09-21 22:17:31,107 epoch 6 - iter 78/130 - loss 0.24307458 - samples/sec: 23.24 - lr: 0.020000\n",
+      "2021-09-21 22:17:31,672 epoch 6 - iter 91/130 - loss 0.22599738 - samples/sec: 23.03 - lr: 0.020000\n",
+      "2021-09-21 22:17:32,245 epoch 6 - iter 104/130 - loss 0.27258229 - samples/sec: 22.72 - lr: 0.020000\n",
+      "2021-09-21 22:17:32,818 epoch 6 - iter 117/130 - loss 0.30053210 - samples/sec: 22.72 - lr: 0.020000\n",
+      "2021-09-21 22:17:33,390 epoch 6 - iter 130/130 - loss 0.30735990 - samples/sec: 22.72 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:37:32,490 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:37:32,491 EPOCH 6 done: loss 0.5103 - lr 0.0200000\n",
-      "2021-09-08 02:37:32,814 DEV : loss 0.2979331612586975 - score 0.5\n",
-      "2021-09-08 02:37:32,815 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:17:33,391 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:17:33,392 EPOCH 6 done: loss 0.3074 - lr 0.0200000\n",
+      "2021-09-21 22:17:33,721 DEV : loss 0.37914684414863586 - score 0.5\n",
+      "2021-09-21 22:17:33,722 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:37:37,034 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:37:37,809 epoch 7 - iter 13/130 - loss 0.32822723 - samples/sec: 17.34 - lr: 0.020000\n",
-      "2021-09-08 02:37:38,668 epoch 7 - iter 26/130 - loss 0.30150058 - samples/sec: 15.15 - lr: 0.020000\n",
-      "2021-09-08 02:37:39,482 epoch 7 - iter 39/130 - loss 0.36145397 - samples/sec: 15.98 - lr: 0.020000\n",
-      "2021-09-08 02:37:40,315 epoch 7 - iter 52/130 - loss 0.39673303 - samples/sec: 15.63 - lr: 0.020000\n",
-      "2021-09-08 02:37:41,158 epoch 7 - iter 65/130 - loss 0.34645757 - samples/sec: 15.44 - lr: 0.020000\n",
-      "2021-09-08 02:37:42,028 epoch 7 - iter 78/130 - loss 0.39887737 - samples/sec: 14.95 - lr: 0.020000\n",
-      "2021-09-08 02:37:42,865 epoch 7 - iter 91/130 - loss 0.43435546 - samples/sec: 15.56 - lr: 0.020000\n",
-      "2021-09-08 02:37:43,696 epoch 7 - iter 104/130 - loss 0.42673775 - samples/sec: 15.66 - lr: 0.020000\n",
-      "2021-09-08 02:37:44,537 epoch 7 - iter 117/130 - loss 0.40097203 - samples/sec: 15.47 - lr: 0.020000\n",
-      "2021-09-08 02:37:45,342 epoch 7 - iter 130/130 - loss 0.38416445 - samples/sec: 16.17 - lr: 0.020000\n",
-      "2021-09-08 02:37:45,343 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:37:45,344 EPOCH 7 done: loss 0.3842 - lr 0.0200000\n",
-      "2021-09-08 02:37:45,910 DEV : loss 0.5950483679771423 - score 0.2857\n",
-      "2021-09-08 02:37:45,910 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:37:45,912 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:37:46,784 epoch 8 - iter 13/130 - loss 0.16355293 - samples/sec: 15.77 - lr: 0.020000\n",
-      "2021-09-08 02:37:47,611 epoch 8 - iter 26/130 - loss 0.29677666 - samples/sec: 15.75 - lr: 0.020000\n",
-      "2021-09-08 02:37:48,408 epoch 8 - iter 39/130 - loss 0.30782326 - samples/sec: 16.34 - lr: 0.020000\n",
-      "2021-09-08 02:37:49,264 epoch 8 - iter 52/130 - loss 0.36455702 - samples/sec: 15.20 - lr: 0.020000\n",
-      "2021-09-08 02:37:50,073 epoch 8 - iter 65/130 - loss 0.35294759 - samples/sec: 16.09 - lr: 0.020000\n",
-      "2021-09-08 02:37:50,897 epoch 8 - iter 78/130 - loss 0.36888132 - samples/sec: 15.79 - lr: 0.020000\n",
-      "2021-09-08 02:37:51,706 epoch 8 - iter 91/130 - loss 0.33468427 - samples/sec: 16.09 - lr: 0.020000\n",
-      "2021-09-08 02:37:52,540 epoch 8 - iter 104/130 - loss 0.30798767 - samples/sec: 15.61 - lr: 0.020000\n",
-      "2021-09-08 02:37:53,368 epoch 8 - iter 117/130 - loss 0.34073394 - samples/sec: 15.72 - lr: 0.020000\n",
-      "2021-09-08 02:37:54,184 epoch 8 - iter 130/130 - loss 0.32492687 - samples/sec: 15.94 - lr: 0.020000\n",
-      "2021-09-08 02:37:54,186 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:37:54,186 EPOCH 8 done: loss 0.3249 - lr 0.0200000\n",
-      "2021-09-08 02:37:54,848 DEV : loss 0.29556748270988464 - score 0.5\n",
-      "2021-09-08 02:37:54,849 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:17:37,710 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:17:38,302 epoch 7 - iter 13/130 - loss 0.21965428 - samples/sec: 22.92 - lr: 0.020000\n",
+      "2021-09-21 22:17:38,872 epoch 7 - iter 26/130 - loss 0.29822939 - samples/sec: 22.84 - lr: 0.020000\n",
+      "2021-09-21 22:17:39,453 epoch 7 - iter 39/130 - loss 0.39860650 - samples/sec: 22.37 - lr: 0.020000\n",
+      "2021-09-21 22:17:40,034 epoch 7 - iter 52/130 - loss 0.45369980 - samples/sec: 22.40 - lr: 0.020000\n",
+      "2021-09-21 22:17:40,602 epoch 7 - iter 65/130 - loss 0.40230735 - samples/sec: 22.92 - lr: 0.020000\n",
+      "2021-09-21 22:17:41,171 epoch 7 - iter 78/130 - loss 0.39768219 - samples/sec: 22.85 - lr: 0.020000\n",
+      "2021-09-21 22:17:41,740 epoch 7 - iter 91/130 - loss 0.37403433 - samples/sec: 22.90 - lr: 0.020000\n",
+      "2021-09-21 22:17:42,310 epoch 7 - iter 104/130 - loss 0.37071235 - samples/sec: 22.84 - lr: 0.020000\n",
+      "2021-09-21 22:17:42,868 epoch 7 - iter 117/130 - loss 0.34680954 - samples/sec: 23.28 - lr: 0.020000\n",
+      "2021-09-21 22:17:43,556 epoch 7 - iter 130/130 - loss 0.32505444 - samples/sec: 18.92 - lr: 0.020000\n",
+      "2021-09-21 22:17:43,557 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:17:43,558 EPOCH 7 done: loss 0.3251 - lr 0.0200000\n",
+      "2021-09-21 22:17:43,885 DEV : loss 0.45000481605529785 - score 0.3571\n",
+      "2021-09-21 22:17:43,886 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:17:43,888 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:17:44,468 epoch 8 - iter 13/130 - loss 0.35061605 - samples/sec: 23.22 - lr: 0.020000\n",
+      "2021-09-21 22:17:45,042 epoch 8 - iter 26/130 - loss 0.44012504 - samples/sec: 22.68 - lr: 0.020000\n",
+      "2021-09-21 22:17:45,605 epoch 8 - iter 39/130 - loss 0.35853658 - samples/sec: 23.11 - lr: 0.020000\n",
+      "2021-09-21 22:17:46,162 epoch 8 - iter 52/130 - loss 0.29236794 - samples/sec: 23.34 - lr: 0.020000\n",
+      "2021-09-21 22:17:46,727 epoch 8 - iter 65/130 - loss 0.28816967 - samples/sec: 23.03 - lr: 0.020000\n",
+      "2021-09-21 22:17:47,280 epoch 8 - iter 78/130 - loss 0.26256676 - samples/sec: 23.54 - lr: 0.020000\n",
+      "2021-09-21 22:17:47,846 epoch 8 - iter 91/130 - loss 0.27623596 - samples/sec: 23.03 - lr: 0.020000\n",
+      "2021-09-21 22:17:48,406 epoch 8 - iter 104/130 - loss 0.24435668 - samples/sec: 23.22 - lr: 0.020000\n",
+      "2021-09-21 22:17:48,968 epoch 8 - iter 117/130 - loss 0.26931700 - samples/sec: 23.14 - lr: 0.020000\n",
+      "2021-09-21 22:17:49,528 epoch 8 - iter 130/130 - loss 0.24777868 - samples/sec: 23.26 - lr: 0.020000\n",
+      "2021-09-21 22:17:49,529 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:17:49,529 EPOCH 8 done: loss 0.2478 - lr 0.0200000\n",
+      "2021-09-21 22:17:49,858 DEV : loss 0.5577907562255859 - score 0.3571\n",
+      "2021-09-21 22:17:49,858 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 22:17:49,860 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:17:50,448 epoch 9 - iter 13/130 - loss 0.26945050 - samples/sec: 22.89 - lr: 0.020000\n",
+      "2021-09-21 22:17:51,006 epoch 9 - iter 26/130 - loss 0.19633312 - samples/sec: 23.33 - lr: 0.020000\n",
+      "2021-09-21 22:17:51,571 epoch 9 - iter 39/130 - loss 0.22802272 - samples/sec: 23.05 - lr: 0.020000\n",
+      "2021-09-21 22:17:52,139 epoch 9 - iter 52/130 - loss 0.21947134 - samples/sec: 22.92 - lr: 0.020000\n",
+      "2021-09-21 22:17:52,701 epoch 9 - iter 65/130 - loss 0.21506085 - samples/sec: 23.14 - lr: 0.020000\n",
+      "2021-09-21 22:17:53,255 epoch 9 - iter 78/130 - loss 0.18089904 - samples/sec: 23.48 - lr: 0.020000\n",
+      "2021-09-21 22:17:53,810 epoch 9 - iter 91/130 - loss 0.15610206 - samples/sec: 23.44 - lr: 0.020000\n",
+      "2021-09-21 22:17:54,363 epoch 9 - iter 104/130 - loss 0.13732538 - samples/sec: 23.54 - lr: 0.020000\n",
+      "2021-09-21 22:17:54,922 epoch 9 - iter 117/130 - loss 0.13816804 - samples/sec: 23.29 - lr: 0.020000\n",
+      "2021-09-21 22:17:55,480 epoch 9 - iter 130/130 - loss 0.14581659 - samples/sec: 23.33 - lr: 0.020000\n",
+      "2021-09-21 22:17:55,481 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:17:55,481 EPOCH 9 done: loss 0.1458 - lr 0.0200000\n",
+      "2021-09-21 22:17:55,810 DEV : loss 0.2750832438468933 - score 0.5714\n",
+      "2021-09-21 22:17:55,811 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:37:58,918 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:37:59,761 epoch 9 - iter 13/130 - loss 0.13881880 - samples/sec: 16.39 - lr: 0.020000\n",
-      "2021-09-08 02:38:00,618 epoch 9 - iter 26/130 - loss 0.25556388 - samples/sec: 15.19 - lr: 0.020000\n",
-      "2021-09-08 02:38:01,462 epoch 9 - iter 39/130 - loss 0.30191899 - samples/sec: 15.42 - lr: 0.020000\n",
-      "2021-09-08 02:38:02,273 epoch 9 - iter 52/130 - loss 0.30018973 - samples/sec: 16.04 - lr: 0.020000\n",
-      "2021-09-08 02:38:03,081 epoch 9 - iter 65/130 - loss 0.31237855 - samples/sec: 16.10 - lr: 0.020000\n",
-      "2021-09-08 02:38:03,877 epoch 9 - iter 78/130 - loss 0.29034929 - samples/sec: 16.36 - lr: 0.020000\n",
-      "2021-09-08 02:38:04,729 epoch 9 - iter 91/130 - loss 0.25371707 - samples/sec: 15.26 - lr: 0.020000\n",
-      "2021-09-08 02:38:05,529 epoch 9 - iter 104/130 - loss 0.31094630 - samples/sec: 16.27 - lr: 0.020000\n",
-      "2021-09-08 02:38:06,376 epoch 9 - iter 117/130 - loss 0.31034712 - samples/sec: 15.37 - lr: 0.020000\n",
-      "2021-09-08 02:38:07,231 epoch 9 - iter 130/130 - loss 0.29883753 - samples/sec: 15.22 - lr: 0.020000\n",
-      "2021-09-08 02:38:07,232 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:38:07,233 EPOCH 9 done: loss 0.2988 - lr 0.0200000\n",
-      "2021-09-08 02:38:07,823 DEV : loss 0.3549293279647827 - score 0.3571\n",
-      "2021-09-08 02:38:07,825 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:38:07,826 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:38:08,695 epoch 10 - iter 13/130 - loss 0.10830257 - samples/sec: 15.85 - lr: 0.020000\n",
-      "2021-09-08 02:38:09,526 epoch 10 - iter 26/130 - loss 0.17859559 - samples/sec: 15.67 - lr: 0.020000\n",
-      "2021-09-08 02:38:10,320 epoch 10 - iter 39/130 - loss 0.16221785 - samples/sec: 16.39 - lr: 0.020000\n",
-      "2021-09-08 02:38:11,127 epoch 10 - iter 52/130 - loss 0.12638341 - samples/sec: 16.13 - lr: 0.020000\n",
-      "2021-09-08 02:38:11,853 epoch 10 - iter 65/130 - loss 0.16293904 - samples/sec: 17.92 - lr: 0.020000\n",
-      "2021-09-08 02:38:12,583 epoch 10 - iter 78/130 - loss 0.15818843 - samples/sec: 17.82 - lr: 0.020000\n",
-      "2021-09-08 02:38:13,308 epoch 10 - iter 91/130 - loss 0.16576812 - samples/sec: 17.95 - lr: 0.020000\n",
-      "2021-09-08 02:38:14,060 epoch 10 - iter 104/130 - loss 0.16251212 - samples/sec: 17.31 - lr: 0.020000\n",
-      "2021-09-08 02:38:14,755 epoch 10 - iter 117/130 - loss 0.17454534 - samples/sec: 18.73 - lr: 0.020000\n",
-      "2021-09-08 02:38:15,453 epoch 10 - iter 130/130 - loss 0.18573094 - samples/sec: 18.64 - lr: 0.020000\n",
-      "2021-09-08 02:38:15,455 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:38:15,455 EPOCH 10 done: loss 0.1857 - lr 0.0200000\n",
-      "2021-09-08 02:38:16,101 DEV : loss 0.3549848198890686 - score 0.2857\n",
-      "2021-09-08 02:38:16,102 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:38:19,966 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:38:19,967 Testing using best model ...\n",
-      "2021-09-08 02:38:19,968 loading file None/best-model.pt\n",
+      "2021-09-21 22:17:59,555 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:18:00,143 epoch 10 - iter 13/130 - loss 0.04416619 - samples/sec: 23.07 - lr: 0.020000\n",
+      "2021-09-21 22:18:00,699 epoch 10 - iter 26/130 - loss 0.07600841 - samples/sec: 23.41 - lr: 0.020000\n",
+      "2021-09-21 22:18:01,268 epoch 10 - iter 39/130 - loss 0.16313747 - samples/sec: 22.88 - lr: 0.020000\n",
+      "2021-09-21 22:18:01,839 epoch 10 - iter 52/130 - loss 0.13426480 - samples/sec: 22.82 - lr: 0.020000\n",
+      "2021-09-21 22:18:02,414 epoch 10 - iter 65/130 - loss 0.19483942 - samples/sec: 22.61 - lr: 0.020000\n",
+      "2021-09-21 22:18:02,973 epoch 10 - iter 78/130 - loss 0.18135246 - samples/sec: 23.30 - lr: 0.020000\n",
+      "2021-09-21 22:18:03,530 epoch 10 - iter 91/130 - loss 0.17603986 - samples/sec: 23.37 - lr: 0.020000\n",
+      "2021-09-21 22:18:04,086 epoch 10 - iter 104/130 - loss 0.15880447 - samples/sec: 23.40 - lr: 0.020000\n",
+      "2021-09-21 22:18:04,646 epoch 10 - iter 117/130 - loss 0.16442761 - samples/sec: 23.24 - lr: 0.020000\n",
+      "2021-09-21 22:18:05,204 epoch 10 - iter 130/130 - loss 0.15313675 - samples/sec: 23.34 - lr: 0.020000\n",
+      "2021-09-21 22:18:05,205 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:18:05,205 EPOCH 10 done: loss 0.1531 - lr 0.0200000\n",
+      "2021-09-21 22:18:05,533 DEV : loss 0.37342369556427 - score 0.5\n",
+      "2021-09-21 22:18:05,534 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:18:15,224 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:18:15,225 Testing using best model ...\n",
+      "2021-09-21 22:18:15,227 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:38:25,530 \t0.4375\n",
-      "2021-09-08 02:38:25,530 \n",
+      "2021-09-21 22:18:20,268 \t0.5\n",
+      "2021-09-21 22:18:20,268 \n",
       "Results:\n",
-      "- F-score (micro) 0.4375\n",
-      "- F-score (macro) 0.2042\n",
-      "- Accuracy 0.4375\n",
+      "- F-score (micro) 0.5\n",
+      "- F-score (macro) 0.2319\n",
+      "- Accuracy 0.5\n",
       "\n",
       "By class:\n",
       "                                                                                                                                        precision    recall  f1-score   support\n",
       "\n",
-      "                                                                                                           undertake a journey or trip     1.0000    0.3333    0.5000         3\n",
-      "                                                                          the practical application of science to commerce or industry     0.0000    0.0000    0.0000         0\n",
-      "                                                                                        a healthy state of wellbeing free from disease     1.0000    0.5000    0.6667         2\n",
-      "                                                                                          an adult female person (as opposed to a man)     0.0000    0.0000    0.0000         0\n",
+      "                                                                                                           undertake a journey or trip     0.0000    0.0000    0.0000         1\n",
+      "                                                                          the practical application of science to commerce or industry     0.0000    0.0000    0.0000         1\n",
+      "                                                                                        a healthy state of wellbeing free from disease     0.0000    0.0000    0.0000         1\n",
+      "                                                                                          an adult female person (as opposed to a man)     1.0000    1.0000    1.0000         1\n",
       " a father or mother; one who begets or one who gives birth to or nurtures and raises a child a relative who plays the role of guardian     0.0000    0.0000    0.0000         0\n",
-      "a father or mother; one who begets or one who gives birth to or nurtures and raises a child; a relative who plays the role of guardian     0.0000    0.0000    0.0000         1\n",
+      "a father or mother; one who begets or one who gives birth to or nurtures and raises a child; a relative who plays the role of guardian     0.0000    0.0000    0.0000         0\n",
       "                                                                       the social event at which the ceremony of marriage is performed     0.0000    0.0000    0.0000         0\n",
-      "                                                               the latest and most admired style in clothes and cosmetics and behavior     0.0000    0.0000    0.0000         1\n",
-      "                                                                            an activity that is diverting and that holds the attention     0.3333    1.0000    0.5000         1\n",
-      "                                                                                           a particular branch of scientific knowledge     0.6667    1.0000    0.8000         2\n",
-      "                                                                                                   the legal dissolution of a marriage     0.6667    1.0000    0.8000         2\n",
-      "                                an act punishable by law; usually considered an evil act an evil act not necessarily punishable by law     0.0000    0.0000    0.0000         1\n",
-      "                                                          a strong belief in a supernatural power or powers that control human destiny     0.0000    0.0000    0.0000         1\n",
-      "                                                                       an active diversion requiring physical exertion and competition     0.0000    0.0000    0.0000         0\n",
-      "                                                                        social relations involving intrigue to gain authority or power     0.0000    0.0000    0.0000         0\n",
-      "                                                                                          light and humorous drama with a happy ending     0.0000    0.0000    0.0000         2\n",
+      "                                                               the latest and most admired style in clothes and cosmetics and behavior     0.0000    0.0000    0.0000         0\n",
+      "                                                                            an activity that is diverting and that holds the attention     0.0000    0.0000    0.0000         0\n",
+      "                                                                                           a particular branch of scientific knowledge     0.0000    0.0000    0.0000         1\n",
+      "                                                                                                   the legal dissolution of a marriage     0.0000    0.0000    0.0000         0\n",
+      "                                an act punishable by law; usually considered an evil act an evil act not necessarily punishable by law     0.0000    0.0000    0.0000         0\n",
+      "                                                          a strong belief in a supernatural power or powers that control human destiny     1.0000    0.6667    0.8000         3\n",
+      "                                                                       an active diversion requiring physical exertion and competition     1.0000    0.6667    0.8000         3\n",
+      "                                                                        social relations involving intrigue to gain authority or power     0.5000    1.0000    0.6667         1\n",
+      "                                                                                          light and humorous drama with a happy ending     0.4000    0.5000    0.4444         4\n",
       "\n",
-      "                                                                                                                             micro avg     0.4375    0.4375    0.4375        16\n",
-      "                                                                                                                             macro avg     0.2292    0.2396    0.2042        16\n",
-      "                                                                                                                          weighted avg     0.5000    0.4375    0.4083        16\n",
-      "                                                                                                                           samples avg     0.4375    0.4375    0.4375        16\n",
+      "                                                                                                                             micro avg     0.5000    0.5000    0.5000        16\n",
+      "                                                                                                                             macro avg     0.2437    0.2396    0.2319        16\n",
+      "                                                                                                                          weighted avg     0.5687    0.5000    0.5153        16\n",
+      "                                                                                                                           samples avg     0.5000    0.5000    0.5000        16\n",
       "\n"
      ]
     },
@@ -7593,26 +7599,26 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:38:25,531 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:38:49,399 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
+      "2021-09-21 22:18:20,269 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:18:32,760 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:38:53,685 Computing label dictionary. Progress:\n"
+      "2021-09-21 22:18:36,664 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 146/146 [00:00<00:00, 76032.83it/s]"
+      "100%|██████████| 146/146 [00:00<00:00, 74262.48it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:38:53,689 [b'undertake a journey or trip', b'the practical application of science to commerce or industry', b'a healthy state of wellbeing free from disease', b'an adult female person (as opposed to a man)', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child a relative who plays the role of guardian', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child; a relative who plays the role of guardian', b'the social event at which the ceremony of marriage is performed', b'the latest and most admired style in clothes and cosmetics and behavior', b'an activity that is diverting and that holds the attention', b'a particular branch of scientific knowledge', b'the legal dissolution of a marriage', b'an act punishable by law; usually considered an evil act an evil act not necessarily punishable by law', b'a strong belief in a supernatural power or powers that control human destiny', b'an active diversion requiring physical exertion and competition', b'social relations involving intrigue to gain authority or power', b'light and humorous drama with a happy ending']\n",
-      "2021-09-08 02:38:53,698 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:38:53,699 Model: \"TARSClassifier(\n",
+      "2021-09-21 22:18:36,667 [b'undertake a journey or trip', b'the practical application of science to commerce or industry', b'a healthy state of wellbeing free from disease', b'an adult female person (as opposed to a man)', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child a relative who plays the role of guardian', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child; a relative who plays the role of guardian', b'the social event at which the ceremony of marriage is performed', b'the latest and most admired style in clothes and cosmetics and behavior', b'an activity that is diverting and that holds the attention', b'a particular branch of scientific knowledge', b'the legal dissolution of a marriage', b'an act punishable by law; usually considered an evil act an evil act not necessarily punishable by law', b'a strong belief in a supernatural power or powers that control human destiny', b'an active diversion requiring physical exertion and competition', b'social relations involving intrigue to gain authority or power', b'light and humorous drama with a happy ending']\n",
+      "2021-09-21 22:18:36,676 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:18:36,678 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -7925,25 +7931,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:38:53,700 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:38:53,700 Corpus: \"Corpus: 130 train + 14 dev + 16 test sentences\"\n",
-      "2021-09-08 02:38:53,701 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:38:53,701 Parameters:\n",
-      "2021-09-08 02:38:53,701  - learning_rate: \"0.02\"\n",
-      "2021-09-08 02:38:53,701  - mini_batch_size: \"1\"\n",
-      "2021-09-08 02:38:53,702  - patience: \"3\"\n",
-      "2021-09-08 02:38:53,702  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 02:38:53,702  - max_epochs: \"10\"\n",
-      "2021-09-08 02:38:53,703  - shuffle: \"True\"\n",
-      "2021-09-08 02:38:53,703  - train_with_dev: \"False\"\n",
-      "2021-09-08 02:38:53,703  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 02:38:53,703 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:38:53,704 Model training base path: \"None\"\n",
-      "2021-09-08 02:38:53,704 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:38:53,704 Device: cuda:0\n",
-      "2021-09-08 02:38:53,705 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:38:53,705 Embeddings storage mode: cpu\n",
-      "2021-09-08 02:38:53,712 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 22:18:36,679 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:18:36,679 Corpus: \"Corpus: 130 train + 14 dev + 16 test sentences\"\n",
+      "2021-09-21 22:18:36,680 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:18:36,680 Parameters:\n",
+      "2021-09-21 22:18:36,680  - learning_rate: \"0.02\"\n",
+      "2021-09-21 22:18:36,680  - mini_batch_size: \"1\"\n",
+      "2021-09-21 22:18:36,681  - patience: \"3\"\n",
+      "2021-09-21 22:18:36,681  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 22:18:36,681  - max_epochs: \"10\"\n",
+      "2021-09-21 22:18:36,681  - shuffle: \"True\"\n",
+      "2021-09-21 22:18:36,682  - train_with_dev: \"False\"\n",
+      "2021-09-21 22:18:36,682  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 22:18:36,682 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:18:36,683 Model training base path: \"None\"\n",
+      "2021-09-21 22:18:36,683 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:18:36,683 Device: cuda:0\n",
+      "2021-09-21 22:18:36,683 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:18:36,684 Embeddings storage mode: cpu\n",
+      "2021-09-21 22:18:36,690 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -7957,202 +7963,202 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:38:54,509 epoch 1 - iter 13/130 - loss 0.38478973 - samples/sec: 17.40 - lr: 0.020000\n",
-      "2021-09-08 02:38:55,236 epoch 1 - iter 26/130 - loss 0.39768543 - samples/sec: 17.90 - lr: 0.020000\n",
-      "2021-09-08 02:38:55,989 epoch 1 - iter 39/130 - loss 0.47377267 - samples/sec: 17.29 - lr: 0.020000\n",
-      "2021-09-08 02:38:56,745 epoch 1 - iter 52/130 - loss 0.50526681 - samples/sec: 17.21 - lr: 0.020000\n",
-      "2021-09-08 02:38:57,497 epoch 1 - iter 65/130 - loss 0.57117976 - samples/sec: 17.30 - lr: 0.020000\n",
-      "2021-09-08 02:38:58,257 epoch 1 - iter 78/130 - loss 0.55794054 - samples/sec: 17.11 - lr: 0.020000\n",
-      "2021-09-08 02:38:59,030 epoch 1 - iter 91/130 - loss 0.51574751 - samples/sec: 16.84 - lr: 0.020000\n",
-      "2021-09-08 02:38:59,843 epoch 1 - iter 104/130 - loss 0.51227428 - samples/sec: 16.00 - lr: 0.020000\n",
-      "2021-09-08 02:39:00,598 epoch 1 - iter 117/130 - loss 0.53745593 - samples/sec: 17.24 - lr: 0.020000\n",
-      "2021-09-08 02:39:01,378 epoch 1 - iter 130/130 - loss 0.51700674 - samples/sec: 16.67 - lr: 0.020000\n",
-      "2021-09-08 02:39:01,380 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:39:01,380 EPOCH 1 done: loss 0.5170 - lr 0.0200000\n",
-      "2021-09-08 02:39:01,947 DEV : loss 0.3362935781478882 - score 0.1429\n",
-      "2021-09-08 02:39:01,948 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 02:39:06,149 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:39:06,986 epoch 2 - iter 13/130 - loss 0.67900417 - samples/sec: 16.18 - lr: 0.020000\n",
-      "2021-09-08 02:39:07,767 epoch 2 - iter 26/130 - loss 0.71050463 - samples/sec: 16.67 - lr: 0.020000\n",
-      "2021-09-08 02:39:08,570 epoch 2 - iter 39/130 - loss 0.71459576 - samples/sec: 16.20 - lr: 0.020000\n",
-      "2021-09-08 02:39:09,393 epoch 2 - iter 52/130 - loss 0.70010438 - samples/sec: 15.82 - lr: 0.020000\n",
-      "2021-09-08 02:39:10,159 epoch 2 - iter 65/130 - loss 0.67479225 - samples/sec: 16.99 - lr: 0.020000\n",
-      "2021-09-08 02:39:10,916 epoch 2 - iter 78/130 - loss 0.68119608 - samples/sec: 17.18 - lr: 0.020000\n",
-      "2021-09-08 02:39:11,715 epoch 2 - iter 91/130 - loss 0.67791274 - samples/sec: 16.31 - lr: 0.020000\n",
-      "2021-09-08 02:39:12,500 epoch 2 - iter 104/130 - loss 0.68102979 - samples/sec: 16.56 - lr: 0.020000\n",
-      "2021-09-08 02:39:13,278 epoch 2 - iter 117/130 - loss 0.68402305 - samples/sec: 16.74 - lr: 0.020000\n",
-      "2021-09-08 02:39:14,084 epoch 2 - iter 130/130 - loss 0.67805172 - samples/sec: 16.16 - lr: 0.020000\n",
-      "2021-09-08 02:39:14,085 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:39:14,085 EPOCH 2 done: loss 0.6781 - lr 0.0200000\n",
-      "2021-09-08 02:39:14,682 DEV : loss 0.5466846823692322 - score 0.2143\n",
-      "2021-09-08 02:39:14,683 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:18:37,290 epoch 1 - iter 13/130 - loss 0.41569880 - samples/sec: 22.54 - lr: 0.020000\n",
+      "2021-09-21 22:18:37,864 epoch 1 - iter 26/130 - loss 0.43437983 - samples/sec: 22.67 - lr: 0.020000\n",
+      "2021-09-21 22:18:38,459 epoch 1 - iter 39/130 - loss 0.52868768 - samples/sec: 21.89 - lr: 0.020000\n",
+      "2021-09-21 22:18:39,050 epoch 1 - iter 52/130 - loss 0.60606208 - samples/sec: 22.02 - lr: 0.020000\n",
+      "2021-09-21 22:18:39,644 epoch 1 - iter 65/130 - loss 0.58656284 - samples/sec: 21.90 - lr: 0.020000\n",
+      "2021-09-21 22:18:40,225 epoch 1 - iter 78/130 - loss 0.55830433 - samples/sec: 22.38 - lr: 0.020000\n",
+      "2021-09-21 22:18:40,802 epoch 1 - iter 91/130 - loss 0.53967211 - samples/sec: 22.55 - lr: 0.020000\n",
+      "2021-09-21 22:18:41,385 epoch 1 - iter 104/130 - loss 0.54596751 - samples/sec: 22.32 - lr: 0.020000\n",
+      "2021-09-21 22:18:41,969 epoch 1 - iter 117/130 - loss 0.55340061 - samples/sec: 22.28 - lr: 0.020000\n",
+      "2021-09-21 22:18:42,556 epoch 1 - iter 130/130 - loss 0.53654740 - samples/sec: 22.18 - lr: 0.020000\n",
+      "2021-09-21 22:18:42,557 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:18:42,557 EPOCH 1 done: loss 0.5365 - lr 0.0200000\n",
+      "2021-09-21 22:18:42,872 DEV : loss 0.5910035967826843 - score 0.0714\n",
+      "2021-09-21 22:18:42,873 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:39:18,935 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:39:19,770 epoch 3 - iter 13/130 - loss 0.71949561 - samples/sec: 16.56 - lr: 0.020000\n",
-      "2021-09-08 02:39:20,550 epoch 3 - iter 26/130 - loss 0.67300824 - samples/sec: 16.68 - lr: 0.020000\n",
-      "2021-09-08 02:39:21,305 epoch 3 - iter 39/130 - loss 0.62028874 - samples/sec: 17.23 - lr: 0.020000\n",
-      "2021-09-08 02:39:22,047 epoch 3 - iter 52/130 - loss 0.55972152 - samples/sec: 17.56 - lr: 0.020000\n",
-      "2021-09-08 02:39:22,780 epoch 3 - iter 65/130 - loss 0.54217707 - samples/sec: 17.75 - lr: 0.020000\n",
-      "2021-09-08 02:39:23,509 epoch 3 - iter 78/130 - loss 0.53737558 - samples/sec: 17.85 - lr: 0.020000\n",
-      "2021-09-08 02:39:24,242 epoch 3 - iter 91/130 - loss 0.55555078 - samples/sec: 17.75 - lr: 0.020000\n",
-      "2021-09-08 02:39:25,004 epoch 3 - iter 104/130 - loss 0.58728463 - samples/sec: 17.08 - lr: 0.020000\n",
-      "2021-09-08 02:39:25,810 epoch 3 - iter 117/130 - loss 0.59475536 - samples/sec: 16.15 - lr: 0.020000\n",
-      "2021-09-08 02:39:26,561 epoch 3 - iter 130/130 - loss 0.60115450 - samples/sec: 17.34 - lr: 0.020000\n",
-      "2021-09-08 02:39:26,562 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:39:26,562 EPOCH 3 done: loss 0.6012 - lr 0.0200000\n",
-      "2021-09-08 02:39:27,153 DEV : loss 0.41513678431510925 - score 0.2857\n",
-      "2021-09-08 02:39:27,154 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:18:47,008 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:18:47,627 epoch 2 - iter 13/130 - loss 0.93302747 - samples/sec: 21.85 - lr: 0.020000\n",
+      "2021-09-21 22:18:48,216 epoch 2 - iter 26/130 - loss 0.92781356 - samples/sec: 22.13 - lr: 0.020000\n",
+      "2021-09-21 22:18:48,802 epoch 2 - iter 39/130 - loss 0.85042164 - samples/sec: 22.18 - lr: 0.020000\n",
+      "2021-09-21 22:18:49,368 epoch 2 - iter 52/130 - loss 0.80565256 - samples/sec: 22.99 - lr: 0.020000\n",
+      "2021-09-21 22:18:49,922 epoch 2 - iter 65/130 - loss 0.77503813 - samples/sec: 23.52 - lr: 0.020000\n",
+      "2021-09-21 22:18:50,478 epoch 2 - iter 78/130 - loss 0.75244694 - samples/sec: 23.38 - lr: 0.020000\n",
+      "2021-09-21 22:18:51,031 epoch 2 - iter 91/130 - loss 0.73852477 - samples/sec: 23.56 - lr: 0.020000\n",
+      "2021-09-21 22:18:51,584 epoch 2 - iter 104/130 - loss 0.72686391 - samples/sec: 23.54 - lr: 0.020000\n",
+      "2021-09-21 22:18:52,133 epoch 2 - iter 117/130 - loss 0.71754802 - samples/sec: 23.67 - lr: 0.020000\n",
+      "2021-09-21 22:18:52,696 epoch 2 - iter 130/130 - loss 0.71021154 - samples/sec: 23.13 - lr: 0.020000\n",
+      "2021-09-21 22:18:52,697 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:18:52,697 EPOCH 2 done: loss 0.7102 - lr 0.0200000\n",
+      "2021-09-21 22:18:53,010 DEV : loss 0.4588887691497803 - score 0.1429\n",
+      "2021-09-21 22:18:53,011 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:39:31,173 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:39:31,977 epoch 4 - iter 13/130 - loss 0.49976840 - samples/sec: 17.24 - lr: 0.020000\n",
-      "2021-09-08 02:39:32,731 epoch 4 - iter 26/130 - loss 0.48653297 - samples/sec: 17.25 - lr: 0.020000\n",
-      "2021-09-08 02:39:33,507 epoch 4 - iter 39/130 - loss 0.53995916 - samples/sec: 16.78 - lr: 0.020000\n",
-      "2021-09-08 02:39:34,276 epoch 4 - iter 52/130 - loss 0.57562626 - samples/sec: 16.92 - lr: 0.020000\n",
-      "2021-09-08 02:39:35,032 epoch 4 - iter 65/130 - loss 0.59784307 - samples/sec: 17.22 - lr: 0.020000\n",
-      "2021-09-08 02:39:35,799 epoch 4 - iter 78/130 - loss 0.58844357 - samples/sec: 16.98 - lr: 0.020000\n",
-      "2021-09-08 02:39:36,582 epoch 4 - iter 91/130 - loss 0.57170125 - samples/sec: 16.61 - lr: 0.020000\n",
-      "2021-09-08 02:39:37,366 epoch 4 - iter 104/130 - loss 0.56109229 - samples/sec: 16.60 - lr: 0.020000\n",
-      "2021-09-08 02:39:38,104 epoch 4 - iter 117/130 - loss 0.56466880 - samples/sec: 17.64 - lr: 0.020000\n",
-      "2021-09-08 02:39:38,842 epoch 4 - iter 130/130 - loss 0.56343226 - samples/sec: 17.63 - lr: 0.020000\n",
-      "2021-09-08 02:39:38,843 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:39:38,844 EPOCH 4 done: loss 0.5634 - lr 0.0200000\n",
-      "2021-09-08 02:39:39,428 DEV : loss 0.655899167060852 - score 0.3571\n",
-      "2021-09-08 02:39:39,429 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:19:02,272 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:19:02,851 epoch 3 - iter 13/130 - loss 0.63959224 - samples/sec: 23.42 - lr: 0.020000\n",
+      "2021-09-21 22:19:03,406 epoch 3 - iter 26/130 - loss 0.63454731 - samples/sec: 23.46 - lr: 0.020000\n",
+      "2021-09-21 22:19:03,960 epoch 3 - iter 39/130 - loss 0.63655888 - samples/sec: 23.51 - lr: 0.020000\n",
+      "2021-09-21 22:19:04,513 epoch 3 - iter 52/130 - loss 0.63752378 - samples/sec: 23.52 - lr: 0.020000\n",
+      "2021-09-21 22:19:05,064 epoch 3 - iter 65/130 - loss 0.63753443 - samples/sec: 23.61 - lr: 0.020000\n",
+      "2021-09-21 22:19:05,653 epoch 3 - iter 78/130 - loss 0.63229710 - samples/sec: 22.09 - lr: 0.020000\n",
+      "2021-09-21 22:19:06,238 epoch 3 - iter 91/130 - loss 0.63653765 - samples/sec: 22.25 - lr: 0.020000\n",
+      "2021-09-21 22:19:06,816 epoch 3 - iter 104/130 - loss 0.63487414 - samples/sec: 22.53 - lr: 0.020000\n",
+      "2021-09-21 22:19:07,396 epoch 3 - iter 117/130 - loss 0.64013985 - samples/sec: 22.41 - lr: 0.020000\n",
+      "2021-09-21 22:19:07,990 epoch 3 - iter 130/130 - loss 0.63807119 - samples/sec: 21.93 - lr: 0.020000\n",
+      "2021-09-21 22:19:07,991 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:19:07,991 EPOCH 3 done: loss 0.6381 - lr 0.0200000\n",
+      "2021-09-21 22:19:08,308 DEV : loss 0.4595988690853119 - score 0.1429\n",
+      "2021-09-21 22:19:08,308 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:19:08,310 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:19:08,918 epoch 4 - iter 13/130 - loss 0.64273242 - samples/sec: 22.11 - lr: 0.020000\n",
+      "2021-09-21 22:19:09,500 epoch 4 - iter 26/130 - loss 0.63879866 - samples/sec: 22.37 - lr: 0.020000\n",
+      "2021-09-21 22:19:10,080 epoch 4 - iter 39/130 - loss 0.62002552 - samples/sec: 22.42 - lr: 0.020000\n",
+      "2021-09-21 22:19:10,674 epoch 4 - iter 52/130 - loss 0.66469804 - samples/sec: 21.93 - lr: 0.020000\n",
+      "2021-09-21 22:19:11,263 epoch 4 - iter 65/130 - loss 0.66945456 - samples/sec: 22.10 - lr: 0.020000\n",
+      "2021-09-21 22:19:11,843 epoch 4 - iter 78/130 - loss 0.63384337 - samples/sec: 22.42 - lr: 0.020000\n",
+      "2021-09-21 22:19:12,403 epoch 4 - iter 91/130 - loss 0.64045149 - samples/sec: 23.24 - lr: 0.020000\n",
+      "2021-09-21 22:19:12,980 epoch 4 - iter 104/130 - loss 0.63678298 - samples/sec: 22.55 - lr: 0.020000\n",
+      "2021-09-21 22:19:13,566 epoch 4 - iter 117/130 - loss 0.63867196 - samples/sec: 22.19 - lr: 0.020000\n",
+      "2021-09-21 22:19:14,157 epoch 4 - iter 130/130 - loss 0.64446190 - samples/sec: 22.05 - lr: 0.020000\n",
+      "2021-09-21 22:19:14,158 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:19:14,158 EPOCH 4 done: loss 0.6445 - lr 0.0200000\n",
+      "2021-09-21 22:19:14,474 DEV : loss 0.3800823390483856 - score 0.2857\n",
+      "2021-09-21 22:19:14,475 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:39:43,619 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:39:44,409 epoch 5 - iter 13/130 - loss 0.40071958 - samples/sec: 17.55 - lr: 0.020000\n",
-      "2021-09-08 02:39:45,158 epoch 5 - iter 26/130 - loss 0.48223127 - samples/sec: 17.39 - lr: 0.020000\n",
-      "2021-09-08 02:39:45,864 epoch 5 - iter 39/130 - loss 0.48932094 - samples/sec: 18.42 - lr: 0.020000\n",
-      "2021-09-08 02:39:46,667 epoch 5 - iter 52/130 - loss 0.45990272 - samples/sec: 16.20 - lr: 0.020000\n",
-      "2021-09-08 02:39:47,408 epoch 5 - iter 65/130 - loss 0.45240591 - samples/sec: 17.58 - lr: 0.020000\n",
-      "2021-09-08 02:39:48,144 epoch 5 - iter 78/130 - loss 0.45848022 - samples/sec: 17.68 - lr: 0.020000\n",
-      "2021-09-08 02:39:48,901 epoch 5 - iter 91/130 - loss 0.47880303 - samples/sec: 17.20 - lr: 0.020000\n",
-      "2021-09-08 02:39:49,608 epoch 5 - iter 104/130 - loss 0.46936724 - samples/sec: 18.40 - lr: 0.020000\n",
-      "2021-09-08 02:39:50,342 epoch 5 - iter 117/130 - loss 0.50306244 - samples/sec: 17.73 - lr: 0.020000\n",
-      "2021-09-08 02:39:51,126 epoch 5 - iter 130/130 - loss 0.49955767 - samples/sec: 16.59 - lr: 0.020000\n",
-      "2021-09-08 02:39:51,128 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:39:51,128 EPOCH 5 done: loss 0.4996 - lr 0.0200000\n",
-      "2021-09-08 02:39:51,644 DEV : loss 0.6591951251029968 - score 0.1429\n",
-      "2021-09-08 02:39:51,645 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:39:51,647 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:39:52,413 epoch 6 - iter 13/130 - loss 0.29756028 - samples/sec: 18.13 - lr: 0.020000\n",
-      "2021-09-08 02:39:53,166 epoch 6 - iter 26/130 - loss 0.46254231 - samples/sec: 17.29 - lr: 0.020000\n",
-      "2021-09-08 02:39:53,940 epoch 6 - iter 39/130 - loss 0.51412371 - samples/sec: 16.80 - lr: 0.020000\n",
-      "2021-09-08 02:39:54,689 epoch 6 - iter 52/130 - loss 0.46851725 - samples/sec: 17.39 - lr: 0.020000\n",
-      "2021-09-08 02:39:55,429 epoch 6 - iter 65/130 - loss 0.46910028 - samples/sec: 17.60 - lr: 0.020000\n",
-      "2021-09-08 02:39:56,177 epoch 6 - iter 78/130 - loss 0.47084791 - samples/sec: 17.39 - lr: 0.020000\n",
-      "2021-09-08 02:39:56,905 epoch 6 - iter 91/130 - loss 0.44585829 - samples/sec: 17.87 - lr: 0.020000\n",
-      "2021-09-08 02:39:57,633 epoch 6 - iter 104/130 - loss 0.43605437 - samples/sec: 17.88 - lr: 0.020000\n",
-      "2021-09-08 02:39:58,412 epoch 6 - iter 117/130 - loss 0.43649524 - samples/sec: 16.69 - lr: 0.020000\n",
-      "2021-09-08 02:39:59,155 epoch 6 - iter 130/130 - loss 0.42603917 - samples/sec: 17.53 - lr: 0.020000\n"
+      "2021-09-21 22:19:18,448 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:19:19,064 epoch 5 - iter 13/130 - loss 0.57764051 - samples/sec: 21.98 - lr: 0.020000\n",
+      "2021-09-21 22:19:19,655 epoch 5 - iter 26/130 - loss 0.52614180 - samples/sec: 22.03 - lr: 0.020000\n",
+      "2021-09-21 22:19:20,241 epoch 5 - iter 39/130 - loss 0.57324086 - samples/sec: 22.23 - lr: 0.020000\n",
+      "2021-09-21 22:19:20,830 epoch 5 - iter 52/130 - loss 0.61668191 - samples/sec: 22.06 - lr: 0.020000\n",
+      "2021-09-21 22:19:21,415 epoch 5 - iter 65/130 - loss 0.60388438 - samples/sec: 22.25 - lr: 0.020000\n",
+      "2021-09-21 22:19:22,003 epoch 5 - iter 78/130 - loss 0.60477586 - samples/sec: 22.15 - lr: 0.020000\n",
+      "2021-09-21 22:19:22,594 epoch 5 - iter 91/130 - loss 0.62065298 - samples/sec: 22.02 - lr: 0.020000\n",
+      "2021-09-21 22:19:23,183 epoch 5 - iter 104/130 - loss 0.60981869 - samples/sec: 22.08 - lr: 0.020000\n",
+      "2021-09-21 22:19:23,767 epoch 5 - iter 117/130 - loss 0.59070019 - samples/sec: 22.29 - lr: 0.020000\n",
+      "2021-09-21 22:19:24,356 epoch 5 - iter 130/130 - loss 0.61080862 - samples/sec: 22.08 - lr: 0.020000\n",
+      "2021-09-21 22:19:24,357 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:19:24,358 EPOCH 5 done: loss 0.6108 - lr 0.0200000\n",
+      "2021-09-21 22:19:24,673 DEV : loss 0.5366729497909546 - score 0.1429\n",
+      "2021-09-21 22:19:24,674 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:19:24,676 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:19:25,284 epoch 6 - iter 13/130 - loss 0.64707642 - samples/sec: 22.10 - lr: 0.020000\n",
+      "2021-09-21 22:19:25,868 epoch 6 - iter 26/130 - loss 0.51991323 - samples/sec: 22.27 - lr: 0.020000\n",
+      "2021-09-21 22:19:26,563 epoch 6 - iter 39/130 - loss 0.58910733 - samples/sec: 18.73 - lr: 0.020000\n",
+      "2021-09-21 22:19:27,143 epoch 6 - iter 52/130 - loss 0.60092379 - samples/sec: 22.45 - lr: 0.020000\n",
+      "2021-09-21 22:19:27,729 epoch 6 - iter 65/130 - loss 0.64669429 - samples/sec: 22.20 - lr: 0.020000\n",
+      "2021-09-21 22:19:28,289 epoch 6 - iter 78/130 - loss 0.64601468 - samples/sec: 23.27 - lr: 0.020000\n",
+      "2021-09-21 22:19:28,844 epoch 6 - iter 91/130 - loss 0.64696473 - samples/sec: 23.41 - lr: 0.020000\n",
+      "2021-09-21 22:19:29,415 epoch 6 - iter 104/130 - loss 0.65378763 - samples/sec: 22.79 - lr: 0.020000\n",
+      "2021-09-21 22:19:29,970 epoch 6 - iter 117/130 - loss 0.65216644 - samples/sec: 23.47 - lr: 0.020000\n",
+      "2021-09-21 22:19:30,520 epoch 6 - iter 130/130 - loss 0.65107951 - samples/sec: 23.68 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:39:59,156 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:39:59,156 EPOCH 6 done: loss 0.4260 - lr 0.0200000\n",
-      "2021-09-08 02:39:59,766 DEV : loss 0.4531661570072174 - score 0.2143\n",
-      "2021-09-08 02:39:59,767 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:39:59,791 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:40:00,584 epoch 7 - iter 13/130 - loss 0.29801290 - samples/sec: 17.45 - lr: 0.020000\n",
-      "2021-09-08 02:40:01,324 epoch 7 - iter 26/130 - loss 0.18774681 - samples/sec: 17.58 - lr: 0.020000\n",
-      "2021-09-08 02:40:02,074 epoch 7 - iter 39/130 - loss 0.26855933 - samples/sec: 17.37 - lr: 0.020000\n",
-      "2021-09-08 02:40:02,821 epoch 7 - iter 52/130 - loss 0.29544013 - samples/sec: 17.41 - lr: 0.020000\n",
-      "2021-09-08 02:40:03,550 epoch 7 - iter 65/130 - loss 0.33747994 - samples/sec: 17.85 - lr: 0.020000\n",
-      "2021-09-08 02:40:04,292 epoch 7 - iter 78/130 - loss 0.33556216 - samples/sec: 17.55 - lr: 0.020000\n",
-      "2021-09-08 02:40:05,032 epoch 7 - iter 91/130 - loss 0.33611735 - samples/sec: 17.59 - lr: 0.020000\n",
-      "2021-09-08 02:40:05,751 epoch 7 - iter 104/130 - loss 0.33263544 - samples/sec: 18.12 - lr: 0.020000\n",
-      "2021-09-08 02:40:06,456 epoch 7 - iter 117/130 - loss 0.32371581 - samples/sec: 18.46 - lr: 0.020000\n",
-      "2021-09-08 02:40:07,218 epoch 7 - iter 130/130 - loss 0.32387032 - samples/sec: 17.08 - lr: 0.020000\n",
-      "2021-09-08 02:40:07,219 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:40:07,219 EPOCH 7 done: loss 0.3239 - lr 0.0200000\n",
-      "2021-09-08 02:40:07,788 DEV : loss 0.31394943594932556 - score 0.3571\n",
-      "2021-09-08 02:40:07,789 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:19:30,521 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:19:30,521 EPOCH 6 done: loss 0.6511 - lr 0.0200000\n",
+      "2021-09-21 22:19:30,835 DEV : loss 0.4833381772041321 - score 0.0714\n",
+      "2021-09-21 22:19:30,835 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 22:19:30,837 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:19:31,407 epoch 7 - iter 13/130 - loss 0.64223972 - samples/sec: 23.65 - lr: 0.020000\n",
+      "2021-09-21 22:19:31,960 epoch 7 - iter 26/130 - loss 0.64615876 - samples/sec: 23.54 - lr: 0.020000\n",
+      "2021-09-21 22:19:32,509 epoch 7 - iter 39/130 - loss 0.64488530 - samples/sec: 23.71 - lr: 0.020000\n",
+      "2021-09-21 22:19:33,069 epoch 7 - iter 52/130 - loss 0.63573047 - samples/sec: 23.24 - lr: 0.020000\n",
+      "2021-09-21 22:19:33,630 epoch 7 - iter 65/130 - loss 0.54622186 - samples/sec: 23.19 - lr: 0.020000\n",
+      "2021-09-21 22:19:34,218 epoch 7 - iter 78/130 - loss 0.55276947 - samples/sec: 22.11 - lr: 0.020000\n",
+      "2021-09-21 22:19:34,801 epoch 7 - iter 91/130 - loss 0.53169172 - samples/sec: 22.35 - lr: 0.020000\n",
+      "2021-09-21 22:19:35,376 epoch 7 - iter 104/130 - loss 0.52554996 - samples/sec: 22.63 - lr: 0.020000\n",
+      "2021-09-21 22:19:35,964 epoch 7 - iter 117/130 - loss 0.53754888 - samples/sec: 22.12 - lr: 0.020000\n",
+      "2021-09-21 22:19:36,544 epoch 7 - iter 130/130 - loss 0.53868029 - samples/sec: 22.41 - lr: 0.020000\n",
+      "2021-09-21 22:19:36,546 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:19:36,546 EPOCH 7 done: loss 0.5387 - lr 0.0200000\n",
+      "2021-09-21 22:19:36,860 DEV : loss 0.4266835153102875 - score 0.3571\n",
+      "2021-09-21 22:19:36,861 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:40:12,302 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:40:13,080 epoch 8 - iter 13/130 - loss 0.39813937 - samples/sec: 17.82 - lr: 0.020000\n",
-      "2021-09-08 02:40:13,849 epoch 8 - iter 26/130 - loss 0.45813401 - samples/sec: 16.94 - lr: 0.020000\n",
-      "2021-09-08 02:40:14,585 epoch 8 - iter 39/130 - loss 0.41956720 - samples/sec: 17.67 - lr: 0.020000\n",
-      "2021-09-08 02:40:15,340 epoch 8 - iter 52/130 - loss 0.32442375 - samples/sec: 17.25 - lr: 0.020000\n",
-      "2021-09-08 02:40:16,065 epoch 8 - iter 65/130 - loss 0.31865958 - samples/sec: 17.94 - lr: 0.020000\n",
-      "2021-09-08 02:40:16,809 epoch 8 - iter 78/130 - loss 0.29947569 - samples/sec: 17.51 - lr: 0.020000\n",
-      "2021-09-08 02:40:17,576 epoch 8 - iter 91/130 - loss 0.32583161 - samples/sec: 16.96 - lr: 0.020000\n",
-      "2021-09-08 02:40:18,349 epoch 8 - iter 104/130 - loss 0.31121932 - samples/sec: 16.84 - lr: 0.020000\n",
-      "2021-09-08 02:40:19,094 epoch 8 - iter 117/130 - loss 0.35163880 - samples/sec: 17.48 - lr: 0.020000\n",
-      "2021-09-08 02:40:19,797 epoch 8 - iter 130/130 - loss 0.34214670 - samples/sec: 18.51 - lr: 0.020000\n",
-      "2021-09-08 02:40:19,798 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:40:19,799 EPOCH 8 done: loss 0.3421 - lr 0.0200000\n",
-      "2021-09-08 02:40:20,445 DEV : loss 0.49057406187057495 - score 0.2857\n",
-      "2021-09-08 02:40:20,445 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:40:20,447 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:40:21,191 epoch 9 - iter 13/130 - loss 0.20092866 - samples/sec: 18.17 - lr: 0.020000\n",
-      "2021-09-08 02:40:21,923 epoch 9 - iter 26/130 - loss 0.33066225 - samples/sec: 17.80 - lr: 0.020000\n",
-      "2021-09-08 02:40:22,618 epoch 9 - iter 39/130 - loss 0.29791518 - samples/sec: 18.70 - lr: 0.020000\n",
-      "2021-09-08 02:40:23,343 epoch 9 - iter 52/130 - loss 0.24090417 - samples/sec: 17.96 - lr: 0.020000\n",
-      "2021-09-08 02:40:24,073 epoch 9 - iter 65/130 - loss 0.27570231 - samples/sec: 17.82 - lr: 0.020000\n",
-      "2021-09-08 02:40:24,790 epoch 9 - iter 78/130 - loss 0.28528999 - samples/sec: 18.16 - lr: 0.020000\n",
-      "2021-09-08 02:40:25,495 epoch 9 - iter 91/130 - loss 0.27081918 - samples/sec: 18.46 - lr: 0.020000\n",
-      "2021-09-08 02:40:26,258 epoch 9 - iter 104/130 - loss 0.25345099 - samples/sec: 17.04 - lr: 0.020000\n",
-      "2021-09-08 02:40:26,982 epoch 9 - iter 117/130 - loss 0.25054047 - samples/sec: 17.98 - lr: 0.020000\n",
-      "2021-09-08 02:40:27,695 epoch 9 - iter 130/130 - loss 0.24745283 - samples/sec: 18.25 - lr: 0.020000\n",
-      "2021-09-08 02:40:27,696 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:40:27,697 EPOCH 9 done: loss 0.2475 - lr 0.0200000\n",
-      "2021-09-08 02:40:28,331 DEV : loss 0.6551463603973389 - score 0.3571\n",
-      "2021-09-08 02:40:28,332 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:40:28,334 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:40:29,076 epoch 10 - iter 13/130 - loss 0.21514049 - samples/sec: 18.72 - lr: 0.020000\n",
-      "2021-09-08 02:40:29,846 epoch 10 - iter 26/130 - loss 0.16611261 - samples/sec: 16.91 - lr: 0.020000\n",
-      "2021-09-08 02:40:30,576 epoch 10 - iter 39/130 - loss 0.21645949 - samples/sec: 17.82 - lr: 0.020000\n",
-      "2021-09-08 02:40:31,355 epoch 10 - iter 52/130 - loss 0.27026852 - samples/sec: 16.72 - lr: 0.020000\n",
-      "2021-09-08 02:40:32,071 epoch 10 - iter 65/130 - loss 0.23930643 - samples/sec: 18.16 - lr: 0.020000\n",
-      "2021-09-08 02:40:32,785 epoch 10 - iter 78/130 - loss 0.22451289 - samples/sec: 18.24 - lr: 0.020000\n",
-      "2021-09-08 02:40:33,469 epoch 10 - iter 91/130 - loss 0.20815185 - samples/sec: 19.04 - lr: 0.020000\n",
-      "2021-09-08 02:40:34,206 epoch 10 - iter 104/130 - loss 0.19548526 - samples/sec: 17.64 - lr: 0.020000\n",
-      "2021-09-08 02:40:34,919 epoch 10 - iter 117/130 - loss 0.19364433 - samples/sec: 18.26 - lr: 0.020000\n",
-      "2021-09-08 02:40:35,639 epoch 10 - iter 130/130 - loss 0.18630416 - samples/sec: 18.09 - lr: 0.020000\n",
-      "2021-09-08 02:40:35,640 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:40:35,640 EPOCH 10 done: loss 0.1863 - lr 0.0200000\n",
-      "2021-09-08 02:40:36,217 DEV : loss 0.5762989521026611 - score 0.4286\n",
-      "2021-09-08 02:40:36,218 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:19:41,123 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:19:41,737 epoch 8 - iter 13/130 - loss 0.78204890 - samples/sec: 22.05 - lr: 0.020000\n",
+      "2021-09-21 22:19:42,321 epoch 8 - iter 26/130 - loss 0.61390786 - samples/sec: 22.29 - lr: 0.020000\n",
+      "2021-09-21 22:19:42,896 epoch 8 - iter 39/130 - loss 0.53179303 - samples/sec: 22.64 - lr: 0.020000\n",
+      "2021-09-21 22:19:43,465 epoch 8 - iter 52/130 - loss 0.54720423 - samples/sec: 22.89 - lr: 0.020000\n",
+      "2021-09-21 22:19:44,039 epoch 8 - iter 65/130 - loss 0.54594251 - samples/sec: 22.64 - lr: 0.020000\n",
+      "2021-09-21 22:19:44,625 epoch 8 - iter 78/130 - loss 0.53404039 - samples/sec: 22.24 - lr: 0.020000\n",
+      "2021-09-21 22:19:45,210 epoch 8 - iter 91/130 - loss 0.53413306 - samples/sec: 22.21 - lr: 0.020000\n",
+      "2021-09-21 22:19:45,782 epoch 8 - iter 104/130 - loss 0.50949747 - samples/sec: 22.78 - lr: 0.020000\n",
+      "2021-09-21 22:19:46,355 epoch 8 - iter 117/130 - loss 0.51774540 - samples/sec: 22.70 - lr: 0.020000\n",
+      "2021-09-21 22:19:46,938 epoch 8 - iter 130/130 - loss 0.51323264 - samples/sec: 22.35 - lr: 0.020000\n",
+      "2021-09-21 22:19:46,939 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:19:46,939 EPOCH 8 done: loss 0.5132 - lr 0.0200000\n",
+      "2021-09-21 22:19:47,255 DEV : loss 0.5440257787704468 - score 0.2857\n",
+      "2021-09-21 22:19:47,256 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:19:47,258 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:19:47,849 epoch 9 - iter 13/130 - loss 0.26416956 - samples/sec: 22.76 - lr: 0.020000\n",
+      "2021-09-21 22:19:48,424 epoch 9 - iter 26/130 - loss 0.26494446 - samples/sec: 22.61 - lr: 0.020000\n",
+      "2021-09-21 22:19:48,997 epoch 9 - iter 39/130 - loss 0.30741143 - samples/sec: 22.70 - lr: 0.020000\n",
+      "2021-09-21 22:19:49,569 epoch 9 - iter 52/130 - loss 0.31836358 - samples/sec: 22.79 - lr: 0.020000\n",
+      "2021-09-21 22:19:50,143 epoch 9 - iter 65/130 - loss 0.34711756 - samples/sec: 22.68 - lr: 0.020000\n",
+      "2021-09-21 22:19:50,715 epoch 9 - iter 78/130 - loss 0.33464675 - samples/sec: 22.72 - lr: 0.020000\n",
+      "2021-09-21 22:19:51,297 epoch 9 - iter 91/130 - loss 0.38253967 - samples/sec: 22.40 - lr: 0.020000\n",
+      "2021-09-21 22:19:51,869 epoch 9 - iter 104/130 - loss 0.38899057 - samples/sec: 22.74 - lr: 0.020000\n",
+      "2021-09-21 22:19:52,436 epoch 9 - iter 117/130 - loss 0.37963062 - samples/sec: 22.94 - lr: 0.020000\n",
+      "2021-09-21 22:19:53,015 epoch 9 - iter 130/130 - loss 0.37568348 - samples/sec: 22.48 - lr: 0.020000\n",
+      "2021-09-21 22:19:53,017 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:19:53,017 EPOCH 9 done: loss 0.3757 - lr 0.0200000\n",
+      "2021-09-21 22:19:53,330 DEV : loss 0.373149037361145 - score 0.3571\n",
+      "2021-09-21 22:19:53,331 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:40:48,800 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:40:48,801 Testing using best model ...\n",
-      "2021-09-08 02:40:48,802 loading file None/best-model.pt\n",
+      "2021-09-21 22:19:57,283 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:19:57,882 epoch 10 - iter 13/130 - loss 0.20543711 - samples/sec: 22.66 - lr: 0.020000\n",
+      "2021-09-21 22:19:58,460 epoch 10 - iter 26/130 - loss 0.23181338 - samples/sec: 22.49 - lr: 0.020000\n",
+      "2021-09-21 22:19:59,024 epoch 10 - iter 39/130 - loss 0.26575775 - samples/sec: 23.09 - lr: 0.020000\n",
+      "2021-09-21 22:19:59,584 epoch 10 - iter 52/130 - loss 0.24739762 - samples/sec: 23.24 - lr: 0.020000\n",
+      "2021-09-21 22:20:00,159 epoch 10 - iter 65/130 - loss 0.30250345 - samples/sec: 22.62 - lr: 0.020000\n",
+      "2021-09-21 22:20:00,719 epoch 10 - iter 78/130 - loss 0.29020958 - samples/sec: 23.27 - lr: 0.020000\n",
+      "2021-09-21 22:20:01,294 epoch 10 - iter 91/130 - loss 0.31563894 - samples/sec: 22.64 - lr: 0.020000\n",
+      "2021-09-21 22:20:01,864 epoch 10 - iter 104/130 - loss 0.30977175 - samples/sec: 22.82 - lr: 0.020000\n",
+      "2021-09-21 22:20:02,456 epoch 10 - iter 117/130 - loss 0.33092029 - samples/sec: 22.00 - lr: 0.020000\n",
+      "2021-09-21 22:20:03,043 epoch 10 - iter 130/130 - loss 0.37378012 - samples/sec: 22.20 - lr: 0.020000\n",
+      "2021-09-21 22:20:03,044 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:20:03,045 EPOCH 10 done: loss 0.3738 - lr 0.0200000\n",
+      "2021-09-21 22:20:03,362 DEV : loss 0.4409712255001068 - score 0.4286\n",
+      "2021-09-21 22:20:03,363 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 22:20:11,677 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:20:11,678 Testing using best model ...\n",
+      "2021-09-21 22:20:11,680 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:40:54,475 \t0.625\n",
-      "2021-09-08 02:40:54,475 \n",
+      "2021-09-21 22:20:15,941 \t0.5625\n",
+      "2021-09-21 22:20:15,941 \n",
       "Results:\n",
-      "- F-score (micro) 0.625\n",
-      "- F-score (macro) 0.4583\n",
-      "- Accuracy 0.625\n",
+      "- F-score (micro) 0.5625\n",
+      "- F-score (macro) 0.3348\n",
+      "- Accuracy 0.5625\n",
       "\n",
       "By class:\n",
       "                                                                                                                                        precision    recall  f1-score   support\n",
       "\n",
-      "                                                                                                           undertake a journey or trip     1.0000    0.3333    0.5000         3\n",
-      "                                                                          the practical application of science to commerce or industry     0.0000    0.0000    0.0000         1\n",
-      "                                                                                        a healthy state of wellbeing free from disease     0.5000    1.0000    0.6667         1\n",
-      "                                                                                          an adult female person (as opposed to a man)     0.5000    1.0000    0.6667         1\n",
+      "                                                                                                           undertake a journey or trip     0.5000    1.0000    0.6667         1\n",
+      "                                                                          the practical application of science to commerce or industry     0.0000    0.0000    0.0000         2\n",
+      "                                                                                        a healthy state of wellbeing free from disease     0.0000    0.0000    0.0000         1\n",
+      "                                                                                          an adult female person (as opposed to a man)     1.0000    1.0000    1.0000         1\n",
       " a father or mother; one who begets or one who gives birth to or nurtures and raises a child a relative who plays the role of guardian     0.0000    0.0000    0.0000         0\n",
-      "a father or mother; one who begets or one who gives birth to or nurtures and raises a child; a relative who plays the role of guardian     1.0000    1.0000    1.0000         1\n",
-      "                                                                       the social event at which the ceremony of marriage is performed     0.0000    0.0000    0.0000         1\n",
-      "                                                               the latest and most admired style in clothes and cosmetics and behavior     0.0000    0.0000    0.0000         1\n",
-      "                                                                            an activity that is diverting and that holds the attention     0.3333    1.0000    0.5000         1\n",
-      "                                                                                           a particular branch of scientific knowledge     0.0000    0.0000    0.0000         0\n",
-      "                                                                                                   the legal dissolution of a marriage     1.0000    1.0000    1.0000         1\n",
-      "                                an act punishable by law; usually considered an evil act an evil act not necessarily punishable by law     1.0000    1.0000    1.0000         1\n",
-      "                                                          a strong belief in a supernatural power or powers that control human destiny     1.0000    1.0000    1.0000         2\n",
-      "                                                                       an active diversion requiring physical exertion and competition     1.0000    1.0000    1.0000         1\n",
-      "                                                                        social relations involving intrigue to gain authority or power     0.0000    0.0000    0.0000         1\n",
+      "a father or mother; one who begets or one who gives birth to or nurtures and raises a child; a relative who plays the role of guardian     0.0000    0.0000    0.0000         1\n",
+      "                                                                       the social event at which the ceremony of marriage is performed     0.5000    1.0000    0.6667         1\n",
+      "                                                               the latest and most admired style in clothes and cosmetics and behavior     0.0000    0.0000    0.0000         0\n",
+      "                                                                            an activity that is diverting and that holds the attention     0.5000    0.5000    0.5000         2\n",
+      "                                                                                           a particular branch of scientific knowledge     1.0000    1.0000    1.0000         1\n",
+      "                                                                                                   the legal dissolution of a marriage     0.7500    1.0000    0.8571         3\n",
+      "                                an act punishable by law; usually considered an evil act an evil act not necessarily punishable by law     0.0000    0.0000    0.0000         1\n",
+      "                                                          a strong belief in a supernatural power or powers that control human destiny     0.0000    0.0000    0.0000         0\n",
+      "                                                                       an active diversion requiring physical exertion and competition     1.0000    0.5000    0.6667         2\n",
+      "                                                                        social relations involving intrigue to gain authority or power     0.0000    0.0000    0.0000         0\n",
       "                                                                                          light and humorous drama with a happy ending     0.0000    0.0000    0.0000         0\n",
       "\n",
-      "                                                                                                                             micro avg     0.6250    0.6250    0.6250        16\n",
-      "                                                                                                                             macro avg     0.4583    0.5208    0.4583        16\n",
-      "                                                                                                                          weighted avg     0.6458    0.6250    0.5833        16\n",
-      "                                                                                                                           samples avg     0.6250    0.6250    0.6250        16\n",
+      "                                                                                                                             micro avg     0.5625    0.5625    0.5625        16\n",
+      "                                                                                                                             macro avg     0.3281    0.3750    0.3348        16\n",
+      "                                                                                                                          weighted avg     0.5156    0.5625    0.5149        16\n",
+      "                                                                                                                           samples avg     0.5625    0.5625    0.5625        16\n",
       "\n"
      ]
     },
@@ -8160,26 +8166,26 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:40:54,476 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:41:17,760 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
+      "2021-09-21 22:20:15,942 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:20:28,395 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:41:21,709 Computing label dictionary. Progress:\n"
+      "2021-09-21 22:20:32,376 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 146/146 [00:00<00:00, 74999.19it/s]"
+      "100%|██████████| 146/146 [00:00<00:00, 73031.41it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:41:21,712 [b'undertake a journey or trip', b'the practical application of science to commerce or industry', b'a healthy state of wellbeing free from disease', b'an adult female person (as opposed to a man)', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child a relative who plays the role of guardian', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child; a relative who plays the role of guardian', b'the social event at which the ceremony of marriage is performed', b'the latest and most admired style in clothes and cosmetics and behavior', b'an activity that is diverting and that holds the attention', b'a particular branch of scientific knowledge', b'the legal dissolution of a marriage', b'an act punishable by law; usually considered an evil act an evil act not necessarily punishable by law', b'a strong belief in a supernatural power or powers that control human destiny', b'an active diversion requiring physical exertion and competition', b'social relations involving intrigue to gain authority or power', b'light and humorous drama with a happy ending']\n",
-      "2021-09-08 02:41:21,721 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:41:21,724 Model: \"TARSClassifier(\n",
+      "2021-09-21 22:20:32,380 [b'undertake a journey or trip', b'the practical application of science to commerce or industry', b'a healthy state of wellbeing free from disease', b'an adult female person (as opposed to a man)', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child a relative who plays the role of guardian', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child; a relative who plays the role of guardian', b'the social event at which the ceremony of marriage is performed', b'the latest and most admired style in clothes and cosmetics and behavior', b'an activity that is diverting and that holds the attention', b'a particular branch of scientific knowledge', b'the legal dissolution of a marriage', b'an act punishable by law; usually considered an evil act an evil act not necessarily punishable by law', b'a strong belief in a supernatural power or powers that control human destiny', b'an active diversion requiring physical exertion and competition', b'social relations involving intrigue to gain authority or power', b'light and humorous drama with a happy ending']\n",
+      "2021-09-21 22:20:32,388 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:20:32,390 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -8492,25 +8498,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:41:21,724 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:41:21,725 Corpus: \"Corpus: 130 train + 14 dev + 16 test sentences\"\n",
-      "2021-09-08 02:41:21,725 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:41:21,726 Parameters:\n",
-      "2021-09-08 02:41:21,726  - learning_rate: \"0.02\"\n",
-      "2021-09-08 02:41:21,726  - mini_batch_size: \"1\"\n",
-      "2021-09-08 02:41:21,727  - patience: \"3\"\n",
-      "2021-09-08 02:41:21,727  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 02:41:21,727  - max_epochs: \"10\"\n",
-      "2021-09-08 02:41:21,727  - shuffle: \"True\"\n",
-      "2021-09-08 02:41:21,728  - train_with_dev: \"False\"\n",
-      "2021-09-08 02:41:21,728  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 02:41:21,728 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:41:21,729 Model training base path: \"None\"\n",
-      "2021-09-08 02:41:21,729 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:41:21,729 Device: cuda:0\n",
-      "2021-09-08 02:41:21,729 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:41:21,730 Embeddings storage mode: cpu\n",
-      "2021-09-08 02:41:21,736 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 22:20:32,391 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:20:32,391 Corpus: \"Corpus: 130 train + 14 dev + 16 test sentences\"\n",
+      "2021-09-21 22:20:32,391 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:20:32,392 Parameters:\n",
+      "2021-09-21 22:20:32,392  - learning_rate: \"0.02\"\n",
+      "2021-09-21 22:20:32,392  - mini_batch_size: \"1\"\n",
+      "2021-09-21 22:20:32,392  - patience: \"3\"\n",
+      "2021-09-21 22:20:32,393  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 22:20:32,393  - max_epochs: \"10\"\n",
+      "2021-09-21 22:20:32,393  - shuffle: \"True\"\n",
+      "2021-09-21 22:20:32,394  - train_with_dev: \"False\"\n",
+      "2021-09-21 22:20:32,394  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 22:20:32,394 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:20:32,394 Model training base path: \"None\"\n",
+      "2021-09-21 22:20:32,395 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:20:32,395 Device: cuda:0\n",
+      "2021-09-21 22:20:32,395 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:20:32,395 Embeddings storage mode: cpu\n",
+      "2021-09-21 22:20:32,417 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -8524,201 +8530,202 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:41:22,520 epoch 1 - iter 13/130 - loss 0.40393746 - samples/sec: 17.76 - lr: 0.020000\n",
-      "2021-09-08 02:41:23,258 epoch 1 - iter 26/130 - loss 0.37025548 - samples/sec: 17.64 - lr: 0.020000\n",
-      "2021-09-08 02:41:23,975 epoch 1 - iter 39/130 - loss 0.38012828 - samples/sec: 18.14 - lr: 0.020000\n",
-      "2021-09-08 02:41:24,767 epoch 1 - iter 52/130 - loss 0.42612588 - samples/sec: 16.43 - lr: 0.020000\n",
-      "2021-09-08 02:41:25,523 epoch 1 - iter 65/130 - loss 0.46847262 - samples/sec: 17.20 - lr: 0.020000\n",
-      "2021-09-08 02:41:26,268 epoch 1 - iter 78/130 - loss 0.51119174 - samples/sec: 17.48 - lr: 0.020000\n",
-      "2021-09-08 02:41:27,083 epoch 1 - iter 91/130 - loss 0.53287747 - samples/sec: 15.95 - lr: 0.020000\n",
-      "2021-09-08 02:41:27,814 epoch 1 - iter 104/130 - loss 0.54161548 - samples/sec: 17.81 - lr: 0.020000\n",
-      "2021-09-08 02:41:28,576 epoch 1 - iter 117/130 - loss 0.56118086 - samples/sec: 17.07 - lr: 0.020000\n",
-      "2021-09-08 02:41:29,347 epoch 1 - iter 130/130 - loss 0.53889270 - samples/sec: 16.88 - lr: 0.020000\n",
-      "2021-09-08 02:41:29,348 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:41:29,349 EPOCH 1 done: loss 0.5389 - lr 0.0200000\n",
-      "2021-09-08 02:41:29,925 DEV : loss 0.8034178614616394 - score 0.0\n",
-      "2021-09-08 02:41:29,925 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:20:33,007 epoch 1 - iter 13/130 - loss 0.54219832 - samples/sec: 22.93 - lr: 0.020000\n",
+      "2021-09-21 22:20:33,587 epoch 1 - iter 26/130 - loss 0.49372473 - samples/sec: 22.43 - lr: 0.020000\n",
+      "2021-09-21 22:20:34,175 epoch 1 - iter 39/130 - loss 0.55074115 - samples/sec: 22.11 - lr: 0.020000\n",
+      "2021-09-21 22:20:34,779 epoch 1 - iter 52/130 - loss 0.54695375 - samples/sec: 21.59 - lr: 0.020000\n",
+      "2021-09-21 22:20:35,376 epoch 1 - iter 65/130 - loss 0.53581724 - samples/sec: 21.79 - lr: 0.020000\n",
+      "2021-09-21 22:20:35,967 epoch 1 - iter 78/130 - loss 0.53766878 - samples/sec: 22.02 - lr: 0.020000\n",
+      "2021-09-21 22:20:36,551 epoch 1 - iter 91/130 - loss 0.55314878 - samples/sec: 22.32 - lr: 0.020000\n",
+      "2021-09-21 22:20:37,123 epoch 1 - iter 104/130 - loss 0.53678307 - samples/sec: 22.73 - lr: 0.020000\n",
+      "2021-09-21 22:20:37,710 epoch 1 - iter 117/130 - loss 0.55025090 - samples/sec: 22.19 - lr: 0.020000\n",
+      "2021-09-21 22:20:38,315 epoch 1 - iter 130/130 - loss 0.54402477 - samples/sec: 21.48 - lr: 0.020000\n",
+      "2021-09-21 22:20:38,317 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:20:38,317 EPOCH 1 done: loss 0.5440 - lr 0.0200000\n",
+      "2021-09-21 22:20:38,637 DEV : loss 0.6179983615875244 - score 0.0714\n",
+      "2021-09-21 22:20:38,638 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 22:20:42,692 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:20:43,311 epoch 2 - iter 13/130 - loss 0.86031199 - samples/sec: 21.86 - lr: 0.020000\n",
+      "2021-09-21 22:20:43,898 epoch 2 - iter 26/130 - loss 0.72977163 - samples/sec: 22.19 - lr: 0.020000\n",
+      "2021-09-21 22:20:44,486 epoch 2 - iter 39/130 - loss 0.71128757 - samples/sec: 22.11 - lr: 0.020000\n",
+      "2021-09-21 22:20:45,077 epoch 2 - iter 52/130 - loss 0.70459009 - samples/sec: 22.05 - lr: 0.020000\n",
+      "2021-09-21 22:20:45,666 epoch 2 - iter 65/130 - loss 0.68646669 - samples/sec: 22.09 - lr: 0.020000\n",
+      "2021-09-21 22:20:46,256 epoch 2 - iter 78/130 - loss 0.67976877 - samples/sec: 22.05 - lr: 0.020000\n",
+      "2021-09-21 22:20:46,845 epoch 2 - iter 91/130 - loss 0.66898732 - samples/sec: 22.09 - lr: 0.020000\n",
+      "2021-09-21 22:20:47,426 epoch 2 - iter 104/130 - loss 0.66331540 - samples/sec: 22.38 - lr: 0.020000\n",
+      "2021-09-21 22:20:48,017 epoch 2 - iter 117/130 - loss 0.65500263 - samples/sec: 22.05 - lr: 0.020000\n",
+      "2021-09-21 22:20:48,599 epoch 2 - iter 130/130 - loss 0.62742747 - samples/sec: 22.34 - lr: 0.020000\n",
+      "2021-09-21 22:20:48,600 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:20:48,600 EPOCH 2 done: loss 0.6274 - lr 0.0200000\n",
+      "2021-09-21 22:20:48,915 DEV : loss 0.7521701455116272 - score 0.3571\n",
+      "2021-09-21 22:20:48,915 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:41:34,158 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:41:34,974 epoch 2 - iter 13/130 - loss 1.00329191 - samples/sec: 17.36 - lr: 0.020000\n",
-      "2021-09-08 02:41:35,699 epoch 2 - iter 26/130 - loss 0.80393596 - samples/sec: 17.95 - lr: 0.020000\n",
-      "2021-09-08 02:41:36,470 epoch 2 - iter 39/130 - loss 0.74231694 - samples/sec: 16.89 - lr: 0.020000\n",
-      "2021-09-08 02:41:37,229 epoch 2 - iter 52/130 - loss 0.72745550 - samples/sec: 17.13 - lr: 0.020000\n",
-      "2021-09-08 02:41:37,982 epoch 2 - iter 65/130 - loss 0.70817026 - samples/sec: 17.29 - lr: 0.020000\n",
-      "2021-09-08 02:41:38,772 epoch 2 - iter 78/130 - loss 0.67661849 - samples/sec: 16.47 - lr: 0.020000\n",
-      "2021-09-08 02:41:39,520 epoch 2 - iter 91/130 - loss 0.66619928 - samples/sec: 17.39 - lr: 0.020000\n",
-      "2021-09-08 02:41:40,277 epoch 2 - iter 104/130 - loss 0.67661661 - samples/sec: 17.20 - lr: 0.020000\n",
-      "2021-09-08 02:41:41,027 epoch 2 - iter 117/130 - loss 0.66881341 - samples/sec: 17.35 - lr: 0.020000\n",
-      "2021-09-08 02:41:41,829 epoch 2 - iter 130/130 - loss 0.66954119 - samples/sec: 16.21 - lr: 0.020000\n",
-      "2021-09-08 02:41:41,830 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:41:41,831 EPOCH 2 done: loss 0.6695 - lr 0.0200000\n",
-      "2021-09-08 02:41:42,403 DEV : loss 0.42124679684638977 - score 0.2857\n",
-      "2021-09-08 02:41:42,403 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:20:53,060 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:20:53,667 epoch 3 - iter 13/130 - loss 0.40151886 - samples/sec: 22.31 - lr: 0.020000\n",
+      "2021-09-21 22:20:54,256 epoch 3 - iter 26/130 - loss 0.45574911 - samples/sec: 22.09 - lr: 0.020000\n",
+      "2021-09-21 22:20:54,830 epoch 3 - iter 39/130 - loss 0.41011847 - samples/sec: 22.67 - lr: 0.020000\n",
+      "2021-09-21 22:20:55,416 epoch 3 - iter 52/130 - loss 0.46894188 - samples/sec: 22.21 - lr: 0.020000\n",
+      "2021-09-21 22:20:56,000 epoch 3 - iter 65/130 - loss 0.51467512 - samples/sec: 22.29 - lr: 0.020000\n",
+      "2021-09-21 22:20:56,585 epoch 3 - iter 78/130 - loss 0.53779036 - samples/sec: 22.22 - lr: 0.020000\n",
+      "2021-09-21 22:20:57,169 epoch 3 - iter 91/130 - loss 0.53035909 - samples/sec: 22.30 - lr: 0.020000\n",
+      "2021-09-21 22:20:57,758 epoch 3 - iter 104/130 - loss 0.53983397 - samples/sec: 22.09 - lr: 0.020000\n",
+      "2021-09-21 22:20:58,347 epoch 3 - iter 117/130 - loss 0.54472254 - samples/sec: 22.10 - lr: 0.020000\n",
+      "2021-09-21 22:20:58,920 epoch 3 - iter 130/130 - loss 0.52183205 - samples/sec: 22.71 - lr: 0.020000\n",
+      "2021-09-21 22:20:58,921 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:20:58,921 EPOCH 3 done: loss 0.5218 - lr 0.0200000\n",
+      "2021-09-21 22:20:59,333 DEV : loss 0.5126273036003113 - score 0.4286\n",
+      "2021-09-21 22:20:59,334 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:41:46,303 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:41:47,101 epoch 3 - iter 13/130 - loss 0.42759757 - samples/sec: 17.26 - lr: 0.020000\n",
-      "2021-09-08 02:41:47,819 epoch 3 - iter 26/130 - loss 0.49200525 - samples/sec: 18.14 - lr: 0.020000\n",
-      "2021-09-08 02:41:48,602 epoch 3 - iter 39/130 - loss 0.48724530 - samples/sec: 16.62 - lr: 0.020000\n",
-      "2021-09-08 02:41:49,361 epoch 3 - iter 52/130 - loss 0.48786794 - samples/sec: 17.14 - lr: 0.020000\n",
-      "2021-09-08 02:41:50,136 epoch 3 - iter 65/130 - loss 0.47356546 - samples/sec: 16.79 - lr: 0.020000\n",
-      "2021-09-08 02:41:50,925 epoch 3 - iter 78/130 - loss 0.50266226 - samples/sec: 16.49 - lr: 0.020000\n",
-      "2021-09-08 02:41:51,687 epoch 3 - iter 91/130 - loss 0.50625505 - samples/sec: 17.09 - lr: 0.020000\n",
-      "2021-09-08 02:41:52,411 epoch 3 - iter 104/130 - loss 0.47500367 - samples/sec: 17.96 - lr: 0.020000\n",
-      "2021-09-08 02:41:53,206 epoch 3 - iter 117/130 - loss 0.50549409 - samples/sec: 16.37 - lr: 0.020000\n",
-      "2021-09-08 02:41:53,941 epoch 3 - iter 130/130 - loss 0.50590169 - samples/sec: 17.71 - lr: 0.020000\n",
-      "2021-09-08 02:41:53,942 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:41:53,942 EPOCH 3 done: loss 0.5059 - lr 0.0200000\n",
-      "2021-09-08 02:41:54,535 DEV : loss 0.2694588899612427 - score 0.4286\n",
-      "2021-09-08 02:41:54,536 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:21:03,540 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:21:04,145 epoch 4 - iter 13/130 - loss 0.55564277 - samples/sec: 22.42 - lr: 0.020000\n",
+      "2021-09-21 22:21:04,719 epoch 4 - iter 26/130 - loss 0.50805892 - samples/sec: 22.66 - lr: 0.020000\n",
+      "2021-09-21 22:21:05,302 epoch 4 - iter 39/130 - loss 0.52478508 - samples/sec: 22.30 - lr: 0.020000\n",
+      "2021-09-21 22:21:05,877 epoch 4 - iter 52/130 - loss 0.49411775 - samples/sec: 22.66 - lr: 0.020000\n",
+      "2021-09-21 22:21:06,457 epoch 4 - iter 65/130 - loss 0.51916072 - samples/sec: 22.43 - lr: 0.020000\n",
+      "2021-09-21 22:21:07,043 epoch 4 - iter 78/130 - loss 0.50477480 - samples/sec: 22.18 - lr: 0.020000\n",
+      "2021-09-21 22:21:07,619 epoch 4 - iter 91/130 - loss 0.48786381 - samples/sec: 22.63 - lr: 0.020000\n",
+      "2021-09-21 22:21:08,190 epoch 4 - iter 104/130 - loss 0.45881323 - samples/sec: 22.78 - lr: 0.020000\n",
+      "2021-09-21 22:21:08,749 epoch 4 - iter 117/130 - loss 0.44194299 - samples/sec: 23.28 - lr: 0.020000\n",
+      "2021-09-21 22:21:09,318 epoch 4 - iter 130/130 - loss 0.42786020 - samples/sec: 22.85 - lr: 0.020000\n",
+      "2021-09-21 22:21:09,319 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:21:09,320 EPOCH 4 done: loss 0.4279 - lr 0.0200000\n",
+      "2021-09-21 22:21:09,634 DEV : loss 0.6064906120300293 - score 0.5714\n",
+      "2021-09-21 22:21:09,635 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:41:58,522 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:41:59,264 epoch 4 - iter 13/130 - loss 0.19593777 - samples/sec: 18.68 - lr: 0.020000\n",
-      "2021-09-08 02:41:59,962 epoch 4 - iter 26/130 - loss 0.30945470 - samples/sec: 18.63 - lr: 0.020000\n",
-      "2021-09-08 02:42:00,699 epoch 4 - iter 39/130 - loss 0.34816846 - samples/sec: 17.66 - lr: 0.020000\n",
-      "2021-09-08 02:42:01,449 epoch 4 - iter 52/130 - loss 0.36767165 - samples/sec: 17.36 - lr: 0.020000\n",
-      "2021-09-08 02:42:02,186 epoch 4 - iter 65/130 - loss 0.38441615 - samples/sec: 17.65 - lr: 0.020000\n",
-      "2021-09-08 02:42:02,927 epoch 4 - iter 78/130 - loss 0.41305183 - samples/sec: 17.58 - lr: 0.020000\n",
-      "2021-09-08 02:42:03,726 epoch 4 - iter 91/130 - loss 0.42344389 - samples/sec: 16.27 - lr: 0.020000\n",
-      "2021-09-08 02:42:04,469 epoch 4 - iter 104/130 - loss 0.44870407 - samples/sec: 17.53 - lr: 0.020000\n",
-      "2021-09-08 02:42:05,214 epoch 4 - iter 117/130 - loss 0.48585600 - samples/sec: 17.47 - lr: 0.020000\n",
-      "2021-09-08 02:42:05,967 epoch 4 - iter 130/130 - loss 0.45883727 - samples/sec: 17.27 - lr: 0.020000\n",
-      "2021-09-08 02:42:05,968 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:42:05,969 EPOCH 4 done: loss 0.4588 - lr 0.0200000\n",
-      "2021-09-08 02:42:06,593 DEV : loss 0.32973378896713257 - score 0.4286\n",
-      "2021-09-08 02:42:06,594 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:42:06,596 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:42:07,370 epoch 5 - iter 13/130 - loss 0.70305406 - samples/sec: 17.32 - lr: 0.020000\n",
-      "2021-09-08 02:42:08,117 epoch 5 - iter 26/130 - loss 0.48784590 - samples/sec: 17.41 - lr: 0.020000\n",
-      "2021-09-08 02:42:08,883 epoch 5 - iter 39/130 - loss 0.45877101 - samples/sec: 16.99 - lr: 0.020000\n",
-      "2021-09-08 02:42:09,618 epoch 5 - iter 52/130 - loss 0.41473633 - samples/sec: 17.71 - lr: 0.020000\n",
-      "2021-09-08 02:42:10,360 epoch 5 - iter 65/130 - loss 0.42468100 - samples/sec: 17.54 - lr: 0.020000\n",
-      "2021-09-08 02:42:11,057 epoch 5 - iter 78/130 - loss 0.39744252 - samples/sec: 18.67 - lr: 0.020000\n",
-      "2021-09-08 02:42:11,758 epoch 5 - iter 91/130 - loss 0.39779001 - samples/sec: 18.56 - lr: 0.020000\n",
-      "2021-09-08 02:42:12,478 epoch 5 - iter 104/130 - loss 0.41057586 - samples/sec: 18.07 - lr: 0.020000\n",
-      "2021-09-08 02:42:13,194 epoch 5 - iter 117/130 - loss 0.38031450 - samples/sec: 18.18 - lr: 0.020000\n",
-      "2021-09-08 02:42:13,900 epoch 5 - iter 130/130 - loss 0.36689761 - samples/sec: 18.44 - lr: 0.020000\n",
-      "2021-09-08 02:42:13,901 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:42:13,902 EPOCH 5 done: loss 0.3669 - lr 0.0200000\n",
-      "2021-09-08 02:42:14,472 DEV : loss 0.7333126664161682 - score 0.4286\n",
-      "2021-09-08 02:42:14,473 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:42:14,475 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:42:15,254 epoch 6 - iter 13/130 - loss 0.52665318 - samples/sec: 17.33 - lr: 0.020000\n",
-      "2021-09-08 02:42:15,993 epoch 6 - iter 26/130 - loss 0.39333116 - samples/sec: 17.60 - lr: 0.020000\n",
-      "2021-09-08 02:42:16,725 epoch 6 - iter 39/130 - loss 0.46108541 - samples/sec: 17.78 - lr: 0.020000\n",
-      "2021-09-08 02:42:17,440 epoch 6 - iter 52/130 - loss 0.41253744 - samples/sec: 18.21 - lr: 0.020000\n",
-      "2021-09-08 02:42:18,182 epoch 6 - iter 65/130 - loss 0.37307899 - samples/sec: 17.54 - lr: 0.020000\n",
-      "2021-09-08 02:42:18,897 epoch 6 - iter 78/130 - loss 0.36296344 - samples/sec: 18.21 - lr: 0.020000\n",
-      "2021-09-08 02:42:19,592 epoch 6 - iter 91/130 - loss 0.32747646 - samples/sec: 18.72 - lr: 0.020000\n",
-      "2021-09-08 02:42:20,315 epoch 6 - iter 104/130 - loss 0.30670557 - samples/sec: 17.99 - lr: 0.020000\n",
-      "2021-09-08 02:42:21,017 epoch 6 - iter 117/130 - loss 0.32697876 - samples/sec: 18.54 - lr: 0.020000\n",
-      "2021-09-08 02:42:21,728 epoch 6 - iter 130/130 - loss 0.33939561 - samples/sec: 18.30 - lr: 0.020000\n"
+      "2021-09-21 22:21:13,836 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:21:14,431 epoch 5 - iter 13/130 - loss 0.20023167 - samples/sec: 22.81 - lr: 0.020000\n",
+      "2021-09-21 22:21:14,997 epoch 5 - iter 26/130 - loss 0.30421413 - samples/sec: 22.98 - lr: 0.020000\n",
+      "2021-09-21 22:21:15,566 epoch 5 - iter 39/130 - loss 0.30127600 - samples/sec: 22.88 - lr: 0.020000\n",
+      "2021-09-21 22:21:16,137 epoch 5 - iter 52/130 - loss 0.34966908 - samples/sec: 22.77 - lr: 0.020000\n",
+      "2021-09-21 22:21:16,700 epoch 5 - iter 65/130 - loss 0.34007542 - samples/sec: 23.14 - lr: 0.020000\n",
+      "2021-09-21 22:21:17,268 epoch 5 - iter 78/130 - loss 0.37062713 - samples/sec: 22.89 - lr: 0.020000\n",
+      "2021-09-21 22:21:17,836 epoch 5 - iter 91/130 - loss 0.36417152 - samples/sec: 22.90 - lr: 0.020000\n",
+      "2021-09-21 22:21:18,405 epoch 5 - iter 104/130 - loss 0.36657644 - samples/sec: 22.89 - lr: 0.020000\n",
+      "2021-09-21 22:21:18,975 epoch 5 - iter 117/130 - loss 0.38957909 - samples/sec: 22.83 - lr: 0.020000\n",
+      "2021-09-21 22:21:19,536 epoch 5 - iter 130/130 - loss 0.38324863 - samples/sec: 23.18 - lr: 0.020000\n",
+      "2021-09-21 22:21:19,537 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:21:19,538 EPOCH 5 done: loss 0.3832 - lr 0.0200000\n",
+      "2021-09-21 22:21:19,852 DEV : loss 0.4522472620010376 - score 0.3571\n",
+      "2021-09-21 22:21:19,853 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:21:19,856 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:21:20,426 epoch 6 - iter 13/130 - loss 0.00897942 - samples/sec: 23.61 - lr: 0.020000\n",
+      "2021-09-21 22:21:21,000 epoch 6 - iter 26/130 - loss 0.26537033 - samples/sec: 22.66 - lr: 0.020000\n",
+      "2021-09-21 22:21:21,574 epoch 6 - iter 39/130 - loss 0.25787134 - samples/sec: 22.71 - lr: 0.020000\n",
+      "2021-09-21 22:21:22,149 epoch 6 - iter 52/130 - loss 0.27972225 - samples/sec: 22.60 - lr: 0.020000\n",
+      "2021-09-21 22:21:22,727 epoch 6 - iter 65/130 - loss 0.33133661 - samples/sec: 22.54 - lr: 0.020000\n",
+      "2021-09-21 22:21:23,309 epoch 6 - iter 78/130 - loss 0.32829574 - samples/sec: 22.37 - lr: 0.020000\n",
+      "2021-09-21 22:21:23,886 epoch 6 - iter 91/130 - loss 0.34618865 - samples/sec: 22.53 - lr: 0.020000\n",
+      "2021-09-21 22:21:24,449 epoch 6 - iter 104/130 - loss 0.34625852 - samples/sec: 23.11 - lr: 0.020000\n",
+      "2021-09-21 22:21:25,020 epoch 6 - iter 117/130 - loss 0.32231900 - samples/sec: 22.82 - lr: 0.020000\n",
+      "2021-09-21 22:21:25,583 epoch 6 - iter 130/130 - loss 0.31800545 - samples/sec: 23.13 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:42:21,730 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:42:21,730 EPOCH 6 done: loss 0.3394 - lr 0.0200000\n",
-      "2021-09-08 02:42:22,326 DEV : loss 0.5161055326461792 - score 0.3571\n",
-      "2021-09-08 02:42:22,327 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 02:42:22,329 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:42:23,129 epoch 7 - iter 13/130 - loss 0.13532466 - samples/sec: 17.28 - lr: 0.020000\n",
-      "2021-09-08 02:42:23,909 epoch 7 - iter 26/130 - loss 0.24661716 - samples/sec: 16.68 - lr: 0.020000\n",
-      "2021-09-08 02:42:24,641 epoch 7 - iter 39/130 - loss 0.23084050 - samples/sec: 17.78 - lr: 0.020000\n",
-      "2021-09-08 02:42:25,335 epoch 7 - iter 52/130 - loss 0.21887267 - samples/sec: 18.75 - lr: 0.020000\n",
-      "2021-09-08 02:42:26,040 epoch 7 - iter 65/130 - loss 0.21943677 - samples/sec: 18.47 - lr: 0.020000\n",
-      "2021-09-08 02:42:26,764 epoch 7 - iter 78/130 - loss 0.26300963 - samples/sec: 17.97 - lr: 0.020000\n",
-      "2021-09-08 02:42:27,539 epoch 7 - iter 91/130 - loss 0.25535173 - samples/sec: 16.79 - lr: 0.020000\n",
-      "2021-09-08 02:42:28,240 epoch 7 - iter 104/130 - loss 0.24663084 - samples/sec: 18.55 - lr: 0.020000\n",
-      "2021-09-08 02:42:28,948 epoch 7 - iter 117/130 - loss 0.22127502 - samples/sec: 18.39 - lr: 0.020000\n",
-      "2021-09-08 02:42:29,656 epoch 7 - iter 130/130 - loss 0.20803994 - samples/sec: 18.38 - lr: 0.020000\n",
-      "2021-09-08 02:42:29,657 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:42:29,658 EPOCH 7 done: loss 0.2080 - lr 0.0200000\n",
-      "2021-09-08 02:42:30,207 DEV : loss 0.39025506377220154 - score 0.4286\n",
-      "Epoch     7: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 02:42:30,209 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 02:42:30,211 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:42:30,976 epoch 8 - iter 13/130 - loss 0.21121972 - samples/sec: 17.86 - lr: 0.010000\n",
-      "2021-09-08 02:42:31,661 epoch 8 - iter 26/130 - loss 0.10823642 - samples/sec: 19.01 - lr: 0.010000\n",
-      "2021-09-08 02:42:32,420 epoch 8 - iter 39/130 - loss 0.07921974 - samples/sec: 17.12 - lr: 0.010000\n",
-      "2021-09-08 02:42:33,121 epoch 8 - iter 52/130 - loss 0.09256020 - samples/sec: 18.57 - lr: 0.010000\n",
-      "2021-09-08 02:42:33,832 epoch 8 - iter 65/130 - loss 0.07634969 - samples/sec: 18.31 - lr: 0.010000\n",
-      "2021-09-08 02:42:34,574 epoch 8 - iter 78/130 - loss 0.06442885 - samples/sec: 17.53 - lr: 0.010000\n",
-      "2021-09-08 02:42:35,276 epoch 8 - iter 91/130 - loss 0.06808637 - samples/sec: 18.55 - lr: 0.010000\n",
-      "2021-09-08 02:42:35,974 epoch 8 - iter 104/130 - loss 0.06036058 - samples/sec: 18.62 - lr: 0.010000\n",
-      "2021-09-08 02:42:36,670 epoch 8 - iter 117/130 - loss 0.12444027 - samples/sec: 18.71 - lr: 0.010000\n",
-      "2021-09-08 02:42:37,410 epoch 8 - iter 130/130 - loss 0.13081642 - samples/sec: 17.60 - lr: 0.010000\n",
-      "2021-09-08 02:42:37,411 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:42:37,411 EPOCH 8 done: loss 0.1308 - lr 0.0100000\n",
-      "2021-09-08 02:42:38,038 DEV : loss 0.3995971083641052 - score 0.4286\n",
-      "2021-09-08 02:42:38,039 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 02:42:38,041 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:42:38,807 epoch 9 - iter 13/130 - loss 0.26098775 - samples/sec: 18.96 - lr: 0.010000\n",
-      "2021-09-08 02:42:39,518 epoch 9 - iter 26/130 - loss 0.24422223 - samples/sec: 18.31 - lr: 0.010000\n",
-      "2021-09-08 02:42:40,230 epoch 9 - iter 39/130 - loss 0.30678704 - samples/sec: 18.28 - lr: 0.010000\n",
-      "2021-09-08 02:42:40,921 epoch 9 - iter 52/130 - loss 0.23349072 - samples/sec: 18.84 - lr: 0.010000\n",
-      "2021-09-08 02:42:41,678 epoch 9 - iter 65/130 - loss 0.20576255 - samples/sec: 17.19 - lr: 0.010000\n",
-      "2021-09-08 02:42:42,390 epoch 9 - iter 78/130 - loss 0.20907221 - samples/sec: 18.26 - lr: 0.010000\n",
-      "2021-09-08 02:42:43,079 epoch 9 - iter 91/130 - loss 0.17972766 - samples/sec: 18.89 - lr: 0.010000\n",
-      "2021-09-08 02:42:43,779 epoch 9 - iter 104/130 - loss 0.15920082 - samples/sec: 18.60 - lr: 0.010000\n",
-      "2021-09-08 02:42:44,501 epoch 9 - iter 117/130 - loss 0.14650054 - samples/sec: 18.03 - lr: 0.010000\n",
-      "2021-09-08 02:42:45,230 epoch 9 - iter 130/130 - loss 0.13438470 - samples/sec: 17.86 - lr: 0.010000\n",
-      "2021-09-08 02:42:45,231 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:42:45,231 EPOCH 9 done: loss 0.1344 - lr 0.0100000\n",
-      "2021-09-08 02:42:45,785 DEV : loss 0.4085161089897156 - score 0.4286\n",
-      "2021-09-08 02:42:45,786 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 02:42:45,788 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:42:46,540 epoch 10 - iter 13/130 - loss 0.00109212 - samples/sec: 18.43 - lr: 0.010000\n",
-      "2021-09-08 02:42:47,259 epoch 10 - iter 26/130 - loss 0.09474787 - samples/sec: 18.10 - lr: 0.010000\n",
-      "2021-09-08 02:42:47,978 epoch 10 - iter 39/130 - loss 0.11348122 - samples/sec: 18.10 - lr: 0.010000\n",
-      "2021-09-08 02:42:48,680 epoch 10 - iter 52/130 - loss 0.08640450 - samples/sec: 18.53 - lr: 0.010000\n",
-      "2021-09-08 02:42:49,383 epoch 10 - iter 65/130 - loss 0.11162168 - samples/sec: 18.53 - lr: 0.010000\n",
-      "2021-09-08 02:42:50,115 epoch 10 - iter 78/130 - loss 0.14933961 - samples/sec: 17.76 - lr: 0.010000\n",
-      "2021-09-08 02:42:50,856 epoch 10 - iter 91/130 - loss 0.21287554 - samples/sec: 17.57 - lr: 0.010000\n",
-      "2021-09-08 02:42:51,572 epoch 10 - iter 104/130 - loss 0.18753184 - samples/sec: 18.20 - lr: 0.010000\n",
-      "2021-09-08 02:42:52,292 epoch 10 - iter 117/130 - loss 0.21247062 - samples/sec: 18.08 - lr: 0.010000\n",
-      "2021-09-08 02:42:53,035 epoch 10 - iter 130/130 - loss 0.20191989 - samples/sec: 17.51 - lr: 0.010000\n",
-      "2021-09-08 02:42:53,036 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:42:53,037 EPOCH 10 done: loss 0.2019 - lr 0.0100000\n",
-      "2021-09-08 02:42:53,568 DEV : loss 0.36004501581192017 - score 0.5\n",
-      "2021-09-08 02:42:53,569 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 22:21:25,584 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:21:25,584 EPOCH 6 done: loss 0.3180 - lr 0.0200000\n",
+      "2021-09-21 22:21:25,903 DEV : loss 0.6641179919242859 - score 0.2857\n",
+      "2021-09-21 22:21:25,903 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 22:21:25,906 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:21:26,493 epoch 7 - iter 13/130 - loss 0.30248342 - samples/sec: 22.91 - lr: 0.020000\n",
+      "2021-09-21 22:21:27,060 epoch 7 - iter 26/130 - loss 0.21547032 - samples/sec: 22.97 - lr: 0.020000\n",
+      "2021-09-21 22:21:27,631 epoch 7 - iter 39/130 - loss 0.21844075 - samples/sec: 22.77 - lr: 0.020000\n",
+      "2021-09-21 22:21:28,203 epoch 7 - iter 52/130 - loss 0.21405766 - samples/sec: 22.78 - lr: 0.020000\n",
+      "2021-09-21 22:21:28,774 epoch 7 - iter 65/130 - loss 0.24220211 - samples/sec: 22.77 - lr: 0.020000\n",
+      "2021-09-21 22:21:29,341 epoch 7 - iter 78/130 - loss 0.22497599 - samples/sec: 22.98 - lr: 0.020000\n",
+      "2021-09-21 22:21:29,909 epoch 7 - iter 91/130 - loss 0.21298273 - samples/sec: 22.89 - lr: 0.020000\n",
+      "2021-09-21 22:21:30,477 epoch 7 - iter 104/130 - loss 0.22625414 - samples/sec: 22.93 - lr: 0.020000\n",
+      "2021-09-21 22:21:31,034 epoch 7 - iter 117/130 - loss 0.21727591 - samples/sec: 23.37 - lr: 0.020000\n",
+      "2021-09-21 22:21:31,593 epoch 7 - iter 130/130 - loss 0.21894679 - samples/sec: 23.30 - lr: 0.020000\n",
+      "2021-09-21 22:21:31,594 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:21:31,594 EPOCH 7 done: loss 0.2189 - lr 0.0200000\n",
+      "2021-09-21 22:21:31,911 DEV : loss 0.5859968662261963 - score 0.2857\n",
+      "2021-09-21 22:21:31,912 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 22:21:31,914 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:21:32,498 epoch 8 - iter 13/130 - loss 0.38011765 - samples/sec: 23.05 - lr: 0.020000\n",
+      "2021-09-21 22:21:33,065 epoch 8 - iter 26/130 - loss 0.45832901 - samples/sec: 22.95 - lr: 0.020000\n",
+      "2021-09-21 22:21:33,644 epoch 8 - iter 39/130 - loss 0.41813568 - samples/sec: 22.48 - lr: 0.020000\n",
+      "2021-09-21 22:21:34,209 epoch 8 - iter 52/130 - loss 0.34120583 - samples/sec: 23.04 - lr: 0.020000\n",
+      "2021-09-21 22:21:34,775 epoch 8 - iter 65/130 - loss 0.34717850 - samples/sec: 22.98 - lr: 0.020000\n",
+      "2021-09-21 22:21:35,341 epoch 8 - iter 78/130 - loss 0.31843263 - samples/sec: 22.99 - lr: 0.020000\n",
+      "2021-09-21 22:21:35,910 epoch 8 - iter 91/130 - loss 0.29935242 - samples/sec: 22.87 - lr: 0.020000\n",
+      "2021-09-21 22:21:36,481 epoch 8 - iter 104/130 - loss 0.27501155 - samples/sec: 22.81 - lr: 0.020000\n",
+      "2021-09-21 22:21:37,046 epoch 8 - iter 117/130 - loss 0.26343293 - samples/sec: 23.03 - lr: 0.020000\n",
+      "2021-09-21 22:21:37,611 epoch 8 - iter 130/130 - loss 0.25710676 - samples/sec: 23.06 - lr: 0.020000\n",
+      "2021-09-21 22:21:37,612 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:21:37,612 EPOCH 8 done: loss 0.2571 - lr 0.0200000\n",
+      "2021-09-21 22:21:37,930 DEV : loss 0.6298171877861023 - score 0.2857\n",
+      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 22:21:37,930 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 22:21:37,932 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:21:38,511 epoch 9 - iter 13/130 - loss 0.04141006 - samples/sec: 23.25 - lr: 0.010000\n",
+      "2021-09-21 22:21:39,068 epoch 9 - iter 26/130 - loss 0.12310959 - samples/sec: 23.37 - lr: 0.010000\n",
+      "2021-09-21 22:21:39,629 epoch 9 - iter 39/130 - loss 0.11321051 - samples/sec: 23.23 - lr: 0.010000\n",
+      "2021-09-21 22:21:40,183 epoch 9 - iter 52/130 - loss 0.14842238 - samples/sec: 23.46 - lr: 0.010000\n",
+      "2021-09-21 22:21:40,753 epoch 9 - iter 65/130 - loss 0.17668142 - samples/sec: 22.87 - lr: 0.010000\n",
+      "2021-09-21 22:21:41,312 epoch 9 - iter 78/130 - loss 0.15665419 - samples/sec: 23.26 - lr: 0.010000\n",
+      "2021-09-21 22:21:41,867 epoch 9 - iter 91/130 - loss 0.15752251 - samples/sec: 23.47 - lr: 0.010000\n",
+      "2021-09-21 22:21:42,424 epoch 9 - iter 104/130 - loss 0.15311886 - samples/sec: 23.37 - lr: 0.010000\n",
+      "2021-09-21 22:21:42,982 epoch 9 - iter 117/130 - loss 0.13899374 - samples/sec: 23.33 - lr: 0.010000\n",
+      "2021-09-21 22:21:43,548 epoch 9 - iter 130/130 - loss 0.14822310 - samples/sec: 22.96 - lr: 0.010000\n",
+      "2021-09-21 22:21:43,549 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:21:43,550 EPOCH 9 done: loss 0.1482 - lr 0.0100000\n",
+      "2021-09-21 22:21:43,864 DEV : loss 0.46857917308807373 - score 0.3571\n",
+      "2021-09-21 22:21:43,865 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 22:21:43,867 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:21:44,437 epoch 10 - iter 13/130 - loss 0.10683748 - samples/sec: 23.61 - lr: 0.010000\n",
+      "2021-09-21 22:21:44,999 epoch 10 - iter 26/130 - loss 0.17269348 - samples/sec: 23.15 - lr: 0.010000\n",
+      "2021-09-21 22:21:45,560 epoch 10 - iter 39/130 - loss 0.19966112 - samples/sec: 23.23 - lr: 0.010000\n",
+      "2021-09-21 22:21:46,122 epoch 10 - iter 52/130 - loss 0.17465556 - samples/sec: 23.12 - lr: 0.010000\n",
+      "2021-09-21 22:21:46,679 epoch 10 - iter 65/130 - loss 0.15748871 - samples/sec: 23.39 - lr: 0.010000\n",
+      "2021-09-21 22:21:47,230 epoch 10 - iter 78/130 - loss 0.13203522 - samples/sec: 23.59 - lr: 0.010000\n",
+      "2021-09-21 22:21:47,787 epoch 10 - iter 91/130 - loss 0.14928955 - samples/sec: 23.37 - lr: 0.010000\n",
+      "2021-09-21 22:21:48,342 epoch 10 - iter 104/130 - loss 0.14426121 - samples/sec: 23.49 - lr: 0.010000\n",
+      "2021-09-21 22:21:48,905 epoch 10 - iter 117/130 - loss 0.16517354 - samples/sec: 23.09 - lr: 0.010000\n",
+      "2021-09-21 22:21:49,458 epoch 10 - iter 130/130 - loss 0.16473987 - samples/sec: 23.54 - lr: 0.010000\n",
+      "2021-09-21 22:21:49,459 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:21:49,460 EPOCH 10 done: loss 0.1647 - lr 0.0100000\n",
+      "2021-09-21 22:21:49,774 DEV : loss 0.4565923810005188 - score 0.5714\n",
+      "2021-09-21 22:21:49,775 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 02:43:05,322 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 02:43:05,323 Testing using best model ...\n",
-      "2021-09-08 02:43:05,324 loading file None/best-model.pt\n",
+      "2021-09-21 22:21:57,859 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 22:21:57,860 Testing using best model ...\n",
+      "2021-09-21 22:21:57,862 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 02:43:10,492 \t0.375\n",
-      "2021-09-08 02:43:10,493 \n",
+      "2021-09-21 22:22:02,143 \t0.5625\n",
+      "2021-09-21 22:22:02,144 \n",
       "Results:\n",
-      "- F-score (micro) 0.375\n",
-      "- F-score (macro) 0.2812\n",
-      "- Accuracy 0.375\n",
+      "- F-score (micro) 0.5625\n",
+      "- F-score (macro) 0.4\n",
+      "- Accuracy 0.5625\n",
       "\n",
       "By class:\n",
       "                                                                                                                                        precision    recall  f1-score   support\n",
       "\n",
-      "                                                                                                           undertake a journey or trip     1.0000    1.0000    1.0000         2\n",
+      "                                                                                                           undertake a journey or trip     0.0000    0.0000    0.0000         0\n",
       "                                                                          the practical application of science to commerce or industry     1.0000    1.0000    1.0000         1\n",
-      "                                                                                        a healthy state of wellbeing free from disease     0.0000    0.0000    0.0000         0\n",
+      "                                                                                        a healthy state of wellbeing free from disease     0.5000    1.0000    0.6667         1\n",
       "                                                                                          an adult female person (as opposed to a man)     0.0000    0.0000    0.0000         0\n",
-      " a father or mother; one who begets or one who gives birth to or nurtures and raises a child a relative who plays the role of guardian     0.0000    0.0000    0.0000         1\n",
-      "a father or mother; one who begets or one who gives birth to or nurtures and raises a child; a relative who plays the role of guardian     0.0000    0.0000    0.0000         2\n",
-      "                                                                       the social event at which the ceremony of marriage is performed     0.0000    0.0000    0.0000         0\n",
-      "                                                               the latest and most admired style in clothes and cosmetics and behavior     1.0000    1.0000    1.0000         1\n",
-      "                                                                            an activity that is diverting and that holds the attention     0.0000    0.0000    0.0000         1\n",
-      "                                                                                           a particular branch of scientific knowledge     0.5000    0.5000    0.5000         2\n",
-      "                                                                                                   the legal dissolution of a marriage     1.0000    1.0000    1.0000         1\n",
-      "                                an act punishable by law; usually considered an evil act an evil act not necessarily punishable by law     0.0000    0.0000    0.0000         1\n",
-      "                                                          a strong belief in a supernatural power or powers that control human destiny     0.0000    0.0000    0.0000         2\n",
-      "                                                                       an active diversion requiring physical exertion and competition     0.0000    0.0000    0.0000         1\n",
+      " a father or mother; one who begets or one who gives birth to or nurtures and raises a child a relative who plays the role of guardian     1.0000    1.0000    1.0000         2\n",
+      "a father or mother; one who begets or one who gives birth to or nurtures and raises a child; a relative who plays the role of guardian     0.0000    0.0000    0.0000         1\n",
+      "                                                                       the social event at which the ceremony of marriage is performed     1.0000    0.5000    0.6667         2\n",
+      "                                                               the latest and most admired style in clothes and cosmetics and behavior     0.5000    1.0000    0.6667         1\n",
+      "                                                                            an activity that is diverting and that holds the attention     0.0000    0.0000    0.0000         0\n",
+      "                                                                                           a particular branch of scientific knowledge     0.5000    0.3333    0.4000         3\n",
+      "                                                                                                   the legal dissolution of a marriage     0.0000    0.0000    0.0000         0\n",
+      "                                an act punishable by law; usually considered an evil act an evil act not necessarily punishable by law     1.0000    1.0000    1.0000         1\n",
+      "                                                          a strong belief in a supernatural power or powers that control human destiny     0.0000    0.0000    0.0000         0\n",
+      "                                                                       an active diversion requiring physical exertion and competition     1.0000    1.0000    1.0000         1\n",
       "                                                                        social relations involving intrigue to gain authority or power     0.0000    0.0000    0.0000         0\n",
-      "                                                                                          light and humorous drama with a happy ending     0.0000    0.0000    0.0000         1\n",
+      "                                                                                          light and humorous drama with a happy ending     0.0000    0.0000    0.0000         3\n",
       "\n",
-      "                                                                                                                             micro avg     0.3750    0.3750    0.3750        16\n",
-      "                                                                                                                             macro avg     0.2812    0.2812    0.2812        16\n",
-      "                                                                                                                          weighted avg     0.3750    0.3750    0.3750        16\n",
-      "                                                                                                                           samples avg     0.3750    0.3750    0.3750        16\n",
+      "                                                                                                                             micro avg     0.5625    0.5625    0.5625        16\n",
+      "                                                                                                                             macro avg     0.4062    0.4271    0.4000        16\n",
+      "                                                                                                                          weighted avg     0.5938    0.5625    0.5542        16\n",
+      "                                                                                                                           samples avg     0.5625    0.5625    0.5625        16\n",
       "\n"
      ]
     },
@@ -8726,8 +8733,8 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 02:43:10,493 ----------------------------------------------------------------------------------------------------\n",
-      "Accuracy Durchschnitt: 0.42897033158813264\n"
+      "2021-09-21 22:22:02,144 ----------------------------------------------------------------------------------------------------\n",
+      "Accuracy Durchschnitt: 0.4118673647469459\n"
      ]
     }
    ],
@@ -8816,11 +8823,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
    "id": "a310936c",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.4869109947643979, 0.059336823734729496, 0.4973821989528796, 0.49040139616055844, 0.525305410122164]\n",
+      "0.17678116407041497\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(avg_acc_list)\n",
+    "print(statistics.pstdev(avg_acc_list))"
+   ]
   }
  ],
  "metadata": {
diff --git a/oneshot/emotion_semeval_oneshot.ipynb b/oneshot/emotion_semeval_oneshot.ipynb
index 7c414a5..b4facee 100644
--- a/oneshot/emotion_semeval_oneshot.ipynb
+++ b/oneshot/emotion_semeval_oneshot.ipynb
@@ -39,7 +39,7 @@
    "source": [
     "# GRAKA auswählen\n",
     "import flair, torch\n",
-    "flair.device = torch.device('cuda:1') "
+    "flair.device = torch.device('cuda:0') "
    ]
   },
   {
@@ -74,25 +74,38 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:47:30,873 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
+      "2021-09-21 19:12:28,291 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:47:37,627 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:12:37,117 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 8/8 [00:00<00:00, 31476.95it/s]"
+      "100%|██████████| 8/8 [00:00<00:00, 35320.45it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:47:37,631 [b'joy', b'love', b'disgust', b'anger', b'guilt', b'shame', b'sadness', b'surprise']\n",
-      "2021-09-08 10:47:37,644 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:47:37,646 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:12:37,119 [b'joy', b'love', b'disgust', b'fear', b'anger', b'shame', b'sadness', b'surprise']\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 19:12:37,493 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:12:37,495 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -405,173 +418,159 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:47:37,646 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:47:37,646 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
-      "2021-09-08 10:47:37,647 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:47:37,647 Parameters:\n",
-      "2021-09-08 10:47:37,647  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:47:37,648  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:47:37,648  - patience: \"3\"\n",
-      "2021-09-08 10:47:37,648  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:47:37,648  - max_epochs: \"10\"\n",
-      "2021-09-08 10:47:37,649  - shuffle: \"True\"\n",
-      "2021-09-08 10:47:37,649  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:47:37,649  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:47:37,649 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:47:37,650 Model training base path: \"None\"\n",
-      "2021-09-08 10:47:37,650 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:47:37,650 Device: cuda:1\n",
-      "2021-09-08 10:47:37,650 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:47:37,651 Embeddings storage mode: cpu\n",
-      "2021-09-08 10:47:37,657 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:47:37,819 epoch 1 - iter 1/7 - loss 0.47605512 - samples/sec: 6.93 - lr: 0.020000\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 10:47:37,867 epoch 1 - iter 2/7 - loss 0.78136747 - samples/sec: 21.46 - lr: 0.020000\n",
-      "2021-09-08 10:47:37,910 epoch 1 - iter 3/7 - loss 0.73694645 - samples/sec: 23.19 - lr: 0.020000\n",
-      "2021-09-08 10:47:37,956 epoch 1 - iter 4/7 - loss 0.67823986 - samples/sec: 21.92 - lr: 0.020000\n",
-      "2021-09-08 10:47:37,999 epoch 1 - iter 5/7 - loss 0.66390999 - samples/sec: 23.25 - lr: 0.020000\n",
-      "2021-09-08 10:47:38,042 epoch 1 - iter 6/7 - loss 0.65434074 - samples/sec: 23.47 - lr: 0.020000\n",
-      "2021-09-08 10:47:38,086 epoch 1 - iter 7/7 - loss 0.63621402 - samples/sec: 23.29 - lr: 0.020000\n",
-      "2021-09-08 10:47:38,087 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:47:38,087 EPOCH 1 done: loss 0.6362 - lr 0.0200000\n",
-      "2021-09-08 10:47:38,115 DEV : loss 0.3343164324760437 - score 0.0\n",
-      "2021-09-08 10:47:38,116 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:47:52,238 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:47:52,301 epoch 2 - iter 1/7 - loss 0.72597313 - samples/sec: 21.02 - lr: 0.020000\n",
-      "2021-09-08 10:47:52,345 epoch 2 - iter 2/7 - loss 0.76389048 - samples/sec: 22.93 - lr: 0.020000\n",
-      "2021-09-08 10:47:52,389 epoch 2 - iter 3/7 - loss 0.74629559 - samples/sec: 23.44 - lr: 0.020000\n",
-      "2021-09-08 10:47:52,432 epoch 2 - iter 4/7 - loss 0.71135966 - samples/sec: 23.48 - lr: 0.020000\n",
-      "2021-09-08 10:47:52,475 epoch 2 - iter 5/7 - loss 0.70163226 - samples/sec: 23.11 - lr: 0.020000\n",
-      "2021-09-08 10:47:52,518 epoch 2 - iter 6/7 - loss 0.68399093 - samples/sec: 23.34 - lr: 0.020000\n",
-      "2021-09-08 10:47:52,566 epoch 2 - iter 7/7 - loss 0.73229130 - samples/sec: 21.34 - lr: 0.020000\n",
-      "2021-09-08 10:47:52,567 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:47:52,567 EPOCH 2 done: loss 0.7323 - lr 0.0200000\n",
-      "2021-09-08 10:47:52,952 DEV : loss 0.5655881762504578 - score 0.0\n",
-      "2021-09-08 10:47:52,952 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:47:52,970 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:47:53,029 epoch 3 - iter 1/7 - loss 0.67420489 - samples/sec: 22.32 - lr: 0.020000\n",
-      "2021-09-08 10:47:53,072 epoch 3 - iter 2/7 - loss 0.70769435 - samples/sec: 23.43 - lr: 0.020000\n",
-      "2021-09-08 10:47:53,115 epoch 3 - iter 3/7 - loss 0.68838004 - samples/sec: 23.27 - lr: 0.020000\n",
-      "2021-09-08 10:47:53,159 epoch 3 - iter 4/7 - loss 0.67917936 - samples/sec: 23.16 - lr: 0.020000\n",
-      "2021-09-08 10:47:53,201 epoch 3 - iter 5/7 - loss 0.66018188 - samples/sec: 23.57 - lr: 0.020000\n",
-      "2021-09-08 10:47:53,247 epoch 3 - iter 6/7 - loss 0.65741848 - samples/sec: 22.23 - lr: 0.020000\n",
-      "2021-09-08 10:47:53,291 epoch 3 - iter 7/7 - loss 0.65230789 - samples/sec: 22.94 - lr: 0.020000\n",
-      "2021-09-08 10:47:53,292 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:47:53,293 EPOCH 3 done: loss 0.6523 - lr 0.0200000\n",
-      "2021-09-08 10:47:53,324 DEV : loss 0.38331010937690735 - score 0.0\n",
-      "2021-09-08 10:47:53,324 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:47:53,327 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:47:53,382 epoch 4 - iter 1/7 - loss 0.63260370 - samples/sec: 23.46 - lr: 0.020000\n",
-      "2021-09-08 10:47:53,426 epoch 4 - iter 2/7 - loss 0.62517428 - samples/sec: 23.40 - lr: 0.020000\n",
-      "2021-09-08 10:47:53,469 epoch 4 - iter 3/7 - loss 0.58831569 - samples/sec: 23.42 - lr: 0.020000\n",
-      "2021-09-08 10:47:53,512 epoch 4 - iter 4/7 - loss 0.60587890 - samples/sec: 23.55 - lr: 0.020000\n",
-      "2021-09-08 10:47:53,555 epoch 4 - iter 5/7 - loss 0.60146329 - samples/sec: 23.26 - lr: 0.020000\n",
-      "2021-09-08 10:47:53,611 epoch 4 - iter 6/7 - loss 0.60969721 - samples/sec: 17.90 - lr: 0.020000\n",
-      "2021-09-08 10:47:53,669 epoch 4 - iter 7/7 - loss 0.62326998 - samples/sec: 17.44 - lr: 0.020000\n",
-      "2021-09-08 10:47:53,670 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:47:53,671 EPOCH 4 done: loss 0.6233 - lr 0.0200000\n",
-      "2021-09-08 10:47:53,697 DEV : loss 0.5225672721862793 - score 0.0\n",
-      "2021-09-08 10:47:53,698 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:47:53,700 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:47:53,757 epoch 5 - iter 1/7 - loss 0.67456627 - samples/sec: 23.20 - lr: 0.020000\n",
-      "2021-09-08 10:47:53,804 epoch 5 - iter 2/7 - loss 0.70342717 - samples/sec: 21.42 - lr: 0.020000\n",
-      "2021-09-08 10:47:53,849 epoch 5 - iter 3/7 - loss 0.63381734 - samples/sec: 22.48 - lr: 0.020000\n",
-      "2021-09-08 10:47:53,893 epoch 5 - iter 4/7 - loss 0.63654322 - samples/sec: 22.91 - lr: 0.020000\n",
-      "2021-09-08 10:47:53,936 epoch 5 - iter 5/7 - loss 0.62334290 - samples/sec: 23.45 - lr: 0.020000\n",
-      "2021-09-08 10:47:53,980 epoch 5 - iter 6/7 - loss 0.60481663 - samples/sec: 23.20 - lr: 0.020000\n",
-      "2021-09-08 10:47:54,024 epoch 5 - iter 7/7 - loss 0.56392309 - samples/sec: 22.69 - lr: 0.020000\n",
-      "2021-09-08 10:47:54,025 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:47:54,026 EPOCH 5 done: loss 0.5639 - lr 0.0200000\n",
-      "2021-09-08 10:47:54,053 DEV : loss 0.2524285912513733 - score 0.0\n",
-      "2021-09-08 10:47:54,053 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:12:37,496 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:12:37,496 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
+      "2021-09-21 19:12:37,496 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:12:37,497 Parameters:\n",
+      "2021-09-21 19:12:37,497  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:12:37,497  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:12:37,498  - patience: \"3\"\n",
+      "2021-09-21 19:12:37,499  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:12:37,499  - max_epochs: \"10\"\n",
+      "2021-09-21 19:12:37,499  - shuffle: \"True\"\n",
+      "2021-09-21 19:12:37,499  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:12:37,500  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:12:37,500 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:12:37,500 Model training base path: \"None\"\n",
+      "2021-09-21 19:12:37,501 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:12:37,501 Device: cuda:0\n",
+      "2021-09-21 19:12:37,501 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:12:37,502 Embeddings storage mode: cpu\n",
+      "2021-09-21 19:12:41,835 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:12:42,050 epoch 1 - iter 1/7 - loss 0.38174835 - samples/sec: 6.21 - lr: 0.020000\n",
+      "2021-09-21 19:12:42,114 epoch 1 - iter 2/7 - loss 0.55891116 - samples/sec: 15.76 - lr: 0.020000\n",
+      "2021-09-21 19:12:42,169 epoch 1 - iter 3/7 - loss 0.65586939 - samples/sec: 18.50 - lr: 0.020000\n",
+      "2021-09-21 19:12:42,230 epoch 1 - iter 4/7 - loss 0.64218179 - samples/sec: 16.54 - lr: 0.020000\n",
+      "2021-09-21 19:12:42,290 epoch 1 - iter 5/7 - loss 0.54005338 - samples/sec: 16.67 - lr: 0.020000\n",
+      "2021-09-21 19:12:42,351 epoch 1 - iter 6/7 - loss 0.66529329 - samples/sec: 16.53 - lr: 0.020000\n",
+      "2021-09-21 19:12:42,402 epoch 1 - iter 7/7 - loss 0.62773081 - samples/sec: 19.70 - lr: 0.020000\n",
+      "2021-09-21 19:12:42,403 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:12:42,404 EPOCH 1 done: loss 0.6277 - lr 0.0200000\n",
+      "2021-09-21 19:12:42,482 DEV : loss 0.3883691430091858 - score 0.0\n",
+      "2021-09-21 19:12:42,483 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:48:01,833 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:01,893 epoch 6 - iter 1/7 - loss 0.67429399 - samples/sec: 22.46 - lr: 0.020000\n",
-      "2021-09-08 10:48:01,937 epoch 6 - iter 2/7 - loss 0.65677950 - samples/sec: 22.73 - lr: 0.020000\n",
-      "2021-09-08 10:48:01,981 epoch 6 - iter 3/7 - loss 0.61754183 - samples/sec: 23.33 - lr: 0.020000\n",
-      "2021-09-08 10:48:02,027 epoch 6 - iter 4/7 - loss 0.62358174 - samples/sec: 21.65 - lr: 0.020000\n",
-      "2021-09-08 10:48:02,073 epoch 6 - iter 5/7 - loss 0.62817652 - samples/sec: 22.40 - lr: 0.020000\n",
-      "2021-09-08 10:48:02,116 epoch 6 - iter 6/7 - loss 0.64499995 - samples/sec: 23.06 - lr: 0.020000\n",
-      "2021-09-08 10:48:02,164 epoch 6 - iter 7/7 - loss 0.70082538 - samples/sec: 21.34 - lr: 0.020000\n",
-      "2021-09-08 10:48:02,165 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:02,165 EPOCH 6 done: loss 0.7008 - lr 0.0200000\n",
-      "2021-09-08 10:48:02,232 DEV : loss 0.22316567599773407 - score 0.0\n",
-      "2021-09-08 10:48:02,233 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:48:09,196 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:09,273 epoch 7 - iter 1/7 - loss 0.66592866 - samples/sec: 21.93 - lr: 0.020000\n",
-      "2021-09-08 10:48:09,320 epoch 7 - iter 2/7 - loss 0.63278997 - samples/sec: 21.61 - lr: 0.020000\n",
-      "2021-09-08 10:48:09,367 epoch 7 - iter 3/7 - loss 0.63754775 - samples/sec: 21.60 - lr: 0.020000\n",
-      "2021-09-08 10:48:09,417 epoch 7 - iter 4/7 - loss 0.63693506 - samples/sec: 20.22 - lr: 0.020000\n",
-      "2021-09-08 10:48:09,464 epoch 7 - iter 5/7 - loss 0.63708953 - samples/sec: 21.84 - lr: 0.020000\n",
-      "2021-09-08 10:48:09,509 epoch 7 - iter 6/7 - loss 0.63521194 - samples/sec: 22.48 - lr: 0.020000\n",
-      "2021-09-08 10:48:09,556 epoch 7 - iter 7/7 - loss 0.63328874 - samples/sec: 21.39 - lr: 0.020000\n",
-      "2021-09-08 10:48:09,557 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:09,558 EPOCH 7 done: loss 0.6333 - lr 0.0200000\n",
-      "2021-09-08 10:48:09,588 DEV : loss 0.4160038232803345 - score 0.0\n",
-      "2021-09-08 10:48:09,589 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:48:09,591 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:09,654 epoch 8 - iter 1/7 - loss 0.75938588 - samples/sec: 21.42 - lr: 0.020000\n",
-      "2021-09-08 10:48:09,701 epoch 8 - iter 2/7 - loss 0.73827675 - samples/sec: 21.87 - lr: 0.020000\n",
-      "2021-09-08 10:48:09,748 epoch 8 - iter 3/7 - loss 0.74361316 - samples/sec: 21.47 - lr: 0.020000\n",
-      "2021-09-08 10:48:09,795 epoch 8 - iter 4/7 - loss 0.69360912 - samples/sec: 21.61 - lr: 0.020000\n",
-      "2021-09-08 10:48:09,841 epoch 8 - iter 5/7 - loss 0.68344537 - samples/sec: 21.84 - lr: 0.020000\n"
+      "2021-09-21 19:12:46,796 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:12:46,883 epoch 2 - iter 1/7 - loss 0.41668496 - samples/sec: 17.80 - lr: 0.020000\n",
+      "2021-09-21 19:12:46,938 epoch 2 - iter 2/7 - loss 0.50939049 - samples/sec: 18.24 - lr: 0.020000\n",
+      "2021-09-21 19:12:47,004 epoch 2 - iter 3/7 - loss 0.43663570 - samples/sec: 15.24 - lr: 0.020000\n",
+      "2021-09-21 19:12:47,061 epoch 2 - iter 4/7 - loss 0.48323219 - samples/sec: 17.92 - lr: 0.020000\n",
+      "2021-09-21 19:12:47,117 epoch 2 - iter 5/7 - loss 0.50640503 - samples/sec: 18.06 - lr: 0.020000\n",
+      "2021-09-21 19:12:47,185 epoch 2 - iter 6/7 - loss 0.44025306 - samples/sec: 14.68 - lr: 0.020000\n",
+      "2021-09-21 19:12:47,248 epoch 2 - iter 7/7 - loss 0.48250557 - samples/sec: 15.99 - lr: 0.020000\n",
+      "2021-09-21 19:12:47,249 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:12:47,249 EPOCH 2 done: loss 0.4825 - lr 0.0200000\n",
+      "2021-09-21 19:12:47,294 DEV : loss 0.4079870283603668 - score 0.0\n",
+      "2021-09-21 19:12:47,296 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:12:47,298 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:12:47,379 epoch 3 - iter 1/7 - loss 0.63768715 - samples/sec: 16.98 - lr: 0.020000\n",
+      "2021-09-21 19:12:47,444 epoch 3 - iter 2/7 - loss 0.37554054 - samples/sec: 15.41 - lr: 0.020000\n",
+      "2021-09-21 19:12:47,497 epoch 3 - iter 3/7 - loss 0.45588399 - samples/sec: 19.16 - lr: 0.020000\n",
+      "2021-09-21 19:12:47,564 epoch 3 - iter 4/7 - loss 0.46012029 - samples/sec: 15.07 - lr: 0.020000\n",
+      "2021-09-21 19:12:47,629 epoch 3 - iter 5/7 - loss 0.44528106 - samples/sec: 15.46 - lr: 0.020000\n",
+      "2021-09-21 19:12:47,689 epoch 3 - iter 6/7 - loss 0.39493562 - samples/sec: 16.70 - lr: 0.020000\n",
+      "2021-09-21 19:12:47,761 epoch 3 - iter 7/7 - loss 0.45906120 - samples/sec: 13.93 - lr: 0.020000\n",
+      "2021-09-21 19:12:47,762 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:12:47,763 EPOCH 3 done: loss 0.4591 - lr 0.0200000\n",
+      "2021-09-21 19:12:47,811 DEV : loss 0.5997085571289062 - score 0.0\n",
+      "2021-09-21 19:12:47,815 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:12:47,816 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:12:47,914 epoch 4 - iter 1/7 - loss 0.05988601 - samples/sec: 17.44 - lr: 0.020000\n",
+      "2021-09-21 19:12:47,975 epoch 4 - iter 2/7 - loss 0.04582797 - samples/sec: 16.71 - lr: 0.020000\n",
+      "2021-09-21 19:12:48,037 epoch 4 - iter 3/7 - loss 0.08450227 - samples/sec: 16.18 - lr: 0.020000\n",
+      "2021-09-21 19:12:48,095 epoch 4 - iter 4/7 - loss 0.25497059 - samples/sec: 17.27 - lr: 0.020000\n",
+      "2021-09-21 19:12:48,152 epoch 4 - iter 5/7 - loss 0.29344268 - samples/sec: 17.62 - lr: 0.020000\n",
+      "2021-09-21 19:12:48,220 epoch 4 - iter 6/7 - loss 0.34742191 - samples/sec: 14.98 - lr: 0.020000\n",
+      "2021-09-21 19:12:48,302 epoch 4 - iter 7/7 - loss 0.37829452 - samples/sec: 12.22 - lr: 0.020000\n",
+      "2021-09-21 19:12:48,303 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:12:48,303 EPOCH 4 done: loss 0.3783 - lr 0.0200000\n",
+      "2021-09-21 19:12:48,338 DEV : loss 0.5142182111740112 - score 0.0\n",
+      "2021-09-21 19:12:48,340 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:12:48,342 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:12:48,446 epoch 5 - iter 1/7 - loss 0.14320962 - samples/sec: 13.78 - lr: 0.020000\n",
+      "2021-09-21 19:12:48,512 epoch 5 - iter 2/7 - loss 0.19079941 - samples/sec: 15.36 - lr: 0.020000\n",
+      "2021-09-21 19:12:48,575 epoch 5 - iter 3/7 - loss 0.15853430 - samples/sec: 15.98 - lr: 0.020000\n",
+      "2021-09-21 19:12:48,642 epoch 5 - iter 4/7 - loss 0.18606907 - samples/sec: 14.94 - lr: 0.020000\n",
+      "2021-09-21 19:12:48,710 epoch 5 - iter 5/7 - loss 0.27558390 - samples/sec: 14.93 - lr: 0.020000\n",
+      "2021-09-21 19:12:48,767 epoch 5 - iter 6/7 - loss 0.28539386 - samples/sec: 17.51 - lr: 0.020000\n",
+      "2021-09-21 19:12:48,825 epoch 5 - iter 7/7 - loss 0.34058823 - samples/sec: 17.60 - lr: 0.020000\n",
+      "2021-09-21 19:12:48,826 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:12:48,826 EPOCH 5 done: loss 0.3406 - lr 0.0200000\n",
+      "2021-09-21 19:12:48,874 DEV : loss 0.4342966675758362 - score 0.0\n",
+      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 19:12:48,876 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:12:48,878 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:12:48,956 epoch 6 - iter 1/7 - loss 0.10663535 - samples/sec: 17.32 - lr: 0.010000\n",
+      "2021-09-21 19:12:49,009 epoch 6 - iter 2/7 - loss 0.24361075 - samples/sec: 18.69 - lr: 0.010000\n",
+      "2021-09-21 19:12:49,064 epoch 6 - iter 3/7 - loss 0.30320919 - samples/sec: 18.63 - lr: 0.010000\n",
+      "2021-09-21 19:12:49,118 epoch 6 - iter 4/7 - loss 0.28093841 - samples/sec: 18.58 - lr: 0.010000\n",
+      "2021-09-21 19:12:49,171 epoch 6 - iter 5/7 - loss 0.24204202 - samples/sec: 18.96 - lr: 0.010000\n",
+      "2021-09-21 19:12:49,228 epoch 6 - iter 6/7 - loss 0.26048495 - samples/sec: 17.78 - lr: 0.010000\n",
+      "2021-09-21 19:12:49,282 epoch 6 - iter 7/7 - loss 0.29756547 - samples/sec: 18.57 - lr: 0.010000\n",
+      "2021-09-21 19:12:49,283 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:12:49,283 EPOCH 6 done: loss 0.2976 - lr 0.0100000\n",
+      "2021-09-21 19:12:49,320 DEV : loss 0.44166994094848633 - score 0.0\n",
+      "2021-09-21 19:12:49,320 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:12:49,322 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:12:49,409 epoch 7 - iter 1/7 - loss 0.20485555 - samples/sec: 18.06 - lr: 0.010000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:48:09,888 epoch 8 - iter 6/7 - loss 0.66102083 - samples/sec: 21.77 - lr: 0.020000\n",
-      "2021-09-08 10:48:09,934 epoch 8 - iter 7/7 - loss 0.63545732 - samples/sec: 21.76 - lr: 0.020000\n",
-      "2021-09-08 10:48:09,935 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:09,936 EPOCH 8 done: loss 0.6355 - lr 0.0200000\n",
-      "2021-09-08 10:48:09,965 DEV : loss 0.36557936668395996 - score 0.0\n",
-      "2021-09-08 10:48:09,966 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:48:09,968 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:10,028 epoch 9 - iter 1/7 - loss 0.66011959 - samples/sec: 21.67 - lr: 0.020000\n",
-      "2021-09-08 10:48:10,075 epoch 9 - iter 2/7 - loss 0.66560513 - samples/sec: 21.95 - lr: 0.020000\n",
-      "2021-09-08 10:48:10,121 epoch 9 - iter 3/7 - loss 0.63918072 - samples/sec: 21.91 - lr: 0.020000\n",
-      "2021-09-08 10:48:10,166 epoch 9 - iter 4/7 - loss 0.62778908 - samples/sec: 22.28 - lr: 0.020000\n",
-      "2021-09-08 10:48:10,211 epoch 9 - iter 5/7 - loss 0.62224764 - samples/sec: 22.56 - lr: 0.020000\n",
-      "2021-09-08 10:48:10,258 epoch 9 - iter 6/7 - loss 0.59851673 - samples/sec: 21.70 - lr: 0.020000\n",
-      "2021-09-08 10:48:10,305 epoch 9 - iter 7/7 - loss 0.60362130 - samples/sec: 21.19 - lr: 0.020000\n",
-      "2021-09-08 10:48:10,307 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:10,307 EPOCH 9 done: loss 0.6036 - lr 0.0200000\n",
-      "2021-09-08 10:48:10,335 DEV : loss 0.4287395179271698 - score 0.0\n",
-      "2021-09-08 10:48:10,336 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:48:10,338 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:10,402 epoch 10 - iter 1/7 - loss 0.88053459 - samples/sec: 20.26 - lr: 0.020000\n",
-      "2021-09-08 10:48:10,447 epoch 10 - iter 2/7 - loss 0.75916892 - samples/sec: 22.24 - lr: 0.020000\n",
-      "2021-09-08 10:48:10,494 epoch 10 - iter 3/7 - loss 0.72089704 - samples/sec: 21.73 - lr: 0.020000\n",
-      "2021-09-08 10:48:10,541 epoch 10 - iter 4/7 - loss 0.76560922 - samples/sec: 21.53 - lr: 0.020000\n",
-      "2021-09-08 10:48:10,587 epoch 10 - iter 5/7 - loss 0.74030411 - samples/sec: 21.95 - lr: 0.020000\n",
-      "2021-09-08 10:48:10,634 epoch 10 - iter 6/7 - loss 0.72857574 - samples/sec: 21.85 - lr: 0.020000\n",
-      "2021-09-08 10:48:10,685 epoch 10 - iter 7/7 - loss 0.73311876 - samples/sec: 19.95 - lr: 0.020000\n",
-      "2021-09-08 10:48:10,686 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:10,686 EPOCH 10 done: loss 0.7331 - lr 0.0200000\n",
-      "2021-09-08 10:48:10,715 DEV : loss 0.3829210698604584 - score 0.0\n",
-      "Epoch    10: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 10:48:10,715 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:48:17,939 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:17,940 Testing using best model ...\n",
-      "2021-09-08 10:48:17,941 loading file None/best-model.pt\n",
+      "2021-09-21 19:12:49,466 epoch 7 - iter 2/7 - loss 0.38756140 - samples/sec: 17.77 - lr: 0.010000\n",
+      "2021-09-21 19:12:49,531 epoch 7 - iter 3/7 - loss 0.31426273 - samples/sec: 15.38 - lr: 0.010000\n",
+      "2021-09-21 19:12:49,585 epoch 7 - iter 4/7 - loss 0.30745374 - samples/sec: 18.65 - lr: 0.010000\n",
+      "2021-09-21 19:12:49,635 epoch 7 - iter 5/7 - loss 0.24912739 - samples/sec: 20.01 - lr: 0.010000\n",
+      "2021-09-21 19:12:49,692 epoch 7 - iter 6/7 - loss 0.24742705 - samples/sec: 17.69 - lr: 0.010000\n",
+      "2021-09-21 19:12:49,747 epoch 7 - iter 7/7 - loss 0.22270898 - samples/sec: 18.34 - lr: 0.010000\n",
+      "2021-09-21 19:12:49,748 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:12:49,749 EPOCH 7 done: loss 0.2227 - lr 0.0100000\n",
+      "2021-09-21 19:12:49,798 DEV : loss 0.4503578245639801 - score 0.0\n",
+      "2021-09-21 19:12:49,801 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:12:49,803 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:12:49,888 epoch 8 - iter 1/7 - loss 0.02075867 - samples/sec: 18.14 - lr: 0.010000\n",
+      "2021-09-21 19:12:49,945 epoch 8 - iter 2/7 - loss 0.08197311 - samples/sec: 17.75 - lr: 0.010000\n",
+      "2021-09-21 19:12:50,016 epoch 8 - iter 3/7 - loss 0.10697054 - samples/sec: 14.15 - lr: 0.010000\n",
+      "2021-09-21 19:12:50,081 epoch 8 - iter 4/7 - loss 0.15659729 - samples/sec: 15.66 - lr: 0.010000\n",
+      "2021-09-21 19:12:50,153 epoch 8 - iter 5/7 - loss 0.14800450 - samples/sec: 13.86 - lr: 0.010000\n",
+      "2021-09-21 19:12:50,213 epoch 8 - iter 6/7 - loss 0.13164821 - samples/sec: 16.87 - lr: 0.010000\n",
+      "2021-09-21 19:12:50,283 epoch 8 - iter 7/7 - loss 0.13460414 - samples/sec: 14.37 - lr: 0.010000\n",
+      "2021-09-21 19:12:50,285 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:12:50,285 EPOCH 8 done: loss 0.1346 - lr 0.0100000\n",
+      "2021-09-21 19:12:50,331 DEV : loss 0.49705004692077637 - score 0.0\n",
+      "2021-09-21 19:12:50,334 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:12:50,335 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:12:50,439 epoch 9 - iter 1/7 - loss 0.09693340 - samples/sec: 19.10 - lr: 0.010000\n",
+      "2021-09-21 19:12:50,487 epoch 9 - iter 2/7 - loss 0.05839359 - samples/sec: 20.75 - lr: 0.010000\n",
+      "2021-09-21 19:12:50,545 epoch 9 - iter 3/7 - loss 0.16120398 - samples/sec: 17.47 - lr: 0.010000\n",
+      "2021-09-21 19:12:50,601 epoch 9 - iter 4/7 - loss 0.15837175 - samples/sec: 18.17 - lr: 0.010000\n",
+      "2021-09-21 19:12:50,654 epoch 9 - iter 5/7 - loss 0.15076260 - samples/sec: 18.73 - lr: 0.010000\n",
+      "2021-09-21 19:12:50,709 epoch 9 - iter 6/7 - loss 0.28708128 - samples/sec: 18.37 - lr: 0.010000\n",
+      "2021-09-21 19:12:50,792 epoch 9 - iter 7/7 - loss 0.25802480 - samples/sec: 12.23 - lr: 0.010000\n",
+      "2021-09-21 19:12:50,793 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:12:50,793 EPOCH 9 done: loss 0.2580 - lr 0.0100000\n",
+      "2021-09-21 19:12:50,831 DEV : loss 0.4094218611717224 - score 0.0\n",
+      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 19:12:50,832 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:12:50,834 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:12:50,932 epoch 10 - iter 1/7 - loss 0.03767024 - samples/sec: 19.08 - lr: 0.005000\n",
+      "2021-09-21 19:12:50,985 epoch 10 - iter 2/7 - loss 0.08836204 - samples/sec: 19.24 - lr: 0.005000\n",
+      "2021-09-21 19:12:51,050 epoch 10 - iter 3/7 - loss 0.06434838 - samples/sec: 15.46 - lr: 0.005000\n",
+      "2021-09-21 19:12:51,112 epoch 10 - iter 4/7 - loss 0.08552247 - samples/sec: 16.39 - lr: 0.005000\n",
+      "2021-09-21 19:12:51,180 epoch 10 - iter 5/7 - loss 0.14201980 - samples/sec: 14.72 - lr: 0.005000\n",
+      "2021-09-21 19:12:51,244 epoch 10 - iter 6/7 - loss 0.12550853 - samples/sec: 15.93 - lr: 0.005000\n",
+      "2021-09-21 19:12:51,309 epoch 10 - iter 7/7 - loss 0.14617900 - samples/sec: 15.49 - lr: 0.005000\n",
+      "2021-09-21 19:12:51,311 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:12:51,311 EPOCH 10 done: loss 0.1462 - lr 0.0050000\n",
+      "2021-09-21 19:12:51,355 DEV : loss 0.4492053985595703 - score 0.0\n",
+      "2021-09-21 19:12:51,357 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:12:55,607 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:12:55,607 Testing using best model ...\n",
+      "2021-09-21 19:12:55,609 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:48:22,672 \t0.0\n",
-      "2021-09-08 10:48:22,673 \n",
+      "2021-09-21 19:13:00,032 \t0.0\n",
+      "2021-09-21 19:13:00,033 \n",
       "Results:\n",
       "- F-score (micro) 0.0\n",
       "- F-score (macro) 0.0\n",
@@ -583,8 +582,8 @@
       "         joy     0.0000    0.0000    0.0000         0\n",
       "        love     0.0000    0.0000    0.0000         0\n",
       "     disgust     0.0000    0.0000    0.0000         0\n",
+      "        fear     0.0000    0.0000    0.0000         0\n",
       "       anger     0.0000    0.0000    0.0000         0\n",
-      "       guilt     0.0000    0.0000    0.0000         0\n",
       "       shame     0.0000    0.0000    0.0000         0\n",
       "     sadness     0.0000    0.0000    0.0000         0\n",
       "    surprise     0.0000    0.0000    0.0000         1\n",
@@ -594,26 +593,39 @@
       "weighted avg     0.0000    0.0000    0.0000         1\n",
       " samples avg     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "2021-09-08 10:48:22,673 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:02,304 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
+      "2021-09-21 19:13:00,033 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:13,059 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:49:06,513 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:14:17,151 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 8/8 [00:00<00:00, 20802.50it/s]"
+      "100%|██████████| 8/8 [00:00<00:00, 25536.10it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:49:06,516 [b'joy', b'disgust', b'fear', b'anger', b'guilt', b'shame', b'sadness', b'surprise']\n",
-      "2021-09-08 10:49:06,525 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:06,527 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:14:17,154 [b'surprise', b'love', b'disgust', b'fear', b'anger', b'guilt', b'sadness', b'shame']\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 19:14:21,927 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:21,929 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -926,174 +938,160 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:49:06,528 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:06,528 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
-      "2021-09-08 10:49:06,529 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:06,529 Parameters:\n",
-      "2021-09-08 10:49:06,529  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:49:06,530  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:49:06,530  - patience: \"3\"\n",
-      "2021-09-08 10:49:06,530  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:49:06,530  - max_epochs: \"10\"\n",
-      "2021-09-08 10:49:06,531  - shuffle: \"True\"\n",
-      "2021-09-08 10:49:06,531  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:49:06,531  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:49:06,532 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:06,532 Model training base path: \"None\"\n",
-      "2021-09-08 10:49:06,532 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:06,532 Device: cuda:1\n",
-      "2021-09-08 10:49:06,533 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:06,533 Embeddings storage mode: cpu\n",
-      "2021-09-08 10:49:06,539 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:06,604 epoch 1 - iter 1/7 - loss 0.62716800 - samples/sec: 21.09 - lr: 0.020000\n",
-      "2021-09-08 10:49:06,661 epoch 1 - iter 2/7 - loss 0.88973925 - samples/sec: 17.81 - lr: 0.020000\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
+      "2021-09-21 19:14:21,930 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:21,930 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
+      "2021-09-21 19:14:21,930 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:21,931 Parameters:\n",
+      "2021-09-21 19:14:21,931  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:14:21,931  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:14:21,931  - patience: \"3\"\n",
+      "2021-09-21 19:14:21,932  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:14:21,932  - max_epochs: \"10\"\n",
+      "2021-09-21 19:14:21,932  - shuffle: \"True\"\n",
+      "2021-09-21 19:14:21,933  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:14:21,933  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:14:21,933 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:21,933 Model training base path: \"None\"\n",
+      "2021-09-21 19:14:21,934 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:21,934 Device: cuda:0\n",
+      "2021-09-21 19:14:21,934 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:21,935 Embeddings storage mode: cpu\n",
+      "2021-09-21 19:14:22,033 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:22,188 epoch 1 - iter 1/7 - loss 0.82319349 - samples/sec: 12.49 - lr: 0.020000\n",
+      "2021-09-21 19:14:22,264 epoch 1 - iter 2/7 - loss 0.71560264 - samples/sec: 13.22 - lr: 0.020000\n",
+      "2021-09-21 19:14:22,344 epoch 1 - iter 3/7 - loss 0.73426515 - samples/sec: 12.70 - lr: 0.020000\n",
+      "2021-09-21 19:14:22,426 epoch 1 - iter 4/7 - loss 0.68758214 - samples/sec: 12.33 - lr: 0.020000\n",
+      "2021-09-21 19:14:22,511 epoch 1 - iter 5/7 - loss 0.70583421 - samples/sec: 11.89 - lr: 0.020000\n",
+      "2021-09-21 19:14:22,604 epoch 1 - iter 6/7 - loss 0.76028292 - samples/sec: 10.89 - lr: 0.020000\n",
+      "2021-09-21 19:14:22,680 epoch 1 - iter 7/7 - loss 0.74343547 - samples/sec: 13.19 - lr: 0.020000\n",
+      "2021-09-21 19:14:22,682 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:22,682 EPOCH 1 done: loss 0.7434 - lr 0.0200000\n",
+      "2021-09-21 19:14:22,821 DEV : loss 0.1911117136478424 - score 0.0\n",
+      "2021-09-21 19:14:22,821 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:14:42,048 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:42,111 epoch 2 - iter 1/7 - loss 0.75900984 - samples/sec: 20.92 - lr: 0.020000\n",
+      "2021-09-21 19:14:42,161 epoch 2 - iter 2/7 - loss 0.78102389 - samples/sec: 20.59 - lr: 0.020000\n",
+      "2021-09-21 19:14:42,208 epoch 2 - iter 3/7 - loss 0.70792143 - samples/sec: 21.55 - lr: 0.020000\n",
+      "2021-09-21 19:14:42,257 epoch 2 - iter 4/7 - loss 0.68361177 - samples/sec: 20.30 - lr: 0.020000\n",
+      "2021-09-21 19:14:42,304 epoch 2 - iter 5/7 - loss 0.64196820 - samples/sec: 21.62 - lr: 0.020000\n",
+      "2021-09-21 19:14:42,351 epoch 2 - iter 6/7 - loss 0.64027230 - samples/sec: 21.40 - lr: 0.020000\n",
+      "2021-09-21 19:14:42,395 epoch 2 - iter 7/7 - loss 0.64025502 - samples/sec: 23.13 - lr: 0.020000\n",
+      "2021-09-21 19:14:42,396 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:42,396 EPOCH 2 done: loss 0.6403 - lr 0.0200000\n",
+      "2021-09-21 19:14:45,412 DEV : loss 0.2480490803718567 - score 0.0\n",
+      "2021-09-21 19:14:45,415 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:14:45,437 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:45,626 epoch 3 - iter 1/7 - loss 0.66579312 - samples/sec: 11.21 - lr: 0.020000\n",
+      "2021-09-21 19:14:45,716 epoch 3 - iter 2/7 - loss 0.51864170 - samples/sec: 11.20 - lr: 0.020000\n",
+      "2021-09-21 19:14:45,806 epoch 3 - iter 3/7 - loss 0.59081516 - samples/sec: 11.22 - lr: 0.020000\n",
+      "2021-09-21 19:14:45,898 epoch 3 - iter 4/7 - loss 0.51129763 - samples/sec: 10.86 - lr: 0.020000\n",
+      "2021-09-21 19:14:45,999 epoch 3 - iter 5/7 - loss 0.52441435 - samples/sec: 9.98 - lr: 0.020000\n",
+      "2021-09-21 19:14:46,088 epoch 3 - iter 6/7 - loss 0.53622205 - samples/sec: 11.28 - lr: 0.020000\n",
+      "2021-09-21 19:14:46,196 epoch 3 - iter 7/7 - loss 0.58482537 - samples/sec: 9.36 - lr: 0.020000\n",
+      "2021-09-21 19:14:46,197 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:46,198 EPOCH 3 done: loss 0.5848 - lr 0.0200000\n",
+      "2021-09-21 19:14:46,292 DEV : loss 0.30744853615760803 - score 0.0\n",
+      "2021-09-21 19:14:46,294 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:14:46,296 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:46,445 epoch 4 - iter 1/7 - loss 0.27779067 - samples/sec: 11.36 - lr: 0.020000\n",
+      "2021-09-21 19:14:46,548 epoch 4 - iter 2/7 - loss 0.44847533 - samples/sec: 9.80 - lr: 0.020000\n",
+      "2021-09-21 19:14:46,629 epoch 4 - iter 3/7 - loss 0.46954834 - samples/sec: 12.43 - lr: 0.020000\n",
+      "2021-09-21 19:14:46,724 epoch 4 - iter 4/7 - loss 0.51658718 - samples/sec: 10.66 - lr: 0.020000\n",
+      "2021-09-21 19:14:46,820 epoch 4 - iter 5/7 - loss 0.53065703 - samples/sec: 10.47 - lr: 0.020000\n",
+      "2021-09-21 19:14:46,915 epoch 4 - iter 6/7 - loss 0.56159734 - samples/sec: 10.52 - lr: 0.020000\n",
+      "2021-09-21 19:14:47,003 epoch 4 - iter 7/7 - loss 0.50858064 - samples/sec: 11.49 - lr: 0.020000\n",
+      "2021-09-21 19:14:47,004 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:47,005 EPOCH 4 done: loss 0.5086 - lr 0.0200000\n",
+      "2021-09-21 19:14:47,050 DEV : loss 0.38563138246536255 - score 0.0\n",
+      "2021-09-21 19:14:47,055 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:14:47,057 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:47,244 epoch 5 - iter 1/7 - loss 0.11129540 - samples/sec: 10.31 - lr: 0.020000\n",
+      "2021-09-21 19:14:47,329 epoch 5 - iter 2/7 - loss 0.25416724 - samples/sec: 11.89 - lr: 0.020000\n",
+      "2021-09-21 19:14:47,426 epoch 5 - iter 3/7 - loss 0.30751351 - samples/sec: 10.29 - lr: 0.020000\n",
+      "2021-09-21 19:14:47,511 epoch 5 - iter 4/7 - loss 0.38883968 - samples/sec: 11.86 - lr: 0.020000\n",
+      "2021-09-21 19:14:47,603 epoch 5 - iter 5/7 - loss 0.38633789 - samples/sec: 11.05 - lr: 0.020000\n",
+      "2021-09-21 19:14:47,693 epoch 5 - iter 6/7 - loss 0.42727697 - samples/sec: 11.15 - lr: 0.020000\n",
+      "2021-09-21 19:14:47,796 epoch 5 - iter 7/7 - loss 0.45273015 - samples/sec: 9.73 - lr: 0.020000\n",
+      "2021-09-21 19:14:47,798 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:47,798 EPOCH 5 done: loss 0.4527 - lr 0.0200000\n",
+      "2021-09-21 19:14:47,887 DEV : loss 0.5122790336608887 - score 0.0\n",
+      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 19:14:47,891 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:14:47,895 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:48,060 epoch 6 - iter 1/7 - loss 0.08971126 - samples/sec: 11.27 - lr: 0.010000\n",
+      "2021-09-21 19:14:48,161 epoch 6 - iter 2/7 - loss 0.10020608 - samples/sec: 9.93 - lr: 0.010000\n",
+      "2021-09-21 19:14:48,245 epoch 6 - iter 3/7 - loss 0.26083910 - samples/sec: 12.00 - lr: 0.010000\n",
+      "2021-09-21 19:14:48,332 epoch 6 - iter 4/7 - loss 0.33125572 - samples/sec: 11.60 - lr: 0.010000\n",
+      "2021-09-21 19:14:48,422 epoch 6 - iter 5/7 - loss 0.37350935 - samples/sec: 11.24 - lr: 0.010000\n",
+      "2021-09-21 19:14:48,527 epoch 6 - iter 6/7 - loss 0.35157787 - samples/sec: 9.60 - lr: 0.010000\n",
+      "2021-09-21 19:14:48,630 epoch 6 - iter 7/7 - loss 0.38071095 - samples/sec: 9.73 - lr: 0.010000\n",
+      "2021-09-21 19:14:48,631 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:48,632 EPOCH 6 done: loss 0.3807 - lr 0.0100000\n",
+      "2021-09-21 19:14:48,709 DEV : loss 0.4389093518257141 - score 0.0\n",
+      "2021-09-21 19:14:48,715 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:14:48,717 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:48,889 epoch 7 - iter 1/7 - loss 0.04011158 - samples/sec: 11.28 - lr: 0.010000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:49:06,719 epoch 1 - iter 3/7 - loss 0.82319870 - samples/sec: 17.44 - lr: 0.020000\n",
-      "2021-09-08 10:49:06,776 epoch 1 - iter 4/7 - loss 0.66263921 - samples/sec: 17.47 - lr: 0.020000\n",
-      "2021-09-08 10:49:06,835 epoch 1 - iter 5/7 - loss 0.67605600 - samples/sec: 17.36 - lr: 0.020000\n",
-      "2021-09-08 10:49:06,893 epoch 1 - iter 6/7 - loss 0.74702467 - samples/sec: 17.48 - lr: 0.020000\n",
-      "2021-09-08 10:49:06,962 epoch 1 - iter 7/7 - loss 0.71915529 - samples/sec: 14.64 - lr: 0.020000\n",
-      "2021-09-08 10:49:06,963 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:06,963 EPOCH 1 done: loss 0.7192 - lr 0.0200000\n",
-      "2021-09-08 10:49:06,995 DEV : loss 0.5355588793754578 - score 0.0\n",
-      "2021-09-08 10:49:06,996 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:49:14,165 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:14,240 epoch 2 - iter 1/7 - loss 0.65435088 - samples/sec: 17.52 - lr: 0.020000\n",
-      "2021-09-08 10:49:14,296 epoch 2 - iter 2/7 - loss 0.55780615 - samples/sec: 17.98 - lr: 0.020000\n",
-      "2021-09-08 10:49:14,352 epoch 2 - iter 3/7 - loss 0.59126146 - samples/sec: 18.07 - lr: 0.020000\n",
-      "2021-09-08 10:49:14,407 epoch 2 - iter 4/7 - loss 0.49592783 - samples/sec: 18.24 - lr: 0.020000\n",
-      "2021-09-08 10:49:14,464 epoch 2 - iter 5/7 - loss 0.51510971 - samples/sec: 17.87 - lr: 0.020000\n",
-      "2021-09-08 10:49:14,519 epoch 2 - iter 6/7 - loss 0.46214596 - samples/sec: 18.18 - lr: 0.020000\n",
-      "2021-09-08 10:49:14,575 epoch 2 - iter 7/7 - loss 0.57710840 - samples/sec: 18.13 - lr: 0.020000\n",
-      "2021-09-08 10:49:14,576 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:14,576 EPOCH 2 done: loss 0.5771 - lr 0.0200000\n",
-      "2021-09-08 10:49:20,801 DEV : loss 0.4437183141708374 - score 0.0\n",
-      "2021-09-08 10:49:20,802 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:49:43,054 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:43,130 epoch 3 - iter 1/7 - loss 0.24023892 - samples/sec: 17.03 - lr: 0.020000\n",
-      "2021-09-08 10:49:43,192 epoch 3 - iter 2/7 - loss 0.40890796 - samples/sec: 16.28 - lr: 0.020000\n",
-      "2021-09-08 10:49:43,250 epoch 3 - iter 3/7 - loss 0.46443874 - samples/sec: 17.41 - lr: 0.020000\n",
-      "2021-09-08 10:49:43,309 epoch 3 - iter 4/7 - loss 0.54514419 - samples/sec: 17.15 - lr: 0.020000\n",
-      "2021-09-08 10:49:43,368 epoch 3 - iter 5/7 - loss 0.47977971 - samples/sec: 17.09 - lr: 0.020000\n",
-      "2021-09-08 10:49:43,426 epoch 3 - iter 6/7 - loss 0.43399988 - samples/sec: 17.45 - lr: 0.020000\n",
-      "2021-09-08 10:49:43,482 epoch 3 - iter 7/7 - loss 0.45268260 - samples/sec: 17.91 - lr: 0.020000\n",
-      "2021-09-08 10:49:43,483 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:43,483 EPOCH 3 done: loss 0.4527 - lr 0.0200000\n",
-      "2021-09-08 10:49:43,520 DEV : loss 0.36386948823928833 - score 0.0\n",
-      "2021-09-08 10:49:43,521 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:14:48,987 epoch 7 - iter 2/7 - loss 0.20030876 - samples/sec: 10.24 - lr: 0.010000\n",
+      "2021-09-21 19:14:49,068 epoch 7 - iter 3/7 - loss 0.29585282 - samples/sec: 12.55 - lr: 0.010000\n",
+      "2021-09-21 19:14:49,152 epoch 7 - iter 4/7 - loss 0.34746035 - samples/sec: 11.89 - lr: 0.010000\n",
+      "2021-09-21 19:14:49,244 epoch 7 - iter 5/7 - loss 0.41548306 - samples/sec: 11.05 - lr: 0.010000\n",
+      "2021-09-21 19:14:49,333 epoch 7 - iter 6/7 - loss 0.40581610 - samples/sec: 11.26 - lr: 0.010000\n",
+      "2021-09-21 19:14:49,421 epoch 7 - iter 7/7 - loss 0.36123051 - samples/sec: 11.44 - lr: 0.010000\n",
+      "2021-09-21 19:14:49,422 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:49,423 EPOCH 7 done: loss 0.3612 - lr 0.0100000\n",
+      "2021-09-21 19:14:49,499 DEV : loss 0.2614549994468689 - score 0.0\n",
+      "2021-09-21 19:14:49,502 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:14:49,504 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:49,678 epoch 8 - iter 1/7 - loss 0.05307830 - samples/sec: 12.47 - lr: 0.010000\n",
+      "2021-09-21 19:14:49,776 epoch 8 - iter 2/7 - loss 0.19546227 - samples/sec: 10.22 - lr: 0.010000\n",
+      "2021-09-21 19:14:49,875 epoch 8 - iter 3/7 - loss 0.26768199 - samples/sec: 10.15 - lr: 0.010000\n",
+      "2021-09-21 19:14:49,965 epoch 8 - iter 4/7 - loss 0.31533370 - samples/sec: 11.20 - lr: 0.010000\n",
+      "2021-09-21 19:14:50,068 epoch 8 - iter 5/7 - loss 0.35303723 - samples/sec: 9.76 - lr: 0.010000\n",
+      "2021-09-21 19:14:50,159 epoch 8 - iter 6/7 - loss 0.36344960 - samples/sec: 11.10 - lr: 0.010000\n",
+      "2021-09-21 19:14:50,236 epoch 8 - iter 7/7 - loss 0.32940317 - samples/sec: 13.17 - lr: 0.010000\n",
+      "2021-09-21 19:14:50,237 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:50,237 EPOCH 8 done: loss 0.3294 - lr 0.0100000\n",
+      "2021-09-21 19:14:50,288 DEV : loss 0.17152856290340424 - score 0.0\n",
+      "2021-09-21 19:14:50,294 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:49:57,717 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:57,817 epoch 4 - iter 1/7 - loss 0.06850830 - samples/sec: 13.99 - lr: 0.020000\n",
-      "2021-09-08 10:49:57,879 epoch 4 - iter 2/7 - loss 0.34909206 - samples/sec: 16.40 - lr: 0.020000\n",
-      "2021-09-08 10:49:57,934 epoch 4 - iter 3/7 - loss 0.40274359 - samples/sec: 18.35 - lr: 0.020000\n",
-      "2021-09-08 10:49:57,990 epoch 4 - iter 4/7 - loss 0.34810615 - samples/sec: 18.04 - lr: 0.020000\n",
-      "2021-09-08 10:49:58,040 epoch 4 - iter 5/7 - loss 0.45412187 - samples/sec: 20.34 - lr: 0.020000\n",
-      "2021-09-08 10:49:58,089 epoch 4 - iter 6/7 - loss 0.42215904 - samples/sec: 20.32 - lr: 0.020000\n",
-      "2021-09-08 10:49:58,140 epoch 4 - iter 7/7 - loss 0.42120358 - samples/sec: 19.90 - lr: 0.020000\n",
-      "2021-09-08 10:49:58,141 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:58,142 EPOCH 4 done: loss 0.4212 - lr 0.0200000\n",
-      "2021-09-08 10:49:58,170 DEV : loss 0.3170982599258423 - score 0.0\n",
-      "2021-09-08 10:49:58,171 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:14:54,558 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:54,631 epoch 9 - iter 1/7 - loss 0.05743018 - samples/sec: 18.84 - lr: 0.010000\n",
+      "2021-09-21 19:14:54,682 epoch 9 - iter 2/7 - loss 0.21742785 - samples/sec: 20.02 - lr: 0.010000\n",
+      "2021-09-21 19:14:54,729 epoch 9 - iter 3/7 - loss 0.15960263 - samples/sec: 21.66 - lr: 0.010000\n",
+      "2021-09-21 19:14:54,783 epoch 9 - iter 4/7 - loss 0.17722770 - samples/sec: 18.86 - lr: 0.010000\n",
+      "2021-09-21 19:14:54,831 epoch 9 - iter 5/7 - loss 0.22850725 - samples/sec: 20.83 - lr: 0.010000\n",
+      "2021-09-21 19:14:54,885 epoch 9 - iter 6/7 - loss 0.22134705 - samples/sec: 18.79 - lr: 0.010000\n",
+      "2021-09-21 19:14:54,937 epoch 9 - iter 7/7 - loss 0.22775621 - samples/sec: 19.69 - lr: 0.010000\n",
+      "2021-09-21 19:14:54,938 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:54,939 EPOCH 9 done: loss 0.2278 - lr 0.0100000\n",
+      "2021-09-21 19:14:55,095 DEV : loss 0.25219887495040894 - score 0.0\n",
+      "2021-09-21 19:14:55,096 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:14:55,170 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:55,240 epoch 10 - iter 1/7 - loss 0.22480418 - samples/sec: 18.80 - lr: 0.010000\n",
+      "2021-09-21 19:14:59,944 epoch 10 - iter 2/7 - loss 0.22121029 - samples/sec: 20.28 - lr: 0.010000\n",
+      "2021-09-21 19:14:59,989 epoch 10 - iter 3/7 - loss 0.15858494 - samples/sec: 22.61 - lr: 0.010000\n",
+      "2021-09-21 19:15:00,037 epoch 10 - iter 4/7 - loss 0.15045174 - samples/sec: 21.45 - lr: 0.010000\n",
+      "2021-09-21 19:15:00,081 epoch 10 - iter 5/7 - loss 0.12843511 - samples/sec: 22.96 - lr: 0.010000\n",
+      "2021-09-21 19:15:00,129 epoch 10 - iter 6/7 - loss 0.16033807 - samples/sec: 21.09 - lr: 0.010000\n",
+      "2021-09-21 19:15:00,178 epoch 10 - iter 7/7 - loss 0.23056544 - samples/sec: 20.81 - lr: 0.010000\n",
+      "2021-09-21 19:15:00,179 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:15:00,180 EPOCH 10 done: loss 0.2306 - lr 0.0100000\n",
+      "2021-09-21 19:15:02,756 DEV : loss 0.14452113211154938 - score 0.0\n",
+      "2021-09-21 19:15:02,757 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:50:07,715 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:07,794 epoch 5 - iter 1/7 - loss 0.16607843 - samples/sec: 16.20 - lr: 0.020000\n",
-      "2021-09-08 10:50:07,858 epoch 5 - iter 2/7 - loss 0.15592369 - samples/sec: 15.81 - lr: 0.020000\n",
-      "2021-09-08 10:50:07,917 epoch 5 - iter 3/7 - loss 0.31167836 - samples/sec: 17.14 - lr: 0.020000\n",
-      "2021-09-08 10:50:07,964 epoch 5 - iter 4/7 - loss 0.29925067 - samples/sec: 21.40 - lr: 0.020000\n",
-      "2021-09-08 10:50:08,011 epoch 5 - iter 5/7 - loss 0.33954044 - samples/sec: 21.48 - lr: 0.020000\n",
-      "2021-09-08 10:50:08,057 epoch 5 - iter 6/7 - loss 0.34029225 - samples/sec: 21.81 - lr: 0.020000\n",
-      "2021-09-08 10:50:08,107 epoch 5 - iter 7/7 - loss 0.31261512 - samples/sec: 20.41 - lr: 0.020000\n",
-      "2021-09-08 10:50:08,108 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:08,108 EPOCH 5 done: loss 0.3126 - lr 0.0200000\n",
-      "2021-09-08 10:50:09,821 DEV : loss 0.6538010835647583 - score 0.0\n",
-      "2021-09-08 10:50:09,821 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:50:09,824 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:09,892 epoch 6 - iter 1/7 - loss 0.04690917 - samples/sec: 20.05 - lr: 0.020000\n",
-      "2021-09-08 10:50:09,953 epoch 6 - iter 2/7 - loss 0.12231392 - samples/sec: 16.43 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,012 epoch 6 - iter 3/7 - loss 0.25837722 - samples/sec: 17.02 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,070 epoch 6 - iter 4/7 - loss 0.28355443 - samples/sec: 17.42 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,119 epoch 6 - iter 5/7 - loss 0.27962426 - samples/sec: 20.53 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,172 epoch 6 - iter 6/7 - loss 0.23999891 - samples/sec: 19.13 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,232 epoch 6 - iter 7/7 - loss 0.23010722 - samples/sec: 16.72 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,233 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:10,233 EPOCH 6 done: loss 0.2301 - lr 0.0200000\n",
-      "2021-09-08 10:50:10,267 DEV : loss 0.8285207748413086 - score 0.0\n",
-      "2021-09-08 10:50:10,268 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:50:10,270 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:10,340 epoch 7 - iter 1/7 - loss 0.81755775 - samples/sec: 18.90 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,387 epoch 7 - iter 2/7 - loss 0.46108446 - samples/sec: 21.33 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,440 epoch 7 - iter 3/7 - loss 0.38114372 - samples/sec: 18.86 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,493 epoch 7 - iter 4/7 - loss 0.30714532 - samples/sec: 19.24 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,552 epoch 7 - iter 5/7 - loss 0.29942479 - samples/sec: 17.09 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,606 epoch 7 - iter 6/7 - loss 0.30453753 - samples/sec: 18.45 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,659 epoch 7 - iter 7/7 - loss 0.26482458 - samples/sec: 18.97 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,660 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:10,661 EPOCH 7 done: loss 0.2648 - lr 0.0200000\n",
-      "2021-09-08 10:50:10,693 DEV : loss 0.7153764367103577 - score 0.0\n",
-      "2021-09-08 10:50:10,694 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:50:10,696 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:10,776 epoch 8 - iter 1/7 - loss 0.01673782 - samples/sec: 18.95 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,830 epoch 8 - iter 2/7 - loss 0.05654408 - samples/sec: 18.45 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,880 epoch 8 - iter 3/7 - loss 0.39873629 - samples/sec: 20.42 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,936 epoch 8 - iter 4/7 - loss 0.41365001 - samples/sec: 17.75 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,992 epoch 8 - iter 5/7 - loss 0.57105365 - samples/sec: 18.09 - lr: 0.020000\n",
-      "2021-09-08 10:50:11,045 epoch 8 - iter 6/7 - loss 0.48190312 - samples/sec: 18.92 - lr: 0.020000\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 10:50:11,100 epoch 8 - iter 7/7 - loss 0.46697865 - samples/sec: 18.61 - lr: 0.020000\n",
-      "2021-09-08 10:50:11,100 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:11,101 EPOCH 8 done: loss 0.4670 - lr 0.0200000\n",
-      "2021-09-08 10:50:11,138 DEV : loss 0.6374543905258179 - score 0.0\n",
-      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 10:50:11,138 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:50:11,140 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:11,212 epoch 9 - iter 1/7 - loss 0.24062622 - samples/sec: 17.85 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,267 epoch 9 - iter 2/7 - loss 0.31555438 - samples/sec: 18.44 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,325 epoch 9 - iter 3/7 - loss 0.24628759 - samples/sec: 17.31 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,379 epoch 9 - iter 4/7 - loss 0.19558999 - samples/sec: 18.53 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,433 epoch 9 - iter 5/7 - loss 0.18869741 - samples/sec: 18.74 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,489 epoch 9 - iter 6/7 - loss 0.25168134 - samples/sec: 18.07 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,536 epoch 9 - iter 7/7 - loss 0.21822630 - samples/sec: 21.60 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,537 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:11,537 EPOCH 9 done: loss 0.2182 - lr 0.0100000\n",
-      "2021-09-08 10:50:11,577 DEV : loss 0.9783905744552612 - score 0.0\n",
-      "2021-09-08 10:50:11,578 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:50:11,580 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:11,647 epoch 10 - iter 1/7 - loss 0.16448417 - samples/sec: 19.98 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,705 epoch 10 - iter 2/7 - loss 0.16961829 - samples/sec: 17.44 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,758 epoch 10 - iter 3/7 - loss 0.12369613 - samples/sec: 19.06 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,808 epoch 10 - iter 4/7 - loss 0.10557148 - samples/sec: 20.00 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,858 epoch 10 - iter 5/7 - loss 0.22807313 - samples/sec: 20.29 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,909 epoch 10 - iter 6/7 - loss 0.25467349 - samples/sec: 19.69 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,957 epoch 10 - iter 7/7 - loss 0.24831582 - samples/sec: 21.05 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,959 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:11,959 EPOCH 10 done: loss 0.2483 - lr 0.0100000\n",
-      "2021-09-08 10:50:11,987 DEV : loss 0.6601609587669373 - score 0.0\n",
-      "2021-09-08 10:50:11,988 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:50:34,140 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:34,140 Testing using best model ...\n",
-      "2021-09-08 10:50:34,142 loading file None/best-model.pt\n",
+      "2021-09-21 19:15:25,600 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:15:25,601 Testing using best model ...\n",
+      "2021-09-21 19:15:25,602 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:50:39,269 \t0.0\n",
-      "2021-09-08 10:50:39,270 \n",
+      "2021-09-21 19:15:30,993 \t0.0\n",
+      "2021-09-21 19:15:30,993 \n",
       "Results:\n",
       "- F-score (micro) 0.0\n",
       "- F-score (macro) 0.0\n",
@@ -1102,40 +1100,40 @@
       "By class:\n",
       "              precision    recall  f1-score   support\n",
       "\n",
-      "         joy     0.0000    0.0000    0.0000         0\n",
+      "    surprise     0.0000    0.0000    0.0000         0\n",
+      "        love     0.0000    0.0000    0.0000         0\n",
       "     disgust     0.0000    0.0000    0.0000         0\n",
       "        fear     0.0000    0.0000    0.0000         0\n",
       "       anger     0.0000    0.0000    0.0000         0\n",
       "       guilt     0.0000    0.0000    0.0000         0\n",
-      "       shame     0.0000    0.0000    0.0000         0\n",
       "     sadness     0.0000    0.0000    0.0000         0\n",
-      "    surprise     0.0000    0.0000    0.0000         1\n",
+      "       shame     0.0000    0.0000    0.0000         1\n",
       "\n",
       "   micro avg     0.0000    0.0000    0.0000         1\n",
       "   macro avg     0.0000    0.0000    0.0000         1\n",
       "weighted avg     0.0000    0.0000    0.0000         1\n",
       " samples avg     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "2021-09-08 10:50:39,270 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:18,518 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
+      "2021-09-21 19:15:30,994 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:39,823 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:51:23,628 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:16:44,957 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 8/8 [00:00<00:00, 32482.51it/s]"
+      "100%|██████████| 8/8 [00:00<00:00, 31388.62it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:51:23,629 [b'surprise', b'disgust', b'fear', b'anger', b'guilt', b'shame', b'sadness', b'joy']\n",
-      "2021-09-08 10:51:23,822 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:23,824 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:16:44,959 [b'joy', b'surprise', b'disgust', b'fear', b'anger', b'shame', b'sadness', b'guilt']\n",
+      "2021-09-21 19:16:45,114 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:45,116 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -1448,22 +1446,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:51:23,824 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:23,825 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
-      "2021-09-08 10:51:23,825 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:23,825 Parameters:\n",
-      "2021-09-08 10:51:23,826  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:51:23,826  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:51:23,826  - patience: \"3\"\n",
-      "2021-09-08 10:51:23,826  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:51:23,827  - max_epochs: \"10\"\n",
-      "2021-09-08 10:51:23,827  - shuffle: \"True\"\n",
-      "2021-09-08 10:51:23,827  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:51:23,828  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:51:23,828 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:23,828 Model training base path: \"None\"\n",
-      "2021-09-08 10:51:23,828 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:23,829 Device: cuda:1\n"
+      "2021-09-21 19:16:45,116 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:45,117 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
+      "2021-09-21 19:16:45,117 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:45,117 Parameters:\n",
+      "2021-09-21 19:16:45,118  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:16:45,118  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:16:45,118  - patience: \"3\"\n",
+      "2021-09-21 19:16:45,118  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:16:45,119  - max_epochs: \"10\"\n",
+      "2021-09-21 19:16:45,119  - shuffle: \"True\"\n",
+      "2021-09-21 19:16:45,119  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:16:45,120  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:16:45,120 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:45,120 Model training base path: \"None\"\n",
+      "2021-09-21 19:16:45,120 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:45,121 Device: cuda:0\n",
+      "2021-09-21 19:16:45,121 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:45,121 Embeddings storage mode: cpu\n",
+      "2021-09-21 19:16:45,128 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -1477,145 +1478,141 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:51:23,829 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:23,829 Embeddings storage mode: cpu\n",
-      "2021-09-08 10:51:24,319 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:24,377 epoch 1 - iter 1/7 - loss 0.69078809 - samples/sec: 23.79 - lr: 0.020000\n",
-      "2021-09-08 10:51:24,425 epoch 1 - iter 2/7 - loss 0.63801020 - samples/sec: 20.83 - lr: 0.020000\n",
-      "2021-09-08 10:51:24,472 epoch 1 - iter 3/7 - loss 0.67979167 - samples/sec: 21.52 - lr: 0.020000\n",
-      "2021-09-08 10:51:24,520 epoch 1 - iter 4/7 - loss 0.62101304 - samples/sec: 21.19 - lr: 0.020000\n",
-      "2021-09-08 10:51:24,567 epoch 1 - iter 5/7 - loss 0.61791268 - samples/sec: 21.59 - lr: 0.020000\n",
-      "2021-09-08 10:51:24,613 epoch 1 - iter 6/7 - loss 0.65299991 - samples/sec: 21.63 - lr: 0.020000\n",
-      "2021-09-08 10:51:24,661 epoch 1 - iter 7/7 - loss 0.62327278 - samples/sec: 21.34 - lr: 0.020000\n",
-      "2021-09-08 10:51:24,662 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:24,662 EPOCH 1 done: loss 0.6233 - lr 0.0200000\n",
-      "2021-09-08 10:51:25,508 DEV : loss 0.2935805320739746 - score 0.0\n",
-      "2021-09-08 10:51:25,509 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:16:45,233 epoch 1 - iter 1/7 - loss 0.25631359 - samples/sec: 12.71 - lr: 0.020000\n",
+      "2021-09-21 19:16:45,326 epoch 1 - iter 2/7 - loss 0.38632430 - samples/sec: 10.84 - lr: 0.020000\n",
+      "2021-09-21 19:16:45,404 epoch 1 - iter 3/7 - loss 0.59195667 - samples/sec: 12.89 - lr: 0.020000\n",
+      "2021-09-21 19:16:45,526 epoch 1 - iter 4/7 - loss 0.62218808 - samples/sec: 8.24 - lr: 0.020000\n",
+      "2021-09-21 19:16:45,631 epoch 1 - iter 5/7 - loss 0.65640433 - samples/sec: 9.58 - lr: 0.020000\n",
+      "2021-09-21 19:16:45,746 epoch 1 - iter 6/7 - loss 0.67423826 - samples/sec: 8.71 - lr: 0.020000\n",
+      "2021-09-21 19:16:45,823 epoch 1 - iter 7/7 - loss 0.69607870 - samples/sec: 13.13 - lr: 0.020000\n",
+      "2021-09-21 19:16:45,824 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:45,824 EPOCH 1 done: loss 0.6961 - lr 0.0200000\n",
+      "2021-09-21 19:16:45,915 DEV : loss 0.7643988132476807 - score 0.0\n",
+      "2021-09-21 19:16:45,915 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:51:34,844 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:34,906 epoch 2 - iter 1/7 - loss 0.56055409 - samples/sec: 21.09 - lr: 0.020000\n",
-      "2021-09-08 10:51:34,954 epoch 2 - iter 2/7 - loss 0.42461525 - samples/sec: 21.16 - lr: 0.020000\n",
-      "2021-09-08 10:51:35,001 epoch 2 - iter 3/7 - loss 0.52187714 - samples/sec: 21.66 - lr: 0.020000\n",
-      "2021-09-08 10:51:35,048 epoch 2 - iter 4/7 - loss 0.50673166 - samples/sec: 21.47 - lr: 0.020000\n",
-      "2021-09-08 10:51:35,095 epoch 2 - iter 5/7 - loss 0.52329957 - samples/sec: 21.61 - lr: 0.020000\n",
-      "2021-09-08 10:51:35,141 epoch 2 - iter 6/7 - loss 0.56603862 - samples/sec: 21.78 - lr: 0.020000\n",
-      "2021-09-08 10:51:35,188 epoch 2 - iter 7/7 - loss 0.59482031 - samples/sec: 21.40 - lr: 0.020000\n",
-      "2021-09-08 10:51:35,189 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:35,189 EPOCH 2 done: loss 0.5948 - lr 0.0200000\n",
-      "2021-09-08 10:51:35,480 DEV : loss 0.4668830335140228 - score 0.0\n",
-      "2021-09-08 10:51:35,481 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:51:35,493 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:35,555 epoch 3 - iter 1/7 - loss 0.78426582 - samples/sec: 20.56 - lr: 0.020000\n",
-      "2021-09-08 10:51:35,607 epoch 3 - iter 2/7 - loss 0.67212355 - samples/sec: 19.68 - lr: 0.020000\n",
-      "2021-09-08 10:51:35,659 epoch 3 - iter 3/7 - loss 0.66272044 - samples/sec: 19.71 - lr: 0.020000\n",
-      "2021-09-08 10:51:35,710 epoch 3 - iter 4/7 - loss 0.54840916 - samples/sec: 19.58 - lr: 0.020000\n",
-      "2021-09-08 10:51:35,759 epoch 3 - iter 5/7 - loss 0.57385753 - samples/sec: 20.84 - lr: 0.020000\n",
-      "2021-09-08 10:51:35,807 epoch 3 - iter 6/7 - loss 0.55489898 - samples/sec: 21.08 - lr: 0.020000\n",
-      "2021-09-08 10:51:35,857 epoch 3 - iter 7/7 - loss 0.54139922 - samples/sec: 20.27 - lr: 0.020000\n",
-      "2021-09-08 10:51:35,858 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:35,859 EPOCH 3 done: loss 0.5414 - lr 0.0200000\n",
-      "2021-09-08 10:51:35,889 DEV : loss 0.216843843460083 - score 0.0\n",
-      "2021-09-08 10:51:35,890 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:16:49,919 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:50,098 epoch 2 - iter 1/7 - loss 0.50185448 - samples/sec: 11.29 - lr: 0.020000\n",
+      "2021-09-21 19:16:50,179 epoch 2 - iter 2/7 - loss 0.34549677 - samples/sec: 12.45 - lr: 0.020000\n",
+      "2021-09-21 19:16:50,257 epoch 2 - iter 3/7 - loss 0.54244742 - samples/sec: 12.86 - lr: 0.020000\n",
+      "2021-09-21 19:16:50,353 epoch 2 - iter 4/7 - loss 0.55518974 - samples/sec: 10.55 - lr: 0.020000\n",
+      "2021-09-21 19:16:50,435 epoch 2 - iter 5/7 - loss 0.60760469 - samples/sec: 12.20 - lr: 0.020000\n",
+      "2021-09-21 19:16:50,507 epoch 2 - iter 6/7 - loss 0.53782974 - samples/sec: 13.95 - lr: 0.020000\n",
+      "2021-09-21 19:16:50,602 epoch 2 - iter 7/7 - loss 0.56662481 - samples/sec: 10.59 - lr: 0.020000\n",
+      "2021-09-21 19:16:50,603 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:50,604 EPOCH 2 done: loss 0.5666 - lr 0.0200000\n",
+      "2021-09-21 19:16:50,654 DEV : loss 0.4387739300727844 - score 0.0\n",
+      "2021-09-21 19:16:50,656 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:51:44,946 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:45,021 epoch 4 - iter 1/7 - loss 0.70098752 - samples/sec: 17.88 - lr: 0.020000\n",
-      "2021-09-08 10:51:45,075 epoch 4 - iter 2/7 - loss 0.58215041 - samples/sec: 18.96 - lr: 0.020000\n",
-      "2021-09-08 10:51:45,126 epoch 4 - iter 3/7 - loss 0.51287289 - samples/sec: 19.73 - lr: 0.020000\n",
-      "2021-09-08 10:51:45,180 epoch 4 - iter 4/7 - loss 0.50586567 - samples/sec: 18.44 - lr: 0.020000\n",
-      "2021-09-08 10:51:45,242 epoch 4 - iter 5/7 - loss 0.53668211 - samples/sec: 16.41 - lr: 0.020000\n",
-      "2021-09-08 10:51:45,296 epoch 4 - iter 6/7 - loss 0.54285822 - samples/sec: 18.74 - lr: 0.020000\n",
-      "2021-09-08 10:51:45,351 epoch 4 - iter 7/7 - loss 0.47323234 - samples/sec: 18.31 - lr: 0.020000\n",
-      "2021-09-08 10:51:45,352 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:45,352 EPOCH 4 done: loss 0.4732 - lr 0.0200000\n",
-      "2021-09-08 10:51:45,390 DEV : loss 0.19219858944416046 - score 0.0\n",
-      "2021-09-08 10:51:45,392 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:16:55,734 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:55,839 epoch 3 - iter 1/7 - loss 0.45181727 - samples/sec: 18.24 - lr: 0.020000\n",
+      "2021-09-21 19:16:55,906 epoch 3 - iter 2/7 - loss 0.49571699 - samples/sec: 15.16 - lr: 0.020000\n",
+      "2021-09-21 19:16:55,980 epoch 3 - iter 3/7 - loss 0.47598958 - samples/sec: 13.73 - lr: 0.020000\n",
+      "2021-09-21 19:16:56,045 epoch 3 - iter 4/7 - loss 0.44025577 - samples/sec: 15.56 - lr: 0.020000\n",
+      "2021-09-21 19:16:56,102 epoch 3 - iter 5/7 - loss 0.47497382 - samples/sec: 17.79 - lr: 0.020000\n",
+      "2021-09-21 19:16:56,158 epoch 3 - iter 6/7 - loss 0.49307728 - samples/sec: 18.15 - lr: 0.020000\n",
+      "2021-09-21 19:16:56,225 epoch 3 - iter 7/7 - loss 0.48966321 - samples/sec: 15.25 - lr: 0.020000\n",
+      "2021-09-21 19:16:56,227 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:56,227 EPOCH 3 done: loss 0.4897 - lr 0.0200000\n",
+      "2021-09-21 19:16:56,294 DEV : loss 0.3363073766231537 - score 0.0\n",
+      "2021-09-21 19:16:56,296 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:51:50,988 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:51,049 epoch 5 - iter 1/7 - loss 0.64347434 - samples/sec: 21.21 - lr: 0.020000\n",
-      "2021-09-08 10:51:51,097 epoch 5 - iter 2/7 - loss 0.56001832 - samples/sec: 21.17 - lr: 0.020000\n",
-      "2021-09-08 10:51:51,152 epoch 5 - iter 3/7 - loss 0.70931002 - samples/sec: 18.49 - lr: 0.020000\n",
-      "2021-09-08 10:51:51,207 epoch 5 - iter 4/7 - loss 0.60646948 - samples/sec: 18.20 - lr: 0.020000\n",
-      "2021-09-08 10:51:51,262 epoch 5 - iter 5/7 - loss 0.49588724 - samples/sec: 18.31 - lr: 0.020000\n",
-      "2021-09-08 10:51:51,317 epoch 5 - iter 6/7 - loss 0.51044872 - samples/sec: 18.29 - lr: 0.020000\n",
-      "2021-09-08 10:51:51,372 epoch 5 - iter 7/7 - loss 0.53402351 - samples/sec: 18.50 - lr: 0.020000\n",
-      "2021-09-08 10:51:51,373 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:51,373 EPOCH 5 done: loss 0.5340 - lr 0.0200000\n",
-      "2021-09-08 10:51:52,044 DEV : loss 0.1434156894683838 - score 0.0\n",
-      "2021-09-08 10:51:52,045 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:51:56,277 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:56,341 epoch 6 - iter 1/7 - loss 0.58528334 - samples/sec: 20.84 - lr: 0.020000\n",
-      "2021-09-08 10:51:56,388 epoch 6 - iter 2/7 - loss 0.66045979 - samples/sec: 21.29 - lr: 0.020000\n",
-      "2021-09-08 10:51:56,435 epoch 6 - iter 3/7 - loss 0.77713245 - samples/sec: 21.58 - lr: 0.020000\n",
-      "2021-09-08 10:51:56,482 epoch 6 - iter 4/7 - loss 0.65412190 - samples/sec: 21.64 - lr: 0.020000\n",
-      "2021-09-08 10:51:56,529 epoch 6 - iter 5/7 - loss 0.65793650 - samples/sec: 21.36 - lr: 0.020000\n",
-      "2021-09-08 10:51:56,576 epoch 6 - iter 6/7 - loss 0.56381563 - samples/sec: 21.41 - lr: 0.020000\n",
-      "2021-09-08 10:51:56,623 epoch 6 - iter 7/7 - loss 0.54569003 - samples/sec: 21.63 - lr: 0.020000\n",
-      "2021-09-08 10:51:56,624 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:56,624 EPOCH 6 done: loss 0.5457 - lr 0.0200000\n",
-      "2021-09-08 10:51:56,760 DEV : loss 0.1646856814622879 - score 0.0\n",
-      "2021-09-08 10:51:56,761 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:51:56,850 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:56,907 epoch 7 - iter 1/7 - loss 0.02008551 - samples/sec: 22.89 - lr: 0.020000\n",
-      "2021-09-08 10:51:56,954 epoch 7 - iter 2/7 - loss 0.32008364 - samples/sec: 21.64 - lr: 0.020000\n",
-      "2021-09-08 10:51:56,998 epoch 7 - iter 3/7 - loss 0.45234404 - samples/sec: 23.06 - lr: 0.020000\n",
-      "2021-09-08 10:51:57,041 epoch 7 - iter 4/7 - loss 0.48975777 - samples/sec: 23.20 - lr: 0.020000\n",
-      "2021-09-08 10:51:57,084 epoch 7 - iter 5/7 - loss 0.53900242 - samples/sec: 23.39 - lr: 0.020000\n",
-      "2021-09-08 10:51:57,128 epoch 7 - iter 6/7 - loss 0.54533130 - samples/sec: 23.11 - lr: 0.020000\n",
-      "2021-09-08 10:51:57,171 epoch 7 - iter 7/7 - loss 0.56392698 - samples/sec: 23.33 - lr: 0.020000\n",
-      "2021-09-08 10:51:57,172 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:57,173 EPOCH 7 done: loss 0.5639 - lr 0.0200000\n",
-      "2021-09-08 10:51:57,295 DEV : loss 0.42561012506484985 - score 0.0\n",
-      "2021-09-08 10:51:57,295 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:51:57,370 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:57,428 epoch 8 - iter 1/7 - loss 0.75083703 - samples/sec: 22.45 - lr: 0.020000\n"
+      "2021-09-21 19:17:10,659 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:17:10,766 epoch 4 - iter 1/7 - loss 0.51133281 - samples/sec: 15.34 - lr: 0.020000\n",
+      "2021-09-21 19:17:10,837 epoch 4 - iter 2/7 - loss 0.56470537 - samples/sec: 14.17 - lr: 0.020000\n",
+      "2021-09-21 19:17:10,905 epoch 4 - iter 3/7 - loss 0.50975231 - samples/sec: 14.95 - lr: 0.020000\n",
+      "2021-09-21 19:17:10,993 epoch 4 - iter 4/7 - loss 0.41008675 - samples/sec: 11.50 - lr: 0.020000\n",
+      "2021-09-21 19:17:11,067 epoch 4 - iter 5/7 - loss 0.46127978 - samples/sec: 13.57 - lr: 0.020000\n",
+      "2021-09-21 19:17:11,134 epoch 4 - iter 6/7 - loss 0.47269612 - samples/sec: 15.21 - lr: 0.020000\n",
+      "2021-09-21 19:17:11,191 epoch 4 - iter 7/7 - loss 0.41608008 - samples/sec: 17.82 - lr: 0.020000\n",
+      "2021-09-21 19:17:11,192 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:17:11,192 EPOCH 4 done: loss 0.4161 - lr 0.0200000\n",
+      "2021-09-21 19:17:11,377 DEV : loss 0.6624864935874939 - score 0.0\n",
+      "2021-09-21 19:17:11,378 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:17:11,401 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:17:11,550 epoch 5 - iter 1/7 - loss 0.52697098 - samples/sec: 13.86 - lr: 0.020000\n",
+      "2021-09-21 19:17:11,610 epoch 5 - iter 2/7 - loss 0.28677965 - samples/sec: 16.93 - lr: 0.020000\n",
+      "2021-09-21 19:17:11,706 epoch 5 - iter 3/7 - loss 0.30008339 - samples/sec: 10.50 - lr: 0.020000\n",
+      "2021-09-21 19:17:11,780 epoch 5 - iter 4/7 - loss 0.30694014 - samples/sec: 13.53 - lr: 0.020000\n",
+      "2021-09-21 19:17:11,848 epoch 5 - iter 5/7 - loss 0.43799473 - samples/sec: 14.92 - lr: 0.020000\n",
+      "2021-09-21 19:17:11,918 epoch 5 - iter 6/7 - loss 0.37735276 - samples/sec: 14.47 - lr: 0.020000\n",
+      "2021-09-21 19:17:11,989 epoch 5 - iter 7/7 - loss 0.39133133 - samples/sec: 14.02 - lr: 0.020000\n",
+      "2021-09-21 19:17:11,990 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:17:11,991 EPOCH 5 done: loss 0.3913 - lr 0.0200000\n",
+      "2021-09-21 19:17:12,049 DEV : loss 0.4374282658100128 - score 0.0\n",
+      "2021-09-21 19:17:12,051 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:17:12,053 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:17:12,177 epoch 6 - iter 1/7 - loss 0.71323115 - samples/sec: 11.25 - lr: 0.020000\n",
+      "2021-09-21 19:17:12,271 epoch 6 - iter 2/7 - loss 0.44931049 - samples/sec: 10.69 - lr: 0.020000\n",
+      "2021-09-21 19:17:12,340 epoch 6 - iter 3/7 - loss 0.37430482 - samples/sec: 14.63 - lr: 0.020000\n",
+      "2021-09-21 19:17:12,428 epoch 6 - iter 4/7 - loss 0.37603760 - samples/sec: 11.47 - lr: 0.020000\n",
+      "2021-09-21 19:17:12,487 epoch 6 - iter 5/7 - loss 0.31288913 - samples/sec: 17.16 - lr: 0.020000\n",
+      "2021-09-21 19:17:12,557 epoch 6 - iter 6/7 - loss 0.27483604 - samples/sec: 14.32 - lr: 0.020000\n",
+      "2021-09-21 19:17:12,630 epoch 6 - iter 7/7 - loss 0.27865398 - samples/sec: 13.80 - lr: 0.020000\n",
+      "2021-09-21 19:17:12,631 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:17:12,631 EPOCH 6 done: loss 0.2787 - lr 0.0200000\n",
+      "2021-09-21 19:17:12,677 DEV : loss 0.7644373774528503 - score 0.0\n",
+      "2021-09-21 19:17:12,681 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:17:12,682 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:17:12,793 epoch 7 - iter 1/7 - loss 0.23778634 - samples/sec: 11.94 - lr: 0.020000\n",
+      "2021-09-21 19:17:12,865 epoch 7 - iter 2/7 - loss 0.21002549 - samples/sec: 14.08 - lr: 0.020000\n",
+      "2021-09-21 19:17:12,936 epoch 7 - iter 3/7 - loss 0.16241150 - samples/sec: 14.08 - lr: 0.020000\n",
+      "2021-09-21 19:17:13,002 epoch 7 - iter 4/7 - loss 0.39022858 - samples/sec: 15.32 - lr: 0.020000\n",
+      "2021-09-21 19:17:13,066 epoch 7 - iter 5/7 - loss 0.34820906 - samples/sec: 15.72 - lr: 0.020000\n",
+      "2021-09-21 19:17:13,121 epoch 7 - iter 6/7 - loss 0.31857393 - samples/sec: 18.25 - lr: 0.020000\n",
+      "2021-09-21 19:17:13,176 epoch 7 - iter 7/7 - loss 0.27887307 - samples/sec: 18.54 - lr: 0.020000\n",
+      "2021-09-21 19:17:13,178 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:17:13,178 EPOCH 7 done: loss 0.2789 - lr 0.0200000\n",
+      "2021-09-21 19:17:13,211 DEV : loss 0.67439204454422 - score 0.0\n",
+      "Epoch     7: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 19:17:13,214 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:17:13,215 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:17:13,308 epoch 8 - iter 1/7 - loss 0.22162332 - samples/sec: 16.48 - lr: 0.010000\n",
+      "2021-09-21 19:17:13,369 epoch 8 - iter 2/7 - loss 0.14432964 - samples/sec: 16.70 - lr: 0.010000\n",
+      "2021-09-21 19:17:13,421 epoch 8 - iter 3/7 - loss 0.13821908 - samples/sec: 19.56 - lr: 0.010000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:51:57,471 epoch 8 - iter 2/7 - loss 0.72308290 - samples/sec: 23.22 - lr: 0.020000\n",
-      "2021-09-08 10:51:57,515 epoch 8 - iter 3/7 - loss 0.70714476 - samples/sec: 23.35 - lr: 0.020000\n",
-      "2021-09-08 10:51:57,558 epoch 8 - iter 4/7 - loss 0.70062667 - samples/sec: 22.96 - lr: 0.020000\n",
-      "2021-09-08 10:51:57,602 epoch 8 - iter 5/7 - loss 0.69199072 - samples/sec: 23.32 - lr: 0.020000\n",
-      "2021-09-08 10:51:57,646 epoch 8 - iter 6/7 - loss 0.63465654 - samples/sec: 22.90 - lr: 0.020000\n",
-      "2021-09-08 10:51:57,692 epoch 8 - iter 7/7 - loss 0.63154418 - samples/sec: 21.67 - lr: 0.020000\n",
-      "2021-09-08 10:51:57,693 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:57,694 EPOCH 8 done: loss 0.6315 - lr 0.0200000\n",
-      "2021-09-08 10:51:57,821 DEV : loss 0.29933273792266846 - score 0.0\n",
-      "2021-09-08 10:51:57,822 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:51:57,841 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:57,897 epoch 9 - iter 1/7 - loss 0.64976829 - samples/sec: 23.11 - lr: 0.020000\n",
-      "2021-09-08 10:51:57,941 epoch 9 - iter 2/7 - loss 0.63207969 - samples/sec: 23.00 - lr: 0.020000\n",
-      "2021-09-08 10:51:57,985 epoch 9 - iter 3/7 - loss 0.64577466 - samples/sec: 23.23 - lr: 0.020000\n",
-      "2021-09-08 10:51:58,029 epoch 9 - iter 4/7 - loss 0.62790141 - samples/sec: 23.07 - lr: 0.020000\n",
-      "2021-09-08 10:51:58,072 epoch 9 - iter 5/7 - loss 0.65587384 - samples/sec: 23.18 - lr: 0.020000\n",
-      "2021-09-08 10:51:58,115 epoch 9 - iter 6/7 - loss 0.65547824 - samples/sec: 23.36 - lr: 0.020000\n",
-      "2021-09-08 10:51:58,159 epoch 9 - iter 7/7 - loss 0.65419051 - samples/sec: 22.95 - lr: 0.020000\n",
-      "2021-09-08 10:51:58,160 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:58,161 EPOCH 9 done: loss 0.6542 - lr 0.0200000\n",
-      "2021-09-08 10:51:58,452 DEV : loss 0.40491822361946106 - score 0.0\n",
-      "Epoch     9: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 10:51:58,453 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:51:58,456 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:58,513 epoch 10 - iter 1/7 - loss 0.56834656 - samples/sec: 22.77 - lr: 0.010000\n",
-      "2021-09-08 10:51:58,556 epoch 10 - iter 2/7 - loss 0.58383101 - samples/sec: 23.35 - lr: 0.010000\n",
-      "2021-09-08 10:51:58,605 epoch 10 - iter 3/7 - loss 0.56755513 - samples/sec: 20.71 - lr: 0.010000\n",
-      "2021-09-08 10:51:58,649 epoch 10 - iter 4/7 - loss 0.58666874 - samples/sec: 23.15 - lr: 0.010000\n",
-      "2021-09-08 10:51:58,692 epoch 10 - iter 5/7 - loss 0.59302598 - samples/sec: 23.25 - lr: 0.010000\n",
-      "2021-09-08 10:51:58,736 epoch 10 - iter 6/7 - loss 0.59561692 - samples/sec: 22.91 - lr: 0.010000\n",
-      "2021-09-08 10:51:58,780 epoch 10 - iter 7/7 - loss 0.59593379 - samples/sec: 23.25 - lr: 0.010000\n",
-      "2021-09-08 10:51:58,873 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:58,873 EPOCH 10 done: loss 0.5959 - lr 0.0100000\n",
-      "2021-09-08 10:51:59,065 DEV : loss 0.38583236932754517 - score 0.0\n",
-      "2021-09-08 10:51:59,066 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:52:03,688 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:03,689 Testing using best model ...\n",
-      "2021-09-08 10:52:03,714 loading file None/best-model.pt\n",
+      "2021-09-21 19:17:13,472 epoch 8 - iter 4/7 - loss 0.14052143 - samples/sec: 19.98 - lr: 0.010000\n",
+      "2021-09-21 19:17:13,522 epoch 8 - iter 5/7 - loss 0.16036033 - samples/sec: 20.00 - lr: 0.010000\n",
+      "2021-09-21 19:17:13,575 epoch 8 - iter 6/7 - loss 0.15226208 - samples/sec: 19.42 - lr: 0.010000\n",
+      "2021-09-21 19:17:13,625 epoch 8 - iter 7/7 - loss 0.18853066 - samples/sec: 20.03 - lr: 0.010000\n",
+      "2021-09-21 19:17:13,627 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:17:13,627 EPOCH 8 done: loss 0.1885 - lr 0.0100000\n",
+      "2021-09-21 19:17:13,657 DEV : loss 0.5878506898880005 - score 0.0\n",
+      "2021-09-21 19:17:13,658 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:17:13,660 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:17:13,724 epoch 9 - iter 1/7 - loss 1.20665395 - samples/sec: 20.49 - lr: 0.010000\n",
+      "2021-09-21 19:17:13,780 epoch 9 - iter 2/7 - loss 0.78134829 - samples/sec: 18.07 - lr: 0.010000\n",
+      "2021-09-21 19:17:13,848 epoch 9 - iter 3/7 - loss 0.53795428 - samples/sec: 14.69 - lr: 0.010000\n",
+      "2021-09-21 19:17:13,901 epoch 9 - iter 4/7 - loss 0.48575305 - samples/sec: 18.96 - lr: 0.010000\n",
+      "2021-09-21 19:17:13,954 epoch 9 - iter 5/7 - loss 0.58503845 - samples/sec: 19.35 - lr: 0.010000\n",
+      "2021-09-21 19:17:14,000 epoch 9 - iter 6/7 - loss 0.49329849 - samples/sec: 21.89 - lr: 0.010000\n",
+      "2021-09-21 19:17:14,056 epoch 9 - iter 7/7 - loss 0.43218366 - samples/sec: 18.15 - lr: 0.010000\n",
+      "2021-09-21 19:17:14,057 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:17:14,057 EPOCH 9 done: loss 0.4322 - lr 0.0100000\n",
+      "2021-09-21 19:17:14,197 DEV : loss 0.4785677492618561 - score 0.0\n",
+      "2021-09-21 19:17:14,199 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:17:14,281 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:17:14,359 epoch 10 - iter 1/7 - loss 0.19340026 - samples/sec: 17.94 - lr: 0.010000\n",
+      "2021-09-21 19:17:14,415 epoch 10 - iter 2/7 - loss 0.11600089 - samples/sec: 18.01 - lr: 0.010000\n",
+      "2021-09-21 19:17:14,488 epoch 10 - iter 3/7 - loss 0.15374777 - samples/sec: 13.73 - lr: 0.010000\n",
+      "2021-09-21 19:17:14,553 epoch 10 - iter 4/7 - loss 0.18399392 - samples/sec: 15.49 - lr: 0.010000\n",
+      "2021-09-21 19:17:14,612 epoch 10 - iter 5/7 - loss 0.15480606 - samples/sec: 17.30 - lr: 0.010000\n",
+      "2021-09-21 19:17:14,671 epoch 10 - iter 6/7 - loss 0.23679466 - samples/sec: 17.12 - lr: 0.010000\n",
+      "2021-09-21 19:17:14,727 epoch 10 - iter 7/7 - loss 0.20919211 - samples/sec: 17.75 - lr: 0.010000\n",
+      "2021-09-21 19:17:14,729 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:17:14,729 EPOCH 10 done: loss 0.2092 - lr 0.0100000\n",
+      "2021-09-21 19:17:14,872 DEV : loss 0.6008217930793762 - score 0.0\n",
+      "2021-09-21 19:17:14,874 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:17:19,615 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:17:19,616 Testing using best model ...\n",
+      "2021-09-21 19:17:19,617 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:52:12,883 \t0.0\n",
-      "2021-09-08 10:52:12,884 \n",
+      "2021-09-21 19:17:24,962 \t0.0\n",
+      "2021-09-21 19:17:24,963 \n",
       "Results:\n",
       "- F-score (micro) 0.0\n",
       "- F-score (macro) 0.0\n",
@@ -1624,40 +1621,40 @@
       "By class:\n",
       "              precision    recall  f1-score   support\n",
       "\n",
+      "         joy     0.0000    0.0000    0.0000         0\n",
       "    surprise     0.0000    0.0000    0.0000         0\n",
       "     disgust     0.0000    0.0000    0.0000         0\n",
       "        fear     0.0000    0.0000    0.0000         0\n",
       "       anger     0.0000    0.0000    0.0000         0\n",
-      "       guilt     0.0000    0.0000    0.0000         0\n",
       "       shame     0.0000    0.0000    0.0000         0\n",
       "     sadness     0.0000    0.0000    0.0000         0\n",
-      "         joy     0.0000    0.0000    0.0000         1\n",
+      "       guilt     0.0000    0.0000    0.0000         1\n",
       "\n",
       "   micro avg     0.0000    0.0000    0.0000         1\n",
       "   macro avg     0.0000    0.0000    0.0000         1\n",
       "weighted avg     0.0000    0.0000    0.0000         1\n",
       " samples avg     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "2021-09-08 10:52:12,884 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:51,258 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
+      "2021-09-21 19:17:24,963 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:00,259 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:52:55,530 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:19:04,850 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 8/8 [00:00<00:00, 30812.15it/s]"
+      "100%|██████████| 8/8 [00:00<00:00, 28387.84it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:52:55,532 [b'joy', b'surprise', b'love', b'disgust', b'fear', b'shame', b'sadness', b'anger']\n",
-      "2021-09-08 10:52:55,670 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:55,672 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:19:04,852 [b'joy', b'surprise', b'love', b'fear', b'anger', b'shame', b'sadness', b'disgust']\n",
+      "2021-09-21 19:19:05,002 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:05,004 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -1970,25 +1967,24 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:52:55,673 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:55,673 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
-      "2021-09-08 10:52:55,673 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:55,674 Parameters:\n",
-      "2021-09-08 10:52:55,674  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:52:55,674  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:52:55,674  - patience: \"3\"\n",
-      "2021-09-08 10:52:55,675  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:52:55,675  - max_epochs: \"10\"\n",
-      "2021-09-08 10:52:55,675  - shuffle: \"True\"\n",
-      "2021-09-08 10:52:55,676  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:52:55,676  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:52:55,676 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:55,676 Model training base path: \"None\"\n",
-      "2021-09-08 10:52:55,677 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:55,677 Device: cuda:1\n",
-      "2021-09-08 10:52:55,677 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:55,677 Embeddings storage mode: cpu\n",
-      "2021-09-08 10:52:55,698 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 19:19:05,005 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:05,005 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
+      "2021-09-21 19:19:05,006 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:05,006 Parameters:\n",
+      "2021-09-21 19:19:05,006  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:19:05,006  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:19:05,007  - patience: \"3\"\n",
+      "2021-09-21 19:19:05,007  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:19:05,007  - max_epochs: \"10\"\n",
+      "2021-09-21 19:19:05,008  - shuffle: \"True\"\n",
+      "2021-09-21 19:19:05,008  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:19:05,008  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:19:05,008 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:05,009 Model training base path: \"None\"\n",
+      "2021-09-21 19:19:05,009 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:05,009 Device: cuda:0\n",
+      "2021-09-21 19:19:05,010 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:05,010 Embeddings storage mode: cpu\n"
      ]
     },
     {
@@ -2002,146 +1998,146 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:52:55,752 epoch 1 - iter 1/7 - loss 0.34041247 - samples/sec: 25.44 - lr: 0.020000\n",
-      "2021-09-08 10:52:55,806 epoch 1 - iter 2/7 - loss 0.58591084 - samples/sec: 18.66 - lr: 0.020000\n",
-      "2021-09-08 10:52:55,853 epoch 1 - iter 3/7 - loss 0.63186035 - samples/sec: 21.59 - lr: 0.020000\n",
-      "2021-09-08 10:52:55,900 epoch 1 - iter 4/7 - loss 0.61851639 - samples/sec: 21.31 - lr: 0.020000\n",
-      "2021-09-08 10:52:55,947 epoch 1 - iter 5/7 - loss 0.65547699 - samples/sec: 21.69 - lr: 0.020000\n",
-      "2021-09-08 10:52:55,994 epoch 1 - iter 6/7 - loss 0.63914773 - samples/sec: 21.35 - lr: 0.020000\n",
-      "2021-09-08 10:52:56,041 epoch 1 - iter 7/7 - loss 0.64043485 - samples/sec: 21.45 - lr: 0.020000\n",
-      "2021-09-08 10:52:56,042 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:56,042 EPOCH 1 done: loss 0.6404 - lr 0.0200000\n",
-      "2021-09-08 10:52:56,428 DEV : loss 0.4345299303531647 - score 0.0\n",
-      "2021-09-08 10:52:56,429 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:19:05,177 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:05,296 epoch 1 - iter 1/7 - loss 0.23746534 - samples/sec: 13.58 - lr: 0.020000\n",
+      "2021-09-21 19:19:05,380 epoch 1 - iter 2/7 - loss 0.56027568 - samples/sec: 11.92 - lr: 0.020000\n",
+      "2021-09-21 19:19:05,464 epoch 1 - iter 3/7 - loss 0.53887264 - samples/sec: 12.06 - lr: 0.020000\n",
+      "2021-09-21 19:19:05,549 epoch 1 - iter 4/7 - loss 0.52231487 - samples/sec: 11.74 - lr: 0.020000\n",
+      "2021-09-21 19:19:05,630 epoch 1 - iter 5/7 - loss 0.49873254 - samples/sec: 12.48 - lr: 0.020000\n",
+      "2021-09-21 19:19:05,716 epoch 1 - iter 6/7 - loss 0.59813718 - samples/sec: 11.77 - lr: 0.020000\n",
+      "2021-09-21 19:19:05,797 epoch 1 - iter 7/7 - loss 0.67986321 - samples/sec: 12.42 - lr: 0.020000\n",
+      "2021-09-21 19:19:05,798 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:05,798 EPOCH 1 done: loss 0.6799 - lr 0.0200000\n",
+      "2021-09-21 19:19:06,533 DEV : loss 0.4443213641643524 - score 0.0\n",
+      "2021-09-21 19:19:06,533 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:53:00,727 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:00,790 epoch 2 - iter 1/7 - loss 0.71067399 - samples/sec: 20.83 - lr: 0.020000\n",
-      "2021-09-08 10:53:00,839 epoch 2 - iter 2/7 - loss 0.69950759 - samples/sec: 21.01 - lr: 0.020000\n",
-      "2021-09-08 10:53:00,889 epoch 2 - iter 3/7 - loss 0.64266127 - samples/sec: 20.36 - lr: 0.020000\n",
-      "2021-09-08 10:53:00,936 epoch 2 - iter 4/7 - loss 0.65129800 - samples/sec: 21.49 - lr: 0.020000\n",
-      "2021-09-08 10:53:00,984 epoch 2 - iter 5/7 - loss 0.68288283 - samples/sec: 20.74 - lr: 0.020000\n",
-      "2021-09-08 10:53:01,031 epoch 2 - iter 6/7 - loss 0.66264975 - samples/sec: 21.42 - lr: 0.020000\n",
-      "2021-09-08 10:53:01,172 epoch 2 - iter 7/7 - loss 0.69005864 - samples/sec: 7.13 - lr: 0.020000\n",
-      "2021-09-08 10:53:01,173 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:01,174 EPOCH 2 done: loss 0.6901 - lr 0.0200000\n",
-      "2021-09-08 10:53:01,308 DEV : loss 0.628367006778717 - score 0.0\n",
-      "2021-09-08 10:53:01,309 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:53:01,381 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:01,443 epoch 3 - iter 1/7 - loss 0.64350957 - samples/sec: 21.01 - lr: 0.020000\n",
-      "2021-09-08 10:53:01,490 epoch 3 - iter 2/7 - loss 0.59952104 - samples/sec: 21.11 - lr: 0.020000\n",
-      "2021-09-08 10:53:01,538 epoch 3 - iter 3/7 - loss 0.66288988 - samples/sec: 21.36 - lr: 0.020000\n",
-      "2021-09-08 10:53:01,585 epoch 3 - iter 4/7 - loss 0.65790938 - samples/sec: 21.36 - lr: 0.020000\n",
-      "2021-09-08 10:53:01,633 epoch 3 - iter 5/7 - loss 0.65761832 - samples/sec: 21.25 - lr: 0.020000\n",
-      "2021-09-08 10:53:01,680 epoch 3 - iter 6/7 - loss 0.65447332 - samples/sec: 21.19 - lr: 0.020000\n",
-      "2021-09-08 10:53:01,728 epoch 3 - iter 7/7 - loss 0.70697062 - samples/sec: 21.42 - lr: 0.020000\n",
-      "2021-09-08 10:53:01,729 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:01,729 EPOCH 3 done: loss 0.7070 - lr 0.0200000\n",
-      "2021-09-08 10:53:01,927 DEV : loss 0.31399449706077576 - score 0.0\n",
-      "2021-09-08 10:53:01,928 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:19:14,955 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:15,019 epoch 2 - iter 1/7 - loss 0.84443384 - samples/sec: 20.47 - lr: 0.020000\n",
+      "2021-09-21 19:19:15,120 epoch 2 - iter 2/7 - loss 0.75421578 - samples/sec: 9.99 - lr: 0.020000\n",
+      "2021-09-21 19:19:15,204 epoch 2 - iter 3/7 - loss 0.65012460 - samples/sec: 11.95 - lr: 0.020000\n",
+      "2021-09-21 19:19:15,291 epoch 2 - iter 4/7 - loss 0.66191807 - samples/sec: 11.56 - lr: 0.020000\n",
+      "2021-09-21 19:19:15,378 epoch 2 - iter 5/7 - loss 0.79179756 - samples/sec: 11.70 - lr: 0.020000\n",
+      "2021-09-21 19:19:15,467 epoch 2 - iter 6/7 - loss 0.82406796 - samples/sec: 11.35 - lr: 0.020000\n",
+      "2021-09-21 19:19:15,548 epoch 2 - iter 7/7 - loss 0.82532883 - samples/sec: 12.51 - lr: 0.020000\n",
+      "2021-09-21 19:19:15,549 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:15,550 EPOCH 2 done: loss 0.8253 - lr 0.0200000\n",
+      "2021-09-21 19:19:15,626 DEV : loss 0.43093106150627136 - score 0.0\n",
+      "2021-09-21 19:19:15,630 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:53:07,709 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:07,774 epoch 4 - iter 1/7 - loss 0.58896232 - samples/sec: 20.04 - lr: 0.020000\n",
-      "2021-09-08 10:53:07,826 epoch 4 - iter 2/7 - loss 0.60589013 - samples/sec: 19.43 - lr: 0.020000\n",
-      "2021-09-08 10:53:07,876 epoch 4 - iter 3/7 - loss 0.64077226 - samples/sec: 20.40 - lr: 0.020000\n",
-      "2021-09-08 10:53:07,925 epoch 4 - iter 4/7 - loss 0.63034843 - samples/sec: 20.72 - lr: 0.020000\n",
-      "2021-09-08 10:53:07,974 epoch 4 - iter 5/7 - loss 0.66432848 - samples/sec: 20.88 - lr: 0.020000\n",
-      "2021-09-08 10:53:08,025 epoch 4 - iter 6/7 - loss 0.66598649 - samples/sec: 19.77 - lr: 0.020000\n",
-      "2021-09-08 10:53:08,077 epoch 4 - iter 7/7 - loss 0.66865920 - samples/sec: 19.36 - lr: 0.020000\n",
-      "2021-09-08 10:53:08,078 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:08,079 EPOCH 4 done: loss 0.6687 - lr 0.0200000\n",
-      "2021-09-08 10:53:08,115 DEV : loss 0.21613836288452148 - score 0.0\n",
-      "2021-09-08 10:53:08,116 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:19:30,590 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:30,661 epoch 3 - iter 1/7 - loss 0.78190160 - samples/sec: 18.08 - lr: 0.020000\n",
+      "2021-09-21 19:19:30,738 epoch 3 - iter 2/7 - loss 0.68273401 - samples/sec: 12.98 - lr: 0.020000\n",
+      "2021-09-21 19:19:30,817 epoch 3 - iter 3/7 - loss 0.63647769 - samples/sec: 12.84 - lr: 0.020000\n",
+      "2021-09-21 19:19:30,888 epoch 3 - iter 4/7 - loss 0.64473693 - samples/sec: 14.24 - lr: 0.020000\n",
+      "2021-09-21 19:19:30,953 epoch 3 - iter 5/7 - loss 0.67681050 - samples/sec: 15.74 - lr: 0.020000\n",
+      "2021-09-21 19:19:31,020 epoch 3 - iter 6/7 - loss 0.67822751 - samples/sec: 15.11 - lr: 0.020000\n",
+      "2021-09-21 19:19:31,097 epoch 3 - iter 7/7 - loss 0.67197529 - samples/sec: 13.09 - lr: 0.020000\n",
+      "2021-09-21 19:19:31,099 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:31,100 EPOCH 3 done: loss 0.6720 - lr 0.0200000\n",
+      "2021-09-21 19:19:31,240 DEV : loss 0.46958833932876587 - score 0.0\n",
+      "2021-09-21 19:19:31,241 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:19:31,256 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:31,348 epoch 4 - iter 1/7 - loss 0.61748052 - samples/sec: 16.93 - lr: 0.020000\n",
+      "2021-09-21 19:19:31,407 epoch 4 - iter 2/7 - loss 0.62796012 - samples/sec: 16.99 - lr: 0.020000\n",
+      "2021-09-21 19:19:31,476 epoch 4 - iter 3/7 - loss 0.65609527 - samples/sec: 14.72 - lr: 0.020000\n",
+      "2021-09-21 19:19:31,526 epoch 4 - iter 4/7 - loss 0.67550452 - samples/sec: 20.31 - lr: 0.020000\n",
+      "2021-09-21 19:19:31,585 epoch 4 - iter 5/7 - loss 0.67005405 - samples/sec: 17.21 - lr: 0.020000\n",
+      "2021-09-21 19:19:31,651 epoch 4 - iter 6/7 - loss 0.66536405 - samples/sec: 15.22 - lr: 0.020000\n",
+      "2021-09-21 19:19:31,717 epoch 4 - iter 7/7 - loss 0.65645717 - samples/sec: 15.26 - lr: 0.020000\n",
+      "2021-09-21 19:19:31,719 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:31,719 EPOCH 4 done: loss 0.6565 - lr 0.0200000\n",
+      "2021-09-21 19:19:32,068 DEV : loss 0.36844882369041443 - score 0.0\n",
+      "2021-09-21 19:19:32,070 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:53:15,560 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:15,634 epoch 5 - iter 1/7 - loss 0.60291433 - samples/sec: 18.32 - lr: 0.020000\n",
-      "2021-09-08 10:53:15,688 epoch 5 - iter 2/7 - loss 0.61052045 - samples/sec: 18.49 - lr: 0.020000\n",
-      "2021-09-08 10:53:15,739 epoch 5 - iter 3/7 - loss 0.62026191 - samples/sec: 19.79 - lr: 0.020000\n",
-      "2021-09-08 10:53:15,793 epoch 5 - iter 4/7 - loss 0.62906998 - samples/sec: 18.83 - lr: 0.020000\n",
-      "2021-09-08 10:53:15,846 epoch 5 - iter 5/7 - loss 0.66098038 - samples/sec: 19.02 - lr: 0.020000\n",
-      "2021-09-08 10:53:15,897 epoch 5 - iter 6/7 - loss 0.65736232 - samples/sec: 19.85 - lr: 0.020000\n",
-      "2021-09-08 10:53:15,960 epoch 5 - iter 7/7 - loss 0.64326204 - samples/sec: 16.02 - lr: 0.020000\n",
-      "2021-09-08 10:53:15,961 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:15,961 EPOCH 5 done: loss 0.6433 - lr 0.0200000\n",
-      "2021-09-08 10:53:19,286 DEV : loss 0.19846972823143005 - score 0.0\n",
-      "2021-09-08 10:53:19,287 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:53:30,437 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:30,511 epoch 6 - iter 1/7 - loss 0.27724171 - samples/sec: 18.07 - lr: 0.020000\n",
-      "2021-09-08 10:53:30,576 epoch 6 - iter 2/7 - loss 0.46067679 - samples/sec: 15.42 - lr: 0.020000\n",
-      "2021-09-08 10:53:30,633 epoch 6 - iter 3/7 - loss 0.51956236 - samples/sec: 17.69 - lr: 0.020000\n",
-      "2021-09-08 10:53:30,691 epoch 6 - iter 4/7 - loss 0.55285522 - samples/sec: 17.41 - lr: 0.020000\n",
-      "2021-09-08 10:53:30,750 epoch 6 - iter 5/7 - loss 0.56409327 - samples/sec: 17.00 - lr: 0.020000\n",
-      "2021-09-08 10:53:30,807 epoch 6 - iter 6/7 - loss 0.59497298 - samples/sec: 17.83 - lr: 0.020000\n",
-      "2021-09-08 10:53:30,872 epoch 6 - iter 7/7 - loss 0.58404792 - samples/sec: 15.52 - lr: 0.020000\n",
-      "2021-09-08 10:53:30,873 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:30,873 EPOCH 6 done: loss 0.5840 - lr 0.0200000\n",
-      "2021-09-08 10:53:34,214 DEV : loss 0.3570435643196106 - score 0.0\n",
-      "2021-09-08 10:53:34,215 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:53:34,222 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:34,287 epoch 7 - iter 1/7 - loss 0.65058017 - samples/sec: 19.72 - lr: 0.020000\n",
-      "2021-09-08 10:53:34,339 epoch 7 - iter 2/7 - loss 0.65055475 - samples/sec: 19.63 - lr: 0.020000\n",
-      "2021-09-08 10:53:34,389 epoch 7 - iter 3/7 - loss 0.63665837 - samples/sec: 20.31 - lr: 0.020000\n",
-      "2021-09-08 10:53:34,439 epoch 7 - iter 4/7 - loss 0.59507506 - samples/sec: 20.08 - lr: 0.020000\n",
-      "2021-09-08 10:53:34,488 epoch 7 - iter 5/7 - loss 0.61612003 - samples/sec: 20.81 - lr: 0.020000\n",
-      "2021-09-08 10:53:34,538 epoch 7 - iter 6/7 - loss 0.54502832 - samples/sec: 20.17 - lr: 0.020000\n",
-      "2021-09-08 10:53:34,590 epoch 7 - iter 7/7 - loss 0.55081254 - samples/sec: 19.43 - lr: 0.020000\n",
-      "2021-09-08 10:53:34,592 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:34,592 EPOCH 7 done: loss 0.5508 - lr 0.0200000\n",
-      "2021-09-08 10:53:34,623 DEV : loss 0.24630047380924225 - score 0.0\n",
-      "2021-09-08 10:53:34,624 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:53:34,627 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:34,692 epoch 8 - iter 1/7 - loss 0.55584735 - samples/sec: 19.94 - lr: 0.020000\n",
-      "2021-09-08 10:53:34,740 epoch 8 - iter 2/7 - loss 0.34674791 - samples/sec: 21.40 - lr: 0.020000\n",
-      "2021-09-08 10:53:34,791 epoch 8 - iter 3/7 - loss 0.42974180 - samples/sec: 19.63 - lr: 0.020000\n",
-      "2021-09-08 10:53:34,843 epoch 8 - iter 4/7 - loss 0.45419598 - samples/sec: 19.54 - lr: 0.020000\n"
+      "2021-09-21 19:19:53,048 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:53,108 epoch 5 - iter 1/7 - loss 0.71974915 - samples/sec: 22.20 - lr: 0.020000\n",
+      "2021-09-21 19:19:53,154 epoch 5 - iter 2/7 - loss 0.69338715 - samples/sec: 22.19 - lr: 0.020000\n",
+      "2021-09-21 19:19:53,201 epoch 5 - iter 3/7 - loss 0.67167240 - samples/sec: 21.29 - lr: 0.020000\n",
+      "2021-09-21 19:19:53,252 epoch 5 - iter 4/7 - loss 0.65757987 - samples/sec: 19.96 - lr: 0.020000\n",
+      "2021-09-21 19:19:53,313 epoch 5 - iter 5/7 - loss 0.64453313 - samples/sec: 16.51 - lr: 0.020000\n",
+      "2021-09-21 19:19:53,377 epoch 5 - iter 6/7 - loss 0.64140287 - samples/sec: 15.91 - lr: 0.020000\n",
+      "2021-09-21 19:19:53,425 epoch 5 - iter 7/7 - loss 0.64048975 - samples/sec: 20.87 - lr: 0.020000\n",
+      "2021-09-21 19:19:53,427 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:53,427 EPOCH 5 done: loss 0.6405 - lr 0.0200000\n",
+      "2021-09-21 19:19:53,594 DEV : loss 0.3941517174243927 - score 0.0\n",
+      "2021-09-21 19:19:53,595 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:19:53,621 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:53,708 epoch 6 - iter 1/7 - loss 0.64155436 - samples/sec: 16.34 - lr: 0.020000\n",
+      "2021-09-21 19:19:53,757 epoch 6 - iter 2/7 - loss 0.62823889 - samples/sec: 20.85 - lr: 0.020000\n",
+      "2021-09-21 19:19:53,805 epoch 6 - iter 3/7 - loss 0.62679233 - samples/sec: 21.14 - lr: 0.020000\n",
+      "2021-09-21 19:19:53,855 epoch 6 - iter 4/7 - loss 0.63770001 - samples/sec: 20.41 - lr: 0.020000\n",
+      "2021-09-21 19:19:53,903 epoch 6 - iter 5/7 - loss 0.63691912 - samples/sec: 20.89 - lr: 0.020000\n",
+      "2021-09-21 19:19:53,958 epoch 6 - iter 6/7 - loss 0.64211750 - samples/sec: 18.67 - lr: 0.020000\n",
+      "2021-09-21 19:19:54,014 epoch 6 - iter 7/7 - loss 0.64420560 - samples/sec: 18.01 - lr: 0.020000\n",
+      "2021-09-21 19:19:54,016 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:54,016 EPOCH 6 done: loss 0.6442 - lr 0.0200000\n",
+      "2021-09-21 19:19:54,072 DEV : loss 0.3802343010902405 - score 0.0\n",
+      "2021-09-21 19:19:54,073 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:19:54,146 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:54,217 epoch 7 - iter 1/7 - loss 0.63070303 - samples/sec: 20.46 - lr: 0.020000\n",
+      "2021-09-21 19:19:54,264 epoch 7 - iter 2/7 - loss 0.64160320 - samples/sec: 21.29 - lr: 0.020000\n",
+      "2021-09-21 19:19:54,312 epoch 7 - iter 3/7 - loss 0.64908783 - samples/sec: 21.21 - lr: 0.020000\n",
+      "2021-09-21 19:19:54,361 epoch 7 - iter 4/7 - loss 0.65133654 - samples/sec: 20.69 - lr: 0.020000\n",
+      "2021-09-21 19:19:54,406 epoch 7 - iter 5/7 - loss 0.65343857 - samples/sec: 22.25 - lr: 0.020000\n",
+      "2021-09-21 19:19:54,451 epoch 7 - iter 6/7 - loss 0.64752687 - samples/sec: 22.61 - lr: 0.020000\n",
+      "2021-09-21 19:19:54,498 epoch 7 - iter 7/7 - loss 0.64653211 - samples/sec: 21.47 - lr: 0.020000\n",
+      "2021-09-21 19:19:54,499 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:54,500 EPOCH 7 done: loss 0.6465 - lr 0.0200000\n",
+      "2021-09-21 19:19:54,658 DEV : loss 0.3743593096733093 - score 0.0\n",
+      "2021-09-21 19:19:54,659 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:19:54,732 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:54,798 epoch 8 - iter 1/7 - loss 0.64403600 - samples/sec: 21.25 - lr: 0.020000\n",
+      "2021-09-21 19:19:54,844 epoch 8 - iter 2/7 - loss 0.63904124 - samples/sec: 22.40 - lr: 0.020000\n",
+      "2021-09-21 19:19:54,890 epoch 8 - iter 3/7 - loss 0.64814570 - samples/sec: 21.72 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:53:34,893 epoch 8 - iter 5/7 - loss 0.45461558 - samples/sec: 20.18 - lr: 0.020000\n",
-      "2021-09-08 10:53:34,941 epoch 8 - iter 6/7 - loss 0.53407993 - samples/sec: 21.36 - lr: 0.020000\n",
-      "2021-09-08 10:53:34,988 epoch 8 - iter 7/7 - loss 0.52857209 - samples/sec: 21.22 - lr: 0.020000\n",
-      "2021-09-08 10:53:34,989 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:34,990 EPOCH 8 done: loss 0.5286 - lr 0.0200000\n",
-      "2021-09-08 10:53:35,134 DEV : loss 0.31594064831733704 - score 0.0\n",
-      "2021-09-08 10:53:35,134 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:53:35,203 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:35,264 epoch 9 - iter 1/7 - loss 0.46646497 - samples/sec: 21.39 - lr: 0.020000\n",
-      "2021-09-08 10:53:35,311 epoch 9 - iter 2/7 - loss 0.57990463 - samples/sec: 21.37 - lr: 0.020000\n",
-      "2021-09-08 10:53:35,359 epoch 9 - iter 3/7 - loss 0.67849051 - samples/sec: 21.34 - lr: 0.020000\n",
-      "2021-09-08 10:53:35,406 epoch 9 - iter 4/7 - loss 0.67993513 - samples/sec: 21.21 - lr: 0.020000\n",
-      "2021-09-08 10:53:35,452 epoch 9 - iter 5/7 - loss 0.56457613 - samples/sec: 21.81 - lr: 0.020000\n",
-      "2021-09-08 10:53:35,499 epoch 9 - iter 6/7 - loss 0.60397545 - samples/sec: 21.60 - lr: 0.020000\n",
-      "2021-09-08 10:53:35,546 epoch 9 - iter 7/7 - loss 0.58835399 - samples/sec: 21.34 - lr: 0.020000\n",
-      "2021-09-08 10:53:35,547 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:35,548 EPOCH 9 done: loss 0.5884 - lr 0.0200000\n",
-      "2021-09-08 10:53:35,727 DEV : loss 0.325913667678833 - score 0.0\n",
-      "Epoch     9: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 10:53:35,728 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:53:35,842 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:35,903 epoch 10 - iter 1/7 - loss 0.60490966 - samples/sec: 21.28 - lr: 0.010000\n",
-      "2021-09-08 10:53:35,950 epoch 10 - iter 2/7 - loss 0.55705294 - samples/sec: 21.27 - lr: 0.010000\n",
-      "2021-09-08 10:53:35,998 epoch 10 - iter 3/7 - loss 0.54098235 - samples/sec: 21.35 - lr: 0.010000\n",
-      "2021-09-08 10:53:36,045 epoch 10 - iter 4/7 - loss 0.53763404 - samples/sec: 21.40 - lr: 0.010000\n",
-      "2021-09-08 10:53:36,092 epoch 10 - iter 5/7 - loss 0.54000572 - samples/sec: 21.50 - lr: 0.010000\n",
-      "2021-09-08 10:53:36,139 epoch 10 - iter 6/7 - loss 0.52291739 - samples/sec: 21.27 - lr: 0.010000\n",
-      "2021-09-08 10:53:36,183 epoch 10 - iter 7/7 - loss 0.46052108 - samples/sec: 23.14 - lr: 0.010000\n",
-      "2021-09-08 10:53:36,184 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:36,184 EPOCH 10 done: loss 0.4605 - lr 0.0100000\n",
-      "2021-09-08 10:53:36,333 DEV : loss 0.37815260887145996 - score 0.0\n",
-      "2021-09-08 10:53:36,334 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:53:48,237 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:48,237 Testing using best model ...\n",
-      "2021-09-08 10:53:48,344 loading file None/best-model.pt\n",
+      "2021-09-21 19:19:54,940 epoch 8 - iter 4/7 - loss 0.64532924 - samples/sec: 20.29 - lr: 0.020000\n",
+      "2021-09-21 19:19:54,985 epoch 8 - iter 5/7 - loss 0.63668466 - samples/sec: 22.71 - lr: 0.020000\n",
+      "2021-09-21 19:19:55,033 epoch 8 - iter 6/7 - loss 0.62068461 - samples/sec: 20.93 - lr: 0.020000\n",
+      "2021-09-21 19:19:55,082 epoch 8 - iter 7/7 - loss 0.62275600 - samples/sec: 20.66 - lr: 0.020000\n",
+      "2021-09-21 19:19:55,083 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:55,083 EPOCH 8 done: loss 0.6228 - lr 0.0200000\n",
+      "2021-09-21 19:19:55,211 DEV : loss 0.41387730836868286 - score 0.0\n",
+      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 19:19:55,212 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:19:55,287 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:55,355 epoch 9 - iter 1/7 - loss 0.64324147 - samples/sec: 21.48 - lr: 0.010000\n",
+      "2021-09-21 19:19:55,402 epoch 9 - iter 2/7 - loss 0.59994656 - samples/sec: 21.55 - lr: 0.010000\n",
+      "2021-09-21 19:19:55,449 epoch 9 - iter 3/7 - loss 0.61669564 - samples/sec: 21.39 - lr: 0.010000\n",
+      "2021-09-21 19:19:55,497 epoch 9 - iter 4/7 - loss 0.64272088 - samples/sec: 21.16 - lr: 0.010000\n",
+      "2021-09-21 19:19:55,546 epoch 9 - iter 5/7 - loss 0.64168183 - samples/sec: 20.28 - lr: 0.010000\n",
+      "2021-09-21 19:19:55,595 epoch 9 - iter 6/7 - loss 0.64435101 - samples/sec: 20.76 - lr: 0.010000\n",
+      "2021-09-21 19:19:55,643 epoch 9 - iter 7/7 - loss 0.64826028 - samples/sec: 21.19 - lr: 0.010000\n",
+      "2021-09-21 19:19:55,644 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:55,644 EPOCH 9 done: loss 0.6483 - lr 0.0100000\n",
+      "2021-09-21 19:19:55,780 DEV : loss 0.44390565156936646 - score 0.0\n",
+      "2021-09-21 19:19:55,781 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:19:55,861 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:55,931 epoch 10 - iter 1/7 - loss 0.98995996 - samples/sec: 19.84 - lr: 0.010000\n",
+      "2021-09-21 19:19:55,981 epoch 10 - iter 2/7 - loss 0.80003235 - samples/sec: 20.24 - lr: 0.010000\n",
+      "2021-09-21 19:19:56,031 epoch 10 - iter 3/7 - loss 0.72962562 - samples/sec: 20.54 - lr: 0.010000\n",
+      "2021-09-21 19:19:56,079 epoch 10 - iter 4/7 - loss 0.71206735 - samples/sec: 20.90 - lr: 0.010000\n",
+      "2021-09-21 19:19:56,125 epoch 10 - iter 5/7 - loss 0.68542086 - samples/sec: 21.84 - lr: 0.010000\n",
+      "2021-09-21 19:19:56,170 epoch 10 - iter 6/7 - loss 0.67647362 - samples/sec: 22.66 - lr: 0.010000\n",
+      "2021-09-21 19:19:56,214 epoch 10 - iter 7/7 - loss 0.66802382 - samples/sec: 22.87 - lr: 0.010000\n",
+      "2021-09-21 19:19:56,215 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:56,216 EPOCH 10 done: loss 0.6680 - lr 0.0100000\n",
+      "2021-09-21 19:19:56,344 DEV : loss 0.41233542561531067 - score 0.0\n",
+      "2021-09-21 19:19:56,344 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:20:00,108 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:20:00,109 Testing using best model ...\n",
+      "2021-09-21 19:20:00,110 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:53:53,430 \t1.0\n",
-      "2021-09-08 10:53:53,431 \n",
+      "2021-09-21 19:20:05,244 \t0.0\n",
+      "2021-09-21 19:20:05,244 \n",
       "Results:\n",
-      "- F-score (micro) 1.0\n",
-      "- F-score (macro) 0.125\n",
-      "- Accuracy 1.0\n",
+      "- F-score (micro) 0.0\n",
+      "- F-score (macro) 0.0\n",
+      "- Accuracy 0.0\n",
       "\n",
       "By class:\n",
       "              precision    recall  f1-score   support\n",
@@ -2149,50 +2145,37 @@
       "         joy     0.0000    0.0000    0.0000         0\n",
       "    surprise     0.0000    0.0000    0.0000         0\n",
       "        love     0.0000    0.0000    0.0000         0\n",
-      "     disgust     0.0000    0.0000    0.0000         0\n",
       "        fear     0.0000    0.0000    0.0000         0\n",
+      "       anger     0.0000    0.0000    0.0000         0\n",
       "       shame     0.0000    0.0000    0.0000         0\n",
       "     sadness     0.0000    0.0000    0.0000         0\n",
-      "       anger     1.0000    1.0000    1.0000         1\n",
+      "     disgust     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "   micro avg     1.0000    1.0000    1.0000         1\n",
-      "   macro avg     0.1250    0.1250    0.1250         1\n",
-      "weighted avg     1.0000    1.0000    1.0000         1\n",
-      " samples avg     1.0000    1.0000    1.0000         1\n",
+      "   micro avg     0.0000    0.0000    0.0000         1\n",
+      "   macro avg     0.0000    0.0000    0.0000         1\n",
+      "weighted avg     0.0000    0.0000    0.0000         1\n",
+      " samples avg     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "2021-09-08 10:53:53,432 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:35,129 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
+      "2021-09-21 19:20:05,245 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:21:54,818 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:54:41,865 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:21:59,061 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 8/8 [00:00<00:00, 27213.65it/s]"
+      "100%|██████████| 8/8 [00:00<00:00, 25594.53it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:54:41,867 [b'surprise', b'love', b'disgust', b'fear', b'anger', b'shame', b'sadness', b'guilt']\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 10:54:44,465 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:44,467 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:21:59,063 [b'love', b'disgust', b'fear', b'anger', b'guilt', b'shame', b'sadness', b'surprise']\n",
+      "2021-09-21 19:21:59,204 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:21:59,206 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -2505,160 +2488,173 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:54:44,468 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:44,468 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
-      "2021-09-08 10:54:44,469 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:44,469 Parameters:\n",
-      "2021-09-08 10:54:44,469  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:54:44,469  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:54:44,470  - patience: \"3\"\n",
-      "2021-09-08 10:54:44,470  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:54:44,470  - max_epochs: \"10\"\n",
-      "2021-09-08 10:54:44,470  - shuffle: \"True\"\n",
-      "2021-09-08 10:54:44,471  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:54:44,471  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:54:44,471 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:44,472 Model training base path: \"None\"\n",
-      "2021-09-08 10:54:44,472 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:44,472 Device: cuda:1\n",
-      "2021-09-08 10:54:44,472 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:44,473 Embeddings storage mode: cpu\n",
-      "2021-09-08 10:54:44,483 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:44,544 epoch 1 - iter 1/7 - loss 0.61006272 - samples/sec: 22.64 - lr: 0.020000\n",
-      "2021-09-08 10:54:44,616 epoch 1 - iter 2/7 - loss 0.63271961 - samples/sec: 13.87 - lr: 0.020000\n",
-      "2021-09-08 10:54:44,673 epoch 1 - iter 3/7 - loss 0.73227199 - samples/sec: 17.92 - lr: 0.020000\n",
-      "2021-09-08 10:54:44,731 epoch 1 - iter 4/7 - loss 0.70794021 - samples/sec: 17.26 - lr: 0.020000\n",
-      "2021-09-08 10:54:44,788 epoch 1 - iter 5/7 - loss 0.68263226 - samples/sec: 17.80 - lr: 0.020000\n",
-      "2021-09-08 10:54:44,853 epoch 1 - iter 6/7 - loss 0.66940511 - samples/sec: 15.37 - lr: 0.020000\n",
-      "2021-09-08 10:54:44,912 epoch 1 - iter 7/7 - loss 0.68471748 - samples/sec: 17.22 - lr: 0.020000\n",
-      "2021-09-08 10:54:44,913 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:44,913 EPOCH 1 done: loss 0.6847 - lr 0.0200000\n",
-      "2021-09-08 10:54:44,947 DEV : loss 0.4904974102973938 - score 0.0\n",
-      "2021-09-08 10:54:44,948 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:54:51,255 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:51,319 epoch 2 - iter 1/7 - loss 0.66863203 - samples/sec: 20.68 - lr: 0.020000\n",
-      "2021-09-08 10:54:51,366 epoch 2 - iter 2/7 - loss 0.64230314 - samples/sec: 21.44 - lr: 0.020000\n",
-      "2021-09-08 10:54:51,413 epoch 2 - iter 3/7 - loss 0.66298612 - samples/sec: 21.32 - lr: 0.020000\n",
-      "2021-09-08 10:54:51,460 epoch 2 - iter 4/7 - loss 0.72922438 - samples/sec: 21.51 - lr: 0.020000\n",
-      "2021-09-08 10:54:51,507 epoch 2 - iter 5/7 - loss 0.68506225 - samples/sec: 21.69 - lr: 0.020000\n",
-      "2021-09-08 10:54:51,553 epoch 2 - iter 6/7 - loss 0.67223551 - samples/sec: 21.68 - lr: 0.020000\n",
-      "2021-09-08 10:54:51,599 epoch 2 - iter 7/7 - loss 0.63390065 - samples/sec: 21.80 - lr: 0.020000\n",
-      "2021-09-08 10:54:51,600 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:51,601 EPOCH 2 done: loss 0.6339 - lr 0.0200000\n",
-      "2021-09-08 10:54:51,742 DEV : loss 0.5183403491973877 - score 0.0\n",
-      "2021-09-08 10:54:51,742 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:54:51,846 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:51,907 epoch 3 - iter 1/7 - loss 0.21702510 - samples/sec: 21.47 - lr: 0.020000\n",
-      "2021-09-08 10:54:51,953 epoch 3 - iter 2/7 - loss 0.39903462 - samples/sec: 21.72 - lr: 0.020000\n",
-      "2021-09-08 10:54:52,000 epoch 3 - iter 3/7 - loss 0.48692860 - samples/sec: 21.41 - lr: 0.020000\n",
-      "2021-09-08 10:54:52,048 epoch 3 - iter 4/7 - loss 0.49536434 - samples/sec: 21.07 - lr: 0.020000\n",
-      "2021-09-08 10:54:52,095 epoch 3 - iter 5/7 - loss 0.56178669 - samples/sec: 21.48 - lr: 0.020000\n",
-      "2021-09-08 10:54:52,142 epoch 3 - iter 6/7 - loss 0.57996081 - samples/sec: 21.62 - lr: 0.020000\n",
-      "2021-09-08 10:54:52,189 epoch 3 - iter 7/7 - loss 0.70384779 - samples/sec: 21.60 - lr: 0.020000\n",
-      "2021-09-08 10:54:52,190 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:52,190 EPOCH 3 done: loss 0.7038 - lr 0.0200000\n",
-      "2021-09-08 10:54:53,341 DEV : loss 0.5452213883399963 - score 0.0\n",
-      "2021-09-08 10:54:53,342 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:54:57,022 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:57,085 epoch 4 - iter 1/7 - loss 0.31225863 - samples/sec: 20.70 - lr: 0.020000\n",
-      "2021-09-08 10:54:57,132 epoch 4 - iter 2/7 - loss 0.38498421 - samples/sec: 21.48 - lr: 0.020000\n",
-      "2021-09-08 10:54:57,179 epoch 4 - iter 3/7 - loss 0.46503060 - samples/sec: 21.61 - lr: 0.020000\n",
-      "2021-09-08 10:54:57,226 epoch 4 - iter 4/7 - loss 0.50901862 - samples/sec: 21.27 - lr: 0.020000\n",
-      "2021-09-08 10:54:57,273 epoch 4 - iter 5/7 - loss 0.49371273 - samples/sec: 21.55 - lr: 0.020000\n",
-      "2021-09-08 10:54:57,320 epoch 4 - iter 6/7 - loss 0.46961900 - samples/sec: 21.44 - lr: 0.020000\n",
-      "2021-09-08 10:54:57,367 epoch 4 - iter 7/7 - loss 0.46288498 - samples/sec: 21.61 - lr: 0.020000\n",
-      "2021-09-08 10:54:57,368 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:57,368 EPOCH 4 done: loss 0.4629 - lr 0.0200000\n",
-      "2021-09-08 10:54:58,190 DEV : loss 0.582598090171814 - score 0.0\n",
-      "2021-09-08 10:54:58,191 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:54:58,193 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:58,258 epoch 5 - iter 1/7 - loss 0.53562194 - samples/sec: 20.00 - lr: 0.020000\n",
-      "2021-09-08 10:54:58,308 epoch 5 - iter 2/7 - loss 0.60410717 - samples/sec: 20.23 - lr: 0.020000\n",
-      "2021-09-08 10:54:58,356 epoch 5 - iter 3/7 - loss 0.49619030 - samples/sec: 21.03 - lr: 0.020000\n",
-      "2021-09-08 10:54:58,403 epoch 5 - iter 4/7 - loss 0.45347942 - samples/sec: 21.51 - lr: 0.020000\n",
-      "2021-09-08 10:54:58,450 epoch 5 - iter 5/7 - loss 0.46912660 - samples/sec: 21.68 - lr: 0.020000\n",
-      "2021-09-08 10:54:58,500 epoch 5 - iter 6/7 - loss 0.46589766 - samples/sec: 20.00 - lr: 0.020000\n",
-      "2021-09-08 10:54:58,547 epoch 5 - iter 7/7 - loss 0.49555924 - samples/sec: 21.48 - lr: 0.020000\n",
-      "2021-09-08 10:54:58,548 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:58,548 EPOCH 5 done: loss 0.4956 - lr 0.0200000\n",
-      "2021-09-08 10:54:58,576 DEV : loss 0.43608352541923523 - score 0.0\n",
-      "2021-09-08 10:54:58,577 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:21:59,207 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:21:59,207 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
+      "2021-09-21 19:21:59,207 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:21:59,208 Parameters:\n",
+      "2021-09-21 19:21:59,208  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:21:59,208  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:21:59,208  - patience: \"3\"\n",
+      "2021-09-21 19:21:59,209  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:21:59,209  - max_epochs: \"10\"\n",
+      "2021-09-21 19:21:59,209  - shuffle: \"True\"\n",
+      "2021-09-21 19:21:59,210  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:21:59,210  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:21:59,210 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:21:59,210 Model training base path: \"None\"\n",
+      "2021-09-21 19:21:59,211 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:21:59,211 Device: cuda:0\n",
+      "2021-09-21 19:21:59,211 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:21:59,212 Embeddings storage mode: cpu\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 19:21:59,397 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:21:59,502 epoch 1 - iter 1/7 - loss 0.61876708 - samples/sec: 19.92 - lr: 0.020000\n",
+      "2021-09-21 19:21:59,583 epoch 1 - iter 2/7 - loss 0.59339041 - samples/sec: 12.39 - lr: 0.020000\n",
+      "2021-09-21 19:21:59,654 epoch 1 - iter 3/7 - loss 0.64403617 - samples/sec: 14.27 - lr: 0.020000\n",
+      "2021-09-21 19:21:59,731 epoch 1 - iter 4/7 - loss 0.63835685 - samples/sec: 13.08 - lr: 0.020000\n",
+      "2021-09-21 19:21:59,801 epoch 1 - iter 5/7 - loss 0.62906412 - samples/sec: 14.45 - lr: 0.020000\n",
+      "2021-09-21 19:21:59,862 epoch 1 - iter 6/7 - loss 0.63860424 - samples/sec: 16.52 - lr: 0.020000\n",
+      "2021-09-21 19:21:59,933 epoch 1 - iter 7/7 - loss 0.68030207 - samples/sec: 14.17 - lr: 0.020000\n",
+      "2021-09-21 19:21:59,934 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:21:59,934 EPOCH 1 done: loss 0.6803 - lr 0.0200000\n",
+      "2021-09-21 19:22:00,083 DEV : loss 0.21751628816127777 - score 0.0\n",
+      "2021-09-21 19:22:00,084 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:55:02,766 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:02,842 epoch 6 - iter 1/7 - loss 0.57575220 - samples/sec: 17.84 - lr: 0.020000\n",
-      "2021-09-08 10:55:02,898 epoch 6 - iter 2/7 - loss 0.42204800 - samples/sec: 18.16 - lr: 0.020000\n",
-      "2021-09-08 10:55:02,953 epoch 6 - iter 3/7 - loss 0.33389054 - samples/sec: 18.25 - lr: 0.020000\n",
-      "2021-09-08 10:55:03,001 epoch 6 - iter 4/7 - loss 0.39056564 - samples/sec: 20.85 - lr: 0.020000\n",
-      "2021-09-08 10:55:03,053 epoch 6 - iter 5/7 - loss 0.50696217 - samples/sec: 19.65 - lr: 0.020000\n",
-      "2021-09-08 10:55:03,101 epoch 6 - iter 6/7 - loss 0.46412617 - samples/sec: 20.76 - lr: 0.020000\n",
-      "2021-09-08 10:55:03,151 epoch 6 - iter 7/7 - loss 0.44173872 - samples/sec: 20.43 - lr: 0.020000\n",
-      "2021-09-08 10:55:03,152 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:03,152 EPOCH 6 done: loss 0.4417 - lr 0.0200000\n",
-      "2021-09-08 10:55:03,187 DEV : loss 0.39707085490226746 - score 0.0\n",
-      "2021-09-08 10:55:03,188 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:22:13,558 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:13,638 epoch 2 - iter 1/7 - loss 0.60859340 - samples/sec: 18.18 - lr: 0.020000\n",
+      "2021-09-21 19:22:13,710 epoch 2 - iter 2/7 - loss 0.56115246 - samples/sec: 14.15 - lr: 0.020000\n",
+      "2021-09-21 19:22:13,792 epoch 2 - iter 3/7 - loss 0.62431659 - samples/sec: 12.30 - lr: 0.020000\n",
+      "2021-09-21 19:22:13,867 epoch 2 - iter 4/7 - loss 0.54571407 - samples/sec: 13.39 - lr: 0.020000\n",
+      "2021-09-21 19:22:13,931 epoch 2 - iter 5/7 - loss 0.55260412 - samples/sec: 15.61 - lr: 0.020000\n",
+      "2021-09-21 19:22:13,990 epoch 2 - iter 6/7 - loss 0.56362266 - samples/sec: 17.17 - lr: 0.020000\n",
+      "2021-09-21 19:22:14,069 epoch 2 - iter 7/7 - loss 0.56679388 - samples/sec: 12.79 - lr: 0.020000\n",
+      "2021-09-21 19:22:14,070 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:14,070 EPOCH 2 done: loss 0.5668 - lr 0.0200000\n",
+      "2021-09-21 19:22:14,117 DEV : loss 0.2822231948375702 - score 0.0\n",
+      "2021-09-21 19:22:14,118 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:22:14,121 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:14,238 epoch 3 - iter 1/7 - loss 0.59052616 - samples/sec: 13.57 - lr: 0.020000\n",
+      "2021-09-21 19:22:14,311 epoch 3 - iter 2/7 - loss 0.65713704 - samples/sec: 13.82 - lr: 0.020000\n",
+      "2021-09-21 19:22:14,392 epoch 3 - iter 3/7 - loss 0.61392409 - samples/sec: 12.51 - lr: 0.020000\n",
+      "2021-09-21 19:22:14,461 epoch 3 - iter 4/7 - loss 0.65109989 - samples/sec: 14.59 - lr: 0.020000\n",
+      "2021-09-21 19:22:14,526 epoch 3 - iter 5/7 - loss 0.63392802 - samples/sec: 15.48 - lr: 0.020000\n",
+      "2021-09-21 19:22:14,587 epoch 3 - iter 6/7 - loss 0.64656592 - samples/sec: 16.63 - lr: 0.020000\n",
+      "2021-09-21 19:22:14,653 epoch 3 - iter 7/7 - loss 0.57622974 - samples/sec: 15.19 - lr: 0.020000\n",
+      "2021-09-21 19:22:14,654 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:14,654 EPOCH 3 done: loss 0.5762 - lr 0.0200000\n",
+      "2021-09-21 19:22:14,711 DEV : loss 0.23789232969284058 - score 0.0\n",
+      "2021-09-21 19:22:14,714 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:22:14,716 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:14,833 epoch 4 - iter 1/7 - loss 0.66095585 - samples/sec: 13.38 - lr: 0.020000\n",
+      "2021-09-21 19:22:14,906 epoch 4 - iter 2/7 - loss 0.64550421 - samples/sec: 13.81 - lr: 0.020000\n",
+      "2021-09-21 19:22:14,986 epoch 4 - iter 3/7 - loss 0.69929874 - samples/sec: 12.57 - lr: 0.020000\n",
+      "2021-09-21 19:22:15,058 epoch 4 - iter 4/7 - loss 0.66394772 - samples/sec: 14.05 - lr: 0.020000\n",
+      "2021-09-21 19:22:15,138 epoch 4 - iter 5/7 - loss 0.63500578 - samples/sec: 12.64 - lr: 0.020000\n",
+      "2021-09-21 19:22:15,210 epoch 4 - iter 6/7 - loss 0.56229900 - samples/sec: 13.90 - lr: 0.020000\n",
+      "2021-09-21 19:22:15,279 epoch 4 - iter 7/7 - loss 0.56287181 - samples/sec: 14.78 - lr: 0.020000\n",
+      "2021-09-21 19:22:15,280 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:15,280 EPOCH 4 done: loss 0.5629 - lr 0.0200000\n",
+      "2021-09-21 19:22:15,357 DEV : loss 0.2370227575302124 - score 0.0\n",
+      "2021-09-21 19:22:15,360 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:22:15,362 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:15,481 epoch 5 - iter 1/7 - loss 0.44110277 - samples/sec: 12.47 - lr: 0.020000\n",
+      "2021-09-21 19:22:15,542 epoch 5 - iter 2/7 - loss 0.42104459 - samples/sec: 16.49 - lr: 0.020000\n",
+      "2021-09-21 19:22:15,605 epoch 5 - iter 3/7 - loss 0.46623272 - samples/sec: 16.21 - lr: 0.020000\n",
+      "2021-09-21 19:22:15,682 epoch 5 - iter 4/7 - loss 0.49647352 - samples/sec: 13.06 - lr: 0.020000\n",
+      "2021-09-21 19:22:15,756 epoch 5 - iter 5/7 - loss 0.40584493 - samples/sec: 13.49 - lr: 0.020000\n",
+      "2021-09-21 19:22:15,842 epoch 5 - iter 6/7 - loss 0.47162860 - samples/sec: 11.72 - lr: 0.020000\n",
+      "2021-09-21 19:22:15,915 epoch 5 - iter 7/7 - loss 0.48319508 - samples/sec: 13.84 - lr: 0.020000\n",
+      "2021-09-21 19:22:15,916 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:15,916 EPOCH 5 done: loss 0.4832 - lr 0.0200000\n",
+      "2021-09-21 19:22:15,981 DEV : loss 0.13537847995758057 - score 0.0\n",
+      "2021-09-21 19:22:15,982 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:55:07,775 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:07,845 epoch 7 - iter 1/7 - loss 0.13650544 - samples/sec: 20.35 - lr: 0.020000\n"
+      "2021-09-21 19:22:20,095 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:20,230 epoch 6 - iter 1/7 - loss 0.54861397 - samples/sec: 14.77 - lr: 0.020000\n",
+      "2021-09-21 19:22:20,306 epoch 6 - iter 2/7 - loss 0.74978620 - samples/sec: 13.30 - lr: 0.020000\n",
+      "2021-09-21 19:22:20,379 epoch 6 - iter 3/7 - loss 0.68884981 - samples/sec: 13.69 - lr: 0.020000\n",
+      "2021-09-21 19:22:20,444 epoch 6 - iter 4/7 - loss 0.53550858 - samples/sec: 15.61 - lr: 0.020000\n",
+      "2021-09-21 19:22:20,523 epoch 6 - iter 5/7 - loss 0.52320737 - samples/sec: 12.78 - lr: 0.020000\n",
+      "2021-09-21 19:22:20,594 epoch 6 - iter 6/7 - loss 0.52551292 - samples/sec: 14.02 - lr: 0.020000\n",
+      "2021-09-21 19:22:20,656 epoch 6 - iter 7/7 - loss 0.51241696 - samples/sec: 16.24 - lr: 0.020000\n",
+      "2021-09-21 19:22:20,658 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:20,658 EPOCH 6 done: loss 0.5124 - lr 0.0200000\n",
+      "2021-09-21 19:22:20,708 DEV : loss 0.2998638153076172 - score 0.0\n",
+      "2021-09-21 19:22:20,710 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:22:20,713 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:20,813 epoch 7 - iter 1/7 - loss 0.24464887 - samples/sec: 14.20 - lr: 0.020000\n",
+      "2021-09-21 19:22:20,872 epoch 7 - iter 2/7 - loss 0.59589660 - samples/sec: 17.27 - lr: 0.020000\n",
+      "2021-09-21 19:22:20,962 epoch 7 - iter 3/7 - loss 0.43726045 - samples/sec: 11.11 - lr: 0.020000\n",
+      "2021-09-21 19:22:21,031 epoch 7 - iter 4/7 - loss 0.42439724 - samples/sec: 14.57 - lr: 0.020000\n",
+      "2021-09-21 19:22:21,095 epoch 7 - iter 5/7 - loss 0.46871660 - samples/sec: 15.94 - lr: 0.020000\n",
+      "2021-09-21 19:22:21,158 epoch 7 - iter 6/7 - loss 0.46586477 - samples/sec: 15.77 - lr: 0.020000\n",
+      "2021-09-21 19:22:21,236 epoch 7 - iter 7/7 - loss 0.45798841 - samples/sec: 13.00 - lr: 0.020000\n",
+      "2021-09-21 19:22:21,237 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:21,237 EPOCH 7 done: loss 0.4580 - lr 0.0200000\n",
+      "2021-09-21 19:22:21,285 DEV : loss 0.6059575080871582 - score 0.0\n",
+      "2021-09-21 19:22:21,287 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:22:21,289 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:21,425 epoch 8 - iter 1/7 - loss 0.69126087 - samples/sec: 13.07 - lr: 0.020000\n",
+      "2021-09-21 19:22:21,502 epoch 8 - iter 2/7 - loss 0.63571918 - samples/sec: 13.20 - lr: 0.020000\n",
+      "2021-09-21 19:22:21,569 epoch 8 - iter 3/7 - loss 0.57863140 - samples/sec: 14.90 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:55:07,900 epoch 7 - iter 2/7 - loss 0.42307992 - samples/sec: 18.32 - lr: 0.020000\n",
-      "2021-09-08 10:55:07,952 epoch 7 - iter 3/7 - loss 0.45197009 - samples/sec: 19.38 - lr: 0.020000\n",
-      "2021-09-08 10:55:08,000 epoch 7 - iter 4/7 - loss 0.35811922 - samples/sec: 20.78 - lr: 0.020000\n",
-      "2021-09-08 10:55:08,053 epoch 7 - iter 5/7 - loss 0.39363972 - samples/sec: 19.12 - lr: 0.020000\n",
-      "2021-09-08 10:55:08,101 epoch 7 - iter 6/7 - loss 0.34416286 - samples/sec: 20.97 - lr: 0.020000\n",
-      "2021-09-08 10:55:08,146 epoch 7 - iter 7/7 - loss 0.31051067 - samples/sec: 22.28 - lr: 0.020000\n",
-      "2021-09-08 10:55:08,148 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:08,148 EPOCH 7 done: loss 0.3105 - lr 0.0200000\n",
-      "2021-09-08 10:55:08,283 DEV : loss 0.2620686888694763 - score 0.0\n",
-      "2021-09-08 10:55:08,284 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:22:21,648 epoch 8 - iter 4/7 - loss 0.45026433 - samples/sec: 12.82 - lr: 0.020000\n",
+      "2021-09-21 19:22:21,723 epoch 8 - iter 5/7 - loss 0.45021915 - samples/sec: 13.28 - lr: 0.020000\n",
+      "2021-09-21 19:22:21,790 epoch 8 - iter 6/7 - loss 0.45240979 - samples/sec: 15.17 - lr: 0.020000\n",
+      "2021-09-21 19:22:21,878 epoch 8 - iter 7/7 - loss 0.43234021 - samples/sec: 11.38 - lr: 0.020000\n",
+      "2021-09-21 19:22:21,879 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:21,880 EPOCH 8 done: loss 0.4323 - lr 0.0200000\n",
+      "2021-09-21 19:22:21,938 DEV : loss 0.25159645080566406 - score 0.0\n",
+      "2021-09-21 19:22:21,941 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:22:21,943 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:22,034 epoch 9 - iter 1/7 - loss 0.19611005 - samples/sec: 15.17 - lr: 0.020000\n",
+      "2021-09-21 19:22:22,116 epoch 9 - iter 2/7 - loss 0.13460948 - samples/sec: 12.21 - lr: 0.020000\n",
+      "2021-09-21 19:22:22,185 epoch 9 - iter 3/7 - loss 0.24455704 - samples/sec: 14.79 - lr: 0.020000\n",
+      "2021-09-21 19:22:22,258 epoch 9 - iter 4/7 - loss 0.28590115 - samples/sec: 13.70 - lr: 0.020000\n",
+      "2021-09-21 19:22:22,330 epoch 9 - iter 5/7 - loss 0.33344223 - samples/sec: 14.06 - lr: 0.020000\n",
+      "2021-09-21 19:22:22,399 epoch 9 - iter 6/7 - loss 0.34628812 - samples/sec: 14.52 - lr: 0.020000\n",
+      "2021-09-21 19:22:22,456 epoch 9 - iter 7/7 - loss 0.37423151 - samples/sec: 17.86 - lr: 0.020000\n",
+      "2021-09-21 19:22:22,457 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:22,457 EPOCH 9 done: loss 0.3742 - lr 0.0200000\n",
+      "2021-09-21 19:22:22,514 DEV : loss 0.15333223342895508 - score 0.0\n",
+      "Epoch     9: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 19:22:22,518 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:22:22,520 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:22,627 epoch 10 - iter 1/7 - loss 0.04340261 - samples/sec: 15.23 - lr: 0.010000\n",
+      "2021-09-21 19:22:22,694 epoch 10 - iter 2/7 - loss 0.09827724 - samples/sec: 15.14 - lr: 0.010000\n",
+      "2021-09-21 19:22:22,771 epoch 10 - iter 3/7 - loss 0.13789226 - samples/sec: 13.01 - lr: 0.010000\n",
+      "2021-09-21 19:22:22,846 epoch 10 - iter 4/7 - loss 0.22647805 - samples/sec: 13.49 - lr: 0.010000\n",
+      "2021-09-21 19:22:22,917 epoch 10 - iter 5/7 - loss 0.25390759 - samples/sec: 14.11 - lr: 0.010000\n",
+      "2021-09-21 19:22:22,991 epoch 10 - iter 6/7 - loss 0.23610641 - samples/sec: 13.69 - lr: 0.010000\n",
+      "2021-09-21 19:22:23,090 epoch 10 - iter 7/7 - loss 0.25776640 - samples/sec: 10.15 - lr: 0.010000\n",
+      "2021-09-21 19:22:23,091 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:23,091 EPOCH 10 done: loss 0.2578 - lr 0.0100000\n",
+      "2021-09-21 19:22:23,133 DEV : loss 0.1116107776761055 - score 0.0\n",
+      "2021-09-21 19:22:23,138 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:55:13,188 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:13,264 epoch 8 - iter 1/7 - loss 0.20724027 - samples/sec: 17.53 - lr: 0.020000\n",
-      "2021-09-08 10:55:13,322 epoch 8 - iter 2/7 - loss 0.44341692 - samples/sec: 17.32 - lr: 0.020000\n",
-      "2021-09-08 10:55:13,379 epoch 8 - iter 3/7 - loss 0.37458455 - samples/sec: 17.85 - lr: 0.020000\n",
-      "2021-09-08 10:55:13,432 epoch 8 - iter 4/7 - loss 0.30229544 - samples/sec: 19.26 - lr: 0.020000\n",
-      "2021-09-08 10:55:13,488 epoch 8 - iter 5/7 - loss 0.35203522 - samples/sec: 17.76 - lr: 0.020000\n",
-      "2021-09-08 10:55:13,550 epoch 8 - iter 6/7 - loss 0.36645542 - samples/sec: 16.40 - lr: 0.020000\n",
-      "2021-09-08 10:55:13,609 epoch 8 - iter 7/7 - loss 0.32720188 - samples/sec: 17.21 - lr: 0.020000\n",
-      "2021-09-08 10:55:13,610 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:13,611 EPOCH 8 done: loss 0.3272 - lr 0.0200000\n",
-      "2021-09-08 10:55:13,643 DEV : loss 0.4583706259727478 - score 0.0\n",
-      "2021-09-08 10:55:13,643 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:55:13,645 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:13,714 epoch 9 - iter 1/7 - loss 0.03059370 - samples/sec: 19.05 - lr: 0.020000\n",
-      "2021-09-08 10:55:13,771 epoch 9 - iter 2/7 - loss 0.28169057 - samples/sec: 17.76 - lr: 0.020000\n",
-      "2021-09-08 10:55:13,828 epoch 9 - iter 3/7 - loss 0.32541973 - samples/sec: 17.76 - lr: 0.020000\n",
-      "2021-09-08 10:55:13,885 epoch 9 - iter 4/7 - loss 0.31263665 - samples/sec: 17.63 - lr: 0.020000\n",
-      "2021-09-08 10:55:13,938 epoch 9 - iter 5/7 - loss 0.25983561 - samples/sec: 19.18 - lr: 0.020000\n",
-      "2021-09-08 10:55:13,996 epoch 9 - iter 6/7 - loss 0.32806563 - samples/sec: 17.46 - lr: 0.020000\n",
-      "2021-09-08 10:55:14,054 epoch 9 - iter 7/7 - loss 0.32044762 - samples/sec: 17.50 - lr: 0.020000\n",
-      "2021-09-08 10:55:14,055 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:14,055 EPOCH 9 done: loss 0.3204 - lr 0.0200000\n",
-      "2021-09-08 10:55:14,088 DEV : loss 0.5968599915504456 - score 0.0\n",
-      "2021-09-08 10:55:14,089 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:55:14,091 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:14,181 epoch 10 - iter 1/7 - loss 0.21915746 - samples/sec: 16.38 - lr: 0.020000\n",
-      "2021-09-08 10:55:14,239 epoch 10 - iter 2/7 - loss 0.18052299 - samples/sec: 17.56 - lr: 0.020000\n",
-      "2021-09-08 10:55:14,296 epoch 10 - iter 3/7 - loss 0.21381886 - samples/sec: 17.60 - lr: 0.020000\n",
-      "2021-09-08 10:55:14,349 epoch 10 - iter 4/7 - loss 0.17668484 - samples/sec: 19.17 - lr: 0.020000\n",
-      "2021-09-08 10:55:14,405 epoch 10 - iter 5/7 - loss 0.15995262 - samples/sec: 17.88 - lr: 0.020000\n",
-      "2021-09-08 10:55:14,461 epoch 10 - iter 6/7 - loss 0.31579960 - samples/sec: 18.13 - lr: 0.020000\n",
-      "2021-09-08 10:55:14,524 epoch 10 - iter 7/7 - loss 0.28526329 - samples/sec: 15.90 - lr: 0.020000\n",
-      "2021-09-08 10:55:14,525 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:14,525 EPOCH 10 done: loss 0.2853 - lr 0.0200000\n",
-      "2021-09-08 10:55:14,558 DEV : loss 0.7172576189041138 - score 0.0\n",
-      "2021-09-08 10:55:14,559 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:55:22,393 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:22,394 Testing using best model ...\n",
-      "2021-09-08 10:55:22,430 loading file None/best-model.pt\n",
+      "2021-09-21 19:22:35,935 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:35,936 Testing using best model ...\n",
+      "2021-09-21 19:22:35,937 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:55:28,138 \t0.0\n",
-      "2021-09-08 10:55:28,139 \n",
+      "2021-09-21 19:22:41,386 \t0.0\n",
+      "2021-09-21 19:22:41,386 \n",
       "Results:\n",
       "- F-score (micro) 0.0\n",
       "- F-score (macro) 0.0\n",
@@ -2667,22 +2663,22 @@
       "By class:\n",
       "              precision    recall  f1-score   support\n",
       "\n",
-      "    surprise     0.0000    0.0000    0.0000         0\n",
       "        love     0.0000    0.0000    0.0000         0\n",
       "     disgust     0.0000    0.0000    0.0000         0\n",
       "        fear     0.0000    0.0000    0.0000         0\n",
       "       anger     0.0000    0.0000    0.0000         0\n",
+      "       guilt     0.0000    0.0000    0.0000         0\n",
       "       shame     0.0000    0.0000    0.0000         0\n",
       "     sadness     0.0000    0.0000    0.0000         0\n",
-      "       guilt     0.0000    0.0000    0.0000         1\n",
+      "    surprise     0.0000    0.0000    0.0000         1\n",
       "\n",
       "   micro avg     0.0000    0.0000    0.0000         1\n",
       "   macro avg     0.0000    0.0000    0.0000         1\n",
       "weighted avg     0.0000    0.0000    0.0000         1\n",
       " samples avg     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "2021-09-08 10:55:28,139 ----------------------------------------------------------------------------------------------------\n",
-      "Accuracy Durchschnitt: 0.16163357062570252\n"
+      "2021-09-21 19:22:41,387 ----------------------------------------------------------------------------------------------------\n",
+      "Accuracy Durchschnitt: 0.18343949044585986\n"
      ]
     }
    ],
@@ -2758,6 +2754,26 @@
     "print(f'Accuracy Durchschnitt: {statistics.mean(avg_acc_list)}')"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "a6faa5a3",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.22780067440989135, 0.19932559010865492, 0.2240539527913076, 0.09966279505432746, 0.16635443986511803]\n",
+      "0.04728689247744835\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(avg_acc_list)\n",
+    "print(statistics.pstdev(avg_acc_list))"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "440b1d3b",
@@ -2768,33 +2784,35 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 6,
    "id": "ef4da272",
-   "metadata": {},
+   "metadata": {
+    "scrolled": true
+   },
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:56:08,227 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
+      "2021-09-21 19:24:07,397 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:56:12,296 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:24:12,269 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 8/8 [00:00<00:00, 17163.39it/s]"
+      "100%|██████████| 8/8 [00:00<00:00, 28532.68it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:56:12,299 [b'this text expresses surprise', b'this text expresses love', b'this text expresses disgust', b'this text expresses fear', b'this text expresses anger', b'this text expresses guilt', b'this text expresses sadness', b'this text expresses shame']\n",
-      "2021-09-08 10:56:12,308 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:12,310 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:24:12,271 [b'this text expresses joy', b'this text expresses surprise', b'this text expresses love', b'this text expresses disgust', b'this text expresses fear', b'this text expresses anger', b'this text expresses shame', b'this text expresses sadness']\n",
+      "2021-09-21 19:24:12,280 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:12,282 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -3107,27 +3125,26 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:56:12,311 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:12,311 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
-      "2021-09-08 10:56:12,311 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:12,312 Parameters:\n",
-      "2021-09-08 10:56:12,312  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:56:12,312  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:56:12,312  - patience: \"3\"\n",
-      "2021-09-08 10:56:12,313  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:56:12,313  - max_epochs: \"10\"\n",
-      "2021-09-08 10:56:12,313  - shuffle: \"True\"\n",
-      "2021-09-08 10:56:12,314  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:56:12,314  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:56:12,314 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:12,314 Model training base path: \"None\"\n",
-      "2021-09-08 10:56:12,315 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:12,315 Device: cuda:1\n",
-      "2021-09-08 10:56:12,315 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:12,316 Embeddings storage mode: cpu\n",
-      "2021-09-08 10:56:12,323 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:12,394 epoch 1 - iter 1/7 - loss 0.69431227 - samples/sec: 19.94 - lr: 0.020000\n",
-      "2021-09-08 10:56:12,453 epoch 1 - iter 2/7 - loss 0.65354252 - samples/sec: 17.18 - lr: 0.020000\n"
+      "2021-09-21 19:24:12,282 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:12,282 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
+      "2021-09-21 19:24:12,283 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:12,283 Parameters:\n",
+      "2021-09-21 19:24:12,283  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:24:12,283  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:24:12,284  - patience: \"3\"\n",
+      "2021-09-21 19:24:12,284  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:24:12,284  - max_epochs: \"10\"\n",
+      "2021-09-21 19:24:12,285  - shuffle: \"True\"\n",
+      "2021-09-21 19:24:12,285  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:24:12,285  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:24:12,285 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:12,286 Model training base path: \"None\"\n",
+      "2021-09-21 19:24:12,286 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:12,286 Device: cuda:0\n",
+      "2021-09-21 19:24:12,287 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:12,287 Embeddings storage mode: cpu\n",
+      "2021-09-21 19:24:12,293 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:12,388 epoch 1 - iter 1/7 - loss 0.50536323 - samples/sec: 16.03 - lr: 0.020000\n"
      ]
     },
     {
@@ -3141,140 +3158,140 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:56:12,512 epoch 1 - iter 3/7 - loss 0.62616879 - samples/sec: 17.14 - lr: 0.020000\n",
-      "2021-09-08 10:56:12,570 epoch 1 - iter 4/7 - loss 0.69793473 - samples/sec: 17.40 - lr: 0.020000\n",
-      "2021-09-08 10:56:12,628 epoch 1 - iter 5/7 - loss 0.72742834 - samples/sec: 17.43 - lr: 0.020000\n",
-      "2021-09-08 10:56:12,688 epoch 1 - iter 6/7 - loss 0.73253662 - samples/sec: 16.56 - lr: 0.020000\n",
-      "2021-09-08 10:56:12,744 epoch 1 - iter 7/7 - loss 0.66626697 - samples/sec: 18.05 - lr: 0.020000\n",
-      "2021-09-08 10:56:12,745 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:12,746 EPOCH 1 done: loss 0.6663 - lr 0.0200000\n",
-      "2021-09-08 10:56:12,781 DEV : loss 0.1840227246284485 - score 0.0\n",
-      "2021-09-08 10:56:12,782 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:24:12,471 epoch 1 - iter 2/7 - loss 0.61221144 - samples/sec: 12.14 - lr: 0.020000\n",
+      "2021-09-21 19:24:12,557 epoch 1 - iter 3/7 - loss 0.67343903 - samples/sec: 11.79 - lr: 0.020000\n",
+      "2021-09-21 19:24:12,621 epoch 1 - iter 4/7 - loss 0.64836581 - samples/sec: 15.91 - lr: 0.020000\n",
+      "2021-09-21 19:24:12,712 epoch 1 - iter 5/7 - loss 0.65146117 - samples/sec: 11.03 - lr: 0.020000\n",
+      "2021-09-21 19:24:12,792 epoch 1 - iter 6/7 - loss 0.65278364 - samples/sec: 12.71 - lr: 0.020000\n",
+      "2021-09-21 19:24:12,861 epoch 1 - iter 7/7 - loss 0.65475337 - samples/sec: 14.71 - lr: 0.020000\n",
+      "2021-09-21 19:24:12,862 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:12,863 EPOCH 1 done: loss 0.6548 - lr 0.0200000\n",
+      "2021-09-21 19:24:13,012 DEV : loss 0.3146503269672394 - score 0.0\n",
+      "2021-09-21 19:24:13,012 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:56:16,453 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:16,536 epoch 2 - iter 1/7 - loss 0.57152569 - samples/sec: 16.88 - lr: 0.020000\n",
-      "2021-09-08 10:56:16,593 epoch 2 - iter 2/7 - loss 0.59733635 - samples/sec: 17.51 - lr: 0.020000\n",
-      "2021-09-08 10:56:16,649 epoch 2 - iter 3/7 - loss 0.52315211 - samples/sec: 18.20 - lr: 0.020000\n",
-      "2021-09-08 10:56:16,705 epoch 2 - iter 4/7 - loss 0.54500593 - samples/sec: 17.83 - lr: 0.020000\n",
-      "2021-09-08 10:56:16,761 epoch 2 - iter 5/7 - loss 0.54363973 - samples/sec: 18.15 - lr: 0.020000\n",
-      "2021-09-08 10:56:16,818 epoch 2 - iter 6/7 - loss 0.56627351 - samples/sec: 17.66 - lr: 0.020000\n",
-      "2021-09-08 10:56:16,879 epoch 2 - iter 7/7 - loss 0.60704597 - samples/sec: 16.66 - lr: 0.020000\n",
-      "2021-09-08 10:56:16,880 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:16,881 EPOCH 2 done: loss 0.6070 - lr 0.0200000\n",
-      "2021-09-08 10:56:16,914 DEV : loss 0.46390438079833984 - score 0.0\n",
-      "2021-09-08 10:56:16,915 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:56:16,917 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:16,995 epoch 3 - iter 1/7 - loss 0.61180425 - samples/sec: 16.58 - lr: 0.020000\n",
-      "2021-09-08 10:56:17,054 epoch 3 - iter 2/7 - loss 0.54050308 - samples/sec: 17.35 - lr: 0.020000\n",
-      "2021-09-08 10:56:17,111 epoch 3 - iter 3/7 - loss 0.57242312 - samples/sec: 17.71 - lr: 0.020000\n",
-      "2021-09-08 10:56:17,166 epoch 3 - iter 4/7 - loss 0.57905892 - samples/sec: 18.11 - lr: 0.020000\n",
-      "2021-09-08 10:56:17,226 epoch 3 - iter 5/7 - loss 0.67888079 - samples/sec: 16.74 - lr: 0.020000\n",
-      "2021-09-08 10:56:17,282 epoch 3 - iter 6/7 - loss 0.63672478 - samples/sec: 18.04 - lr: 0.020000\n",
-      "2021-09-08 10:56:17,338 epoch 3 - iter 7/7 - loss 0.57701137 - samples/sec: 18.00 - lr: 0.020000\n",
-      "2021-09-08 10:56:17,340 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:17,340 EPOCH 3 done: loss 0.5770 - lr 0.0200000\n",
-      "2021-09-08 10:56:17,372 DEV : loss 0.20354966819286346 - score 0.0\n",
-      "2021-09-08 10:56:17,373 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:56:17,375 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:17,446 epoch 4 - iter 1/7 - loss 0.36485210 - samples/sec: 18.03 - lr: 0.020000\n",
-      "2021-09-08 10:56:17,501 epoch 4 - iter 2/7 - loss 0.25124124 - samples/sec: 18.22 - lr: 0.020000\n",
-      "2021-09-08 10:56:17,557 epoch 4 - iter 3/7 - loss 0.26359147 - samples/sec: 17.96 - lr: 0.020000\n",
-      "2021-09-08 10:56:17,613 epoch 4 - iter 4/7 - loss 0.35084251 - samples/sec: 18.01 - lr: 0.020000\n",
-      "2021-09-08 10:56:17,670 epoch 4 - iter 5/7 - loss 0.40291196 - samples/sec: 17.87 - lr: 0.020000\n",
-      "2021-09-08 10:56:17,726 epoch 4 - iter 6/7 - loss 0.43281589 - samples/sec: 17.87 - lr: 0.020000\n",
-      "2021-09-08 10:56:17,783 epoch 4 - iter 7/7 - loss 0.48270562 - samples/sec: 17.75 - lr: 0.020000\n",
-      "2021-09-08 10:56:17,784 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:17,784 EPOCH 4 done: loss 0.4827 - lr 0.0200000\n",
-      "2021-09-08 10:56:17,819 DEV : loss 0.31129080057144165 - score 0.0\n",
-      "2021-09-08 10:56:17,820 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:56:17,850 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:17,924 epoch 5 - iter 1/7 - loss 0.54062438 - samples/sec: 17.19 - lr: 0.020000\n",
-      "2021-09-08 10:56:17,980 epoch 5 - iter 2/7 - loss 0.38614840 - samples/sec: 18.02 - lr: 0.020000\n",
-      "2021-09-08 10:56:18,036 epoch 5 - iter 3/7 - loss 0.39886590 - samples/sec: 18.08 - lr: 0.020000\n",
-      "2021-09-08 10:56:18,092 epoch 5 - iter 4/7 - loss 0.57417941 - samples/sec: 18.14 - lr: 0.020000\n",
-      "2021-09-08 10:56:18,148 epoch 5 - iter 5/7 - loss 0.50573682 - samples/sec: 18.05 - lr: 0.020000\n",
-      "2021-09-08 10:56:18,203 epoch 5 - iter 6/7 - loss 0.44263153 - samples/sec: 18.31 - lr: 0.020000\n",
-      "2021-09-08 10:56:18,260 epoch 5 - iter 7/7 - loss 0.47748461 - samples/sec: 17.61 - lr: 0.020000\n",
-      "2021-09-08 10:56:18,261 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:18,261 EPOCH 5 done: loss 0.4775 - lr 0.0200000\n",
-      "2021-09-08 10:56:18,293 DEV : loss 0.1904430091381073 - score 0.0\n",
-      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 10:56:18,293 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:56:18,295 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:18,367 epoch 6 - iter 1/7 - loss 0.56382966 - samples/sec: 18.03 - lr: 0.010000\n",
-      "2021-09-08 10:56:18,418 epoch 6 - iter 2/7 - loss 0.33404609 - samples/sec: 19.59 - lr: 0.010000\n",
-      "2021-09-08 10:56:18,469 epoch 6 - iter 3/7 - loss 0.24337123 - samples/sec: 19.74 - lr: 0.010000\n",
-      "2021-09-08 10:56:18,526 epoch 6 - iter 4/7 - loss 0.27744107 - samples/sec: 17.89 - lr: 0.010000\n",
-      "2021-09-08 10:56:18,582 epoch 6 - iter 5/7 - loss 0.28593876 - samples/sec: 18.05 - lr: 0.010000\n",
-      "2021-09-08 10:56:18,638 epoch 6 - iter 6/7 - loss 0.31022266 - samples/sec: 17.76 - lr: 0.010000\n",
-      "2021-09-08 10:56:18,694 epoch 6 - iter 7/7 - loss 0.36216650 - samples/sec: 18.14 - lr: 0.010000\n",
-      "2021-09-08 10:56:18,695 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:18,695 EPOCH 6 done: loss 0.3622 - lr 0.0100000\n",
-      "2021-09-08 10:56:18,729 DEV : loss 0.21677982807159424 - score 0.0\n",
-      "2021-09-08 10:56:18,730 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:56:18,732 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:18,803 epoch 7 - iter 1/7 - loss 0.35074136 - samples/sec: 17.95 - lr: 0.010000\n",
-      "2021-09-08 10:56:18,855 epoch 7 - iter 2/7 - loss 0.23331819 - samples/sec: 19.60 - lr: 0.010000\n",
-      "2021-09-08 10:56:18,911 epoch 7 - iter 3/7 - loss 0.44880248 - samples/sec: 18.03 - lr: 0.010000\n",
-      "2021-09-08 10:56:18,967 epoch 7 - iter 4/7 - loss 0.39063441 - samples/sec: 18.01 - lr: 0.010000\n",
-      "2021-09-08 10:56:19,023 epoch 7 - iter 5/7 - loss 0.40691642 - samples/sec: 17.84 - lr: 0.010000\n",
-      "2021-09-08 10:56:19,074 epoch 7 - iter 6/7 - loss 0.34963174 - samples/sec: 19.87 - lr: 0.010000\n",
-      "2021-09-08 10:56:19,130 epoch 7 - iter 7/7 - loss 0.33480484 - samples/sec: 18.02 - lr: 0.010000\n",
-      "2021-09-08 10:56:19,131 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:19,131 EPOCH 7 done: loss 0.3348 - lr 0.0100000\n",
-      "2021-09-08 10:56:19,163 DEV : loss 0.1593499630689621 - score 0.0\n",
-      "2021-09-08 10:56:19,164 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:24:18,826 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:19,023 epoch 2 - iter 1/7 - loss 0.67424536 - samples/sec: 10.95 - lr: 0.020000\n",
+      "2021-09-21 19:24:19,128 epoch 2 - iter 2/7 - loss 0.41917571 - samples/sec: 9.55 - lr: 0.020000\n",
+      "2021-09-21 19:24:19,210 epoch 2 - iter 3/7 - loss 0.50851366 - samples/sec: 12.34 - lr: 0.020000\n",
+      "2021-09-21 19:24:19,287 epoch 2 - iter 4/7 - loss 0.57353548 - samples/sec: 13.13 - lr: 0.020000\n",
+      "2021-09-21 19:24:19,369 epoch 2 - iter 5/7 - loss 0.58828978 - samples/sec: 12.27 - lr: 0.020000\n",
+      "2021-09-21 19:24:19,458 epoch 2 - iter 6/7 - loss 0.58216888 - samples/sec: 11.32 - lr: 0.020000\n",
+      "2021-09-21 19:24:19,546 epoch 2 - iter 7/7 - loss 0.58363106 - samples/sec: 11.39 - lr: 0.020000\n",
+      "2021-09-21 19:24:19,547 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:19,548 EPOCH 2 done: loss 0.5836 - lr 0.0200000\n",
+      "2021-09-21 19:24:19,634 DEV : loss 0.2626540958881378 - score 0.0\n",
+      "2021-09-21 19:24:19,638 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:24:24,321 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:24,462 epoch 3 - iter 1/7 - loss 0.45730424 - samples/sec: 12.62 - lr: 0.020000\n",
+      "2021-09-21 19:24:24,534 epoch 3 - iter 2/7 - loss 0.72857654 - samples/sec: 14.09 - lr: 0.020000\n",
+      "2021-09-21 19:24:24,617 epoch 3 - iter 3/7 - loss 0.72978654 - samples/sec: 12.12 - lr: 0.020000\n",
+      "2021-09-21 19:24:24,680 epoch 3 - iter 4/7 - loss 0.65283931 - samples/sec: 15.86 - lr: 0.020000\n",
+      "2021-09-21 19:24:24,758 epoch 3 - iter 5/7 - loss 0.63322527 - samples/sec: 13.02 - lr: 0.020000\n",
+      "2021-09-21 19:24:24,835 epoch 3 - iter 6/7 - loss 0.61820573 - samples/sec: 13.01 - lr: 0.020000\n",
+      "2021-09-21 19:24:24,899 epoch 3 - iter 7/7 - loss 0.57228494 - samples/sec: 15.85 - lr: 0.020000\n",
+      "2021-09-21 19:24:24,900 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:24,900 EPOCH 3 done: loss 0.5723 - lr 0.0200000\n",
+      "2021-09-21 19:24:25,445 DEV : loss 0.21085992455482483 - score 0.0\n",
+      "2021-09-21 19:24:25,447 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:24:35,905 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:36,040 epoch 4 - iter 1/7 - loss 0.67968220 - samples/sec: 14.27 - lr: 0.020000\n",
+      "2021-09-21 19:24:36,147 epoch 4 - iter 2/7 - loss 0.62160209 - samples/sec: 9.37 - lr: 0.020000\n",
+      "2021-09-21 19:24:36,246 epoch 4 - iter 3/7 - loss 0.61775617 - samples/sec: 10.22 - lr: 0.020000\n",
+      "2021-09-21 19:24:36,327 epoch 4 - iter 4/7 - loss 0.56442031 - samples/sec: 12.48 - lr: 0.020000\n",
+      "2021-09-21 19:24:36,421 epoch 4 - iter 5/7 - loss 0.55629768 - samples/sec: 10.66 - lr: 0.020000\n",
+      "2021-09-21 19:24:36,506 epoch 4 - iter 6/7 - loss 0.55056980 - samples/sec: 11.89 - lr: 0.020000\n",
+      "2021-09-21 19:24:36,577 epoch 4 - iter 7/7 - loss 0.49212965 - samples/sec: 14.07 - lr: 0.020000\n",
+      "2021-09-21 19:24:36,578 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:36,579 EPOCH 4 done: loss 0.4921 - lr 0.0200000\n",
+      "2021-09-21 19:24:36,651 DEV : loss 0.3103100061416626 - score 0.0\n",
+      "2021-09-21 19:24:36,655 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:24:36,660 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:36,778 epoch 5 - iter 1/7 - loss 0.06630737 - samples/sec: 14.66 - lr: 0.020000\n",
+      "2021-09-21 19:24:36,862 epoch 5 - iter 2/7 - loss 0.24497338 - samples/sec: 12.00 - lr: 0.020000\n",
+      "2021-09-21 19:24:36,952 epoch 5 - iter 3/7 - loss 0.22844190 - samples/sec: 11.18 - lr: 0.020000\n",
+      "2021-09-21 19:24:37,030 epoch 5 - iter 4/7 - loss 0.35061898 - samples/sec: 12.77 - lr: 0.020000\n",
+      "2021-09-21 19:24:37,102 epoch 5 - iter 5/7 - loss 0.36438778 - samples/sec: 14.02 - lr: 0.020000\n",
+      "2021-09-21 19:24:37,179 epoch 5 - iter 6/7 - loss 0.40166451 - samples/sec: 13.05 - lr: 0.020000\n",
+      "2021-09-21 19:24:37,263 epoch 5 - iter 7/7 - loss 0.41806781 - samples/sec: 12.07 - lr: 0.020000\n",
+      "2021-09-21 19:24:37,264 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:37,264 EPOCH 5 done: loss 0.4181 - lr 0.0200000\n",
+      "2021-09-21 19:24:37,349 DEV : loss 0.4582175314426422 - score 0.0\n",
+      "2021-09-21 19:24:37,353 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:24:37,355 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:37,509 epoch 6 - iter 1/7 - loss 0.37260962 - samples/sec: 13.17 - lr: 0.020000\n",
+      "2021-09-21 19:24:37,588 epoch 6 - iter 2/7 - loss 0.46988937 - samples/sec: 12.76 - lr: 0.020000\n",
+      "2021-09-21 19:24:37,670 epoch 6 - iter 3/7 - loss 0.54642419 - samples/sec: 12.23 - lr: 0.020000\n",
+      "2021-09-21 19:24:37,757 epoch 6 - iter 4/7 - loss 0.57192291 - samples/sec: 11.58 - lr: 0.020000\n",
+      "2021-09-21 19:24:37,825 epoch 6 - iter 5/7 - loss 0.58019216 - samples/sec: 14.75 - lr: 0.020000\n",
+      "2021-09-21 19:24:37,906 epoch 6 - iter 6/7 - loss 0.49866553 - samples/sec: 12.52 - lr: 0.020000\n",
+      "2021-09-21 19:24:37,984 epoch 6 - iter 7/7 - loss 0.48876636 - samples/sec: 12.77 - lr: 0.020000\n",
+      "2021-09-21 19:24:37,985 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:37,986 EPOCH 6 done: loss 0.4888 - lr 0.0200000\n",
+      "2021-09-21 19:24:38,055 DEV : loss 0.2347518801689148 - score 0.0\n",
+      "2021-09-21 19:24:38,060 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:24:38,063 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:38,197 epoch 7 - iter 1/7 - loss 0.63762301 - samples/sec: 13.09 - lr: 0.020000\n",
+      "2021-09-21 19:24:38,270 epoch 7 - iter 2/7 - loss 0.35775410 - samples/sec: 13.80 - lr: 0.020000\n",
+      "2021-09-21 19:24:38,352 epoch 7 - iter 3/7 - loss 0.33623952 - samples/sec: 12.28 - lr: 0.020000\n",
+      "2021-09-21 19:24:38,426 epoch 7 - iter 4/7 - loss 0.43972457 - samples/sec: 13.49 - lr: 0.020000\n",
+      "2021-09-21 19:24:38,514 epoch 7 - iter 5/7 - loss 0.49439087 - samples/sec: 11.40 - lr: 0.020000\n",
+      "2021-09-21 19:24:38,586 epoch 7 - iter 6/7 - loss 0.42674240 - samples/sec: 14.07 - lr: 0.020000\n",
+      "2021-09-21 19:24:38,678 epoch 7 - iter 7/7 - loss 0.46092991 - samples/sec: 10.96 - lr: 0.020000\n",
+      "2021-09-21 19:24:38,679 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:38,679 EPOCH 7 done: loss 0.4609 - lr 0.0200000\n",
+      "2021-09-21 19:24:38,774 DEV : loss 0.11995202302932739 - score 0.0\n",
+      "2021-09-21 19:24:38,776 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:56:23,213 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:23,294 epoch 8 - iter 1/7 - loss 0.49826574 - samples/sec: 15.97 - lr: 0.010000\n",
-      "2021-09-08 10:56:23,355 epoch 8 - iter 2/7 - loss 0.61629549 - samples/sec: 16.67 - lr: 0.010000\n",
-      "2021-09-08 10:56:23,415 epoch 8 - iter 3/7 - loss 0.57246517 - samples/sec: 16.91 - lr: 0.010000\n",
-      "2021-09-08 10:56:23,475 epoch 8 - iter 4/7 - loss 0.48815428 - samples/sec: 16.81 - lr: 0.010000\n",
-      "2021-09-08 10:56:23,536 epoch 8 - iter 5/7 - loss 0.48431461 - samples/sec: 16.71 - lr: 0.010000\n"
+      "2021-09-21 19:24:42,845 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:42,942 epoch 8 - iter 1/7 - loss 0.63730353 - samples/sec: 15.10 - lr: 0.020000\n",
+      "2021-09-21 19:24:43,027 epoch 8 - iter 2/7 - loss 0.47414921 - samples/sec: 11.93 - lr: 0.020000\n",
+      "2021-09-21 19:24:43,100 epoch 8 - iter 3/7 - loss 0.49230378 - samples/sec: 13.78 - lr: 0.020000\n",
+      "2021-09-21 19:24:43,162 epoch 8 - iter 4/7 - loss 0.44252615 - samples/sec: 16.22 - lr: 0.020000\n",
+      "2021-09-21 19:24:43,222 epoch 8 - iter 5/7 - loss 0.45176544 - samples/sec: 16.89 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:56:23,592 epoch 8 - iter 6/7 - loss 0.41980811 - samples/sec: 18.07 - lr: 0.010000\n",
-      "2021-09-08 10:56:23,649 epoch 8 - iter 7/7 - loss 0.38359399 - samples/sec: 17.72 - lr: 0.010000\n",
-      "2021-09-08 10:56:23,650 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:23,650 EPOCH 8 done: loss 0.3836 - lr 0.0100000\n",
-      "2021-09-08 10:56:23,682 DEV : loss 0.1446561962366104 - score 0.0\n",
-      "2021-09-08 10:56:23,683 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:56:30,226 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:30,319 epoch 9 - iter 1/7 - loss 0.54397207 - samples/sec: 15.08 - lr: 0.010000\n",
-      "2021-09-08 10:56:30,384 epoch 9 - iter 2/7 - loss 0.41901498 - samples/sec: 15.50 - lr: 0.010000\n",
-      "2021-09-08 10:56:30,445 epoch 9 - iter 3/7 - loss 0.29646987 - samples/sec: 16.71 - lr: 0.010000\n",
-      "2021-09-08 10:56:30,511 epoch 9 - iter 4/7 - loss 0.25298515 - samples/sec: 15.24 - lr: 0.010000\n",
-      "2021-09-08 10:56:30,575 epoch 9 - iter 5/7 - loss 0.26425299 - samples/sec: 15.60 - lr: 0.010000\n",
-      "2021-09-08 10:56:30,639 epoch 9 - iter 6/7 - loss 0.24514521 - samples/sec: 15.70 - lr: 0.010000\n",
-      "2021-09-08 10:56:30,706 epoch 9 - iter 7/7 - loss 0.27236072 - samples/sec: 15.20 - lr: 0.010000\n",
-      "2021-09-08 10:56:30,707 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:30,707 EPOCH 9 done: loss 0.2724 - lr 0.0100000\n",
-      "2021-09-08 10:56:30,751 DEV : loss 0.13204118609428406 - score 0.0\n",
-      "2021-09-08 10:56:30,752 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:56:39,291 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:39,366 epoch 10 - iter 1/7 - loss 0.03600054 - samples/sec: 17.96 - lr: 0.010000\n",
-      "2021-09-08 10:56:39,432 epoch 10 - iter 2/7 - loss 0.07004909 - samples/sec: 15.10 - lr: 0.010000\n",
-      "2021-09-08 10:56:39,497 epoch 10 - iter 3/7 - loss 0.14358252 - samples/sec: 15.58 - lr: 0.010000\n",
-      "2021-09-08 10:56:39,560 epoch 10 - iter 4/7 - loss 0.14997472 - samples/sec: 15.85 - lr: 0.010000\n",
-      "2021-09-08 10:56:39,625 epoch 10 - iter 5/7 - loss 0.15806246 - samples/sec: 15.49 - lr: 0.010000\n",
-      "2021-09-08 10:56:39,694 epoch 10 - iter 6/7 - loss 0.19206733 - samples/sec: 14.70 - lr: 0.010000\n",
-      "2021-09-08 10:56:39,758 epoch 10 - iter 7/7 - loss 0.18717092 - samples/sec: 15.75 - lr: 0.010000\n",
-      "2021-09-08 10:56:39,759 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:39,759 EPOCH 10 done: loss 0.1872 - lr 0.0100000\n",
-      "2021-09-08 10:56:40,165 DEV : loss 0.1674247682094574 - score 0.0\n",
-      "2021-09-08 10:56:40,168 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:56:50,797 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:50,798 Testing using best model ...\n",
-      "2021-09-08 10:56:50,838 loading file None/best-model.pt\n",
+      "2021-09-21 19:24:43,280 epoch 8 - iter 6/7 - loss 0.44670814 - samples/sec: 17.34 - lr: 0.020000\n",
+      "2021-09-21 19:24:43,339 epoch 8 - iter 7/7 - loss 0.38857666 - samples/sec: 17.23 - lr: 0.020000\n",
+      "2021-09-21 19:24:43,340 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:43,340 EPOCH 8 done: loss 0.3886 - lr 0.0200000\n",
+      "2021-09-21 19:24:43,517 DEV : loss 0.3473562002182007 - score 0.0\n",
+      "2021-09-21 19:24:43,517 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:24:43,597 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:43,715 epoch 9 - iter 1/7 - loss 0.29903466 - samples/sec: 16.55 - lr: 0.020000\n",
+      "2021-09-21 19:24:43,789 epoch 9 - iter 2/7 - loss 0.56635463 - samples/sec: 13.70 - lr: 0.020000\n",
+      "2021-09-21 19:24:43,853 epoch 9 - iter 3/7 - loss 0.49619866 - samples/sec: 15.67 - lr: 0.020000\n",
+      "2021-09-21 19:24:43,916 epoch 9 - iter 4/7 - loss 0.38496814 - samples/sec: 16.14 - lr: 0.020000\n",
+      "2021-09-21 19:24:43,992 epoch 9 - iter 5/7 - loss 0.32387623 - samples/sec: 13.18 - lr: 0.020000\n",
+      "2021-09-21 19:24:44,109 epoch 9 - iter 6/7 - loss 0.40321343 - samples/sec: 13.98 - lr: 0.020000\n",
+      "2021-09-21 19:24:44,182 epoch 9 - iter 7/7 - loss 0.37461590 - samples/sec: 13.86 - lr: 0.020000\n",
+      "2021-09-21 19:24:44,183 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:44,184 EPOCH 9 done: loss 0.3746 - lr 0.0200000\n",
+      "2021-09-21 19:24:44,348 DEV : loss 0.7493157386779785 - score 0.0\n",
+      "2021-09-21 19:24:44,350 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:24:44,427 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:44,539 epoch 10 - iter 1/7 - loss 0.04068792 - samples/sec: 14.21 - lr: 0.020000\n",
+      "2021-09-21 19:24:44,606 epoch 10 - iter 2/7 - loss 0.18146886 - samples/sec: 15.27 - lr: 0.020000\n",
+      "2021-09-21 19:24:44,671 epoch 10 - iter 3/7 - loss 0.20866418 - samples/sec: 15.43 - lr: 0.020000\n",
+      "2021-09-21 19:24:44,736 epoch 10 - iter 4/7 - loss 0.38612801 - samples/sec: 15.53 - lr: 0.020000\n",
+      "2021-09-21 19:24:44,798 epoch 10 - iter 5/7 - loss 0.31314485 - samples/sec: 16.32 - lr: 0.020000\n",
+      "2021-09-21 19:24:44,855 epoch 10 - iter 6/7 - loss 0.34207714 - samples/sec: 17.55 - lr: 0.020000\n",
+      "2021-09-21 19:24:44,935 epoch 10 - iter 7/7 - loss 0.35305862 - samples/sec: 12.60 - lr: 0.020000\n",
+      "2021-09-21 19:24:44,936 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:44,937 EPOCH 10 done: loss 0.3531 - lr 0.0200000\n",
+      "2021-09-21 19:24:45,079 DEV : loss 0.18277986347675323 - score 0.0\n",
+      "2021-09-21 19:24:45,082 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:24:50,683 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:50,684 Testing using best model ...\n",
+      "2021-09-21 19:24:50,685 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:56:58,213 \t0.0\n",
-      "2021-09-08 10:56:58,214 \n",
+      "2021-09-21 19:24:56,434 \t0.0\n",
+      "2021-09-21 19:24:56,435 \n",
       "Results:\n",
       "- F-score (micro) 0.0\n",
       "- F-score (macro) 0.0\n",
@@ -3283,53 +3300,40 @@
       "By class:\n",
       "                              precision    recall  f1-score   support\n",
       "\n",
+      "     this text expresses joy     0.0000    0.0000    0.0000         0\n",
       "this text expresses surprise     0.0000    0.0000    0.0000         0\n",
       "    this text expresses love     0.0000    0.0000    0.0000         0\n",
       " this text expresses disgust     0.0000    0.0000    0.0000         0\n",
       "    this text expresses fear     0.0000    0.0000    0.0000         0\n",
       "   this text expresses anger     0.0000    0.0000    0.0000         0\n",
-      "   this text expresses guilt     0.0000    0.0000    0.0000         0\n",
-      " this text expresses sadness     0.0000    0.0000    0.0000         0\n",
-      "   this text expresses shame     0.0000    0.0000    0.0000         1\n",
+      "   this text expresses shame     0.0000    0.0000    0.0000         0\n",
+      " this text expresses sadness     0.0000    0.0000    0.0000         1\n",
       "\n",
       "                   micro avg     0.0000    0.0000    0.0000         1\n",
       "                   macro avg     0.0000    0.0000    0.0000         1\n",
       "                weighted avg     0.0000    0.0000    0.0000         1\n",
       "                 samples avg     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "2021-09-08 10:56:58,214 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:47,562 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
+      "2021-09-21 19:24:56,435 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:26:48,818 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:57:51,846 Computing label dictionary. Progress:\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "100%|██████████| 8/8 [00:00<00:00, 32078.81it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 10:57:51,848 [b'this text expresses joy', b'this text expresses surprise', b'this text expresses love', b'this text expresses disgust', b'this text expresses fear', b'this text expresses guilt', b'this text expresses shame', b'this text expresses anger']\n"
+      "2021-09-21 19:26:53,015 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\n"
+      "100%|██████████| 8/8 [00:00<00:00, 28556.96it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:57:52,232 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:52,235 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:26:53,017 [b'this text expresses surprise', b'this text expresses love', b'this text expresses disgust', b'this text expresses anger', b'this text expresses guilt', b'this text expresses shame', b'this text expresses sadness', b'this text expresses joy']\n",
+      "2021-09-21 19:26:53,166 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:26:53,168 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -3642,160 +3646,174 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:57:52,235 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:52,236 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
-      "2021-09-08 10:57:52,236 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:52,236 Parameters:\n",
-      "2021-09-08 10:57:52,236  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:57:52,237  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:57:52,237  - patience: \"3\"\n",
-      "2021-09-08 10:57:52,237  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:57:52,238  - max_epochs: \"10\"\n",
-      "2021-09-08 10:57:52,238  - shuffle: \"True\"\n",
-      "2021-09-08 10:57:52,238  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:57:52,238  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:57:52,239 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:52,239 Model training base path: \"None\"\n",
-      "2021-09-08 10:57:52,239 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:52,239 Device: cuda:1\n",
-      "2021-09-08 10:57:52,240 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:52,240 Embeddings storage mode: cpu\n",
-      "2021-09-08 10:57:52,248 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:52,302 epoch 1 - iter 1/7 - loss 0.15447615 - samples/sec: 25.27 - lr: 0.020000\n",
-      "2021-09-08 10:57:52,350 epoch 1 - iter 2/7 - loss 0.57503595 - samples/sec: 21.32 - lr: 0.020000\n",
-      "2021-09-08 10:57:52,397 epoch 1 - iter 3/7 - loss 0.67201624 - samples/sec: 21.41 - lr: 0.020000\n",
-      "2021-09-08 10:57:52,445 epoch 1 - iter 4/7 - loss 0.70554740 - samples/sec: 20.80 - lr: 0.020000\n",
-      "2021-09-08 10:57:52,492 epoch 1 - iter 5/7 - loss 0.76311915 - samples/sec: 21.45 - lr: 0.020000\n",
-      "2021-09-08 10:57:52,540 epoch 1 - iter 6/7 - loss 0.75246514 - samples/sec: 21.14 - lr: 0.020000\n",
-      "2021-09-08 10:57:52,587 epoch 1 - iter 7/7 - loss 0.75438228 - samples/sec: 21.57 - lr: 0.020000\n",
-      "2021-09-08 10:57:52,588 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:52,588 EPOCH 1 done: loss 0.7544 - lr 0.0200000\n",
-      "2021-09-08 10:57:52,616 DEV : loss 0.33062461018562317 - score 0.0\n",
-      "2021-09-08 10:57:52,617 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:26:53,168 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:26:53,169 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
+      "2021-09-21 19:26:53,169 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:26:53,169 Parameters:\n",
+      "2021-09-21 19:26:53,169  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:26:53,170  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:26:53,170  - patience: \"3\"\n",
+      "2021-09-21 19:26:53,170  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:26:53,171  - max_epochs: \"10\"\n",
+      "2021-09-21 19:26:53,171  - shuffle: \"True\"\n",
+      "2021-09-21 19:26:53,171  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:26:53,172  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:26:53,172 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:26:53,172 Model training base path: \"None\"\n",
+      "2021-09-21 19:26:53,172 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:26:53,173 Device: cuda:0\n",
+      "2021-09-21 19:26:53,173 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:26:53,173 Embeddings storage mode: cpu\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 19:26:53,346 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:26:53,402 epoch 1 - iter 1/7 - loss 0.87553716 - samples/sec: 24.71 - lr: 0.020000\n",
+      "2021-09-21 19:26:53,450 epoch 1 - iter 2/7 - loss 0.83721140 - samples/sec: 20.77 - lr: 0.020000\n",
+      "2021-09-21 19:26:53,499 epoch 1 - iter 3/7 - loss 0.78442236 - samples/sec: 21.03 - lr: 0.020000\n",
+      "2021-09-21 19:26:53,547 epoch 1 - iter 4/7 - loss 0.66121461 - samples/sec: 20.89 - lr: 0.020000\n",
+      "2021-09-21 19:26:53,595 epoch 1 - iter 5/7 - loss 0.72421756 - samples/sec: 21.12 - lr: 0.020000\n",
+      "2021-09-21 19:26:53,644 epoch 1 - iter 6/7 - loss 0.70505032 - samples/sec: 20.81 - lr: 0.020000\n",
+      "2021-09-21 19:26:53,697 epoch 1 - iter 7/7 - loss 0.71455204 - samples/sec: 19.04 - lr: 0.020000\n",
+      "2021-09-21 19:26:53,698 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:26:53,698 EPOCH 1 done: loss 0.7146 - lr 0.0200000\n",
+      "2021-09-21 19:26:53,985 DEV : loss 0.4267440736293793 - score 0.0\n",
+      "2021-09-21 19:26:53,986 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:27:10,609 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:27:10,675 epoch 2 - iter 1/7 - loss 0.35487890 - samples/sec: 19.61 - lr: 0.020000\n",
+      "2021-09-21 19:27:10,722 epoch 2 - iter 2/7 - loss 0.50118831 - samples/sec: 21.53 - lr: 0.020000\n",
+      "2021-09-21 19:27:10,786 epoch 2 - iter 3/7 - loss 0.56316109 - samples/sec: 15.90 - lr: 0.020000\n",
+      "2021-09-21 19:27:10,850 epoch 2 - iter 4/7 - loss 0.50379767 - samples/sec: 15.73 - lr: 0.020000\n",
+      "2021-09-21 19:27:10,909 epoch 2 - iter 5/7 - loss 0.52728936 - samples/sec: 17.23 - lr: 0.020000\n",
+      "2021-09-21 19:27:10,962 epoch 2 - iter 6/7 - loss 0.57888279 - samples/sec: 18.97 - lr: 0.020000\n",
+      "2021-09-21 19:27:11,017 epoch 2 - iter 7/7 - loss 0.58504294 - samples/sec: 18.34 - lr: 0.020000\n",
+      "2021-09-21 19:27:11,018 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:27:11,019 EPOCH 2 done: loss 0.5850 - lr 0.0200000\n",
+      "2021-09-21 19:27:11,059 DEV : loss 0.19772079586982727 - score 0.0\n",
+      "2021-09-21 19:27:11,060 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:58:04,511 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:04,573 epoch 2 - iter 1/7 - loss 0.77046871 - samples/sec: 20.99 - lr: 0.020000\n",
-      "2021-09-08 10:58:04,621 epoch 2 - iter 2/7 - loss 0.64248422 - samples/sec: 21.25 - lr: 0.020000\n",
-      "2021-09-08 10:58:04,669 epoch 2 - iter 3/7 - loss 0.63999820 - samples/sec: 20.87 - lr: 0.020000\n",
-      "2021-09-08 10:58:04,716 epoch 2 - iter 4/7 - loss 0.61088040 - samples/sec: 21.80 - lr: 0.020000\n",
-      "2021-09-08 10:58:04,762 epoch 2 - iter 5/7 - loss 0.61835822 - samples/sec: 21.80 - lr: 0.020000\n",
-      "2021-09-08 10:58:04,809 epoch 2 - iter 6/7 - loss 0.56212821 - samples/sec: 21.35 - lr: 0.020000\n",
-      "2021-09-08 10:58:04,856 epoch 2 - iter 7/7 - loss 0.57523624 - samples/sec: 21.61 - lr: 0.020000\n",
-      "2021-09-08 10:58:04,857 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:04,857 EPOCH 2 done: loss 0.5752 - lr 0.0200000\n",
-      "2021-09-08 10:58:07,076 DEV : loss 0.34895390272140503 - score 0.0\n",
-      "2021-09-08 10:58:07,077 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:58:07,079 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:07,161 epoch 3 - iter 1/7 - loss 0.34140435 - samples/sec: 15.96 - lr: 0.020000\n",
-      "2021-09-08 10:58:07,223 epoch 3 - iter 2/7 - loss 0.34768660 - samples/sec: 16.26 - lr: 0.020000\n",
-      "2021-09-08 10:58:07,279 epoch 3 - iter 3/7 - loss 0.28812957 - samples/sec: 18.09 - lr: 0.020000\n",
-      "2021-09-08 10:58:07,330 epoch 3 - iter 4/7 - loss 0.45676540 - samples/sec: 19.66 - lr: 0.020000\n",
-      "2021-09-08 10:58:07,379 epoch 3 - iter 5/7 - loss 0.45960539 - samples/sec: 20.51 - lr: 0.020000\n",
-      "2021-09-08 10:58:07,433 epoch 3 - iter 6/7 - loss 0.50897899 - samples/sec: 18.56 - lr: 0.020000\n",
-      "2021-09-08 10:58:07,495 epoch 3 - iter 7/7 - loss 0.49680821 - samples/sec: 16.34 - lr: 0.020000\n",
-      "2021-09-08 10:58:07,496 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:07,496 EPOCH 3 done: loss 0.4968 - lr 0.0200000\n",
-      "2021-09-08 10:58:07,541 DEV : loss 1.0402730703353882 - score 0.0\n",
-      "2021-09-08 10:58:07,542 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:58:07,544 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:07,618 epoch 4 - iter 1/7 - loss 0.70301515 - samples/sec: 19.78 - lr: 0.020000\n",
-      "2021-09-08 10:58:07,676 epoch 4 - iter 2/7 - loss 0.61170772 - samples/sec: 17.36 - lr: 0.020000\n",
-      "2021-09-08 10:58:07,733 epoch 4 - iter 3/7 - loss 0.61905964 - samples/sec: 17.64 - lr: 0.020000\n",
-      "2021-09-08 10:58:07,790 epoch 4 - iter 4/7 - loss 0.63305034 - samples/sec: 17.63 - lr: 0.020000\n",
-      "2021-09-08 10:58:07,847 epoch 4 - iter 5/7 - loss 0.53582585 - samples/sec: 17.75 - lr: 0.020000\n",
-      "2021-09-08 10:58:07,895 epoch 4 - iter 6/7 - loss 0.49160899 - samples/sec: 21.06 - lr: 0.020000\n",
-      "2021-09-08 10:58:07,952 epoch 4 - iter 7/7 - loss 0.46774420 - samples/sec: 17.84 - lr: 0.020000\n",
-      "2021-09-08 10:58:07,953 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:07,953 EPOCH 4 done: loss 0.4677 - lr 0.0200000\n",
-      "2021-09-08 10:58:07,982 DEV : loss 0.21015214920043945 - score 0.0\n",
-      "2021-09-08 10:58:07,984 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:27:21,593 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:27:21,656 epoch 3 - iter 1/7 - loss 0.57154197 - samples/sec: 20.55 - lr: 0.020000\n",
+      "2021-09-21 19:27:21,732 epoch 3 - iter 2/7 - loss 0.59929389 - samples/sec: 13.25 - lr: 0.020000\n",
+      "2021-09-21 19:27:21,809 epoch 3 - iter 3/7 - loss 0.60385742 - samples/sec: 13.09 - lr: 0.020000\n",
+      "2021-09-21 19:27:21,888 epoch 3 - iter 4/7 - loss 0.61184570 - samples/sec: 12.72 - lr: 0.020000\n",
+      "2021-09-21 19:27:21,977 epoch 3 - iter 5/7 - loss 0.57402201 - samples/sec: 11.39 - lr: 0.020000\n",
+      "2021-09-21 19:27:22,061 epoch 3 - iter 6/7 - loss 0.59730190 - samples/sec: 11.94 - lr: 0.020000\n",
+      "2021-09-21 19:27:22,150 epoch 3 - iter 7/7 - loss 0.56206987 - samples/sec: 11.27 - lr: 0.020000\n",
+      "2021-09-21 19:27:22,151 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:27:22,152 EPOCH 3 done: loss 0.5621 - lr 0.0200000\n",
+      "2021-09-21 19:27:22,235 DEV : loss 0.9152153730392456 - score 0.0\n",
+      "2021-09-21 19:27:22,236 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:27:22,238 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:27:22,383 epoch 4 - iter 1/7 - loss 0.57172889 - samples/sec: 12.06 - lr: 0.020000\n",
+      "2021-09-21 19:27:22,469 epoch 4 - iter 2/7 - loss 0.43551055 - samples/sec: 11.73 - lr: 0.020000\n",
+      "2021-09-21 19:27:22,540 epoch 4 - iter 3/7 - loss 0.46957461 - samples/sec: 14.16 - lr: 0.020000\n",
+      "2021-09-21 19:27:22,619 epoch 4 - iter 4/7 - loss 0.47106122 - samples/sec: 12.79 - lr: 0.020000\n",
+      "2021-09-21 19:27:22,695 epoch 4 - iter 5/7 - loss 0.47528573 - samples/sec: 13.17 - lr: 0.020000\n",
+      "2021-09-21 19:27:22,782 epoch 4 - iter 6/7 - loss 0.41984519 - samples/sec: 11.59 - lr: 0.020000\n",
+      "2021-09-21 19:27:22,878 epoch 4 - iter 7/7 - loss 0.44281084 - samples/sec: 10.45 - lr: 0.020000\n",
+      "2021-09-21 19:27:22,879 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:27:22,880 EPOCH 4 done: loss 0.4428 - lr 0.0200000\n",
+      "2021-09-21 19:27:22,938 DEV : loss 0.15898281335830688 - score 0.0\n",
+      "2021-09-21 19:27:22,941 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:58:18,649 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:18,713 epoch 5 - iter 1/7 - loss 0.61147350 - samples/sec: 20.78 - lr: 0.020000\n",
-      "2021-09-08 10:58:18,760 epoch 5 - iter 2/7 - loss 0.44151519 - samples/sec: 21.44 - lr: 0.020000\n",
-      "2021-09-08 10:58:18,806 epoch 5 - iter 3/7 - loss 0.34759180 - samples/sec: 21.78 - lr: 0.020000\n",
-      "2021-09-08 10:58:18,854 epoch 5 - iter 4/7 - loss 0.39774154 - samples/sec: 21.18 - lr: 0.020000\n",
-      "2021-09-08 10:58:18,901 epoch 5 - iter 5/7 - loss 0.40902704 - samples/sec: 21.68 - lr: 0.020000\n",
-      "2021-09-08 10:58:18,948 epoch 5 - iter 6/7 - loss 0.41219003 - samples/sec: 21.43 - lr: 0.020000\n",
-      "2021-09-08 10:58:18,995 epoch 5 - iter 7/7 - loss 0.44288913 - samples/sec: 21.56 - lr: 0.020000\n",
-      "2021-09-08 10:58:18,996 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:18,996 EPOCH 5 done: loss 0.4429 - lr 0.0200000\n",
-      "2021-09-08 10:58:21,423 DEV : loss 0.31498581171035767 - score 0.0\n",
-      "2021-09-08 10:58:21,424 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:58:21,778 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:21,854 epoch 6 - iter 1/7 - loss 0.26712567 - samples/sec: 17.33 - lr: 0.020000\n",
-      "2021-09-08 10:58:21,910 epoch 6 - iter 2/7 - loss 0.37190448 - samples/sec: 18.20 - lr: 0.020000\n",
-      "2021-09-08 10:58:21,962 epoch 6 - iter 3/7 - loss 0.39049565 - samples/sec: 19.18 - lr: 0.020000\n",
-      "2021-09-08 10:58:22,009 epoch 6 - iter 4/7 - loss 0.30606608 - samples/sec: 21.56 - lr: 0.020000\n",
-      "2021-09-08 10:58:22,062 epoch 6 - iter 5/7 - loss 0.35164723 - samples/sec: 18.98 - lr: 0.020000\n",
-      "2021-09-08 10:58:22,114 epoch 6 - iter 6/7 - loss 0.35655789 - samples/sec: 19.40 - lr: 0.020000\n",
-      "2021-09-08 10:58:22,165 epoch 6 - iter 7/7 - loss 0.34839247 - samples/sec: 19.83 - lr: 0.020000\n",
-      "2021-09-08 10:58:22,166 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:22,166 EPOCH 6 done: loss 0.3484 - lr 0.0200000\n",
-      "2021-09-08 10:58:22,196 DEV : loss 0.168984055519104 - score 0.0\n",
-      "2021-09-08 10:58:22,197 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:27:26,745 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:27:26,875 epoch 5 - iter 1/7 - loss 0.56248981 - samples/sec: 14.08 - lr: 0.020000\n",
+      "2021-09-21 19:27:26,957 epoch 5 - iter 2/7 - loss 0.39980441 - samples/sec: 12.18 - lr: 0.020000\n",
+      "2021-09-21 19:27:27,036 epoch 5 - iter 3/7 - loss 0.31178100 - samples/sec: 12.75 - lr: 0.020000\n",
+      "2021-09-21 19:27:27,120 epoch 5 - iter 4/7 - loss 0.42480375 - samples/sec: 12.04 - lr: 0.020000\n",
+      "2021-09-21 19:27:27,195 epoch 5 - iter 5/7 - loss 0.47009340 - samples/sec: 13.42 - lr: 0.020000\n",
+      "2021-09-21 19:27:27,297 epoch 5 - iter 6/7 - loss 0.51495864 - samples/sec: 9.84 - lr: 0.020000\n",
+      "2021-09-21 19:27:27,371 epoch 5 - iter 7/7 - loss 0.50855843 - samples/sec: 13.68 - lr: 0.020000\n",
+      "2021-09-21 19:27:27,372 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:27:27,372 EPOCH 5 done: loss 0.5086 - lr 0.0200000\n",
+      "2021-09-21 19:27:27,463 DEV : loss 0.4222409725189209 - score 0.0\n",
+      "2021-09-21 19:27:27,466 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:27:27,468 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:27:27,602 epoch 6 - iter 1/7 - loss 0.34558830 - samples/sec: 11.64 - lr: 0.020000\n",
+      "2021-09-21 19:27:27,684 epoch 6 - iter 2/7 - loss 0.46355094 - samples/sec: 12.27 - lr: 0.020000\n",
+      "2021-09-21 19:27:27,761 epoch 6 - iter 3/7 - loss 0.53625259 - samples/sec: 12.94 - lr: 0.020000\n",
+      "2021-09-21 19:27:27,835 epoch 6 - iter 4/7 - loss 0.48668846 - samples/sec: 13.60 - lr: 0.020000\n",
+      "2021-09-21 19:27:27,913 epoch 6 - iter 5/7 - loss 0.48905509 - samples/sec: 13.01 - lr: 0.020000\n",
+      "2021-09-21 19:27:28,010 epoch 6 - iter 6/7 - loss 0.43307793 - samples/sec: 10.33 - lr: 0.020000\n",
+      "2021-09-21 19:27:28,104 epoch 6 - iter 7/7 - loss 0.56609525 - samples/sec: 10.71 - lr: 0.020000\n",
+      "2021-09-21 19:27:28,105 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:27:28,105 EPOCH 6 done: loss 0.5661 - lr 0.0200000\n",
+      "2021-09-21 19:27:28,165 DEV : loss 0.11463379859924316 - score 0.0\n",
+      "2021-09-21 19:27:28,168 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:58:27,561 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:27,635 epoch 7 - iter 1/7 - loss 0.38853157 - samples/sec: 18.60 - lr: 0.020000\n"
+      "2021-09-21 19:27:32,154 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:27:32,302 epoch 7 - iter 1/7 - loss 0.34187293 - samples/sec: 13.20 - lr: 0.020000\n",
+      "2021-09-21 19:27:32,405 epoch 7 - iter 2/7 - loss 0.42650589 - samples/sec: 9.79 - lr: 0.020000\n",
+      "2021-09-21 19:27:32,464 epoch 7 - iter 3/7 - loss 0.41658317 - samples/sec: 17.09 - lr: 0.020000\n",
+      "2021-09-21 19:27:32,525 epoch 7 - iter 4/7 - loss 0.33577294 - samples/sec: 16.45 - lr: 0.020000\n",
+      "2021-09-21 19:27:32,612 epoch 7 - iter 5/7 - loss 0.31581307 - samples/sec: 11.69 - lr: 0.020000\n",
+      "2021-09-21 19:27:32,689 epoch 7 - iter 6/7 - loss 0.32646240 - samples/sec: 12.93 - lr: 0.020000\n",
+      "2021-09-21 19:27:32,782 epoch 7 - iter 7/7 - loss 0.36581467 - samples/sec: 10.86 - lr: 0.020000\n",
+      "2021-09-21 19:27:32,783 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:27:32,783 EPOCH 7 done: loss 0.3658 - lr 0.0200000\n",
+      "2021-09-21 19:27:32,833 DEV : loss 0.4965215027332306 - score 0.0\n",
+      "2021-09-21 19:27:32,838 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:27:32,839 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:27:32,962 epoch 8 - iter 1/7 - loss 0.28681889 - samples/sec: 13.76 - lr: 0.020000\n",
+      "2021-09-21 19:27:33,038 epoch 8 - iter 2/7 - loss 0.44818179 - samples/sec: 13.26 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:58:27,688 epoch 7 - iter 2/7 - loss 0.36514415 - samples/sec: 18.84 - lr: 0.020000\n",
-      "2021-09-08 10:58:27,739 epoch 7 - iter 3/7 - loss 0.34148693 - samples/sec: 19.87 - lr: 0.020000\n",
-      "2021-09-08 10:58:27,791 epoch 7 - iter 4/7 - loss 0.37643661 - samples/sec: 19.26 - lr: 0.020000\n",
-      "2021-09-08 10:58:27,840 epoch 7 - iter 5/7 - loss 0.32160269 - samples/sec: 20.87 - lr: 0.020000\n",
-      "2021-09-08 10:58:27,892 epoch 7 - iter 6/7 - loss 0.43001217 - samples/sec: 19.18 - lr: 0.020000\n",
-      "2021-09-08 10:58:27,945 epoch 7 - iter 7/7 - loss 0.39350460 - samples/sec: 19.16 - lr: 0.020000\n",
-      "2021-09-08 10:58:27,946 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:27,946 EPOCH 7 done: loss 0.3935 - lr 0.0200000\n",
-      "2021-09-08 10:58:28,557 DEV : loss 0.2186160534620285 - score 0.0\n",
-      "2021-09-08 10:58:28,558 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:58:28,560 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:28,623 epoch 8 - iter 1/7 - loss 0.03958773 - samples/sec: 21.29 - lr: 0.020000\n",
-      "2021-09-08 10:58:28,677 epoch 8 - iter 2/7 - loss 0.21832780 - samples/sec: 18.49 - lr: 0.020000\n",
-      "2021-09-08 10:58:28,724 epoch 8 - iter 3/7 - loss 0.17230028 - samples/sec: 21.44 - lr: 0.020000\n",
-      "2021-09-08 10:58:28,775 epoch 8 - iter 4/7 - loss 0.19950713 - samples/sec: 19.89 - lr: 0.020000\n",
-      "2021-09-08 10:58:28,825 epoch 8 - iter 5/7 - loss 0.19567190 - samples/sec: 20.12 - lr: 0.020000\n",
-      "2021-09-08 10:58:28,878 epoch 8 - iter 6/7 - loss 0.28015976 - samples/sec: 19.22 - lr: 0.020000\n",
-      "2021-09-08 10:58:28,928 epoch 8 - iter 7/7 - loss 0.27839008 - samples/sec: 20.19 - lr: 0.020000\n",
-      "2021-09-08 10:58:28,929 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:28,929 EPOCH 8 done: loss 0.2784 - lr 0.0200000\n",
-      "2021-09-08 10:58:28,963 DEV : loss 0.1375834345817566 - score 0.0\n",
-      "2021-09-08 10:58:28,964 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:58:33,021 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:33,081 epoch 9 - iter 1/7 - loss 0.02583387 - samples/sec: 22.50 - lr: 0.020000\n",
-      "2021-09-08 10:58:33,128 epoch 9 - iter 2/7 - loss 0.12566727 - samples/sec: 21.24 - lr: 0.020000\n",
-      "2021-09-08 10:58:33,175 epoch 9 - iter 3/7 - loss 0.59878118 - samples/sec: 21.66 - lr: 0.020000\n",
-      "2021-09-08 10:58:33,221 epoch 9 - iter 4/7 - loss 0.51531571 - samples/sec: 21.68 - lr: 0.020000\n",
-      "2021-09-08 10:58:33,265 epoch 9 - iter 5/7 - loss 0.42854730 - samples/sec: 23.17 - lr: 0.020000\n",
-      "2021-09-08 10:58:33,311 epoch 9 - iter 6/7 - loss 0.38548878 - samples/sec: 21.63 - lr: 0.020000\n",
-      "2021-09-08 10:58:33,361 epoch 9 - iter 7/7 - loss 0.37214066 - samples/sec: 20.40 - lr: 0.020000\n",
-      "2021-09-08 10:58:33,362 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:33,362 EPOCH 9 done: loss 0.3721 - lr 0.0200000\n",
-      "2021-09-08 10:58:33,391 DEV : loss 0.1571541428565979 - score 0.0\n",
-      "2021-09-08 10:58:33,391 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:58:33,393 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:33,453 epoch 10 - iter 1/7 - loss 0.11793733 - samples/sec: 21.61 - lr: 0.020000\n",
-      "2021-09-08 10:58:33,499 epoch 10 - iter 2/7 - loss 0.32033631 - samples/sec: 21.82 - lr: 0.020000\n",
-      "2021-09-08 10:58:33,547 epoch 10 - iter 3/7 - loss 0.27185835 - samples/sec: 20.92 - lr: 0.020000\n",
-      "2021-09-08 10:58:33,591 epoch 10 - iter 4/7 - loss 0.22213122 - samples/sec: 22.96 - lr: 0.020000\n",
-      "2021-09-08 10:58:33,636 epoch 10 - iter 5/7 - loss 0.18975638 - samples/sec: 22.68 - lr: 0.020000\n",
-      "2021-09-08 10:58:33,680 epoch 10 - iter 6/7 - loss 0.17048980 - samples/sec: 23.08 - lr: 0.020000\n",
-      "2021-09-08 10:58:33,723 epoch 10 - iter 7/7 - loss 0.15078687 - samples/sec: 23.07 - lr: 0.020000\n",
-      "2021-09-08 10:58:33,724 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:33,725 EPOCH 10 done: loss 0.1508 - lr 0.0200000\n",
-      "2021-09-08 10:58:33,752 DEV : loss 0.302469402551651 - score 0.0\n",
-      "2021-09-08 10:58:33,753 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:58:39,020 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:39,021 Testing using best model ...\n",
-      "2021-09-08 10:58:39,051 loading file None/best-model.pt\n",
+      "2021-09-21 19:27:33,125 epoch 8 - iter 3/7 - loss 0.56458450 - samples/sec: 11.54 - lr: 0.020000\n",
+      "2021-09-21 19:27:33,205 epoch 8 - iter 4/7 - loss 0.61332961 - samples/sec: 12.66 - lr: 0.020000\n",
+      "2021-09-21 19:27:33,295 epoch 8 - iter 5/7 - loss 0.56523499 - samples/sec: 11.17 - lr: 0.020000\n",
+      "2021-09-21 19:27:33,395 epoch 8 - iter 6/7 - loss 0.51457770 - samples/sec: 9.99 - lr: 0.020000\n",
+      "2021-09-21 19:27:33,461 epoch 8 - iter 7/7 - loss 0.44885586 - samples/sec: 15.45 - lr: 0.020000\n",
+      "2021-09-21 19:27:33,462 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:27:33,462 EPOCH 8 done: loss 0.4489 - lr 0.0200000\n",
+      "2021-09-21 19:27:33,537 DEV : loss 0.20464102923870087 - score 0.0\n",
+      "2021-09-21 19:27:33,538 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:27:33,540 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:27:33,688 epoch 9 - iter 1/7 - loss 0.23685138 - samples/sec: 12.49 - lr: 0.020000\n",
+      "2021-09-21 19:27:33,770 epoch 9 - iter 2/7 - loss 0.13085271 - samples/sec: 12.31 - lr: 0.020000\n",
+      "2021-09-21 19:27:33,879 epoch 9 - iter 3/7 - loss 0.11516496 - samples/sec: 9.21 - lr: 0.020000\n",
+      "2021-09-21 19:27:33,951 epoch 9 - iter 4/7 - loss 0.16672134 - samples/sec: 14.01 - lr: 0.020000\n",
+      "2021-09-21 19:27:34,028 epoch 9 - iter 5/7 - loss 0.26479920 - samples/sec: 13.02 - lr: 0.020000\n",
+      "2021-09-21 19:27:34,112 epoch 9 - iter 6/7 - loss 0.24294938 - samples/sec: 11.95 - lr: 0.020000\n",
+      "2021-09-21 19:27:34,186 epoch 9 - iter 7/7 - loss 0.24714676 - samples/sec: 13.63 - lr: 0.020000\n",
+      "2021-09-21 19:27:34,187 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:27:34,187 EPOCH 9 done: loss 0.2471 - lr 0.0200000\n",
+      "2021-09-21 19:27:34,266 DEV : loss 0.21226227283477783 - score 0.0\n",
+      "2021-09-21 19:27:34,267 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:27:34,269 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:27:34,397 epoch 10 - iter 1/7 - loss 0.16254868 - samples/sec: 12.42 - lr: 0.020000\n",
+      "2021-09-21 19:27:34,489 epoch 10 - iter 2/7 - loss 0.11854818 - samples/sec: 10.94 - lr: 0.020000\n",
+      "2021-09-21 19:27:34,566 epoch 10 - iter 3/7 - loss 0.24203846 - samples/sec: 13.07 - lr: 0.020000\n",
+      "2021-09-21 19:27:34,647 epoch 10 - iter 4/7 - loss 0.25762276 - samples/sec: 12.46 - lr: 0.020000\n",
+      "2021-09-21 19:27:34,732 epoch 10 - iter 5/7 - loss 0.26750149 - samples/sec: 11.77 - lr: 0.020000\n",
+      "2021-09-21 19:27:34,828 epoch 10 - iter 6/7 - loss 0.35844698 - samples/sec: 10.49 - lr: 0.020000\n",
+      "2021-09-21 19:27:34,913 epoch 10 - iter 7/7 - loss 0.34067145 - samples/sec: 11.88 - lr: 0.020000\n",
+      "2021-09-21 19:27:34,914 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:27:34,914 EPOCH 10 done: loss 0.3407 - lr 0.0200000\n",
+      "2021-09-21 19:27:34,959 DEV : loss 0.13235856592655182 - score 0.0\n",
+      "Epoch    10: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 19:27:34,961 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:27:38,865 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:27:38,866 Testing using best model ...\n",
+      "2021-09-21 19:27:38,867 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:58:44,652 \t0.0\n",
-      "2021-09-08 10:58:44,652 \n",
+      "2021-09-21 19:27:44,159 \t0.0\n",
+      "2021-09-21 19:27:44,159 \n",
       "Results:\n",
       "- F-score (micro) 0.0\n",
       "- F-score (macro) 0.0\n",
@@ -3804,53 +3822,40 @@
       "By class:\n",
       "                              precision    recall  f1-score   support\n",
       "\n",
-      "     this text expresses joy     0.0000    0.0000    0.0000         0\n",
       "this text expresses surprise     0.0000    0.0000    0.0000         0\n",
       "    this text expresses love     0.0000    0.0000    0.0000         0\n",
       " this text expresses disgust     0.0000    0.0000    0.0000         0\n",
-      "    this text expresses fear     0.0000    0.0000    0.0000         0\n",
+      "   this text expresses anger     0.0000    0.0000    0.0000         0\n",
       "   this text expresses guilt     0.0000    0.0000    0.0000         0\n",
       "   this text expresses shame     0.0000    0.0000    0.0000         0\n",
-      "   this text expresses anger     0.0000    0.0000    0.0000         1\n",
+      " this text expresses sadness     0.0000    0.0000    0.0000         0\n",
+      "     this text expresses joy     0.0000    0.0000    0.0000         1\n",
       "\n",
       "                   micro avg     0.0000    0.0000    0.0000         1\n",
       "                   macro avg     0.0000    0.0000    0.0000         1\n",
       "                weighted avg     0.0000    0.0000    0.0000         1\n",
       "                 samples avg     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "2021-09-08 10:58:44,652 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:31,453 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
+      "2021-09-21 19:27:44,160 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:29:30,553 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:59:36,503 Computing label dictionary. Progress:\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "100%|██████████| 8/8 [00:00<00:00, 28851.62it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 10:59:36,505 [b'this text expresses joy', b'this text expresses surprise', b'this text expresses love', b'this text expresses disgust', b'this text expresses anger', b'this text expresses shame', b'this text expresses sadness', b'this text expresses fear']\n"
+      "2021-09-21 19:29:34,750 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\n"
+      "100%|██████████| 8/8 [00:00<00:00, 32608.78it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:59:36,711 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:36,712 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:29:34,753 [b'this text expresses joy', b'this text expresses surprise', b'this text expresses love', b'this text expresses fear', b'this text expresses guilt', b'this text expresses shame', b'this text expresses sadness', b'this text expresses disgust']\n",
+      "2021-09-21 19:29:34,762 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:29:34,763 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -4163,159 +4168,175 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:59:36,713 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:36,713 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
-      "2021-09-08 10:59:36,714 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:36,714 Parameters:\n",
-      "2021-09-08 10:59:36,714  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:59:36,714  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:59:36,715  - patience: \"3\"\n",
-      "2021-09-08 10:59:36,715  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:59:36,715  - max_epochs: \"10\"\n",
-      "2021-09-08 10:59:36,716  - shuffle: \"True\"\n",
-      "2021-09-08 10:59:36,716  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:59:36,716  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:59:36,716 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:36,717 Model training base path: \"None\"\n",
-      "2021-09-08 10:59:36,717 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:36,717 Device: cuda:1\n",
-      "2021-09-08 10:59:36,718 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:36,718 Embeddings storage mode: cpu\n",
-      "2021-09-08 10:59:36,996 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:37,048 epoch 1 - iter 1/7 - loss 0.14852200 - samples/sec: 27.09 - lr: 0.020000\n",
-      "2021-09-08 10:59:37,095 epoch 1 - iter 2/7 - loss 0.41074737 - samples/sec: 21.29 - lr: 0.020000\n",
-      "2021-09-08 10:59:37,142 epoch 1 - iter 3/7 - loss 0.74068794 - samples/sec: 21.33 - lr: 0.020000\n",
-      "2021-09-08 10:59:37,189 epoch 1 - iter 4/7 - loss 0.71690254 - samples/sec: 21.63 - lr: 0.020000\n",
-      "2021-09-08 10:59:37,237 epoch 1 - iter 5/7 - loss 0.75429353 - samples/sec: 21.24 - lr: 0.020000\n",
-      "2021-09-08 10:59:37,283 epoch 1 - iter 6/7 - loss 0.79206315 - samples/sec: 21.61 - lr: 0.020000\n",
-      "2021-09-08 10:59:37,331 epoch 1 - iter 7/7 - loss 0.77024117 - samples/sec: 21.19 - lr: 0.020000\n",
-      "2021-09-08 10:59:37,332 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:37,332 EPOCH 1 done: loss 0.7702 - lr 0.0200000\n",
-      "2021-09-08 10:59:37,652 DEV : loss 0.6044533252716064 - score 0.0\n",
-      "2021-09-08 10:59:37,653 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:29:34,764 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:29:34,764 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
+      "2021-09-21 19:29:34,765 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:29:34,765 Parameters:\n",
+      "2021-09-21 19:29:34,766  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:29:34,766  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:29:34,767  - patience: \"3\"\n",
+      "2021-09-21 19:29:34,767  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:29:34,768  - max_epochs: \"10\"\n",
+      "2021-09-21 19:29:34,768  - shuffle: \"True\"\n",
+      "2021-09-21 19:29:34,768  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:29:34,769  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:29:34,769 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:29:34,770 Model training base path: \"None\"\n",
+      "2021-09-21 19:29:34,770 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:29:34,771 Device: cuda:0\n",
+      "2021-09-21 19:29:34,771 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:29:34,772 Embeddings storage mode: cpu\n",
+      "2021-09-21 19:29:34,779 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:29:34,883 epoch 1 - iter 1/7 - loss 0.30280837 - samples/sec: 15.82 - lr: 0.020000\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 19:29:34,958 epoch 1 - iter 2/7 - loss 0.45822336 - samples/sec: 13.50 - lr: 0.020000\n",
+      "2021-09-21 19:29:35,022 epoch 1 - iter 3/7 - loss 0.52411041 - samples/sec: 15.55 - lr: 0.020000\n",
+      "2021-09-21 19:29:35,085 epoch 1 - iter 4/7 - loss 0.60550048 - samples/sec: 16.22 - lr: 0.020000\n",
+      "2021-09-21 19:29:35,146 epoch 1 - iter 5/7 - loss 0.56798980 - samples/sec: 16.51 - lr: 0.020000\n",
+      "2021-09-21 19:29:35,212 epoch 1 - iter 6/7 - loss 0.58930935 - samples/sec: 15.23 - lr: 0.020000\n",
+      "2021-09-21 19:29:35,281 epoch 1 - iter 7/7 - loss 0.57624903 - samples/sec: 14.51 - lr: 0.020000\n",
+      "2021-09-21 19:29:35,282 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:29:35,283 EPOCH 1 done: loss 0.5762 - lr 0.0200000\n",
+      "2021-09-21 19:29:35,335 DEV : loss 0.5891553163528442 - score 0.0\n",
+      "2021-09-21 19:29:35,336 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:29:39,488 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:29:39,584 epoch 2 - iter 1/7 - loss 0.68512440 - samples/sec: 15.85 - lr: 0.020000\n",
+      "2021-09-21 19:29:39,640 epoch 2 - iter 2/7 - loss 0.65457526 - samples/sec: 17.83 - lr: 0.020000\n",
+      "2021-09-21 19:29:39,694 epoch 2 - iter 3/7 - loss 0.64311282 - samples/sec: 18.90 - lr: 0.020000\n",
+      "2021-09-21 19:29:39,756 epoch 2 - iter 4/7 - loss 0.61824577 - samples/sec: 16.07 - lr: 0.020000\n",
+      "2021-09-21 19:29:39,824 epoch 2 - iter 5/7 - loss 0.63163261 - samples/sec: 14.95 - lr: 0.020000\n",
+      "2021-09-21 19:29:39,902 epoch 2 - iter 6/7 - loss 0.63992915 - samples/sec: 12.84 - lr: 0.020000\n",
+      "2021-09-21 19:29:39,956 epoch 2 - iter 7/7 - loss 0.65814313 - samples/sec: 18.80 - lr: 0.020000\n",
+      "2021-09-21 19:29:39,957 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:29:39,957 EPOCH 2 done: loss 0.6581 - lr 0.0200000\n",
+      "2021-09-21 19:29:40,005 DEV : loss 0.5404208302497864 - score 0.0\n",
+      "2021-09-21 19:29:40,008 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:29:43,952 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:29:44,088 epoch 3 - iter 1/7 - loss 0.52440983 - samples/sec: 14.81 - lr: 0.020000\n",
+      "2021-09-21 19:29:44,145 epoch 3 - iter 2/7 - loss 0.58965486 - samples/sec: 17.75 - lr: 0.020000\n",
+      "2021-09-21 19:29:44,208 epoch 3 - iter 3/7 - loss 0.61009787 - samples/sec: 16.04 - lr: 0.020000\n",
+      "2021-09-21 19:29:44,279 epoch 3 - iter 4/7 - loss 0.61598369 - samples/sec: 14.22 - lr: 0.020000\n",
+      "2021-09-21 19:29:44,353 epoch 3 - iter 5/7 - loss 0.65985992 - samples/sec: 13.51 - lr: 0.020000\n",
+      "2021-09-21 19:29:44,409 epoch 3 - iter 6/7 - loss 0.65783561 - samples/sec: 17.97 - lr: 0.020000\n",
+      "2021-09-21 19:29:44,465 epoch 3 - iter 7/7 - loss 0.68022889 - samples/sec: 17.93 - lr: 0.020000\n",
+      "2021-09-21 19:29:44,466 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:29:44,467 EPOCH 3 done: loss 0.6802 - lr 0.0200000\n",
+      "2021-09-21 19:29:44,526 DEV : loss 0.39725929498672485 - score 0.0\n",
+      "2021-09-21 19:29:44,529 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:59:43,802 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:43,874 epoch 2 - iter 1/7 - loss 0.45016980 - samples/sec: 19.07 - lr: 0.020000\n",
-      "2021-09-08 10:59:43,927 epoch 2 - iter 2/7 - loss 0.51921540 - samples/sec: 18.97 - lr: 0.020000\n",
-      "2021-09-08 10:59:43,977 epoch 2 - iter 3/7 - loss 0.54214068 - samples/sec: 20.45 - lr: 0.020000\n",
-      "2021-09-08 10:59:44,025 epoch 2 - iter 4/7 - loss 0.46787795 - samples/sec: 21.06 - lr: 0.020000\n",
-      "2021-09-08 10:59:44,076 epoch 2 - iter 5/7 - loss 0.54649378 - samples/sec: 19.50 - lr: 0.020000\n",
-      "2021-09-08 10:59:44,133 epoch 2 - iter 6/7 - loss 0.60579594 - samples/sec: 17.87 - lr: 0.020000\n",
-      "2021-09-08 10:59:44,185 epoch 2 - iter 7/7 - loss 0.56632338 - samples/sec: 19.37 - lr: 0.020000\n",
-      "2021-09-08 10:59:44,186 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:44,186 EPOCH 2 done: loss 0.5663 - lr 0.0200000\n",
-      "2021-09-08 10:59:44,221 DEV : loss 0.7247359752655029 - score 0.0\n",
-      "2021-09-08 10:59:44,221 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:59:44,223 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:44,293 epoch 3 - iter 1/7 - loss 0.13372444 - samples/sec: 20.36 - lr: 0.020000\n",
-      "2021-09-08 10:59:44,343 epoch 3 - iter 2/7 - loss 0.29303036 - samples/sec: 19.96 - lr: 0.020000\n",
-      "2021-09-08 10:59:44,394 epoch 3 - iter 3/7 - loss 0.27748253 - samples/sec: 20.11 - lr: 0.020000\n",
-      "2021-09-08 10:59:44,448 epoch 3 - iter 4/7 - loss 0.33110132 - samples/sec: 18.53 - lr: 0.020000\n",
-      "2021-09-08 10:59:44,502 epoch 3 - iter 5/7 - loss 0.47004310 - samples/sec: 18.62 - lr: 0.020000\n",
-      "2021-09-08 10:59:44,550 epoch 3 - iter 6/7 - loss 0.47918485 - samples/sec: 21.14 - lr: 0.020000\n",
-      "2021-09-08 10:59:44,597 epoch 3 - iter 7/7 - loss 0.45811621 - samples/sec: 21.42 - lr: 0.020000\n",
-      "2021-09-08 10:59:44,598 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:44,598 EPOCH 3 done: loss 0.4581 - lr 0.0200000\n",
-      "2021-09-08 10:59:44,625 DEV : loss 1.0127959251403809 - score 0.0\n",
-      "2021-09-08 10:59:44,626 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:59:44,628 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:44,688 epoch 4 - iter 1/7 - loss 0.54112065 - samples/sec: 21.43 - lr: 0.020000\n",
-      "2021-09-08 10:59:44,736 epoch 4 - iter 2/7 - loss 0.60950834 - samples/sec: 21.31 - lr: 0.020000\n",
-      "2021-09-08 10:59:44,784 epoch 4 - iter 3/7 - loss 0.66722927 - samples/sec: 20.82 - lr: 0.020000\n",
-      "2021-09-08 10:59:44,836 epoch 4 - iter 4/7 - loss 0.65869960 - samples/sec: 19.74 - lr: 0.020000\n",
-      "2021-09-08 10:59:44,892 epoch 4 - iter 5/7 - loss 0.58727428 - samples/sec: 17.99 - lr: 0.020000\n",
-      "2021-09-08 10:59:44,943 epoch 4 - iter 6/7 - loss 0.62566974 - samples/sec: 19.82 - lr: 0.020000\n",
-      "2021-09-08 10:59:44,994 epoch 4 - iter 7/7 - loss 0.60799662 - samples/sec: 19.84 - lr: 0.020000\n",
-      "2021-09-08 10:59:44,995 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:44,996 EPOCH 4 done: loss 0.6080 - lr 0.0200000\n",
-      "2021-09-08 10:59:45,136 DEV : loss 0.4626125693321228 - score 0.0\n",
-      "2021-09-08 10:59:45,136 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:29:48,662 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:29:48,766 epoch 4 - iter 1/7 - loss 0.60210657 - samples/sec: 16.09 - lr: 0.020000\n",
+      "2021-09-21 19:29:48,835 epoch 4 - iter 2/7 - loss 0.59402966 - samples/sec: 14.61 - lr: 0.020000\n",
+      "2021-09-21 19:29:48,892 epoch 4 - iter 3/7 - loss 0.60954601 - samples/sec: 17.66 - lr: 0.020000\n",
+      "2021-09-21 19:29:48,965 epoch 4 - iter 4/7 - loss 0.61000076 - samples/sec: 13.92 - lr: 0.020000\n",
+      "2021-09-21 19:29:49,026 epoch 4 - iter 5/7 - loss 0.61983683 - samples/sec: 16.33 - lr: 0.020000\n",
+      "2021-09-21 19:29:49,088 epoch 4 - iter 6/7 - loss 0.61885258 - samples/sec: 16.32 - lr: 0.020000\n",
+      "2021-09-21 19:29:49,162 epoch 4 - iter 7/7 - loss 0.62744795 - samples/sec: 13.58 - lr: 0.020000\n",
+      "2021-09-21 19:29:49,163 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:29:49,163 EPOCH 4 done: loss 0.6274 - lr 0.0200000\n",
+      "2021-09-21 19:29:49,216 DEV : loss 0.37799912691116333 - score 0.0\n",
+      "2021-09-21 19:29:49,218 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:59:51,451 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:51,514 epoch 5 - iter 1/7 - loss 0.61162841 - samples/sec: 20.97 - lr: 0.020000\n",
-      "2021-09-08 10:59:51,565 epoch 5 - iter 2/7 - loss 0.64619452 - samples/sec: 19.80 - lr: 0.020000\n",
-      "2021-09-08 10:59:51,617 epoch 5 - iter 3/7 - loss 0.71263764 - samples/sec: 19.77 - lr: 0.020000\n",
-      "2021-09-08 10:59:51,669 epoch 5 - iter 4/7 - loss 0.69784163 - samples/sec: 19.61 - lr: 0.020000\n",
-      "2021-09-08 10:59:51,720 epoch 5 - iter 5/7 - loss 0.64183160 - samples/sec: 19.67 - lr: 0.020000\n",
-      "2021-09-08 10:59:51,771 epoch 5 - iter 6/7 - loss 0.61197326 - samples/sec: 20.06 - lr: 0.020000\n",
-      "2021-09-08 10:59:51,817 epoch 5 - iter 7/7 - loss 0.54507425 - samples/sec: 22.10 - lr: 0.020000\n",
-      "2021-09-08 10:59:51,818 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:51,819 EPOCH 5 done: loss 0.5451 - lr 0.0200000\n",
-      "2021-09-08 10:59:51,848 DEV : loss 0.9640911221504211 - score 0.0\n",
-      "2021-09-08 10:59:51,848 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:59:51,850 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:51,913 epoch 6 - iter 1/7 - loss 0.12663628 - samples/sec: 20.62 - lr: 0.020000\n",
-      "2021-09-08 10:59:51,964 epoch 6 - iter 2/7 - loss 0.33709494 - samples/sec: 20.07 - lr: 0.020000\n",
-      "2021-09-08 10:59:52,015 epoch 6 - iter 3/7 - loss 0.40742179 - samples/sec: 19.79 - lr: 0.020000\n",
-      "2021-09-08 10:59:52,069 epoch 6 - iter 4/7 - loss 0.39306983 - samples/sec: 18.70 - lr: 0.020000\n",
-      "2021-09-08 10:59:52,120 epoch 6 - iter 5/7 - loss 0.42675327 - samples/sec: 19.91 - lr: 0.020000\n",
-      "2021-09-08 10:59:52,170 epoch 6 - iter 6/7 - loss 0.37609831 - samples/sec: 20.63 - lr: 0.020000\n",
-      "2021-09-08 10:59:52,214 epoch 6 - iter 7/7 - loss 0.34652348 - samples/sec: 22.63 - lr: 0.020000\n",
-      "2021-09-08 10:59:52,215 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:52,216 EPOCH 6 done: loss 0.3465 - lr 0.0200000\n",
-      "2021-09-08 10:59:52,351 DEV : loss 0.5900277495384216 - score 0.0\n",
-      "2021-09-08 10:59:52,351 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:59:52,442 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:52,503 epoch 7 - iter 1/7 - loss 0.17236416 - samples/sec: 21.52 - lr: 0.020000\n"
+      "2021-09-21 19:29:59,069 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:29:59,161 epoch 5 - iter 1/7 - loss 0.56447655 - samples/sec: 19.20 - lr: 0.020000\n",
+      "2021-09-21 19:29:59,225 epoch 5 - iter 2/7 - loss 0.58309984 - samples/sec: 15.78 - lr: 0.020000\n",
+      "2021-09-21 19:29:59,279 epoch 5 - iter 3/7 - loss 0.60387329 - samples/sec: 18.88 - lr: 0.020000\n",
+      "2021-09-21 19:29:59,342 epoch 5 - iter 4/7 - loss 0.63549724 - samples/sec: 16.00 - lr: 0.020000\n",
+      "2021-09-21 19:29:59,406 epoch 5 - iter 5/7 - loss 0.61930717 - samples/sec: 15.58 - lr: 0.020000\n",
+      "2021-09-21 19:29:59,466 epoch 5 - iter 6/7 - loss 0.61546729 - samples/sec: 16.84 - lr: 0.020000\n",
+      "2021-09-21 19:29:59,521 epoch 5 - iter 7/7 - loss 0.62383412 - samples/sec: 18.50 - lr: 0.020000\n",
+      "2021-09-21 19:29:59,522 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:29:59,522 EPOCH 5 done: loss 0.6238 - lr 0.0200000\n",
+      "2021-09-21 19:29:59,598 DEV : loss 0.468666136264801 - score 0.0\n",
+      "2021-09-21 19:29:59,601 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:29:59,603 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:29:59,690 epoch 6 - iter 1/7 - loss 0.67311019 - samples/sec: 18.47 - lr: 0.020000\n",
+      "2021-09-21 19:29:59,755 epoch 6 - iter 2/7 - loss 0.65519691 - samples/sec: 15.35 - lr: 0.020000\n",
+      "2021-09-21 19:29:59,805 epoch 6 - iter 3/7 - loss 0.66281954 - samples/sec: 20.26 - lr: 0.020000\n",
+      "2021-09-21 19:29:59,865 epoch 6 - iter 4/7 - loss 0.70923074 - samples/sec: 16.84 - lr: 0.020000\n",
+      "2021-09-21 19:29:59,928 epoch 6 - iter 5/7 - loss 0.73694160 - samples/sec: 16.00 - lr: 0.020000\n",
+      "2021-09-21 19:29:59,987 epoch 6 - iter 6/7 - loss 0.72306956 - samples/sec: 16.95 - lr: 0.020000\n",
+      "2021-09-21 19:30:00,041 epoch 6 - iter 7/7 - loss 0.70597858 - samples/sec: 18.75 - lr: 0.020000\n",
+      "2021-09-21 19:30:00,042 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:30:00,042 EPOCH 6 done: loss 0.7060 - lr 0.0200000\n",
+      "2021-09-21 19:30:00,079 DEV : loss 0.3642338514328003 - score 0.0\n",
+      "2021-09-21 19:30:00,082 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:30:04,201 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:30:04,317 epoch 7 - iter 1/7 - loss 0.74616551 - samples/sec: 12.43 - lr: 0.020000\n",
+      "2021-09-21 19:30:04,369 epoch 7 - iter 2/7 - loss 0.71022260 - samples/sec: 19.23 - lr: 0.020000\n",
+      "2021-09-21 19:30:04,442 epoch 7 - iter 3/7 - loss 0.66065532 - samples/sec: 13.84 - lr: 0.020000\n",
+      "2021-09-21 19:30:04,500 epoch 7 - iter 4/7 - loss 0.66100164 - samples/sec: 17.17 - lr: 0.020000\n",
+      "2021-09-21 19:30:04,569 epoch 7 - iter 5/7 - loss 0.64623426 - samples/sec: 14.73 - lr: 0.020000\n",
+      "2021-09-21 19:30:04,633 epoch 7 - iter 6/7 - loss 0.63767045 - samples/sec: 15.69 - lr: 0.020000\n",
+      "2021-09-21 19:30:04,691 epoch 7 - iter 7/7 - loss 0.64060460 - samples/sec: 17.29 - lr: 0.020000\n",
+      "2021-09-21 19:30:04,692 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:30:04,693 EPOCH 7 done: loss 0.6406 - lr 0.0200000\n",
+      "2021-09-21 19:30:04,751 DEV : loss 0.41591086983680725 - score 0.0\n",
+      "2021-09-21 19:30:04,753 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:30:04,755 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:30:04,855 epoch 8 - iter 1/7 - loss 0.52642053 - samples/sec: 14.67 - lr: 0.020000\n",
+      "2021-09-21 19:30:04,914 epoch 8 - iter 2/7 - loss 0.68740284 - samples/sec: 16.98 - lr: 0.020000\n",
+      "2021-09-21 19:30:04,970 epoch 8 - iter 3/7 - loss 0.72250158 - samples/sec: 18.03 - lr: 0.020000\n",
+      "2021-09-21 19:30:05,041 epoch 8 - iter 4/7 - loss 0.68171883 - samples/sec: 14.26 - lr: 0.020000\n",
+      "2021-09-21 19:30:05,100 epoch 8 - iter 5/7 - loss 0.67106953 - samples/sec: 16.92 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:59:52,550 epoch 7 - iter 2/7 - loss 0.16783511 - samples/sec: 21.38 - lr: 0.020000\n",
-      "2021-09-08 10:59:52,597 epoch 7 - iter 3/7 - loss 0.30854433 - samples/sec: 21.47 - lr: 0.020000\n",
-      "2021-09-08 10:59:52,644 epoch 7 - iter 4/7 - loss 0.33433518 - samples/sec: 21.35 - lr: 0.020000\n",
-      "2021-09-08 10:59:52,691 epoch 7 - iter 5/7 - loss 0.35097190 - samples/sec: 21.51 - lr: 0.020000\n",
-      "2021-09-08 10:59:52,735 epoch 7 - iter 6/7 - loss 0.29645605 - samples/sec: 23.28 - lr: 0.020000\n",
-      "2021-09-08 10:59:52,782 epoch 7 - iter 7/7 - loss 0.33693183 - samples/sec: 21.44 - lr: 0.020000\n",
-      "2021-09-08 10:59:52,783 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:52,783 EPOCH 7 done: loss 0.3369 - lr 0.0200000\n",
-      "2021-09-08 10:59:52,921 DEV : loss 0.8862733244895935 - score 0.0\n",
-      "2021-09-08 10:59:52,921 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:59:52,994 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:53,054 epoch 8 - iter 1/7 - loss 0.24230219 - samples/sec: 21.64 - lr: 0.020000\n",
-      "2021-09-08 10:59:53,101 epoch 8 - iter 2/7 - loss 0.25661808 - samples/sec: 21.33 - lr: 0.020000\n",
-      "2021-09-08 10:59:53,148 epoch 8 - iter 3/7 - loss 0.21861899 - samples/sec: 21.51 - lr: 0.020000\n",
-      "2021-09-08 10:59:53,193 epoch 8 - iter 4/7 - loss 0.19828997 - samples/sec: 22.89 - lr: 0.020000\n",
-      "2021-09-08 10:59:53,242 epoch 8 - iter 5/7 - loss 0.23207305 - samples/sec: 20.36 - lr: 0.020000\n",
-      "2021-09-08 10:59:53,289 epoch 8 - iter 6/7 - loss 0.25874110 - samples/sec: 21.66 - lr: 0.020000\n",
-      "2021-09-08 10:59:53,336 epoch 8 - iter 7/7 - loss 0.27899848 - samples/sec: 21.27 - lr: 0.020000\n",
-      "2021-09-08 10:59:53,337 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:53,338 EPOCH 8 done: loss 0.2790 - lr 0.0200000\n",
-      "2021-09-08 10:59:53,470 DEV : loss 1.1928966045379639 - score 0.0\n",
-      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 10:59:53,471 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:59:53,555 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:53,614 epoch 9 - iter 1/7 - loss 0.11405942 - samples/sec: 21.69 - lr: 0.010000\n",
-      "2021-09-08 10:59:53,658 epoch 9 - iter 2/7 - loss 0.09884948 - samples/sec: 23.11 - lr: 0.010000\n",
-      "2021-09-08 10:59:53,705 epoch 9 - iter 3/7 - loss 0.26730673 - samples/sec: 21.34 - lr: 0.010000\n",
-      "2021-09-08 10:59:53,752 epoch 9 - iter 4/7 - loss 0.30550575 - samples/sec: 21.67 - lr: 0.010000\n",
-      "2021-09-08 10:59:53,796 epoch 9 - iter 5/7 - loss 0.25386595 - samples/sec: 23.10 - lr: 0.010000\n",
-      "2021-09-08 10:59:53,843 epoch 9 - iter 6/7 - loss 0.23148905 - samples/sec: 21.27 - lr: 0.010000\n",
-      "2021-09-08 10:59:53,890 epoch 9 - iter 7/7 - loss 0.26177556 - samples/sec: 21.65 - lr: 0.010000\n",
-      "2021-09-08 10:59:53,891 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:53,892 EPOCH 9 done: loss 0.2618 - lr 0.0100000\n",
-      "2021-09-08 10:59:54,027 DEV : loss 0.7190810441970825 - score 0.0\n",
-      "2021-09-08 10:59:54,027 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:59:54,123 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:54,183 epoch 10 - iter 1/7 - loss 0.14886014 - samples/sec: 21.42 - lr: 0.010000\n",
-      "2021-09-08 10:59:54,230 epoch 10 - iter 2/7 - loss 0.26887401 - samples/sec: 21.57 - lr: 0.010000\n",
-      "2021-09-08 10:59:54,277 epoch 10 - iter 3/7 - loss 0.32329321 - samples/sec: 21.64 - lr: 0.010000\n",
-      "2021-09-08 10:59:54,324 epoch 10 - iter 4/7 - loss 0.36726519 - samples/sec: 21.41 - lr: 0.010000\n",
-      "2021-09-08 10:59:54,371 epoch 10 - iter 5/7 - loss 0.33792516 - samples/sec: 21.56 - lr: 0.010000\n",
-      "2021-09-08 10:59:54,417 epoch 10 - iter 6/7 - loss 0.38714866 - samples/sec: 21.61 - lr: 0.010000\n",
-      "2021-09-08 10:59:54,463 epoch 10 - iter 7/7 - loss 0.33476441 - samples/sec: 21.94 - lr: 0.010000\n",
-      "2021-09-08 10:59:54,464 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:54,465 EPOCH 10 done: loss 0.3348 - lr 0.0100000\n",
-      "2021-09-08 10:59:54,616 DEV : loss 0.940350353717804 - score 0.0\n",
-      "2021-09-08 10:59:54,617 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:00:03,369 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:03,370 Testing using best model ...\n",
-      "2021-09-08 11:00:03,372 loading file None/best-model.pt\n",
+      "2021-09-21 19:30:05,159 epoch 8 - iter 6/7 - loss 0.66665809 - samples/sec: 17.13 - lr: 0.020000\n",
+      "2021-09-21 19:30:05,221 epoch 8 - iter 7/7 - loss 0.66136224 - samples/sec: 16.31 - lr: 0.020000\n",
+      "2021-09-21 19:30:05,222 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:30:05,222 EPOCH 8 done: loss 0.6614 - lr 0.0200000\n",
+      "2021-09-21 19:30:05,276 DEV : loss 0.37477895617485046 - score 0.0\n",
+      "2021-09-21 19:30:05,281 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:30:05,282 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:30:05,367 epoch 9 - iter 1/7 - loss 0.69928962 - samples/sec: 15.65 - lr: 0.020000\n",
+      "2021-09-21 19:30:05,430 epoch 9 - iter 2/7 - loss 0.66864765 - samples/sec: 15.88 - lr: 0.020000\n",
+      "2021-09-21 19:30:05,486 epoch 9 - iter 3/7 - loss 0.63911023 - samples/sec: 18.01 - lr: 0.020000\n",
+      "2021-09-21 19:30:05,554 epoch 9 - iter 4/7 - loss 0.62922700 - samples/sec: 14.84 - lr: 0.020000\n",
+      "2021-09-21 19:30:05,625 epoch 9 - iter 5/7 - loss 0.59516806 - samples/sec: 14.13 - lr: 0.020000\n",
+      "2021-09-21 19:30:05,684 epoch 9 - iter 6/7 - loss 0.60271405 - samples/sec: 17.00 - lr: 0.020000\n",
+      "2021-09-21 19:30:05,750 epoch 9 - iter 7/7 - loss 0.64646216 - samples/sec: 15.40 - lr: 0.020000\n",
+      "2021-09-21 19:30:05,751 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:30:05,751 EPOCH 9 done: loss 0.6465 - lr 0.0200000\n",
+      "2021-09-21 19:30:05,797 DEV : loss 0.21411047875881195 - score 0.0\n",
+      "2021-09-21 19:30:05,801 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:30:09,454 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:30:09,546 epoch 10 - iter 1/7 - loss 0.70723963 - samples/sec: 16.15 - lr: 0.020000\n",
+      "2021-09-21 19:30:09,606 epoch 10 - iter 2/7 - loss 0.57993144 - samples/sec: 16.90 - lr: 0.020000\n",
+      "2021-09-21 19:30:09,665 epoch 10 - iter 3/7 - loss 0.59227069 - samples/sec: 17.04 - lr: 0.020000\n",
+      "2021-09-21 19:30:09,726 epoch 10 - iter 4/7 - loss 0.63437648 - samples/sec: 16.62 - lr: 0.020000\n",
+      "2021-09-21 19:30:09,782 epoch 10 - iter 5/7 - loss 0.62035296 - samples/sec: 17.97 - lr: 0.020000\n",
+      "2021-09-21 19:30:09,845 epoch 10 - iter 6/7 - loss 0.62684114 - samples/sec: 15.94 - lr: 0.020000\n",
+      "2021-09-21 19:30:09,904 epoch 10 - iter 7/7 - loss 0.62568254 - samples/sec: 17.00 - lr: 0.020000\n",
+      "2021-09-21 19:30:09,905 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:30:09,906 EPOCH 10 done: loss 0.6257 - lr 0.0200000\n",
+      "2021-09-21 19:30:09,938 DEV : loss 0.37981557846069336 - score 0.0\n",
+      "2021-09-21 19:30:09,940 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:30:14,064 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:30:14,065 Testing using best model ...\n",
+      "2021-09-21 19:30:14,066 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:00:08,754 \t0.0\n",
-      "2021-09-08 11:00:08,754 \n",
+      "2021-09-21 19:30:19,327 \t0.0\n",
+      "2021-09-21 19:30:19,328 \n",
       "Results:\n",
       "- F-score (micro) 0.0\n",
       "- F-score (macro) 0.0\n",
@@ -4327,37 +4348,37 @@
       "     this text expresses joy     0.0000    0.0000    0.0000         0\n",
       "this text expresses surprise     0.0000    0.0000    0.0000         0\n",
       "    this text expresses love     0.0000    0.0000    0.0000         0\n",
-      " this text expresses disgust     0.0000    0.0000    0.0000         0\n",
-      "   this text expresses anger     0.0000    0.0000    0.0000         0\n",
+      "    this text expresses fear     0.0000    0.0000    0.0000         0\n",
+      "   this text expresses guilt     0.0000    0.0000    0.0000         0\n",
       "   this text expresses shame     0.0000    0.0000    0.0000         0\n",
       " this text expresses sadness     0.0000    0.0000    0.0000         0\n",
-      "    this text expresses fear     0.0000    0.0000    0.0000         1\n",
+      " this text expresses disgust     0.0000    0.0000    0.0000         1\n",
       "\n",
       "                   micro avg     0.0000    0.0000    0.0000         1\n",
       "                   macro avg     0.0000    0.0000    0.0000         1\n",
       "                weighted avg     0.0000    0.0000    0.0000         1\n",
       "                 samples avg     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "2021-09-08 11:00:08,755 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:54,198 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
+      "2021-09-21 19:30:19,328 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:31:46,156 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:00:58,253 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:31:50,518 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 8/8 [00:00<00:00, 31714.96it/s]"
+      "100%|██████████| 8/8 [00:00<00:00, 33723.05it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:00:58,255 [b'this text expresses joy', b'this text expresses surprise', b'this text expresses love', b'this text expresses disgust', b'this text expresses fear', b'this text expresses guilt', b'this text expresses sadness', b'this text expresses shame']\n",
-      "2021-09-08 11:00:58,266 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:58,268 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:31:50,520 [b'this text expresses joy', b'this text expresses disgust', b'this text expresses fear', b'this text expresses anger', b'this text expresses guilt', b'this text expresses shame', b'this text expresses sadness', b'this text expresses love']\n",
+      "2021-09-21 19:31:50,529 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:31:50,531 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -4670,27 +4691,26 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:00:58,269 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:58,269 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
-      "2021-09-08 11:00:58,269 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:58,269 Parameters:\n",
-      "2021-09-08 11:00:58,270  - learning_rate: \"0.02\"\n",
-      "2021-09-08 11:00:58,270  - mini_batch_size: \"1\"\n",
-      "2021-09-08 11:00:58,270  - patience: \"3\"\n",
-      "2021-09-08 11:00:58,271  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 11:00:58,271  - max_epochs: \"10\"\n",
-      "2021-09-08 11:00:58,271  - shuffle: \"True\"\n",
-      "2021-09-08 11:00:58,271  - train_with_dev: \"False\"\n",
-      "2021-09-08 11:00:58,272  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 11:00:58,272 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:58,272 Model training base path: \"None\"\n",
-      "2021-09-08 11:00:58,273 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:58,273 Device: cuda:1\n",
-      "2021-09-08 11:00:58,273 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:58,273 Embeddings storage mode: cpu\n",
-      "2021-09-08 11:00:58,283 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:58,360 epoch 1 - iter 1/7 - loss 0.46775627 - samples/sec: 20.17 - lr: 0.020000\n",
-      "2021-09-08 11:00:58,427 epoch 1 - iter 2/7 - loss 0.61557862 - samples/sec: 15.21 - lr: 0.020000\n"
+      "2021-09-21 19:31:50,531 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:31:50,531 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
+      "2021-09-21 19:31:50,532 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:31:50,532 Parameters:\n",
+      "2021-09-21 19:31:50,532  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:31:50,532  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:31:50,533  - patience: \"3\"\n",
+      "2021-09-21 19:31:50,533  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:31:50,533  - max_epochs: \"10\"\n",
+      "2021-09-21 19:31:50,534  - shuffle: \"True\"\n",
+      "2021-09-21 19:31:50,534  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:31:50,534  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:31:50,534 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:31:50,535 Model training base path: \"None\"\n",
+      "2021-09-21 19:31:50,535 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:31:50,535 Device: cuda:0\n",
+      "2021-09-21 19:31:50,535 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:31:50,536 Embeddings storage mode: cpu\n",
+      "2021-09-21 19:31:50,542 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:31:50,637 epoch 1 - iter 1/7 - loss 0.80792618 - samples/sec: 14.31 - lr: 0.020000\n"
      ]
     },
     {
@@ -4704,182 +4724,181 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:00:58,484 epoch 1 - iter 3/7 - loss 0.79119410 - samples/sec: 17.59 - lr: 0.020000\n",
-      "2021-09-08 11:00:58,545 epoch 1 - iter 4/7 - loss 0.77254395 - samples/sec: 16.57 - lr: 0.020000\n",
-      "2021-09-08 11:00:58,610 epoch 1 - iter 5/7 - loss 0.74106854 - samples/sec: 15.48 - lr: 0.020000\n",
-      "2021-09-08 11:00:58,675 epoch 1 - iter 6/7 - loss 0.75559851 - samples/sec: 15.48 - lr: 0.020000\n",
-      "2021-09-08 11:00:58,738 epoch 1 - iter 7/7 - loss 0.73645569 - samples/sec: 15.84 - lr: 0.020000\n",
-      "2021-09-08 11:00:58,739 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:58,740 EPOCH 1 done: loss 0.7365 - lr 0.0200000\n",
-      "2021-09-08 11:00:58,784 DEV : loss 0.40869590640068054 - score 0.0\n",
-      "2021-09-08 11:00:58,784 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 11:01:10,663 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:10,738 epoch 2 - iter 1/7 - loss 0.43662050 - samples/sec: 17.58 - lr: 0.020000\n",
-      "2021-09-08 11:01:10,794 epoch 2 - iter 2/7 - loss 0.66232340 - samples/sec: 17.85 - lr: 0.020000\n",
-      "2021-09-08 11:01:10,850 epoch 2 - iter 3/7 - loss 0.65782820 - samples/sec: 18.03 - lr: 0.020000\n",
-      "2021-09-08 11:01:10,905 epoch 2 - iter 4/7 - loss 0.59569061 - samples/sec: 18.24 - lr: 0.020000\n",
-      "2021-09-08 11:01:10,961 epoch 2 - iter 5/7 - loss 0.56632927 - samples/sec: 18.00 - lr: 0.020000\n",
-      "2021-09-08 11:01:11,016 epoch 2 - iter 6/7 - loss 0.56791925 - samples/sec: 18.33 - lr: 0.020000\n",
-      "2021-09-08 11:01:11,073 epoch 2 - iter 7/7 - loss 0.63863291 - samples/sec: 17.92 - lr: 0.020000\n",
-      "2021-09-08 11:01:11,074 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:11,074 EPOCH 2 done: loss 0.6386 - lr 0.0200000\n",
-      "2021-09-08 11:01:12,584 DEV : loss 0.534812867641449 - score 0.0\n",
-      "2021-09-08 11:01:12,584 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:01:12,609 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:12,689 epoch 3 - iter 1/7 - loss 0.49171874 - samples/sec: 16.36 - lr: 0.020000\n",
-      "2021-09-08 11:01:12,747 epoch 3 - iter 2/7 - loss 0.50269316 - samples/sec: 17.32 - lr: 0.020000\n",
-      "2021-09-08 11:01:12,805 epoch 3 - iter 3/7 - loss 0.55100623 - samples/sec: 17.35 - lr: 0.020000\n",
-      "2021-09-08 11:01:12,865 epoch 3 - iter 4/7 - loss 0.56528784 - samples/sec: 16.70 - lr: 0.020000\n",
-      "2021-09-08 11:01:12,924 epoch 3 - iter 5/7 - loss 0.57918240 - samples/sec: 17.21 - lr: 0.020000\n",
-      "2021-09-08 11:01:12,984 epoch 3 - iter 6/7 - loss 0.54223366 - samples/sec: 16.79 - lr: 0.020000\n",
-      "2021-09-08 11:01:13,043 epoch 3 - iter 7/7 - loss 0.54961392 - samples/sec: 16.98 - lr: 0.020000\n",
-      "2021-09-08 11:01:13,044 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:13,045 EPOCH 3 done: loss 0.5496 - lr 0.0200000\n",
-      "2021-09-08 11:01:13,084 DEV : loss 0.2831801176071167 - score 0.0\n",
-      "2021-09-08 11:01:13,085 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 11:01:20,387 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:20,450 epoch 4 - iter 1/7 - loss 0.49715748 - samples/sec: 21.04 - lr: 0.020000\n",
-      "2021-09-08 11:01:20,498 epoch 4 - iter 2/7 - loss 0.34665538 - samples/sec: 21.19 - lr: 0.020000\n",
-      "2021-09-08 11:01:20,545 epoch 4 - iter 3/7 - loss 0.38767468 - samples/sec: 21.56 - lr: 0.020000\n",
-      "2021-09-08 11:01:20,591 epoch 4 - iter 4/7 - loss 0.41716523 - samples/sec: 21.62 - lr: 0.020000\n",
-      "2021-09-08 11:01:20,639 epoch 4 - iter 5/7 - loss 0.43268410 - samples/sec: 21.33 - lr: 0.020000\n",
-      "2021-09-08 11:01:20,686 epoch 4 - iter 6/7 - loss 0.48691706 - samples/sec: 21.22 - lr: 0.020000\n",
-      "2021-09-08 11:01:20,734 epoch 4 - iter 7/7 - loss 0.49138346 - samples/sec: 21.21 - lr: 0.020000\n",
-      "2021-09-08 11:01:20,735 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:20,735 EPOCH 4 done: loss 0.4914 - lr 0.0200000\n",
-      "2021-09-08 11:01:24,810 DEV : loss 0.1815166026353836 - score 0.0\n",
-      "2021-09-08 11:01:24,810 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:31:50,722 epoch 1 - iter 2/7 - loss 0.80566648 - samples/sec: 11.86 - lr: 0.020000\n",
+      "2021-09-21 19:31:50,803 epoch 1 - iter 3/7 - loss 0.83408403 - samples/sec: 12.34 - lr: 0.020000\n",
+      "2021-09-21 19:31:50,871 epoch 1 - iter 4/7 - loss 0.72752693 - samples/sec: 14.95 - lr: 0.020000\n",
+      "2021-09-21 19:31:50,941 epoch 1 - iter 5/7 - loss 0.71650079 - samples/sec: 14.39 - lr: 0.020000\n",
+      "2021-09-21 19:31:51,013 epoch 1 - iter 6/7 - loss 0.70185657 - samples/sec: 13.96 - lr: 0.020000\n",
+      "2021-09-21 19:31:51,099 epoch 1 - iter 7/7 - loss 0.72718798 - samples/sec: 11.68 - lr: 0.020000\n",
+      "2021-09-21 19:31:51,100 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:31:51,100 EPOCH 1 done: loss 0.7272 - lr 0.0200000\n",
+      "2021-09-21 19:31:51,241 DEV : loss 0.4609273374080658 - score 0.0\n",
+      "2021-09-21 19:31:51,242 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:01:32,236 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:32,367 epoch 5 - iter 1/7 - loss 0.16680382 - samples/sec: 20.80 - lr: 0.020000\n",
-      "2021-09-08 11:01:32,415 epoch 5 - iter 2/7 - loss 0.19808155 - samples/sec: 21.18 - lr: 0.020000\n",
-      "2021-09-08 11:01:32,462 epoch 5 - iter 3/7 - loss 0.32854433 - samples/sec: 21.42 - lr: 0.020000\n",
-      "2021-09-08 11:01:32,509 epoch 5 - iter 4/7 - loss 0.30367975 - samples/sec: 21.44 - lr: 0.020000\n",
-      "2021-09-08 11:01:32,557 epoch 5 - iter 5/7 - loss 0.32905002 - samples/sec: 21.35 - lr: 0.020000\n",
-      "2021-09-08 11:01:32,603 epoch 5 - iter 6/7 - loss 0.41578972 - samples/sec: 21.57 - lr: 0.020000\n",
-      "2021-09-08 11:01:32,651 epoch 5 - iter 7/7 - loss 0.43259121 - samples/sec: 21.34 - lr: 0.020000\n",
-      "2021-09-08 11:01:32,652 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:32,652 EPOCH 5 done: loss 0.4326 - lr 0.0200000\n",
-      "2021-09-08 11:01:32,700 DEV : loss 0.30038192868232727 - score 0.0\n",
-      "2021-09-08 11:01:32,701 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:01:32,743 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:32,803 epoch 6 - iter 1/7 - loss 0.38060644 - samples/sec: 21.54 - lr: 0.020000\n",
-      "2021-09-08 11:01:32,847 epoch 6 - iter 2/7 - loss 0.21714873 - samples/sec: 23.07 - lr: 0.020000\n",
-      "2021-09-08 11:01:32,891 epoch 6 - iter 3/7 - loss 0.21635924 - samples/sec: 22.79 - lr: 0.020000\n",
-      "2021-09-08 11:01:32,937 epoch 6 - iter 4/7 - loss 0.33096191 - samples/sec: 21.73 - lr: 0.020000\n",
-      "2021-09-08 11:01:32,985 epoch 6 - iter 5/7 - loss 0.41073196 - samples/sec: 21.28 - lr: 0.020000\n",
-      "2021-09-08 11:01:33,032 epoch 6 - iter 6/7 - loss 0.44462855 - samples/sec: 21.57 - lr: 0.020000\n",
-      "2021-09-08 11:01:33,079 epoch 6 - iter 7/7 - loss 0.44989455 - samples/sec: 21.19 - lr: 0.020000\n",
-      "2021-09-08 11:01:33,080 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:33,081 EPOCH 6 done: loss 0.4499 - lr 0.0200000\n",
-      "2021-09-08 11:01:33,122 DEV : loss 0.14434672892093658 - score 0.0\n",
-      "2021-09-08 11:01:33,122 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:31:55,463 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:31:55,572 epoch 2 - iter 1/7 - loss 0.49714479 - samples/sec: 13.90 - lr: 0.020000\n",
+      "2021-09-21 19:31:55,654 epoch 2 - iter 2/7 - loss 0.58561452 - samples/sec: 12.29 - lr: 0.020000\n",
+      "2021-09-21 19:31:55,733 epoch 2 - iter 3/7 - loss 0.57954638 - samples/sec: 12.78 - lr: 0.020000\n",
+      "2021-09-21 19:31:55,790 epoch 2 - iter 4/7 - loss 0.59836676 - samples/sec: 17.51 - lr: 0.020000\n",
+      "2021-09-21 19:31:55,864 epoch 2 - iter 5/7 - loss 0.61765035 - samples/sec: 13.55 - lr: 0.020000\n",
+      "2021-09-21 19:31:55,934 epoch 2 - iter 6/7 - loss 0.62593097 - samples/sec: 14.52 - lr: 0.020000\n",
+      "2021-09-21 19:31:56,023 epoch 2 - iter 7/7 - loss 0.63382883 - samples/sec: 11.27 - lr: 0.020000\n",
+      "2021-09-21 19:31:56,024 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:31:56,024 EPOCH 2 done: loss 0.6338 - lr 0.0200000\n",
+      "2021-09-21 19:31:56,110 DEV : loss 0.7316479682922363 - score 0.0\n",
+      "2021-09-21 19:31:56,111 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:31:56,135 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:31:56,271 epoch 3 - iter 1/7 - loss 0.25106058 - samples/sec: 14.65 - lr: 0.020000\n",
+      "2021-09-21 19:31:56,356 epoch 3 - iter 2/7 - loss 0.43508150 - samples/sec: 11.79 - lr: 0.020000\n",
+      "2021-09-21 19:31:56,434 epoch 3 - iter 3/7 - loss 0.49540884 - samples/sec: 13.01 - lr: 0.020000\n",
+      "2021-09-21 19:31:56,514 epoch 3 - iter 4/7 - loss 0.53981883 - samples/sec: 12.45 - lr: 0.020000\n",
+      "2021-09-21 19:31:56,589 epoch 3 - iter 5/7 - loss 0.54659608 - samples/sec: 13.44 - lr: 0.020000\n",
+      "2021-09-21 19:31:56,678 epoch 3 - iter 6/7 - loss 0.55629128 - samples/sec: 11.38 - lr: 0.020000\n",
+      "2021-09-21 19:31:56,740 epoch 3 - iter 7/7 - loss 0.56347629 - samples/sec: 16.11 - lr: 0.020000\n",
+      "2021-09-21 19:31:56,741 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:31:56,741 EPOCH 3 done: loss 0.5635 - lr 0.0200000\n",
+      "2021-09-21 19:31:56,809 DEV : loss 0.9963215589523315 - score 0.0\n",
+      "2021-09-21 19:31:56,813 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:31:56,815 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:31:56,949 epoch 4 - iter 1/7 - loss 0.76778698 - samples/sec: 13.47 - lr: 0.020000\n",
+      "2021-09-21 19:31:57,028 epoch 4 - iter 2/7 - loss 0.70783725 - samples/sec: 12.64 - lr: 0.020000\n",
+      "2021-09-21 19:31:57,096 epoch 4 - iter 3/7 - loss 0.73286442 - samples/sec: 14.93 - lr: 0.020000\n",
+      "2021-09-21 19:31:57,171 epoch 4 - iter 4/7 - loss 0.72102915 - samples/sec: 13.41 - lr: 0.020000\n",
+      "2021-09-21 19:31:57,248 epoch 4 - iter 5/7 - loss 0.72610124 - samples/sec: 13.04 - lr: 0.020000\n",
+      "2021-09-21 19:31:57,302 epoch 4 - iter 6/7 - loss 0.67264484 - samples/sec: 18.75 - lr: 0.020000\n",
+      "2021-09-21 19:31:57,358 epoch 4 - iter 7/7 - loss 0.68011137 - samples/sec: 17.88 - lr: 0.020000\n",
+      "2021-09-21 19:31:57,359 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:31:57,360 EPOCH 4 done: loss 0.6801 - lr 0.0200000\n",
+      "2021-09-21 19:31:57,392 DEV : loss 0.175100177526474 - score 0.0\n",
+      "2021-09-21 19:31:57,393 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:01:37,880 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:37,944 epoch 7 - iter 1/7 - loss 0.26822481 - samples/sec: 20.75 - lr: 0.020000\n",
-      "2021-09-08 11:01:37,991 epoch 7 - iter 2/7 - loss 0.56130128 - samples/sec: 21.42 - lr: 0.020000\n",
-      "2021-09-08 11:01:38,038 epoch 7 - iter 3/7 - loss 0.45503703 - samples/sec: 21.48 - lr: 0.020000\n",
-      "2021-09-08 11:01:38,084 epoch 7 - iter 4/7 - loss 0.37313463 - samples/sec: 21.79 - lr: 0.020000\n",
-      "2021-09-08 11:01:38,132 epoch 7 - iter 5/7 - loss 0.40499460 - samples/sec: 21.37 - lr: 0.020000\n",
-      "2021-09-08 11:01:38,178 epoch 7 - iter 6/7 - loss 0.37620124 - samples/sec: 21.69 - lr: 0.020000\n",
-      "2021-09-08 11:01:38,226 epoch 7 - iter 7/7 - loss 0.37632233 - samples/sec: 21.29 - lr: 0.020000\n",
-      "2021-09-08 11:01:38,226 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:38,227 EPOCH 7 done: loss 0.3763 - lr 0.0200000\n",
-      "2021-09-08 11:01:38,261 DEV : loss 0.04177432879805565 - score 0.0\n",
-      "2021-09-08 11:01:38,262 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 11:01:43,151 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:43,215 epoch 8 - iter 1/7 - loss 0.62576920 - samples/sec: 20.82 - lr: 0.020000\n",
-      "2021-09-08 11:01:43,262 epoch 8 - iter 2/7 - loss 0.34529545 - samples/sec: 21.46 - lr: 0.020000\n",
-      "2021-09-08 11:01:43,306 epoch 8 - iter 3/7 - loss 0.25029175 - samples/sec: 22.99 - lr: 0.020000\n",
-      "2021-09-08 11:01:43,353 epoch 8 - iter 4/7 - loss 0.51133749 - samples/sec: 21.43 - lr: 0.020000\n",
-      "2021-09-08 11:01:43,402 epoch 8 - iter 5/7 - loss 0.42774939 - samples/sec: 20.59 - lr: 0.020000\n",
-      "2021-09-08 11:01:43,451 epoch 8 - iter 6/7 - loss 0.43877206 - samples/sec: 20.87 - lr: 0.020000\n"
+      "2021-09-21 19:32:02,107 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:32:02,191 epoch 5 - iter 1/7 - loss 0.46926352 - samples/sec: 16.83 - lr: 0.020000\n",
+      "2021-09-21 19:32:02,246 epoch 5 - iter 2/7 - loss 0.51419462 - samples/sec: 18.47 - lr: 0.020000\n",
+      "2021-09-21 19:32:02,310 epoch 5 - iter 3/7 - loss 0.52301104 - samples/sec: 15.75 - lr: 0.020000\n",
+      "2021-09-21 19:32:02,377 epoch 5 - iter 4/7 - loss 0.51403573 - samples/sec: 15.04 - lr: 0.020000\n",
+      "2021-09-21 19:32:02,440 epoch 5 - iter 5/7 - loss 0.46198283 - samples/sec: 16.08 - lr: 0.020000\n",
+      "2021-09-21 19:32:02,497 epoch 5 - iter 6/7 - loss 0.49972818 - samples/sec: 17.76 - lr: 0.020000\n",
+      "2021-09-21 19:32:02,561 epoch 5 - iter 7/7 - loss 0.53115430 - samples/sec: 15.74 - lr: 0.020000\n",
+      "2021-09-21 19:32:02,563 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:32:02,563 EPOCH 5 done: loss 0.5312 - lr 0.0200000\n",
+      "2021-09-21 19:32:02,630 DEV : loss 0.47530558705329895 - score 0.0\n",
+      "2021-09-21 19:32:02,631 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:32:02,634 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:32:02,723 epoch 6 - iter 1/7 - loss 0.64164346 - samples/sec: 14.97 - lr: 0.020000\n",
+      "2021-09-21 19:32:02,792 epoch 6 - iter 2/7 - loss 0.41753925 - samples/sec: 14.56 - lr: 0.020000\n",
+      "2021-09-21 19:32:02,861 epoch 6 - iter 3/7 - loss 0.44700281 - samples/sec: 14.69 - lr: 0.020000\n",
+      "2021-09-21 19:32:02,916 epoch 6 - iter 4/7 - loss 0.41244122 - samples/sec: 18.35 - lr: 0.020000\n",
+      "2021-09-21 19:32:02,984 epoch 6 - iter 5/7 - loss 0.45092650 - samples/sec: 14.90 - lr: 0.020000\n",
+      "2021-09-21 19:32:03,047 epoch 6 - iter 6/7 - loss 0.52535007 - samples/sec: 15.83 - lr: 0.020000\n",
+      "2021-09-21 19:32:03,100 epoch 6 - iter 7/7 - loss 0.58569307 - samples/sec: 18.96 - lr: 0.020000\n",
+      "2021-09-21 19:32:03,101 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:32:03,102 EPOCH 6 done: loss 0.5857 - lr 0.0200000\n",
+      "2021-09-21 19:32:03,236 DEV : loss 0.30554109811782837 - score 0.0\n",
+      "2021-09-21 19:32:03,237 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:32:03,315 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:32:03,409 epoch 7 - iter 1/7 - loss 0.11606161 - samples/sec: 15.81 - lr: 0.020000\n",
+      "2021-09-21 19:32:03,473 epoch 7 - iter 2/7 - loss 0.45057650 - samples/sec: 15.79 - lr: 0.020000\n",
+      "2021-09-21 19:32:03,532 epoch 7 - iter 3/7 - loss 0.34602381 - samples/sec: 17.10 - lr: 0.020000\n",
+      "2021-09-21 19:32:03,586 epoch 7 - iter 4/7 - loss 0.37668852 - samples/sec: 18.57 - lr: 0.020000\n",
+      "2021-09-21 19:32:03,648 epoch 7 - iter 5/7 - loss 0.42601001 - samples/sec: 16.44 - lr: 0.020000\n",
+      "2021-09-21 19:32:03,716 epoch 7 - iter 6/7 - loss 0.43656208 - samples/sec: 14.87 - lr: 0.020000\n",
+      "2021-09-21 19:32:03,778 epoch 7 - iter 7/7 - loss 0.45744847 - samples/sec: 16.18 - lr: 0.020000\n",
+      "2021-09-21 19:32:03,779 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:32:03,779 EPOCH 7 done: loss 0.4574 - lr 0.0200000\n",
+      "2021-09-21 19:32:03,910 DEV : loss 0.6477236747741699 - score 0.0\n",
+      "2021-09-21 19:32:03,911 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:32:03,988 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:32:04,062 epoch 8 - iter 1/7 - loss 0.38927016 - samples/sec: 18.95 - lr: 0.020000\n",
+      "2021-09-21 19:32:04,119 epoch 8 - iter 2/7 - loss 0.53824298 - samples/sec: 17.60 - lr: 0.020000\n",
+      "2021-09-21 19:32:04,182 epoch 8 - iter 3/7 - loss 0.65850595 - samples/sec: 16.13 - lr: 0.020000\n",
+      "2021-09-21 19:32:04,250 epoch 8 - iter 4/7 - loss 0.56874233 - samples/sec: 14.77 - lr: 0.020000\n",
+      "2021-09-21 19:32:04,317 epoch 8 - iter 5/7 - loss 0.51894130 - samples/sec: 15.15 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:01:43,498 epoch 8 - iter 7/7 - loss 0.45340508 - samples/sec: 21.28 - lr: 0.020000\n",
-      "2021-09-08 11:01:43,499 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:43,500 EPOCH 8 done: loss 0.4534 - lr 0.0200000\n",
-      "2021-09-08 11:01:43,711 DEV : loss 0.2798212468624115 - score 0.0\n",
-      "2021-09-08 11:01:43,711 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:01:43,803 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:43,864 epoch 9 - iter 1/7 - loss 0.52847892 - samples/sec: 21.50 - lr: 0.020000\n",
-      "2021-09-08 11:01:43,911 epoch 9 - iter 2/7 - loss 0.58058804 - samples/sec: 21.47 - lr: 0.020000\n",
-      "2021-09-08 11:01:43,958 epoch 9 - iter 3/7 - loss 0.43018793 - samples/sec: 21.45 - lr: 0.020000\n",
-      "2021-09-08 11:01:44,004 epoch 9 - iter 4/7 - loss 0.46499147 - samples/sec: 21.85 - lr: 0.020000\n",
-      "2021-09-08 11:01:44,051 epoch 9 - iter 5/7 - loss 0.48167978 - samples/sec: 21.32 - lr: 0.020000\n",
-      "2021-09-08 11:01:44,098 epoch 9 - iter 6/7 - loss 0.43512686 - samples/sec: 21.60 - lr: 0.020000\n",
-      "2021-09-08 11:01:44,145 epoch 9 - iter 7/7 - loss 0.39315212 - samples/sec: 21.58 - lr: 0.020000\n",
-      "2021-09-08 11:01:44,146 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:44,146 EPOCH 9 done: loss 0.3932 - lr 0.0200000\n",
-      "2021-09-08 11:01:44,283 DEV : loss 0.06162730231881142 - score 0.0\n",
-      "2021-09-08 11:01:44,283 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:01:44,362 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:44,423 epoch 10 - iter 1/7 - loss 0.39147997 - samples/sec: 21.40 - lr: 0.020000\n",
-      "2021-09-08 11:01:44,467 epoch 10 - iter 2/7 - loss 0.24608897 - samples/sec: 23.17 - lr: 0.020000\n",
-      "2021-09-08 11:01:44,513 epoch 10 - iter 3/7 - loss 0.67341295 - samples/sec: 21.60 - lr: 0.020000\n",
-      "2021-09-08 11:01:44,560 epoch 10 - iter 4/7 - loss 0.65223444 - samples/sec: 21.53 - lr: 0.020000\n",
-      "2021-09-08 11:01:44,607 epoch 10 - iter 5/7 - loss 0.56337628 - samples/sec: 21.37 - lr: 0.020000\n",
-      "2021-09-08 11:01:44,651 epoch 10 - iter 6/7 - loss 0.47369508 - samples/sec: 23.30 - lr: 0.020000\n",
-      "2021-09-08 11:01:44,697 epoch 10 - iter 7/7 - loss 0.42997083 - samples/sec: 21.70 - lr: 0.020000\n",
-      "2021-09-08 11:01:44,698 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:44,699 EPOCH 10 done: loss 0.4300 - lr 0.0200000\n",
-      "2021-09-08 11:01:45,247 DEV : loss 0.16482006013393402 - score 0.0\n",
-      "2021-09-08 11:01:45,248 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:01:50,555 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:50,556 Testing using best model ...\n",
-      "2021-09-08 11:01:50,587 loading file None/best-model.pt\n",
+      "2021-09-21 19:32:04,385 epoch 8 - iter 6/7 - loss 0.55589043 - samples/sec: 14.74 - lr: 0.020000\n",
+      "2021-09-21 19:32:04,458 epoch 8 - iter 7/7 - loss 0.51757431 - samples/sec: 13.74 - lr: 0.020000\n",
+      "2021-09-21 19:32:04,459 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:32:04,460 EPOCH 8 done: loss 0.5176 - lr 0.0200000\n",
+      "2021-09-21 19:32:04,586 DEV : loss 0.5769301652908325 - score 0.0\n",
+      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 19:32:04,587 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:32:04,659 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:32:04,783 epoch 9 - iter 1/7 - loss 0.38487008 - samples/sec: 15.75 - lr: 0.010000\n",
+      "2021-09-21 19:32:04,843 epoch 9 - iter 2/7 - loss 0.34950724 - samples/sec: 16.85 - lr: 0.010000\n",
+      "2021-09-21 19:32:04,906 epoch 9 - iter 3/7 - loss 0.52869622 - samples/sec: 16.03 - lr: 0.010000\n",
+      "2021-09-21 19:32:04,962 epoch 9 - iter 4/7 - loss 0.44359817 - samples/sec: 18.03 - lr: 0.010000\n",
+      "2021-09-21 19:32:05,026 epoch 9 - iter 5/7 - loss 0.47544194 - samples/sec: 15.76 - lr: 0.010000\n",
+      "2021-09-21 19:32:05,093 epoch 9 - iter 6/7 - loss 0.49925392 - samples/sec: 15.11 - lr: 0.010000\n",
+      "2021-09-21 19:32:05,147 epoch 9 - iter 7/7 - loss 0.49060941 - samples/sec: 18.80 - lr: 0.010000\n",
+      "2021-09-21 19:32:05,148 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:32:05,149 EPOCH 9 done: loss 0.4906 - lr 0.0100000\n",
+      "2021-09-21 19:32:05,300 DEV : loss 0.3136330842971802 - score 0.0\n",
+      "2021-09-21 19:32:05,301 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:32:05,376 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:32:05,491 epoch 10 - iter 1/7 - loss 0.44097877 - samples/sec: 17.61 - lr: 0.010000\n",
+      "2021-09-21 19:32:05,554 epoch 10 - iter 2/7 - loss 0.25392348 - samples/sec: 15.92 - lr: 0.010000\n",
+      "2021-09-21 19:32:05,607 epoch 10 - iter 3/7 - loss 0.25266897 - samples/sec: 19.02 - lr: 0.010000\n",
+      "2021-09-21 19:32:05,676 epoch 10 - iter 4/7 - loss 0.21185946 - samples/sec: 14.67 - lr: 0.010000\n",
+      "2021-09-21 19:32:05,731 epoch 10 - iter 5/7 - loss 0.28200456 - samples/sec: 18.32 - lr: 0.010000\n",
+      "2021-09-21 19:32:05,789 epoch 10 - iter 6/7 - loss 0.30553574 - samples/sec: 17.40 - lr: 0.010000\n",
+      "2021-09-21 19:32:05,849 epoch 10 - iter 7/7 - loss 0.34365920 - samples/sec: 16.85 - lr: 0.010000\n",
+      "2021-09-21 19:32:05,850 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:32:05,850 EPOCH 10 done: loss 0.3437 - lr 0.0100000\n",
+      "2021-09-21 19:32:06,233 DEV : loss 0.41351962089538574 - score 0.0\n",
+      "2021-09-21 19:32:06,236 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:32:11,314 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:32:11,314 Testing using best model ...\n",
+      "2021-09-21 19:32:11,316 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:02:00,917 \t0.0\n",
-      "2021-09-08 11:02:00,917 \n",
+      "2021-09-21 19:32:16,271 \t0.0\n",
+      "2021-09-21 19:32:16,271 \n",
       "Results:\n",
       "- F-score (micro) 0.0\n",
       "- F-score (macro) 0.0\n",
       "- Accuracy 0.0\n",
       "\n",
       "By class:\n",
-      "                              precision    recall  f1-score   support\n",
+      "                             precision    recall  f1-score   support\n",
       "\n",
-      "     this text expresses joy     0.0000    0.0000    0.0000         0\n",
-      "this text expresses surprise     0.0000    0.0000    0.0000         0\n",
-      "    this text expresses love     0.0000    0.0000    0.0000         0\n",
-      " this text expresses disgust     0.0000    0.0000    0.0000         0\n",
-      "    this text expresses fear     0.0000    0.0000    0.0000         0\n",
-      "   this text expresses guilt     0.0000    0.0000    0.0000         0\n",
-      " this text expresses sadness     0.0000    0.0000    0.0000         0\n",
-      "   this text expresses shame     0.0000    0.0000    0.0000         1\n",
+      "    this text expresses joy     0.0000    0.0000    0.0000         0\n",
+      "this text expresses disgust     0.0000    0.0000    0.0000         0\n",
+      "   this text expresses fear     0.0000    0.0000    0.0000         0\n",
+      "  this text expresses anger     0.0000    0.0000    0.0000         0\n",
+      "  this text expresses guilt     0.0000    0.0000    0.0000         0\n",
+      "  this text expresses shame     0.0000    0.0000    0.0000         0\n",
+      "this text expresses sadness     0.0000    0.0000    0.0000         0\n",
+      "   this text expresses love     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "                   micro avg     0.0000    0.0000    0.0000         1\n",
-      "                   macro avg     0.0000    0.0000    0.0000         1\n",
-      "                weighted avg     0.0000    0.0000    0.0000         1\n",
-      "                 samples avg     0.0000    0.0000    0.0000         1\n",
+      "                  micro avg     0.0000    0.0000    0.0000         1\n",
+      "                  macro avg     0.0000    0.0000    0.0000         1\n",
+      "               weighted avg     0.0000    0.0000    0.0000         1\n",
+      "                samples avg     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "2021-09-08 11:02:00,918 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:51,001 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
+      "2021-09-21 19:32:16,271 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:12,448 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:02:54,976 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:34:16,805 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 8/8 [00:00<00:00, 27257.87it/s]"
+      "100%|██████████| 8/8 [00:00<00:00, 24600.02it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:02:54,978 [b'this text expresses joy', b'this text expresses surprise', b'this text expresses love', b'this text expresses disgust', b'this text expresses fear', b'this text expresses anger', b'this text expresses sadness', b'this text expresses guilt']\n",
-      "2021-09-08 11:02:55,140 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:55,142 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:34:16,807 [b'this text expresses joy', b'this text expresses surprise', b'this text expresses love', b'this text expresses disgust', b'this text expresses fear', b'this text expresses anger', b'this text expresses guilt', b'this text expresses sadness']\n",
+      "2021-09-21 19:34:16,815 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:16,817 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -5192,25 +5211,26 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:02:55,143 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:55,143 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
-      "2021-09-08 11:02:55,143 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:55,144 Parameters:\n",
-      "2021-09-08 11:02:55,144  - learning_rate: \"0.02\"\n",
-      "2021-09-08 11:02:55,144  - mini_batch_size: \"1\"\n",
-      "2021-09-08 11:02:55,144  - patience: \"3\"\n",
-      "2021-09-08 11:02:55,145  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 11:02:55,145  - max_epochs: \"10\"\n",
-      "2021-09-08 11:02:55,145  - shuffle: \"True\"\n",
-      "2021-09-08 11:02:55,146  - train_with_dev: \"False\"\n",
-      "2021-09-08 11:02:55,146  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 11:02:55,146 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:55,146 Model training base path: \"None\"\n",
-      "2021-09-08 11:02:55,147 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:55,147 Device: cuda:1\n",
-      "2021-09-08 11:02:55,147 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:55,147 Embeddings storage mode: cpu\n",
-      "2021-09-08 11:02:55,169 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 19:34:16,817 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:16,818 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
+      "2021-09-21 19:34:16,818 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:16,818 Parameters:\n",
+      "2021-09-21 19:34:16,819  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:34:16,819  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:34:16,819  - patience: \"3\"\n",
+      "2021-09-21 19:34:16,819  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:34:16,820  - max_epochs: \"10\"\n",
+      "2021-09-21 19:34:16,820  - shuffle: \"True\"\n",
+      "2021-09-21 19:34:16,820  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:34:16,821  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:34:16,821 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:16,821 Model training base path: \"None\"\n",
+      "2021-09-21 19:34:16,821 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:16,822 Device: cuda:0\n",
+      "2021-09-21 19:34:16,822 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:16,822 Embeddings storage mode: cpu\n",
+      "2021-09-21 19:34:16,830 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:16,950 epoch 1 - iter 1/7 - loss 0.41910401 - samples/sec: 12.97 - lr: 0.020000\n"
      ]
     },
     {
@@ -5224,142 +5244,141 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:02:55,224 epoch 1 - iter 1/7 - loss 0.56788278 - samples/sec: 25.26 - lr: 0.020000\n",
-      "2021-09-08 11:02:55,271 epoch 1 - iter 2/7 - loss 0.57454211 - samples/sec: 21.48 - lr: 0.020000\n",
-      "2021-09-08 11:02:55,318 epoch 1 - iter 3/7 - loss 0.68069202 - samples/sec: 21.34 - lr: 0.020000\n",
-      "2021-09-08 11:02:55,365 epoch 1 - iter 4/7 - loss 0.66239512 - samples/sec: 21.44 - lr: 0.020000\n",
-      "2021-09-08 11:02:55,412 epoch 1 - iter 5/7 - loss 0.69451433 - samples/sec: 21.32 - lr: 0.020000\n",
-      "2021-09-08 11:02:55,467 epoch 1 - iter 6/7 - loss 0.63748052 - samples/sec: 18.57 - lr: 0.020000\n",
-      "2021-09-08 11:02:55,513 epoch 1 - iter 7/7 - loss 0.69995213 - samples/sec: 21.66 - lr: 0.020000\n",
-      "2021-09-08 11:02:55,514 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:55,514 EPOCH 1 done: loss 0.7000 - lr 0.0200000\n",
-      "2021-09-08 11:02:55,543 DEV : loss 0.28812673687934875 - score 0.0\n",
-      "2021-09-08 11:02:55,544 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 11:03:07,628 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:07,692 epoch 2 - iter 1/7 - loss 0.58530718 - samples/sec: 20.72 - lr: 0.020000\n",
-      "2021-09-08 11:03:07,740 epoch 2 - iter 2/7 - loss 0.68677461 - samples/sec: 21.28 - lr: 0.020000\n",
-      "2021-09-08 11:03:07,786 epoch 2 - iter 3/7 - loss 0.55524253 - samples/sec: 21.58 - lr: 0.020000\n",
-      "2021-09-08 11:03:07,834 epoch 2 - iter 4/7 - loss 0.56240725 - samples/sec: 21.15 - lr: 0.020000\n",
-      "2021-09-08 11:03:07,881 epoch 2 - iter 5/7 - loss 0.57089005 - samples/sec: 21.59 - lr: 0.020000\n",
-      "2021-09-08 11:03:07,930 epoch 2 - iter 6/7 - loss 0.53226309 - samples/sec: 20.34 - lr: 0.020000\n",
-      "2021-09-08 11:03:07,977 epoch 2 - iter 7/7 - loss 0.55218114 - samples/sec: 21.47 - lr: 0.020000\n",
-      "2021-09-08 11:03:07,978 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:07,979 EPOCH 2 done: loss 0.5522 - lr 0.0200000\n",
-      "2021-09-08 11:03:10,776 DEV : loss 0.393248051404953 - score 0.0\n",
-      "2021-09-08 11:03:10,776 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:03:10,779 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:10,838 epoch 3 - iter 1/7 - loss 0.13614629 - samples/sec: 22.41 - lr: 0.020000\n",
-      "2021-09-08 11:03:10,885 epoch 3 - iter 2/7 - loss 0.31947740 - samples/sec: 21.21 - lr: 0.020000\n",
-      "2021-09-08 11:03:10,932 epoch 3 - iter 3/7 - loss 0.51264711 - samples/sec: 21.57 - lr: 0.020000\n",
-      "2021-09-08 11:03:10,985 epoch 3 - iter 4/7 - loss 0.45220942 - samples/sec: 19.17 - lr: 0.020000\n",
-      "2021-09-08 11:03:11,031 epoch 3 - iter 5/7 - loss 0.44378538 - samples/sec: 21.65 - lr: 0.020000\n",
-      "2021-09-08 11:03:11,078 epoch 3 - iter 6/7 - loss 0.49875316 - samples/sec: 21.46 - lr: 0.020000\n",
-      "2021-09-08 11:03:11,125 epoch 3 - iter 7/7 - loss 0.51145060 - samples/sec: 21.53 - lr: 0.020000\n",
-      "2021-09-08 11:03:11,126 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:11,127 EPOCH 3 done: loss 0.5115 - lr 0.0200000\n",
-      "2021-09-08 11:03:11,154 DEV : loss 0.2849293351173401 - score 0.0\n",
-      "2021-09-08 11:03:11,155 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:34:17,025 epoch 1 - iter 2/7 - loss 0.56066044 - samples/sec: 13.40 - lr: 0.020000\n",
+      "2021-09-21 19:34:17,101 epoch 1 - iter 3/7 - loss 0.60583668 - samples/sec: 13.29 - lr: 0.020000\n",
+      "2021-09-21 19:34:17,172 epoch 1 - iter 4/7 - loss 0.55870661 - samples/sec: 14.08 - lr: 0.020000\n",
+      "2021-09-21 19:34:17,253 epoch 1 - iter 5/7 - loss 0.67255068 - samples/sec: 12.48 - lr: 0.020000\n",
+      "2021-09-21 19:34:17,337 epoch 1 - iter 6/7 - loss 0.63085594 - samples/sec: 11.95 - lr: 0.020000\n",
+      "2021-09-21 19:34:17,404 epoch 1 - iter 7/7 - loss 0.65879212 - samples/sec: 15.03 - lr: 0.020000\n",
+      "2021-09-21 19:34:17,405 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:17,405 EPOCH 1 done: loss 0.6588 - lr 0.0200000\n",
+      "2021-09-21 19:34:17,560 DEV : loss 0.436704158782959 - score 0.0\n",
+      "2021-09-21 19:34:17,561 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:03:20,128 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:20,192 epoch 4 - iter 1/7 - loss 0.25130209 - samples/sec: 20.76 - lr: 0.020000\n",
-      "2021-09-08 11:03:20,240 epoch 4 - iter 2/7 - loss 0.16588796 - samples/sec: 20.79 - lr: 0.020000\n",
-      "2021-09-08 11:03:20,287 epoch 4 - iter 3/7 - loss 0.26646022 - samples/sec: 21.43 - lr: 0.020000\n",
-      "2021-09-08 11:03:20,334 epoch 4 - iter 4/7 - loss 0.39739254 - samples/sec: 21.43 - lr: 0.020000\n",
-      "2021-09-08 11:03:20,382 epoch 4 - iter 5/7 - loss 0.44916328 - samples/sec: 21.35 - lr: 0.020000\n",
-      "2021-09-08 11:03:20,428 epoch 4 - iter 6/7 - loss 0.42276811 - samples/sec: 21.63 - lr: 0.020000\n",
-      "2021-09-08 11:03:20,475 epoch 4 - iter 7/7 - loss 0.43736567 - samples/sec: 21.54 - lr: 0.020000\n",
-      "2021-09-08 11:03:20,476 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:20,476 EPOCH 4 done: loss 0.4374 - lr 0.0200000\n",
-      "2021-09-08 11:03:22,294 DEV : loss 0.42038458585739136 - score 0.0\n",
-      "2021-09-08 11:03:22,295 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:03:22,306 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:22,369 epoch 5 - iter 1/7 - loss 0.11691470 - samples/sec: 20.62 - lr: 0.020000\n",
-      "2021-09-08 11:03:22,416 epoch 5 - iter 2/7 - loss 0.36679864 - samples/sec: 21.49 - lr: 0.020000\n",
-      "2021-09-08 11:03:22,463 epoch 5 - iter 3/7 - loss 0.50572745 - samples/sec: 21.40 - lr: 0.020000\n",
-      "2021-09-08 11:03:22,510 epoch 5 - iter 4/7 - loss 0.53255032 - samples/sec: 21.36 - lr: 0.020000\n",
-      "2021-09-08 11:03:22,557 epoch 5 - iter 5/7 - loss 0.49654039 - samples/sec: 21.44 - lr: 0.020000\n",
-      "2021-09-08 11:03:22,605 epoch 5 - iter 6/7 - loss 0.47512459 - samples/sec: 21.25 - lr: 0.020000\n",
-      "2021-09-08 11:03:22,652 epoch 5 - iter 7/7 - loss 0.46569184 - samples/sec: 21.58 - lr: 0.020000\n",
-      "2021-09-08 11:03:22,653 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:22,653 EPOCH 5 done: loss 0.4657 - lr 0.0200000\n",
-      "2021-09-08 11:03:22,681 DEV : loss 0.19040799140930176 - score 0.0\n",
-      "2021-09-08 11:03:22,681 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:34:25,069 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:25,133 epoch 2 - iter 1/7 - loss 0.22474171 - samples/sec: 20.94 - lr: 0.020000\n",
+      "2021-09-21 19:34:25,182 epoch 2 - iter 2/7 - loss 0.33457144 - samples/sec: 20.50 - lr: 0.020000\n",
+      "2021-09-21 19:34:25,229 epoch 2 - iter 3/7 - loss 0.52620808 - samples/sec: 21.36 - lr: 0.020000\n",
+      "2021-09-21 19:34:25,277 epoch 2 - iter 4/7 - loss 0.55308504 - samples/sec: 21.33 - lr: 0.020000\n",
+      "2021-09-21 19:34:25,323 epoch 2 - iter 5/7 - loss 0.56438089 - samples/sec: 21.61 - lr: 0.020000\n",
+      "2021-09-21 19:34:25,370 epoch 2 - iter 6/7 - loss 0.64303003 - samples/sec: 21.64 - lr: 0.020000\n",
+      "2021-09-21 19:34:25,418 epoch 2 - iter 7/7 - loss 0.62817418 - samples/sec: 20.84 - lr: 0.020000\n",
+      "2021-09-21 19:34:25,419 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:25,420 EPOCH 2 done: loss 0.6282 - lr 0.0200000\n",
+      "2021-09-21 19:34:25,588 DEV : loss 0.35015448927879333 - score 0.0\n",
+      "2021-09-21 19:34:25,589 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:03:26,785 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:26,845 epoch 6 - iter 1/7 - loss 0.14535077 - samples/sec: 22.43 - lr: 0.020000\n",
-      "2021-09-08 11:03:26,893 epoch 6 - iter 2/7 - loss 0.30043391 - samples/sec: 21.19 - lr: 0.020000\n",
-      "2021-09-08 11:03:26,939 epoch 6 - iter 3/7 - loss 0.27253319 - samples/sec: 21.76 - lr: 0.020000\n",
-      "2021-09-08 11:03:26,986 epoch 6 - iter 4/7 - loss 0.43212270 - samples/sec: 21.38 - lr: 0.020000\n",
-      "2021-09-08 11:03:27,032 epoch 6 - iter 5/7 - loss 0.36327168 - samples/sec: 21.93 - lr: 0.020000\n",
-      "2021-09-08 11:03:27,079 epoch 6 - iter 6/7 - loss 0.42207143 - samples/sec: 21.68 - lr: 0.020000\n",
-      "2021-09-08 11:03:27,125 epoch 6 - iter 7/7 - loss 0.49852514 - samples/sec: 21.76 - lr: 0.020000\n",
-      "2021-09-08 11:03:27,126 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:27,127 EPOCH 6 done: loss 0.4985 - lr 0.0200000\n",
-      "2021-09-08 11:03:27,154 DEV : loss 0.1580030769109726 - score 0.0\n",
-      "2021-09-08 11:03:27,155 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:34:38,421 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:38,526 epoch 3 - iter 1/7 - loss 0.30760431 - samples/sec: 13.28 - lr: 0.020000\n",
+      "2021-09-21 19:34:38,606 epoch 3 - iter 2/7 - loss 0.38175416 - samples/sec: 12.65 - lr: 0.020000\n",
+      "2021-09-21 19:34:38,662 epoch 3 - iter 3/7 - loss 0.48952667 - samples/sec: 17.91 - lr: 0.020000\n",
+      "2021-09-21 19:34:38,719 epoch 3 - iter 4/7 - loss 0.51178165 - samples/sec: 17.99 - lr: 0.020000\n",
+      "2021-09-21 19:34:38,775 epoch 3 - iter 5/7 - loss 0.53176372 - samples/sec: 17.93 - lr: 0.020000\n",
+      "2021-09-21 19:34:38,832 epoch 3 - iter 6/7 - loss 0.56037334 - samples/sec: 17.76 - lr: 0.020000\n",
+      "2021-09-21 19:34:38,892 epoch 3 - iter 7/7 - loss 0.59674084 - samples/sec: 16.87 - lr: 0.020000\n",
+      "2021-09-21 19:34:38,894 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:38,894 EPOCH 3 done: loss 0.5967 - lr 0.0200000\n",
+      "2021-09-21 19:34:38,949 DEV : loss 0.4655188322067261 - score 0.0\n",
+      "2021-09-21 19:34:38,950 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:34:38,952 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:39,044 epoch 4 - iter 1/7 - loss 1.27481127 - samples/sec: 15.67 - lr: 0.020000\n",
+      "2021-09-21 19:34:39,102 epoch 4 - iter 2/7 - loss 1.00769150 - samples/sec: 17.55 - lr: 0.020000\n",
+      "2021-09-21 19:34:39,166 epoch 4 - iter 3/7 - loss 0.73724041 - samples/sec: 15.65 - lr: 0.020000\n",
+      "2021-09-21 19:34:39,224 epoch 4 - iter 4/7 - loss 0.69755759 - samples/sec: 17.33 - lr: 0.020000\n",
+      "2021-09-21 19:34:39,282 epoch 4 - iter 5/7 - loss 0.64724088 - samples/sec: 17.58 - lr: 0.020000\n",
+      "2021-09-21 19:34:39,346 epoch 4 - iter 6/7 - loss 0.57504095 - samples/sec: 15.83 - lr: 0.020000\n",
+      "2021-09-21 19:34:39,403 epoch 4 - iter 7/7 - loss 0.64614279 - samples/sec: 17.63 - lr: 0.020000\n",
+      "2021-09-21 19:34:39,404 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:39,404 EPOCH 4 done: loss 0.6461 - lr 0.0200000\n",
+      "2021-09-21 19:34:39,582 DEV : loss 0.25431469082832336 - score 0.0\n",
+      "2021-09-21 19:34:39,583 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:03:31,512 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:31,575 epoch 7 - iter 1/7 - loss 0.11510216 - samples/sec: 21.08 - lr: 0.020000\n",
-      "2021-09-08 11:03:31,622 epoch 7 - iter 2/7 - loss 0.17840305 - samples/sec: 21.48 - lr: 0.020000\n",
-      "2021-09-08 11:03:31,668 epoch 7 - iter 3/7 - loss 0.35524193 - samples/sec: 21.69 - lr: 0.020000\n",
-      "2021-09-08 11:03:31,715 epoch 7 - iter 4/7 - loss 0.40601851 - samples/sec: 21.63 - lr: 0.020000\n",
-      "2021-09-08 11:03:31,762 epoch 7 - iter 5/7 - loss 0.36274629 - samples/sec: 21.65 - lr: 0.020000\n",
-      "2021-09-08 11:03:31,809 epoch 7 - iter 6/7 - loss 0.53465792 - samples/sec: 21.45 - lr: 0.020000\n",
-      "2021-09-08 11:03:31,854 epoch 7 - iter 7/7 - loss 0.46501374 - samples/sec: 22.55 - lr: 0.020000\n",
-      "2021-09-08 11:03:31,854 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:31,855 EPOCH 7 done: loss 0.4650 - lr 0.0200000\n",
-      "2021-09-08 11:03:31,883 DEV : loss 0.15144658088684082 - score 0.0\n",
-      "2021-09-08 11:03:31,884 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 11:03:36,157 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:36,221 epoch 8 - iter 1/7 - loss 0.41601291 - samples/sec: 20.89 - lr: 0.020000\n",
-      "2021-09-08 11:03:36,268 epoch 8 - iter 2/7 - loss 0.27283635 - samples/sec: 21.33 - lr: 0.020000\n",
-      "2021-09-08 11:03:36,315 epoch 8 - iter 3/7 - loss 0.25189546 - samples/sec: 21.69 - lr: 0.020000\n",
-      "2021-09-08 11:03:36,362 epoch 8 - iter 4/7 - loss 0.37210564 - samples/sec: 21.27 - lr: 0.020000\n"
+      "2021-09-21 19:34:58,588 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:58,745 epoch 5 - iter 1/7 - loss 0.54995137 - samples/sec: 11.47 - lr: 0.020000\n",
+      "2021-09-21 19:34:58,838 epoch 5 - iter 2/7 - loss 0.40676658 - samples/sec: 10.83 - lr: 0.020000\n",
+      "2021-09-21 19:34:58,919 epoch 5 - iter 3/7 - loss 0.46926040 - samples/sec: 12.36 - lr: 0.020000\n",
+      "2021-09-21 19:34:58,992 epoch 5 - iter 4/7 - loss 0.58173413 - samples/sec: 13.93 - lr: 0.020000\n",
+      "2021-09-21 19:34:59,071 epoch 5 - iter 5/7 - loss 0.59190900 - samples/sec: 12.81 - lr: 0.020000\n",
+      "2021-09-21 19:34:59,136 epoch 5 - iter 6/7 - loss 0.50522372 - samples/sec: 15.48 - lr: 0.020000\n",
+      "2021-09-21 19:34:59,226 epoch 5 - iter 7/7 - loss 0.49754824 - samples/sec: 11.25 - lr: 0.020000\n",
+      "2021-09-21 19:34:59,227 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:59,227 EPOCH 5 done: loss 0.4975 - lr 0.0200000\n",
+      "2021-09-21 19:34:59,420 DEV : loss 0.2724918723106384 - score 0.0\n",
+      "2021-09-21 19:34:59,423 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:34:59,509 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:59,659 epoch 6 - iter 1/7 - loss 0.52885801 - samples/sec: 10.58 - lr: 0.020000\n",
+      "2021-09-21 19:34:59,720 epoch 6 - iter 2/7 - loss 0.45664236 - samples/sec: 16.78 - lr: 0.020000\n",
+      "2021-09-21 19:34:59,789 epoch 6 - iter 3/7 - loss 0.32120260 - samples/sec: 14.58 - lr: 0.020000\n",
+      "2021-09-21 19:34:59,850 epoch 6 - iter 4/7 - loss 0.35339846 - samples/sec: 16.36 - lr: 0.020000\n",
+      "2021-09-21 19:34:59,918 epoch 6 - iter 5/7 - loss 0.37752129 - samples/sec: 15.07 - lr: 0.020000\n",
+      "2021-09-21 19:34:59,969 epoch 6 - iter 6/7 - loss 0.38634373 - samples/sec: 19.99 - lr: 0.020000\n",
+      "2021-09-21 19:35:00,044 epoch 6 - iter 7/7 - loss 0.35524511 - samples/sec: 13.51 - lr: 0.020000\n",
+      "2021-09-21 19:35:00,045 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:00,046 EPOCH 6 done: loss 0.3552 - lr 0.0200000\n",
+      "2021-09-21 19:35:00,201 DEV : loss 0.2710326910018921 - score 0.0\n",
+      "2021-09-21 19:35:00,204 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:35:00,281 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:00,480 epoch 7 - iter 1/7 - loss 0.36036727 - samples/sec: 14.06 - lr: 0.020000\n",
+      "2021-09-21 19:35:00,544 epoch 7 - iter 2/7 - loss 0.23765133 - samples/sec: 15.65 - lr: 0.020000\n",
+      "2021-09-21 19:35:00,606 epoch 7 - iter 3/7 - loss 0.43851353 - samples/sec: 16.53 - lr: 0.020000\n",
+      "2021-09-21 19:35:00,679 epoch 7 - iter 4/7 - loss 0.46338909 - samples/sec: 13.64 - lr: 0.020000\n",
+      "2021-09-21 19:35:00,758 epoch 7 - iter 5/7 - loss 0.48095862 - samples/sec: 12.80 - lr: 0.020000\n",
+      "2021-09-21 19:35:00,808 epoch 7 - iter 6/7 - loss 0.47642811 - samples/sec: 20.19 - lr: 0.020000\n",
+      "2021-09-21 19:35:00,875 epoch 7 - iter 7/7 - loss 0.47955095 - samples/sec: 15.14 - lr: 0.020000\n",
+      "2021-09-21 19:35:00,876 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:00,876 EPOCH 7 done: loss 0.4796 - lr 0.0200000\n",
+      "2021-09-21 19:35:01,060 DEV : loss 0.37405553460121155 - score 0.0\n",
+      "2021-09-21 19:35:01,063 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:35:01,158 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:01,252 epoch 8 - iter 1/7 - loss 0.22006260 - samples/sec: 15.47 - lr: 0.020000\n",
+      "2021-09-21 19:35:01,342 epoch 8 - iter 2/7 - loss 0.28662492 - samples/sec: 11.14 - lr: 0.020000\n",
+      "2021-09-21 19:35:01,401 epoch 8 - iter 3/7 - loss 0.34228760 - samples/sec: 17.13 - lr: 0.020000\n",
+      "2021-09-21 19:35:01,462 epoch 8 - iter 4/7 - loss 0.30981060 - samples/sec: 16.77 - lr: 0.020000\n",
+      "2021-09-21 19:35:01,521 epoch 8 - iter 5/7 - loss 0.34620328 - samples/sec: 17.08 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:03:36,409 epoch 8 - iter 5/7 - loss 0.45092950 - samples/sec: 21.76 - lr: 0.020000\n",
-      "2021-09-08 11:03:36,454 epoch 8 - iter 6/7 - loss 0.38225036 - samples/sec: 22.20 - lr: 0.020000\n",
-      "2021-09-08 11:03:36,501 epoch 8 - iter 7/7 - loss 0.42719464 - samples/sec: 21.73 - lr: 0.020000\n",
-      "2021-09-08 11:03:36,502 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:36,502 EPOCH 8 done: loss 0.4272 - lr 0.0200000\n",
-      "2021-09-08 11:03:36,530 DEV : loss 0.15149228274822235 - score 0.0\n",
-      "2021-09-08 11:03:36,530 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:03:36,532 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:36,592 epoch 9 - iter 1/7 - loss 0.53153211 - samples/sec: 21.63 - lr: 0.020000\n",
-      "2021-09-08 11:03:36,636 epoch 9 - iter 2/7 - loss 0.30621247 - samples/sec: 23.21 - lr: 0.020000\n",
-      "2021-09-08 11:03:36,682 epoch 9 - iter 3/7 - loss 0.34549525 - samples/sec: 21.70 - lr: 0.020000\n",
-      "2021-09-08 11:03:36,729 epoch 9 - iter 4/7 - loss 0.30770345 - samples/sec: 21.57 - lr: 0.020000\n",
-      "2021-09-08 11:03:36,776 epoch 9 - iter 5/7 - loss 0.31679546 - samples/sec: 21.50 - lr: 0.020000\n",
-      "2021-09-08 11:03:36,821 epoch 9 - iter 6/7 - loss 0.26828818 - samples/sec: 22.50 - lr: 0.020000\n",
-      "2021-09-08 11:03:36,869 epoch 9 - iter 7/7 - loss 0.24490869 - samples/sec: 20.89 - lr: 0.020000\n",
-      "2021-09-08 11:03:36,870 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:36,870 EPOCH 9 done: loss 0.2449 - lr 0.0200000\n",
-      "2021-09-08 11:03:36,898 DEV : loss 0.22062374651432037 - score 0.0\n",
-      "2021-09-08 11:03:36,898 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:03:36,900 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:36,956 epoch 10 - iter 1/7 - loss 0.02945293 - samples/sec: 23.42 - lr: 0.020000\n",
-      "2021-09-08 11:03:37,003 epoch 10 - iter 2/7 - loss 0.15829844 - samples/sec: 21.61 - lr: 0.020000\n",
-      "2021-09-08 11:03:37,049 epoch 10 - iter 3/7 - loss 0.17898286 - samples/sec: 21.81 - lr: 0.020000\n",
-      "2021-09-08 11:03:37,096 epoch 10 - iter 4/7 - loss 0.16570854 - samples/sec: 21.50 - lr: 0.020000\n",
-      "2021-09-08 11:03:37,143 epoch 10 - iter 5/7 - loss 0.28543091 - samples/sec: 21.75 - lr: 0.020000\n",
-      "2021-09-08 11:03:37,190 epoch 10 - iter 6/7 - loss 0.29049131 - samples/sec: 21.49 - lr: 0.020000\n",
-      "2021-09-08 11:03:37,234 epoch 10 - iter 7/7 - loss 0.25238853 - samples/sec: 22.72 - lr: 0.020000\n",
-      "2021-09-08 11:03:37,235 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:37,235 EPOCH 10 done: loss 0.2524 - lr 0.0200000\n",
-      "2021-09-08 11:03:37,262 DEV : loss 0.18824879825115204 - score 0.0\n",
-      "2021-09-08 11:03:37,263 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:03:41,353 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:41,354 Testing using best model ...\n",
-      "2021-09-08 11:03:41,355 loading file None/best-model.pt\n",
+      "2021-09-21 19:35:01,580 epoch 8 - iter 6/7 - loss 0.38893108 - samples/sec: 17.04 - lr: 0.020000\n",
+      "2021-09-21 19:35:01,643 epoch 8 - iter 7/7 - loss 0.34643490 - samples/sec: 16.09 - lr: 0.020000\n",
+      "2021-09-21 19:35:01,644 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:01,645 EPOCH 8 done: loss 0.3464 - lr 0.0200000\n",
+      "2021-09-21 19:35:01,828 DEV : loss 0.3046717941761017 - score 0.0\n",
+      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 19:35:01,829 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:35:01,951 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:02,016 epoch 9 - iter 1/7 - loss 0.26033276 - samples/sec: 20.02 - lr: 0.010000\n",
+      "2021-09-21 19:35:02,060 epoch 9 - iter 2/7 - loss 0.19927910 - samples/sec: 22.92 - lr: 0.010000\n",
+      "2021-09-21 19:35:02,107 epoch 9 - iter 3/7 - loss 0.20202452 - samples/sec: 21.68 - lr: 0.010000\n",
+      "2021-09-21 19:35:02,155 epoch 9 - iter 4/7 - loss 0.28639651 - samples/sec: 21.22 - lr: 0.010000\n",
+      "2021-09-21 19:35:02,202 epoch 9 - iter 5/7 - loss 0.32516850 - samples/sec: 21.36 - lr: 0.010000\n",
+      "2021-09-21 19:35:02,252 epoch 9 - iter 6/7 - loss 0.31408377 - samples/sec: 20.24 - lr: 0.010000\n",
+      "2021-09-21 19:35:02,296 epoch 9 - iter 7/7 - loss 0.27386154 - samples/sec: 22.84 - lr: 0.010000\n",
+      "2021-09-21 19:35:02,297 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:02,298 EPOCH 9 done: loss 0.2739 - lr 0.0100000\n",
+      "2021-09-21 19:35:02,459 DEV : loss 0.3819499611854553 - score 0.0\n",
+      "2021-09-21 19:35:02,460 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:35:02,546 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:02,607 epoch 10 - iter 1/7 - loss 0.06792632 - samples/sec: 21.44 - lr: 0.010000\n",
+      "2021-09-21 19:35:02,656 epoch 10 - iter 2/7 - loss 0.10648865 - samples/sec: 20.61 - lr: 0.010000\n",
+      "2021-09-21 19:35:02,705 epoch 10 - iter 3/7 - loss 0.20108987 - samples/sec: 20.76 - lr: 0.010000\n",
+      "2021-09-21 19:35:02,754 epoch 10 - iter 4/7 - loss 0.17814841 - samples/sec: 20.61 - lr: 0.010000\n",
+      "2021-09-21 19:35:02,802 epoch 10 - iter 5/7 - loss 0.27100284 - samples/sec: 21.14 - lr: 0.010000\n",
+      "2021-09-21 19:35:02,846 epoch 10 - iter 6/7 - loss 0.24065930 - samples/sec: 22.90 - lr: 0.010000\n",
+      "2021-09-21 19:35:02,894 epoch 10 - iter 7/7 - loss 0.30260142 - samples/sec: 21.34 - lr: 0.010000\n",
+      "2021-09-21 19:35:02,895 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:02,895 EPOCH 10 done: loss 0.3026 - lr 0.0100000\n",
+      "2021-09-21 19:35:03,058 DEV : loss 0.20910502970218658 - score 0.0\n",
+      "2021-09-21 19:35:03,059 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:35:41,587 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:41,587 Testing using best model ...\n",
+      "2021-09-21 19:35:41,589 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:03:45,827 \t0.0\n",
-      "2021-09-08 11:03:45,828 \n",
+      "2021-09-21 19:35:47,193 \t0.0\n",
+      "2021-09-21 19:35:47,194 \n",
       "Results:\n",
       "- F-score (micro) 0.0\n",
       "- F-score (macro) 0.0\n",
@@ -5374,16 +5393,16 @@
       " this text expresses disgust     0.0000    0.0000    0.0000         0\n",
       "    this text expresses fear     0.0000    0.0000    0.0000         0\n",
       "   this text expresses anger     0.0000    0.0000    0.0000         0\n",
-      " this text expresses sadness     0.0000    0.0000    0.0000         0\n",
-      "   this text expresses guilt     0.0000    0.0000    0.0000         1\n",
+      "   this text expresses guilt     0.0000    0.0000    0.0000         0\n",
+      " this text expresses sadness     0.0000    0.0000    0.0000         1\n",
       "\n",
       "                   micro avg     0.0000    0.0000    0.0000         1\n",
       "                   macro avg     0.0000    0.0000    0.0000         1\n",
       "                weighted avg     0.0000    0.0000    0.0000         1\n",
       "                 samples avg     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "2021-09-08 11:03:45,828 ----------------------------------------------------------------------------------------------------\n",
-      "Accuracy Durchschnitt: 0.2187775677378702\n"
+      "2021-09-21 19:35:47,194 ----------------------------------------------------------------------------------------------------\n",
+      "Accuracy Durchschnitt: 0.19155639571518587\n"
      ]
     }
    ],
@@ -5459,6 +5478,26 @@
     "print(f'Accuracy Durchschnitt: {statistics.mean(avg_acc_list)}')"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "709cee3d",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.20384373030875866, 0.19974795211090107, 0.12885948330182734, 0.2148708254568368, 0.21045998739760555]\n",
+      "0.031780155526494734\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(avg_acc_list)\n",
+    "print(statistics.pstdev(avg_acc_list))"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "7c31f0f3",
@@ -5469,7 +5508,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 8,
    "id": "263ee449",
    "metadata": {},
    "outputs": [
@@ -5477,25 +5516,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:04:35,925 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
+      "2021-09-21 19:37:50,818 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:04:39,866 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:37:55,126 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 8/8 [00:00<00:00, 29589.45it/s]"
+      "100%|██████████| 8/8 [00:00<00:00, 23029.81it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:04:39,868 [b'the astonishment you feel when something totally unexpected happens to you', b'a strong positive emotion of regard and affection', b'strong feelings of dislike', b'a feeling that is oriented toward some real or supposed grievance', b'remorse caused by feeling responsible for some offense', b'a painful emotion resulting from an awareness of inadequacy or guilt', b'emotions experienced when not in a state of well-being', b'the emotion of great happiness']\n",
-      "2021-09-08 11:04:39,877 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:39,879 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:37:55,128 [b'the emotion of great happiness', b'the astonishment you feel when something totally unexpected happens to you', b'a strong positive emotion of regard and affection', b'strong feelings of dislike', b'remorse caused by feeling responsible for some offense', b'a painful emotion resulting from an awareness of inadequacy or guilt', b'emotions experienced when not in a state of well-being', b'a feeling that is oriented toward some real or supposed grievance']\n",
+      "2021-09-21 19:37:55,137 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:55,139 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -5808,28 +5847,27 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:04:39,879 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:39,879 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
-      "2021-09-08 11:04:39,880 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:39,880 Parameters:\n",
-      "2021-09-08 11:04:39,880  - learning_rate: \"0.02\"\n",
-      "2021-09-08 11:04:39,881  - mini_batch_size: \"1\"\n",
-      "2021-09-08 11:04:39,881  - patience: \"3\"\n",
-      "2021-09-08 11:04:39,881  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 11:04:39,881  - max_epochs: \"10\"\n",
-      "2021-09-08 11:04:39,882  - shuffle: \"True\"\n",
-      "2021-09-08 11:04:39,882  - train_with_dev: \"False\"\n",
-      "2021-09-08 11:04:39,882  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 11:04:39,883 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:39,883 Model training base path: \"None\"\n",
-      "2021-09-08 11:04:39,883 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:39,883 Device: cuda:1\n",
-      "2021-09-08 11:04:39,884 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:39,884 Embeddings storage mode: cpu\n",
-      "2021-09-08 11:04:39,890 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:39,954 epoch 1 - iter 1/7 - loss 0.67440158 - samples/sec: 21.68 - lr: 0.020000\n",
-      "2021-09-08 11:04:40,009 epoch 1 - iter 2/7 - loss 0.64981243 - samples/sec: 18.05 - lr: 0.020000\n",
-      "2021-09-08 11:04:40,065 epoch 1 - iter 3/7 - loss 0.67126129 - samples/sec: 18.02 - lr: 0.020000\n"
+      "2021-09-21 19:37:55,140 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:55,140 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
+      "2021-09-21 19:37:55,140 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:55,141 Parameters:\n",
+      "2021-09-21 19:37:55,141  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:37:55,142  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:37:55,142  - patience: \"3\"\n",
+      "2021-09-21 19:37:55,142  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:37:55,143  - max_epochs: \"10\"\n",
+      "2021-09-21 19:37:55,143  - shuffle: \"True\"\n",
+      "2021-09-21 19:37:55,143  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:37:55,144  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:37:55,144 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:55,144 Model training base path: \"None\"\n",
+      "2021-09-21 19:37:55,145 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:55,145 Device: cuda:0\n",
+      "2021-09-21 19:37:55,145 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:55,145 Embeddings storage mode: cpu\n",
+      "2021-09-21 19:37:55,152 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:55,246 epoch 1 - iter 1/7 - loss 0.68272167 - samples/sec: 20.48 - lr: 0.020000\n",
+      "2021-09-21 19:37:55,315 epoch 1 - iter 2/7 - loss 0.61142197 - samples/sec: 14.47 - lr: 0.020000\n"
      ]
     },
     {
@@ -5843,138 +5881,139 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:04:40,121 epoch 1 - iter 4/7 - loss 0.67911538 - samples/sec: 17.98 - lr: 0.020000\n",
-      "2021-09-08 11:04:40,177 epoch 1 - iter 5/7 - loss 0.71649930 - samples/sec: 18.04 - lr: 0.020000\n",
-      "2021-09-08 11:04:40,233 epoch 1 - iter 6/7 - loss 0.73809925 - samples/sec: 18.18 - lr: 0.020000\n",
-      "2021-09-08 11:04:40,289 epoch 1 - iter 7/7 - loss 0.73775664 - samples/sec: 18.06 - lr: 0.020000\n",
-      "2021-09-08 11:04:40,289 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:40,290 EPOCH 1 done: loss 0.7378 - lr 0.0200000\n",
-      "2021-09-08 11:04:40,323 DEV : loss 0.3749215602874756 - score 0.0\n",
-      "2021-09-08 11:04:40,323 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:37:55,391 epoch 1 - iter 3/7 - loss 0.65916749 - samples/sec: 13.35 - lr: 0.020000\n",
+      "2021-09-21 19:37:55,461 epoch 1 - iter 4/7 - loss 0.68186173 - samples/sec: 14.33 - lr: 0.020000\n",
+      "2021-09-21 19:37:55,538 epoch 1 - iter 5/7 - loss 0.74155376 - samples/sec: 13.22 - lr: 0.020000\n",
+      "2021-09-21 19:37:55,629 epoch 1 - iter 6/7 - loss 0.72177367 - samples/sec: 11.00 - lr: 0.020000\n",
+      "2021-09-21 19:37:55,704 epoch 1 - iter 7/7 - loss 0.74244748 - samples/sec: 13.51 - lr: 0.020000\n",
+      "2021-09-21 19:37:55,705 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:55,705 EPOCH 1 done: loss 0.7424 - lr 0.0200000\n",
+      "2021-09-21 19:37:55,862 DEV : loss 0.3792186379432678 - score 0.0\n",
+      "2021-09-21 19:37:55,863 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:04:44,361 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:44,437 epoch 2 - iter 1/7 - loss 0.80012608 - samples/sec: 17.59 - lr: 0.020000\n",
-      "2021-09-08 11:04:44,493 epoch 2 - iter 2/7 - loss 0.73192793 - samples/sec: 18.01 - lr: 0.020000\n",
-      "2021-09-08 11:04:44,549 epoch 2 - iter 3/7 - loss 0.69905788 - samples/sec: 17.98 - lr: 0.020000\n",
-      "2021-09-08 11:04:44,605 epoch 2 - iter 4/7 - loss 0.68400705 - samples/sec: 18.01 - lr: 0.020000\n",
-      "2021-09-08 11:04:44,660 epoch 2 - iter 5/7 - loss 0.66119099 - samples/sec: 18.19 - lr: 0.020000\n",
-      "2021-09-08 11:04:44,716 epoch 2 - iter 6/7 - loss 0.66123378 - samples/sec: 18.04 - lr: 0.020000\n",
-      "2021-09-08 11:04:44,772 epoch 2 - iter 7/7 - loss 0.67303089 - samples/sec: 18.18 - lr: 0.020000\n",
-      "2021-09-08 11:04:44,773 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:44,773 EPOCH 2 done: loss 0.6730 - lr 0.0200000\n",
-      "2021-09-08 11:04:44,901 DEV : loss 0.38371503353118896 - score 0.0\n",
-      "2021-09-08 11:04:44,902 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:04:44,977 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:45,049 epoch 3 - iter 1/7 - loss 0.52671820 - samples/sec: 18.07 - lr: 0.020000\n",
-      "2021-09-08 11:04:45,105 epoch 3 - iter 2/7 - loss 0.53446859 - samples/sec: 18.08 - lr: 0.020000\n",
-      "2021-09-08 11:04:45,160 epoch 3 - iter 3/7 - loss 0.55805304 - samples/sec: 18.27 - lr: 0.020000\n",
-      "2021-09-08 11:04:45,218 epoch 3 - iter 4/7 - loss 0.55104870 - samples/sec: 17.52 - lr: 0.020000\n",
-      "2021-09-08 11:04:45,273 epoch 3 - iter 5/7 - loss 0.56725582 - samples/sec: 18.07 - lr: 0.020000\n",
-      "2021-09-08 11:04:45,329 epoch 3 - iter 6/7 - loss 0.57009010 - samples/sec: 18.11 - lr: 0.020000\n",
-      "2021-09-08 11:04:45,385 epoch 3 - iter 7/7 - loss 0.59830600 - samples/sec: 18.04 - lr: 0.020000\n",
-      "2021-09-08 11:04:45,386 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:45,386 EPOCH 3 done: loss 0.5983 - lr 0.0200000\n",
-      "2021-09-08 11:04:45,517 DEV : loss 0.3085048198699951 - score 0.0\n",
-      "2021-09-08 11:04:45,518 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 11:04:52,785 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:52,861 epoch 4 - iter 1/7 - loss 0.44375360 - samples/sec: 17.69 - lr: 0.020000\n",
-      "2021-09-08 11:04:52,917 epoch 4 - iter 2/7 - loss 0.48990071 - samples/sec: 18.07 - lr: 0.020000\n",
-      "2021-09-08 11:04:52,973 epoch 4 - iter 3/7 - loss 0.51006470 - samples/sec: 18.05 - lr: 0.020000\n",
-      "2021-09-08 11:04:53,028 epoch 4 - iter 4/7 - loss 0.50660203 - samples/sec: 18.28 - lr: 0.020000\n",
-      "2021-09-08 11:04:53,084 epoch 4 - iter 5/7 - loss 0.52823412 - samples/sec: 18.03 - lr: 0.020000\n",
-      "2021-09-08 11:04:53,140 epoch 4 - iter 6/7 - loss 0.53651617 - samples/sec: 18.18 - lr: 0.020000\n",
-      "2021-09-08 11:04:53,198 epoch 4 - iter 7/7 - loss 0.53705428 - samples/sec: 17.21 - lr: 0.020000\n",
-      "2021-09-08 11:04:53,199 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:53,200 EPOCH 4 done: loss 0.5371 - lr 0.0200000\n",
-      "2021-09-08 11:04:53,234 DEV : loss 0.5961585640907288 - score 0.0\n",
-      "2021-09-08 11:04:53,234 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:04:53,236 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:53,308 epoch 5 - iter 1/7 - loss 0.41904119 - samples/sec: 18.02 - lr: 0.020000\n",
-      "2021-09-08 11:04:53,364 epoch 5 - iter 2/7 - loss 0.56100695 - samples/sec: 18.07 - lr: 0.020000\n",
-      "2021-09-08 11:04:53,420 epoch 5 - iter 3/7 - loss 0.55987794 - samples/sec: 18.05 - lr: 0.020000\n",
-      "2021-09-08 11:04:53,476 epoch 5 - iter 4/7 - loss 0.53143149 - samples/sec: 18.07 - lr: 0.020000\n",
-      "2021-09-08 11:04:53,531 epoch 5 - iter 5/7 - loss 0.54171646 - samples/sec: 18.07 - lr: 0.020000\n",
-      "2021-09-08 11:04:53,587 epoch 5 - iter 6/7 - loss 0.55302359 - samples/sec: 18.16 - lr: 0.020000\n",
-      "2021-09-08 11:04:53,642 epoch 5 - iter 7/7 - loss 0.54131330 - samples/sec: 18.26 - lr: 0.020000\n",
-      "2021-09-08 11:04:53,643 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:53,643 EPOCH 5 done: loss 0.5413 - lr 0.0200000\n",
-      "2021-09-08 11:04:53,676 DEV : loss 0.2592724561691284 - score 0.0\n",
-      "2021-09-08 11:04:53,677 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:38:01,570 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:01,670 epoch 2 - iter 1/7 - loss 0.58039862 - samples/sec: 15.21 - lr: 0.020000\n",
+      "2021-09-21 19:38:01,741 epoch 2 - iter 2/7 - loss 0.63579291 - samples/sec: 14.27 - lr: 0.020000\n",
+      "2021-09-21 19:38:01,807 epoch 2 - iter 3/7 - loss 0.62571744 - samples/sec: 15.26 - lr: 0.020000\n",
+      "2021-09-21 19:38:01,876 epoch 2 - iter 4/7 - loss 0.66451396 - samples/sec: 14.71 - lr: 0.020000\n",
+      "2021-09-21 19:38:01,947 epoch 2 - iter 5/7 - loss 0.67078658 - samples/sec: 14.07 - lr: 0.020000\n",
+      "2021-09-21 19:38:02,011 epoch 2 - iter 6/7 - loss 0.67515812 - samples/sec: 15.82 - lr: 0.020000\n",
+      "2021-09-21 19:38:02,096 epoch 2 - iter 7/7 - loss 0.66967552 - samples/sec: 11.81 - lr: 0.020000\n",
+      "2021-09-21 19:38:02,097 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:02,098 EPOCH 2 done: loss 0.6697 - lr 0.0200000\n",
+      "2021-09-21 19:38:02,236 DEV : loss 0.41403326392173767 - score 0.0\n",
+      "2021-09-21 19:38:02,237 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:38:02,318 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:02,430 epoch 3 - iter 1/7 - loss 0.61085546 - samples/sec: 17.79 - lr: 0.020000\n",
+      "2021-09-21 19:38:02,514 epoch 3 - iter 2/7 - loss 0.62036315 - samples/sec: 11.90 - lr: 0.020000\n",
+      "2021-09-21 19:38:02,579 epoch 3 - iter 3/7 - loss 0.68335517 - samples/sec: 15.45 - lr: 0.020000\n",
+      "2021-09-21 19:38:02,659 epoch 3 - iter 4/7 - loss 0.70115566 - samples/sec: 12.70 - lr: 0.020000\n",
+      "2021-09-21 19:38:02,720 epoch 3 - iter 5/7 - loss 0.68685617 - samples/sec: 16.36 - lr: 0.020000\n",
+      "2021-09-21 19:38:02,791 epoch 3 - iter 6/7 - loss 0.67266916 - samples/sec: 14.16 - lr: 0.020000\n",
+      "2021-09-21 19:38:02,868 epoch 3 - iter 7/7 - loss 0.67576299 - samples/sec: 13.17 - lr: 0.020000\n",
+      "2021-09-21 19:38:02,869 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:02,869 EPOCH 3 done: loss 0.6758 - lr 0.0200000\n",
+      "2021-09-21 19:38:03,036 DEV : loss 0.542035698890686 - score 0.0\n",
+      "2021-09-21 19:38:03,039 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:38:03,119 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:03,213 epoch 4 - iter 1/7 - loss 0.63790190 - samples/sec: 18.13 - lr: 0.020000\n",
+      "2021-09-21 19:38:03,282 epoch 4 - iter 2/7 - loss 0.61325020 - samples/sec: 14.62 - lr: 0.020000\n",
+      "2021-09-21 19:38:03,343 epoch 4 - iter 3/7 - loss 0.63287435 - samples/sec: 16.41 - lr: 0.020000\n",
+      "2021-09-21 19:38:03,428 epoch 4 - iter 4/7 - loss 0.65057087 - samples/sec: 11.87 - lr: 0.020000\n",
+      "2021-09-21 19:38:03,497 epoch 4 - iter 5/7 - loss 0.64756497 - samples/sec: 14.53 - lr: 0.020000\n",
+      "2021-09-21 19:38:03,553 epoch 4 - iter 6/7 - loss 0.64523430 - samples/sec: 17.99 - lr: 0.020000\n",
+      "2021-09-21 19:38:03,605 epoch 4 - iter 7/7 - loss 0.63432135 - samples/sec: 19.25 - lr: 0.020000\n",
+      "2021-09-21 19:38:03,606 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:03,607 EPOCH 4 done: loss 0.6343 - lr 0.0200000\n",
+      "2021-09-21 19:38:03,754 DEV : loss 0.4509561061859131 - score 0.0\n",
+      "2021-09-21 19:38:03,756 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:38:03,831 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:03,922 epoch 5 - iter 1/7 - loss 0.69862819 - samples/sec: 15.77 - lr: 0.020000\n",
+      "2021-09-21 19:38:03,985 epoch 5 - iter 2/7 - loss 0.64359102 - samples/sec: 15.95 - lr: 0.020000\n",
+      "2021-09-21 19:38:04,053 epoch 5 - iter 3/7 - loss 0.60469600 - samples/sec: 14.81 - lr: 0.020000\n",
+      "2021-09-21 19:38:04,125 epoch 5 - iter 4/7 - loss 0.60469478 - samples/sec: 13.91 - lr: 0.020000\n",
+      "2021-09-21 19:38:04,189 epoch 5 - iter 5/7 - loss 0.59631553 - samples/sec: 15.68 - lr: 0.020000\n",
+      "2021-09-21 19:38:04,254 epoch 5 - iter 6/7 - loss 0.60386944 - samples/sec: 15.65 - lr: 0.020000\n",
+      "2021-09-21 19:38:04,333 epoch 5 - iter 7/7 - loss 0.61675130 - samples/sec: 12.71 - lr: 0.020000\n",
+      "2021-09-21 19:38:04,334 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:04,334 EPOCH 5 done: loss 0.6168 - lr 0.0200000\n",
+      "2021-09-21 19:38:04,646 DEV : loss 0.49662089347839355 - score 0.0\n",
+      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 19:38:04,647 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:38:04,649 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:04,780 epoch 6 - iter 1/7 - loss 0.65176862 - samples/sec: 9.39 - lr: 0.010000\n",
+      "2021-09-21 19:38:04,900 epoch 6 - iter 2/7 - loss 0.63182905 - samples/sec: 8.39 - lr: 0.010000\n",
+      "2021-09-21 19:38:04,970 epoch 6 - iter 3/7 - loss 0.70880584 - samples/sec: 14.42 - lr: 0.010000\n",
+      "2021-09-21 19:38:05,033 epoch 6 - iter 4/7 - loss 0.68260631 - samples/sec: 16.02 - lr: 0.010000\n",
+      "2021-09-21 19:38:05,109 epoch 6 - iter 5/7 - loss 0.65494541 - samples/sec: 13.23 - lr: 0.010000\n",
+      "2021-09-21 19:38:05,189 epoch 6 - iter 6/7 - loss 0.66459401 - samples/sec: 12.56 - lr: 0.010000\n",
+      "2021-09-21 19:38:05,254 epoch 6 - iter 7/7 - loss 0.66126421 - samples/sec: 15.76 - lr: 0.010000\n",
+      "2021-09-21 19:38:05,255 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:05,255 EPOCH 6 done: loss 0.6613 - lr 0.0100000\n",
+      "2021-09-21 19:38:05,301 DEV : loss 0.3094290494918823 - score 0.0\n",
+      "2021-09-21 19:38:05,306 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:05:03,963 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:05:04,030 epoch 6 - iter 1/7 - loss 0.32877585 - samples/sec: 20.04 - lr: 0.020000\n",
-      "2021-09-08 11:05:04,077 epoch 6 - iter 2/7 - loss 0.45489402 - samples/sec: 21.40 - lr: 0.020000\n",
-      "2021-09-08 11:05:04,124 epoch 6 - iter 3/7 - loss 0.43565925 - samples/sec: 21.35 - lr: 0.020000\n",
-      "2021-09-08 11:05:04,171 epoch 6 - iter 4/7 - loss 0.42550004 - samples/sec: 21.65 - lr: 0.020000\n",
-      "2021-09-08 11:05:04,218 epoch 6 - iter 5/7 - loss 0.49331974 - samples/sec: 21.52 - lr: 0.020000\n",
-      "2021-09-08 11:05:04,264 epoch 6 - iter 6/7 - loss 0.53716598 - samples/sec: 21.65 - lr: 0.020000\n",
-      "2021-09-08 11:05:04,312 epoch 6 - iter 7/7 - loss 0.55767685 - samples/sec: 21.32 - lr: 0.020000\n",
-      "2021-09-08 11:05:04,313 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:05:04,313 EPOCH 6 done: loss 0.5577 - lr 0.0200000\n",
-      "2021-09-08 11:05:04,344 DEV : loss 0.48706692457199097 - score 0.0\n",
-      "2021-09-08 11:05:04,344 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:05:04,349 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:05:04,409 epoch 7 - iter 1/7 - loss 0.59278101 - samples/sec: 21.74 - lr: 0.020000\n",
-      "2021-09-08 11:05:04,456 epoch 7 - iter 2/7 - loss 0.56001192 - samples/sec: 21.44 - lr: 0.020000\n",
-      "2021-09-08 11:05:04,503 epoch 7 - iter 3/7 - loss 0.56546130 - samples/sec: 21.75 - lr: 0.020000\n",
-      "2021-09-08 11:05:04,550 epoch 7 - iter 4/7 - loss 0.57350716 - samples/sec: 21.32 - lr: 0.020000\n",
-      "2021-09-08 11:05:04,596 epoch 7 - iter 5/7 - loss 0.54681280 - samples/sec: 21.69 - lr: 0.020000\n",
-      "2021-09-08 11:05:04,643 epoch 7 - iter 6/7 - loss 0.51036508 - samples/sec: 21.75 - lr: 0.020000\n",
-      "2021-09-08 11:05:04,689 epoch 7 - iter 7/7 - loss 0.49452286 - samples/sec: 21.59 - lr: 0.020000\n",
-      "2021-09-08 11:05:04,690 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:05:04,691 EPOCH 7 done: loss 0.4945 - lr 0.0200000\n",
-      "2021-09-08 11:05:04,721 DEV : loss 0.2696418762207031 - score 0.0\n",
-      "2021-09-08 11:05:04,722 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:05:04,723 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:05:04,784 epoch 8 - iter 1/7 - loss 0.44911668 - samples/sec: 21.63 - lr: 0.020000\n",
-      "2021-09-08 11:05:04,830 epoch 8 - iter 2/7 - loss 0.54128243 - samples/sec: 21.55 - lr: 0.020000\n",
-      "2021-09-08 11:05:04,878 epoch 8 - iter 3/7 - loss 0.56278951 - samples/sec: 21.45 - lr: 0.020000\n",
-      "2021-09-08 11:05:04,924 epoch 8 - iter 4/7 - loss 0.50984642 - samples/sec: 21.67 - lr: 0.020000\n",
-      "2021-09-08 11:05:04,971 epoch 8 - iter 5/7 - loss 0.48132303 - samples/sec: 21.67 - lr: 0.020000\n",
-      "2021-09-08 11:05:05,018 epoch 8 - iter 6/7 - loss 0.50445520 - samples/sec: 21.47 - lr: 0.020000\n",
-      "2021-09-08 11:05:05,064 epoch 8 - iter 7/7 - loss 0.53643511 - samples/sec: 21.73 - lr: 0.020000\n"
+      "2021-09-21 19:38:15,558 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:15,685 epoch 7 - iter 1/7 - loss 0.54139829 - samples/sec: 12.30 - lr: 0.010000\n",
+      "2021-09-21 19:38:15,780 epoch 7 - iter 2/7 - loss 0.56698060 - samples/sec: 10.65 - lr: 0.010000\n",
+      "2021-09-21 19:38:15,864 epoch 7 - iter 3/7 - loss 0.57688149 - samples/sec: 11.96 - lr: 0.010000\n",
+      "2021-09-21 19:38:15,946 epoch 7 - iter 4/7 - loss 0.60041349 - samples/sec: 12.35 - lr: 0.010000\n",
+      "2021-09-21 19:38:16,017 epoch 7 - iter 5/7 - loss 0.64075181 - samples/sec: 14.21 - lr: 0.010000\n",
+      "2021-09-21 19:38:16,088 epoch 7 - iter 6/7 - loss 0.63129778 - samples/sec: 14.21 - lr: 0.010000\n",
+      "2021-09-21 19:38:16,165 epoch 7 - iter 7/7 - loss 0.64586559 - samples/sec: 13.07 - lr: 0.010000\n",
+      "2021-09-21 19:38:16,166 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:16,166 EPOCH 7 done: loss 0.6459 - lr 0.0100000\n",
+      "2021-09-21 19:38:16,239 DEV : loss 0.4240265488624573 - score 0.0\n",
+      "2021-09-21 19:38:16,244 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:38:16,245 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:16,359 epoch 8 - iter 1/7 - loss 0.60691261 - samples/sec: 11.90 - lr: 0.010000\n",
+      "2021-09-21 19:38:16,443 epoch 8 - iter 2/7 - loss 0.55529201 - samples/sec: 12.07 - lr: 0.010000\n",
+      "2021-09-21 19:38:16,521 epoch 8 - iter 3/7 - loss 0.52179342 - samples/sec: 12.88 - lr: 0.010000\n",
+      "2021-09-21 19:38:16,593 epoch 8 - iter 4/7 - loss 0.52688576 - samples/sec: 13.99 - lr: 0.010000\n",
+      "2021-09-21 19:38:16,674 epoch 8 - iter 5/7 - loss 0.54437308 - samples/sec: 12.37 - lr: 0.010000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:05:05,065 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:05:05,065 EPOCH 8 done: loss 0.5364 - lr 0.0200000\n",
-      "2021-09-08 11:05:05,099 DEV : loss 0.2900888919830322 - score 0.0\n",
-      "2021-09-08 11:05:05,099 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:05:05,101 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:05:05,162 epoch 9 - iter 1/7 - loss 0.80282348 - samples/sec: 21.58 - lr: 0.020000\n",
-      "2021-09-08 11:05:05,208 epoch 9 - iter 2/7 - loss 0.88273317 - samples/sec: 21.67 - lr: 0.020000\n",
-      "2021-09-08 11:05:05,255 epoch 9 - iter 3/7 - loss 0.74917027 - samples/sec: 21.67 - lr: 0.020000\n",
-      "2021-09-08 11:05:05,301 epoch 9 - iter 4/7 - loss 0.71573173 - samples/sec: 21.77 - lr: 0.020000\n",
-      "2021-09-08 11:05:05,349 epoch 9 - iter 5/7 - loss 0.63032657 - samples/sec: 21.33 - lr: 0.020000\n",
-      "2021-09-08 11:05:05,395 epoch 9 - iter 6/7 - loss 0.56540394 - samples/sec: 21.77 - lr: 0.020000\n",
-      "2021-09-08 11:05:05,442 epoch 9 - iter 7/7 - loss 0.56018228 - samples/sec: 21.50 - lr: 0.020000\n",
-      "2021-09-08 11:05:05,443 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:05:05,443 EPOCH 9 done: loss 0.5602 - lr 0.0200000\n",
-      "2021-09-08 11:05:05,475 DEV : loss 0.39756616950035095 - score 0.0\n",
-      "Epoch     9: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 11:05:05,475 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 11:05:05,477 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:05:05,538 epoch 10 - iter 1/7 - loss 0.20763569 - samples/sec: 21.57 - lr: 0.010000\n",
-      "2021-09-08 11:05:05,584 epoch 10 - iter 2/7 - loss 0.26932279 - samples/sec: 21.58 - lr: 0.010000\n",
-      "2021-09-08 11:05:05,631 epoch 10 - iter 3/7 - loss 0.46347627 - samples/sec: 21.56 - lr: 0.010000\n",
-      "2021-09-08 11:05:05,678 epoch 10 - iter 4/7 - loss 0.40097289 - samples/sec: 21.66 - lr: 0.010000\n",
-      "2021-09-08 11:05:05,724 epoch 10 - iter 5/7 - loss 0.35868720 - samples/sec: 21.55 - lr: 0.010000\n",
-      "2021-09-08 11:05:05,771 epoch 10 - iter 6/7 - loss 0.34028082 - samples/sec: 21.77 - lr: 0.010000\n",
-      "2021-09-08 11:05:05,818 epoch 10 - iter 7/7 - loss 0.34401099 - samples/sec: 21.44 - lr: 0.010000\n",
-      "2021-09-08 11:05:05,819 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:05:05,819 EPOCH 10 done: loss 0.3440 - lr 0.0100000\n",
-      "2021-09-08 11:05:05,847 DEV : loss 0.31458625197410583 - score 0.0\n",
-      "2021-09-08 11:05:05,848 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:05:09,927 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:05:09,928 Testing using best model ...\n",
-      "2021-09-08 11:05:09,929 loading file None/best-model.pt\n",
+      "2021-09-21 19:38:16,740 epoch 8 - iter 6/7 - loss 0.56282670 - samples/sec: 15.14 - lr: 0.010000\n",
+      "2021-09-21 19:38:16,804 epoch 8 - iter 7/7 - loss 0.59007111 - samples/sec: 15.67 - lr: 0.010000\n",
+      "2021-09-21 19:38:16,805 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:16,806 EPOCH 8 done: loss 0.5901 - lr 0.0100000\n",
+      "2021-09-21 19:38:16,867 DEV : loss 0.3565542995929718 - score 0.0\n",
+      "2021-09-21 19:38:16,872 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:38:16,874 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:16,992 epoch 9 - iter 1/7 - loss 0.39980757 - samples/sec: 13.11 - lr: 0.010000\n",
+      "2021-09-21 19:38:17,049 epoch 9 - iter 2/7 - loss 0.58726829 - samples/sec: 17.78 - lr: 0.010000\n",
+      "2021-09-21 19:38:17,109 epoch 9 - iter 3/7 - loss 0.54252363 - samples/sec: 16.71 - lr: 0.010000\n",
+      "2021-09-21 19:38:17,170 epoch 9 - iter 4/7 - loss 0.58387870 - samples/sec: 16.60 - lr: 0.010000\n",
+      "2021-09-21 19:38:17,231 epoch 9 - iter 5/7 - loss 0.59257993 - samples/sec: 16.70 - lr: 0.010000\n",
+      "2021-09-21 19:38:17,301 epoch 9 - iter 6/7 - loss 0.58415852 - samples/sec: 14.40 - lr: 0.010000\n",
+      "2021-09-21 19:38:17,371 epoch 9 - iter 7/7 - loss 0.56949887 - samples/sec: 14.37 - lr: 0.010000\n",
+      "2021-09-21 19:38:17,372 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:17,373 EPOCH 9 done: loss 0.5695 - lr 0.0100000\n",
+      "2021-09-21 19:38:17,421 DEV : loss 0.3973992168903351 - score 0.0\n",
+      "2021-09-21 19:38:17,424 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:38:17,425 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:17,514 epoch 10 - iter 1/7 - loss 0.48922539 - samples/sec: 17.64 - lr: 0.010000\n",
+      "2021-09-21 19:38:17,583 epoch 10 - iter 2/7 - loss 0.53415373 - samples/sec: 14.64 - lr: 0.010000\n",
+      "2021-09-21 19:38:17,642 epoch 10 - iter 3/7 - loss 0.50118533 - samples/sec: 17.12 - lr: 0.010000\n",
+      "2021-09-21 19:38:17,706 epoch 10 - iter 4/7 - loss 0.50031494 - samples/sec: 15.85 - lr: 0.010000\n",
+      "2021-09-21 19:38:17,767 epoch 10 - iter 5/7 - loss 0.52904125 - samples/sec: 16.54 - lr: 0.010000\n",
+      "2021-09-21 19:38:17,863 epoch 10 - iter 6/7 - loss 0.53030486 - samples/sec: 10.47 - lr: 0.010000\n",
+      "2021-09-21 19:38:17,918 epoch 10 - iter 7/7 - loss 0.52033817 - samples/sec: 18.17 - lr: 0.010000\n",
+      "2021-09-21 19:38:17,919 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:17,920 EPOCH 10 done: loss 0.5203 - lr 0.0100000\n",
+      "2021-09-21 19:38:18,054 DEV : loss 0.432075560092926 - score 0.0\n",
+      "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 19:38:18,055 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:38:25,404 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:25,405 Testing using best model ...\n",
+      "2021-09-21 19:38:25,406 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:05:14,570 \t0.0\n",
-      "2021-09-08 11:05:14,571 \n",
+      "2021-09-21 19:38:31,312 \t0.0\n",
+      "2021-09-21 19:38:31,313 \n",
       "Results:\n",
       "- F-score (micro) 0.0\n",
       "- F-score (macro) 0.0\n",
@@ -5983,40 +6022,53 @@
       "By class:\n",
       "                                                                            precision    recall  f1-score   support\n",
       "\n",
+      "                                            the emotion of great happiness     0.0000    0.0000    0.0000         0\n",
       "the astonishment you feel when something totally unexpected happens to you     0.0000    0.0000    0.0000         0\n",
       "                         a strong positive emotion of regard and affection     0.0000    0.0000    0.0000         0\n",
       "                                                strong feelings of dislike     0.0000    0.0000    0.0000         0\n",
-      "         a feeling that is oriented toward some real or supposed grievance     0.0000    0.0000    0.0000         0\n",
       "                    remorse caused by feeling responsible for some offense     0.0000    0.0000    0.0000         0\n",
       "      a painful emotion resulting from an awareness of inadequacy or guilt     0.0000    0.0000    0.0000         0\n",
       "                    emotions experienced when not in a state of well-being     0.0000    0.0000    0.0000         0\n",
-      "                                            the emotion of great happiness     0.0000    0.0000    0.0000         1\n",
+      "         a feeling that is oriented toward some real or supposed grievance     0.0000    0.0000    0.0000         1\n",
       "\n",
       "                                                                 micro avg     0.0000    0.0000    0.0000         1\n",
       "                                                                 macro avg     0.0000    0.0000    0.0000         1\n",
       "                                                              weighted avg     0.0000    0.0000    0.0000         1\n",
       "                                                               samples avg     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "2021-09-08 11:05:14,571 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:05:58,036 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
+      "2021-09-21 19:38:31,313 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:03,400 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:06:01,948 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:40:07,709 Computing label dictionary. Progress:\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 8/8 [00:00<00:00, 33588.02it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 19:40:07,711 [b'the astonishment you feel when something totally unexpected happens to you', b'a strong positive emotion of regard and affection', b'strong feelings of dislike', b'an emotion experienced in anticipation of some specific pain or danger', b'a feeling that is oriented toward some real or supposed grievance', b'remorse caused by feeling responsible for some offense', b'a painful emotion resulting from an awareness of inadequacy or guilt', b'emotions experienced when not in a state of well-being']\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 8/8 [00:00<00:00, 23237.14it/s]"
+      "\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:06:01,950 [b'the astonishment you feel when something totally unexpected happens to you', b'a strong positive emotion of regard and affection', b'strong feelings of dislike', b'an emotion experienced in anticipation of some specific pain or danger', b'a feeling that is oriented toward some real or supposed grievance', b'remorse caused by feeling responsible for some offense', b'emotions experienced when not in a state of well-being', b'the emotion of great happiness']\n",
-      "2021-09-08 11:06:01,961 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:06:01,964 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:40:12,889 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:12,891 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -6329,172 +6381,161 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:06:01,964 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:06:01,965 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
-      "2021-09-08 11:06:01,965 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:06:01,965 Parameters:\n",
-      "2021-09-08 11:06:01,965  - learning_rate: \"0.02\"\n",
-      "2021-09-08 11:06:01,966  - mini_batch_size: \"1\"\n",
-      "2021-09-08 11:06:01,966  - patience: \"3\"\n",
-      "2021-09-08 11:06:01,966  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 11:06:01,966  - max_epochs: \"10\"\n",
-      "2021-09-08 11:06:01,967  - shuffle: \"True\"\n",
-      "2021-09-08 11:06:01,967  - train_with_dev: \"False\"\n",
-      "2021-09-08 11:06:01,967  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 11:06:01,968 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:06:01,968 Model training base path: \"None\"\n",
-      "2021-09-08 11:06:01,968 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:06:01,968 Device: cuda:1\n",
-      "2021-09-08 11:06:01,969 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:06:01,969 Embeddings storage mode: cpu\n",
-      "2021-09-08 11:06:01,981 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:06:02,036 epoch 1 - iter 1/7 - loss 0.62222147 - samples/sec: 25.25 - lr: 0.020000\n",
-      "2021-09-08 11:06:02,083 epoch 1 - iter 2/7 - loss 0.65550113 - samples/sec: 21.54 - lr: 0.020000\n",
-      "2021-09-08 11:06:02,129 epoch 1 - iter 3/7 - loss 0.72435039 - samples/sec: 21.72 - lr: 0.020000\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 11:06:02,178 epoch 1 - iter 4/7 - loss 0.69475585 - samples/sec: 20.89 - lr: 0.020000\n",
-      "2021-09-08 11:06:02,323 epoch 1 - iter 5/7 - loss 0.67037987 - samples/sec: 6.91 - lr: 0.020000\n",
-      "2021-09-08 11:06:02,371 epoch 1 - iter 6/7 - loss 0.71895487 - samples/sec: 20.86 - lr: 0.020000\n",
-      "2021-09-08 11:06:02,419 epoch 1 - iter 7/7 - loss 0.73568358 - samples/sec: 20.99 - lr: 0.020000\n",
-      "2021-09-08 11:06:02,420 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:06:02,421 EPOCH 1 done: loss 0.7357 - lr 0.0200000\n",
-      "2021-09-08 11:06:02,450 DEV : loss 0.32390016317367554 - score 0.0\n",
-      "2021-09-08 11:06:02,450 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:40:12,891 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:12,892 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
+      "2021-09-21 19:40:12,892 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:12,892 Parameters:\n",
+      "2021-09-21 19:40:12,892  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:40:12,893  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:40:12,893  - patience: \"3\"\n",
+      "2021-09-21 19:40:12,893  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:40:12,894  - max_epochs: \"10\"\n",
+      "2021-09-21 19:40:12,894  - shuffle: \"True\"\n",
+      "2021-09-21 19:40:12,894  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:40:12,894  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:40:12,895 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:12,895 Model training base path: \"None\"\n",
+      "2021-09-21 19:40:12,895 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:12,896 Device: cuda:0\n",
+      "2021-09-21 19:40:12,896 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:12,896 Embeddings storage mode: cpu\n",
+      "2021-09-21 19:40:12,908 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:12,964 epoch 1 - iter 1/7 - loss 0.46572351 - samples/sec: 24.61 - lr: 0.020000\n",
+      "2021-09-21 19:40:13,021 epoch 1 - iter 2/7 - loss 0.55834728 - samples/sec: 17.78 - lr: 0.020000\n",
+      "2021-09-21 19:40:13,078 epoch 1 - iter 3/7 - loss 0.58669390 - samples/sec: 17.60 - lr: 0.020000\n",
+      "2021-09-21 19:40:13,136 epoch 1 - iter 4/7 - loss 0.63275126 - samples/sec: 17.39 - lr: 0.020000\n",
+      "2021-09-21 19:40:13,193 epoch 1 - iter 5/7 - loss 0.63435599 - samples/sec: 17.66 - lr: 0.020000\n",
+      "2021-09-21 19:40:13,247 epoch 1 - iter 6/7 - loss 0.67690398 - samples/sec: 18.98 - lr: 0.020000\n",
+      "2021-09-21 19:40:13,298 epoch 1 - iter 7/7 - loss 0.67907392 - samples/sec: 19.87 - lr: 0.020000\n",
+      "2021-09-21 19:40:13,299 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:13,299 EPOCH 1 done: loss 0.6791 - lr 0.0200000\n",
+      "2021-09-21 19:40:13,495 DEV : loss 0.43945395946502686 - score 0.0\n",
+      "2021-09-21 19:40:13,496 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:06:06,280 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:06:06,344 epoch 2 - iter 1/7 - loss 0.59423882 - samples/sec: 21.04 - lr: 0.020000\n",
-      "2021-09-08 11:06:06,392 epoch 2 - iter 2/7 - loss 0.60973576 - samples/sec: 20.90 - lr: 0.020000\n",
-      "2021-09-08 11:06:06,439 epoch 2 - iter 3/7 - loss 0.60991383 - samples/sec: 21.54 - lr: 0.020000\n",
-      "2021-09-08 11:06:06,486 epoch 2 - iter 4/7 - loss 0.61323117 - samples/sec: 21.39 - lr: 0.020000\n",
-      "2021-09-08 11:06:06,533 epoch 2 - iter 5/7 - loss 0.61560450 - samples/sec: 21.52 - lr: 0.020000\n",
-      "2021-09-08 11:06:06,581 epoch 2 - iter 6/7 - loss 0.62244316 - samples/sec: 20.89 - lr: 0.020000\n",
-      "2021-09-08 11:06:06,629 epoch 2 - iter 7/7 - loss 0.64197935 - samples/sec: 21.39 - lr: 0.020000\n",
-      "2021-09-08 11:06:06,630 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:06:06,630 EPOCH 2 done: loss 0.6420 - lr 0.0200000\n",
-      "2021-09-08 11:06:06,658 DEV : loss 0.3766365051269531 - score 0.0\n",
-      "2021-09-08 11:06:06,658 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:06:06,661 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:06:06,721 epoch 3 - iter 1/7 - loss 0.65446466 - samples/sec: 21.61 - lr: 0.020000\n",
-      "2021-09-08 11:06:06,768 epoch 3 - iter 2/7 - loss 0.56341176 - samples/sec: 21.42 - lr: 0.020000\n",
-      "2021-09-08 11:06:06,816 epoch 3 - iter 3/7 - loss 0.53135822 - samples/sec: 21.36 - lr: 0.020000\n",
-      "2021-09-08 11:06:06,863 epoch 3 - iter 4/7 - loss 0.55614239 - samples/sec: 21.36 - lr: 0.020000\n",
-      "2021-09-08 11:06:06,910 epoch 3 - iter 5/7 - loss 0.56648471 - samples/sec: 21.29 - lr: 0.020000\n",
-      "2021-09-08 11:06:06,957 epoch 3 - iter 6/7 - loss 0.58988288 - samples/sec: 21.68 - lr: 0.020000\n",
-      "2021-09-08 11:06:07,004 epoch 3 - iter 7/7 - loss 0.58458063 - samples/sec: 21.57 - lr: 0.020000\n",
-      "2021-09-08 11:06:07,005 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:06:07,005 EPOCH 3 done: loss 0.5846 - lr 0.0200000\n",
-      "2021-09-08 11:06:07,032 DEV : loss 0.6316034197807312 - score 0.0\n",
-      "2021-09-08 11:06:07,033 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:06:07,036 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:06:07,096 epoch 4 - iter 1/7 - loss 0.58296770 - samples/sec: 21.48 - lr: 0.020000\n",
-      "2021-09-08 11:06:07,143 epoch 4 - iter 2/7 - loss 0.44304809 - samples/sec: 21.71 - lr: 0.020000\n",
-      "2021-09-08 11:06:07,189 epoch 4 - iter 3/7 - loss 0.55618143 - samples/sec: 21.67 - lr: 0.020000\n",
-      "2021-09-08 11:06:07,238 epoch 4 - iter 4/7 - loss 0.52120850 - samples/sec: 20.85 - lr: 0.020000\n",
-      "2021-09-08 11:06:07,285 epoch 4 - iter 5/7 - loss 0.52901105 - samples/sec: 21.33 - lr: 0.020000\n",
-      "2021-09-08 11:06:07,333 epoch 4 - iter 6/7 - loss 0.53620410 - samples/sec: 21.12 - lr: 0.020000\n",
-      "2021-09-08 11:06:07,380 epoch 4 - iter 7/7 - loss 0.52638034 - samples/sec: 21.55 - lr: 0.020000\n",
-      "2021-09-08 11:06:07,381 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:06:07,381 EPOCH 4 done: loss 0.5264 - lr 0.0200000\n",
-      "2021-09-08 11:06:07,408 DEV : loss 0.4896160066127777 - score 0.0\n",
-      "2021-09-08 11:06:07,408 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:06:07,412 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:06:07,472 epoch 5 - iter 1/7 - loss 0.56081790 - samples/sec: 21.71 - lr: 0.020000\n",
-      "2021-09-08 11:06:07,519 epoch 5 - iter 2/7 - loss 0.39037355 - samples/sec: 21.47 - lr: 0.020000\n",
-      "2021-09-08 11:06:07,566 epoch 5 - iter 3/7 - loss 0.39299851 - samples/sec: 21.59 - lr: 0.020000\n",
-      "2021-09-08 11:06:07,613 epoch 5 - iter 4/7 - loss 0.51605306 - samples/sec: 21.39 - lr: 0.020000\n",
-      "2021-09-08 11:06:07,660 epoch 5 - iter 5/7 - loss 0.57073090 - samples/sec: 21.43 - lr: 0.020000\n",
-      "2021-09-08 11:06:07,707 epoch 5 - iter 6/7 - loss 0.56611324 - samples/sec: 21.56 - lr: 0.020000\n",
-      "2021-09-08 11:06:07,754 epoch 5 - iter 7/7 - loss 0.57462543 - samples/sec: 21.65 - lr: 0.020000\n",
-      "2021-09-08 11:06:07,755 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:06:07,755 EPOCH 5 done: loss 0.5746 - lr 0.0200000\n",
-      "2021-09-08 11:06:07,783 DEV : loss 0.6067062020301819 - score 0.0\n",
-      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 11:06:07,784 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 11:06:07,786 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:06:07,848 epoch 6 - iter 1/7 - loss 0.36427101 - samples/sec: 21.12 - lr: 0.010000\n",
-      "2021-09-08 11:06:07,895 epoch 6 - iter 2/7 - loss 0.31621437 - samples/sec: 21.54 - lr: 0.010000\n",
-      "2021-09-08 11:06:07,945 epoch 6 - iter 3/7 - loss 0.26777056 - samples/sec: 20.08 - lr: 0.010000\n",
-      "2021-09-08 11:06:07,992 epoch 6 - iter 4/7 - loss 0.37489135 - samples/sec: 21.64 - lr: 0.010000\n",
-      "2021-09-08 11:06:08,038 epoch 6 - iter 5/7 - loss 0.46451474 - samples/sec: 21.73 - lr: 0.010000\n",
-      "2021-09-08 11:06:08,085 epoch 6 - iter 6/7 - loss 0.49906714 - samples/sec: 21.43 - lr: 0.010000\n",
-      "2021-09-08 11:06:08,132 epoch 6 - iter 7/7 - loss 0.51423228 - samples/sec: 21.56 - lr: 0.010000\n",
-      "2021-09-08 11:06:08,133 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:06:08,133 EPOCH 6 done: loss 0.5142 - lr 0.0100000\n",
-      "2021-09-08 11:06:08,175 DEV : loss 0.4484029710292816 - score 0.0\n",
-      "2021-09-08 11:06:08,176 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:06:08,178 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:06:08,238 epoch 7 - iter 1/7 - loss 0.42728829 - samples/sec: 21.45 - lr: 0.010000\n",
-      "2021-09-08 11:06:08,285 epoch 7 - iter 2/7 - loss 0.50896093 - samples/sec: 21.37 - lr: 0.010000\n",
-      "2021-09-08 11:06:08,333 epoch 7 - iter 3/7 - loss 0.42508391 - samples/sec: 21.40 - lr: 0.010000\n",
-      "2021-09-08 11:06:08,379 epoch 7 - iter 4/7 - loss 0.47831216 - samples/sec: 21.57 - lr: 0.010000\n",
-      "2021-09-08 11:06:08,426 epoch 7 - iter 5/7 - loss 0.45876259 - samples/sec: 21.59 - lr: 0.010000\n",
-      "2021-09-08 11:06:08,473 epoch 7 - iter 6/7 - loss 0.43814811 - samples/sec: 21.42 - lr: 0.010000\n",
-      "2021-09-08 11:06:08,520 epoch 7 - iter 7/7 - loss 0.45356605 - samples/sec: 21.51 - lr: 0.010000\n",
-      "2021-09-08 11:06:08,521 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:06:08,521 EPOCH 7 done: loss 0.4536 - lr 0.0100000\n",
-      "2021-09-08 11:06:08,551 DEV : loss 0.4733956456184387 - score 0.0\n",
-      "2021-09-08 11:06:08,551 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:06:08,553 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:06:08,616 epoch 8 - iter 1/7 - loss 0.23212443 - samples/sec: 20.66 - lr: 0.010000\n",
-      "2021-09-08 11:06:08,663 epoch 8 - iter 2/7 - loss 0.38143756 - samples/sec: 21.33 - lr: 0.010000\n",
-      "2021-09-08 11:06:08,713 epoch 8 - iter 3/7 - loss 0.46529398 - samples/sec: 20.22 - lr: 0.010000\n",
-      "2021-09-08 11:06:08,761 epoch 8 - iter 4/7 - loss 0.41825035 - samples/sec: 21.07 - lr: 0.010000\n",
-      "2021-09-08 11:06:08,808 epoch 8 - iter 5/7 - loss 0.45616673 - samples/sec: 21.28 - lr: 0.010000\n",
-      "2021-09-08 11:06:08,855 epoch 8 - iter 6/7 - loss 0.47282777 - samples/sec: 21.59 - lr: 0.010000\n",
-      "2021-09-08 11:06:08,903 epoch 8 - iter 7/7 - loss 0.49189628 - samples/sec: 21.26 - lr: 0.010000\n"
+      "2021-09-21 19:40:21,083 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:21,146 epoch 2 - iter 1/7 - loss 0.58613956 - samples/sec: 21.20 - lr: 0.020000\n",
+      "2021-09-21 19:40:21,192 epoch 2 - iter 2/7 - loss 0.61935848 - samples/sec: 21.64 - lr: 0.020000\n",
+      "2021-09-21 19:40:21,239 epoch 2 - iter 3/7 - loss 0.56076078 - samples/sec: 21.70 - lr: 0.020000\n",
+      "2021-09-21 19:40:21,285 epoch 2 - iter 4/7 - loss 0.60527746 - samples/sec: 21.78 - lr: 0.020000\n",
+      "2021-09-21 19:40:21,331 epoch 2 - iter 5/7 - loss 0.63440535 - samples/sec: 21.77 - lr: 0.020000\n",
+      "2021-09-21 19:40:21,380 epoch 2 - iter 6/7 - loss 0.64371923 - samples/sec: 20.62 - lr: 0.020000\n",
+      "2021-09-21 19:40:21,427 epoch 2 - iter 7/7 - loss 0.63976306 - samples/sec: 21.61 - lr: 0.020000\n",
+      "2021-09-21 19:40:21,428 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:21,428 EPOCH 2 done: loss 0.6398 - lr 0.0200000\n",
+      "2021-09-21 19:40:22,342 DEV : loss 0.49016013741493225 - score 0.0\n",
+      "2021-09-21 19:40:22,343 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:40:22,365 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:22,456 epoch 3 - iter 1/7 - loss 0.67827708 - samples/sec: 14.43 - lr: 0.020000\n",
+      "2021-09-21 19:40:22,518 epoch 3 - iter 2/7 - loss 0.65669405 - samples/sec: 16.29 - lr: 0.020000\n",
+      "2021-09-21 19:40:22,579 epoch 3 - iter 3/7 - loss 0.71706400 - samples/sec: 16.53 - lr: 0.020000\n",
+      "2021-09-21 19:40:22,642 epoch 3 - iter 4/7 - loss 0.72639924 - samples/sec: 15.97 - lr: 0.020000\n",
+      "2021-09-21 19:40:22,701 epoch 3 - iter 5/7 - loss 0.70446695 - samples/sec: 17.01 - lr: 0.020000\n",
+      "2021-09-21 19:40:22,764 epoch 3 - iter 6/7 - loss 0.68335239 - samples/sec: 16.15 - lr: 0.020000\n",
+      "2021-09-21 19:40:22,828 epoch 3 - iter 7/7 - loss 0.67092280 - samples/sec: 15.64 - lr: 0.020000\n",
+      "2021-09-21 19:40:22,829 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:22,829 EPOCH 3 done: loss 0.6709 - lr 0.0200000\n",
+      "2021-09-21 19:40:22,873 DEV : loss 0.4125604033470154 - score 0.0\n",
+      "2021-09-21 19:40:22,876 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:40:28,271 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:28,377 epoch 4 - iter 1/7 - loss 0.52671403 - samples/sec: 15.40 - lr: 0.020000\n",
+      "2021-09-21 19:40:28,436 epoch 4 - iter 2/7 - loss 0.65639910 - samples/sec: 16.90 - lr: 0.020000\n",
+      "2021-09-21 19:40:28,504 epoch 4 - iter 3/7 - loss 0.63587908 - samples/sec: 14.90 - lr: 0.020000\n",
+      "2021-09-21 19:40:28,569 epoch 4 - iter 4/7 - loss 0.66102378 - samples/sec: 15.58 - lr: 0.020000\n",
+      "2021-09-21 19:40:28,635 epoch 4 - iter 5/7 - loss 0.64184835 - samples/sec: 15.18 - lr: 0.020000\n",
+      "2021-09-21 19:40:28,701 epoch 4 - iter 6/7 - loss 0.64948101 - samples/sec: 15.24 - lr: 0.020000\n",
+      "2021-09-21 19:40:28,762 epoch 4 - iter 7/7 - loss 0.65439556 - samples/sec: 16.60 - lr: 0.020000\n",
+      "2021-09-21 19:40:28,763 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:28,763 EPOCH 4 done: loss 0.6544 - lr 0.0200000\n",
+      "2021-09-21 19:40:28,814 DEV : loss 0.4925087094306946 - score 0.0\n",
+      "2021-09-21 19:40:28,816 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:40:28,819 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:28,914 epoch 5 - iter 1/7 - loss 0.72401875 - samples/sec: 14.92 - lr: 0.020000\n",
+      "2021-09-21 19:40:28,982 epoch 5 - iter 2/7 - loss 0.68873757 - samples/sec: 14.90 - lr: 0.020000\n",
+      "2021-09-21 19:40:29,049 epoch 5 - iter 3/7 - loss 0.66679515 - samples/sec: 15.03 - lr: 0.020000\n",
+      "2021-09-21 19:40:29,120 epoch 5 - iter 4/7 - loss 0.65484634 - samples/sec: 14.05 - lr: 0.020000\n",
+      "2021-09-21 19:40:29,174 epoch 5 - iter 5/7 - loss 0.62557881 - samples/sec: 18.94 - lr: 0.020000\n",
+      "2021-09-21 19:40:29,238 epoch 5 - iter 6/7 - loss 0.61822843 - samples/sec: 15.61 - lr: 0.020000\n",
+      "2021-09-21 19:40:29,301 epoch 5 - iter 7/7 - loss 0.61474616 - samples/sec: 16.10 - lr: 0.020000\n",
+      "2021-09-21 19:40:29,302 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:29,302 EPOCH 5 done: loss 0.6147 - lr 0.0200000\n",
+      "2021-09-21 19:40:29,356 DEV : loss 0.400807648897171 - score 0.0\n",
+      "2021-09-21 19:40:29,358 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:40:33,442 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:33,539 epoch 6 - iter 1/7 - loss 0.77846575 - samples/sec: 14.36 - lr: 0.020000\n",
+      "2021-09-21 19:40:33,594 epoch 6 - iter 2/7 - loss 0.70420638 - samples/sec: 18.36 - lr: 0.020000\n",
+      "2021-09-21 19:40:33,659 epoch 6 - iter 3/7 - loss 0.66026322 - samples/sec: 15.60 - lr: 0.020000\n",
+      "2021-09-21 19:40:33,712 epoch 6 - iter 4/7 - loss 0.66773327 - samples/sec: 18.93 - lr: 0.020000\n",
+      "2021-09-21 19:40:33,771 epoch 6 - iter 5/7 - loss 0.67217243 - samples/sec: 16.99 - lr: 0.020000\n",
+      "2021-09-21 19:40:33,833 epoch 6 - iter 6/7 - loss 0.66942804 - samples/sec: 16.25 - lr: 0.020000\n",
+      "2021-09-21 19:40:33,893 epoch 6 - iter 7/7 - loss 0.66384780 - samples/sec: 16.80 - lr: 0.020000\n",
+      "2021-09-21 19:40:33,894 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:33,894 EPOCH 6 done: loss 0.6638 - lr 0.0200000\n",
+      "2021-09-21 19:40:33,944 DEV : loss 0.36640578508377075 - score 0.0\n",
+      "2021-09-21 19:40:33,946 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:40:44,387 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:44,475 epoch 7 - iter 1/7 - loss 0.62459236 - samples/sec: 17.21 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:06:08,904 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:06:08,904 EPOCH 8 done: loss 0.4919 - lr 0.0100000\n",
-      "2021-09-08 11:06:08,932 DEV : loss 0.3271419405937195 - score 0.0\n",
-      "2021-09-08 11:06:08,933 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:06:08,938 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:06:08,998 epoch 9 - iter 1/7 - loss 0.53205043 - samples/sec: 21.82 - lr: 0.010000\n",
-      "2021-09-08 11:06:09,045 epoch 9 - iter 2/7 - loss 0.36503637 - samples/sec: 21.53 - lr: 0.010000\n",
-      "2021-09-08 11:06:09,092 epoch 9 - iter 3/7 - loss 0.41263566 - samples/sec: 21.27 - lr: 0.010000\n",
-      "2021-09-08 11:06:09,139 epoch 9 - iter 4/7 - loss 0.40923402 - samples/sec: 21.61 - lr: 0.010000\n",
-      "2021-09-08 11:06:09,186 epoch 9 - iter 5/7 - loss 0.41837025 - samples/sec: 21.45 - lr: 0.010000\n",
-      "2021-09-08 11:06:09,232 epoch 9 - iter 6/7 - loss 0.37800809 - samples/sec: 21.62 - lr: 0.010000\n",
-      "2021-09-08 11:06:09,279 epoch 9 - iter 7/7 - loss 0.39940201 - samples/sec: 21.44 - lr: 0.010000\n",
-      "2021-09-08 11:06:09,280 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:06:09,281 EPOCH 9 done: loss 0.3994 - lr 0.0100000\n",
-      "2021-09-08 11:06:09,334 DEV : loss 0.2504335641860962 - score 0.0\n",
-      "2021-09-08 11:06:09,334 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:40:44,539 epoch 7 - iter 2/7 - loss 0.68315566 - samples/sec: 15.98 - lr: 0.020000\n",
+      "2021-09-21 19:40:44,605 epoch 7 - iter 3/7 - loss 0.70628673 - samples/sec: 15.09 - lr: 0.020000\n",
+      "2021-09-21 19:40:44,669 epoch 7 - iter 4/7 - loss 0.68143547 - samples/sec: 15.76 - lr: 0.020000\n",
+      "2021-09-21 19:40:44,725 epoch 7 - iter 5/7 - loss 0.64814206 - samples/sec: 17.94 - lr: 0.020000\n",
+      "2021-09-21 19:40:44,784 epoch 7 - iter 6/7 - loss 0.61219580 - samples/sec: 17.30 - lr: 0.020000\n",
+      "2021-09-21 19:40:44,843 epoch 7 - iter 7/7 - loss 0.64668937 - samples/sec: 16.88 - lr: 0.020000\n",
+      "2021-09-21 19:40:44,844 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:44,845 EPOCH 7 done: loss 0.6467 - lr 0.0200000\n",
+      "2021-09-21 19:40:44,905 DEV : loss 0.4430083632469177 - score 0.0\n",
+      "2021-09-21 19:40:44,905 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:40:44,914 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:45,012 epoch 8 - iter 1/7 - loss 0.51036417 - samples/sec: 16.47 - lr: 0.020000\n",
+      "2021-09-21 19:40:45,075 epoch 8 - iter 2/7 - loss 0.56440204 - samples/sec: 15.99 - lr: 0.020000\n",
+      "2021-09-21 19:40:45,135 epoch 8 - iter 3/7 - loss 0.48211083 - samples/sec: 16.85 - lr: 0.020000\n",
+      "2021-09-21 19:40:45,202 epoch 8 - iter 4/7 - loss 0.47748968 - samples/sec: 15.17 - lr: 0.020000\n",
+      "2021-09-21 19:40:45,264 epoch 8 - iter 5/7 - loss 0.48212470 - samples/sec: 16.05 - lr: 0.020000\n",
+      "2021-09-21 19:40:45,329 epoch 8 - iter 6/7 - loss 0.52076254 - samples/sec: 15.68 - lr: 0.020000\n",
+      "2021-09-21 19:40:45,394 epoch 8 - iter 7/7 - loss 0.56393296 - samples/sec: 15.40 - lr: 0.020000\n",
+      "2021-09-21 19:40:45,395 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:45,395 EPOCH 8 done: loss 0.5639 - lr 0.0200000\n",
+      "2021-09-21 19:40:45,468 DEV : loss 0.6449317932128906 - score 0.0\n",
+      "2021-09-21 19:40:45,470 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:40:45,472 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:45,559 epoch 9 - iter 1/7 - loss 0.54106683 - samples/sec: 16.14 - lr: 0.020000\n",
+      "2021-09-21 19:40:45,620 epoch 9 - iter 2/7 - loss 0.55489287 - samples/sec: 16.69 - lr: 0.020000\n",
+      "2021-09-21 19:40:45,685 epoch 9 - iter 3/7 - loss 0.55695440 - samples/sec: 15.35 - lr: 0.020000\n",
+      "2021-09-21 19:40:45,749 epoch 9 - iter 4/7 - loss 0.51210444 - samples/sec: 15.86 - lr: 0.020000\n",
+      "2021-09-21 19:40:45,810 epoch 9 - iter 5/7 - loss 0.46771215 - samples/sec: 16.31 - lr: 0.020000\n",
+      "2021-09-21 19:40:45,871 epoch 9 - iter 6/7 - loss 0.47818974 - samples/sec: 16.63 - lr: 0.020000\n",
+      "2021-09-21 19:40:45,939 epoch 9 - iter 7/7 - loss 0.49588016 - samples/sec: 14.81 - lr: 0.020000\n",
+      "2021-09-21 19:40:45,940 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:45,940 EPOCH 9 done: loss 0.4959 - lr 0.0200000\n",
+      "2021-09-21 19:40:46,008 DEV : loss 0.7884033918380737 - score 0.0\n",
+      "2021-09-21 19:40:46,010 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:40:46,012 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:46,091 epoch 10 - iter 1/7 - loss 0.24163139 - samples/sec: 17.41 - lr: 0.020000\n",
+      "2021-09-21 19:40:46,161 epoch 10 - iter 2/7 - loss 0.46190718 - samples/sec: 14.32 - lr: 0.020000\n",
+      "2021-09-21 19:40:46,213 epoch 10 - iter 3/7 - loss 0.62177896 - samples/sec: 19.60 - lr: 0.020000\n",
+      "2021-09-21 19:40:46,274 epoch 10 - iter 4/7 - loss 0.51440444 - samples/sec: 16.39 - lr: 0.020000\n",
+      "2021-09-21 19:40:46,337 epoch 10 - iter 5/7 - loss 0.60562943 - samples/sec: 16.09 - lr: 0.020000\n",
+      "2021-09-21 19:40:46,399 epoch 10 - iter 6/7 - loss 0.71891742 - samples/sec: 16.28 - lr: 0.020000\n",
+      "2021-09-21 19:40:46,460 epoch 10 - iter 7/7 - loss 0.66687830 - samples/sec: 16.37 - lr: 0.020000\n",
+      "2021-09-21 19:40:46,461 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:46,461 EPOCH 10 done: loss 0.6669 - lr 0.0200000\n",
+      "2021-09-21 19:40:46,521 DEV : loss 0.3194023370742798 - score 0.0\n",
+      "2021-09-21 19:40:46,522 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:06:13,397 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:06:13,461 epoch 10 - iter 1/7 - loss 0.24966539 - samples/sec: 20.68 - lr: 0.010000\n",
-      "2021-09-08 11:06:13,506 epoch 10 - iter 2/7 - loss 0.16682375 - samples/sec: 22.95 - lr: 0.010000\n",
-      "2021-09-08 11:06:13,552 epoch 10 - iter 3/7 - loss 0.23576419 - samples/sec: 21.59 - lr: 0.010000\n",
-      "2021-09-08 11:06:13,599 epoch 10 - iter 4/7 - loss 0.24538895 - samples/sec: 21.45 - lr: 0.010000\n",
-      "2021-09-08 11:06:13,647 epoch 10 - iter 5/7 - loss 0.25892051 - samples/sec: 21.23 - lr: 0.010000\n",
-      "2021-09-08 11:06:13,694 epoch 10 - iter 6/7 - loss 0.28767758 - samples/sec: 21.44 - lr: 0.010000\n",
-      "2021-09-08 11:06:13,741 epoch 10 - iter 7/7 - loss 0.29073899 - samples/sec: 21.47 - lr: 0.010000\n",
-      "2021-09-08 11:06:13,742 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:06:13,742 EPOCH 10 done: loss 0.2907 - lr 0.0100000\n",
-      "2021-09-08 11:06:13,772 DEV : loss 0.45578333735466003 - score 0.0\n",
-      "2021-09-08 11:06:13,773 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:06:17,755 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:06:17,755 Testing using best model ...\n",
-      "2021-09-08 11:06:17,757 loading file None/best-model.pt\n",
+      "2021-09-21 19:40:54,253 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:54,254 Testing using best model ...\n",
+      "2021-09-21 19:40:54,255 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:06:22,328 \t0.0\n",
-      "2021-09-08 11:06:22,329 \n",
+      "2021-09-21 19:40:59,622 \t0.0\n",
+      "2021-09-21 19:40:59,623 \n",
       "Results:\n",
       "- F-score (micro) 0.0\n",
       "- F-score (macro) 0.0\n",
@@ -6509,34 +6550,34 @@
       "    an emotion experienced in anticipation of some specific pain or danger     0.0000    0.0000    0.0000         0\n",
       "         a feeling that is oriented toward some real or supposed grievance     0.0000    0.0000    0.0000         0\n",
       "                    remorse caused by feeling responsible for some offense     0.0000    0.0000    0.0000         0\n",
-      "                    emotions experienced when not in a state of well-being     0.0000    0.0000    0.0000         0\n",
-      "                                            the emotion of great happiness     0.0000    0.0000    0.0000         1\n",
+      "      a painful emotion resulting from an awareness of inadequacy or guilt     0.0000    0.0000    0.0000         0\n",
+      "                    emotions experienced when not in a state of well-being     0.0000    0.0000    0.0000         1\n",
       "\n",
       "                                                                 micro avg     0.0000    0.0000    0.0000         1\n",
       "                                                                 macro avg     0.0000    0.0000    0.0000         1\n",
       "                                                              weighted avg     0.0000    0.0000    0.0000         1\n",
       "                                                               samples avg     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "2021-09-08 11:06:22,329 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:07:05,548 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
+      "2021-09-21 19:40:59,623 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:55,097 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:07:09,522 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:41:59,314 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 8/8 [00:00<00:00, 27846.00it/s]"
+      "100%|██████████| 8/8 [00:00<00:00, 33689.19it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:07:09,524 [b'the emotion of great happiness', b'the astonishment you feel when something totally unexpected happens to you', b'a strong positive emotion of regard and affection', b'strong feelings of dislike', b'an emotion experienced in anticipation of some specific pain or danger', b'remorse caused by feeling responsible for some offense', b'a painful emotion resulting from an awareness of inadequacy or guilt', b'a feeling that is oriented toward some real or supposed grievance']\n",
-      "2021-09-08 11:07:09,649 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:07:09,650 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:41:59,316 [b'the emotion of great happiness', b'the astonishment you feel when something totally unexpected happens to you', b'a strong positive emotion of regard and affection', b'an emotion experienced in anticipation of some specific pain or danger', b'a feeling that is oriented toward some real or supposed grievance', b'remorse caused by feeling responsible for some offense', b'a painful emotion resulting from an awareness of inadequacy or guilt', b'strong feelings of dislike']\n",
+      "2021-09-21 19:41:59,477 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:59,479 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -6849,25 +6890,24 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:07:09,651 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:07:09,651 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
-      "2021-09-08 11:07:09,652 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:07:09,652 Parameters:\n",
-      "2021-09-08 11:07:09,652  - learning_rate: \"0.02\"\n",
-      "2021-09-08 11:07:09,653  - mini_batch_size: \"1\"\n",
-      "2021-09-08 11:07:09,653  - patience: \"3\"\n",
-      "2021-09-08 11:07:09,653  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 11:07:09,653  - max_epochs: \"10\"\n",
-      "2021-09-08 11:07:09,654  - shuffle: \"True\"\n",
-      "2021-09-08 11:07:09,654  - train_with_dev: \"False\"\n",
-      "2021-09-08 11:07:09,654  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 11:07:09,654 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:07:09,655 Model training base path: \"None\"\n",
-      "2021-09-08 11:07:09,655 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:07:09,655 Device: cuda:1\n",
-      "2021-09-08 11:07:09,656 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:07:09,656 Embeddings storage mode: cpu\n",
-      "2021-09-08 11:07:09,693 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 19:41:59,480 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:59,480 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
+      "2021-09-21 19:41:59,480 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:59,481 Parameters:\n",
+      "2021-09-21 19:41:59,481  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:41:59,481  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:41:59,482  - patience: \"3\"\n",
+      "2021-09-21 19:41:59,482  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:41:59,482  - max_epochs: \"10\"\n",
+      "2021-09-21 19:41:59,482  - shuffle: \"True\"\n",
+      "2021-09-21 19:41:59,483  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:41:59,483  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:41:59,483 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:59,483 Model training base path: \"None\"\n",
+      "2021-09-21 19:41:59,484 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:59,484 Device: cuda:0\n",
+      "2021-09-21 19:41:59,484 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:59,485 Embeddings storage mode: cpu\n"
      ]
     },
     {
@@ -6881,142 +6921,141 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:07:09,747 epoch 1 - iter 1/7 - loss 0.58260363 - samples/sec: 25.27 - lr: 0.020000\n",
-      "2021-09-08 11:07:09,794 epoch 1 - iter 2/7 - loss 0.66669843 - samples/sec: 21.65 - lr: 0.020000\n",
-      "2021-09-08 11:07:09,842 epoch 1 - iter 3/7 - loss 0.63986945 - samples/sec: 21.24 - lr: 0.020000\n",
-      "2021-09-08 11:07:09,890 epoch 1 - iter 4/7 - loss 0.68752252 - samples/sec: 20.97 - lr: 0.020000\n",
-      "2021-09-08 11:07:09,937 epoch 1 - iter 5/7 - loss 0.68561711 - samples/sec: 21.56 - lr: 0.020000\n",
-      "2021-09-08 11:07:09,983 epoch 1 - iter 6/7 - loss 0.72698250 - samples/sec: 21.57 - lr: 0.020000\n",
-      "2021-09-08 11:07:10,030 epoch 1 - iter 7/7 - loss 0.71780246 - samples/sec: 21.52 - lr: 0.020000\n",
-      "2021-09-08 11:07:10,031 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:07:10,032 EPOCH 1 done: loss 0.7178 - lr 0.0200000\n",
-      "2021-09-08 11:07:10,070 DEV : loss 0.4080207943916321 - score 0.0\n",
-      "2021-09-08 11:07:10,070 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:41:59,560 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:59,616 epoch 1 - iter 1/7 - loss 0.42351174 - samples/sec: 25.06 - lr: 0.020000\n",
+      "2021-09-21 19:41:59,663 epoch 1 - iter 2/7 - loss 0.46621117 - samples/sec: 21.47 - lr: 0.020000\n",
+      "2021-09-21 19:41:59,710 epoch 1 - iter 3/7 - loss 0.54594419 - samples/sec: 21.34 - lr: 0.020000\n",
+      "2021-09-21 19:41:59,757 epoch 1 - iter 4/7 - loss 0.56696856 - samples/sec: 21.65 - lr: 0.020000\n",
+      "2021-09-21 19:41:59,804 epoch 1 - iter 5/7 - loss 0.61658274 - samples/sec: 21.43 - lr: 0.020000\n",
+      "2021-09-21 19:41:59,853 epoch 1 - iter 6/7 - loss 0.65254602 - samples/sec: 20.53 - lr: 0.020000\n",
+      "2021-09-21 19:41:59,899 epoch 1 - iter 7/7 - loss 0.65237745 - samples/sec: 21.60 - lr: 0.020000\n",
+      "2021-09-21 19:41:59,900 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:59,901 EPOCH 1 done: loss 0.6524 - lr 0.0200000\n",
+      "2021-09-21 19:42:00,381 DEV : loss 0.4157443046569824 - score 0.0\n",
+      "2021-09-21 19:42:00,381 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:07:14,285 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:07:14,361 epoch 2 - iter 1/7 - loss 0.46308532 - samples/sec: 17.48 - lr: 0.020000\n",
-      "2021-09-08 11:07:14,418 epoch 2 - iter 2/7 - loss 0.56350474 - samples/sec: 18.00 - lr: 0.020000\n",
-      "2021-09-08 11:07:14,474 epoch 2 - iter 3/7 - loss 0.61860970 - samples/sec: 17.98 - lr: 0.020000\n",
-      "2021-09-08 11:07:14,530 epoch 2 - iter 4/7 - loss 0.62421776 - samples/sec: 18.02 - lr: 0.020000\n",
-      "2021-09-08 11:07:14,586 epoch 2 - iter 5/7 - loss 0.61387219 - samples/sec: 17.95 - lr: 0.020000\n",
-      "2021-09-08 11:07:14,641 epoch 2 - iter 6/7 - loss 0.65796845 - samples/sec: 18.13 - lr: 0.020000\n",
-      "2021-09-08 11:07:14,697 epoch 2 - iter 7/7 - loss 0.65423431 - samples/sec: 18.12 - lr: 0.020000\n",
-      "2021-09-08 11:07:14,698 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:07:14,698 EPOCH 2 done: loss 0.6542 - lr 0.0200000\n",
-      "2021-09-08 11:07:14,731 DEV : loss 0.5225210189819336 - score 0.0\n",
-      "2021-09-08 11:07:14,732 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:07:14,733 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:07:14,806 epoch 3 - iter 1/7 - loss 0.52520531 - samples/sec: 17.86 - lr: 0.020000\n",
-      "2021-09-08 11:07:14,862 epoch 3 - iter 2/7 - loss 0.63502622 - samples/sec: 18.09 - lr: 0.020000\n",
-      "2021-09-08 11:07:14,918 epoch 3 - iter 3/7 - loss 0.65726115 - samples/sec: 17.88 - lr: 0.020000\n",
-      "2021-09-08 11:07:14,974 epoch 3 - iter 4/7 - loss 0.63529946 - samples/sec: 18.06 - lr: 0.020000\n",
-      "2021-09-08 11:07:15,029 epoch 3 - iter 5/7 - loss 0.62427619 - samples/sec: 18.08 - lr: 0.020000\n",
-      "2021-09-08 11:07:15,085 epoch 3 - iter 6/7 - loss 0.61833675 - samples/sec: 18.25 - lr: 0.020000\n",
-      "2021-09-08 11:07:15,140 epoch 3 - iter 7/7 - loss 0.65375314 - samples/sec: 18.21 - lr: 0.020000\n",
-      "2021-09-08 11:07:15,141 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:07:15,141 EPOCH 3 done: loss 0.6538 - lr 0.0200000\n",
-      "2021-09-08 11:07:15,175 DEV : loss 0.3139582574367523 - score 0.0\n",
-      "2021-09-08 11:07:15,176 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:42:04,789 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:04,861 epoch 2 - iter 1/7 - loss 0.54058307 - samples/sec: 19.82 - lr: 0.020000\n",
+      "2021-09-21 19:42:04,914 epoch 2 - iter 2/7 - loss 0.58069739 - samples/sec: 18.92 - lr: 0.020000\n",
+      "2021-09-21 19:42:04,964 epoch 2 - iter 3/7 - loss 0.68072659 - samples/sec: 20.26 - lr: 0.020000\n",
+      "2021-09-21 19:42:05,016 epoch 2 - iter 4/7 - loss 0.69090334 - samples/sec: 19.61 - lr: 0.020000\n",
+      "2021-09-21 19:42:05,065 epoch 2 - iter 5/7 - loss 0.69948232 - samples/sec: 20.55 - lr: 0.020000\n",
+      "2021-09-21 19:42:05,114 epoch 2 - iter 6/7 - loss 0.70240691 - samples/sec: 20.37 - lr: 0.020000\n",
+      "2021-09-21 19:42:05,168 epoch 2 - iter 7/7 - loss 0.69028166 - samples/sec: 18.71 - lr: 0.020000\n",
+      "2021-09-21 19:42:05,169 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:05,170 EPOCH 2 done: loss 0.6903 - lr 0.0200000\n",
+      "2021-09-21 19:42:05,206 DEV : loss 0.4836110472679138 - score 0.0\n",
+      "2021-09-21 19:42:05,207 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:42:05,209 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:05,276 epoch 3 - iter 1/7 - loss 0.53354132 - samples/sec: 21.11 - lr: 0.020000\n",
+      "2021-09-21 19:42:05,323 epoch 3 - iter 2/7 - loss 0.57038572 - samples/sec: 21.35 - lr: 0.020000\n",
+      "2021-09-21 19:42:05,374 epoch 3 - iter 3/7 - loss 0.61480995 - samples/sec: 19.62 - lr: 0.020000\n",
+      "2021-09-21 19:42:05,423 epoch 3 - iter 4/7 - loss 0.60319771 - samples/sec: 20.52 - lr: 0.020000\n",
+      "2021-09-21 19:42:05,473 epoch 3 - iter 5/7 - loss 0.59931283 - samples/sec: 20.16 - lr: 0.020000\n",
+      "2021-09-21 19:42:05,523 epoch 3 - iter 6/7 - loss 0.58789348 - samples/sec: 20.13 - lr: 0.020000\n",
+      "2021-09-21 19:42:05,578 epoch 3 - iter 7/7 - loss 0.58703637 - samples/sec: 18.30 - lr: 0.020000\n",
+      "2021-09-21 19:42:05,579 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:05,580 EPOCH 3 done: loss 0.5870 - lr 0.0200000\n",
+      "2021-09-21 19:42:05,612 DEV : loss 0.3032812774181366 - score 0.0\n",
+      "2021-09-21 19:42:05,613 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:07:19,157 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:07:19,232 epoch 4 - iter 1/7 - loss 0.56079882 - samples/sec: 17.55 - lr: 0.020000\n",
-      "2021-09-08 11:07:19,286 epoch 4 - iter 2/7 - loss 0.56908745 - samples/sec: 18.92 - lr: 0.020000\n",
-      "2021-09-08 11:07:19,337 epoch 4 - iter 3/7 - loss 0.55670518 - samples/sec: 19.67 - lr: 0.020000\n",
-      "2021-09-08 11:07:19,388 epoch 4 - iter 4/7 - loss 0.60066661 - samples/sec: 19.81 - lr: 0.020000\n",
-      "2021-09-08 11:07:19,438 epoch 4 - iter 5/7 - loss 0.61200171 - samples/sec: 19.94 - lr: 0.020000\n",
-      "2021-09-08 11:07:19,489 epoch 4 - iter 6/7 - loss 0.59053266 - samples/sec: 20.00 - lr: 0.020000\n",
-      "2021-09-08 11:07:19,540 epoch 4 - iter 7/7 - loss 0.61726898 - samples/sec: 19.82 - lr: 0.020000\n",
-      "2021-09-08 11:07:19,541 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:07:19,541 EPOCH 4 done: loss 0.6173 - lr 0.0200000\n",
-      "2021-09-08 11:07:19,573 DEV : loss 0.38624995946884155 - score 0.0\n",
-      "2021-09-08 11:07:19,574 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:07:19,593 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:07:19,658 epoch 5 - iter 1/7 - loss 0.45221165 - samples/sec: 19.99 - lr: 0.020000\n",
-      "2021-09-08 11:07:19,710 epoch 5 - iter 2/7 - loss 0.52332343 - samples/sec: 19.38 - lr: 0.020000\n",
-      "2021-09-08 11:07:19,761 epoch 5 - iter 3/7 - loss 0.51679884 - samples/sec: 19.88 - lr: 0.020000\n",
-      "2021-09-08 11:07:19,812 epoch 5 - iter 4/7 - loss 0.49611855 - samples/sec: 19.55 - lr: 0.020000\n",
-      "2021-09-08 11:07:19,863 epoch 5 - iter 5/7 - loss 0.54269537 - samples/sec: 19.96 - lr: 0.020000\n",
-      "2021-09-08 11:07:19,913 epoch 5 - iter 6/7 - loss 0.54913115 - samples/sec: 19.90 - lr: 0.020000\n",
-      "2021-09-08 11:07:19,964 epoch 5 - iter 7/7 - loss 0.58081664 - samples/sec: 19.98 - lr: 0.020000\n",
-      "2021-09-08 11:07:19,965 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:07:19,965 EPOCH 5 done: loss 0.5808 - lr 0.0200000\n",
-      "2021-09-08 11:07:19,995 DEV : loss 0.28124186396598816 - score 0.0\n",
-      "2021-09-08 11:07:19,996 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 11:07:23,996 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:07:24,064 epoch 6 - iter 1/7 - loss 0.59012944 - samples/sec: 19.26 - lr: 0.020000\n",
-      "2021-09-08 11:07:24,115 epoch 6 - iter 2/7 - loss 0.50622429 - samples/sec: 19.78 - lr: 0.020000\n",
-      "2021-09-08 11:07:24,166 epoch 6 - iter 3/7 - loss 0.60757447 - samples/sec: 19.86 - lr: 0.020000\n",
-      "2021-09-08 11:07:24,216 epoch 6 - iter 4/7 - loss 0.56366387 - samples/sec: 19.84 - lr: 0.020000\n",
-      "2021-09-08 11:07:24,268 epoch 6 - iter 5/7 - loss 0.56566926 - samples/sec: 19.50 - lr: 0.020000\n",
-      "2021-09-08 11:07:24,320 epoch 6 - iter 6/7 - loss 0.54416275 - samples/sec: 19.48 - lr: 0.020000\n",
-      "2021-09-08 11:07:24,371 epoch 6 - iter 7/7 - loss 0.56669634 - samples/sec: 19.81 - lr: 0.020000\n",
-      "2021-09-08 11:07:24,372 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:07:24,372 EPOCH 6 done: loss 0.5667 - lr 0.0200000\n",
-      "2021-09-08 11:07:24,403 DEV : loss 0.12812845408916473 - score 0.0\n",
-      "2021-09-08 11:07:24,403 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 11:07:28,780 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:07:28,848 epoch 7 - iter 1/7 - loss 0.24168555 - samples/sec: 19.53 - lr: 0.020000\n",
-      "2021-09-08 11:07:28,898 epoch 7 - iter 2/7 - loss 0.33133677 - samples/sec: 19.96 - lr: 0.020000\n",
-      "2021-09-08 11:07:28,949 epoch 7 - iter 3/7 - loss 0.34123728 - samples/sec: 19.91 - lr: 0.020000\n",
-      "2021-09-08 11:07:29,000 epoch 7 - iter 4/7 - loss 0.37962700 - samples/sec: 19.87 - lr: 0.020000\n",
-      "2021-09-08 11:07:29,050 epoch 7 - iter 5/7 - loss 0.46555534 - samples/sec: 20.22 - lr: 0.020000\n",
-      "2021-09-08 11:07:29,100 epoch 7 - iter 6/7 - loss 0.50071722 - samples/sec: 19.97 - lr: 0.020000\n",
-      "2021-09-08 11:07:29,150 epoch 7 - iter 7/7 - loss 0.52815961 - samples/sec: 20.00 - lr: 0.020000\n",
-      "2021-09-08 11:07:29,151 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:07:29,152 EPOCH 7 done: loss 0.5282 - lr 0.0200000\n",
-      "2021-09-08 11:07:29,185 DEV : loss 0.155975341796875 - score 0.0\n",
-      "2021-09-08 11:07:29,186 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:07:29,189 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:07:29,253 epoch 8 - iter 1/7 - loss 0.59414947 - samples/sec: 20.04 - lr: 0.020000\n",
-      "2021-09-08 11:07:29,303 epoch 8 - iter 2/7 - loss 0.53968211 - samples/sec: 20.09 - lr: 0.020000\n",
-      "2021-09-08 11:07:29,354 epoch 8 - iter 3/7 - loss 0.52516466 - samples/sec: 19.89 - lr: 0.020000\n",
-      "2021-09-08 11:07:29,404 epoch 8 - iter 4/7 - loss 0.53209406 - samples/sec: 19.92 - lr: 0.020000\n"
+      "2021-09-21 19:42:09,739 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:09,811 epoch 4 - iter 1/7 - loss 0.69318861 - samples/sec: 19.69 - lr: 0.020000\n",
+      "2021-09-21 19:42:09,861 epoch 4 - iter 2/7 - loss 0.62631103 - samples/sec: 20.63 - lr: 0.020000\n",
+      "2021-09-21 19:42:09,911 epoch 4 - iter 3/7 - loss 0.63286676 - samples/sec: 20.02 - lr: 0.020000\n",
+      "2021-09-21 19:42:09,961 epoch 4 - iter 4/7 - loss 0.63759197 - samples/sec: 20.13 - lr: 0.020000\n",
+      "2021-09-21 19:42:10,010 epoch 4 - iter 5/7 - loss 0.60521402 - samples/sec: 20.43 - lr: 0.020000\n",
+      "2021-09-21 19:42:10,061 epoch 4 - iter 6/7 - loss 0.59637166 - samples/sec: 20.03 - lr: 0.020000\n",
+      "2021-09-21 19:42:10,116 epoch 4 - iter 7/7 - loss 0.59824621 - samples/sec: 18.20 - lr: 0.020000\n",
+      "2021-09-21 19:42:10,117 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:10,117 EPOCH 4 done: loss 0.5982 - lr 0.0200000\n",
+      "2021-09-21 19:42:10,149 DEV : loss 0.5878545045852661 - score 0.0\n",
+      "2021-09-21 19:42:10,150 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:42:10,151 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:10,227 epoch 5 - iter 1/7 - loss 0.75453418 - samples/sec: 17.17 - lr: 0.020000\n",
+      "2021-09-21 19:42:10,276 epoch 5 - iter 2/7 - loss 0.64474151 - samples/sec: 20.50 - lr: 0.020000\n",
+      "2021-09-21 19:42:10,325 epoch 5 - iter 3/7 - loss 0.63039895 - samples/sec: 20.64 - lr: 0.020000\n",
+      "2021-09-21 19:42:10,375 epoch 5 - iter 4/7 - loss 0.60783623 - samples/sec: 20.10 - lr: 0.020000\n",
+      "2021-09-21 19:42:10,426 epoch 5 - iter 5/7 - loss 0.62274332 - samples/sec: 19.75 - lr: 0.020000\n",
+      "2021-09-21 19:42:10,475 epoch 5 - iter 6/7 - loss 0.63457200 - samples/sec: 20.52 - lr: 0.020000\n",
+      "2021-09-21 19:42:10,523 epoch 5 - iter 7/7 - loss 0.60707251 - samples/sec: 21.34 - lr: 0.020000\n",
+      "2021-09-21 19:42:10,523 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:10,524 EPOCH 5 done: loss 0.6071 - lr 0.0200000\n",
+      "2021-09-21 19:42:10,556 DEV : loss 0.39382052421569824 - score 0.0\n",
+      "2021-09-21 19:42:10,557 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:42:10,559 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:10,628 epoch 6 - iter 1/7 - loss 0.60935086 - samples/sec: 20.26 - lr: 0.020000\n",
+      "2021-09-21 19:42:10,679 epoch 6 - iter 2/7 - loss 0.46115065 - samples/sec: 19.80 - lr: 0.020000\n",
+      "2021-09-21 19:42:10,728 epoch 6 - iter 3/7 - loss 0.55041414 - samples/sec: 20.54 - lr: 0.020000\n",
+      "2021-09-21 19:42:10,775 epoch 6 - iter 4/7 - loss 0.57943037 - samples/sec: 21.39 - lr: 0.020000\n",
+      "2021-09-21 19:42:10,823 epoch 6 - iter 5/7 - loss 0.60580461 - samples/sec: 21.10 - lr: 0.020000\n",
+      "2021-09-21 19:42:10,873 epoch 6 - iter 6/7 - loss 0.62864648 - samples/sec: 20.30 - lr: 0.020000\n",
+      "2021-09-21 19:42:10,927 epoch 6 - iter 7/7 - loss 0.65677427 - samples/sec: 18.64 - lr: 0.020000\n",
+      "2021-09-21 19:42:10,928 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:10,928 EPOCH 6 done: loss 0.6568 - lr 0.0200000\n",
+      "2021-09-21 19:42:11,091 DEV : loss 0.4363715648651123 - score 0.0\n",
+      "2021-09-21 19:42:11,091 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:42:11,093 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:11,159 epoch 7 - iter 1/7 - loss 0.61157626 - samples/sec: 20.74 - lr: 0.020000\n",
+      "2021-09-21 19:42:11,209 epoch 7 - iter 2/7 - loss 0.59741896 - samples/sec: 20.07 - lr: 0.020000\n",
+      "2021-09-21 19:42:11,259 epoch 7 - iter 3/7 - loss 0.60950941 - samples/sec: 20.36 - lr: 0.020000\n",
+      "2021-09-21 19:42:11,306 epoch 7 - iter 4/7 - loss 0.58613612 - samples/sec: 21.09 - lr: 0.020000\n",
+      "2021-09-21 19:42:11,356 epoch 7 - iter 5/7 - loss 0.59270473 - samples/sec: 20.50 - lr: 0.020000\n",
+      "2021-09-21 19:42:11,412 epoch 7 - iter 6/7 - loss 0.60124799 - samples/sec: 17.92 - lr: 0.020000\n",
+      "2021-09-21 19:42:11,464 epoch 7 - iter 7/7 - loss 0.57414014 - samples/sec: 19.48 - lr: 0.020000\n",
+      "2021-09-21 19:42:11,465 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:11,465 EPOCH 7 done: loss 0.5741 - lr 0.0200000\n",
+      "2021-09-21 19:42:11,497 DEV : loss 0.45663636922836304 - score 0.0\n",
+      "Epoch     7: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 19:42:11,498 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:42:11,500 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:11,570 epoch 8 - iter 1/7 - loss 0.27045497 - samples/sec: 20.33 - lr: 0.010000\n",
+      "2021-09-21 19:42:11,617 epoch 8 - iter 2/7 - loss 0.33158210 - samples/sec: 21.51 - lr: 0.010000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:07:29,455 epoch 8 - iter 5/7 - loss 0.51448824 - samples/sec: 19.93 - lr: 0.020000\n",
-      "2021-09-08 11:07:29,505 epoch 8 - iter 6/7 - loss 0.46012976 - samples/sec: 19.96 - lr: 0.020000\n",
-      "2021-09-08 11:07:29,556 epoch 8 - iter 7/7 - loss 0.45888928 - samples/sec: 20.02 - lr: 0.020000\n",
-      "2021-09-08 11:07:29,557 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:07:29,557 EPOCH 8 done: loss 0.4589 - lr 0.0200000\n",
-      "2021-09-08 11:07:29,590 DEV : loss 0.3353467881679535 - score 0.0\n",
-      "2021-09-08 11:07:29,591 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:07:29,595 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:07:29,658 epoch 9 - iter 1/7 - loss 0.24357755 - samples/sec: 20.24 - lr: 0.020000\n",
-      "2021-09-08 11:07:29,710 epoch 9 - iter 2/7 - loss 0.28551076 - samples/sec: 19.38 - lr: 0.020000\n",
-      "2021-09-08 11:07:29,760 epoch 9 - iter 3/7 - loss 0.27375040 - samples/sec: 19.96 - lr: 0.020000\n",
-      "2021-09-08 11:07:29,811 epoch 9 - iter 4/7 - loss 0.34620854 - samples/sec: 19.82 - lr: 0.020000\n",
-      "2021-09-08 11:07:29,862 epoch 9 - iter 5/7 - loss 0.32309596 - samples/sec: 19.90 - lr: 0.020000\n",
-      "2021-09-08 11:07:29,913 epoch 9 - iter 6/7 - loss 0.35829404 - samples/sec: 19.94 - lr: 0.020000\n",
-      "2021-09-08 11:07:29,963 epoch 9 - iter 7/7 - loss 0.36715966 - samples/sec: 20.00 - lr: 0.020000\n",
-      "2021-09-08 11:07:29,964 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:07:29,964 EPOCH 9 done: loss 0.3672 - lr 0.0200000\n",
-      "2021-09-08 11:07:29,996 DEV : loss 0.26640585064888 - score 0.0\n",
-      "2021-09-08 11:07:29,997 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:07:30,000 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:07:30,063 epoch 10 - iter 1/7 - loss 0.15173031 - samples/sec: 20.07 - lr: 0.020000\n",
-      "2021-09-08 11:07:30,114 epoch 10 - iter 2/7 - loss 0.36387748 - samples/sec: 20.06 - lr: 0.020000\n",
-      "2021-09-08 11:07:30,164 epoch 10 - iter 3/7 - loss 0.28871532 - samples/sec: 20.01 - lr: 0.020000\n",
-      "2021-09-08 11:07:30,211 epoch 10 - iter 4/7 - loss 0.24745322 - samples/sec: 21.37 - lr: 0.020000\n",
-      "2021-09-08 11:07:30,261 epoch 10 - iter 5/7 - loss 0.26367252 - samples/sec: 20.19 - lr: 0.020000\n",
-      "2021-09-08 11:07:30,313 epoch 10 - iter 6/7 - loss 0.30532288 - samples/sec: 19.44 - lr: 0.020000\n",
-      "2021-09-08 11:07:30,365 epoch 10 - iter 7/7 - loss 0.28572551 - samples/sec: 19.62 - lr: 0.020000\n",
-      "2021-09-08 11:07:30,366 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:07:30,366 EPOCH 10 done: loss 0.2857 - lr 0.0200000\n",
-      "2021-09-08 11:07:30,400 DEV : loss 0.45554232597351074 - score 0.0\n",
-      "Epoch    10: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 11:07:30,400 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 11:07:34,733 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:07:34,735 Testing using best model ...\n",
-      "2021-09-08 11:07:34,736 loading file None/best-model.pt\n",
+      "2021-09-21 19:42:11,666 epoch 8 - iter 3/7 - loss 0.46780721 - samples/sec: 20.23 - lr: 0.010000\n",
+      "2021-09-21 19:42:11,717 epoch 8 - iter 4/7 - loss 0.45390096 - samples/sec: 20.00 - lr: 0.010000\n",
+      "2021-09-21 19:42:11,767 epoch 8 - iter 5/7 - loss 0.44466392 - samples/sec: 20.28 - lr: 0.010000\n",
+      "2021-09-21 19:42:11,822 epoch 8 - iter 6/7 - loss 0.46973271 - samples/sec: 18.26 - lr: 0.010000\n",
+      "2021-09-21 19:42:11,870 epoch 8 - iter 7/7 - loss 0.45780971 - samples/sec: 21.00 - lr: 0.010000\n",
+      "2021-09-21 19:42:11,871 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:11,871 EPOCH 8 done: loss 0.4578 - lr 0.0100000\n",
+      "2021-09-21 19:42:11,903 DEV : loss 0.4886457622051239 - score 0.0\n",
+      "2021-09-21 19:42:11,903 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:42:11,905 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:11,979 epoch 9 - iter 1/7 - loss 0.23173934 - samples/sec: 20.21 - lr: 0.010000\n",
+      "2021-09-21 19:42:12,032 epoch 9 - iter 2/7 - loss 0.29837234 - samples/sec: 19.34 - lr: 0.010000\n",
+      "2021-09-21 19:42:12,082 epoch 9 - iter 3/7 - loss 0.27660964 - samples/sec: 20.07 - lr: 0.010000\n",
+      "2021-09-21 19:42:12,132 epoch 9 - iter 4/7 - loss 0.26774702 - samples/sec: 20.01 - lr: 0.010000\n",
+      "2021-09-21 19:42:12,181 epoch 9 - iter 5/7 - loss 0.28397539 - samples/sec: 20.49 - lr: 0.010000\n",
+      "2021-09-21 19:42:12,232 epoch 9 - iter 6/7 - loss 0.35374682 - samples/sec: 20.03 - lr: 0.010000\n",
+      "2021-09-21 19:42:12,279 epoch 9 - iter 7/7 - loss 0.37132421 - samples/sec: 21.50 - lr: 0.010000\n",
+      "2021-09-21 19:42:12,280 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:12,280 EPOCH 9 done: loss 0.3713 - lr 0.0100000\n",
+      "2021-09-21 19:42:12,316 DEV : loss 0.7299208045005798 - score 0.0\n",
+      "2021-09-21 19:42:12,316 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:42:12,318 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:12,387 epoch 10 - iter 1/7 - loss 0.29082265 - samples/sec: 20.41 - lr: 0.010000\n",
+      "2021-09-21 19:42:12,440 epoch 10 - iter 2/7 - loss 0.45096023 - samples/sec: 19.16 - lr: 0.010000\n",
+      "2021-09-21 19:42:12,490 epoch 10 - iter 3/7 - loss 0.42793415 - samples/sec: 20.29 - lr: 0.010000\n",
+      "2021-09-21 19:42:12,540 epoch 10 - iter 4/7 - loss 0.41817478 - samples/sec: 20.01 - lr: 0.010000\n",
+      "2021-09-21 19:42:12,596 epoch 10 - iter 5/7 - loss 0.39395509 - samples/sec: 18.05 - lr: 0.010000\n",
+      "2021-09-21 19:42:12,645 epoch 10 - iter 6/7 - loss 0.36609933 - samples/sec: 20.58 - lr: 0.010000\n",
+      "2021-09-21 19:42:12,693 epoch 10 - iter 7/7 - loss 0.36050586 - samples/sec: 21.12 - lr: 0.010000\n",
+      "2021-09-21 19:42:12,694 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:12,694 EPOCH 10 done: loss 0.3605 - lr 0.0100000\n",
+      "2021-09-21 19:42:12,728 DEV : loss 0.7711986303329468 - score 0.0\n",
+      "2021-09-21 19:42:12,729 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:42:16,629 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:16,630 Testing using best model ...\n",
+      "2021-09-21 19:42:16,631 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:07:39,358 \t0.0\n",
-      "2021-09-08 11:07:39,359 \n",
+      "2021-09-21 19:42:21,469 \t0.0\n",
+      "2021-09-21 19:42:21,469 \n",
       "Results:\n",
       "- F-score (micro) 0.0\n",
       "- F-score (macro) 0.0\n",
@@ -7028,35 +7067,35 @@
       "                                            the emotion of great happiness     0.0000    0.0000    0.0000         0\n",
       "the astonishment you feel when something totally unexpected happens to you     0.0000    0.0000    0.0000         0\n",
       "                         a strong positive emotion of regard and affection     0.0000    0.0000    0.0000         0\n",
-      "                                                strong feelings of dislike     0.0000    0.0000    0.0000         0\n",
       "    an emotion experienced in anticipation of some specific pain or danger     0.0000    0.0000    0.0000         0\n",
+      "         a feeling that is oriented toward some real or supposed grievance     0.0000    0.0000    0.0000         0\n",
       "                    remorse caused by feeling responsible for some offense     0.0000    0.0000    0.0000         0\n",
       "      a painful emotion resulting from an awareness of inadequacy or guilt     0.0000    0.0000    0.0000         0\n",
-      "         a feeling that is oriented toward some real or supposed grievance     0.0000    0.0000    0.0000         1\n",
+      "                                                strong feelings of dislike     0.0000    0.0000    0.0000         1\n",
       "\n",
       "                                                                 micro avg     0.0000    0.0000    0.0000         1\n",
       "                                                                 macro avg     0.0000    0.0000    0.0000         1\n",
       "                                                              weighted avg     0.0000    0.0000    0.0000         1\n",
       "                                                               samples avg     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "2021-09-08 11:07:39,359 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:08:24,314 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
+      "2021-09-21 19:42:21,470 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:11,817 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:08:28,273 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:43:16,019 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 8/8 [00:00<00:00, 23109.11it/s]"
+      "100%|██████████| 8/8 [00:00<00:00, 30012.91it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:08:28,275 [b'the emotion of great happiness', b'a strong positive emotion of regard and affection', b'an emotion experienced in anticipation of some specific pain or danger', b'a feeling that is oriented toward some real or supposed grievance', b'remorse caused by feeling responsible for some offense', b'a painful emotion resulting from an awareness of inadequacy or guilt', b'emotions experienced when not in a state of well-being', b'strong feelings of dislike']\n"
+      "2021-09-21 19:43:16,021 [b'the emotion of great happiness', b'the astonishment you feel when something totally unexpected happens to you', b'a strong positive emotion of regard and affection', b'strong feelings of dislike', b'an emotion experienced in anticipation of some specific pain or danger', b'a feeling that is oriented toward some real or supposed grievance', b'a painful emotion resulting from an awareness of inadequacy or guilt', b'remorse caused by feeling responsible for some offense']\n"
      ]
     },
     {
@@ -7070,8 +7109,8 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:08:28,873 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:08:28,875 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:43:17,523 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:17,525 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -7384,202 +7423,203 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:08:28,876 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:08:28,876 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
-      "2021-09-08 11:08:28,876 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:08:28,877 Parameters:\n",
-      "2021-09-08 11:08:28,877  - learning_rate: \"0.02\"\n",
-      "2021-09-08 11:08:28,877  - mini_batch_size: \"1\"\n",
-      "2021-09-08 11:08:28,877  - patience: \"3\"\n",
-      "2021-09-08 11:08:28,878  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 11:08:28,878  - max_epochs: \"10\"\n",
-      "2021-09-08 11:08:28,878  - shuffle: \"True\"\n",
-      "2021-09-08 11:08:28,879  - train_with_dev: \"False\"\n",
-      "2021-09-08 11:08:28,879  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 11:08:28,879 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:08:28,879 Model training base path: \"None\"\n",
-      "2021-09-08 11:08:28,880 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:08:28,880 Device: cuda:1\n",
-      "2021-09-08 11:08:28,880 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:08:28,880 Embeddings storage mode: cpu\n",
-      "2021-09-08 11:08:29,638 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:08:29,698 epoch 1 - iter 1/7 - loss 0.60864562 - samples/sec: 22.47 - lr: 0.020000\n",
-      "2021-09-08 11:08:29,749 epoch 1 - iter 2/7 - loss 0.58315170 - samples/sec: 19.92 - lr: 0.020000\n",
-      "2021-09-08 11:08:29,801 epoch 1 - iter 3/7 - loss 0.57492143 - samples/sec: 19.49 - lr: 0.020000\n",
-      "2021-09-08 11:08:29,852 epoch 1 - iter 4/7 - loss 0.66735429 - samples/sec: 19.88 - lr: 0.020000\n",
-      "2021-09-08 11:08:29,903 epoch 1 - iter 5/7 - loss 0.67432432 - samples/sec: 19.51 - lr: 0.020000\n",
-      "2021-09-08 11:08:29,954 epoch 1 - iter 6/7 - loss 0.68164301 - samples/sec: 19.92 - lr: 0.020000\n",
-      "2021-09-08 11:08:30,005 epoch 1 - iter 7/7 - loss 0.67848380 - samples/sec: 19.90 - lr: 0.020000\n",
-      "2021-09-08 11:08:30,005 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:08:30,006 EPOCH 1 done: loss 0.6785 - lr 0.0200000\n",
-      "2021-09-08 11:08:30,822 DEV : loss 0.17850683629512787 - score 0.0\n",
-      "2021-09-08 11:08:30,823 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:43:17,525 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:17,526 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
+      "2021-09-21 19:43:17,526 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:17,526 Parameters:\n",
+      "2021-09-21 19:43:17,527  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:43:17,527  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:43:17,527  - patience: \"3\"\n",
+      "2021-09-21 19:43:17,527  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:43:17,528  - max_epochs: \"10\"\n",
+      "2021-09-21 19:43:17,528  - shuffle: \"True\"\n",
+      "2021-09-21 19:43:17,528  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:43:17,528  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:43:17,529 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:17,529 Model training base path: \"None\"\n",
+      "2021-09-21 19:43:17,529 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:17,530 Device: cuda:0\n",
+      "2021-09-21 19:43:17,530 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:17,530 Embeddings storage mode: cpu\n",
+      "2021-09-21 19:43:17,560 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:17,631 epoch 1 - iter 1/7 - loss 0.52132100 - samples/sec: 18.23 - lr: 0.020000\n",
+      "2021-09-21 19:43:17,694 epoch 1 - iter 2/7 - loss 0.61374265 - samples/sec: 16.06 - lr: 0.020000\n",
+      "2021-09-21 19:43:17,755 epoch 1 - iter 3/7 - loss 0.61941673 - samples/sec: 16.55 - lr: 0.020000\n",
+      "2021-09-21 19:43:17,808 epoch 1 - iter 4/7 - loss 0.69024685 - samples/sec: 19.13 - lr: 0.020000\n",
+      "2021-09-21 19:43:17,865 epoch 1 - iter 5/7 - loss 0.69179558 - samples/sec: 17.59 - lr: 0.020000\n",
+      "2021-09-21 19:43:17,921 epoch 1 - iter 6/7 - loss 0.67155075 - samples/sec: 17.87 - lr: 0.020000\n",
+      "2021-09-21 19:43:17,976 epoch 1 - iter 7/7 - loss 0.66477620 - samples/sec: 18.54 - lr: 0.020000\n",
+      "2021-09-21 19:43:17,977 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:17,977 EPOCH 1 done: loss 0.6648 - lr 0.0200000\n",
+      "2021-09-21 19:43:18,006 DEV : loss 0.4704247713088989 - score 0.0\n",
+      "2021-09-21 19:43:18,006 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:08:34,968 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:08:35,039 epoch 2 - iter 1/7 - loss 0.59565681 - samples/sec: 18.33 - lr: 0.020000\n",
-      "2021-09-08 11:08:35,089 epoch 2 - iter 2/7 - loss 0.76786825 - samples/sec: 19.97 - lr: 0.020000\n",
-      "2021-09-08 11:08:35,139 epoch 2 - iter 3/7 - loss 0.72072069 - samples/sec: 20.05 - lr: 0.020000\n",
-      "2021-09-08 11:08:35,193 epoch 2 - iter 4/7 - loss 0.69716503 - samples/sec: 18.88 - lr: 0.020000\n",
-      "2021-09-08 11:08:35,244 epoch 2 - iter 5/7 - loss 0.68188875 - samples/sec: 19.64 - lr: 0.020000\n",
-      "2021-09-08 11:08:35,295 epoch 2 - iter 6/7 - loss 0.66457898 - samples/sec: 19.94 - lr: 0.020000\n",
-      "2021-09-08 11:08:35,346 epoch 2 - iter 7/7 - loss 0.72400417 - samples/sec: 19.84 - lr: 0.020000\n",
-      "2021-09-08 11:08:35,347 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:08:35,347 EPOCH 2 done: loss 0.7240 - lr 0.0200000\n",
-      "2021-09-08 11:08:35,376 DEV : loss 0.1584436297416687 - score 0.0\n",
-      "2021-09-08 11:08:35,377 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:43:29,333 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:29,407 epoch 2 - iter 1/7 - loss 0.55772388 - samples/sec: 17.26 - lr: 0.020000\n",
+      "2021-09-21 19:43:29,459 epoch 2 - iter 2/7 - loss 0.66091007 - samples/sec: 19.33 - lr: 0.020000\n",
+      "2021-09-21 19:43:29,511 epoch 2 - iter 3/7 - loss 0.61457419 - samples/sec: 19.41 - lr: 0.020000\n",
+      "2021-09-21 19:43:29,564 epoch 2 - iter 4/7 - loss 0.69188541 - samples/sec: 19.19 - lr: 0.020000\n",
+      "2021-09-21 19:43:29,617 epoch 2 - iter 5/7 - loss 0.69081517 - samples/sec: 19.06 - lr: 0.020000\n",
+      "2021-09-21 19:43:29,669 epoch 2 - iter 6/7 - loss 0.69476390 - samples/sec: 19.21 - lr: 0.020000\n",
+      "2021-09-21 19:43:29,719 epoch 2 - iter 7/7 - loss 0.68250454 - samples/sec: 20.15 - lr: 0.020000\n",
+      "2021-09-21 19:43:29,720 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:29,720 EPOCH 2 done: loss 0.6825 - lr 0.0200000\n",
+      "2021-09-21 19:43:29,757 DEV : loss 0.381639689207077 - score 0.0\n",
+      "2021-09-21 19:43:29,758 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:08:39,613 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:08:39,680 epoch 3 - iter 1/7 - loss 0.71699572 - samples/sec: 19.37 - lr: 0.020000\n",
-      "2021-09-08 11:08:39,732 epoch 3 - iter 2/7 - loss 0.69009158 - samples/sec: 19.55 - lr: 0.020000\n",
-      "2021-09-08 11:08:39,782 epoch 3 - iter 3/7 - loss 0.67402383 - samples/sec: 20.07 - lr: 0.020000\n",
-      "2021-09-08 11:08:39,836 epoch 3 - iter 4/7 - loss 0.69932625 - samples/sec: 18.69 - lr: 0.020000\n",
-      "2021-09-08 11:08:39,889 epoch 3 - iter 5/7 - loss 0.70721296 - samples/sec: 19.10 - lr: 0.020000\n",
-      "2021-09-08 11:08:39,937 epoch 3 - iter 6/7 - loss 0.70034931 - samples/sec: 21.12 - lr: 0.020000\n",
-      "2021-09-08 11:08:39,987 epoch 3 - iter 7/7 - loss 0.70345898 - samples/sec: 19.87 - lr: 0.020000\n",
-      "2021-09-08 11:08:39,988 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:08:39,989 EPOCH 3 done: loss 0.7035 - lr 0.0200000\n",
-      "2021-09-08 11:08:40,018 DEV : loss 0.6880404949188232 - score 0.0\n",
-      "2021-09-08 11:08:40,019 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:08:40,023 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:08:40,084 epoch 4 - iter 1/7 - loss 0.71472472 - samples/sec: 21.32 - lr: 0.020000\n",
-      "2021-09-08 11:08:40,131 epoch 4 - iter 2/7 - loss 0.74045870 - samples/sec: 21.29 - lr: 0.020000\n",
-      "2021-09-08 11:08:40,183 epoch 4 - iter 3/7 - loss 0.75431122 - samples/sec: 19.57 - lr: 0.020000\n",
-      "2021-09-08 11:08:40,231 epoch 4 - iter 4/7 - loss 0.72130911 - samples/sec: 20.93 - lr: 0.020000\n",
-      "2021-09-08 11:08:40,278 epoch 4 - iter 5/7 - loss 0.68961624 - samples/sec: 21.48 - lr: 0.020000\n",
-      "2021-09-08 11:08:40,329 epoch 4 - iter 6/7 - loss 0.68334764 - samples/sec: 19.67 - lr: 0.020000\n",
-      "2021-09-08 11:08:40,380 epoch 4 - iter 7/7 - loss 0.66251091 - samples/sec: 20.03 - lr: 0.020000\n",
-      "2021-09-08 11:08:40,381 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:08:40,381 EPOCH 4 done: loss 0.6625 - lr 0.0200000\n",
-      "2021-09-08 11:08:40,413 DEV : loss 0.2989574074745178 - score 0.0\n",
-      "2021-09-08 11:08:40,413 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:08:40,417 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:08:40,483 epoch 5 - iter 1/7 - loss 0.73651582 - samples/sec: 19.49 - lr: 0.020000\n",
-      "2021-09-08 11:08:40,530 epoch 5 - iter 2/7 - loss 0.66792738 - samples/sec: 21.47 - lr: 0.020000\n",
-      "2021-09-08 11:08:40,577 epoch 5 - iter 3/7 - loss 0.63729354 - samples/sec: 21.34 - lr: 0.020000\n",
-      "2021-09-08 11:08:40,627 epoch 5 - iter 4/7 - loss 0.62738295 - samples/sec: 20.00 - lr: 0.020000\n",
-      "2021-09-08 11:08:40,678 epoch 5 - iter 5/7 - loss 0.64160478 - samples/sec: 19.98 - lr: 0.020000\n",
-      "2021-09-08 11:08:40,728 epoch 5 - iter 6/7 - loss 0.63971240 - samples/sec: 20.04 - lr: 0.020000\n",
-      "2021-09-08 11:08:40,776 epoch 5 - iter 7/7 - loss 0.63739099 - samples/sec: 20.93 - lr: 0.020000\n",
-      "2021-09-08 11:08:40,777 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:08:40,778 EPOCH 5 done: loss 0.6374 - lr 0.0200000\n",
-      "2021-09-08 11:08:40,807 DEV : loss 0.4580262303352356 - score 0.0\n",
-      "2021-09-08 11:08:40,808 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:08:40,811 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:08:40,875 epoch 6 - iter 1/7 - loss 0.62621003 - samples/sec: 20.34 - lr: 0.020000\n",
-      "2021-09-08 11:08:40,922 epoch 6 - iter 2/7 - loss 0.62603599 - samples/sec: 21.42 - lr: 0.020000\n",
-      "2021-09-08 11:08:40,973 epoch 6 - iter 3/7 - loss 0.58088720 - samples/sec: 19.86 - lr: 0.020000\n",
-      "2021-09-08 11:08:41,020 epoch 6 - iter 4/7 - loss 0.59589313 - samples/sec: 21.46 - lr: 0.020000\n",
-      "2021-09-08 11:08:41,070 epoch 6 - iter 5/7 - loss 0.61658974 - samples/sec: 20.03 - lr: 0.020000\n",
-      "2021-09-08 11:08:41,120 epoch 6 - iter 6/7 - loss 0.63638063 - samples/sec: 20.04 - lr: 0.020000\n",
-      "2021-09-08 11:08:41,172 epoch 6 - iter 7/7 - loss 0.63298036 - samples/sec: 19.53 - lr: 0.020000\n",
-      "2021-09-08 11:08:41,173 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:08:41,173 EPOCH 6 done: loss 0.6330 - lr 0.0200000\n",
-      "2021-09-08 11:08:41,205 DEV : loss 0.1192137822508812 - score 0.0\n",
-      "2021-09-08 11:08:41,206 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:43:34,037 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:34,111 epoch 3 - iter 1/7 - loss 0.67824674 - samples/sec: 18.31 - lr: 0.020000\n",
+      "2021-09-21 19:43:34,163 epoch 3 - iter 2/7 - loss 0.68226516 - samples/sec: 19.69 - lr: 0.020000\n",
+      "2021-09-21 19:43:34,214 epoch 3 - iter 3/7 - loss 0.68942902 - samples/sec: 19.82 - lr: 0.020000\n",
+      "2021-09-21 19:43:34,263 epoch 3 - iter 4/7 - loss 0.68731128 - samples/sec: 20.46 - lr: 0.020000\n",
+      "2021-09-21 19:43:34,314 epoch 3 - iter 5/7 - loss 0.69025080 - samples/sec: 19.61 - lr: 0.020000\n",
+      "2021-09-21 19:43:34,363 epoch 3 - iter 6/7 - loss 0.67307012 - samples/sec: 20.70 - lr: 0.020000\n",
+      "2021-09-21 19:43:34,413 epoch 3 - iter 7/7 - loss 0.67443720 - samples/sec: 20.30 - lr: 0.020000\n",
+      "2021-09-21 19:43:34,414 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:34,414 EPOCH 3 done: loss 0.6744 - lr 0.0200000\n",
+      "2021-09-21 19:43:34,704 DEV : loss 0.2625247836112976 - score 0.0\n",
+      "2021-09-21 19:43:34,705 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:08:45,340 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:08:45,409 epoch 7 - iter 1/7 - loss 0.81221598 - samples/sec: 18.62 - lr: 0.020000\n"
+      "2021-09-21 19:43:39,352 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:39,420 epoch 4 - iter 1/7 - loss 0.55408758 - samples/sec: 19.35 - lr: 0.020000\n",
+      "2021-09-21 19:43:39,475 epoch 4 - iter 2/7 - loss 0.61927664 - samples/sec: 18.59 - lr: 0.020000\n",
+      "2021-09-21 19:43:39,526 epoch 4 - iter 3/7 - loss 0.63257694 - samples/sec: 19.59 - lr: 0.020000\n",
+      "2021-09-21 19:43:39,579 epoch 4 - iter 4/7 - loss 0.63037965 - samples/sec: 18.91 - lr: 0.020000\n",
+      "2021-09-21 19:43:39,628 epoch 4 - iter 5/7 - loss 0.65076345 - samples/sec: 20.98 - lr: 0.020000\n",
+      "2021-09-21 19:43:39,680 epoch 4 - iter 6/7 - loss 0.64942903 - samples/sec: 19.09 - lr: 0.020000\n",
+      "2021-09-21 19:43:39,733 epoch 4 - iter 7/7 - loss 0.65111574 - samples/sec: 19.09 - lr: 0.020000\n",
+      "2021-09-21 19:43:39,734 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:39,734 EPOCH 4 done: loss 0.6511 - lr 0.0200000\n",
+      "2021-09-21 19:43:39,772 DEV : loss 0.4750176966190338 - score 0.0\n",
+      "2021-09-21 19:43:39,773 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:43:39,789 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:39,857 epoch 5 - iter 1/7 - loss 0.60694337 - samples/sec: 20.70 - lr: 0.020000\n",
+      "2021-09-21 19:43:39,906 epoch 5 - iter 2/7 - loss 0.54635085 - samples/sec: 20.67 - lr: 0.020000\n",
+      "2021-09-21 19:43:39,961 epoch 5 - iter 3/7 - loss 0.56132463 - samples/sec: 18.36 - lr: 0.020000\n",
+      "2021-09-21 19:43:40,011 epoch 5 - iter 4/7 - loss 0.55208705 - samples/sec: 20.09 - lr: 0.020000\n",
+      "2021-09-21 19:43:40,063 epoch 5 - iter 5/7 - loss 0.55963716 - samples/sec: 19.43 - lr: 0.020000\n",
+      "2021-09-21 19:43:40,114 epoch 5 - iter 6/7 - loss 0.55630325 - samples/sec: 19.77 - lr: 0.020000\n",
+      "2021-09-21 19:43:40,163 epoch 5 - iter 7/7 - loss 0.56084723 - samples/sec: 20.58 - lr: 0.020000\n",
+      "2021-09-21 19:43:40,164 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:40,164 EPOCH 5 done: loss 0.5608 - lr 0.0200000\n",
+      "2021-09-21 19:43:40,204 DEV : loss 0.35845014452934265 - score 0.0\n",
+      "2021-09-21 19:43:40,204 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:43:40,206 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:40,280 epoch 6 - iter 1/7 - loss 0.40555534 - samples/sec: 19.38 - lr: 0.020000\n",
+      "2021-09-21 19:43:40,333 epoch 6 - iter 2/7 - loss 0.61513595 - samples/sec: 18.90 - lr: 0.020000\n",
+      "2021-09-21 19:43:40,383 epoch 6 - iter 3/7 - loss 0.66962076 - samples/sec: 20.39 - lr: 0.020000\n",
+      "2021-09-21 19:43:40,433 epoch 6 - iter 4/7 - loss 0.66563610 - samples/sec: 20.17 - lr: 0.020000\n",
+      "2021-09-21 19:43:40,485 epoch 6 - iter 5/7 - loss 0.70432442 - samples/sec: 19.14 - lr: 0.020000\n",
+      "2021-09-21 19:43:40,535 epoch 6 - iter 6/7 - loss 0.66329190 - samples/sec: 20.43 - lr: 0.020000\n",
+      "2021-09-21 19:43:40,586 epoch 6 - iter 7/7 - loss 0.63265714 - samples/sec: 19.56 - lr: 0.020000\n",
+      "2021-09-21 19:43:40,587 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:40,587 EPOCH 6 done: loss 0.6327 - lr 0.0200000\n",
+      "2021-09-21 19:43:40,621 DEV : loss 0.32975828647613525 - score 0.0\n",
+      "2021-09-21 19:43:40,621 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:43:40,623 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:40,689 epoch 7 - iter 1/7 - loss 0.26023245 - samples/sec: 21.32 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:08:45,460 epoch 7 - iter 2/7 - loss 0.73073646 - samples/sec: 20.00 - lr: 0.020000\n",
-      "2021-09-08 11:08:45,513 epoch 7 - iter 3/7 - loss 0.69341795 - samples/sec: 19.05 - lr: 0.020000\n",
-      "2021-09-08 11:08:45,564 epoch 7 - iter 4/7 - loss 0.69134994 - samples/sec: 19.65 - lr: 0.020000\n",
-      "2021-09-08 11:08:45,615 epoch 7 - iter 5/7 - loss 0.78480407 - samples/sec: 19.94 - lr: 0.020000\n",
-      "2021-09-08 11:08:45,665 epoch 7 - iter 6/7 - loss 0.75109014 - samples/sec: 19.97 - lr: 0.020000\n",
-      "2021-09-08 11:08:45,712 epoch 7 - iter 7/7 - loss 0.73349137 - samples/sec: 21.51 - lr: 0.020000\n",
-      "2021-09-08 11:08:45,713 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:08:45,713 EPOCH 7 done: loss 0.7335 - lr 0.0200000\n",
-      "2021-09-08 11:08:45,743 DEV : loss 0.5160879492759705 - score 0.0\n",
-      "2021-09-08 11:08:45,743 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:08:45,748 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:08:45,809 epoch 8 - iter 1/7 - loss 0.61771905 - samples/sec: 21.24 - lr: 0.020000\n",
-      "2021-09-08 11:08:45,860 epoch 8 - iter 2/7 - loss 0.60428897 - samples/sec: 19.87 - lr: 0.020000\n",
-      "2021-09-08 11:08:45,907 epoch 8 - iter 3/7 - loss 0.60088978 - samples/sec: 21.37 - lr: 0.020000\n",
-      "2021-09-08 11:08:45,954 epoch 8 - iter 4/7 - loss 0.61417988 - samples/sec: 21.35 - lr: 0.020000\n",
-      "2021-09-08 11:08:46,001 epoch 8 - iter 5/7 - loss 0.64943537 - samples/sec: 21.63 - lr: 0.020000\n",
-      "2021-09-08 11:08:46,049 epoch 8 - iter 6/7 - loss 0.64537617 - samples/sec: 20.85 - lr: 0.020000\n",
-      "2021-09-08 11:08:46,103 epoch 8 - iter 7/7 - loss 0.64840208 - samples/sec: 18.89 - lr: 0.020000\n",
-      "2021-09-08 11:08:46,104 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:08:46,104 EPOCH 8 done: loss 0.6484 - lr 0.0200000\n",
-      "2021-09-08 11:08:46,132 DEV : loss 0.46143391728401184 - score 0.0\n",
-      "2021-09-08 11:08:46,133 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:08:46,135 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:08:46,196 epoch 9 - iter 1/7 - loss 0.69005442 - samples/sec: 21.12 - lr: 0.020000\n",
-      "2021-09-08 11:08:46,247 epoch 9 - iter 2/7 - loss 0.65877989 - samples/sec: 19.99 - lr: 0.020000\n",
-      "2021-09-08 11:08:46,295 epoch 9 - iter 3/7 - loss 0.64624671 - samples/sec: 20.92 - lr: 0.020000\n",
-      "2021-09-08 11:08:46,342 epoch 9 - iter 4/7 - loss 0.63842750 - samples/sec: 21.40 - lr: 0.020000\n",
-      "2021-09-08 11:08:46,389 epoch 9 - iter 5/7 - loss 0.64461478 - samples/sec: 21.43 - lr: 0.020000\n",
-      "2021-09-08 11:08:46,440 epoch 9 - iter 6/7 - loss 0.62531922 - samples/sec: 20.07 - lr: 0.020000\n",
-      "2021-09-08 11:08:46,487 epoch 9 - iter 7/7 - loss 0.63420576 - samples/sec: 21.27 - lr: 0.020000\n",
-      "2021-09-08 11:08:46,488 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:08:46,488 EPOCH 9 done: loss 0.6342 - lr 0.0200000\n",
-      "2021-09-08 11:08:46,517 DEV : loss 0.5488693714141846 - score 0.0\n",
-      "2021-09-08 11:08:46,518 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:08:46,520 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:08:46,582 epoch 10 - iter 1/7 - loss 0.63430077 - samples/sec: 20.62 - lr: 0.020000\n",
-      "2021-09-08 11:08:46,630 epoch 10 - iter 2/7 - loss 0.65195039 - samples/sec: 20.93 - lr: 0.020000\n",
-      "2021-09-08 11:08:46,677 epoch 10 - iter 3/7 - loss 0.65507642 - samples/sec: 21.49 - lr: 0.020000\n",
-      "2021-09-08 11:08:46,724 epoch 10 - iter 4/7 - loss 0.65826879 - samples/sec: 21.55 - lr: 0.020000\n",
-      "2021-09-08 11:08:46,771 epoch 10 - iter 5/7 - loss 0.65806508 - samples/sec: 21.35 - lr: 0.020000\n",
-      "2021-09-08 11:08:46,818 epoch 10 - iter 6/7 - loss 0.67110553 - samples/sec: 21.55 - lr: 0.020000\n",
-      "2021-09-08 11:08:46,867 epoch 10 - iter 7/7 - loss 0.67030795 - samples/sec: 20.84 - lr: 0.020000\n",
-      "2021-09-08 11:08:46,868 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:08:46,868 EPOCH 10 done: loss 0.6703 - lr 0.0200000\n",
-      "2021-09-08 11:08:46,897 DEV : loss 0.5512229204177856 - score 0.0\n",
-      "Epoch    10: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 11:08:46,898 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 11:08:50,541 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:08:50,542 Testing using best model ...\n",
-      "2021-09-08 11:08:50,544 loading file None/best-model.pt\n",
+      "2021-09-21 19:43:40,743 epoch 7 - iter 2/7 - loss 0.40829420 - samples/sec: 18.73 - lr: 0.020000\n",
+      "2021-09-21 19:43:40,794 epoch 7 - iter 3/7 - loss 0.42622753 - samples/sec: 19.63 - lr: 0.020000\n",
+      "2021-09-21 19:43:40,850 epoch 7 - iter 4/7 - loss 0.45876612 - samples/sec: 18.03 - lr: 0.020000\n",
+      "2021-09-21 19:43:40,899 epoch 7 - iter 5/7 - loss 0.48476251 - samples/sec: 20.49 - lr: 0.020000\n",
+      "2021-09-21 19:43:40,950 epoch 7 - iter 6/7 - loss 0.46873008 - samples/sec: 19.92 - lr: 0.020000\n",
+      "2021-09-21 19:43:41,002 epoch 7 - iter 7/7 - loss 0.47440519 - samples/sec: 19.45 - lr: 0.020000\n",
+      "2021-09-21 19:43:41,003 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:41,003 EPOCH 7 done: loss 0.4744 - lr 0.0200000\n",
+      "2021-09-21 19:43:41,036 DEV : loss 0.46890729665756226 - score 0.0\n",
+      "Epoch     7: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 19:43:41,036 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:43:41,038 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:41,105 epoch 8 - iter 1/7 - loss 0.35784188 - samples/sec: 20.22 - lr: 0.010000\n",
+      "2021-09-21 19:43:41,153 epoch 8 - iter 2/7 - loss 0.57615094 - samples/sec: 20.94 - lr: 0.010000\n",
+      "2021-09-21 19:43:41,200 epoch 8 - iter 3/7 - loss 0.51874188 - samples/sec: 21.52 - lr: 0.010000\n",
+      "2021-09-21 19:43:41,255 epoch 8 - iter 4/7 - loss 0.47167138 - samples/sec: 18.43 - lr: 0.010000\n",
+      "2021-09-21 19:43:41,308 epoch 8 - iter 5/7 - loss 0.52009413 - samples/sec: 19.11 - lr: 0.010000\n",
+      "2021-09-21 19:43:41,358 epoch 8 - iter 6/7 - loss 0.54009527 - samples/sec: 19.94 - lr: 0.010000\n",
+      "2021-09-21 19:43:41,411 epoch 8 - iter 7/7 - loss 0.50507683 - samples/sec: 19.30 - lr: 0.010000\n",
+      "2021-09-21 19:43:41,412 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:41,412 EPOCH 8 done: loss 0.5051 - lr 0.0100000\n",
+      "2021-09-21 19:43:41,446 DEV : loss 0.5427753925323486 - score 0.0\n",
+      "2021-09-21 19:43:41,446 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:43:41,448 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:41,515 epoch 9 - iter 1/7 - loss 0.48464882 - samples/sec: 20.18 - lr: 0.010000\n",
+      "2021-09-21 19:43:41,566 epoch 9 - iter 2/7 - loss 0.46276926 - samples/sec: 20.10 - lr: 0.010000\n",
+      "2021-09-21 19:43:41,617 epoch 9 - iter 3/7 - loss 0.55171751 - samples/sec: 19.58 - lr: 0.010000\n",
+      "2021-09-21 19:43:41,667 epoch 9 - iter 4/7 - loss 0.50059840 - samples/sec: 20.40 - lr: 0.010000\n",
+      "2021-09-21 19:43:41,719 epoch 9 - iter 5/7 - loss 0.47092959 - samples/sec: 19.07 - lr: 0.010000\n",
+      "2021-09-21 19:43:41,768 epoch 9 - iter 6/7 - loss 0.42734218 - samples/sec: 20.81 - lr: 0.010000\n",
+      "2021-09-21 19:43:41,821 epoch 9 - iter 7/7 - loss 0.48614589 - samples/sec: 18.84 - lr: 0.010000\n",
+      "2021-09-21 19:43:41,822 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:41,823 EPOCH 9 done: loss 0.4861 - lr 0.0100000\n",
+      "2021-09-21 19:43:41,859 DEV : loss 0.24925899505615234 - score 0.0\n",
+      "2021-09-21 19:43:41,859 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:43:46,319 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:46,385 epoch 10 - iter 1/7 - loss 0.34696117 - samples/sec: 19.93 - lr: 0.010000\n",
+      "2021-09-21 19:43:46,432 epoch 10 - iter 2/7 - loss 0.29592311 - samples/sec: 21.41 - lr: 0.010000\n",
+      "2021-09-21 19:43:46,480 epoch 10 - iter 3/7 - loss 0.38763185 - samples/sec: 21.25 - lr: 0.010000\n",
+      "2021-09-21 19:43:46,523 epoch 10 - iter 4/7 - loss 0.32256214 - samples/sec: 23.22 - lr: 0.010000\n",
+      "2021-09-21 19:43:46,567 epoch 10 - iter 5/7 - loss 0.28342925 - samples/sec: 23.00 - lr: 0.010000\n",
+      "2021-09-21 19:43:46,614 epoch 10 - iter 6/7 - loss 0.33572209 - samples/sec: 21.44 - lr: 0.010000\n",
+      "2021-09-21 19:43:46,660 epoch 10 - iter 7/7 - loss 0.31464245 - samples/sec: 21.72 - lr: 0.010000\n",
+      "2021-09-21 19:43:46,661 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:46,662 EPOCH 10 done: loss 0.3146 - lr 0.0100000\n",
+      "2021-09-21 19:43:46,690 DEV : loss 0.5420421957969666 - score 0.0\n",
+      "2021-09-21 19:43:46,691 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:43:55,954 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:55,955 Testing using best model ...\n",
+      "2021-09-21 19:43:55,956 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:08:55,096 \t0.0\n",
-      "2021-09-08 11:08:55,097 \n",
+      "2021-09-21 19:44:01,054 \t0.0\n",
+      "2021-09-21 19:44:01,055 \n",
       "Results:\n",
       "- F-score (micro) 0.0\n",
       "- F-score (macro) 0.0\n",
       "- Accuracy 0.0\n",
       "\n",
       "By class:\n",
-      "                                                                        precision    recall  f1-score   support\n",
+      "                                                                            precision    recall  f1-score   support\n",
       "\n",
-      "                                        the emotion of great happiness     0.0000    0.0000    0.0000         0\n",
-      "                     a strong positive emotion of regard and affection     0.0000    0.0000    0.0000         0\n",
-      "an emotion experienced in anticipation of some specific pain or danger     0.0000    0.0000    0.0000         0\n",
-      "     a feeling that is oriented toward some real or supposed grievance     0.0000    0.0000    0.0000         0\n",
-      "                remorse caused by feeling responsible for some offense     0.0000    0.0000    0.0000         0\n",
-      "  a painful emotion resulting from an awareness of inadequacy or guilt     0.0000    0.0000    0.0000         0\n",
-      "                emotions experienced when not in a state of well-being     0.0000    0.0000    0.0000         0\n",
-      "                                            strong feelings of dislike     0.0000    0.0000    0.0000         1\n",
+      "                                            the emotion of great happiness     0.0000    0.0000    0.0000         0\n",
+      "the astonishment you feel when something totally unexpected happens to you     0.0000    0.0000    0.0000         0\n",
+      "                         a strong positive emotion of regard and affection     0.0000    0.0000    0.0000         0\n",
+      "                                                strong feelings of dislike     0.0000    0.0000    0.0000         0\n",
+      "    an emotion experienced in anticipation of some specific pain or danger     0.0000    0.0000    0.0000         0\n",
+      "         a feeling that is oriented toward some real or supposed grievance     0.0000    0.0000    0.0000         0\n",
+      "      a painful emotion resulting from an awareness of inadequacy or guilt     0.0000    0.0000    0.0000         0\n",
+      "                    remorse caused by feeling responsible for some offense     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "                                                             micro avg     0.0000    0.0000    0.0000         1\n",
-      "                                                             macro avg     0.0000    0.0000    0.0000         1\n",
-      "                                                          weighted avg     0.0000    0.0000    0.0000         1\n",
-      "                                                           samples avg     0.0000    0.0000    0.0000         1\n",
+      "                                                                 micro avg     0.0000    0.0000    0.0000         1\n",
+      "                                                                 macro avg     0.0000    0.0000    0.0000         1\n",
+      "                                                              weighted avg     0.0000    0.0000    0.0000         1\n",
+      "                                                               samples avg     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "2021-09-08 11:08:55,097 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:09:38,935 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
+      "2021-09-21 19:44:01,055 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:50,972 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_semeval/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:09:42,875 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:44:54,994 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 8/8 [00:00<00:00, 20585.54it/s]"
+      "100%|██████████| 8/8 [00:00<00:00, 27060.03it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:09:42,877 [b'the astonishment you feel when something totally unexpected happens to you', b'a strong positive emotion of regard and affection', b'an emotion experienced in anticipation of some specific pain or danger', b'a feeling that is oriented toward some real or supposed grievance', b'remorse caused by feeling responsible for some offense', b'a painful emotion resulting from an awareness of inadequacy or guilt', b'emotions experienced when not in a state of well-being', b'strong feelings of dislike']\n",
-      "2021-09-08 11:09:42,887 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:09:42,889 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:44:54,996 [b'the emotion of great happiness', b'the astonishment you feel when something totally unexpected happens to you', b'a strong positive emotion of regard and affection', b'a feeling that is oriented toward some real or supposed grievance', b'remorse caused by feeling responsible for some offense', b'a painful emotion resulting from an awareness of inadequacy or guilt', b'emotions experienced when not in a state of well-being', b'an emotion experienced in anticipation of some specific pain or danger']\n",
+      "2021-09-21 19:44:55,122 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:55,123 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -7892,28 +7932,24 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:09:42,890 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:09:42,890 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
-      "2021-09-08 11:09:42,890 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:09:42,890 Parameters:\n",
-      "2021-09-08 11:09:42,891  - learning_rate: \"0.02\"\n",
-      "2021-09-08 11:09:42,891  - mini_batch_size: \"1\"\n",
-      "2021-09-08 11:09:42,891  - patience: \"3\"\n",
-      "2021-09-08 11:09:42,891  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 11:09:42,892  - max_epochs: \"10\"\n",
-      "2021-09-08 11:09:42,892  - shuffle: \"True\"\n",
-      "2021-09-08 11:09:42,892  - train_with_dev: \"False\"\n",
-      "2021-09-08 11:09:42,893  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 11:09:42,893 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:09:42,893 Model training base path: \"None\"\n",
-      "2021-09-08 11:09:42,893 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:09:42,894 Device: cuda:1\n",
-      "2021-09-08 11:09:42,894 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:09:42,894 Embeddings storage mode: cpu\n",
-      "2021-09-08 11:09:42,901 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:09:42,958 epoch 1 - iter 1/7 - loss 0.64207721 - samples/sec: 23.47 - lr: 0.020000\n",
-      "2021-09-08 11:09:43,009 epoch 1 - iter 2/7 - loss 0.65142497 - samples/sec: 19.98 - lr: 0.020000\n",
-      "2021-09-08 11:09:43,060 epoch 1 - iter 3/7 - loss 0.77492120 - samples/sec: 19.87 - lr: 0.020000\n"
+      "2021-09-21 19:44:55,124 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:55,124 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
+      "2021-09-21 19:44:55,125 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:55,125 Parameters:\n",
+      "2021-09-21 19:44:55,125  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:44:55,126  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:44:55,126  - patience: \"3\"\n",
+      "2021-09-21 19:44:55,126  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:44:55,126  - max_epochs: \"10\"\n",
+      "2021-09-21 19:44:55,127  - shuffle: \"True\"\n",
+      "2021-09-21 19:44:55,127  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:44:55,127  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:44:55,128 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:55,128 Model training base path: \"None\"\n",
+      "2021-09-21 19:44:55,128 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:55,128 Device: cuda:0\n",
+      "2021-09-21 19:44:55,129 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:55,129 Embeddings storage mode: cpu\n"
      ]
     },
     {
@@ -7927,138 +7963,142 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:09:43,110 epoch 1 - iter 4/7 - loss 0.74021992 - samples/sec: 19.93 - lr: 0.020000\n",
-      "2021-09-08 11:09:43,161 epoch 1 - iter 5/7 - loss 0.76784865 - samples/sec: 19.95 - lr: 0.020000\n",
-      "2021-09-08 11:09:43,211 epoch 1 - iter 6/7 - loss 0.74421229 - samples/sec: 19.96 - lr: 0.020000\n",
-      "2021-09-08 11:09:43,263 epoch 1 - iter 7/7 - loss 0.73559994 - samples/sec: 19.27 - lr: 0.020000\n",
-      "2021-09-08 11:09:43,264 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:09:43,265 EPOCH 1 done: loss 0.7356 - lr 0.0200000\n",
-      "2021-09-08 11:09:43,292 DEV : loss 0.37830764055252075 - score 0.0\n",
-      "2021-09-08 11:09:43,293 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:44:55,307 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:55,363 epoch 1 - iter 1/7 - loss 0.60555166 - samples/sec: 24.69 - lr: 0.020000\n",
+      "2021-09-21 19:44:55,410 epoch 1 - iter 2/7 - loss 0.56664020 - samples/sec: 21.31 - lr: 0.020000\n",
+      "2021-09-21 19:44:55,457 epoch 1 - iter 3/7 - loss 0.59134124 - samples/sec: 21.50 - lr: 0.020000\n",
+      "2021-09-21 19:44:55,507 epoch 1 - iter 4/7 - loss 0.60578659 - samples/sec: 20.13 - lr: 0.020000\n",
+      "2021-09-21 19:44:55,556 epoch 1 - iter 5/7 - loss 0.61030957 - samples/sec: 20.60 - lr: 0.020000\n",
+      "2021-09-21 19:44:55,603 epoch 1 - iter 6/7 - loss 0.61960044 - samples/sec: 21.45 - lr: 0.020000\n",
+      "2021-09-21 19:44:55,650 epoch 1 - iter 7/7 - loss 0.63522159 - samples/sec: 21.47 - lr: 0.020000\n",
+      "2021-09-21 19:44:55,651 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:55,652 EPOCH 1 done: loss 0.6352 - lr 0.0200000\n",
+      "2021-09-21 19:44:55,794 DEV : loss 0.38814830780029297 - score 0.0\n",
+      "2021-09-21 19:44:55,794 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:09:47,354 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:09:47,421 epoch 2 - iter 1/7 - loss 0.79370993 - samples/sec: 19.54 - lr: 0.020000\n",
-      "2021-09-08 11:09:47,472 epoch 2 - iter 2/7 - loss 0.71344382 - samples/sec: 19.98 - lr: 0.020000\n",
-      "2021-09-08 11:09:47,523 epoch 2 - iter 3/7 - loss 0.69287167 - samples/sec: 19.89 - lr: 0.020000\n",
-      "2021-09-08 11:09:47,573 epoch 2 - iter 4/7 - loss 0.68071146 - samples/sec: 20.14 - lr: 0.020000\n",
-      "2021-09-08 11:09:47,623 epoch 2 - iter 5/7 - loss 0.70359428 - samples/sec: 20.08 - lr: 0.020000\n",
-      "2021-09-08 11:09:47,674 epoch 2 - iter 6/7 - loss 0.73121008 - samples/sec: 19.89 - lr: 0.020000\n",
-      "2021-09-08 11:09:47,724 epoch 2 - iter 7/7 - loss 0.72734480 - samples/sec: 20.10 - lr: 0.020000\n",
-      "2021-09-08 11:09:47,725 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:09:47,725 EPOCH 2 done: loss 0.7273 - lr 0.0200000\n",
-      "2021-09-08 11:09:47,753 DEV : loss 0.30222779512405396 - score 0.0\n",
-      "2021-09-08 11:09:47,754 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:45:01,112 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:01,220 epoch 2 - iter 1/7 - loss 0.62501943 - samples/sec: 14.74 - lr: 0.020000\n",
+      "2021-09-21 19:45:01,274 epoch 2 - iter 2/7 - loss 0.70600349 - samples/sec: 18.46 - lr: 0.020000\n",
+      "2021-09-21 19:45:01,322 epoch 2 - iter 3/7 - loss 0.65625829 - samples/sec: 21.29 - lr: 0.020000\n",
+      "2021-09-21 19:45:01,369 epoch 2 - iter 4/7 - loss 0.64789447 - samples/sec: 21.26 - lr: 0.020000\n",
+      "2021-09-21 19:45:01,418 epoch 2 - iter 5/7 - loss 0.65815747 - samples/sec: 20.80 - lr: 0.020000\n",
+      "2021-09-21 19:45:01,469 epoch 2 - iter 6/7 - loss 0.66108661 - samples/sec: 19.76 - lr: 0.020000\n",
+      "2021-09-21 19:45:01,517 epoch 2 - iter 7/7 - loss 0.65311107 - samples/sec: 21.02 - lr: 0.020000\n",
+      "2021-09-21 19:45:01,518 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:01,518 EPOCH 2 done: loss 0.6531 - lr 0.0200000\n",
+      "2021-09-21 19:45:01,549 DEV : loss 0.5142110586166382 - score 0.0\n",
+      "2021-09-21 19:45:01,550 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:45:01,552 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:01,615 epoch 3 - iter 1/7 - loss 0.63701969 - samples/sec: 20.77 - lr: 0.020000\n",
+      "2021-09-21 19:45:01,665 epoch 3 - iter 2/7 - loss 0.69464642 - samples/sec: 20.28 - lr: 0.020000\n",
+      "2021-09-21 19:45:01,715 epoch 3 - iter 3/7 - loss 0.71671482 - samples/sec: 20.31 - lr: 0.020000\n",
+      "2021-09-21 19:45:01,763 epoch 3 - iter 4/7 - loss 0.68758735 - samples/sec: 20.90 - lr: 0.020000\n",
+      "2021-09-21 19:45:01,812 epoch 3 - iter 5/7 - loss 0.68759934 - samples/sec: 20.59 - lr: 0.020000\n",
+      "2021-09-21 19:45:01,862 epoch 3 - iter 6/7 - loss 0.67915497 - samples/sec: 20.57 - lr: 0.020000\n",
+      "2021-09-21 19:45:01,911 epoch 3 - iter 7/7 - loss 0.66620419 - samples/sec: 20.54 - lr: 0.020000\n",
+      "2021-09-21 19:45:01,912 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:01,912 EPOCH 3 done: loss 0.6662 - lr 0.0200000\n",
+      "2021-09-21 19:45:02,000 DEV : loss 0.3826568126678467 - score 0.0\n",
+      "2021-09-21 19:45:02,001 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:09:51,801 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:09:51,869 epoch 3 - iter 1/7 - loss 0.57876080 - samples/sec: 19.36 - lr: 0.020000\n",
-      "2021-09-08 11:09:51,920 epoch 3 - iter 2/7 - loss 0.60657030 - samples/sec: 19.83 - lr: 0.020000\n",
-      "2021-09-08 11:09:51,971 epoch 3 - iter 3/7 - loss 0.69675100 - samples/sec: 19.86 - lr: 0.020000\n",
-      "2021-09-08 11:09:52,021 epoch 3 - iter 4/7 - loss 0.70777550 - samples/sec: 20.01 - lr: 0.020000\n",
-      "2021-09-08 11:09:52,071 epoch 3 - iter 5/7 - loss 0.68339864 - samples/sec: 20.13 - lr: 0.020000\n",
-      "2021-09-08 11:09:52,122 epoch 3 - iter 6/7 - loss 0.66747149 - samples/sec: 19.98 - lr: 0.020000\n",
-      "2021-09-08 11:09:52,172 epoch 3 - iter 7/7 - loss 0.65604161 - samples/sec: 19.97 - lr: 0.020000\n",
-      "2021-09-08 11:09:52,173 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:09:52,174 EPOCH 3 done: loss 0.6560 - lr 0.0200000\n",
-      "2021-09-08 11:09:52,202 DEV : loss 0.3276754319667816 - score 0.0\n",
-      "2021-09-08 11:09:52,203 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:09:52,206 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:09:52,270 epoch 4 - iter 1/7 - loss 0.62405413 - samples/sec: 20.12 - lr: 0.020000\n",
-      "2021-09-08 11:09:52,317 epoch 4 - iter 2/7 - loss 0.53678378 - samples/sec: 21.30 - lr: 0.020000\n",
-      "2021-09-08 11:09:52,364 epoch 4 - iter 3/7 - loss 0.55933269 - samples/sec: 21.45 - lr: 0.020000\n",
-      "2021-09-08 11:09:52,411 epoch 4 - iter 4/7 - loss 0.56980342 - samples/sec: 21.52 - lr: 0.020000\n",
-      "2021-09-08 11:09:52,460 epoch 4 - iter 5/7 - loss 0.58822699 - samples/sec: 20.67 - lr: 0.020000\n",
-      "2021-09-08 11:09:52,507 epoch 4 - iter 6/7 - loss 0.60819054 - samples/sec: 21.52 - lr: 0.020000\n",
-      "2021-09-08 11:09:52,554 epoch 4 - iter 7/7 - loss 0.61011835 - samples/sec: 21.53 - lr: 0.020000\n",
-      "2021-09-08 11:09:52,555 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:09:52,555 EPOCH 4 done: loss 0.6101 - lr 0.0200000\n",
-      "2021-09-08 11:09:52,583 DEV : loss 0.43227362632751465 - score 0.0\n",
-      "2021-09-08 11:09:52,583 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:09:52,585 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:09:52,646 epoch 5 - iter 1/7 - loss 0.57127142 - samples/sec: 21.71 - lr: 0.020000\n",
-      "2021-09-08 11:09:52,692 epoch 5 - iter 2/7 - loss 0.59720424 - samples/sec: 21.51 - lr: 0.020000\n",
-      "2021-09-08 11:09:52,739 epoch 5 - iter 3/7 - loss 0.60336596 - samples/sec: 21.57 - lr: 0.020000\n",
-      "2021-09-08 11:09:52,786 epoch 5 - iter 4/7 - loss 0.58749869 - samples/sec: 21.49 - lr: 0.020000\n",
-      "2021-09-08 11:09:52,833 epoch 5 - iter 5/7 - loss 0.62915027 - samples/sec: 21.36 - lr: 0.020000\n",
-      "2021-09-08 11:09:52,881 epoch 5 - iter 6/7 - loss 0.62407721 - samples/sec: 21.47 - lr: 0.020000\n",
-      "2021-09-08 11:09:52,927 epoch 5 - iter 7/7 - loss 0.61184926 - samples/sec: 21.64 - lr: 0.020000\n",
-      "2021-09-08 11:09:52,928 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:09:52,928 EPOCH 5 done: loss 0.6118 - lr 0.0200000\n",
-      "2021-09-08 11:09:52,958 DEV : loss 0.3207490146160126 - score 0.0\n",
-      "2021-09-08 11:09:52,958 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:09:52,961 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:09:53,022 epoch 6 - iter 1/7 - loss 0.82843328 - samples/sec: 21.63 - lr: 0.020000\n",
-      "2021-09-08 11:09:53,068 epoch 6 - iter 2/7 - loss 0.69028088 - samples/sec: 21.54 - lr: 0.020000\n",
-      "2021-09-08 11:09:53,115 epoch 6 - iter 3/7 - loss 0.65729624 - samples/sec: 21.81 - lr: 0.020000\n",
-      "2021-09-08 11:09:53,163 epoch 6 - iter 4/7 - loss 0.60904355 - samples/sec: 20.68 - lr: 0.020000\n",
-      "2021-09-08 11:09:53,210 epoch 6 - iter 5/7 - loss 0.64472567 - samples/sec: 21.66 - lr: 0.020000\n",
-      "2021-09-08 11:09:53,257 epoch 6 - iter 6/7 - loss 0.61854026 - samples/sec: 21.64 - lr: 0.020000\n",
-      "2021-09-08 11:09:53,303 epoch 6 - iter 7/7 - loss 0.60306040 - samples/sec: 21.56 - lr: 0.020000\n",
-      "2021-09-08 11:09:53,304 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:09:53,305 EPOCH 6 done: loss 0.6031 - lr 0.0200000\n",
-      "2021-09-08 11:09:53,333 DEV : loss 0.3182356655597687 - score 0.0\n",
-      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 11:09:53,334 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 11:09:53,337 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:09:53,398 epoch 7 - iter 1/7 - loss 0.38768184 - samples/sec: 21.42 - lr: 0.010000\n",
-      "2021-09-08 11:09:53,445 epoch 7 - iter 2/7 - loss 0.44170164 - samples/sec: 21.58 - lr: 0.010000\n",
-      "2021-09-08 11:09:53,492 epoch 7 - iter 3/7 - loss 0.41853721 - samples/sec: 21.56 - lr: 0.010000\n",
-      "2021-09-08 11:09:53,539 epoch 7 - iter 4/7 - loss 0.44924314 - samples/sec: 21.46 - lr: 0.010000\n",
-      "2021-09-08 11:09:53,585 epoch 7 - iter 5/7 - loss 0.44788340 - samples/sec: 21.69 - lr: 0.010000\n",
-      "2021-09-08 11:09:53,632 epoch 7 - iter 6/7 - loss 0.44083759 - samples/sec: 21.33 - lr: 0.010000\n",
-      "2021-09-08 11:09:53,679 epoch 7 - iter 7/7 - loss 0.44727228 - samples/sec: 21.59 - lr: 0.010000\n",
-      "2021-09-08 11:09:53,680 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:09:53,680 EPOCH 7 done: loss 0.4473 - lr 0.0100000\n",
-      "2021-09-08 11:09:53,708 DEV : loss 0.4173254370689392 - score 0.0\n",
-      "2021-09-08 11:09:53,708 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:09:53,711 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:09:53,771 epoch 8 - iter 1/7 - loss 0.52616304 - samples/sec: 21.76 - lr: 0.010000\n",
-      "2021-09-08 11:09:53,820 epoch 8 - iter 2/7 - loss 0.49645351 - samples/sec: 20.49 - lr: 0.010000\n",
-      "2021-09-08 11:09:53,867 epoch 8 - iter 3/7 - loss 0.47005828 - samples/sec: 21.61 - lr: 0.010000\n",
-      "2021-09-08 11:09:53,914 epoch 8 - iter 4/7 - loss 0.49478160 - samples/sec: 21.46 - lr: 0.010000\n",
-      "2021-09-08 11:09:53,961 epoch 8 - iter 5/7 - loss 0.45084201 - samples/sec: 21.72 - lr: 0.010000\n",
-      "2021-09-08 11:09:54,007 epoch 8 - iter 6/7 - loss 0.43404581 - samples/sec: 21.75 - lr: 0.010000\n"
+      "2021-09-21 19:45:09,664 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:09,729 epoch 4 - iter 1/7 - loss 0.63972837 - samples/sec: 20.66 - lr: 0.020000\n",
+      "2021-09-21 19:45:09,777 epoch 4 - iter 2/7 - loss 0.65589276 - samples/sec: 21.25 - lr: 0.020000\n",
+      "2021-09-21 19:45:09,824 epoch 4 - iter 3/7 - loss 0.63681879 - samples/sec: 21.22 - lr: 0.020000\n",
+      "2021-09-21 19:45:09,872 epoch 4 - iter 4/7 - loss 0.56921919 - samples/sec: 21.26 - lr: 0.020000\n",
+      "2021-09-21 19:45:09,919 epoch 4 - iter 5/7 - loss 0.57700040 - samples/sec: 21.34 - lr: 0.020000\n",
+      "2021-09-21 19:45:09,966 epoch 4 - iter 6/7 - loss 0.57720486 - samples/sec: 21.48 - lr: 0.020000\n",
+      "2021-09-21 19:45:10,014 epoch 4 - iter 7/7 - loss 0.57551136 - samples/sec: 20.93 - lr: 0.020000\n",
+      "2021-09-21 19:45:10,015 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:10,016 EPOCH 4 done: loss 0.5755 - lr 0.0200000\n",
+      "2021-09-21 19:45:14,149 DEV : loss 0.5223841667175293 - score 0.0\n",
+      "2021-09-21 19:45:14,150 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:45:14,193 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:14,268 epoch 5 - iter 1/7 - loss 0.66522878 - samples/sec: 17.53 - lr: 0.020000\n",
+      "2021-09-21 19:45:14,318 epoch 5 - iter 2/7 - loss 0.59507799 - samples/sec: 20.00 - lr: 0.020000\n",
+      "2021-09-21 19:45:14,370 epoch 5 - iter 3/7 - loss 0.48270654 - samples/sec: 19.63 - lr: 0.020000\n",
+      "2021-09-21 19:45:14,421 epoch 5 - iter 4/7 - loss 0.50889499 - samples/sec: 19.50 - lr: 0.020000\n",
+      "2021-09-21 19:45:14,473 epoch 5 - iter 5/7 - loss 0.54052826 - samples/sec: 19.61 - lr: 0.020000\n",
+      "2021-09-21 19:45:14,522 epoch 5 - iter 6/7 - loss 0.55394193 - samples/sec: 20.54 - lr: 0.020000\n",
+      "2021-09-21 19:45:14,576 epoch 5 - iter 7/7 - loss 0.53943818 - samples/sec: 18.63 - lr: 0.020000\n",
+      "2021-09-21 19:45:14,577 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:14,577 EPOCH 5 done: loss 0.5394 - lr 0.0200000\n",
+      "2021-09-21 19:45:14,613 DEV : loss 0.45506131649017334 - score 0.0\n",
+      "2021-09-21 19:45:14,614 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:45:14,724 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:14,795 epoch 6 - iter 1/7 - loss 0.38916835 - samples/sec: 19.12 - lr: 0.020000\n",
+      "2021-09-21 19:45:14,846 epoch 6 - iter 2/7 - loss 0.48305549 - samples/sec: 19.74 - lr: 0.020000\n",
+      "2021-09-21 19:45:14,901 epoch 6 - iter 3/7 - loss 0.53177268 - samples/sec: 18.39 - lr: 0.020000\n",
+      "2021-09-21 19:45:14,952 epoch 6 - iter 4/7 - loss 0.56195433 - samples/sec: 19.72 - lr: 0.020000\n",
+      "2021-09-21 19:45:15,003 epoch 6 - iter 5/7 - loss 0.48781814 - samples/sec: 19.64 - lr: 0.020000\n",
+      "2021-09-21 19:45:15,060 epoch 6 - iter 6/7 - loss 0.50548809 - samples/sec: 17.64 - lr: 0.020000\n",
+      "2021-09-21 19:45:15,114 epoch 6 - iter 7/7 - loss 0.50801208 - samples/sec: 18.67 - lr: 0.020000\n",
+      "2021-09-21 19:45:15,115 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:15,116 EPOCH 6 done: loss 0.5080 - lr 0.0200000\n",
+      "2021-09-21 19:45:15,154 DEV : loss 0.4658677279949188 - score 0.0\n",
+      "2021-09-21 19:45:15,154 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:45:15,156 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:15,228 epoch 7 - iter 1/7 - loss 0.42177212 - samples/sec: 18.52 - lr: 0.020000\n",
+      "2021-09-21 19:45:15,281 epoch 7 - iter 2/7 - loss 0.36698477 - samples/sec: 19.21 - lr: 0.020000\n",
+      "2021-09-21 19:45:15,336 epoch 7 - iter 3/7 - loss 0.36856478 - samples/sec: 18.15 - lr: 0.020000\n",
+      "2021-09-21 19:45:15,390 epoch 7 - iter 4/7 - loss 0.40312472 - samples/sec: 18.83 - lr: 0.020000\n",
+      "2021-09-21 19:45:15,438 epoch 7 - iter 5/7 - loss 0.33513007 - samples/sec: 21.17 - lr: 0.020000\n",
+      "2021-09-21 19:45:15,487 epoch 7 - iter 6/7 - loss 0.35686343 - samples/sec: 20.40 - lr: 0.020000\n",
+      "2021-09-21 19:45:15,543 epoch 7 - iter 7/7 - loss 0.36309297 - samples/sec: 17.84 - lr: 0.020000\n",
+      "2021-09-21 19:45:15,544 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:15,545 EPOCH 7 done: loss 0.3631 - lr 0.0200000\n",
+      "2021-09-21 19:45:15,580 DEV : loss 0.26209166646003723 - score 0.0\n",
+      "2021-09-21 19:45:15,581 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:45:19,769 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:19,838 epoch 8 - iter 1/7 - loss 0.18849505 - samples/sec: 19.80 - lr: 0.020000\n",
+      "2021-09-21 19:45:19,889 epoch 8 - iter 2/7 - loss 0.41549552 - samples/sec: 19.80 - lr: 0.020000\n",
+      "2021-09-21 19:45:19,949 epoch 8 - iter 3/7 - loss 0.48311917 - samples/sec: 16.85 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:09:54,054 epoch 8 - iter 7/7 - loss 0.40849266 - samples/sec: 21.43 - lr: 0.010000\n",
-      "2021-09-08 11:09:54,055 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:09:54,055 EPOCH 8 done: loss 0.4085 - lr 0.0100000\n",
-      "2021-09-08 11:09:54,084 DEV : loss 0.36456698179244995 - score 0.0\n",
-      "2021-09-08 11:09:54,084 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:09:54,086 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:09:54,146 epoch 9 - iter 1/7 - loss 0.28789178 - samples/sec: 21.82 - lr: 0.010000\n",
-      "2021-09-08 11:09:54,193 epoch 9 - iter 2/7 - loss 0.43355729 - samples/sec: 21.49 - lr: 0.010000\n",
-      "2021-09-08 11:09:54,239 epoch 9 - iter 3/7 - loss 0.46887236 - samples/sec: 21.69 - lr: 0.010000\n",
-      "2021-09-08 11:09:54,286 epoch 9 - iter 4/7 - loss 0.42778826 - samples/sec: 21.48 - lr: 0.010000\n",
-      "2021-09-08 11:09:54,334 epoch 9 - iter 5/7 - loss 0.41171501 - samples/sec: 21.35 - lr: 0.010000\n",
-      "2021-09-08 11:09:54,380 epoch 9 - iter 6/7 - loss 0.39750735 - samples/sec: 21.55 - lr: 0.010000\n",
-      "2021-09-08 11:09:54,427 epoch 9 - iter 7/7 - loss 0.39857342 - samples/sec: 21.53 - lr: 0.010000\n",
-      "2021-09-08 11:09:54,428 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:09:54,429 EPOCH 9 done: loss 0.3986 - lr 0.0100000\n",
-      "2021-09-08 11:09:54,456 DEV : loss 0.3532329797744751 - score 0.0\n",
-      "2021-09-08 11:09:54,457 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:09:54,459 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:09:54,522 epoch 10 - iter 1/7 - loss 0.52224463 - samples/sec: 20.45 - lr: 0.010000\n",
-      "2021-09-08 11:09:54,569 epoch 10 - iter 2/7 - loss 0.39724174 - samples/sec: 21.55 - lr: 0.010000\n",
-      "2021-09-08 11:09:54,616 epoch 10 - iter 3/7 - loss 0.32320521 - samples/sec: 21.39 - lr: 0.010000\n",
-      "2021-09-08 11:09:54,663 epoch 10 - iter 4/7 - loss 0.30391150 - samples/sec: 21.62 - lr: 0.010000\n",
-      "2021-09-08 11:09:54,709 epoch 10 - iter 5/7 - loss 0.26887486 - samples/sec: 21.54 - lr: 0.010000\n",
-      "2021-09-08 11:09:54,756 epoch 10 - iter 6/7 - loss 0.25546132 - samples/sec: 21.72 - lr: 0.010000\n",
-      "2021-09-08 11:09:54,803 epoch 10 - iter 7/7 - loss 0.29664355 - samples/sec: 21.40 - lr: 0.010000\n",
-      "2021-09-08 11:09:54,804 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:09:54,804 EPOCH 10 done: loss 0.2966 - lr 0.0100000\n",
-      "2021-09-08 11:09:54,832 DEV : loss 0.42837122082710266 - score 0.0\n",
-      "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 11:09:54,833 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 11:09:59,051 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:09:59,051 Testing using best model ...\n",
-      "2021-09-08 11:09:59,053 loading file None/best-model.pt\n",
+      "2021-09-21 19:45:20,002 epoch 8 - iter 4/7 - loss 0.41158848 - samples/sec: 19.02 - lr: 0.020000\n",
+      "2021-09-21 19:45:20,054 epoch 8 - iter 5/7 - loss 0.40752251 - samples/sec: 19.45 - lr: 0.020000\n",
+      "2021-09-21 19:45:20,107 epoch 8 - iter 6/7 - loss 0.37121565 - samples/sec: 19.03 - lr: 0.020000\n",
+      "2021-09-21 19:45:20,156 epoch 8 - iter 7/7 - loss 0.36072424 - samples/sec: 20.37 - lr: 0.020000\n",
+      "2021-09-21 19:45:20,157 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:20,158 EPOCH 8 done: loss 0.3607 - lr 0.0200000\n",
+      "2021-09-21 19:45:20,193 DEV : loss 0.4421858489513397 - score 0.0\n",
+      "2021-09-21 19:45:20,194 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:45:20,198 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:20,261 epoch 9 - iter 1/7 - loss 0.07965274 - samples/sec: 21.11 - lr: 0.020000\n",
+      "2021-09-21 19:45:20,315 epoch 9 - iter 2/7 - loss 0.40855701 - samples/sec: 18.75 - lr: 0.020000\n",
+      "2021-09-21 19:45:20,368 epoch 9 - iter 3/7 - loss 0.32007016 - samples/sec: 19.04 - lr: 0.020000\n",
+      "2021-09-21 19:45:20,418 epoch 9 - iter 4/7 - loss 0.33815464 - samples/sec: 20.25 - lr: 0.020000\n",
+      "2021-09-21 19:45:20,461 epoch 9 - iter 5/7 - loss 0.27414234 - samples/sec: 23.18 - lr: 0.020000\n",
+      "2021-09-21 19:45:20,511 epoch 9 - iter 6/7 - loss 0.28952503 - samples/sec: 20.22 - lr: 0.020000\n",
+      "2021-09-21 19:45:20,568 epoch 9 - iter 7/7 - loss 0.32200365 - samples/sec: 17.73 - lr: 0.020000\n",
+      "2021-09-21 19:45:20,569 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:20,569 EPOCH 9 done: loss 0.3220 - lr 0.0200000\n",
+      "2021-09-21 19:45:20,604 DEV : loss 0.32844263315200806 - score 0.0\n",
+      "2021-09-21 19:45:20,605 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:45:20,607 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:20,677 epoch 10 - iter 1/7 - loss 0.47669014 - samples/sec: 18.72 - lr: 0.020000\n",
+      "2021-09-21 19:45:20,724 epoch 10 - iter 2/7 - loss 0.25485724 - samples/sec: 21.49 - lr: 0.020000\n",
+      "2021-09-21 19:45:20,773 epoch 10 - iter 3/7 - loss 0.40943596 - samples/sec: 20.81 - lr: 0.020000\n",
+      "2021-09-21 19:45:20,831 epoch 10 - iter 4/7 - loss 0.41690025 - samples/sec: 17.27 - lr: 0.020000\n",
+      "2021-09-21 19:45:20,881 epoch 10 - iter 5/7 - loss 0.37700563 - samples/sec: 20.00 - lr: 0.020000\n",
+      "2021-09-21 19:45:20,931 epoch 10 - iter 6/7 - loss 0.36789714 - samples/sec: 20.20 - lr: 0.020000\n",
+      "2021-09-21 19:45:20,979 epoch 10 - iter 7/7 - loss 0.39292042 - samples/sec: 21.00 - lr: 0.020000\n",
+      "2021-09-21 19:45:20,980 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:20,981 EPOCH 10 done: loss 0.3929 - lr 0.0200000\n",
+      "2021-09-21 19:45:21,015 DEV : loss 0.13260860741138458 - score 0.0\n",
+      "2021-09-21 19:45:21,015 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:45:30,342 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:30,343 Testing using best model ...\n",
+      "2021-09-21 19:45:30,345 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:10:03,579 \t0.0\n",
-      "2021-09-08 11:10:03,579 \n",
+      "2021-09-21 19:45:35,166 \t0.0\n",
+      "2021-09-21 19:45:35,166 \n",
       "Results:\n",
       "- F-score (micro) 0.0\n",
       "- F-score (macro) 0.0\n",
@@ -8067,22 +8107,22 @@
       "By class:\n",
       "                                                                            precision    recall  f1-score   support\n",
       "\n",
+      "                                            the emotion of great happiness     0.0000    0.0000    0.0000         0\n",
       "the astonishment you feel when something totally unexpected happens to you     0.0000    0.0000    0.0000         0\n",
       "                         a strong positive emotion of regard and affection     0.0000    0.0000    0.0000         0\n",
-      "    an emotion experienced in anticipation of some specific pain or danger     0.0000    0.0000    0.0000         0\n",
       "         a feeling that is oriented toward some real or supposed grievance     0.0000    0.0000    0.0000         0\n",
       "                    remorse caused by feeling responsible for some offense     0.0000    0.0000    0.0000         0\n",
       "      a painful emotion resulting from an awareness of inadequacy or guilt     0.0000    0.0000    0.0000         0\n",
       "                    emotions experienced when not in a state of well-being     0.0000    0.0000    0.0000         0\n",
-      "                                                strong feelings of dislike     0.0000    0.0000    0.0000         1\n",
+      "    an emotion experienced in anticipation of some specific pain or danger     0.0000    0.0000    0.0000         1\n",
       "\n",
       "                                                                 micro avg     0.0000    0.0000    0.0000         1\n",
       "                                                                 macro avg     0.0000    0.0000    0.0000         1\n",
       "                                                              weighted avg     0.0000    0.0000    0.0000         1\n",
       "                                                               samples avg     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "2021-09-08 11:10:03,579 ----------------------------------------------------------------------------------------------------\n",
-      "Accuracy Durchschnitt: 0.17259475218658893\n"
+      "2021-09-21 19:45:35,167 ----------------------------------------------------------------------------------------------------\n",
+      "Accuracy Durchschnitt: 0.15568513119533528\n"
      ]
     }
    ],
@@ -8156,6 +8196,26 @@
     "\n",
     "print(f'Accuracy Durchschnitt: {statistics.mean(avg_acc_list)}')"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "f049cb8c",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.13119533527696792, 0.18594104308390022, 0.1564625850340136, 0.17103984450923226, 0.13378684807256236]\n",
+      "0.02112381010840729\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(avg_acc_list)\n",
+    "print(statistics.pstdev(avg_acc_list))"
+   ]
   }
  ],
  "metadata": {
diff --git a/oneshot/emotion_yinetall_oneshot.ipynb b/oneshot/emotion_yinetall_oneshot.ipynb
index bfd7140..655708d 100644
--- a/oneshot/emotion_yinetall_oneshot.ipynb
+++ b/oneshot/emotion_yinetall_oneshot.ipynb
@@ -37,7 +37,7 @@
    "source": [
     "# GRAKA auswählen\n",
     "import flair, torch\n",
-    "flair.device = torch.device('cuda:1') "
+    "flair.device = torch.device('cuda:0') "
    ]
   },
   {
@@ -70,38 +70,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:48:02,173 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
+      "2021-09-21 19:12:56,182 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:48:12,012 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:13:07,185 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 4/4 [00:00<00:00, 17137.09it/s]"
+      "100%|██████████| 4/4 [00:00<00:00, 20116.57it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:48:12,016 [b'sadness', b'optimism', b'anger', b'joy']\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 10:48:12,324 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:12,327 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:13:07,188 [b'sadness', b'optimism', b'anger', b'joy']\n",
+      "2021-09-21 19:13:07,191 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:07,193 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -414,133 +401,140 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:48:12,327 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:12,328 Corpus: \"Corpus: 4 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 10:48:12,328 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:12,328 Parameters:\n",
-      "2021-09-08 10:48:12,329  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:48:12,329  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:48:12,329  - patience: \"3\"\n",
-      "2021-09-08 10:48:12,330  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:48:12,330  - max_epochs: \"10\"\n",
-      "2021-09-08 10:48:12,330  - shuffle: \"True\"\n",
-      "2021-09-08 10:48:12,331  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:48:12,331  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:48:12,331 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:12,332 Model training base path: \"None1\"\n",
-      "2021-09-08 10:48:12,332 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:12,332 Device: cuda:1\n",
-      "2021-09-08 10:48:12,333 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:12,333 Embeddings storage mode: cpu\n",
-      "2021-09-08 10:48:12,592 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:12,659 epoch 1 - iter 1/4 - loss 0.06978030 - samples/sec: 20.16 - lr: 0.020000\n",
-      "2021-09-08 10:48:12,707 epoch 1 - iter 2/4 - loss 0.74462894 - samples/sec: 21.52 - lr: 0.020000\n",
-      "2021-09-08 10:48:12,751 epoch 1 - iter 3/4 - loss 0.50279373 - samples/sec: 23.25 - lr: 0.020000\n",
-      "2021-09-08 10:48:12,798 epoch 1 - iter 4/4 - loss 0.53417001 - samples/sec: 21.58 - lr: 0.020000\n",
-      "2021-09-08 10:48:12,799 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:12,799 EPOCH 1 done: loss 0.5342 - lr 0.0200000\n",
-      "2021-09-08 10:48:12,800 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:48:22,643 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:22,713 epoch 2 - iter 1/4 - loss 0.13765635 - samples/sec: 18.61 - lr: 0.020000\n",
-      "2021-09-08 10:48:22,772 epoch 2 - iter 2/4 - loss 0.06920646 - samples/sec: 17.08 - lr: 0.020000\n",
-      "2021-09-08 10:48:22,819 epoch 2 - iter 3/4 - loss 0.05378522 - samples/sec: 21.65 - lr: 0.020000\n",
-      "2021-09-08 10:48:22,873 epoch 2 - iter 4/4 - loss 0.15791376 - samples/sec: 18.79 - lr: 0.020000\n",
-      "2021-09-08 10:48:22,874 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:22,875 EPOCH 2 done: loss 0.1579 - lr 0.0200000\n",
-      "2021-09-08 10:48:22,875 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:48:22,877 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:22,954 epoch 3 - iter 1/4 - loss 0.63991266 - samples/sec: 19.22 - lr: 0.020000\n",
-      "2021-09-08 10:48:23,008 epoch 3 - iter 2/4 - loss 0.49026933 - samples/sec: 18.63 - lr: 0.020000\n",
-      "2021-09-08 10:48:23,055 epoch 3 - iter 3/4 - loss 0.32707657 - samples/sec: 21.76 - lr: 0.020000\n",
-      "2021-09-08 10:48:23,111 epoch 3 - iter 4/4 - loss 0.56152178 - samples/sec: 17.88 - lr: 0.020000\n",
-      "2021-09-08 10:48:23,113 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:23,113 EPOCH 3 done: loss 0.5615 - lr 0.0200000\n",
-      "2021-09-08 10:48:23,114 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:48:27,509 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:27,581 epoch 4 - iter 1/4 - loss 0.01232226 - samples/sec: 18.80 - lr: 0.020000\n",
-      "2021-09-08 10:48:27,632 epoch 4 - iter 2/4 - loss 0.37612481 - samples/sec: 20.17 - lr: 0.020000\n",
-      "2021-09-08 10:48:27,682 epoch 4 - iter 3/4 - loss 0.27975118 - samples/sec: 20.42 - lr: 0.020000\n",
-      "2021-09-08 10:48:27,732 epoch 4 - iter 4/4 - loss 0.21195426 - samples/sec: 20.14 - lr: 0.020000\n",
-      "2021-09-08 10:48:27,733 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:27,734 EPOCH 4 done: loss 0.2120 - lr 0.0200000\n",
-      "2021-09-08 10:48:27,734 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:48:27,736 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:27,798 epoch 5 - iter 1/4 - loss 0.00378920 - samples/sec: 21.92 - lr: 0.020000\n",
-      "2021-09-08 10:48:27,847 epoch 5 - iter 2/4 - loss 0.01509892 - samples/sec: 20.69 - lr: 0.020000\n",
-      "2021-09-08 10:48:27,901 epoch 5 - iter 3/4 - loss 0.01239062 - samples/sec: 18.93 - lr: 0.020000\n",
-      "2021-09-08 10:48:27,955 epoch 5 - iter 4/4 - loss 0.13840864 - samples/sec: 18.79 - lr: 0.020000\n",
-      "2021-09-08 10:48:27,956 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:27,956 EPOCH 5 done: loss 0.1384 - lr 0.0200000\n",
-      "2021-09-08 10:48:27,957 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:48:27,959 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:28,028 epoch 6 - iter 1/4 - loss 0.00240279 - samples/sec: 19.95 - lr: 0.020000\n",
-      "2021-09-08 10:48:28,090 epoch 6 - iter 2/4 - loss 0.09763840 - samples/sec: 16.21 - lr: 0.020000\n",
-      "2021-09-08 10:48:28,135 epoch 6 - iter 3/4 - loss 0.06547471 - samples/sec: 22.77 - lr: 0.020000\n",
-      "2021-09-08 10:48:28,185 epoch 6 - iter 4/4 - loss 0.05111325 - samples/sec: 20.33 - lr: 0.020000\n",
-      "2021-09-08 10:48:28,186 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:28,187 EPOCH 6 done: loss 0.0511 - lr 0.0200000\n",
-      "2021-09-08 10:48:28,187 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:48:28,189 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:28,256 epoch 7 - iter 1/4 - loss 0.00025359 - samples/sec: 19.94 - lr: 0.020000\n",
-      "2021-09-08 10:48:28,313 epoch 7 - iter 2/4 - loss 0.51714893 - samples/sec: 17.76 - lr: 0.020000\n",
-      "2021-09-08 10:48:28,374 epoch 7 - iter 3/4 - loss 0.56630916 - samples/sec: 16.72 - lr: 0.020000\n",
-      "2021-09-08 10:48:28,431 epoch 7 - iter 4/4 - loss 0.42533731 - samples/sec: 17.73 - lr: 0.020000\n",
-      "2021-09-08 10:48:28,432 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:28,433 EPOCH 7 done: loss 0.4253 - lr 0.0200000\n",
-      "Epoch     7: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 10:48:28,433 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:48:28,435 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:28,496 epoch 8 - iter 1/4 - loss 0.52660638 - samples/sec: 21.37 - lr: 0.010000\n",
-      "2021-09-08 10:48:28,542 epoch 8 - iter 2/4 - loss 0.26364650 - samples/sec: 22.52 - lr: 0.010000\n",
-      "2021-09-08 10:48:28,589 epoch 8 - iter 3/4 - loss 0.17761661 - samples/sec: 21.48 - lr: 0.010000\n",
-      "2021-09-08 10:48:28,639 epoch 8 - iter 4/4 - loss 0.14471392 - samples/sec: 20.27 - lr: 0.010000\n",
-      "2021-09-08 10:48:28,640 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:28,641 EPOCH 8 done: loss 0.1447 - lr 0.0100000\n",
-      "2021-09-08 10:48:28,641 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:48:28,643 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:28,711 epoch 9 - iter 1/4 - loss 0.00064994 - samples/sec: 21.38 - lr: 0.010000\n",
-      "2021-09-08 10:48:28,756 epoch 9 - iter 2/4 - loss 0.00061803 - samples/sec: 22.75 - lr: 0.010000\n",
-      "2021-09-08 10:48:28,815 epoch 9 - iter 3/4 - loss 0.11318250 - samples/sec: 17.09 - lr: 0.010000\n",
-      "2021-09-08 10:48:28,862 epoch 9 - iter 4/4 - loss 0.09222154 - samples/sec: 21.95 - lr: 0.010000\n",
-      "2021-09-08 10:48:28,863 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:28,863 EPOCH 9 done: loss 0.0922 - lr 0.0100000\n",
-      "2021-09-08 10:48:28,864 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:48:28,866 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 19:13:07,193 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:07,194 Corpus: \"Corpus: 4 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 19:13:07,194 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:07,194 Parameters:\n",
+      "2021-09-21 19:13:07,195  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:13:07,195  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:13:07,195  - patience: \"3\"\n",
+      "2021-09-21 19:13:07,195  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:13:07,196  - max_epochs: \"10\"\n",
+      "2021-09-21 19:13:07,196  - shuffle: \"True\"\n",
+      "2021-09-21 19:13:07,196  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:13:07,197  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:13:07,197 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:07,197 Model training base path: \"None1\"\n",
+      "2021-09-21 19:13:07,197 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:07,198 Device: cuda:0\n",
+      "2021-09-21 19:13:07,198 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:07,198 Embeddings storage mode: cpu\n",
+      "2021-09-21 19:13:07,206 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:07,308 epoch 1 - iter 1/4 - loss 1.72306585 - samples/sec: 14.75 - lr: 0.020000\n",
+      "2021-09-21 19:13:07,373 epoch 1 - iter 2/4 - loss 0.98057619 - samples/sec: 15.57 - lr: 0.020000\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:48:28,931 epoch 10 - iter 1/4 - loss 0.00169913 - samples/sec: 20.76 - lr: 0.010000\n",
-      "2021-09-08 10:48:28,978 epoch 10 - iter 2/4 - loss 0.00629722 - samples/sec: 21.53 - lr: 0.010000\n",
-      "2021-09-08 10:48:29,031 epoch 10 - iter 3/4 - loss 0.02212269 - samples/sec: 18.95 - lr: 0.010000\n",
-      "2021-09-08 10:48:29,080 epoch 10 - iter 4/4 - loss 0.01677314 - samples/sec: 20.83 - lr: 0.010000\n",
-      "2021-09-08 10:48:29,082 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:29,082 EPOCH 10 done: loss 0.0168 - lr 0.0100000\n",
-      "2021-09-08 10:48:29,082 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:48:33,098 Test data not provided setting final score to 0\n",
-      "2021-09-08 10:48:39,892 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
+      "2021-09-21 19:13:07,437 epoch 1 - iter 3/4 - loss 0.76616873 - samples/sec: 15.63 - lr: 0.020000\n",
+      "2021-09-21 19:13:07,516 epoch 1 - iter 4/4 - loss 0.73698037 - samples/sec: 12.76 - lr: 0.020000\n",
+      "2021-09-21 19:13:07,517 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:07,518 EPOCH 1 done: loss 0.7370 - lr 0.0200000\n",
+      "2021-09-21 19:13:07,518 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:13:11,718 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:11,806 epoch 2 - iter 1/4 - loss 1.29345095 - samples/sec: 16.70 - lr: 0.020000\n",
+      "2021-09-21 19:13:11,893 epoch 2 - iter 2/4 - loss 0.82024954 - samples/sec: 11.56 - lr: 0.020000\n",
+      "2021-09-21 19:13:11,961 epoch 2 - iter 3/4 - loss 0.55497800 - samples/sec: 14.77 - lr: 0.020000\n",
+      "2021-09-21 19:13:12,042 epoch 2 - iter 4/4 - loss 0.63144088 - samples/sec: 12.40 - lr: 0.020000\n",
+      "2021-09-21 19:13:12,043 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:12,044 EPOCH 2 done: loss 0.6314 - lr 0.0200000\n",
+      "2021-09-21 19:13:12,044 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:13:12,047 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:12,127 epoch 3 - iter 1/4 - loss 0.36053625 - samples/sec: 16.56 - lr: 0.020000\n",
+      "2021-09-21 19:13:12,183 epoch 3 - iter 2/4 - loss 0.19135139 - samples/sec: 18.03 - lr: 0.020000\n",
+      "2021-09-21 19:13:12,248 epoch 3 - iter 3/4 - loss 0.26866739 - samples/sec: 15.44 - lr: 0.020000\n",
+      "2021-09-21 19:13:12,322 epoch 3 - iter 4/4 - loss 0.32985502 - samples/sec: 13.68 - lr: 0.020000\n",
+      "2021-09-21 19:13:12,322 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:12,323 EPOCH 3 done: loss 0.3299 - lr 0.0200000\n",
+      "2021-09-21 19:13:12,323 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:13:12,327 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:12,445 epoch 4 - iter 1/4 - loss 0.11293006 - samples/sec: 12.10 - lr: 0.020000\n",
+      "2021-09-21 19:13:12,506 epoch 4 - iter 2/4 - loss 0.51493847 - samples/sec: 16.51 - lr: 0.020000\n",
+      "2021-09-21 19:13:12,578 epoch 4 - iter 3/4 - loss 0.40529953 - samples/sec: 14.10 - lr: 0.020000\n",
+      "2021-09-21 19:13:12,631 epoch 4 - iter 4/4 - loss 0.30584684 - samples/sec: 18.95 - lr: 0.020000\n",
+      "2021-09-21 19:13:12,632 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:12,632 EPOCH 4 done: loss 0.3058 - lr 0.0200000\n",
+      "2021-09-21 19:13:12,632 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:13:12,635 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:12,725 epoch 5 - iter 1/4 - loss 1.19784844 - samples/sec: 14.84 - lr: 0.020000\n",
+      "2021-09-21 19:13:12,787 epoch 5 - iter 2/4 - loss 0.60024714 - samples/sec: 16.22 - lr: 0.020000\n",
+      "2021-09-21 19:13:12,859 epoch 5 - iter 3/4 - loss 0.46557322 - samples/sec: 13.94 - lr: 0.020000\n",
+      "2021-09-21 19:13:12,938 epoch 5 - iter 4/4 - loss 0.37990256 - samples/sec: 12.71 - lr: 0.020000\n",
+      "2021-09-21 19:13:12,939 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:12,939 EPOCH 5 done: loss 0.3799 - lr 0.0200000\n",
+      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 19:13:12,940 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:13:12,942 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:13,042 epoch 6 - iter 1/4 - loss 0.00661627 - samples/sec: 18.46 - lr: 0.010000\n",
+      "2021-09-21 19:13:13,111 epoch 6 - iter 2/4 - loss 0.06699503 - samples/sec: 14.44 - lr: 0.010000\n",
+      "2021-09-21 19:13:13,170 epoch 6 - iter 3/4 - loss 0.05500157 - samples/sec: 17.10 - lr: 0.010000\n",
+      "2021-09-21 19:13:13,226 epoch 6 - iter 4/4 - loss 0.22655267 - samples/sec: 18.16 - lr: 0.010000\n",
+      "2021-09-21 19:13:13,227 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:13,227 EPOCH 6 done: loss 0.2266 - lr 0.0100000\n",
+      "2021-09-21 19:13:13,228 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:13:13,229 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:13,323 epoch 7 - iter 1/4 - loss 0.03001245 - samples/sec: 19.14 - lr: 0.010000\n",
+      "2021-09-21 19:13:13,382 epoch 7 - iter 2/4 - loss 0.16415275 - samples/sec: 16.96 - lr: 0.010000\n",
+      "2021-09-21 19:13:13,450 epoch 7 - iter 3/4 - loss 0.12038076 - samples/sec: 14.98 - lr: 0.010000\n",
+      "2021-09-21 19:13:13,506 epoch 7 - iter 4/4 - loss 0.09204610 - samples/sec: 17.87 - lr: 0.010000\n",
+      "2021-09-21 19:13:13,507 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:13,507 EPOCH 7 done: loss 0.0920 - lr 0.0100000\n",
+      "2021-09-21 19:13:13,508 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:13:13,510 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:13,593 epoch 8 - iter 1/4 - loss 0.24610084 - samples/sec: 17.78 - lr: 0.010000\n",
+      "2021-09-21 19:13:13,653 epoch 8 - iter 2/4 - loss 0.12498182 - samples/sec: 16.86 - lr: 0.010000\n",
+      "2021-09-21 19:13:13,734 epoch 8 - iter 3/4 - loss 0.08437068 - samples/sec: 12.43 - lr: 0.010000\n",
+      "2021-09-21 19:13:13,790 epoch 8 - iter 4/4 - loss 0.06781039 - samples/sec: 17.97 - lr: 0.010000\n",
+      "2021-09-21 19:13:13,791 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:13,791 EPOCH 8 done: loss 0.0678 - lr 0.0100000\n",
+      "2021-09-21 19:13:13,791 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:13:13,793 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:13,893 epoch 9 - iter 1/4 - loss 0.00342144 - samples/sec: 15.08 - lr: 0.010000\n",
+      "2021-09-21 19:13:13,946 epoch 9 - iter 2/4 - loss 0.01399849 - samples/sec: 19.07 - lr: 0.010000\n",
+      "2021-09-21 19:13:14,011 epoch 9 - iter 3/4 - loss 0.03312675 - samples/sec: 15.61 - lr: 0.010000\n",
+      "2021-09-21 19:13:14,070 epoch 9 - iter 4/4 - loss 0.02582041 - samples/sec: 16.90 - lr: 0.010000\n",
+      "2021-09-21 19:13:14,071 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:14,071 EPOCH 9 done: loss 0.0258 - lr 0.0100000\n",
+      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 19:13:14,072 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:13:14,076 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:14,178 epoch 10 - iter 1/4 - loss 0.00603087 - samples/sec: 16.39 - lr: 0.005000\n",
+      "2021-09-21 19:13:14,238 epoch 10 - iter 2/4 - loss 0.03390477 - samples/sec: 16.72 - lr: 0.005000\n",
+      "2021-09-21 19:13:14,311 epoch 10 - iter 3/4 - loss 0.02308681 - samples/sec: 13.76 - lr: 0.005000\n",
+      "2021-09-21 19:13:14,371 epoch 10 - iter 4/4 - loss 0.01831053 - samples/sec: 16.98 - lr: 0.005000\n",
+      "2021-09-21 19:13:14,372 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:14,372 EPOCH 10 done: loss 0.0183 - lr 0.0050000\n",
+      "2021-09-21 19:13:14,372 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:13:19,096 Test data not provided setting final score to 0\n",
+      "2021-09-21 19:13:30,313 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:48:44,101 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:13:34,726 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 4/4 [00:00<00:00, 20687.07it/s]"
+      "100%|██████████| 4/4 [00:00<00:00, 23045.63it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:48:44,103 [b'sadness', b'optimism', b'anger', b'joy']\n",
-      "2021-09-08 10:48:44,114 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:44,116 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:13:34,727 [b'sadness', b'optimism', b'anger', b'joy']\n",
+      "2021-09-21 19:13:34,737 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:34,738 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -853,27 +847,28 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:48:44,117 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:44,117 Corpus: \"Corpus: 4 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 10:48:44,117 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:44,118 Parameters:\n",
-      "2021-09-08 10:48:44,118  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:48:44,118  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:48:44,119  - patience: \"3\"\n",
-      "2021-09-08 10:48:44,119  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:48:44,119  - max_epochs: \"10\"\n",
-      "2021-09-08 10:48:44,120  - shuffle: \"True\"\n",
-      "2021-09-08 10:48:44,120  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:48:44,120  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:48:44,121 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:44,121 Model training base path: \"None1\"\n",
-      "2021-09-08 10:48:44,121 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:44,122 Device: cuda:1\n",
-      "2021-09-08 10:48:44,122 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:44,123 Embeddings storage mode: cpu\n",
-      "2021-09-08 10:48:44,129 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:44,196 epoch 1 - iter 1/4 - loss 0.50905991 - samples/sec: 20.03 - lr: 0.020000\n",
-      "2021-09-08 10:48:44,246 epoch 1 - iter 2/4 - loss 1.21448296 - samples/sec: 20.29 - lr: 0.020000\n"
+      "2021-09-21 19:13:34,739 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:34,739 Corpus: \"Corpus: 4 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 19:13:34,740 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:34,740 Parameters:\n",
+      "2021-09-21 19:13:34,740  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:13:34,740  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:13:34,741  - patience: \"3\"\n",
+      "2021-09-21 19:13:34,741  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:13:34,741  - max_epochs: \"10\"\n",
+      "2021-09-21 19:13:34,742  - shuffle: \"True\"\n",
+      "2021-09-21 19:13:34,742  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:13:34,742  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:13:34,742 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:34,743 Model training base path: \"None1\"\n",
+      "2021-09-21 19:13:34,743 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:34,743 Device: cuda:0\n",
+      "2021-09-21 19:13:34,743 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:34,744 Embeddings storage mode: cpu\n",
+      "2021-09-21 19:13:34,750 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:34,817 epoch 1 - iter 1/4 - loss 1.42105949 - samples/sec: 22.59 - lr: 0.020000\n",
+      "2021-09-21 19:13:34,868 epoch 1 - iter 2/4 - loss 1.36266202 - samples/sec: 19.66 - lr: 0.020000\n",
+      "2021-09-21 19:13:34,925 epoch 1 - iter 3/4 - loss 1.20553197 - samples/sec: 17.90 - lr: 0.020000\n"
      ]
     },
     {
@@ -887,106 +882,105 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:48:44,304 epoch 1 - iter 3/4 - loss 0.83304690 - samples/sec: 17.52 - lr: 0.020000\n",
-      "2021-09-08 10:48:44,355 epoch 1 - iter 4/4 - loss 0.74393614 - samples/sec: 19.92 - lr: 0.020000\n",
-      "2021-09-08 10:48:44,356 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:44,357 EPOCH 1 done: loss 0.7439 - lr 0.0200000\n",
-      "2021-09-08 10:48:44,357 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:13:34,989 epoch 1 - iter 4/4 - loss 0.96938229 - samples/sec: 15.73 - lr: 0.020000\n",
+      "2021-09-21 19:13:34,990 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:34,990 EPOCH 1 done: loss 0.9694 - lr 0.0200000\n",
+      "2021-09-21 19:13:34,990 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:48:48,846 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:48,922 epoch 2 - iter 1/4 - loss 0.03779131 - samples/sec: 19.27 - lr: 0.020000\n",
-      "2021-09-08 10:48:48,980 epoch 2 - iter 2/4 - loss 0.11665129 - samples/sec: 17.59 - lr: 0.020000\n",
-      "2021-09-08 10:48:49,035 epoch 2 - iter 3/4 - loss 0.39547896 - samples/sec: 18.34 - lr: 0.020000\n",
-      "2021-09-08 10:48:49,088 epoch 2 - iter 4/4 - loss 0.38881769 - samples/sec: 19.24 - lr: 0.020000\n",
-      "2021-09-08 10:48:49,089 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:49,090 EPOCH 2 done: loss 0.3888 - lr 0.0200000\n",
-      "2021-09-08 10:48:49,090 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:48:49,093 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:49,160 epoch 3 - iter 1/4 - loss 0.03871313 - samples/sec: 20.24 - lr: 0.020000\n",
-      "2021-09-08 10:48:49,217 epoch 3 - iter 2/4 - loss 0.05552267 - samples/sec: 17.71 - lr: 0.020000\n",
-      "2021-09-08 10:48:49,269 epoch 3 - iter 3/4 - loss 0.03836373 - samples/sec: 19.44 - lr: 0.020000\n",
-      "2021-09-08 10:48:49,321 epoch 3 - iter 4/4 - loss 0.24163680 - samples/sec: 20.34 - lr: 0.020000\n",
-      "2021-09-08 10:48:49,323 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:49,323 EPOCH 3 done: loss 0.2416 - lr 0.0200000\n",
-      "2021-09-08 10:48:49,323 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:48:49,325 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:49,392 epoch 4 - iter 1/4 - loss 0.64254856 - samples/sec: 20.15 - lr: 0.020000\n",
-      "2021-09-08 10:48:49,440 epoch 4 - iter 2/4 - loss 0.32295290 - samples/sec: 21.34 - lr: 0.020000\n",
-      "2021-09-08 10:48:49,488 epoch 4 - iter 3/4 - loss 0.22303612 - samples/sec: 21.38 - lr: 0.020000\n",
-      "2021-09-08 10:48:49,543 epoch 4 - iter 4/4 - loss 0.30869455 - samples/sec: 18.45 - lr: 0.020000\n",
-      "2021-09-08 10:48:49,544 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:49,545 EPOCH 4 done: loss 0.3087 - lr 0.0200000\n",
-      "2021-09-08 10:48:49,545 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:48:49,547 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:49,617 epoch 5 - iter 1/4 - loss 0.31214663 - samples/sec: 18.03 - lr: 0.020000\n",
-      "2021-09-08 10:48:49,664 epoch 5 - iter 2/4 - loss 0.17459296 - samples/sec: 21.44 - lr: 0.020000\n",
-      "2021-09-08 10:48:49,716 epoch 5 - iter 3/4 - loss 0.12361846 - samples/sec: 19.87 - lr: 0.020000\n",
-      "2021-09-08 10:48:49,767 epoch 5 - iter 4/4 - loss 0.09493417 - samples/sec: 19.56 - lr: 0.020000\n",
-      "2021-09-08 10:48:49,769 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:49,769 EPOCH 5 done: loss 0.0949 - lr 0.0200000\n",
+      "2021-09-21 19:13:38,677 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:38,761 epoch 2 - iter 1/4 - loss 0.07047740 - samples/sec: 17.77 - lr: 0.020000\n",
+      "2021-09-21 19:13:38,814 epoch 2 - iter 2/4 - loss 0.39450555 - samples/sec: 18.99 - lr: 0.020000\n",
+      "2021-09-21 19:13:38,876 epoch 2 - iter 3/4 - loss 0.30242924 - samples/sec: 16.36 - lr: 0.020000\n",
+      "2021-09-21 19:13:38,940 epoch 2 - iter 4/4 - loss 0.34911152 - samples/sec: 15.55 - lr: 0.020000\n",
+      "2021-09-21 19:13:38,941 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:38,942 EPOCH 2 done: loss 0.3491 - lr 0.0200000\n",
+      "2021-09-21 19:13:38,942 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:13:38,944 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:39,015 epoch 3 - iter 1/4 - loss 0.86739087 - samples/sec: 18.96 - lr: 0.020000\n",
+      "2021-09-21 19:13:39,070 epoch 3 - iter 2/4 - loss 0.52786683 - samples/sec: 18.31 - lr: 0.020000\n",
+      "2021-09-21 19:13:39,130 epoch 3 - iter 3/4 - loss 0.37250371 - samples/sec: 16.73 - lr: 0.020000\n",
+      "2021-09-21 19:13:39,189 epoch 3 - iter 4/4 - loss 0.71283357 - samples/sec: 17.00 - lr: 0.020000\n",
+      "2021-09-21 19:13:39,191 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:39,191 EPOCH 3 done: loss 0.7128 - lr 0.0200000\n",
+      "2021-09-21 19:13:39,191 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:13:39,193 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:39,278 epoch 4 - iter 1/4 - loss 2.13587880 - samples/sec: 17.98 - lr: 0.020000\n",
+      "2021-09-21 19:13:39,327 epoch 4 - iter 2/4 - loss 1.09107374 - samples/sec: 20.39 - lr: 0.020000\n",
+      "2021-09-21 19:13:39,384 epoch 4 - iter 3/4 - loss 0.91928488 - samples/sec: 17.64 - lr: 0.020000\n",
+      "2021-09-21 19:13:39,438 epoch 4 - iter 4/4 - loss 0.71230851 - samples/sec: 18.91 - lr: 0.020000\n",
+      "2021-09-21 19:13:39,439 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:39,439 EPOCH 4 done: loss 0.7123 - lr 0.0200000\n",
+      "2021-09-21 19:13:39,440 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:13:39,442 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:39,513 epoch 5 - iter 1/4 - loss 0.01041352 - samples/sec: 20.49 - lr: 0.020000\n",
+      "2021-09-21 19:13:39,567 epoch 5 - iter 2/4 - loss 0.13625480 - samples/sec: 18.40 - lr: 0.020000\n",
+      "2021-09-21 19:13:39,625 epoch 5 - iter 3/4 - loss 0.30755423 - samples/sec: 17.61 - lr: 0.020000\n",
+      "2021-09-21 19:13:39,684 epoch 5 - iter 4/4 - loss 0.23684249 - samples/sec: 17.08 - lr: 0.020000\n",
+      "2021-09-21 19:13:39,685 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:39,685 EPOCH 5 done: loss 0.2368 - lr 0.0200000\n",
       "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 10:48:49,770 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:48:49,772 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:49,838 epoch 6 - iter 1/4 - loss 0.13865930 - samples/sec: 20.13 - lr: 0.010000\n",
-      "2021-09-08 10:48:49,894 epoch 6 - iter 2/4 - loss 0.11624585 - samples/sec: 18.05 - lr: 0.010000\n",
-      "2021-09-08 10:48:49,941 epoch 6 - iter 3/4 - loss 0.08593873 - samples/sec: 21.40 - lr: 0.010000\n",
-      "2021-09-08 10:48:49,986 epoch 6 - iter 4/4 - loss 0.06513359 - samples/sec: 22.61 - lr: 0.010000\n",
-      "2021-09-08 10:48:49,988 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:49,988 EPOCH 6 done: loss 0.0651 - lr 0.0100000\n",
-      "2021-09-08 10:48:49,988 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:48:49,990 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:50,056 epoch 7 - iter 1/4 - loss 0.12570250 - samples/sec: 20.09 - lr: 0.010000\n",
-      "2021-09-08 10:48:50,107 epoch 7 - iter 2/4 - loss 0.06300472 - samples/sec: 20.30 - lr: 0.010000\n",
-      "2021-09-08 10:48:50,157 epoch 7 - iter 3/4 - loss 0.05246535 - samples/sec: 20.33 - lr: 0.010000\n",
-      "2021-09-08 10:48:50,207 epoch 7 - iter 4/4 - loss 0.04042697 - samples/sec: 20.10 - lr: 0.010000\n",
-      "2021-09-08 10:48:50,208 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:50,209 EPOCH 7 done: loss 0.0404 - lr 0.0100000\n",
-      "2021-09-08 10:48:50,209 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:48:50,212 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:50,274 epoch 8 - iter 1/4 - loss 0.01671899 - samples/sec: 21.11 - lr: 0.010000\n",
-      "2021-09-08 10:48:50,322 epoch 8 - iter 2/4 - loss 0.00908397 - samples/sec: 21.40 - lr: 0.010000\n",
-      "2021-09-08 10:48:50,372 epoch 8 - iter 3/4 - loss 0.03244632 - samples/sec: 20.07 - lr: 0.010000\n",
-      "2021-09-08 10:48:50,419 epoch 8 - iter 4/4 - loss 0.02444343 - samples/sec: 21.66 - lr: 0.010000\n",
-      "2021-09-08 10:48:50,420 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:50,421 EPOCH 8 done: loss 0.0244 - lr 0.0100000\n",
-      "2021-09-08 10:48:50,421 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:48:50,424 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:50,491 epoch 9 - iter 1/4 - loss 0.00178072 - samples/sec: 19.82 - lr: 0.010000\n",
-      "2021-09-08 10:48:50,538 epoch 9 - iter 2/4 - loss 0.00097340 - samples/sec: 21.49 - lr: 0.010000\n",
-      "2021-09-08 10:48:50,596 epoch 9 - iter 3/4 - loss 0.00364559 - samples/sec: 17.72 - lr: 0.010000\n",
-      "2021-09-08 10:48:50,650 epoch 9 - iter 4/4 - loss 0.02002558 - samples/sec: 18.64 - lr: 0.010000\n",
-      "2021-09-08 10:48:50,652 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:50,652 EPOCH 9 done: loss 0.0200 - lr 0.0100000\n",
+      "2021-09-21 19:13:39,685 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:13:39,687 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:39,774 epoch 6 - iter 1/4 - loss 0.00802058 - samples/sec: 16.50 - lr: 0.010000\n",
+      "2021-09-21 19:13:39,837 epoch 6 - iter 2/4 - loss 0.01173905 - samples/sec: 16.02 - lr: 0.010000\n",
+      "2021-09-21 19:13:39,893 epoch 6 - iter 3/4 - loss 0.09777194 - samples/sec: 17.92 - lr: 0.010000\n",
+      "2021-09-21 19:13:39,957 epoch 6 - iter 4/4 - loss 0.09844066 - samples/sec: 15.69 - lr: 0.010000\n",
+      "2021-09-21 19:13:39,958 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:39,959 EPOCH 6 done: loss 0.0984 - lr 0.0100000\n",
+      "2021-09-21 19:13:39,959 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:13:39,961 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:40,037 epoch 7 - iter 1/4 - loss 0.10677942 - samples/sec: 19.75 - lr: 0.010000\n",
+      "2021-09-21 19:13:40,094 epoch 7 - iter 2/4 - loss 0.07807808 - samples/sec: 17.51 - lr: 0.010000\n",
+      "2021-09-21 19:13:40,150 epoch 7 - iter 3/4 - loss 0.05701443 - samples/sec: 18.05 - lr: 0.010000\n",
+      "2021-09-21 19:13:40,202 epoch 7 - iter 4/4 - loss 0.04442787 - samples/sec: 19.55 - lr: 0.010000\n",
+      "2021-09-21 19:13:40,203 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:40,203 EPOCH 7 done: loss 0.0444 - lr 0.0100000\n",
+      "2021-09-21 19:13:40,204 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:13:40,206 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:40,275 epoch 8 - iter 1/4 - loss 0.00922738 - samples/sec: 21.07 - lr: 0.010000\n",
+      "2021-09-21 19:13:40,340 epoch 8 - iter 2/4 - loss 0.00797967 - samples/sec: 15.66 - lr: 0.010000\n",
+      "2021-09-21 19:13:40,394 epoch 8 - iter 3/4 - loss 0.03257104 - samples/sec: 18.57 - lr: 0.010000\n",
+      "2021-09-21 19:13:40,453 epoch 8 - iter 4/4 - loss 0.23941665 - samples/sec: 17.03 - lr: 0.010000\n",
+      "2021-09-21 19:13:40,454 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:40,454 EPOCH 8 done: loss 0.2394 - lr 0.0100000\n",
+      "2021-09-21 19:13:40,455 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:13:40,456 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:40,542 epoch 9 - iter 1/4 - loss 0.00176561 - samples/sec: 20.23 - lr: 0.010000\n",
+      "2021-09-21 19:13:40,589 epoch 9 - iter 2/4 - loss 0.00701069 - samples/sec: 21.41 - lr: 0.010000\n",
+      "2021-09-21 19:13:40,648 epoch 9 - iter 3/4 - loss 0.01331926 - samples/sec: 17.02 - lr: 0.010000\n",
+      "2021-09-21 19:13:40,699 epoch 9 - iter 4/4 - loss 0.01519229 - samples/sec: 19.54 - lr: 0.010000\n",
+      "2021-09-21 19:13:40,700 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:40,701 EPOCH 9 done: loss 0.0152 - lr 0.0100000\n",
       "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 10:48:50,653 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:48:50,656 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:50,729 epoch 10 - iter 1/4 - loss 0.00194601 - samples/sec: 18.66 - lr: 0.005000\n",
-      "2021-09-08 10:48:50,784 epoch 10 - iter 2/4 - loss 0.00617637 - samples/sec: 18.37 - lr: 0.005000\n",
-      "2021-09-08 10:48:50,840 epoch 10 - iter 3/4 - loss 0.00422161 - samples/sec: 17.99 - lr: 0.005000\n",
-      "2021-09-08 10:48:50,893 epoch 10 - iter 4/4 - loss 0.00688167 - samples/sec: 19.23 - lr: 0.005000\n",
-      "2021-09-08 10:48:50,895 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:48:50,895 EPOCH 10 done: loss 0.0069 - lr 0.0050000\n",
-      "2021-09-08 10:48:50,896 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:48:56,869 Test data not provided setting final score to 0\n",
-      "2021-09-08 10:49:03,171 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
+      "2021-09-21 19:13:40,701 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:13:40,704 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:40,792 epoch 10 - iter 1/4 - loss 0.77283114 - samples/sec: 17.40 - lr: 0.005000\n",
+      "2021-09-21 19:13:40,841 epoch 10 - iter 2/4 - loss 0.38853506 - samples/sec: 20.50 - lr: 0.005000\n",
+      "2021-09-21 19:13:40,905 epoch 10 - iter 3/4 - loss 0.27686569 - samples/sec: 15.76 - lr: 0.005000\n",
+      "2021-09-21 19:13:40,959 epoch 10 - iter 4/4 - loss 0.21015801 - samples/sec: 18.89 - lr: 0.005000\n",
+      "2021-09-21 19:13:40,960 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:13:40,960 EPOCH 10 done: loss 0.2102 - lr 0.0050000\n",
+      "2021-09-21 19:13:40,960 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:13:47,664 Test data not provided setting final score to 0\n",
+      "2021-09-21 19:13:57,139 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:49:07,188 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:14:01,870 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 4/4 [00:00<00:00, 17982.01it/s]"
+      "100%|██████████| 4/4 [00:00<00:00, 21050.46it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:49:07,190 [b'sadness', b'optimism', b'anger', b'joy']\n",
-      "2021-09-08 10:49:07,198 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:07,200 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:14:01,871 [b'sadness', b'optimism', b'anger', b'joy']\n",
+      "2021-09-21 19:14:01,880 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:01,882 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -1299,28 +1293,27 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:49:07,201 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:07,201 Corpus: \"Corpus: 4 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 10:49:07,202 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:07,202 Parameters:\n",
-      "2021-09-08 10:49:07,203  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:49:07,203  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:49:07,203  - patience: \"3\"\n",
-      "2021-09-08 10:49:07,204  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:49:07,204  - max_epochs: \"10\"\n",
-      "2021-09-08 10:49:07,204  - shuffle: \"True\"\n",
-      "2021-09-08 10:49:07,205  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:49:07,205  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:49:07,205 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:07,206 Model training base path: \"None1\"\n",
-      "2021-09-08 10:49:07,206 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:07,206 Device: cuda:1\n",
-      "2021-09-08 10:49:07,207 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:07,207 Embeddings storage mode: cpu\n",
-      "2021-09-08 10:49:07,213 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:07,269 epoch 1 - iter 1/4 - loss 1.82768631 - samples/sec: 23.90 - lr: 0.020000\n",
-      "2021-09-08 10:49:07,321 epoch 1 - iter 2/4 - loss 1.46838582 - samples/sec: 19.75 - lr: 0.020000\n",
-      "2021-09-08 10:49:07,370 epoch 1 - iter 3/4 - loss 1.11232874 - samples/sec: 20.56 - lr: 0.020000\n"
+      "2021-09-21 19:14:01,882 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:01,882 Corpus: \"Corpus: 4 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 19:14:01,883 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:01,883 Parameters:\n",
+      "2021-09-21 19:14:01,883  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:14:01,884  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:14:01,884  - patience: \"3\"\n",
+      "2021-09-21 19:14:01,884  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:14:01,884  - max_epochs: \"10\"\n",
+      "2021-09-21 19:14:01,885  - shuffle: \"True\"\n",
+      "2021-09-21 19:14:01,885  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:14:01,885  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:14:01,885 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:01,886 Model training base path: \"None1\"\n",
+      "2021-09-21 19:14:01,886 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:01,886 Device: cuda:0\n",
+      "2021-09-21 19:14:01,887 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:01,887 Embeddings storage mode: cpu\n",
+      "2021-09-21 19:14:01,893 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:01,991 epoch 1 - iter 1/4 - loss 0.31411779 - samples/sec: 19.11 - lr: 0.020000\n",
+      "2021-09-21 19:14:02,061 epoch 1 - iter 2/4 - loss 0.63411707 - samples/sec: 14.37 - lr: 0.020000\n"
      ]
     },
     {
@@ -1334,103 +1327,104 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:49:07,418 epoch 1 - iter 4/4 - loss 1.34515760 - samples/sec: 21.19 - lr: 0.020000\n",
-      "2021-09-08 10:49:07,419 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:07,420 EPOCH 1 done: loss 1.3452 - lr 0.0200000\n",
-      "2021-09-08 10:49:07,420 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:14:02,151 epoch 1 - iter 3/4 - loss 0.46644819 - samples/sec: 11.08 - lr: 0.020000\n",
+      "2021-09-21 19:14:02,222 epoch 1 - iter 4/4 - loss 0.40167432 - samples/sec: 14.40 - lr: 0.020000\n",
+      "2021-09-21 19:14:02,223 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:02,223 EPOCH 1 done: loss 0.4017 - lr 0.0200000\n",
+      "2021-09-21 19:14:02,224 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:49:18,937 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:18,999 epoch 2 - iter 1/4 - loss 0.67518419 - samples/sec: 21.12 - lr: 0.020000\n",
-      "2021-09-08 10:49:19,048 epoch 2 - iter 2/4 - loss 0.50304055 - samples/sec: 20.76 - lr: 0.020000\n",
-      "2021-09-08 10:49:19,095 epoch 2 - iter 3/4 - loss 0.64670912 - samples/sec: 21.76 - lr: 0.020000\n",
-      "2021-09-08 10:49:19,142 epoch 2 - iter 4/4 - loss 0.50572816 - samples/sec: 21.39 - lr: 0.020000\n",
-      "2021-09-08 10:49:19,144 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:19,145 EPOCH 2 done: loss 0.5057 - lr 0.0200000\n",
-      "2021-09-08 10:49:19,146 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:49:20,789 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:20,850 epoch 3 - iter 1/4 - loss 0.66043639 - samples/sec: 21.09 - lr: 0.020000\n",
-      "2021-09-08 10:49:20,898 epoch 3 - iter 2/4 - loss 0.35631587 - samples/sec: 20.96 - lr: 0.020000\n",
-      "2021-09-08 10:49:20,948 epoch 3 - iter 3/4 - loss 0.27945519 - samples/sec: 20.23 - lr: 0.020000\n",
-      "2021-09-08 10:49:20,999 epoch 3 - iter 4/4 - loss 0.37767636 - samples/sec: 20.04 - lr: 0.020000\n",
-      "2021-09-08 10:49:21,001 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:21,001 EPOCH 3 done: loss 0.3777 - lr 0.0200000\n",
-      "2021-09-08 10:49:21,002 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:49:21,005 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:21,065 epoch 4 - iter 1/4 - loss 0.00484250 - samples/sec: 21.57 - lr: 0.020000\n",
-      "2021-09-08 10:49:21,113 epoch 4 - iter 2/4 - loss 0.01373703 - samples/sec: 21.71 - lr: 0.020000\n",
-      "2021-09-08 10:49:21,163 epoch 4 - iter 3/4 - loss 0.10362962 - samples/sec: 20.24 - lr: 0.020000\n",
-      "2021-09-08 10:49:21,212 epoch 4 - iter 4/4 - loss 0.11877508 - samples/sec: 21.02 - lr: 0.020000\n",
-      "2021-09-08 10:49:21,213 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:21,213 EPOCH 4 done: loss 0.1188 - lr 0.0200000\n",
-      "2021-09-08 10:49:21,214 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:49:21,216 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:21,309 epoch 5 - iter 1/4 - loss 0.07862494 - samples/sec: 14.16 - lr: 0.020000\n",
-      "2021-09-08 10:49:21,362 epoch 5 - iter 2/4 - loss 0.10212554 - samples/sec: 19.32 - lr: 0.020000\n",
-      "2021-09-08 10:49:21,410 epoch 5 - iter 3/4 - loss 0.06914991 - samples/sec: 21.07 - lr: 0.020000\n",
-      "2021-09-08 10:49:21,459 epoch 5 - iter 4/4 - loss 0.05471104 - samples/sec: 20.78 - lr: 0.020000\n",
-      "2021-09-08 10:49:21,461 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:21,461 EPOCH 5 done: loss 0.0547 - lr 0.0200000\n",
+      "2021-09-21 19:14:21,922 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:21,994 epoch 2 - iter 1/4 - loss 0.02801433 - samples/sec: 17.90 - lr: 0.020000\n",
+      "2021-09-21 19:14:22,065 epoch 2 - iter 2/4 - loss 0.26080175 - samples/sec: 14.10 - lr: 0.020000\n",
+      "2021-09-21 19:14:22,132 epoch 2 - iter 3/4 - loss 0.52486111 - samples/sec: 15.03 - lr: 0.020000\n",
+      "2021-09-21 19:14:22,214 epoch 2 - iter 4/4 - loss 0.39599476 - samples/sec: 12.41 - lr: 0.020000\n",
+      "2021-09-21 19:14:22,215 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:22,215 EPOCH 2 done: loss 0.3960 - lr 0.0200000\n",
+      "2021-09-21 19:14:22,216 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:14:22,219 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:22,340 epoch 3 - iter 1/4 - loss 0.63828677 - samples/sec: 12.56 - lr: 0.020000\n",
+      "2021-09-21 19:14:22,419 epoch 3 - iter 2/4 - loss 0.49872084 - samples/sec: 12.78 - lr: 0.020000\n",
+      "2021-09-21 19:14:22,497 epoch 3 - iter 3/4 - loss 0.33615800 - samples/sec: 12.95 - lr: 0.020000\n",
+      "2021-09-21 19:14:22,569 epoch 3 - iter 4/4 - loss 0.25388236 - samples/sec: 14.06 - lr: 0.020000\n",
+      "2021-09-21 19:14:22,571 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:22,571 EPOCH 3 done: loss 0.2539 - lr 0.0200000\n",
+      "2021-09-21 19:14:22,572 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:14:22,574 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:22,717 epoch 4 - iter 1/4 - loss 0.27567038 - samples/sec: 10.62 - lr: 0.020000\n",
+      "2021-09-21 19:14:22,780 epoch 4 - iter 2/4 - loss 0.13971566 - samples/sec: 16.19 - lr: 0.020000\n",
+      "2021-09-21 19:14:22,843 epoch 4 - iter 3/4 - loss 0.21837667 - samples/sec: 15.94 - lr: 0.020000\n",
+      "2021-09-21 19:14:22,899 epoch 4 - iter 4/4 - loss 0.16485111 - samples/sec: 18.11 - lr: 0.020000\n",
+      "2021-09-21 19:14:22,901 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:22,902 EPOCH 4 done: loss 0.1649 - lr 0.0200000\n",
+      "2021-09-21 19:14:22,902 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:14:22,978 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:23,063 epoch 5 - iter 1/4 - loss 0.06181758 - samples/sec: 17.07 - lr: 0.020000\n",
+      "2021-09-21 19:14:23,130 epoch 5 - iter 2/4 - loss 0.08476968 - samples/sec: 15.29 - lr: 0.020000\n",
+      "2021-09-21 19:14:23,193 epoch 5 - iter 3/4 - loss 0.05735688 - samples/sec: 16.03 - lr: 0.020000\n",
+      "2021-09-21 19:14:23,261 epoch 5 - iter 4/4 - loss 0.29887000 - samples/sec: 14.87 - lr: 0.020000\n",
+      "2021-09-21 19:14:23,262 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:23,263 EPOCH 5 done: loss 0.2989 - lr 0.0200000\n",
       "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 10:49:21,462 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:49:21,464 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:21,527 epoch 6 - iter 1/4 - loss 1.18354940 - samples/sec: 20.27 - lr: 0.010000\n",
-      "2021-09-08 10:49:21,575 epoch 6 - iter 2/4 - loss 0.88141534 - samples/sec: 21.17 - lr: 0.010000\n",
-      "2021-09-08 10:49:21,621 epoch 6 - iter 3/4 - loss 0.58994482 - samples/sec: 22.13 - lr: 0.010000\n",
-      "2021-09-08 10:49:21,672 epoch 6 - iter 4/4 - loss 0.95184372 - samples/sec: 20.07 - lr: 0.010000\n",
-      "2021-09-08 10:49:21,674 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:21,674 EPOCH 6 done: loss 0.9518 - lr 0.0100000\n",
-      "2021-09-08 10:49:21,674 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:49:21,782 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:21,840 epoch 7 - iter 1/4 - loss 0.02705105 - samples/sec: 22.57 - lr: 0.010000\n",
-      "2021-09-08 10:49:21,888 epoch 7 - iter 2/4 - loss 0.17199690 - samples/sec: 21.42 - lr: 0.010000\n",
-      "2021-09-08 10:49:21,932 epoch 7 - iter 3/4 - loss 0.11520491 - samples/sec: 22.92 - lr: 0.010000\n",
-      "2021-09-08 10:49:21,980 epoch 7 - iter 4/4 - loss 0.33834453 - samples/sec: 21.24 - lr: 0.010000\n",
-      "2021-09-08 10:49:21,982 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:21,982 EPOCH 7 done: loss 0.3383 - lr 0.0100000\n",
-      "2021-09-08 10:49:21,982 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:49:22,102 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:22,161 epoch 8 - iter 1/4 - loss 0.15834804 - samples/sec: 21.51 - lr: 0.010000\n",
-      "2021-09-08 10:49:22,208 epoch 8 - iter 2/4 - loss 0.36250285 - samples/sec: 21.62 - lr: 0.010000\n",
-      "2021-09-08 10:49:22,256 epoch 8 - iter 3/4 - loss 0.34620683 - samples/sec: 21.22 - lr: 0.010000\n",
-      "2021-09-08 10:49:22,300 epoch 8 - iter 4/4 - loss 0.26023867 - samples/sec: 23.06 - lr: 0.010000\n",
-      "2021-09-08 10:49:22,302 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:22,302 EPOCH 8 done: loss 0.2602 - lr 0.0100000\n",
-      "2021-09-08 10:49:22,303 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:49:22,415 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:22,471 epoch 9 - iter 1/4 - loss 0.05113875 - samples/sec: 23.40 - lr: 0.010000\n",
-      "2021-09-08 10:49:22,520 epoch 9 - iter 2/4 - loss 0.10876831 - samples/sec: 20.62 - lr: 0.010000\n",
-      "2021-09-08 10:49:22,564 epoch 9 - iter 3/4 - loss 0.07950366 - samples/sec: 23.26 - lr: 0.010000\n",
-      "2021-09-08 10:49:22,608 epoch 9 - iter 4/4 - loss 0.06006842 - samples/sec: 23.17 - lr: 0.010000\n",
-      "2021-09-08 10:49:22,610 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:22,611 EPOCH 9 done: loss 0.0601 - lr 0.0100000\n",
+      "2021-09-21 19:14:23,263 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:14:23,360 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:23,448 epoch 6 - iter 1/4 - loss 0.00702540 - samples/sec: 15.11 - lr: 0.010000\n",
+      "2021-09-21 19:14:23,518 epoch 6 - iter 2/4 - loss 0.21672595 - samples/sec: 14.37 - lr: 0.010000\n",
+      "2021-09-21 19:14:23,578 epoch 6 - iter 3/4 - loss 0.39007354 - samples/sec: 16.82 - lr: 0.010000\n",
+      "2021-09-21 19:14:23,638 epoch 6 - iter 4/4 - loss 0.29351753 - samples/sec: 16.77 - lr: 0.010000\n",
+      "2021-09-21 19:14:23,640 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:23,640 EPOCH 6 done: loss 0.2935 - lr 0.0100000\n",
+      "2021-09-21 19:14:23,641 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:14:23,757 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:23,849 epoch 7 - iter 1/4 - loss 0.00636672 - samples/sec: 17.44 - lr: 0.010000\n",
+      "2021-09-21 19:14:23,913 epoch 7 - iter 2/4 - loss 0.10651387 - samples/sec: 15.63 - lr: 0.010000\n",
+      "2021-09-21 19:14:23,971 epoch 7 - iter 3/4 - loss 0.07679529 - samples/sec: 17.70 - lr: 0.010000\n",
+      "2021-09-21 19:14:24,033 epoch 7 - iter 4/4 - loss 0.06306193 - samples/sec: 16.07 - lr: 0.010000\n",
+      "2021-09-21 19:14:24,035 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:24,035 EPOCH 7 done: loss 0.0631 - lr 0.0100000\n",
+      "2021-09-21 19:14:24,035 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:14:24,147 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:24,241 epoch 8 - iter 1/4 - loss 0.55852008 - samples/sec: 15.93 - lr: 0.010000\n",
+      "2021-09-21 19:14:24,300 epoch 8 - iter 2/4 - loss 0.28839913 - samples/sec: 17.07 - lr: 0.010000\n",
+      "2021-09-21 19:14:24,354 epoch 8 - iter 3/4 - loss 0.19369561 - samples/sec: 18.52 - lr: 0.010000\n",
+      "2021-09-21 19:14:24,408 epoch 8 - iter 4/4 - loss 0.14638707 - samples/sec: 18.90 - lr: 0.010000\n",
+      "2021-09-21 19:14:24,409 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:24,409 EPOCH 8 done: loss 0.1464 - lr 0.0100000\n",
+      "2021-09-21 19:14:24,409 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:14:24,521 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:24,616 epoch 9 - iter 1/4 - loss 0.27592352 - samples/sec: 16.45 - lr: 0.010000\n",
+      "2021-09-21 19:14:24,663 epoch 9 - iter 2/4 - loss 0.13895626 - samples/sec: 21.63 - lr: 0.010000\n",
+      "2021-09-21 19:14:24,717 epoch 9 - iter 3/4 - loss 0.09706436 - samples/sec: 18.75 - lr: 0.010000\n",
+      "2021-09-21 19:14:24,772 epoch 9 - iter 4/4 - loss 0.07371982 - samples/sec: 18.27 - lr: 0.010000\n",
+      "2021-09-21 19:14:24,773 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:24,774 EPOCH 9 done: loss 0.0737 - lr 0.0100000\n",
       "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 10:49:22,612 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:49:22,733 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:22,790 epoch 10 - iter 1/4 - loss 0.03071760 - samples/sec: 23.19 - lr: 0.005000\n",
-      "2021-09-08 10:49:22,834 epoch 10 - iter 2/4 - loss 0.01608550 - samples/sec: 23.10 - lr: 0.005000\n",
-      "2021-09-08 10:49:22,882 epoch 10 - iter 3/4 - loss 0.03562668 - samples/sec: 21.18 - lr: 0.005000\n",
-      "2021-09-08 10:49:22,928 epoch 10 - iter 4/4 - loss 0.03006898 - samples/sec: 22.55 - lr: 0.005000\n",
-      "2021-09-08 10:49:22,929 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:22,930 EPOCH 10 done: loss 0.0301 - lr 0.0050000\n",
-      "2021-09-08 10:49:22,930 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:49:43,080 Test data not provided setting final score to 0\n",
-      "2021-09-08 10:49:48,527 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
+      "2021-09-21 19:14:24,774 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:14:24,890 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:24,967 epoch 10 - iter 1/4 - loss 0.03157984 - samples/sec: 16.03 - lr: 0.005000\n",
+      "2021-09-21 19:14:25,020 epoch 10 - iter 2/4 - loss 0.01778185 - samples/sec: 19.18 - lr: 0.005000\n",
+      "2021-09-21 19:14:25,079 epoch 10 - iter 3/4 - loss 0.06698207 - samples/sec: 16.83 - lr: 0.005000\n",
+      "2021-09-21 19:14:25,138 epoch 10 - iter 4/4 - loss 0.05072090 - samples/sec: 17.11 - lr: 0.005000\n",
+      "2021-09-21 19:14:25,139 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:14:25,140 EPOCH 10 done: loss 0.0507 - lr 0.0050000\n",
+      "2021-09-21 19:14:25,140 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:14:42,048 Test data not provided setting final score to 0\n",
+      "2021-09-21 19:14:56,116 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:49:52,706 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:15:00,730 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 4/4 [00:00<00:00, 18436.50it/s]"
+      "100%|██████████| 4/4 [00:00<00:00, 24855.13it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:49:52,708 [b'sadness', b'optimism', b'anger', b'joy']\n"
+      "2021-09-21 19:15:00,732 [b'sadness', b'optimism', b'anger', b'joy']\n"
      ]
     },
     {
@@ -1444,8 +1438,8 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:49:57,689 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:57,692 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:15:02,742 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:15:02,744 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -1758,133 +1752,133 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:49:57,692 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:57,692 Corpus: \"Corpus: 4 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 10:49:57,693 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:57,693 Parameters:\n",
-      "2021-09-08 10:49:57,694  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:49:57,694  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:49:57,694  - patience: \"3\"\n",
-      "2021-09-08 10:49:57,695  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:49:57,695  - max_epochs: \"10\"\n",
-      "2021-09-08 10:49:57,695  - shuffle: \"True\"\n",
-      "2021-09-08 10:49:57,696  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:49:57,696  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:49:57,696 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:57,697 Model training base path: \"None1\"\n",
-      "2021-09-08 10:49:57,697 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:57,699 Device: cuda:1\n",
-      "2021-09-08 10:49:57,699 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:57,700 Embeddings storage mode: cpu\n",
-      "2021-09-08 10:49:57,720 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:57,813 epoch 1 - iter 1/4 - loss 1.01540136 - samples/sec: 15.09 - lr: 0.020000\n",
-      "2021-09-08 10:49:57,880 epoch 1 - iter 2/4 - loss 1.07576942 - samples/sec: 15.10 - lr: 0.020000\n",
-      "2021-09-08 10:49:57,942 epoch 1 - iter 3/4 - loss 0.75730583 - samples/sec: 16.52 - lr: 0.020000\n",
-      "2021-09-08 10:49:58,004 epoch 1 - iter 4/4 - loss 0.97998447 - samples/sec: 16.28 - lr: 0.020000\n",
-      "2021-09-08 10:49:58,005 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:58,005 EPOCH 1 done: loss 0.9800 - lr 0.0200000\n",
-      "2021-09-08 10:49:58,006 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:15:02,745 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:15:02,745 Corpus: \"Corpus: 4 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 19:15:02,745 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:15:02,745 Parameters:\n",
+      "2021-09-21 19:15:02,746  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:15:02,746  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:15:02,746  - patience: \"3\"\n",
+      "2021-09-21 19:15:02,747  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:15:02,747  - max_epochs: \"10\"\n",
+      "2021-09-21 19:15:02,747  - shuffle: \"True\"\n",
+      "2021-09-21 19:15:02,747  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:15:02,748  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:15:02,748 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:15:02,748 Model training base path: \"None1\"\n",
+      "2021-09-21 19:15:02,749 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:15:02,749 Device: cuda:0\n",
+      "2021-09-21 19:15:02,749 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:15:02,749 Embeddings storage mode: cpu\n",
+      "2021-09-21 19:15:02,778 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:15:02,873 epoch 1 - iter 1/4 - loss 0.21876095 - samples/sec: 13.11 - lr: 0.020000\n",
+      "2021-09-21 19:15:02,957 epoch 1 - iter 2/4 - loss 0.39045086 - samples/sec: 11.97 - lr: 0.020000\n",
+      "2021-09-21 19:15:03,039 epoch 1 - iter 3/4 - loss 0.37127503 - samples/sec: 12.32 - lr: 0.020000\n",
+      "2021-09-21 19:15:03,107 epoch 1 - iter 4/4 - loss 0.32030256 - samples/sec: 14.80 - lr: 0.020000\n",
+      "2021-09-21 19:15:03,109 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:15:03,109 EPOCH 1 done: loss 0.3203 - lr 0.0200000\n",
+      "2021-09-21 19:15:03,109 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:50:07,627 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:07,705 epoch 2 - iter 1/4 - loss 0.29327884 - samples/sec: 17.09 - lr: 0.020000\n",
-      "2021-09-08 10:50:07,768 epoch 2 - iter 2/4 - loss 0.51220985 - samples/sec: 16.13 - lr: 0.020000\n",
-      "2021-09-08 10:50:07,832 epoch 2 - iter 3/4 - loss 0.36842623 - samples/sec: 15.69 - lr: 0.020000\n",
-      "2021-09-08 10:50:07,895 epoch 2 - iter 4/4 - loss 0.43540637 - samples/sec: 16.04 - lr: 0.020000\n",
-      "2021-09-08 10:50:07,896 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:07,897 EPOCH 2 done: loss 0.4354 - lr 0.0200000\n",
-      "2021-09-08 10:50:07,897 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:50:09,806 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:09,884 epoch 3 - iter 1/4 - loss 0.83994848 - samples/sec: 16.19 - lr: 0.020000\n",
-      "2021-09-08 10:50:09,949 epoch 3 - iter 2/4 - loss 0.61850610 - samples/sec: 15.58 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,011 epoch 3 - iter 3/4 - loss 0.41807800 - samples/sec: 16.31 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,074 epoch 3 - iter 4/4 - loss 0.59503020 - samples/sec: 15.94 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,075 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:10,076 EPOCH 3 done: loss 0.5950 - lr 0.0200000\n",
-      "2021-09-08 10:50:10,076 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:50:10,078 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:10,151 epoch 4 - iter 1/4 - loss 1.12206304 - samples/sec: 17.69 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,211 epoch 4 - iter 2/4 - loss 0.60238121 - samples/sec: 16.62 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,269 epoch 4 - iter 3/4 - loss 0.63952898 - samples/sec: 17.36 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,329 epoch 4 - iter 4/4 - loss 0.49303866 - samples/sec: 16.87 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,330 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:10,331 EPOCH 4 done: loss 0.4930 - lr 0.0200000\n",
-      "2021-09-08 10:50:10,331 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:50:10,333 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:10,413 epoch 5 - iter 1/4 - loss 0.65021610 - samples/sec: 16.66 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,468 epoch 5 - iter 2/4 - loss 0.33278268 - samples/sec: 18.52 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,525 epoch 5 - iter 3/4 - loss 0.22728618 - samples/sec: 17.73 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,583 epoch 5 - iter 4/4 - loss 0.50619611 - samples/sec: 17.27 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,584 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:10,585 EPOCH 5 done: loss 0.5062 - lr 0.0200000\n",
-      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 10:50:10,585 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:50:10,587 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:10,669 epoch 6 - iter 1/4 - loss 0.44713250 - samples/sec: 16.51 - lr: 0.010000\n",
-      "2021-09-08 10:50:10,721 epoch 6 - iter 2/4 - loss 0.23760930 - samples/sec: 19.40 - lr: 0.010000\n",
-      "2021-09-08 10:50:10,782 epoch 6 - iter 3/4 - loss 0.15955564 - samples/sec: 16.63 - lr: 0.010000\n",
-      "2021-09-08 10:50:10,842 epoch 6 - iter 4/4 - loss 0.18090465 - samples/sec: 16.66 - lr: 0.010000\n",
-      "2021-09-08 10:50:10,843 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:10,844 EPOCH 6 done: loss 0.1809 - lr 0.0100000\n",
-      "2021-09-08 10:50:10,844 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:50:10,846 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:10,928 epoch 7 - iter 1/4 - loss 0.00517096 - samples/sec: 17.06 - lr: 0.010000\n",
-      "2021-09-08 10:50:10,992 epoch 7 - iter 2/4 - loss 0.39665589 - samples/sec: 15.78 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,051 epoch 7 - iter 3/4 - loss 0.26722657 - samples/sec: 17.12 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,111 epoch 7 - iter 4/4 - loss 0.27902454 - samples/sec: 17.05 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,112 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:11,112 EPOCH 7 done: loss 0.2790 - lr 0.0100000\n",
-      "2021-09-08 10:50:11,112 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:50:11,115 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:11,183 epoch 8 - iter 1/4 - loss 0.00691680 - samples/sec: 19.13 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,243 epoch 8 - iter 2/4 - loss 0.10036415 - samples/sec: 16.97 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,303 epoch 8 - iter 3/4 - loss 0.16455299 - samples/sec: 16.86 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,357 epoch 8 - iter 4/4 - loss 0.12430440 - samples/sec: 18.58 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,359 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:11,359 EPOCH 8 done: loss 0.1243 - lr 0.0100000\n",
-      "2021-09-08 10:50:11,359 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:50:11,361 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:11,441 epoch 9 - iter 1/4 - loss 0.07558522 - samples/sec: 16.97 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,501 epoch 9 - iter 2/4 - loss 0.16958842 - samples/sec: 16.78 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,564 epoch 9 - iter 3/4 - loss 0.11741564 - samples/sec: 16.07 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,619 epoch 9 - iter 4/4 - loss 0.09221016 - samples/sec: 18.81 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,620 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:11,620 EPOCH 9 done: loss 0.0922 - lr 0.0100000\n",
-      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 10:50:11,621 BAD EPOCHS (no improvement): 4\n"
+      "2021-09-21 19:15:09,914 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:15:09,987 epoch 2 - iter 1/4 - loss 0.12247598 - samples/sec: 17.28 - lr: 0.020000\n",
+      "2021-09-21 19:15:10,046 epoch 2 - iter 2/4 - loss 0.24646612 - samples/sec: 17.14 - lr: 0.020000\n",
+      "2021-09-21 19:15:10,100 epoch 2 - iter 3/4 - loss 0.32696273 - samples/sec: 18.90 - lr: 0.020000\n",
+      "2021-09-21 19:15:10,154 epoch 2 - iter 4/4 - loss 0.25366734 - samples/sec: 18.82 - lr: 0.020000\n",
+      "2021-09-21 19:15:10,155 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:15:10,156 EPOCH 2 done: loss 0.2537 - lr 0.0200000\n",
+      "2021-09-21 19:15:10,156 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:15:10,159 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:15:10,230 epoch 3 - iter 1/4 - loss 0.01192332 - samples/sec: 19.11 - lr: 0.020000\n",
+      "2021-09-21 19:15:10,284 epoch 3 - iter 2/4 - loss 0.02608051 - samples/sec: 18.91 - lr: 0.020000\n",
+      "2021-09-21 19:15:10,344 epoch 3 - iter 3/4 - loss 0.03453820 - samples/sec: 16.85 - lr: 0.020000\n",
+      "2021-09-21 19:15:10,400 epoch 3 - iter 4/4 - loss 0.19755968 - samples/sec: 17.97 - lr: 0.020000\n",
+      "2021-09-21 19:15:10,402 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:15:10,402 EPOCH 3 done: loss 0.1976 - lr 0.0200000\n",
+      "2021-09-21 19:15:10,403 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:15:10,405 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:15:10,472 epoch 4 - iter 1/4 - loss 0.46192017 - samples/sec: 19.31 - lr: 0.020000\n",
+      "2021-09-21 19:15:10,526 epoch 4 - iter 2/4 - loss 0.84936519 - samples/sec: 18.68 - lr: 0.020000\n",
+      "2021-09-21 19:15:10,589 epoch 4 - iter 3/4 - loss 0.57569242 - samples/sec: 15.89 - lr: 0.020000\n",
+      "2021-09-21 19:15:10,642 epoch 4 - iter 4/4 - loss 0.43273264 - samples/sec: 19.31 - lr: 0.020000\n",
+      "2021-09-21 19:15:10,643 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:15:10,643 EPOCH 4 done: loss 0.4327 - lr 0.0200000\n",
+      "2021-09-21 19:15:10,643 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:15:29,285 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:15:29,353 epoch 5 - iter 1/4 - loss 0.00975284 - samples/sec: 19.12 - lr: 0.020000\n",
+      "2021-09-21 19:15:29,409 epoch 5 - iter 2/4 - loss 0.00743649 - samples/sec: 18.38 - lr: 0.020000\n",
+      "2021-09-21 19:15:29,490 epoch 5 - iter 3/4 - loss 0.06099089 - samples/sec: 12.33 - lr: 0.020000\n",
+      "2021-09-21 19:15:29,574 epoch 5 - iter 4/4 - loss 0.04842625 - samples/sec: 12.03 - lr: 0.020000\n",
+      "2021-09-21 19:15:29,575 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:15:29,575 EPOCH 5 done: loss 0.0484 - lr 0.0200000\n",
+      "2021-09-21 19:15:29,576 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:15:29,585 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:15:29,684 epoch 6 - iter 1/4 - loss 0.00738318 - samples/sec: 13.69 - lr: 0.020000\n",
+      "2021-09-21 19:15:29,762 epoch 6 - iter 2/4 - loss 0.00748166 - samples/sec: 12.91 - lr: 0.020000\n",
+      "2021-09-21 19:15:29,835 epoch 6 - iter 3/4 - loss 0.00928589 - samples/sec: 13.67 - lr: 0.020000\n",
+      "2021-09-21 19:15:29,911 epoch 6 - iter 4/4 - loss 0.06380147 - samples/sec: 13.28 - lr: 0.020000\n",
+      "2021-09-21 19:15:29,913 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:15:29,913 EPOCH 6 done: loss 0.0638 - lr 0.0200000\n",
+      "2021-09-21 19:15:29,913 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:15:29,920 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:15:30,036 epoch 7 - iter 1/4 - loss 0.00171358 - samples/sec: 15.76 - lr: 0.020000\n",
+      "2021-09-21 19:15:30,110 epoch 7 - iter 2/4 - loss 0.00361089 - samples/sec: 13.60 - lr: 0.020000\n",
+      "2021-09-21 19:15:30,181 epoch 7 - iter 3/4 - loss 0.00637005 - samples/sec: 14.34 - lr: 0.020000\n",
+      "2021-09-21 19:15:30,266 epoch 7 - iter 4/4 - loss 0.00632736 - samples/sec: 11.74 - lr: 0.020000\n",
+      "2021-09-21 19:15:30,267 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:15:30,268 EPOCH 7 done: loss 0.0063 - lr 0.0200000\n",
+      "2021-09-21 19:15:30,268 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:15:30,270 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:15:30,371 epoch 8 - iter 1/4 - loss 0.00128050 - samples/sec: 14.27 - lr: 0.020000\n",
+      "2021-09-21 19:15:30,438 epoch 8 - iter 2/4 - loss 0.00911462 - samples/sec: 15.01 - lr: 0.020000\n",
+      "2021-09-21 19:15:30,525 epoch 8 - iter 3/4 - loss 0.00632063 - samples/sec: 11.58 - lr: 0.020000\n",
+      "2021-09-21 19:15:30,616 epoch 8 - iter 4/4 - loss 0.00554431 - samples/sec: 11.08 - lr: 0.020000\n",
+      "2021-09-21 19:15:30,617 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:15:30,617 EPOCH 8 done: loss 0.0055 - lr 0.0200000\n",
+      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 19:15:30,617 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:15:30,625 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:15:30,735 epoch 9 - iter 1/4 - loss 0.00230101 - samples/sec: 14.69 - lr: 0.010000\n",
+      "2021-09-21 19:15:30,821 epoch 9 - iter 2/4 - loss 0.00335613 - samples/sec: 11.73 - lr: 0.010000\n",
+      "2021-09-21 19:15:30,880 epoch 9 - iter 3/4 - loss 0.00279861 - samples/sec: 16.96 - lr: 0.010000\n",
+      "2021-09-21 19:15:30,952 epoch 9 - iter 4/4 - loss 0.00240804 - samples/sec: 14.12 - lr: 0.010000\n",
+      "2021-09-21 19:15:30,953 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:15:30,953 EPOCH 9 done: loss 0.0024 - lr 0.0100000\n",
+      "2021-09-21 19:15:30,953 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:15:30,959 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:50:11,624 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:11,703 epoch 10 - iter 1/4 - loss 0.02315517 - samples/sec: 16.97 - lr: 0.005000\n",
-      "2021-09-08 10:50:11,765 epoch 10 - iter 2/4 - loss 0.06375698 - samples/sec: 16.19 - lr: 0.005000\n",
-      "2021-09-08 10:50:11,820 epoch 10 - iter 3/4 - loss 0.04365858 - samples/sec: 18.38 - lr: 0.005000\n",
-      "2021-09-08 10:50:11,881 epoch 10 - iter 4/4 - loss 0.03446719 - samples/sec: 16.65 - lr: 0.005000\n",
-      "2021-09-08 10:50:11,882 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:11,883 EPOCH 10 done: loss 0.0345 - lr 0.0050000\n",
-      "2021-09-08 10:50:11,883 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:50:34,139 Test data not provided setting final score to 0\n",
-      "2021-09-08 10:50:40,648 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
+      "2021-09-21 19:15:31,090 epoch 10 - iter 1/4 - loss 0.00359663 - samples/sec: 12.98 - lr: 0.010000\n",
+      "2021-09-21 19:15:31,171 epoch 10 - iter 2/4 - loss 0.00282805 - samples/sec: 12.44 - lr: 0.010000\n",
+      "2021-09-21 19:15:31,251 epoch 10 - iter 3/4 - loss 0.00806495 - samples/sec: 12.46 - lr: 0.010000\n",
+      "2021-09-21 19:15:31,345 epoch 10 - iter 4/4 - loss 0.00643193 - samples/sec: 10.75 - lr: 0.010000\n",
+      "2021-09-21 19:15:31,346 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:15:31,346 EPOCH 10 done: loss 0.0064 - lr 0.0100000\n",
+      "2021-09-21 19:15:31,346 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:15:48,762 Test data not provided setting final score to 0\n",
+      "2021-09-21 19:15:55,942 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:50:45,107 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:16:00,353 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 4/4 [00:00<00:00, 19807.81it/s]"
+      "100%|██████████| 4/4 [00:00<00:00, 20092.47it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:50:45,109 [b'sadness', b'optimism', b'anger', b'joy']\n",
-      "2021-09-08 10:50:45,251 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:45,253 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:16:00,355 [b'sadness', b'optimism', b'anger', b'joy']\n",
+      "2021-09-21 19:16:00,364 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:00,365 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -2197,24 +2191,27 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:50:45,253 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:45,254 Corpus: \"Corpus: 4 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 10:50:45,254 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:45,254 Parameters:\n",
-      "2021-09-08 10:50:45,255  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:50:45,255  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:50:45,255  - patience: \"3\"\n",
-      "2021-09-08 10:50:45,256  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:50:45,256  - max_epochs: \"10\"\n",
-      "2021-09-08 10:50:45,257  - shuffle: \"True\"\n",
-      "2021-09-08 10:50:45,257  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:50:45,257  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:50:45,258 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:45,258 Model training base path: \"None1\"\n",
-      "2021-09-08 10:50:45,258 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:45,259 Device: cuda:1\n",
-      "2021-09-08 10:50:45,259 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:45,259 Embeddings storage mode: cpu\n"
+      "2021-09-21 19:16:00,366 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:00,366 Corpus: \"Corpus: 4 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 19:16:00,366 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:00,367 Parameters:\n",
+      "2021-09-21 19:16:00,367  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:16:00,367  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:16:00,368  - patience: \"3\"\n",
+      "2021-09-21 19:16:00,368  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:16:00,368  - max_epochs: \"10\"\n",
+      "2021-09-21 19:16:00,368  - shuffle: \"True\"\n",
+      "2021-09-21 19:16:00,369  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:16:00,369  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:16:00,369 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:00,369 Model training base path: \"None1\"\n",
+      "2021-09-21 19:16:00,370 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:00,370 Device: cuda:0\n",
+      "2021-09-21 19:16:00,370 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:00,371 Embeddings storage mode: cpu\n",
+      "2021-09-21 19:16:00,377 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:00,467 epoch 1 - iter 1/4 - loss 0.65484267 - samples/sec: 18.46 - lr: 0.020000\n",
+      "2021-09-21 19:16:00,544 epoch 1 - iter 2/4 - loss 0.95204732 - samples/sec: 13.01 - lr: 0.020000\n"
      ]
     },
     {
@@ -2228,91 +2225,88 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:50:45,450 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:45,521 epoch 1 - iter 1/4 - loss 0.04117078 - samples/sec: 18.88 - lr: 0.020000\n",
-      "2021-09-08 10:50:45,579 epoch 1 - iter 2/4 - loss 1.57246682 - samples/sec: 17.31 - lr: 0.020000\n",
-      "2021-09-08 10:50:45,642 epoch 1 - iter 3/4 - loss 1.10010732 - samples/sec: 16.08 - lr: 0.020000\n",
-      "2021-09-08 10:50:45,702 epoch 1 - iter 4/4 - loss 0.97098663 - samples/sec: 16.97 - lr: 0.020000\n",
-      "2021-09-08 10:50:45,703 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:45,703 EPOCH 1 done: loss 0.9710 - lr 0.0200000\n",
-      "2021-09-08 10:50:45,704 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:16:00,622 epoch 1 - iter 3/4 - loss 0.64259303 - samples/sec: 12.82 - lr: 0.020000\n",
+      "2021-09-21 19:16:00,703 epoch 1 - iter 4/4 - loss 0.73839975 - samples/sec: 12.39 - lr: 0.020000\n",
+      "2021-09-21 19:16:00,704 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:00,705 EPOCH 1 done: loss 0.7384 - lr 0.0200000\n",
+      "2021-09-21 19:16:00,705 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:50:52,076 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:52,159 epoch 2 - iter 1/4 - loss 2.01574111 - samples/sec: 16.98 - lr: 0.020000\n",
-      "2021-09-08 10:50:52,220 epoch 2 - iter 2/4 - loss 1.27221212 - samples/sec: 16.71 - lr: 0.020000\n",
-      "2021-09-08 10:50:52,278 epoch 2 - iter 3/4 - loss 1.03537313 - samples/sec: 17.25 - lr: 0.020000\n",
-      "2021-09-08 10:50:52,336 epoch 2 - iter 4/4 - loss 0.83598063 - samples/sec: 17.34 - lr: 0.020000\n",
-      "2021-09-08 10:50:52,337 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:52,338 EPOCH 2 done: loss 0.8360 - lr 0.0200000\n",
-      "2021-09-08 10:50:52,338 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:50:52,341 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:52,417 epoch 3 - iter 1/4 - loss 0.02609970 - samples/sec: 17.50 - lr: 0.020000\n",
-      "2021-09-08 10:50:52,477 epoch 3 - iter 2/4 - loss 0.27653025 - samples/sec: 16.94 - lr: 0.020000\n",
-      "2021-09-08 10:50:52,540 epoch 3 - iter 3/4 - loss 0.21293912 - samples/sec: 16.00 - lr: 0.020000\n",
-      "2021-09-08 10:50:52,595 epoch 3 - iter 4/4 - loss 0.17980528 - samples/sec: 18.15 - lr: 0.020000\n",
-      "2021-09-08 10:50:52,596 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:52,597 EPOCH 3 done: loss 0.1798 - lr 0.0200000\n",
-      "2021-09-08 10:50:52,597 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:50:52,599 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:52,683 epoch 4 - iter 1/4 - loss 0.01231149 - samples/sec: 16.04 - lr: 0.020000\n",
-      "2021-09-08 10:50:52,741 epoch 4 - iter 2/4 - loss 0.55595572 - samples/sec: 17.33 - lr: 0.020000\n",
-      "2021-09-08 10:50:52,802 epoch 4 - iter 3/4 - loss 0.38041173 - samples/sec: 16.47 - lr: 0.020000\n",
-      "2021-09-08 10:50:52,858 epoch 4 - iter 4/4 - loss 0.29469042 - samples/sec: 18.21 - lr: 0.020000\n",
-      "2021-09-08 10:50:52,859 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:52,859 EPOCH 4 done: loss 0.2947 - lr 0.0200000\n",
-      "2021-09-08 10:50:52,859 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:50:52,863 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:52,941 epoch 5 - iter 1/4 - loss 0.00754189 - samples/sec: 17.99 - lr: 0.020000\n",
-      "2021-09-08 10:50:52,997 epoch 5 - iter 2/4 - loss 0.00702565 - samples/sec: 18.08 - lr: 0.020000\n",
-      "2021-09-08 10:50:53,060 epoch 5 - iter 3/4 - loss 0.24964648 - samples/sec: 16.10 - lr: 0.020000\n",
-      "2021-09-08 10:50:53,120 epoch 5 - iter 4/4 - loss 0.19095418 - samples/sec: 16.89 - lr: 0.020000\n",
-      "2021-09-08 10:50:53,121 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:53,121 EPOCH 5 done: loss 0.1910 - lr 0.0200000\n",
+      "2021-09-21 19:16:06,310 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:06,394 epoch 2 - iter 1/4 - loss 0.32836190 - samples/sec: 18.95 - lr: 0.020000\n",
+      "2021-09-21 19:16:06,459 epoch 2 - iter 2/4 - loss 0.63887362 - samples/sec: 15.52 - lr: 0.020000\n",
+      "2021-09-21 19:16:06,531 epoch 2 - iter 3/4 - loss 0.57778310 - samples/sec: 13.86 - lr: 0.020000\n",
+      "2021-09-21 19:16:06,588 epoch 2 - iter 4/4 - loss 0.44071601 - samples/sec: 17.89 - lr: 0.020000\n",
+      "2021-09-21 19:16:06,589 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:06,589 EPOCH 2 done: loss 0.4407 - lr 0.0200000\n",
+      "2021-09-21 19:16:06,589 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:16:06,591 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:06,753 epoch 3 - iter 1/4 - loss 0.43572617 - samples/sec: 9.47 - lr: 0.020000\n",
+      "2021-09-21 19:16:06,843 epoch 3 - iter 2/4 - loss 0.34291391 - samples/sec: 11.17 - lr: 0.020000\n",
+      "2021-09-21 19:16:06,915 epoch 3 - iter 3/4 - loss 0.23095812 - samples/sec: 14.05 - lr: 0.020000\n",
+      "2021-09-21 19:16:06,985 epoch 3 - iter 4/4 - loss 0.54374538 - samples/sec: 14.34 - lr: 0.020000\n",
+      "2021-09-21 19:16:06,986 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:06,986 EPOCH 3 done: loss 0.5437 - lr 0.0200000\n",
+      "2021-09-21 19:16:06,986 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:16:06,989 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:07,093 epoch 4 - iter 1/4 - loss 0.00220386 - samples/sec: 16.67 - lr: 0.020000\n",
+      "2021-09-21 19:16:07,188 epoch 4 - iter 2/4 - loss 0.07697408 - samples/sec: 10.61 - lr: 0.020000\n",
+      "2021-09-21 19:16:07,281 epoch 4 - iter 3/4 - loss 0.18704616 - samples/sec: 10.85 - lr: 0.020000\n",
+      "2021-09-21 19:16:07,363 epoch 4 - iter 4/4 - loss 0.32260362 - samples/sec: 12.29 - lr: 0.020000\n",
+      "2021-09-21 19:16:07,364 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:07,364 EPOCH 4 done: loss 0.3226 - lr 0.0200000\n",
+      "2021-09-21 19:16:07,364 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:16:07,367 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:07,499 epoch 5 - iter 1/4 - loss 0.00106820 - samples/sec: 12.83 - lr: 0.020000\n",
+      "2021-09-21 19:16:07,590 epoch 5 - iter 2/4 - loss 0.11380107 - samples/sec: 11.10 - lr: 0.020000\n",
+      "2021-09-21 19:16:07,675 epoch 5 - iter 3/4 - loss 0.28335883 - samples/sec: 11.73 - lr: 0.020000\n",
+      "2021-09-21 19:16:07,765 epoch 5 - iter 4/4 - loss 0.23413350 - samples/sec: 11.18 - lr: 0.020000\n",
+      "2021-09-21 19:16:07,766 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:07,767 EPOCH 5 done: loss 0.2341 - lr 0.0200000\n",
       "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 10:50:53,122 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:50:53,124 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:53,207 epoch 6 - iter 1/4 - loss 0.00531752 - samples/sec: 17.71 - lr: 0.010000\n",
-      "2021-09-08 10:50:53,266 epoch 6 - iter 2/4 - loss 0.00595630 - samples/sec: 17.16 - lr: 0.010000\n",
-      "2021-09-08 10:50:53,319 epoch 6 - iter 3/4 - loss 0.01287017 - samples/sec: 18.97 - lr: 0.010000\n",
-      "2021-09-08 10:50:53,378 epoch 6 - iter 4/4 - loss 0.19033231 - samples/sec: 17.29 - lr: 0.010000\n",
-      "2021-09-08 10:50:53,379 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:53,380 EPOCH 6 done: loss 0.1903 - lr 0.0100000\n",
-      "2021-09-08 10:50:53,380 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:50:53,382 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:53,462 epoch 7 - iter 1/4 - loss 0.01344742 - samples/sec: 17.21 - lr: 0.010000\n",
-      "2021-09-08 10:50:53,517 epoch 7 - iter 2/4 - loss 0.01256991 - samples/sec: 18.10 - lr: 0.010000\n",
-      "2021-09-08 10:50:53,584 epoch 7 - iter 3/4 - loss 0.22149595 - samples/sec: 15.11 - lr: 0.010000\n",
-      "2021-09-08 10:50:53,642 epoch 7 - iter 4/4 - loss 0.16773511 - samples/sec: 17.65 - lr: 0.010000\n",
-      "2021-09-08 10:50:53,643 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:53,643 EPOCH 7 done: loss 0.1677 - lr 0.0100000\n",
-      "2021-09-08 10:50:53,644 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:50:53,646 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:53,721 epoch 8 - iter 1/4 - loss 0.00847603 - samples/sec: 18.44 - lr: 0.010000\n",
-      "2021-09-08 10:50:53,780 epoch 8 - iter 2/4 - loss 0.14499510 - samples/sec: 17.26 - lr: 0.010000\n",
-      "2021-09-08 10:50:53,835 epoch 8 - iter 3/4 - loss 0.09968615 - samples/sec: 18.32 - lr: 0.010000\n",
-      "2021-09-08 10:50:53,896 epoch 8 - iter 4/4 - loss 0.07569905 - samples/sec: 16.58 - lr: 0.010000\n",
-      "2021-09-08 10:50:53,897 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:53,898 EPOCH 8 done: loss 0.0757 - lr 0.0100000\n",
-      "2021-09-08 10:50:53,898 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:50:53,974 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:54,055 epoch 9 - iter 1/4 - loss 0.00816911 - samples/sec: 17.90 - lr: 0.010000\n",
-      "2021-09-08 10:50:54,115 epoch 9 - iter 2/4 - loss 0.00457566 - samples/sec: 16.93 - lr: 0.010000\n",
-      "2021-09-08 10:50:54,171 epoch 9 - iter 3/4 - loss 0.00412687 - samples/sec: 18.11 - lr: 0.010000\n",
-      "2021-09-08 10:50:54,231 epoch 9 - iter 4/4 - loss 0.09522136 - samples/sec: 16.72 - lr: 0.010000\n",
-      "2021-09-08 10:50:54,232 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:54,233 EPOCH 9 done: loss 0.0952 - lr 0.0100000\n",
+      "2021-09-21 19:16:07,767 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:16:07,769 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:07,862 epoch 6 - iter 1/4 - loss 0.00131769 - samples/sec: 14.88 - lr: 0.010000\n",
+      "2021-09-21 19:16:07,941 epoch 6 - iter 2/4 - loss 0.06147998 - samples/sec: 12.70 - lr: 0.010000\n",
+      "2021-09-21 19:16:08,019 epoch 6 - iter 3/4 - loss 0.11624599 - samples/sec: 12.80 - lr: 0.010000\n",
+      "2021-09-21 19:16:08,101 epoch 6 - iter 4/4 - loss 0.18390245 - samples/sec: 12.36 - lr: 0.010000\n",
+      "2021-09-21 19:16:08,102 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:08,102 EPOCH 6 done: loss 0.1839 - lr 0.0100000\n",
+      "2021-09-21 19:16:08,102 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:16:08,106 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:08,247 epoch 7 - iter 1/4 - loss 0.02361200 - samples/sec: 12.22 - lr: 0.010000\n",
+      "2021-09-21 19:16:08,346 epoch 7 - iter 2/4 - loss 0.49513056 - samples/sec: 10.14 - lr: 0.010000\n",
+      "2021-09-21 19:16:08,433 epoch 7 - iter 3/4 - loss 0.33127266 - samples/sec: 11.54 - lr: 0.010000\n",
+      "2021-09-21 19:16:08,516 epoch 7 - iter 4/4 - loss 0.32253830 - samples/sec: 12.13 - lr: 0.010000\n",
+      "2021-09-21 19:16:08,517 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:08,517 EPOCH 7 done: loss 0.3225 - lr 0.0100000\n",
+      "2021-09-21 19:16:08,517 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:16:08,520 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:08,666 epoch 8 - iter 1/4 - loss 0.00055154 - samples/sec: 11.31 - lr: 0.010000\n",
+      "2021-09-21 19:16:08,733 epoch 8 - iter 2/4 - loss 0.17500934 - samples/sec: 15.02 - lr: 0.010000\n",
+      "2021-09-21 19:16:08,801 epoch 8 - iter 3/4 - loss 0.12908995 - samples/sec: 14.83 - lr: 0.010000\n",
+      "2021-09-21 19:16:08,894 epoch 8 - iter 4/4 - loss 0.11183107 - samples/sec: 10.77 - lr: 0.010000\n",
+      "2021-09-21 19:16:08,895 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:08,896 EPOCH 8 done: loss 0.1118 - lr 0.0100000\n",
+      "2021-09-21 19:16:08,896 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:16:08,898 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:09,015 epoch 9 - iter 1/4 - loss 0.10209224 - samples/sec: 11.84 - lr: 0.010000\n",
+      "2021-09-21 19:16:09,095 epoch 9 - iter 2/4 - loss 0.05804429 - samples/sec: 12.63 - lr: 0.010000\n",
+      "2021-09-21 19:16:09,166 epoch 9 - iter 3/4 - loss 0.03901625 - samples/sec: 14.19 - lr: 0.010000\n",
+      "2021-09-21 19:16:09,240 epoch 9 - iter 4/4 - loss 0.08745791 - samples/sec: 13.61 - lr: 0.010000\n",
+      "2021-09-21 19:16:09,241 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:09,241 EPOCH 9 done: loss 0.0875 - lr 0.0100000\n",
       "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 10:50:54,233 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:50:54,309 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:54,393 epoch 10 - iter 1/4 - loss 0.00493351 - samples/sec: 16.07 - lr: 0.005000\n",
-      "2021-09-08 10:50:54,448 epoch 10 - iter 2/4 - loss 0.00769085 - samples/sec: 18.30 - lr: 0.005000\n",
-      "2021-09-08 10:50:54,504 epoch 10 - iter 3/4 - loss 0.00576783 - samples/sec: 18.18 - lr: 0.005000\n",
-      "2021-09-08 10:50:54,564 epoch 10 - iter 4/4 - loss 0.10428722 - samples/sec: 16.94 - lr: 0.005000\n",
-      "2021-09-08 10:50:54,565 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:54,565 EPOCH 10 done: loss 0.1043 - lr 0.0050000\n",
-      "2021-09-08 10:50:54,566 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:51:00,519 Test data not provided setting final score to 0\n",
-      "Accuracy Durchschnitt: 0.5729119638826184\n"
+      "2021-09-21 19:16:09,241 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:16:09,243 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:09,366 epoch 10 - iter 1/4 - loss 0.00216976 - samples/sec: 12.49 - lr: 0.005000\n",
+      "2021-09-21 19:16:09,434 epoch 10 - iter 2/4 - loss 0.02198506 - samples/sec: 14.63 - lr: 0.005000\n",
+      "2021-09-21 19:16:09,498 epoch 10 - iter 3/4 - loss 0.02513035 - samples/sec: 15.92 - lr: 0.005000\n",
+      "2021-09-21 19:16:09,579 epoch 10 - iter 4/4 - loss 0.02994014 - samples/sec: 12.33 - lr: 0.005000\n",
+      "2021-09-21 19:16:09,580 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:09,581 EPOCH 10 done: loss 0.0299 - lr 0.0050000\n",
+      "2021-09-21 19:16:09,581 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:16:13,871 Test data not provided setting final score to 0\n",
+      "Accuracy Durchschnitt: 0.5697516930022574\n"
      ]
     }
    ],
@@ -2384,11 +2378,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "id": "0c4025f0",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.5417607223476298, 0.5846501128668171, 0.5936794582392777, 0.5643340857787811, 0.5643340857787811]\n",
+      "0.01809251910565586\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(avg_acc_list)\n",
+    "print(statistics.pstdev(avg_acc_list))"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -2400,7 +2406,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 6,
    "id": "ef4da272",
    "metadata": {},
    "outputs": [
@@ -2408,25 +2414,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:51:08,131 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
+      "2021-09-21 19:16:25,125 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:51:12,505 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:16:29,391 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 36/36 [00:00<00:00, 47127.01it/s]"
+      "100%|██████████| 36/36 [00:00<00:00, 39527.47it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:51:12,508 [b'this text expresses sadness', b'this text expresses optimism', b'this text expresses anger', b'this text expresses joy']\n",
-      "2021-09-08 10:51:12,585 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:12,587 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:16:29,393 [b'this text expresses sadness', b'this text expresses optimism', b'this text expresses anger', b'this text expresses joy']\n",
+      "2021-09-21 19:16:29,403 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:29,404 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -2739,24 +2745,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:51:12,587 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:12,587 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
-      "2021-09-08 10:51:12,588 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:12,588 Parameters:\n",
-      "2021-09-08 10:51:12,588  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:51:12,589  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:51:12,589  - patience: \"3\"\n",
-      "2021-09-08 10:51:12,589  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:51:12,590  - max_epochs: \"10\"\n",
-      "2021-09-08 10:51:12,590  - shuffle: \"True\"\n",
-      "2021-09-08 10:51:12,591  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:51:12,591  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:51:12,591 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:12,591 Model training base path: \"None1\"\n",
-      "2021-09-08 10:51:12,592 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:12,592 Device: cuda:1\n",
-      "2021-09-08 10:51:12,593 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:12,593 Embeddings storage mode: cpu\n"
+      "2021-09-21 19:16:29,405 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:29,405 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
+      "2021-09-21 19:16:29,406 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:29,406 Parameters:\n",
+      "2021-09-21 19:16:29,406  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:16:29,406  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:16:29,407  - patience: \"3\"\n",
+      "2021-09-21 19:16:29,407  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:16:29,407  - max_epochs: \"10\"\n",
+      "2021-09-21 19:16:29,408  - shuffle: \"True\"\n",
+      "2021-09-21 19:16:29,408  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:16:29,408  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:16:29,408 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:29,409 Model training base path: \"None1\"\n",
+      "2021-09-21 19:16:29,409 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:29,409 Device: cuda:0\n",
+      "2021-09-21 19:16:29,409 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:29,410 Embeddings storage mode: cpu\n",
+      "2021-09-21 19:16:29,417 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -2770,210 +2777,209 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:51:13,239 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:13,404 epoch 1 - iter 3/32 - loss 0.93428632 - samples/sec: 20.98 - lr: 0.020000\n",
-      "2021-09-08 10:51:13,555 epoch 1 - iter 6/32 - loss 0.71609093 - samples/sec: 19.96 - lr: 0.020000\n",
-      "2021-09-08 10:51:13,710 epoch 1 - iter 9/32 - loss 0.75153311 - samples/sec: 19.42 - lr: 0.020000\n",
-      "2021-09-08 10:51:13,875 epoch 1 - iter 12/32 - loss 0.87777568 - samples/sec: 18.27 - lr: 0.020000\n",
-      "2021-09-08 10:51:14,045 epoch 1 - iter 15/32 - loss 0.80559927 - samples/sec: 17.71 - lr: 0.020000\n",
-      "2021-09-08 10:51:14,204 epoch 1 - iter 18/32 - loss 0.85125591 - samples/sec: 18.91 - lr: 0.020000\n",
-      "2021-09-08 10:51:14,363 epoch 1 - iter 21/32 - loss 0.78918909 - samples/sec: 18.91 - lr: 0.020000\n",
-      "2021-09-08 10:51:14,515 epoch 1 - iter 24/32 - loss 0.74277569 - samples/sec: 19.93 - lr: 0.020000\n",
-      "2021-09-08 10:51:14,674 epoch 1 - iter 27/32 - loss 0.77361769 - samples/sec: 18.87 - lr: 0.020000\n",
-      "2021-09-08 10:51:14,834 epoch 1 - iter 30/32 - loss 0.74018598 - samples/sec: 18.87 - lr: 0.020000\n",
-      "2021-09-08 10:51:14,937 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:14,938 EPOCH 1 done: loss 0.7016 - lr 0.0200000\n",
-      "2021-09-08 10:51:15,017 DEV : loss 1.5290112495422363 - score 0.0\n",
-      "2021-09-08 10:51:15,018 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:51:21,702 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:21,858 epoch 2 - iter 3/32 - loss 1.18338470 - samples/sec: 21.29 - lr: 0.020000\n",
-      "2021-09-08 10:51:21,998 epoch 2 - iter 6/32 - loss 0.81534112 - samples/sec: 21.66 - lr: 0.020000\n",
-      "2021-09-08 10:51:22,138 epoch 2 - iter 9/32 - loss 0.65456769 - samples/sec: 21.45 - lr: 0.020000\n",
-      "2021-09-08 10:51:22,278 epoch 2 - iter 12/32 - loss 0.66561709 - samples/sec: 21.70 - lr: 0.020000\n",
-      "2021-09-08 10:51:22,417 epoch 2 - iter 15/32 - loss 0.65164584 - samples/sec: 21.64 - lr: 0.020000\n",
-      "2021-09-08 10:51:22,560 epoch 2 - iter 18/32 - loss 0.62241871 - samples/sec: 21.22 - lr: 0.020000\n",
-      "2021-09-08 10:51:22,711 epoch 2 - iter 21/32 - loss 0.63957937 - samples/sec: 19.94 - lr: 0.020000\n",
-      "2021-09-08 10:51:22,857 epoch 2 - iter 24/32 - loss 0.63261301 - samples/sec: 20.80 - lr: 0.020000\n",
-      "2021-09-08 10:51:23,000 epoch 2 - iter 27/32 - loss 0.61510329 - samples/sec: 21.02 - lr: 0.020000\n",
-      "2021-09-08 10:51:23,143 epoch 2 - iter 30/32 - loss 0.60354965 - samples/sec: 21.14 - lr: 0.020000\n",
-      "2021-09-08 10:51:23,240 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:23,241 EPOCH 2 done: loss 0.5822 - lr 0.0200000\n",
-      "2021-09-08 10:51:23,535 DEV : loss 0.7530866265296936 - score 0.75\n",
-      "2021-09-08 10:51:23,536 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:16:29,658 epoch 1 - iter 3/32 - loss 0.52732796 - samples/sec: 13.81 - lr: 0.020000\n",
+      "2021-09-21 19:16:29,861 epoch 1 - iter 6/32 - loss 0.33702789 - samples/sec: 14.79 - lr: 0.020000\n",
+      "2021-09-21 19:16:30,064 epoch 1 - iter 9/32 - loss 0.54609240 - samples/sec: 14.87 - lr: 0.020000\n",
+      "2021-09-21 19:16:30,325 epoch 1 - iter 12/32 - loss 0.65740089 - samples/sec: 11.49 - lr: 0.020000\n",
+      "2021-09-21 19:16:30,541 epoch 1 - iter 15/32 - loss 0.71830430 - samples/sec: 13.94 - lr: 0.020000\n",
+      "2021-09-21 19:16:30,787 epoch 1 - iter 18/32 - loss 0.67143990 - samples/sec: 12.22 - lr: 0.020000\n",
+      "2021-09-21 19:16:31,009 epoch 1 - iter 21/32 - loss 0.65319988 - samples/sec: 13.54 - lr: 0.020000\n",
+      "2021-09-21 19:16:31,226 epoch 1 - iter 24/32 - loss 0.57361924 - samples/sec: 13.89 - lr: 0.020000\n",
+      "2021-09-21 19:16:31,441 epoch 1 - iter 27/32 - loss 0.54172017 - samples/sec: 13.98 - lr: 0.020000\n",
+      "2021-09-21 19:16:31,679 epoch 1 - iter 30/32 - loss 0.72638579 - samples/sec: 12.61 - lr: 0.020000\n",
+      "2021-09-21 19:16:31,827 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:31,828 EPOCH 1 done: loss 0.7459 - lr 0.0200000\n",
+      "2021-09-21 19:16:31,990 DEV : loss 0.8095675706863403 - score 0.0\n",
+      "2021-09-21 19:16:31,990 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:51:29,172 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:29,327 epoch 3 - iter 3/32 - loss 0.54726383 - samples/sec: 21.34 - lr: 0.020000\n",
-      "2021-09-08 10:51:29,466 epoch 3 - iter 6/32 - loss 0.44232629 - samples/sec: 21.74 - lr: 0.020000\n",
-      "2021-09-08 10:51:29,604 epoch 3 - iter 9/32 - loss 0.35324845 - samples/sec: 21.84 - lr: 0.020000\n",
-      "2021-09-08 10:51:29,742 epoch 3 - iter 12/32 - loss 0.34846343 - samples/sec: 21.96 - lr: 0.020000\n",
-      "2021-09-08 10:51:29,881 epoch 3 - iter 15/32 - loss 0.30235754 - samples/sec: 21.64 - lr: 0.020000\n",
-      "2021-09-08 10:51:30,020 epoch 3 - iter 18/32 - loss 0.35768068 - samples/sec: 21.85 - lr: 0.020000\n",
-      "2021-09-08 10:51:30,155 epoch 3 - iter 21/32 - loss 0.35492468 - samples/sec: 22.30 - lr: 0.020000\n",
-      "2021-09-08 10:51:30,293 epoch 3 - iter 24/32 - loss 0.37338189 - samples/sec: 21.72 - lr: 0.020000\n",
-      "2021-09-08 10:51:30,429 epoch 3 - iter 27/32 - loss 0.39579807 - samples/sec: 22.19 - lr: 0.020000\n",
-      "2021-09-08 10:51:30,564 epoch 3 - iter 30/32 - loss 0.41228558 - samples/sec: 22.29 - lr: 0.020000\n",
-      "2021-09-08 10:51:30,660 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:30,660 EPOCH 3 done: loss 0.4263 - lr 0.0200000\n",
-      "2021-09-08 10:51:30,841 DEV : loss 0.9466863870620728 - score 0.25\n",
-      "2021-09-08 10:51:30,842 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:51:30,931 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:31,082 epoch 4 - iter 3/32 - loss 0.49649755 - samples/sec: 21.73 - lr: 0.020000\n",
-      "2021-09-08 10:51:31,219 epoch 4 - iter 6/32 - loss 0.33493550 - samples/sec: 22.07 - lr: 0.020000\n",
-      "2021-09-08 10:51:31,351 epoch 4 - iter 9/32 - loss 0.24133109 - samples/sec: 22.68 - lr: 0.020000\n",
-      "2021-09-08 10:51:31,483 epoch 4 - iter 12/32 - loss 0.19199407 - samples/sec: 22.84 - lr: 0.020000\n",
-      "2021-09-08 10:51:31,620 epoch 4 - iter 15/32 - loss 0.21892086 - samples/sec: 22.04 - lr: 0.020000\n",
-      "2021-09-08 10:51:31,755 epoch 4 - iter 18/32 - loss 0.21141429 - samples/sec: 22.33 - lr: 0.020000\n",
-      "2021-09-08 10:51:31,889 epoch 4 - iter 21/32 - loss 0.26803631 - samples/sec: 22.35 - lr: 0.020000\n",
-      "2021-09-08 10:51:32,021 epoch 4 - iter 24/32 - loss 0.27668572 - samples/sec: 22.91 - lr: 0.020000\n",
-      "2021-09-08 10:51:32,152 epoch 4 - iter 27/32 - loss 0.28529531 - samples/sec: 22.90 - lr: 0.020000\n",
-      "2021-09-08 10:51:32,287 epoch 4 - iter 30/32 - loss 0.31712183 - samples/sec: 22.30 - lr: 0.020000\n",
-      "2021-09-08 10:51:32,381 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:32,381 EPOCH 4 done: loss 0.3360 - lr 0.0200000\n",
-      "2021-09-08 10:51:33,340 DEV : loss 0.5571861863136292 - score 0.75\n",
-      "2021-09-08 10:51:33,341 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:16:36,164 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:36,430 epoch 2 - iter 3/32 - loss 0.72192355 - samples/sec: 13.06 - lr: 0.020000\n",
+      "2021-09-21 19:16:36,667 epoch 2 - iter 6/32 - loss 0.37869290 - samples/sec: 12.66 - lr: 0.020000\n",
+      "2021-09-21 19:16:36,901 epoch 2 - iter 9/32 - loss 0.39632695 - samples/sec: 12.86 - lr: 0.020000\n",
+      "2021-09-21 19:16:37,128 epoch 2 - iter 12/32 - loss 0.43744296 - samples/sec: 13.26 - lr: 0.020000\n",
+      "2021-09-21 19:16:37,345 epoch 2 - iter 15/32 - loss 0.40574741 - samples/sec: 13.87 - lr: 0.020000\n",
+      "2021-09-21 19:16:37,582 epoch 2 - iter 18/32 - loss 0.37877226 - samples/sec: 12.65 - lr: 0.020000\n",
+      "2021-09-21 19:16:37,830 epoch 2 - iter 21/32 - loss 0.44008398 - samples/sec: 12.15 - lr: 0.020000\n",
+      "2021-09-21 19:16:38,062 epoch 2 - iter 24/32 - loss 0.52904613 - samples/sec: 12.93 - lr: 0.020000\n",
+      "2021-09-21 19:16:38,265 epoch 2 - iter 27/32 - loss 0.48416576 - samples/sec: 14.84 - lr: 0.020000\n",
+      "2021-09-21 19:16:38,480 epoch 2 - iter 30/32 - loss 0.52222573 - samples/sec: 14.01 - lr: 0.020000\n",
+      "2021-09-21 19:16:38,628 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:38,628 EPOCH 2 done: loss 0.5159 - lr 0.0200000\n",
+      "2021-09-21 19:16:38,762 DEV : loss 0.663748025894165 - score 0.25\n",
+      "2021-09-21 19:16:38,763 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:51:43,973 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:44,128 epoch 5 - iter 3/32 - loss 0.83450648 - samples/sec: 21.32 - lr: 0.020000\n",
-      "2021-09-08 10:51:44,264 epoch 5 - iter 6/32 - loss 0.44710719 - samples/sec: 22.28 - lr: 0.020000\n",
-      "2021-09-08 10:51:44,405 epoch 5 - iter 9/32 - loss 0.40280350 - samples/sec: 21.33 - lr: 0.020000\n",
-      "2021-09-08 10:51:44,539 epoch 5 - iter 12/32 - loss 0.31825280 - samples/sec: 22.36 - lr: 0.020000\n",
-      "2021-09-08 10:51:44,673 epoch 5 - iter 15/32 - loss 0.33802234 - samples/sec: 22.52 - lr: 0.020000\n",
-      "2021-09-08 10:51:44,808 epoch 5 - iter 18/32 - loss 0.30462468 - samples/sec: 22.28 - lr: 0.020000\n",
-      "2021-09-08 10:51:44,943 epoch 5 - iter 21/32 - loss 0.28574780 - samples/sec: 22.32 - lr: 0.020000\n",
-      "2021-09-08 10:51:45,100 epoch 5 - iter 24/32 - loss 0.42909253 - samples/sec: 19.23 - lr: 0.020000\n",
-      "2021-09-08 10:51:45,246 epoch 5 - iter 27/32 - loss 0.38231522 - samples/sec: 20.59 - lr: 0.020000\n",
-      "2021-09-08 10:51:45,407 epoch 5 - iter 30/32 - loss 0.35427971 - samples/sec: 18.73 - lr: 0.020000\n",
-      "2021-09-08 10:51:45,501 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:45,501 EPOCH 5 done: loss 0.3865 - lr 0.0200000\n",
-      "2021-09-08 10:51:45,573 DEV : loss 0.9233506917953491 - score 0.5\n",
-      "2021-09-08 10:51:45,573 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:51:45,584 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:45,749 epoch 6 - iter 3/32 - loss 0.08430526 - samples/sec: 20.00 - lr: 0.020000\n",
-      "2021-09-08 10:51:45,884 epoch 6 - iter 6/32 - loss 0.05352155 - samples/sec: 22.31 - lr: 0.020000\n",
-      "2021-09-08 10:51:46,027 epoch 6 - iter 9/32 - loss 0.04487077 - samples/sec: 21.13 - lr: 0.020000\n",
-      "2021-09-08 10:51:46,164 epoch 6 - iter 12/32 - loss 0.22743757 - samples/sec: 21.96 - lr: 0.020000\n",
-      "2021-09-08 10:51:46,297 epoch 6 - iter 15/32 - loss 0.20476608 - samples/sec: 22.67 - lr: 0.020000\n",
-      "2021-09-08 10:51:46,432 epoch 6 - iter 18/32 - loss 0.18357535 - samples/sec: 22.30 - lr: 0.020000\n",
-      "2021-09-08 10:51:46,563 epoch 6 - iter 21/32 - loss 0.15909284 - samples/sec: 22.93 - lr: 0.020000\n",
-      "2021-09-08 10:51:46,698 epoch 6 - iter 24/32 - loss 0.21196371 - samples/sec: 22.35 - lr: 0.020000\n",
-      "2021-09-08 10:51:46,833 epoch 6 - iter 27/32 - loss 0.21117632 - samples/sec: 22.24 - lr: 0.020000\n",
-      "2021-09-08 10:51:46,962 epoch 6 - iter 30/32 - loss 0.19072705 - samples/sec: 23.49 - lr: 0.020000\n"
+      "2021-09-21 19:16:45,114 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:45,408 epoch 3 - iter 3/32 - loss 0.47796092 - samples/sec: 11.43 - lr: 0.020000\n",
+      "2021-09-21 19:16:45,713 epoch 3 - iter 6/32 - loss 0.29761479 - samples/sec: 9.86 - lr: 0.020000\n",
+      "2021-09-21 19:16:45,964 epoch 3 - iter 9/32 - loss 0.25012971 - samples/sec: 11.98 - lr: 0.020000\n",
+      "2021-09-21 19:16:46,185 epoch 3 - iter 12/32 - loss 0.31736097 - samples/sec: 13.63 - lr: 0.020000\n",
+      "2021-09-21 19:16:46,402 epoch 3 - iter 15/32 - loss 0.27452734 - samples/sec: 13.86 - lr: 0.020000\n",
+      "2021-09-21 19:16:46,605 epoch 3 - iter 18/32 - loss 0.24165304 - samples/sec: 14.84 - lr: 0.020000\n",
+      "2021-09-21 19:16:46,827 epoch 3 - iter 21/32 - loss 0.35458832 - samples/sec: 13.56 - lr: 0.020000\n",
+      "2021-09-21 19:16:47,021 epoch 3 - iter 24/32 - loss 0.37457029 - samples/sec: 15.45 - lr: 0.020000\n",
+      "2021-09-21 19:16:47,241 epoch 3 - iter 27/32 - loss 0.39165472 - samples/sec: 13.68 - lr: 0.020000\n",
+      "2021-09-21 19:16:47,483 epoch 3 - iter 30/32 - loss 0.39657286 - samples/sec: 12.41 - lr: 0.020000\n",
+      "2021-09-21 19:16:47,630 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:47,631 EPOCH 3 done: loss 0.3775 - lr 0.0200000\n",
+      "2021-09-21 19:16:47,858 DEV : loss 1.4352037906646729 - score 0.25\n",
+      "2021-09-21 19:16:47,861 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:16:47,937 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:48,189 epoch 4 - iter 3/32 - loss 0.55524567 - samples/sec: 13.03 - lr: 0.020000\n",
+      "2021-09-21 19:16:48,405 epoch 4 - iter 6/32 - loss 0.36351605 - samples/sec: 13.95 - lr: 0.020000\n",
+      "2021-09-21 19:16:48,638 epoch 4 - iter 9/32 - loss 0.31052692 - samples/sec: 12.89 - lr: 0.020000\n",
+      "2021-09-21 19:16:48,869 epoch 4 - iter 12/32 - loss 0.29399901 - samples/sec: 13.03 - lr: 0.020000\n",
+      "2021-09-21 19:16:49,069 epoch 4 - iter 15/32 - loss 0.24047328 - samples/sec: 14.99 - lr: 0.020000\n",
+      "2021-09-21 19:16:49,298 epoch 4 - iter 18/32 - loss 0.21418106 - samples/sec: 13.17 - lr: 0.020000\n",
+      "2021-09-21 19:16:49,514 epoch 4 - iter 21/32 - loss 0.27782683 - samples/sec: 13.92 - lr: 0.020000\n",
+      "2021-09-21 19:16:49,743 epoch 4 - iter 24/32 - loss 0.24632581 - samples/sec: 13.11 - lr: 0.020000\n",
+      "2021-09-21 19:16:49,976 epoch 4 - iter 27/32 - loss 0.31356657 - samples/sec: 12.92 - lr: 0.020000\n",
+      "2021-09-21 19:16:50,237 epoch 4 - iter 30/32 - loss 0.28391537 - samples/sec: 11.53 - lr: 0.020000\n",
+      "2021-09-21 19:16:50,414 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:50,414 EPOCH 4 done: loss 0.3128 - lr 0.0200000\n",
+      "2021-09-21 19:16:50,619 DEV : loss 0.8043038249015808 - score 0.25\n",
+      "2021-09-21 19:16:50,622 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:16:50,624 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:50,897 epoch 5 - iter 3/32 - loss 0.30525591 - samples/sec: 12.50 - lr: 0.020000\n",
+      "2021-09-21 19:16:51,124 epoch 5 - iter 6/32 - loss 0.16279014 - samples/sec: 13.26 - lr: 0.020000\n",
+      "2021-09-21 19:16:51,338 epoch 5 - iter 9/32 - loss 0.36245785 - samples/sec: 14.08 - lr: 0.020000\n",
+      "2021-09-21 19:16:51,561 epoch 5 - iter 12/32 - loss 0.36233851 - samples/sec: 13.51 - lr: 0.020000\n",
+      "2021-09-21 19:16:51,765 epoch 5 - iter 15/32 - loss 0.29795416 - samples/sec: 14.76 - lr: 0.020000\n",
+      "2021-09-21 19:16:51,976 epoch 5 - iter 18/32 - loss 0.24973451 - samples/sec: 14.22 - lr: 0.020000\n",
+      "2021-09-21 19:16:52,198 epoch 5 - iter 21/32 - loss 0.22782703 - samples/sec: 13.54 - lr: 0.020000\n",
+      "2021-09-21 19:16:52,389 epoch 5 - iter 24/32 - loss 0.21612484 - samples/sec: 15.79 - lr: 0.020000\n",
+      "2021-09-21 19:16:52,612 epoch 5 - iter 27/32 - loss 0.19555192 - samples/sec: 13.49 - lr: 0.020000\n",
+      "2021-09-21 19:16:52,817 epoch 5 - iter 30/32 - loss 0.17628467 - samples/sec: 14.71 - lr: 0.020000\n",
+      "2021-09-21 19:16:52,977 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:52,978 EPOCH 5 done: loss 0.1682 - lr 0.0200000\n",
+      "2021-09-21 19:16:53,188 DEV : loss 2.285797357559204 - score 0.25\n",
+      "2021-09-21 19:16:53,189 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:16:53,267 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:16:53,510 epoch 6 - iter 3/32 - loss 0.16632987 - samples/sec: 14.99 - lr: 0.020000\n",
+      "2021-09-21 19:16:53,701 epoch 6 - iter 6/32 - loss 0.08574588 - samples/sec: 15.77 - lr: 0.020000\n",
+      "2021-09-21 19:16:53,903 epoch 6 - iter 9/32 - loss 0.05939947 - samples/sec: 14.92 - lr: 0.020000\n",
+      "2021-09-21 19:16:54,132 epoch 6 - iter 12/32 - loss 0.04896118 - samples/sec: 13.12 - lr: 0.020000\n",
+      "2021-09-21 19:16:54,351 epoch 6 - iter 15/32 - loss 0.03976360 - samples/sec: 13.75 - lr: 0.020000\n",
+      "2021-09-21 19:16:54,550 epoch 6 - iter 18/32 - loss 0.03360026 - samples/sec: 15.12 - lr: 0.020000\n",
+      "2021-09-21 19:16:54,738 epoch 6 - iter 21/32 - loss 0.02909115 - samples/sec: 15.95 - lr: 0.020000\n",
+      "2021-09-21 19:16:54,924 epoch 6 - iter 24/32 - loss 0.10794866 - samples/sec: 16.19 - lr: 0.020000\n",
+      "2021-09-21 19:16:55,105 epoch 6 - iter 27/32 - loss 0.11785317 - samples/sec: 16.56 - lr: 0.020000\n",
+      "2021-09-21 19:16:55,283 epoch 6 - iter 30/32 - loss 0.14826503 - samples/sec: 16.95 - lr: 0.020000\n",
+      "2021-09-21 19:16:55,416 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:51:47,051 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:47,051 EPOCH 6 done: loss 0.1926 - lr 0.0200000\n",
-      "2021-09-08 10:51:47,210 DEV : loss 0.8695818185806274 - score 0.5\n",
-      "2021-09-08 10:51:47,211 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:51:47,297 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:47,439 epoch 7 - iter 3/32 - loss 0.00226447 - samples/sec: 23.44 - lr: 0.020000\n",
-      "2021-09-08 10:51:47,569 epoch 7 - iter 6/32 - loss 0.00254617 - samples/sec: 23.07 - lr: 0.020000\n",
-      "2021-09-08 10:51:47,707 epoch 7 - iter 9/32 - loss 0.22920624 - samples/sec: 21.85 - lr: 0.020000\n",
-      "2021-09-08 10:51:47,845 epoch 7 - iter 12/32 - loss 0.20681767 - samples/sec: 21.77 - lr: 0.020000\n",
-      "2021-09-08 10:51:47,973 epoch 7 - iter 15/32 - loss 0.16961336 - samples/sec: 23.54 - lr: 0.020000\n",
-      "2021-09-08 10:51:48,108 epoch 7 - iter 18/32 - loss 0.15544825 - samples/sec: 22.35 - lr: 0.020000\n",
-      "2021-09-08 10:51:48,239 epoch 7 - iter 21/32 - loss 0.19482230 - samples/sec: 22.90 - lr: 0.020000\n",
-      "2021-09-08 10:51:48,372 epoch 7 - iter 24/32 - loss 0.21010255 - samples/sec: 22.76 - lr: 0.020000\n",
-      "2021-09-08 10:51:48,507 epoch 7 - iter 27/32 - loss 0.23572136 - samples/sec: 22.20 - lr: 0.020000\n",
-      "2021-09-08 10:51:48,642 epoch 7 - iter 30/32 - loss 0.22080879 - samples/sec: 22.27 - lr: 0.020000\n",
-      "2021-09-08 10:51:48,729 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:48,729 EPOCH 7 done: loss 0.2078 - lr 0.0200000\n",
-      "2021-09-08 10:51:48,893 DEV : loss 0.43090659379959106 - score 0.5\n",
-      "2021-09-08 10:51:48,893 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:51:48,980 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:49,131 epoch 8 - iter 3/32 - loss 0.60652302 - samples/sec: 21.78 - lr: 0.020000\n",
-      "2021-09-08 10:51:49,262 epoch 8 - iter 6/32 - loss 0.31750659 - samples/sec: 22.87 - lr: 0.020000\n",
-      "2021-09-08 10:51:49,402 epoch 8 - iter 9/32 - loss 0.33980674 - samples/sec: 21.58 - lr: 0.020000\n",
-      "2021-09-08 10:51:49,533 epoch 8 - iter 12/32 - loss 0.26132026 - samples/sec: 22.88 - lr: 0.020000\n",
-      "2021-09-08 10:51:49,665 epoch 8 - iter 15/32 - loss 0.24054201 - samples/sec: 22.85 - lr: 0.020000\n",
-      "2021-09-08 10:51:49,797 epoch 8 - iter 18/32 - loss 0.20390159 - samples/sec: 22.82 - lr: 0.020000\n",
-      "2021-09-08 10:51:49,926 epoch 8 - iter 21/32 - loss 0.17492344 - samples/sec: 23.48 - lr: 0.020000\n",
-      "2021-09-08 10:51:50,060 epoch 8 - iter 24/32 - loss 0.15793253 - samples/sec: 22.36 - lr: 0.020000\n",
-      "2021-09-08 10:51:50,195 epoch 8 - iter 27/32 - loss 0.14524385 - samples/sec: 22.40 - lr: 0.020000\n",
-      "2021-09-08 10:51:50,327 epoch 8 - iter 30/32 - loss 0.17582032 - samples/sec: 22.76 - lr: 0.020000\n",
-      "2021-09-08 10:51:50,416 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:50,417 EPOCH 8 done: loss 0.1656 - lr 0.0200000\n",
-      "2021-09-08 10:51:51,017 DEV : loss 1.6440520286560059 - score 0.25\n",
-      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 10:51:51,018 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:51:51,093 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:51,268 epoch 9 - iter 3/32 - loss 0.07705724 - samples/sec: 18.89 - lr: 0.010000\n",
-      "2021-09-08 10:51:51,422 epoch 9 - iter 6/32 - loss 0.03951700 - samples/sec: 19.49 - lr: 0.010000\n",
-      "2021-09-08 10:51:51,555 epoch 9 - iter 9/32 - loss 0.17192298 - samples/sec: 22.81 - lr: 0.010000\n",
-      "2021-09-08 10:51:51,683 epoch 9 - iter 12/32 - loss 0.12970753 - samples/sec: 23.44 - lr: 0.010000\n",
-      "2021-09-08 10:51:51,815 epoch 9 - iter 15/32 - loss 0.11295785 - samples/sec: 22.76 - lr: 0.010000\n",
-      "2021-09-08 10:51:51,953 epoch 9 - iter 18/32 - loss 0.10683915 - samples/sec: 21.93 - lr: 0.010000\n",
-      "2021-09-08 10:51:52,081 epoch 9 - iter 21/32 - loss 0.09174239 - samples/sec: 23.47 - lr: 0.010000\n",
-      "2021-09-08 10:51:52,227 epoch 9 - iter 24/32 - loss 0.08278573 - samples/sec: 20.54 - lr: 0.010000\n",
-      "2021-09-08 10:51:52,371 epoch 9 - iter 27/32 - loss 0.07602797 - samples/sec: 21.00 - lr: 0.010000\n",
-      "2021-09-08 10:51:52,512 epoch 9 - iter 30/32 - loss 0.07277691 - samples/sec: 21.41 - lr: 0.010000\n",
-      "2021-09-08 10:51:52,610 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:52,610 EPOCH 9 done: loss 0.0690 - lr 0.0100000\n",
-      "2021-09-08 10:51:52,678 DEV : loss 1.1983312368392944 - score 0.25\n",
-      "2021-09-08 10:51:52,679 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:51:52,760 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:52,910 epoch 10 - iter 3/32 - loss 0.05418855 - samples/sec: 21.96 - lr: 0.010000\n",
-      "2021-09-08 10:51:53,042 epoch 10 - iter 6/32 - loss 0.05453242 - samples/sec: 22.85 - lr: 0.010000\n",
-      "2021-09-08 10:51:53,171 epoch 10 - iter 9/32 - loss 0.03965736 - samples/sec: 23.33 - lr: 0.010000\n",
-      "2021-09-08 10:51:53,303 epoch 10 - iter 12/32 - loss 0.03872512 - samples/sec: 22.86 - lr: 0.010000\n",
-      "2021-09-08 10:51:53,431 epoch 10 - iter 15/32 - loss 0.03307483 - samples/sec: 23.49 - lr: 0.010000\n",
-      "2021-09-08 10:51:53,566 epoch 10 - iter 18/32 - loss 0.12078086 - samples/sec: 22.23 - lr: 0.010000\n",
-      "2021-09-08 10:51:53,695 epoch 10 - iter 21/32 - loss 0.10401336 - samples/sec: 23.32 - lr: 0.010000\n",
-      "2021-09-08 10:51:53,827 epoch 10 - iter 24/32 - loss 0.09153652 - samples/sec: 22.95 - lr: 0.010000\n",
-      "2021-09-08 10:51:53,955 epoch 10 - iter 27/32 - loss 0.08170323 - samples/sec: 23.42 - lr: 0.010000\n",
-      "2021-09-08 10:51:54,084 epoch 10 - iter 30/32 - loss 0.07391036 - samples/sec: 23.41 - lr: 0.010000\n",
-      "2021-09-08 10:51:54,170 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:54,171 EPOCH 10 done: loss 0.0694 - lr 0.0100000\n",
-      "2021-09-08 10:51:54,338 DEV : loss 0.9976639151573181 - score 0.5\n",
-      "2021-09-08 10:51:54,339 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:51:58,462 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:58,462 Testing using best model ...\n",
-      "2021-09-08 10:51:58,464 loading file None1/best-model.pt\n",
+      "2021-09-21 19:16:55,416 EPOCH 6 done: loss 0.1422 - lr 0.0200000\n",
+      "2021-09-21 19:16:55,724 DEV : loss 1.2389121055603027 - score 0.75\n",
+      "2021-09-21 19:16:55,725 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:17:04,100 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:17:04,318 epoch 7 - iter 3/32 - loss 0.09284059 - samples/sec: 16.18 - lr: 0.020000\n",
+      "2021-09-21 19:17:04,484 epoch 7 - iter 6/32 - loss 0.04656289 - samples/sec: 18.16 - lr: 0.020000\n",
+      "2021-09-21 19:17:04,645 epoch 7 - iter 9/32 - loss 0.03283338 - samples/sec: 18.66 - lr: 0.020000\n",
+      "2021-09-21 19:17:04,807 epoch 7 - iter 12/32 - loss 0.02568424 - samples/sec: 18.63 - lr: 0.020000\n",
+      "2021-09-21 19:17:04,977 epoch 7 - iter 15/32 - loss 0.10428986 - samples/sec: 17.64 - lr: 0.020000\n",
+      "2021-09-21 19:17:05,125 epoch 7 - iter 18/32 - loss 0.08761663 - samples/sec: 20.34 - lr: 0.020000\n",
+      "2021-09-21 19:17:05,301 epoch 7 - iter 21/32 - loss 0.08425994 - samples/sec: 17.10 - lr: 0.020000\n",
+      "2021-09-21 19:17:05,462 epoch 7 - iter 24/32 - loss 0.11579406 - samples/sec: 18.71 - lr: 0.020000\n",
+      "2021-09-21 19:17:05,624 epoch 7 - iter 27/32 - loss 0.10369033 - samples/sec: 18.54 - lr: 0.020000\n",
+      "2021-09-21 19:17:05,789 epoch 7 - iter 30/32 - loss 0.09504100 - samples/sec: 18.24 - lr: 0.020000\n",
+      "2021-09-21 19:17:05,902 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:17:05,903 EPOCH 7 done: loss 0.0891 - lr 0.0200000\n",
+      "2021-09-21 19:17:10,656 DEV : loss 1.1249895095825195 - score 0.5\n",
+      "2021-09-21 19:17:10,656 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:17:10,665 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:17:10,888 epoch 8 - iter 3/32 - loss 0.00150408 - samples/sec: 14.63 - lr: 0.020000\n",
+      "2021-09-21 19:17:11,120 epoch 8 - iter 6/32 - loss 0.00102809 - samples/sec: 13.00 - lr: 0.020000\n",
+      "2021-09-21 19:17:11,299 epoch 8 - iter 9/32 - loss 0.00134143 - samples/sec: 16.85 - lr: 0.020000\n",
+      "2021-09-21 19:17:11,480 epoch 8 - iter 12/32 - loss 0.00113192 - samples/sec: 16.67 - lr: 0.020000\n",
+      "2021-09-21 19:17:11,707 epoch 8 - iter 15/32 - loss 0.00110394 - samples/sec: 13.26 - lr: 0.020000\n",
+      "2021-09-21 19:17:11,916 epoch 8 - iter 18/32 - loss 0.00113914 - samples/sec: 14.37 - lr: 0.020000\n",
+      "2021-09-21 19:17:12,116 epoch 8 - iter 21/32 - loss 0.00169289 - samples/sec: 15.11 - lr: 0.020000\n",
+      "2021-09-21 19:17:12,336 epoch 8 - iter 24/32 - loss 0.00236778 - samples/sec: 13.68 - lr: 0.020000\n",
+      "2021-09-21 19:17:12,556 epoch 8 - iter 27/32 - loss 0.00227123 - samples/sec: 13.70 - lr: 0.020000\n",
+      "2021-09-21 19:17:12,761 epoch 8 - iter 30/32 - loss 0.00383431 - samples/sec: 14.69 - lr: 0.020000\n",
+      "2021-09-21 19:17:12,907 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:17:12,907 EPOCH 8 done: loss 0.0053 - lr 0.0200000\n",
+      "2021-09-21 19:17:13,041 DEV : loss 0.730638861656189 - score 0.75\n",
+      "2021-09-21 19:17:13,045 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:17:17,733 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:17:17,941 epoch 9 - iter 3/32 - loss 0.00146765 - samples/sec: 15.86 - lr: 0.020000\n",
+      "2021-09-21 19:17:18,125 epoch 9 - iter 6/32 - loss 0.00259800 - samples/sec: 16.32 - lr: 0.020000\n",
+      "2021-09-21 19:17:18,293 epoch 9 - iter 9/32 - loss 0.00212761 - samples/sec: 18.00 - lr: 0.020000\n",
+      "2021-09-21 19:17:18,458 epoch 9 - iter 12/32 - loss 0.00163021 - samples/sec: 18.18 - lr: 0.020000\n",
+      "2021-09-21 19:17:18,624 epoch 9 - iter 15/32 - loss 0.00149851 - samples/sec: 18.20 - lr: 0.020000\n",
+      "2021-09-21 19:17:18,817 epoch 9 - iter 18/32 - loss 0.00168330 - samples/sec: 15.55 - lr: 0.020000\n",
+      "2021-09-21 19:17:19,012 epoch 9 - iter 21/32 - loss 0.00162037 - samples/sec: 15.44 - lr: 0.020000\n",
+      "2021-09-21 19:17:19,182 epoch 9 - iter 24/32 - loss 0.00143346 - samples/sec: 17.74 - lr: 0.020000\n",
+      "2021-09-21 19:17:19,347 epoch 9 - iter 27/32 - loss 0.00134534 - samples/sec: 18.18 - lr: 0.020000\n",
+      "2021-09-21 19:17:19,526 epoch 9 - iter 30/32 - loss 0.00126354 - samples/sec: 16.81 - lr: 0.020000\n",
+      "2021-09-21 19:17:19,639 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:17:19,640 EPOCH 9 done: loss 0.0012 - lr 0.0200000\n",
+      "2021-09-21 19:17:19,854 DEV : loss 0.9255843758583069 - score 0.75\n",
+      "2021-09-21 19:17:19,856 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:17:19,931 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:17:20,135 epoch 10 - iter 3/32 - loss 0.00057347 - samples/sec: 16.75 - lr: 0.020000\n",
+      "2021-09-21 19:17:20,342 epoch 10 - iter 6/32 - loss 0.00169147 - samples/sec: 14.56 - lr: 0.020000\n",
+      "2021-09-21 19:17:20,521 epoch 10 - iter 9/32 - loss 0.00132105 - samples/sec: 16.78 - lr: 0.020000\n",
+      "2021-09-21 19:17:20,704 epoch 10 - iter 12/32 - loss 0.00112393 - samples/sec: 16.48 - lr: 0.020000\n",
+      "2021-09-21 19:17:20,875 epoch 10 - iter 15/32 - loss 0.00106046 - samples/sec: 17.59 - lr: 0.020000\n",
+      "2021-09-21 19:17:21,045 epoch 10 - iter 18/32 - loss 0.00109425 - samples/sec: 17.73 - lr: 0.020000\n",
+      "2021-09-21 19:17:21,252 epoch 10 - iter 21/32 - loss 0.00100645 - samples/sec: 14.53 - lr: 0.020000\n",
+      "2021-09-21 19:17:21,415 epoch 10 - iter 24/32 - loss 0.00096574 - samples/sec: 18.40 - lr: 0.020000\n",
+      "2021-09-21 19:17:21,607 epoch 10 - iter 27/32 - loss 0.00089479 - samples/sec: 15.67 - lr: 0.020000\n",
+      "2021-09-21 19:17:21,779 epoch 10 - iter 30/32 - loss 0.00085232 - samples/sec: 17.53 - lr: 0.020000\n",
+      "2021-09-21 19:17:21,908 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:17:21,908 EPOCH 10 done: loss 0.0010 - lr 0.0200000\n",
+      "2021-09-21 19:17:22,013 DEV : loss 0.8299611806869507 - score 0.75\n",
+      "2021-09-21 19:17:22,016 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:17:26,112 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:17:26,114 Testing using best model ...\n",
+      "2021-09-21 19:17:26,116 loading file None1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:52:04,101 \t0.75\n",
-      "2021-09-08 10:52:04,101 \n",
+      "2021-09-21 19:17:31,535 \t0.75\n",
+      "2021-09-21 19:17:31,536 \n",
       "Results:\n",
       "- F-score (micro) 0.75\n",
-      "- F-score (macro) 0.6667\n",
+      "- F-score (macro) 0.5\n",
       "- Accuracy 0.75\n",
       "\n",
       "By class:\n",
       "                              precision    recall  f1-score   support\n",
       "\n",
       " this text expresses sadness     1.0000    1.0000    1.0000         1\n",
-      "this text expresses optimism     1.0000    1.0000    1.0000         1\n",
-      "   this text expresses anger     0.0000    0.0000    0.0000         0\n",
-      "     this text expresses joy     1.0000    0.5000    0.6667         2\n",
+      "this text expresses optimism     0.0000    0.0000    0.0000         0\n",
+      "   this text expresses anger     0.0000    0.0000    0.0000         1\n",
+      "     this text expresses joy     1.0000    1.0000    1.0000         2\n",
       "\n",
       "                   micro avg     0.7500    0.7500    0.7500         4\n",
-      "                   macro avg     0.7500    0.6250    0.6667         4\n",
-      "                weighted avg     1.0000    0.7500    0.8333         4\n",
+      "                   macro avg     0.5000    0.5000    0.5000         4\n",
+      "                weighted avg     0.7500    0.7500    0.7500         4\n",
       "                 samples avg     0.7500    0.7500    0.7500         4\n",
       "\n",
-      "2021-09-08 10:52:04,102 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:11,323 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
+      "2021-09-21 19:17:31,536 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:17:46,807 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:52:15,700 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:17:51,164 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 36/36 [00:00<00:00, 41607.87it/s]"
+      "100%|██████████| 36/36 [00:00<00:00, 44965.74it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:52:15,703 [b'this text expresses sadness', b'this text expresses optimism', b'this text expresses anger', b'this text expresses joy']\n",
-      "2021-09-08 10:52:15,863 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:15,865 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:17:51,166 [b'this text expresses sadness', b'this text expresses optimism', b'this text expresses anger', b'this text expresses joy']\n",
+      "2021-09-21 19:17:51,175 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:17:51,177 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -3286,24 +3292,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:52:15,866 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:15,866 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
-      "2021-09-08 10:52:15,866 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:15,867 Parameters:\n",
-      "2021-09-08 10:52:15,867  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:52:15,867  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:52:15,868  - patience: \"3\"\n",
-      "2021-09-08 10:52:15,868  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:52:15,868  - max_epochs: \"10\"\n",
-      "2021-09-08 10:52:15,868  - shuffle: \"True\"\n",
-      "2021-09-08 10:52:15,869  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:52:15,869  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:52:15,869 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:15,870 Model training base path: \"None1\"\n",
-      "2021-09-08 10:52:15,870 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:15,870 Device: cuda:1\n",
-      "2021-09-08 10:52:15,870 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:15,871 Embeddings storage mode: cpu\n"
+      "2021-09-21 19:17:51,178 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:17:51,178 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
+      "2021-09-21 19:17:51,178 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:17:51,178 Parameters:\n",
+      "2021-09-21 19:17:51,179  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:17:51,179  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:17:51,179  - patience: \"3\"\n",
+      "2021-09-21 19:17:51,180  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:17:51,180  - max_epochs: \"10\"\n",
+      "2021-09-21 19:17:51,180  - shuffle: \"True\"\n",
+      "2021-09-21 19:17:51,180  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:17:51,181  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:17:51,181 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:17:51,181 Model training base path: \"None1\"\n",
+      "2021-09-21 19:17:51,182 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:17:51,182 Device: cuda:0\n",
+      "2021-09-21 19:17:51,182 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:17:51,182 Embeddings storage mode: cpu\n",
+      "2021-09-21 19:17:51,189 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -3317,213 +3324,208 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:52:16,073 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:16,240 epoch 1 - iter 3/32 - loss 0.31973156 - samples/sec: 19.70 - lr: 0.020000\n",
-      "2021-09-08 10:52:16,388 epoch 1 - iter 6/32 - loss 0.32647429 - samples/sec: 20.41 - lr: 0.020000\n",
-      "2021-09-08 10:52:16,543 epoch 1 - iter 9/32 - loss 0.61029188 - samples/sec: 19.34 - lr: 0.020000\n",
-      "2021-09-08 10:52:16,698 epoch 1 - iter 12/32 - loss 0.69584602 - samples/sec: 19.45 - lr: 0.020000\n",
-      "2021-09-08 10:52:16,859 epoch 1 - iter 15/32 - loss 0.73501327 - samples/sec: 18.72 - lr: 0.020000\n",
-      "2021-09-08 10:52:17,005 epoch 1 - iter 18/32 - loss 0.71813030 - samples/sec: 20.65 - lr: 0.020000\n",
-      "2021-09-08 10:52:17,156 epoch 1 - iter 21/32 - loss 0.64777482 - samples/sec: 19.85 - lr: 0.020000\n",
-      "2021-09-08 10:52:17,304 epoch 1 - iter 24/32 - loss 0.63461624 - samples/sec: 20.41 - lr: 0.020000\n",
-      "2021-09-08 10:52:17,458 epoch 1 - iter 27/32 - loss 0.72209091 - samples/sec: 19.59 - lr: 0.020000\n",
-      "2021-09-08 10:52:17,620 epoch 1 - iter 30/32 - loss 0.74551905 - samples/sec: 18.55 - lr: 0.020000\n",
-      "2021-09-08 10:52:17,732 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:17,732 EPOCH 1 done: loss 0.7290 - lr 0.0200000\n",
-      "2021-09-08 10:52:17,939 DEV : loss 1.2516758441925049 - score 0.25\n",
-      "2021-09-08 10:52:17,940 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:52:28,380 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:28,563 epoch 2 - iter 3/32 - loss 1.06098701 - samples/sec: 19.03 - lr: 0.020000\n",
-      "2021-09-08 10:52:28,715 epoch 2 - iter 6/32 - loss 0.87293597 - samples/sec: 19.85 - lr: 0.020000\n",
-      "2021-09-08 10:52:28,877 epoch 2 - iter 9/32 - loss 0.80094756 - samples/sec: 18.66 - lr: 0.020000\n",
-      "2021-09-08 10:52:29,023 epoch 2 - iter 12/32 - loss 0.72234686 - samples/sec: 20.50 - lr: 0.020000\n",
-      "2021-09-08 10:52:29,190 epoch 2 - iter 15/32 - loss 0.66605774 - samples/sec: 18.12 - lr: 0.020000\n",
-      "2021-09-08 10:52:29,346 epoch 2 - iter 18/32 - loss 0.68069064 - samples/sec: 19.28 - lr: 0.020000\n",
-      "2021-09-08 10:52:29,507 epoch 2 - iter 21/32 - loss 0.66354470 - samples/sec: 18.67 - lr: 0.020000\n",
-      "2021-09-08 10:52:29,668 epoch 2 - iter 24/32 - loss 0.68712628 - samples/sec: 18.71 - lr: 0.020000\n",
-      "2021-09-08 10:52:29,805 epoch 2 - iter 27/32 - loss 0.61366510 - samples/sec: 21.91 - lr: 0.020000\n",
-      "2021-09-08 10:52:29,968 epoch 2 - iter 30/32 - loss 0.58838226 - samples/sec: 18.56 - lr: 0.020000\n",
-      "2021-09-08 10:52:30,070 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:30,071 EPOCH 2 done: loss 0.5663 - lr 0.0200000\n",
-      "2021-09-08 10:52:30,253 DEV : loss 0.4962933659553528 - score 0.75\n",
-      "2021-09-08 10:52:30,254 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:52:43,197 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:43,381 epoch 3 - iter 3/32 - loss 0.18606297 - samples/sec: 19.95 - lr: 0.020000\n",
-      "2021-09-08 10:52:43,530 epoch 3 - iter 6/32 - loss 0.32513089 - samples/sec: 20.20 - lr: 0.020000\n",
-      "2021-09-08 10:52:43,677 epoch 3 - iter 9/32 - loss 0.26992359 - samples/sec: 20.55 - lr: 0.020000\n",
-      "2021-09-08 10:52:43,830 epoch 3 - iter 12/32 - loss 0.30037499 - samples/sec: 19.74 - lr: 0.020000\n",
-      "2021-09-08 10:52:43,989 epoch 3 - iter 15/32 - loss 0.34457871 - samples/sec: 18.97 - lr: 0.020000\n",
-      "2021-09-08 10:52:44,145 epoch 3 - iter 18/32 - loss 0.33306809 - samples/sec: 19.27 - lr: 0.020000\n",
-      "2021-09-08 10:52:44,304 epoch 3 - iter 21/32 - loss 0.30764103 - samples/sec: 18.95 - lr: 0.020000\n",
-      "2021-09-08 10:52:44,470 epoch 3 - iter 24/32 - loss 0.30610663 - samples/sec: 18.05 - lr: 0.020000\n",
-      "2021-09-08 10:52:44,630 epoch 3 - iter 27/32 - loss 0.37438616 - samples/sec: 18.83 - lr: 0.020000\n",
-      "2021-09-08 10:52:44,777 epoch 3 - iter 30/32 - loss 0.37972009 - samples/sec: 20.57 - lr: 0.020000\n",
-      "2021-09-08 10:52:44,886 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:44,886 EPOCH 3 done: loss 0.3751 - lr 0.0200000\n",
-      "2021-09-08 10:52:45,098 DEV : loss 0.6459803581237793 - score 0.5\n",
-      "2021-09-08 10:52:45,099 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:52:45,182 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:45,366 epoch 4 - iter 3/32 - loss 0.52723458 - samples/sec: 18.15 - lr: 0.020000\n",
-      "2021-09-08 10:52:45,519 epoch 4 - iter 6/32 - loss 0.31234852 - samples/sec: 19.69 - lr: 0.020000\n",
-      "2021-09-08 10:52:45,673 epoch 4 - iter 9/32 - loss 0.48732595 - samples/sec: 19.45 - lr: 0.020000\n",
-      "2021-09-08 10:52:45,834 epoch 4 - iter 12/32 - loss 0.40949099 - samples/sec: 18.79 - lr: 0.020000\n",
-      "2021-09-08 10:52:45,992 epoch 4 - iter 15/32 - loss 0.38264600 - samples/sec: 19.13 - lr: 0.020000\n",
-      "2021-09-08 10:52:46,137 epoch 4 - iter 18/32 - loss 0.33404205 - samples/sec: 20.77 - lr: 0.020000\n",
-      "2021-09-08 10:52:46,303 epoch 4 - iter 21/32 - loss 0.38260264 - samples/sec: 18.03 - lr: 0.020000\n",
-      "2021-09-08 10:52:46,462 epoch 4 - iter 24/32 - loss 0.35210626 - samples/sec: 19.01 - lr: 0.020000\n",
-      "2021-09-08 10:52:46,612 epoch 4 - iter 27/32 - loss 0.31370768 - samples/sec: 20.03 - lr: 0.020000\n",
-      "2021-09-08 10:52:46,761 epoch 4 - iter 30/32 - loss 0.29049525 - samples/sec: 20.23 - lr: 0.020000\n",
-      "2021-09-08 10:52:46,864 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:46,864 EPOCH 4 done: loss 0.2814 - lr 0.0200000\n",
-      "2021-09-08 10:52:47,078 DEV : loss 0.3582381010055542 - score 0.75\n",
-      "2021-09-08 10:52:47,079 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:17:51,418 epoch 1 - iter 3/32 - loss 0.39139484 - samples/sec: 14.19 - lr: 0.020000\n",
+      "2021-09-21 19:17:51,636 epoch 1 - iter 6/32 - loss 0.33817501 - samples/sec: 13.84 - lr: 0.020000\n",
+      "2021-09-21 19:17:51,823 epoch 1 - iter 9/32 - loss 0.65664101 - samples/sec: 16.07 - lr: 0.020000\n",
+      "2021-09-21 19:17:52,034 epoch 1 - iter 12/32 - loss 0.75854637 - samples/sec: 14.26 - lr: 0.020000\n",
+      "2021-09-21 19:17:52,252 epoch 1 - iter 15/32 - loss 0.69675433 - samples/sec: 13.80 - lr: 0.020000\n",
+      "2021-09-21 19:17:52,450 epoch 1 - iter 18/32 - loss 0.68226880 - samples/sec: 15.17 - lr: 0.020000\n",
+      "2021-09-21 19:17:52,667 epoch 1 - iter 21/32 - loss 0.60578482 - samples/sec: 13.86 - lr: 0.020000\n",
+      "2021-09-21 19:17:52,865 epoch 1 - iter 24/32 - loss 0.65655341 - samples/sec: 15.19 - lr: 0.020000\n",
+      "2021-09-21 19:17:53,093 epoch 1 - iter 27/32 - loss 0.68932428 - samples/sec: 13.19 - lr: 0.020000\n",
+      "2021-09-21 19:17:53,293 epoch 1 - iter 30/32 - loss 0.70326892 - samples/sec: 15.04 - lr: 0.020000\n",
+      "2021-09-21 19:17:53,431 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:17:53,431 EPOCH 1 done: loss 0.6817 - lr 0.0200000\n",
+      "2021-09-21 19:17:53,551 DEV : loss 0.3706888258457184 - score 0.75\n",
+      "2021-09-21 19:17:53,552 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:52:50,842 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:50,995 epoch 5 - iter 3/32 - loss 0.01347825 - samples/sec: 21.70 - lr: 0.020000\n",
-      "2021-09-08 10:52:51,127 epoch 5 - iter 6/32 - loss 0.00975357 - samples/sec: 22.79 - lr: 0.020000\n",
-      "2021-09-08 10:52:51,263 epoch 5 - iter 9/32 - loss 0.03558675 - samples/sec: 22.12 - lr: 0.020000\n",
-      "2021-09-08 10:52:51,402 epoch 5 - iter 12/32 - loss 0.03184419 - samples/sec: 21.65 - lr: 0.020000\n",
-      "2021-09-08 10:52:51,548 epoch 5 - iter 15/32 - loss 0.14438484 - samples/sec: 20.64 - lr: 0.020000\n",
-      "2021-09-08 10:52:51,684 epoch 5 - iter 18/32 - loss 0.19607582 - samples/sec: 22.31 - lr: 0.020000\n",
-      "2021-09-08 10:52:51,817 epoch 5 - iter 21/32 - loss 0.20997219 - samples/sec: 22.62 - lr: 0.020000\n",
-      "2021-09-08 10:52:51,949 epoch 5 - iter 24/32 - loss 0.20168212 - samples/sec: 22.70 - lr: 0.020000\n",
-      "2021-09-08 10:52:52,083 epoch 5 - iter 27/32 - loss 0.18180369 - samples/sec: 22.63 - lr: 0.020000\n",
-      "2021-09-08 10:52:52,222 epoch 5 - iter 30/32 - loss 0.18833367 - samples/sec: 21.53 - lr: 0.020000\n",
-      "2021-09-08 10:52:52,310 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:52,311 EPOCH 5 done: loss 0.1768 - lr 0.0200000\n",
-      "2021-09-08 10:52:52,371 DEV : loss 0.07253511250019073 - score 1.0\n",
-      "2021-09-08 10:52:52,372 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:52:56,444 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:56,626 epoch 6 - iter 3/32 - loss 0.00744048 - samples/sec: 18.14 - lr: 0.020000\n",
-      "2021-09-08 10:52:56,800 epoch 6 - iter 6/32 - loss 0.02138534 - samples/sec: 17.37 - lr: 0.020000\n",
-      "2021-09-08 10:52:56,978 epoch 6 - iter 9/32 - loss 0.14153165 - samples/sec: 16.98 - lr: 0.020000\n",
-      "2021-09-08 10:52:57,144 epoch 6 - iter 12/32 - loss 0.16885283 - samples/sec: 18.10 - lr: 0.020000\n",
-      "2021-09-08 10:52:57,298 epoch 6 - iter 15/32 - loss 0.13557034 - samples/sec: 19.60 - lr: 0.020000\n",
-      "2021-09-08 10:52:57,456 epoch 6 - iter 18/32 - loss 0.21103859 - samples/sec: 19.08 - lr: 0.020000\n",
-      "2021-09-08 10:52:57,610 epoch 6 - iter 21/32 - loss 0.18134297 - samples/sec: 19.60 - lr: 0.020000\n",
-      "2021-09-08 10:52:57,772 epoch 6 - iter 24/32 - loss 0.24195239 - samples/sec: 18.58 - lr: 0.020000\n",
-      "2021-09-08 10:52:57,925 epoch 6 - iter 27/32 - loss 0.21552353 - samples/sec: 19.62 - lr: 0.020000\n"
+      "2021-09-21 19:17:57,678 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:17:57,890 epoch 2 - iter 3/32 - loss 0.67169846 - samples/sec: 15.52 - lr: 0.020000\n",
+      "2021-09-21 19:17:58,121 epoch 2 - iter 6/32 - loss 0.59971812 - samples/sec: 13.02 - lr: 0.020000\n",
+      "2021-09-21 19:17:58,329 epoch 2 - iter 9/32 - loss 0.80607705 - samples/sec: 14.48 - lr: 0.020000\n",
+      "2021-09-21 19:17:58,543 epoch 2 - iter 12/32 - loss 0.73229034 - samples/sec: 14.05 - lr: 0.020000\n",
+      "2021-09-21 19:17:58,777 epoch 2 - iter 15/32 - loss 0.62177496 - samples/sec: 12.86 - lr: 0.020000\n",
+      "2021-09-21 19:17:58,954 epoch 2 - iter 18/32 - loss 0.56978726 - samples/sec: 16.93 - lr: 0.020000\n",
+      "2021-09-21 19:17:59,156 epoch 2 - iter 21/32 - loss 0.53795469 - samples/sec: 14.89 - lr: 0.020000\n",
+      "2021-09-21 19:17:59,372 epoch 2 - iter 24/32 - loss 0.50548270 - samples/sec: 13.93 - lr: 0.020000\n",
+      "2021-09-21 19:17:59,558 epoch 2 - iter 27/32 - loss 0.48803195 - samples/sec: 16.24 - lr: 0.020000\n",
+      "2021-09-21 19:17:59,755 epoch 2 - iter 30/32 - loss 0.50513493 - samples/sec: 15.25 - lr: 0.020000\n",
+      "2021-09-21 19:17:59,883 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:17:59,884 EPOCH 2 done: loss 0.5318 - lr 0.0200000\n",
+      "2021-09-21 19:18:00,004 DEV : loss 0.4697152078151703 - score 0.75\n",
+      "2021-09-21 19:18:00,006 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:18:00,008 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:18:00,218 epoch 3 - iter 3/32 - loss 0.08709922 - samples/sec: 16.01 - lr: 0.020000\n",
+      "2021-09-21 19:18:00,418 epoch 3 - iter 6/32 - loss 0.36934538 - samples/sec: 15.08 - lr: 0.020000\n",
+      "2021-09-21 19:18:00,633 epoch 3 - iter 9/32 - loss 0.73740561 - samples/sec: 13.98 - lr: 0.020000\n",
+      "2021-09-21 19:18:00,828 epoch 3 - iter 12/32 - loss 0.60742505 - samples/sec: 15.47 - lr: 0.020000\n",
+      "2021-09-21 19:18:01,015 epoch 3 - iter 15/32 - loss 0.52290573 - samples/sec: 16.07 - lr: 0.020000\n",
+      "2021-09-21 19:18:01,228 epoch 3 - iter 18/32 - loss 0.45908207 - samples/sec: 14.14 - lr: 0.020000\n",
+      "2021-09-21 19:18:01,451 epoch 3 - iter 21/32 - loss 0.43652740 - samples/sec: 13.45 - lr: 0.020000\n",
+      "2021-09-21 19:18:01,684 epoch 3 - iter 24/32 - loss 0.44209507 - samples/sec: 12.90 - lr: 0.020000\n",
+      "2021-09-21 19:18:01,919 epoch 3 - iter 27/32 - loss 0.40224445 - samples/sec: 12.81 - lr: 0.020000\n",
+      "2021-09-21 19:18:02,116 epoch 3 - iter 30/32 - loss 0.36423995 - samples/sec: 15.31 - lr: 0.020000\n",
+      "2021-09-21 19:18:02,244 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:18:02,244 EPOCH 3 done: loss 0.3421 - lr 0.0200000\n",
+      "2021-09-21 19:18:02,382 DEV : loss 1.2029602527618408 - score 0.5\n",
+      "2021-09-21 19:18:02,383 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:18:02,385 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:18:02,598 epoch 4 - iter 3/32 - loss 0.00348854 - samples/sec: 16.30 - lr: 0.020000\n",
+      "2021-09-21 19:18:02,825 epoch 4 - iter 6/32 - loss 0.00329334 - samples/sec: 13.27 - lr: 0.020000\n",
+      "2021-09-21 19:18:03,023 epoch 4 - iter 9/32 - loss 0.20134250 - samples/sec: 15.13 - lr: 0.020000\n",
+      "2021-09-21 19:18:03,218 epoch 4 - iter 12/32 - loss 0.15647200 - samples/sec: 15.43 - lr: 0.020000\n",
+      "2021-09-21 19:18:03,406 epoch 4 - iter 15/32 - loss 0.22300991 - samples/sec: 16.03 - lr: 0.020000\n",
+      "2021-09-21 19:18:03,621 epoch 4 - iter 18/32 - loss 0.23197866 - samples/sec: 13.96 - lr: 0.020000\n",
+      "2021-09-21 19:18:03,840 epoch 4 - iter 21/32 - loss 0.23021234 - samples/sec: 13.77 - lr: 0.020000\n",
+      "2021-09-21 19:18:04,032 epoch 4 - iter 24/32 - loss 0.23545912 - samples/sec: 15.63 - lr: 0.020000\n",
+      "2021-09-21 19:18:04,260 epoch 4 - iter 27/32 - loss 0.26552635 - samples/sec: 13.19 - lr: 0.020000\n",
+      "2021-09-21 19:18:04,461 epoch 4 - iter 30/32 - loss 0.29254860 - samples/sec: 14.98 - lr: 0.020000\n",
+      "2021-09-21 19:18:04,611 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:18:04,611 EPOCH 4 done: loss 0.3071 - lr 0.0200000\n",
+      "2021-09-21 19:18:04,754 DEV : loss 0.7651877999305725 - score 0.5\n",
+      "2021-09-21 19:18:04,755 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:18:04,757 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:18:04,999 epoch 5 - iter 3/32 - loss 0.21788548 - samples/sec: 14.14 - lr: 0.020000\n",
+      "2021-09-21 19:18:05,207 epoch 5 - iter 6/32 - loss 0.13459196 - samples/sec: 14.49 - lr: 0.020000\n",
+      "2021-09-21 19:18:05,390 epoch 5 - iter 9/32 - loss 0.12416315 - samples/sec: 16.44 - lr: 0.020000\n",
+      "2021-09-21 19:18:05,600 epoch 5 - iter 12/32 - loss 0.09718212 - samples/sec: 14.30 - lr: 0.020000\n",
+      "2021-09-21 19:18:05,792 epoch 5 - iter 15/32 - loss 0.16392352 - samples/sec: 15.74 - lr: 0.020000\n",
+      "2021-09-21 19:18:05,993 epoch 5 - iter 18/32 - loss 0.13907079 - samples/sec: 14.96 - lr: 0.020000\n",
+      "2021-09-21 19:18:06,187 epoch 5 - iter 21/32 - loss 0.11972595 - samples/sec: 15.50 - lr: 0.020000\n",
+      "2021-09-21 19:18:06,385 epoch 5 - iter 24/32 - loss 0.10696532 - samples/sec: 15.17 - lr: 0.020000\n",
+      "2021-09-21 19:18:06,603 epoch 5 - iter 27/32 - loss 0.09550570 - samples/sec: 13.85 - lr: 0.020000\n",
+      "2021-09-21 19:18:06,794 epoch 5 - iter 30/32 - loss 0.08698165 - samples/sec: 15.74 - lr: 0.020000\n",
+      "2021-09-21 19:18:06,917 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:18:06,917 EPOCH 5 done: loss 0.0822 - lr 0.0200000\n",
+      "2021-09-21 19:18:07,063 DEV : loss 0.9024463891983032 - score 0.5\n",
+      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 19:18:07,063 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:18:07,066 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:18:07,291 epoch 6 - iter 3/32 - loss 0.03025924 - samples/sec: 14.72 - lr: 0.010000\n",
+      "2021-09-21 19:18:07,469 epoch 6 - iter 6/32 - loss 0.02169499 - samples/sec: 16.97 - lr: 0.010000\n",
+      "2021-09-21 19:18:07,655 epoch 6 - iter 9/32 - loss 0.02435240 - samples/sec: 16.12 - lr: 0.010000\n",
+      "2021-09-21 19:18:07,888 epoch 6 - iter 12/32 - loss 0.01909418 - samples/sec: 12.93 - lr: 0.010000\n",
+      "2021-09-21 19:18:08,051 epoch 6 - iter 15/32 - loss 0.01615496 - samples/sec: 18.48 - lr: 0.010000\n",
+      "2021-09-21 19:18:08,256 epoch 6 - iter 18/32 - loss 0.01470449 - samples/sec: 14.69 - lr: 0.010000\n",
+      "2021-09-21 19:18:08,431 epoch 6 - iter 21/32 - loss 0.01282595 - samples/sec: 17.15 - lr: 0.010000\n",
+      "2021-09-21 19:18:08,652 epoch 6 - iter 24/32 - loss 0.01151195 - samples/sec: 13.63 - lr: 0.010000\n",
+      "2021-09-21 19:18:08,884 epoch 6 - iter 27/32 - loss 0.07073254 - samples/sec: 12.95 - lr: 0.010000\n",
+      "2021-09-21 19:18:09,092 epoch 6 - iter 30/32 - loss 0.09261719 - samples/sec: 14.48 - lr: 0.010000\n",
+      "2021-09-21 19:18:09,227 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:52:58,090 epoch 6 - iter 30/32 - loss 0.19681522 - samples/sec: 18.27 - lr: 0.020000\n",
-      "2021-09-08 10:52:58,197 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:58,198 EPOCH 6 done: loss 0.1861 - lr 0.0200000\n",
-      "2021-09-08 10:52:58,362 DEV : loss 0.03242535889148712 - score 1.0\n",
-      "2021-09-08 10:52:58,363 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:53:03,334 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:03,506 epoch 7 - iter 3/32 - loss 0.00117657 - samples/sec: 19.32 - lr: 0.020000\n",
-      "2021-09-08 10:53:03,673 epoch 7 - iter 6/32 - loss 0.24386965 - samples/sec: 18.08 - lr: 0.020000\n",
-      "2021-09-08 10:53:03,827 epoch 7 - iter 9/32 - loss 0.16402240 - samples/sec: 19.54 - lr: 0.020000\n",
-      "2021-09-08 10:53:03,984 epoch 7 - iter 12/32 - loss 0.12842586 - samples/sec: 19.12 - lr: 0.020000\n",
-      "2021-09-08 10:53:04,141 epoch 7 - iter 15/32 - loss 0.10781068 - samples/sec: 19.14 - lr: 0.020000\n",
-      "2021-09-08 10:53:04,293 epoch 7 - iter 18/32 - loss 0.08993363 - samples/sec: 19.81 - lr: 0.020000\n",
-      "2021-09-08 10:53:04,451 epoch 7 - iter 21/32 - loss 0.08000453 - samples/sec: 19.08 - lr: 0.020000\n",
-      "2021-09-08 10:53:04,604 epoch 7 - iter 24/32 - loss 0.07058474 - samples/sec: 19.68 - lr: 0.020000\n",
-      "2021-09-08 10:53:04,757 epoch 7 - iter 27/32 - loss 0.06311333 - samples/sec: 19.71 - lr: 0.020000\n",
-      "2021-09-08 10:53:04,913 epoch 7 - iter 30/32 - loss 0.05696314 - samples/sec: 19.23 - lr: 0.020000\n",
-      "2021-09-08 10:53:05,015 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:05,016 EPOCH 7 done: loss 0.0535 - lr 0.0200000\n",
-      "2021-09-08 10:53:05,413 DEV : loss 0.0434693917632103 - score 1.0\n",
-      "2021-09-08 10:53:05,414 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:53:05,470 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:05,638 epoch 8 - iter 3/32 - loss 0.00133791 - samples/sec: 19.70 - lr: 0.020000\n",
-      "2021-09-08 10:53:05,790 epoch 8 - iter 6/32 - loss 0.00140388 - samples/sec: 19.73 - lr: 0.020000\n",
-      "2021-09-08 10:53:05,920 epoch 8 - iter 9/32 - loss 0.00159949 - samples/sec: 23.19 - lr: 0.020000\n",
-      "2021-09-08 10:53:06,052 epoch 8 - iter 12/32 - loss 0.00137020 - samples/sec: 22.77 - lr: 0.020000\n",
-      "2021-09-08 10:53:06,186 epoch 8 - iter 15/32 - loss 0.00317739 - samples/sec: 22.56 - lr: 0.020000\n",
-      "2021-09-08 10:53:06,316 epoch 8 - iter 18/32 - loss 0.00379921 - samples/sec: 23.06 - lr: 0.020000\n",
-      "2021-09-08 10:53:06,446 epoch 8 - iter 21/32 - loss 0.00356002 - samples/sec: 23.21 - lr: 0.020000\n",
-      "2021-09-08 10:53:06,579 epoch 8 - iter 24/32 - loss 0.04035147 - samples/sec: 22.59 - lr: 0.020000\n",
-      "2021-09-08 10:53:06,709 epoch 8 - iter 27/32 - loss 0.03607007 - samples/sec: 23.18 - lr: 0.020000\n",
-      "2021-09-08 10:53:06,839 epoch 8 - iter 30/32 - loss 0.03307016 - samples/sec: 23.15 - lr: 0.020000\n",
-      "2021-09-08 10:53:06,927 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:06,927 EPOCH 8 done: loss 0.0312 - lr 0.0200000\n",
-      "2021-09-08 10:53:07,700 DEV : loss 0.006393031217157841 - score 1.0\n",
-      "2021-09-08 10:53:07,701 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:53:14,512 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:14,657 epoch 9 - iter 3/32 - loss 0.00108534 - samples/sec: 22.85 - lr: 0.020000\n",
-      "2021-09-08 10:53:14,788 epoch 9 - iter 6/32 - loss 0.00072720 - samples/sec: 23.09 - lr: 0.020000\n",
-      "2021-09-08 10:53:14,919 epoch 9 - iter 9/32 - loss 0.00121089 - samples/sec: 22.97 - lr: 0.020000\n",
-      "2021-09-08 10:53:15,049 epoch 9 - iter 12/32 - loss 0.00118496 - samples/sec: 23.21 - lr: 0.020000\n",
-      "2021-09-08 10:53:15,179 epoch 9 - iter 15/32 - loss 0.00106902 - samples/sec: 23.05 - lr: 0.020000\n",
-      "2021-09-08 10:53:15,310 epoch 9 - iter 18/32 - loss 0.00100235 - samples/sec: 23.11 - lr: 0.020000\n",
-      "2021-09-08 10:53:15,439 epoch 9 - iter 21/32 - loss 0.00109650 - samples/sec: 23.21 - lr: 0.020000\n",
-      "2021-09-08 10:53:15,569 epoch 9 - iter 24/32 - loss 0.00108508 - samples/sec: 23.16 - lr: 0.020000\n",
-      "2021-09-08 10:53:15,727 epoch 9 - iter 27/32 - loss 0.00099792 - samples/sec: 19.13 - lr: 0.020000\n",
-      "2021-09-08 10:53:15,889 epoch 9 - iter 30/32 - loss 0.00105814 - samples/sec: 18.54 - lr: 0.020000\n",
-      "2021-09-08 10:53:15,994 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:15,995 EPOCH 9 done: loss 0.0010 - lr 0.0200000\n",
-      "2021-09-08 10:53:19,286 DEV : loss 0.0036298269405961037 - score 1.0\n",
-      "2021-09-08 10:53:19,287 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:53:30,196 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:30,343 epoch 10 - iter 3/32 - loss 0.00024838 - samples/sec: 22.75 - lr: 0.020000\n",
-      "2021-09-08 10:53:30,476 epoch 10 - iter 6/32 - loss 0.00034732 - samples/sec: 22.70 - lr: 0.020000\n",
-      "2021-09-08 10:53:30,642 epoch 10 - iter 9/32 - loss 0.00091479 - samples/sec: 18.08 - lr: 0.020000\n",
-      "2021-09-08 10:53:30,808 epoch 10 - iter 12/32 - loss 0.00083344 - samples/sec: 18.13 - lr: 0.020000\n",
-      "2021-09-08 10:53:30,952 epoch 10 - iter 15/32 - loss 0.00078461 - samples/sec: 20.96 - lr: 0.020000\n",
-      "2021-09-08 10:53:31,082 epoch 10 - iter 18/32 - loss 0.00074292 - samples/sec: 23.15 - lr: 0.020000\n",
-      "2021-09-08 10:53:31,212 epoch 10 - iter 21/32 - loss 0.00077499 - samples/sec: 23.22 - lr: 0.020000\n",
-      "2021-09-08 10:53:31,341 epoch 10 - iter 24/32 - loss 0.00084729 - samples/sec: 23.22 - lr: 0.020000\n",
-      "2021-09-08 10:53:31,471 epoch 10 - iter 27/32 - loss 0.00079984 - samples/sec: 23.25 - lr: 0.020000\n",
-      "2021-09-08 10:53:31,600 epoch 10 - iter 30/32 - loss 0.00077832 - samples/sec: 23.30 - lr: 0.020000\n",
-      "2021-09-08 10:53:31,687 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:31,687 EPOCH 10 done: loss 0.0007 - lr 0.0200000\n",
-      "2021-09-08 10:53:34,210 DEV : loss 0.01017171423882246 - score 1.0\n",
-      "2021-09-08 10:53:34,211 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:53:42,859 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:42,860 Testing using best model ...\n",
-      "2021-09-08 10:53:42,900 loading file None1/best-model.pt\n",
+      "2021-09-21 19:18:09,228 EPOCH 6 done: loss 0.0869 - lr 0.0100000\n",
+      "2021-09-21 19:18:09,374 DEV : loss 0.8058503270149231 - score 0.5\n",
+      "2021-09-21 19:18:09,375 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:18:09,376 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:18:09,639 epoch 7 - iter 3/32 - loss 0.00250834 - samples/sec: 13.81 - lr: 0.010000\n",
+      "2021-09-21 19:18:09,859 epoch 7 - iter 6/32 - loss 0.00652771 - samples/sec: 13.69 - lr: 0.010000\n",
+      "2021-09-21 19:18:10,036 epoch 7 - iter 9/32 - loss 0.00503317 - samples/sec: 16.99 - lr: 0.010000\n",
+      "2021-09-21 19:18:10,244 epoch 7 - iter 12/32 - loss 0.03008123 - samples/sec: 14.45 - lr: 0.010000\n",
+      "2021-09-21 19:18:10,438 epoch 7 - iter 15/32 - loss 0.02430288 - samples/sec: 15.56 - lr: 0.010000\n",
+      "2021-09-21 19:18:10,613 epoch 7 - iter 18/32 - loss 0.02086898 - samples/sec: 17.20 - lr: 0.010000\n",
+      "2021-09-21 19:18:10,814 epoch 7 - iter 21/32 - loss 0.01816964 - samples/sec: 15.00 - lr: 0.010000\n",
+      "2021-09-21 19:18:11,014 epoch 7 - iter 24/32 - loss 0.01711728 - samples/sec: 15.00 - lr: 0.010000\n",
+      "2021-09-21 19:18:11,203 epoch 7 - iter 27/32 - loss 0.05487897 - samples/sec: 15.91 - lr: 0.010000\n",
+      "2021-09-21 19:18:11,387 epoch 7 - iter 30/32 - loss 0.04985561 - samples/sec: 16.40 - lr: 0.010000\n",
+      "2021-09-21 19:18:11,507 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:18:11,508 EPOCH 7 done: loss 0.0468 - lr 0.0100000\n",
+      "2021-09-21 19:18:11,656 DEV : loss 0.7894474267959595 - score 0.5\n",
+      "2021-09-21 19:18:11,656 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:18:11,658 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:18:11,863 epoch 8 - iter 3/32 - loss 0.00278679 - samples/sec: 17.32 - lr: 0.010000\n",
+      "2021-09-21 19:18:12,042 epoch 8 - iter 6/32 - loss 0.00214201 - samples/sec: 16.77 - lr: 0.010000\n",
+      "2021-09-21 19:18:12,254 epoch 8 - iter 9/32 - loss 0.00414838 - samples/sec: 14.23 - lr: 0.010000\n",
+      "2021-09-21 19:18:12,455 epoch 8 - iter 12/32 - loss 0.09222767 - samples/sec: 14.95 - lr: 0.010000\n",
+      "2021-09-21 19:18:12,640 epoch 8 - iter 15/32 - loss 0.08011234 - samples/sec: 16.28 - lr: 0.010000\n",
+      "2021-09-21 19:18:12,840 epoch 8 - iter 18/32 - loss 0.06700494 - samples/sec: 14.99 - lr: 0.010000\n",
+      "2021-09-21 19:18:13,051 epoch 8 - iter 21/32 - loss 0.05744398 - samples/sec: 14.25 - lr: 0.010000\n",
+      "2021-09-21 19:18:13,248 epoch 8 - iter 24/32 - loss 0.05043041 - samples/sec: 15.30 - lr: 0.010000\n",
+      "2021-09-21 19:18:13,464 epoch 8 - iter 27/32 - loss 0.04509215 - samples/sec: 13.94 - lr: 0.010000\n",
+      "2021-09-21 19:18:13,649 epoch 8 - iter 30/32 - loss 0.04077739 - samples/sec: 16.19 - lr: 0.010000\n",
+      "2021-09-21 19:18:13,782 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:18:13,782 EPOCH 8 done: loss 0.0392 - lr 0.0100000\n",
+      "2021-09-21 19:18:13,926 DEV : loss 0.8120271563529968 - score 0.5\n",
+      "2021-09-21 19:18:13,927 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:18:13,930 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:18:14,151 epoch 9 - iter 3/32 - loss 0.00079226 - samples/sec: 15.04 - lr: 0.010000\n",
+      "2021-09-21 19:18:14,337 epoch 9 - iter 6/32 - loss 0.00092482 - samples/sec: 16.17 - lr: 0.010000\n",
+      "2021-09-21 19:18:14,540 epoch 9 - iter 9/32 - loss 0.00120737 - samples/sec: 14.85 - lr: 0.010000\n",
+      "2021-09-21 19:18:14,730 epoch 9 - iter 12/32 - loss 0.00104311 - samples/sec: 15.84 - lr: 0.010000\n",
+      "2021-09-21 19:18:14,939 epoch 9 - iter 15/32 - loss 0.00187427 - samples/sec: 14.37 - lr: 0.010000\n",
+      "2021-09-21 19:18:15,114 epoch 9 - iter 18/32 - loss 0.00173242 - samples/sec: 17.22 - lr: 0.010000\n",
+      "2021-09-21 19:18:15,336 epoch 9 - iter 21/32 - loss 0.00786053 - samples/sec: 13.53 - lr: 0.010000\n",
+      "2021-09-21 19:18:15,568 epoch 9 - iter 24/32 - loss 0.00783749 - samples/sec: 12.93 - lr: 0.010000\n",
+      "2021-09-21 19:18:15,776 epoch 9 - iter 27/32 - loss 0.00720196 - samples/sec: 14.50 - lr: 0.010000\n",
+      "2021-09-21 19:18:15,981 epoch 9 - iter 30/32 - loss 0.00658546 - samples/sec: 14.67 - lr: 0.010000\n",
+      "2021-09-21 19:18:16,098 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:18:16,098 EPOCH 9 done: loss 0.0062 - lr 0.0100000\n",
+      "2021-09-21 19:18:16,251 DEV : loss 0.9216402173042297 - score 0.5\n",
+      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 19:18:16,252 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:18:16,254 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:18:16,489 epoch 10 - iter 3/32 - loss 0.00054050 - samples/sec: 14.54 - lr: 0.005000\n",
+      "2021-09-21 19:18:16,698 epoch 10 - iter 6/32 - loss 0.00341866 - samples/sec: 14.41 - lr: 0.005000\n",
+      "2021-09-21 19:18:16,886 epoch 10 - iter 9/32 - loss 0.00249923 - samples/sec: 15.94 - lr: 0.005000\n",
+      "2021-09-21 19:18:17,055 epoch 10 - iter 12/32 - loss 0.00227377 - samples/sec: 17.90 - lr: 0.005000\n",
+      "2021-09-21 19:18:17,250 epoch 10 - iter 15/32 - loss 0.00206237 - samples/sec: 15.42 - lr: 0.005000\n",
+      "2021-09-21 19:18:17,439 epoch 10 - iter 18/32 - loss 0.00179410 - samples/sec: 15.89 - lr: 0.005000\n",
+      "2021-09-21 19:18:17,621 epoch 10 - iter 21/32 - loss 0.00177587 - samples/sec: 16.52 - lr: 0.005000\n",
+      "2021-09-21 19:18:17,809 epoch 10 - iter 24/32 - loss 0.00175715 - samples/sec: 16.04 - lr: 0.005000\n",
+      "2021-09-21 19:18:18,017 epoch 10 - iter 27/32 - loss 0.02079257 - samples/sec: 14.40 - lr: 0.005000\n",
+      "2021-09-21 19:18:18,196 epoch 10 - iter 30/32 - loss 0.01879088 - samples/sec: 16.80 - lr: 0.005000\n",
+      "2021-09-21 19:18:18,312 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:18:18,312 EPOCH 10 done: loss 0.0177 - lr 0.0050000\n",
+      "2021-09-21 19:18:18,442 DEV : loss 0.8285306692123413 - score 0.5\n",
+      "2021-09-21 19:18:18,442 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:18:22,441 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:18:22,442 Testing using best model ...\n",
+      "2021-09-21 19:18:22,444 loading file None1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:53:53,116 \t0.5\n",
-      "2021-09-08 10:53:53,116 \n",
+      "2021-09-21 19:18:27,578 \t1.0\n",
+      "2021-09-21 19:18:27,578 \n",
       "Results:\n",
-      "- F-score (micro) 0.5\n",
-      "- F-score (macro) 0.2917\n",
-      "- Accuracy 0.5\n",
+      "- F-score (micro) 1.0\n",
+      "- F-score (macro) 0.5\n",
+      "- Accuracy 1.0\n",
       "\n",
       "By class:\n",
       "                              precision    recall  f1-score   support\n",
       "\n",
-      " this text expresses sadness     0.5000    0.5000    0.5000         2\n",
+      " this text expresses sadness     1.0000    1.0000    1.0000         2\n",
       "this text expresses optimism     0.0000    0.0000    0.0000         0\n",
-      "   this text expresses anger     0.0000    0.0000    0.0000         0\n",
-      "     this text expresses joy     1.0000    0.5000    0.6667         2\n",
+      "   this text expresses anger     1.0000    1.0000    1.0000         2\n",
+      "     this text expresses joy     0.0000    0.0000    0.0000         0\n",
       "\n",
-      "                   micro avg     0.5000    0.5000    0.5000         4\n",
-      "                   macro avg     0.3750    0.2500    0.2917         4\n",
-      "                weighted avg     0.7500    0.5000    0.5833         4\n",
-      "                 samples avg     0.5000    0.5000    0.5000         4\n",
+      "                   micro avg     1.0000    1.0000    1.0000         4\n",
+      "                   macro avg     0.5000    0.5000    0.5000         4\n",
+      "                weighted avg     1.0000    1.0000    1.0000         4\n",
+      "                 samples avg     1.0000    1.0000    1.0000         4\n",
       "\n",
-      "2021-09-08 10:53:53,117 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:03,189 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
+      "2021-09-21 19:18:27,579 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:18:41,270 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:54:07,426 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:18:45,353 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 36/36 [00:00<00:00, 35494.82it/s]"
+      "100%|██████████| 36/36 [00:00<00:00, 41199.17it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:54:07,429 [b'this text expresses sadness', b'this text expresses optimism', b'this text expresses anger', b'this text expresses joy']\n",
-      "2021-09-08 10:54:07,591 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:07,593 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:18:45,356 [b'this text expresses sadness', b'this text expresses optimism', b'this text expresses anger', b'this text expresses joy']\n",
+      "2021-09-21 19:18:45,365 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:18:45,367 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -3836,24 +3838,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:54:07,594 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:07,594 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
-      "2021-09-08 10:54:07,595 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:07,595 Parameters:\n",
-      "2021-09-08 10:54:07,595  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:54:07,596  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:54:07,596  - patience: \"3\"\n",
-      "2021-09-08 10:54:07,597  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:54:07,597  - max_epochs: \"10\"\n",
-      "2021-09-08 10:54:07,597  - shuffle: \"True\"\n",
-      "2021-09-08 10:54:07,598  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:54:07,598  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:54:07,598 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:07,599 Model training base path: \"None1\"\n",
-      "2021-09-08 10:54:07,599 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:07,600 Device: cuda:1\n",
-      "2021-09-08 10:54:07,600 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:07,600 Embeddings storage mode: cpu\n"
+      "2021-09-21 19:18:45,367 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:18:45,368 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
+      "2021-09-21 19:18:45,368 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:18:45,368 Parameters:\n",
+      "2021-09-21 19:18:45,368  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:18:45,369  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:18:45,369  - patience: \"3\"\n",
+      "2021-09-21 19:18:45,369  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:18:45,369  - max_epochs: \"10\"\n",
+      "2021-09-21 19:18:45,370  - shuffle: \"True\"\n",
+      "2021-09-21 19:18:45,370  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:18:45,370  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:18:45,371 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:18:45,371 Model training base path: \"None1\"\n",
+      "2021-09-21 19:18:45,371 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:18:45,371 Device: cuda:0\n",
+      "2021-09-21 19:18:45,372 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:18:45,372 Embeddings storage mode: cpu\n",
+      "2021-09-21 19:18:45,378 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -3867,209 +3870,208 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:54:08,387 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:08,580 epoch 1 - iter 3/32 - loss 0.45366116 - samples/sec: 18.01 - lr: 0.020000\n",
-      "2021-09-08 10:54:08,748 epoch 1 - iter 6/32 - loss 0.23898592 - samples/sec: 17.95 - lr: 0.020000\n",
-      "2021-09-08 10:54:08,915 epoch 1 - iter 9/32 - loss 0.33235497 - samples/sec: 18.11 - lr: 0.020000\n",
-      "2021-09-08 10:54:09,096 epoch 1 - iter 12/32 - loss 0.72011266 - samples/sec: 16.58 - lr: 0.020000\n",
-      "2021-09-08 10:54:09,273 epoch 1 - iter 15/32 - loss 0.66461182 - samples/sec: 17.03 - lr: 0.020000\n",
-      "2021-09-08 10:54:09,448 epoch 1 - iter 18/32 - loss 0.76851268 - samples/sec: 17.22 - lr: 0.020000\n",
-      "2021-09-08 10:54:09,627 epoch 1 - iter 21/32 - loss 0.74361744 - samples/sec: 16.84 - lr: 0.020000\n",
-      "2021-09-08 10:54:09,803 epoch 1 - iter 24/32 - loss 0.65873843 - samples/sec: 17.12 - lr: 0.020000\n",
-      "2021-09-08 10:54:09,984 epoch 1 - iter 27/32 - loss 0.68793039 - samples/sec: 16.60 - lr: 0.020000\n",
-      "2021-09-08 10:54:10,163 epoch 1 - iter 30/32 - loss 0.69777314 - samples/sec: 16.80 - lr: 0.020000\n",
-      "2021-09-08 10:54:10,274 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:10,275 EPOCH 1 done: loss 0.6714 - lr 0.0200000\n",
-      "2021-09-08 10:54:10,669 DEV : loss 0.5544171929359436 - score 0.75\n",
-      "2021-09-08 10:54:10,670 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:54:17,032 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:17,224 epoch 2 - iter 3/32 - loss 0.71958173 - samples/sec: 17.36 - lr: 0.020000\n",
-      "2021-09-08 10:54:17,394 epoch 2 - iter 6/32 - loss 0.85092039 - samples/sec: 17.73 - lr: 0.020000\n",
-      "2021-09-08 10:54:17,570 epoch 2 - iter 9/32 - loss 0.78299229 - samples/sec: 17.13 - lr: 0.020000\n",
-      "2021-09-08 10:54:17,745 epoch 2 - iter 12/32 - loss 0.70242218 - samples/sec: 17.20 - lr: 0.020000\n",
-      "2021-09-08 10:54:17,919 epoch 2 - iter 15/32 - loss 0.57304815 - samples/sec: 17.26 - lr: 0.020000\n",
-      "2021-09-08 10:54:18,084 epoch 2 - iter 18/32 - loss 0.49074987 - samples/sec: 18.29 - lr: 0.020000\n",
-      "2021-09-08 10:54:18,250 epoch 2 - iter 21/32 - loss 0.48260444 - samples/sec: 18.15 - lr: 0.020000\n",
-      "2021-09-08 10:54:18,399 epoch 2 - iter 24/32 - loss 0.47720895 - samples/sec: 20.28 - lr: 0.020000\n",
-      "2021-09-08 10:54:18,553 epoch 2 - iter 27/32 - loss 0.47238346 - samples/sec: 19.58 - lr: 0.020000\n",
-      "2021-09-08 10:54:18,705 epoch 2 - iter 30/32 - loss 0.47092662 - samples/sec: 19.81 - lr: 0.020000\n",
-      "2021-09-08 10:54:18,809 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:18,810 EPOCH 2 done: loss 0.4914 - lr 0.0200000\n",
-      "2021-09-08 10:54:19,150 DEV : loss 0.6848074793815613 - score 0.75\n",
-      "2021-09-08 10:54:19,151 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:54:19,227 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:19,391 epoch 3 - iter 3/32 - loss 0.43826695 - samples/sec: 20.58 - lr: 0.020000\n",
-      "2021-09-08 10:54:19,538 epoch 3 - iter 6/32 - loss 0.36218479 - samples/sec: 20.40 - lr: 0.020000\n",
-      "2021-09-08 10:54:19,693 epoch 3 - iter 9/32 - loss 0.41878166 - samples/sec: 19.52 - lr: 0.020000\n",
-      "2021-09-08 10:54:19,843 epoch 3 - iter 12/32 - loss 0.33788366 - samples/sec: 20.07 - lr: 0.020000\n",
-      "2021-09-08 10:54:19,993 epoch 3 - iter 15/32 - loss 0.33242269 - samples/sec: 20.02 - lr: 0.020000\n",
-      "2021-09-08 10:54:20,146 epoch 3 - iter 18/32 - loss 0.41674723 - samples/sec: 19.72 - lr: 0.020000\n",
-      "2021-09-08 10:54:20,307 epoch 3 - iter 21/32 - loss 0.42180631 - samples/sec: 18.89 - lr: 0.020000\n",
-      "2021-09-08 10:54:20,456 epoch 3 - iter 24/32 - loss 0.37703840 - samples/sec: 20.26 - lr: 0.020000\n",
-      "2021-09-08 10:54:20,605 epoch 3 - iter 27/32 - loss 0.34568089 - samples/sec: 20.19 - lr: 0.020000\n",
-      "2021-09-08 10:54:20,757 epoch 3 - iter 30/32 - loss 0.40743205 - samples/sec: 19.90 - lr: 0.020000\n",
-      "2021-09-08 10:54:20,861 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:20,862 EPOCH 3 done: loss 0.4067 - lr 0.0200000\n",
-      "2021-09-08 10:54:21,037 DEV : loss 0.7385416030883789 - score 0.75\n",
-      "2021-09-08 10:54:21,038 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:54:21,114 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:21,292 epoch 4 - iter 3/32 - loss 0.21759888 - samples/sec: 18.71 - lr: 0.020000\n",
-      "2021-09-08 10:54:21,438 epoch 4 - iter 6/32 - loss 0.10943134 - samples/sec: 20.65 - lr: 0.020000\n",
-      "2021-09-08 10:54:21,594 epoch 4 - iter 9/32 - loss 0.08534580 - samples/sec: 19.34 - lr: 0.020000\n",
-      "2021-09-08 10:54:21,749 epoch 4 - iter 12/32 - loss 0.16536626 - samples/sec: 19.45 - lr: 0.020000\n",
-      "2021-09-08 10:54:21,896 epoch 4 - iter 15/32 - loss 0.13346128 - samples/sec: 20.54 - lr: 0.020000\n",
-      "2021-09-08 10:54:22,048 epoch 4 - iter 18/32 - loss 0.12144044 - samples/sec: 19.86 - lr: 0.020000\n",
-      "2021-09-08 10:54:22,198 epoch 4 - iter 21/32 - loss 0.10458155 - samples/sec: 20.08 - lr: 0.020000\n",
-      "2021-09-08 10:54:22,354 epoch 4 - iter 24/32 - loss 0.19640959 - samples/sec: 19.37 - lr: 0.020000\n",
-      "2021-09-08 10:54:22,505 epoch 4 - iter 27/32 - loss 0.18379923 - samples/sec: 19.98 - lr: 0.020000\n",
-      "2021-09-08 10:54:22,662 epoch 4 - iter 30/32 - loss 0.17162743 - samples/sec: 19.19 - lr: 0.020000\n",
-      "2021-09-08 10:54:22,779 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:22,780 EPOCH 4 done: loss 0.1824 - lr 0.0200000\n",
-      "2021-09-08 10:54:22,862 DEV : loss 0.4083033502101898 - score 0.75\n",
-      "2021-09-08 10:54:22,864 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:18:45,575 epoch 1 - iter 3/32 - loss 0.37283859 - samples/sec: 17.29 - lr: 0.020000\n",
+      "2021-09-21 19:18:45,749 epoch 1 - iter 6/32 - loss 0.28554675 - samples/sec: 17.22 - lr: 0.020000\n",
+      "2021-09-21 19:18:46,016 epoch 1 - iter 9/32 - loss 0.19943834 - samples/sec: 11.28 - lr: 0.020000\n",
+      "2021-09-21 19:18:46,176 epoch 1 - iter 12/32 - loss 0.63004509 - samples/sec: 18.87 - lr: 0.020000\n",
+      "2021-09-21 19:18:46,350 epoch 1 - iter 15/32 - loss 0.72151170 - samples/sec: 17.26 - lr: 0.020000\n",
+      "2021-09-21 19:18:46,503 epoch 1 - iter 18/32 - loss 0.65158495 - samples/sec: 19.81 - lr: 0.020000\n",
+      "2021-09-21 19:18:46,674 epoch 1 - iter 21/32 - loss 0.57820247 - samples/sec: 17.52 - lr: 0.020000\n",
+      "2021-09-21 19:18:46,826 epoch 1 - iter 24/32 - loss 0.54872230 - samples/sec: 19.89 - lr: 0.020000\n",
+      "2021-09-21 19:18:46,982 epoch 1 - iter 27/32 - loss 0.51172417 - samples/sec: 19.30 - lr: 0.020000\n",
+      "2021-09-21 19:18:47,143 epoch 1 - iter 30/32 - loss 0.52654050 - samples/sec: 18.65 - lr: 0.020000\n",
+      "2021-09-21 19:18:47,259 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:18:47,260 EPOCH 1 done: loss 0.5102 - lr 0.0200000\n",
+      "2021-09-21 19:18:47,340 DEV : loss 0.5302480459213257 - score 0.75\n",
+      "2021-09-21 19:18:47,341 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:54:31,238 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:31,405 epoch 5 - iter 3/32 - loss 0.00189259 - samples/sec: 20.18 - lr: 0.020000\n",
-      "2021-09-08 10:54:31,555 epoch 5 - iter 6/32 - loss 0.04149177 - samples/sec: 20.13 - lr: 0.020000\n",
-      "2021-09-08 10:54:31,705 epoch 5 - iter 9/32 - loss 0.03900183 - samples/sec: 20.04 - lr: 0.020000\n",
-      "2021-09-08 10:54:31,856 epoch 5 - iter 12/32 - loss 0.09937055 - samples/sec: 20.03 - lr: 0.020000\n",
-      "2021-09-08 10:54:32,010 epoch 5 - iter 15/32 - loss 0.08035029 - samples/sec: 19.60 - lr: 0.020000\n",
-      "2021-09-08 10:54:32,168 epoch 5 - iter 18/32 - loss 0.12426677 - samples/sec: 19.05 - lr: 0.020000\n",
-      "2021-09-08 10:54:32,318 epoch 5 - iter 21/32 - loss 0.18375737 - samples/sec: 20.03 - lr: 0.020000\n",
-      "2021-09-08 10:54:32,476 epoch 5 - iter 24/32 - loss 0.16922005 - samples/sec: 19.17 - lr: 0.020000\n",
-      "2021-09-08 10:54:32,623 epoch 5 - iter 27/32 - loss 0.15280144 - samples/sec: 20.51 - lr: 0.020000\n",
-      "2021-09-08 10:54:32,776 epoch 5 - iter 30/32 - loss 0.15530896 - samples/sec: 19.68 - lr: 0.020000\n",
-      "2021-09-08 10:54:32,874 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:32,874 EPOCH 5 done: loss 0.1457 - lr 0.0200000\n",
-      "2021-09-08 10:54:34,223 DEV : loss 0.7897307872772217 - score 0.75\n",
-      "2021-09-08 10:54:34,224 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:54:34,241 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:34,389 epoch 6 - iter 3/32 - loss 0.02789472 - samples/sec: 22.29 - lr: 0.020000\n",
-      "2021-09-08 10:54:34,523 epoch 6 - iter 6/32 - loss 0.01689189 - samples/sec: 22.41 - lr: 0.020000\n",
-      "2021-09-08 10:54:34,662 epoch 6 - iter 9/32 - loss 0.03252800 - samples/sec: 21.74 - lr: 0.020000\n",
-      "2021-09-08 10:54:34,796 epoch 6 - iter 12/32 - loss 0.05344204 - samples/sec: 22.55 - lr: 0.020000\n",
-      "2021-09-08 10:54:34,933 epoch 6 - iter 15/32 - loss 0.06231067 - samples/sec: 22.08 - lr: 0.020000\n",
-      "2021-09-08 10:54:35,065 epoch 6 - iter 18/32 - loss 0.05212948 - samples/sec: 22.88 - lr: 0.020000\n",
-      "2021-09-08 10:54:35,196 epoch 6 - iter 21/32 - loss 0.04505487 - samples/sec: 23.01 - lr: 0.020000\n",
-      "2021-09-08 10:54:35,337 epoch 6 - iter 24/32 - loss 0.05022633 - samples/sec: 21.47 - lr: 0.020000\n",
-      "2021-09-08 10:54:35,480 epoch 6 - iter 27/32 - loss 0.04479383 - samples/sec: 21.04 - lr: 0.020000\n",
-      "2021-09-08 10:54:35,612 epoch 6 - iter 30/32 - loss 0.04095469 - samples/sec: 23.05 - lr: 0.020000\n"
+      "2021-09-21 19:18:51,292 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:18:51,511 epoch 2 - iter 3/32 - loss 0.68589846 - samples/sec: 15.17 - lr: 0.020000\n",
+      "2021-09-21 19:18:51,705 epoch 2 - iter 6/32 - loss 0.67316009 - samples/sec: 15.54 - lr: 0.020000\n",
+      "2021-09-21 19:18:51,897 epoch 2 - iter 9/32 - loss 0.58293671 - samples/sec: 15.67 - lr: 0.020000\n",
+      "2021-09-21 19:18:52,095 epoch 2 - iter 12/32 - loss 0.65933437 - samples/sec: 15.17 - lr: 0.020000\n",
+      "2021-09-21 19:18:52,298 epoch 2 - iter 15/32 - loss 0.55183983 - samples/sec: 14.80 - lr: 0.020000\n",
+      "2021-09-21 19:18:52,490 epoch 2 - iter 18/32 - loss 0.52013588 - samples/sec: 15.75 - lr: 0.020000\n",
+      "2021-09-21 19:18:52,682 epoch 2 - iter 21/32 - loss 0.50883706 - samples/sec: 15.64 - lr: 0.020000\n",
+      "2021-09-21 19:18:52,872 epoch 2 - iter 24/32 - loss 0.56560258 - samples/sec: 15.84 - lr: 0.020000\n",
+      "2021-09-21 19:18:53,066 epoch 2 - iter 27/32 - loss 0.51156925 - samples/sec: 15.54 - lr: 0.020000\n",
+      "2021-09-21 19:18:53,261 epoch 2 - iter 30/32 - loss 0.50304399 - samples/sec: 15.39 - lr: 0.020000\n",
+      "2021-09-21 19:18:53,383 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:18:53,384 EPOCH 2 done: loss 0.5237 - lr 0.0200000\n",
+      "2021-09-21 19:18:53,591 DEV : loss 0.8483306169509888 - score 0.25\n",
+      "2021-09-21 19:18:53,592 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:18:53,667 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:18:53,905 epoch 3 - iter 3/32 - loss 0.24191380 - samples/sec: 14.90 - lr: 0.020000\n",
+      "2021-09-21 19:18:54,091 epoch 3 - iter 6/32 - loss 0.30116344 - samples/sec: 16.16 - lr: 0.020000\n",
+      "2021-09-21 19:18:54,287 epoch 3 - iter 9/32 - loss 0.26468068 - samples/sec: 15.31 - lr: 0.020000\n",
+      "2021-09-21 19:18:54,486 epoch 3 - iter 12/32 - loss 0.32807229 - samples/sec: 15.18 - lr: 0.020000\n",
+      "2021-09-21 19:18:54,678 epoch 3 - iter 15/32 - loss 0.27821854 - samples/sec: 15.63 - lr: 0.020000\n",
+      "2021-09-21 19:18:54,875 epoch 3 - iter 18/32 - loss 0.29899761 - samples/sec: 15.28 - lr: 0.020000\n",
+      "2021-09-21 19:18:55,063 epoch 3 - iter 21/32 - loss 0.27889291 - samples/sec: 16.01 - lr: 0.020000\n",
+      "2021-09-21 19:18:55,247 epoch 3 - iter 24/32 - loss 0.32229888 - samples/sec: 16.34 - lr: 0.020000\n",
+      "2021-09-21 19:18:55,486 epoch 3 - iter 27/32 - loss 0.29391023 - samples/sec: 12.57 - lr: 0.020000\n",
+      "2021-09-21 19:18:55,742 epoch 3 - iter 30/32 - loss 0.27843647 - samples/sec: 11.73 - lr: 0.020000\n",
+      "2021-09-21 19:18:55,916 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:18:55,916 EPOCH 3 done: loss 0.2623 - lr 0.0200000\n",
+      "2021-09-21 19:18:56,104 DEV : loss 1.0329084396362305 - score 0.5\n",
+      "2021-09-21 19:18:56,104 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:18:56,107 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:18:56,415 epoch 4 - iter 3/32 - loss 0.00695283 - samples/sec: 10.74 - lr: 0.020000\n",
+      "2021-09-21 19:18:56,687 epoch 4 - iter 6/32 - loss 0.01813346 - samples/sec: 11.02 - lr: 0.020000\n",
+      "2021-09-21 19:18:56,911 epoch 4 - iter 9/32 - loss 0.16214314 - samples/sec: 13.43 - lr: 0.020000\n",
+      "2021-09-21 19:18:57,145 epoch 4 - iter 12/32 - loss 0.17601671 - samples/sec: 12.86 - lr: 0.020000\n",
+      "2021-09-21 19:18:57,388 epoch 4 - iter 15/32 - loss 0.14257774 - samples/sec: 12.40 - lr: 0.020000\n",
+      "2021-09-21 19:18:57,635 epoch 4 - iter 18/32 - loss 0.11985630 - samples/sec: 12.14 - lr: 0.020000\n",
+      "2021-09-21 19:18:57,893 epoch 4 - iter 21/32 - loss 0.11850827 - samples/sec: 11.64 - lr: 0.020000\n",
+      "2021-09-21 19:18:58,141 epoch 4 - iter 24/32 - loss 0.10491440 - samples/sec: 12.13 - lr: 0.020000\n",
+      "2021-09-21 19:18:58,390 epoch 4 - iter 27/32 - loss 0.09386733 - samples/sec: 12.09 - lr: 0.020000\n",
+      "2021-09-21 19:18:58,664 epoch 4 - iter 30/32 - loss 0.08677364 - samples/sec: 10.97 - lr: 0.020000\n",
+      "2021-09-21 19:18:58,838 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:18:58,839 EPOCH 4 done: loss 0.1170 - lr 0.0200000\n",
+      "2021-09-21 19:18:59,018 DEV : loss 0.910800039768219 - score 0.5\n",
+      "2021-09-21 19:18:59,018 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:18:59,020 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:18:59,332 epoch 5 - iter 3/32 - loss 0.00554573 - samples/sec: 11.79 - lr: 0.020000\n",
+      "2021-09-21 19:18:59,588 epoch 5 - iter 6/32 - loss 0.03582997 - samples/sec: 11.74 - lr: 0.020000\n",
+      "2021-09-21 19:18:59,806 epoch 5 - iter 9/32 - loss 0.02645914 - samples/sec: 13.80 - lr: 0.020000\n",
+      "2021-09-21 19:19:00,032 epoch 5 - iter 12/32 - loss 0.02115686 - samples/sec: 13.35 - lr: 0.020000\n",
+      "2021-09-21 19:19:00,227 epoch 5 - iter 15/32 - loss 0.02899944 - samples/sec: 15.38 - lr: 0.020000\n",
+      "2021-09-21 19:19:00,444 epoch 5 - iter 18/32 - loss 0.08587512 - samples/sec: 13.85 - lr: 0.020000\n",
+      "2021-09-21 19:19:00,679 epoch 5 - iter 21/32 - loss 0.07572840 - samples/sec: 12.81 - lr: 0.020000\n",
+      "2021-09-21 19:19:00,902 epoch 5 - iter 24/32 - loss 0.07801695 - samples/sec: 13.51 - lr: 0.020000\n",
+      "2021-09-21 19:19:01,148 epoch 5 - iter 27/32 - loss 0.13491603 - samples/sec: 12.24 - lr: 0.020000\n",
+      "2021-09-21 19:19:01,343 epoch 5 - iter 30/32 - loss 0.16817579 - samples/sec: 15.43 - lr: 0.020000\n",
+      "2021-09-21 19:19:01,467 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:01,467 EPOCH 5 done: loss 0.1577 - lr 0.0200000\n",
+      "2021-09-21 19:19:01,580 DEV : loss 0.780403733253479 - score 0.5\n",
+      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 19:19:01,581 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:19:01,583 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:01,780 epoch 6 - iter 3/32 - loss 0.00348203 - samples/sec: 17.55 - lr: 0.010000\n",
+      "2021-09-21 19:19:01,964 epoch 6 - iter 6/32 - loss 0.00298348 - samples/sec: 16.34 - lr: 0.010000\n",
+      "2021-09-21 19:19:02,144 epoch 6 - iter 9/32 - loss 0.00224070 - samples/sec: 16.70 - lr: 0.010000\n",
+      "2021-09-21 19:19:02,351 epoch 6 - iter 12/32 - loss 0.01034799 - samples/sec: 14.55 - lr: 0.010000\n",
+      "2021-09-21 19:19:02,545 epoch 6 - iter 15/32 - loss 0.00857616 - samples/sec: 15.54 - lr: 0.010000\n",
+      "2021-09-21 19:19:02,723 epoch 6 - iter 18/32 - loss 0.01373534 - samples/sec: 16.90 - lr: 0.010000\n",
+      "2021-09-21 19:19:02,917 epoch 6 - iter 21/32 - loss 0.01346127 - samples/sec: 15.51 - lr: 0.010000\n",
+      "2021-09-21 19:19:03,102 epoch 6 - iter 24/32 - loss 0.01275709 - samples/sec: 16.36 - lr: 0.010000\n",
+      "2021-09-21 19:19:03,283 epoch 6 - iter 27/32 - loss 0.01197034 - samples/sec: 16.64 - lr: 0.010000\n",
+      "2021-09-21 19:19:03,485 epoch 6 - iter 30/32 - loss 0.04901557 - samples/sec: 14.86 - lr: 0.010000\n",
+      "2021-09-21 19:19:03,605 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:54:35,700 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:35,701 EPOCH 6 done: loss 0.0385 - lr 0.0200000\n",
-      "2021-09-08 10:54:35,912 DEV : loss 0.9334942698478699 - score 0.5\n",
-      "2021-09-08 10:54:35,912 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:54:36,014 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:36,159 epoch 7 - iter 3/32 - loss 0.00145146 - samples/sec: 22.71 - lr: 0.020000\n",
-      "2021-09-08 10:54:36,292 epoch 7 - iter 6/32 - loss 0.22385137 - samples/sec: 22.75 - lr: 0.020000\n",
-      "2021-09-08 10:54:36,422 epoch 7 - iter 9/32 - loss 0.14965755 - samples/sec: 23.20 - lr: 0.020000\n",
-      "2021-09-08 10:54:36,551 epoch 7 - iter 12/32 - loss 0.11399983 - samples/sec: 23.31 - lr: 0.020000\n",
-      "2021-09-08 10:54:36,684 epoch 7 - iter 15/32 - loss 0.09453973 - samples/sec: 22.76 - lr: 0.020000\n",
-      "2021-09-08 10:54:36,816 epoch 7 - iter 18/32 - loss 0.08298014 - samples/sec: 22.75 - lr: 0.020000\n",
-      "2021-09-08 10:54:36,952 epoch 7 - iter 21/32 - loss 0.07733376 - samples/sec: 22.24 - lr: 0.020000\n",
-      "2021-09-08 10:54:37,090 epoch 7 - iter 24/32 - loss 0.07782428 - samples/sec: 21.96 - lr: 0.020000\n",
-      "2021-09-08 10:54:37,223 epoch 7 - iter 27/32 - loss 0.17396845 - samples/sec: 22.68 - lr: 0.020000\n",
-      "2021-09-08 10:54:37,358 epoch 7 - iter 30/32 - loss 0.15760867 - samples/sec: 22.48 - lr: 0.020000\n",
-      "2021-09-08 10:54:37,446 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:37,447 EPOCH 7 done: loss 0.1483 - lr 0.0200000\n",
-      "2021-09-08 10:54:37,619 DEV : loss 0.7847719788551331 - score 0.75\n",
-      "2021-09-08 10:54:37,620 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:54:37,696 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:37,842 epoch 8 - iter 3/32 - loss 0.01844538 - samples/sec: 22.65 - lr: 0.020000\n",
-      "2021-09-08 10:54:37,975 epoch 8 - iter 6/32 - loss 0.01265700 - samples/sec: 22.82 - lr: 0.020000\n",
-      "2021-09-08 10:54:38,104 epoch 8 - iter 9/32 - loss 0.00872698 - samples/sec: 23.36 - lr: 0.020000\n",
-      "2021-09-08 10:54:38,234 epoch 8 - iter 12/32 - loss 0.00744390 - samples/sec: 23.12 - lr: 0.020000\n",
-      "2021-09-08 10:54:38,364 epoch 8 - iter 15/32 - loss 0.00649238 - samples/sec: 23.34 - lr: 0.020000\n",
-      "2021-09-08 10:54:38,497 epoch 8 - iter 18/32 - loss 0.00725565 - samples/sec: 22.67 - lr: 0.020000\n",
-      "2021-09-08 10:54:38,632 epoch 8 - iter 21/32 - loss 0.06496209 - samples/sec: 22.31 - lr: 0.020000\n",
-      "2021-09-08 10:54:38,765 epoch 8 - iter 24/32 - loss 0.05809495 - samples/sec: 22.78 - lr: 0.020000\n",
-      "2021-09-08 10:54:38,899 epoch 8 - iter 27/32 - loss 0.05169557 - samples/sec: 22.49 - lr: 0.020000\n",
-      "2021-09-08 10:54:39,032 epoch 8 - iter 30/32 - loss 0.04656638 - samples/sec: 22.69 - lr: 0.020000\n",
-      "2021-09-08 10:54:39,121 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:39,121 EPOCH 8 done: loss 0.0437 - lr 0.0200000\n",
-      "2021-09-08 10:54:39,306 DEV : loss 0.6014853715896606 - score 0.75\n",
-      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 10:54:39,307 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:54:39,413 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:39,561 epoch 9 - iter 3/32 - loss 0.00238883 - samples/sec: 22.42 - lr: 0.010000\n",
-      "2021-09-08 10:54:39,711 epoch 9 - iter 6/32 - loss 0.00170011 - samples/sec: 20.20 - lr: 0.010000\n",
-      "2021-09-08 10:54:39,843 epoch 9 - iter 9/32 - loss 0.00125979 - samples/sec: 22.78 - lr: 0.010000\n",
-      "2021-09-08 10:54:39,978 epoch 9 - iter 12/32 - loss 0.04778161 - samples/sec: 22.35 - lr: 0.010000\n",
-      "2021-09-08 10:54:40,112 epoch 9 - iter 15/32 - loss 0.03838085 - samples/sec: 22.57 - lr: 0.010000\n",
-      "2021-09-08 10:54:40,245 epoch 9 - iter 18/32 - loss 0.06816384 - samples/sec: 22.70 - lr: 0.010000\n",
-      "2021-09-08 10:54:40,375 epoch 9 - iter 21/32 - loss 0.05872175 - samples/sec: 23.21 - lr: 0.010000\n",
-      "2021-09-08 10:54:40,506 epoch 9 - iter 24/32 - loss 0.05167732 - samples/sec: 23.17 - lr: 0.010000\n",
-      "2021-09-08 10:54:40,635 epoch 9 - iter 27/32 - loss 0.04667127 - samples/sec: 23.33 - lr: 0.010000\n",
-      "2021-09-08 10:54:40,765 epoch 9 - iter 30/32 - loss 0.04246405 - samples/sec: 23.28 - lr: 0.010000\n",
-      "2021-09-08 10:54:40,853 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:40,854 EPOCH 9 done: loss 0.0399 - lr 0.0100000\n",
-      "2021-09-08 10:54:44,478 DEV : loss 0.7788462042808533 - score 0.5\n",
-      "2021-09-08 10:54:44,479 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:54:44,493 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:44,674 epoch 10 - iter 3/32 - loss 0.00029735 - samples/sec: 18.23 - lr: 0.010000\n",
-      "2021-09-08 10:54:44,849 epoch 10 - iter 6/32 - loss 0.00154982 - samples/sec: 17.19 - lr: 0.010000\n",
-      "2021-09-08 10:54:44,992 epoch 10 - iter 9/32 - loss 0.00218577 - samples/sec: 21.14 - lr: 0.010000\n",
-      "2021-09-08 10:54:45,136 epoch 10 - iter 12/32 - loss 0.02216662 - samples/sec: 20.92 - lr: 0.010000\n",
-      "2021-09-08 10:54:45,277 epoch 10 - iter 15/32 - loss 0.01902194 - samples/sec: 21.33 - lr: 0.010000\n",
-      "2021-09-08 10:54:45,419 epoch 10 - iter 18/32 - loss 0.01589329 - samples/sec: 21.20 - lr: 0.010000\n",
-      "2021-09-08 10:54:45,560 epoch 10 - iter 21/32 - loss 0.01370041 - samples/sec: 21.42 - lr: 0.010000\n",
-      "2021-09-08 10:54:45,694 epoch 10 - iter 24/32 - loss 0.01205047 - samples/sec: 22.47 - lr: 0.010000\n",
-      "2021-09-08 10:54:45,824 epoch 10 - iter 27/32 - loss 0.01083825 - samples/sec: 23.27 - lr: 0.010000\n",
-      "2021-09-08 10:54:45,955 epoch 10 - iter 30/32 - loss 0.00989480 - samples/sec: 23.07 - lr: 0.010000\n",
-      "2021-09-08 10:54:46,042 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:46,043 EPOCH 10 done: loss 0.0093 - lr 0.0100000\n",
-      "2021-09-08 10:54:46,205 DEV : loss 1.0104566812515259 - score 0.5\n",
-      "2021-09-08 10:54:46,206 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:54:57,269 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:57,269 Testing using best model ...\n",
-      "2021-09-08 10:54:57,271 loading file None1/best-model.pt\n",
+      "2021-09-21 19:19:03,606 EPOCH 6 done: loss 0.0462 - lr 0.0100000\n",
+      "2021-09-21 19:19:03,803 DEV : loss 1.3499435186386108 - score 0.5\n",
+      "2021-09-21 19:19:03,803 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:19:03,891 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:04,102 epoch 7 - iter 3/32 - loss 0.00185135 - samples/sec: 15.82 - lr: 0.010000\n",
+      "2021-09-21 19:19:04,295 epoch 7 - iter 6/32 - loss 0.02102205 - samples/sec: 15.63 - lr: 0.010000\n",
+      "2021-09-21 19:19:04,468 epoch 7 - iter 9/32 - loss 0.03065151 - samples/sec: 17.40 - lr: 0.010000\n",
+      "2021-09-21 19:19:04,655 epoch 7 - iter 12/32 - loss 0.02418067 - samples/sec: 16.17 - lr: 0.010000\n",
+      "2021-09-21 19:19:04,843 epoch 7 - iter 15/32 - loss 0.01986637 - samples/sec: 16.01 - lr: 0.010000\n",
+      "2021-09-21 19:19:05,014 epoch 7 - iter 18/32 - loss 0.01689095 - samples/sec: 17.61 - lr: 0.010000\n",
+      "2021-09-21 19:19:05,196 epoch 7 - iter 21/32 - loss 0.02728953 - samples/sec: 16.51 - lr: 0.010000\n",
+      "2021-09-21 19:19:05,399 epoch 7 - iter 24/32 - loss 0.02454707 - samples/sec: 14.79 - lr: 0.010000\n",
+      "2021-09-21 19:19:05,638 epoch 7 - iter 27/32 - loss 0.02189072 - samples/sec: 12.58 - lr: 0.010000\n",
+      "2021-09-21 19:19:05,867 epoch 7 - iter 30/32 - loss 0.02669442 - samples/sec: 13.14 - lr: 0.010000\n",
+      "2021-09-21 19:19:05,984 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:05,985 EPOCH 7 done: loss 0.0254 - lr 0.0100000\n",
+      "2021-09-21 19:19:06,535 DEV : loss 1.232753872871399 - score 0.5\n",
+      "2021-09-21 19:19:06,536 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:19:06,561 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:06,828 epoch 8 - iter 3/32 - loss 0.00219231 - samples/sec: 12.53 - lr: 0.010000\n",
+      "2021-09-21 19:19:07,049 epoch 8 - iter 6/32 - loss 0.00224313 - samples/sec: 13.64 - lr: 0.010000\n",
+      "2021-09-21 19:19:07,274 epoch 8 - iter 9/32 - loss 0.00763290 - samples/sec: 13.38 - lr: 0.010000\n",
+      "2021-09-21 19:19:07,437 epoch 8 - iter 12/32 - loss 0.00713969 - samples/sec: 18.47 - lr: 0.010000\n",
+      "2021-09-21 19:19:07,657 epoch 8 - iter 15/32 - loss 0.00616849 - samples/sec: 13.67 - lr: 0.010000\n",
+      "2021-09-21 19:19:07,884 epoch 8 - iter 18/32 - loss 0.00531367 - samples/sec: 13.24 - lr: 0.010000\n",
+      "2021-09-21 19:19:08,104 epoch 8 - iter 21/32 - loss 0.00535106 - samples/sec: 13.68 - lr: 0.010000\n",
+      "2021-09-21 19:19:08,348 epoch 8 - iter 24/32 - loss 0.00558147 - samples/sec: 12.34 - lr: 0.010000\n",
+      "2021-09-21 19:19:08,584 epoch 8 - iter 27/32 - loss 0.00507530 - samples/sec: 12.72 - lr: 0.010000\n",
+      "2021-09-21 19:19:08,821 epoch 8 - iter 30/32 - loss 0.00496690 - samples/sec: 12.69 - lr: 0.010000\n",
+      "2021-09-21 19:19:08,980 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:08,981 EPOCH 8 done: loss 0.0047 - lr 0.0100000\n",
+      "2021-09-21 19:19:09,237 DEV : loss 1.353977918624878 - score 0.5\n",
+      "2021-09-21 19:19:09,237 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:19:09,310 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:09,551 epoch 9 - iter 3/32 - loss 0.00262220 - samples/sec: 13.72 - lr: 0.010000\n",
+      "2021-09-21 19:19:09,772 epoch 9 - iter 6/32 - loss 0.00153977 - samples/sec: 13.66 - lr: 0.010000\n",
+      "2021-09-21 19:19:09,993 epoch 9 - iter 9/32 - loss 0.05822485 - samples/sec: 13.58 - lr: 0.010000\n",
+      "2021-09-21 19:19:10,233 epoch 9 - iter 12/32 - loss 0.04414380 - samples/sec: 12.54 - lr: 0.010000\n",
+      "2021-09-21 19:19:10,458 epoch 9 - iter 15/32 - loss 0.03556066 - samples/sec: 13.37 - lr: 0.010000\n",
+      "2021-09-21 19:19:10,687 epoch 9 - iter 18/32 - loss 0.02989451 - samples/sec: 13.10 - lr: 0.010000\n",
+      "2021-09-21 19:19:10,918 epoch 9 - iter 21/32 - loss 0.02666760 - samples/sec: 13.03 - lr: 0.010000\n",
+      "2021-09-21 19:19:11,165 epoch 9 - iter 24/32 - loss 0.02466169 - samples/sec: 12.15 - lr: 0.010000\n",
+      "2021-09-21 19:19:11,376 epoch 9 - iter 27/32 - loss 0.02207496 - samples/sec: 14.26 - lr: 0.010000\n",
+      "2021-09-21 19:19:11,586 epoch 9 - iter 30/32 - loss 0.02022034 - samples/sec: 14.31 - lr: 0.010000\n",
+      "2021-09-21 19:19:11,754 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:11,755 EPOCH 9 done: loss 0.0190 - lr 0.0100000\n",
+      "2021-09-21 19:19:14,985 DEV : loss 0.997540295124054 - score 0.5\n",
+      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 19:19:14,986 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:19:14,991 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:15,249 epoch 10 - iter 3/32 - loss 0.00135320 - samples/sec: 13.21 - lr: 0.005000\n",
+      "2021-09-21 19:19:15,492 epoch 10 - iter 6/32 - loss 0.00149365 - samples/sec: 12.40 - lr: 0.005000\n",
+      "2021-09-21 19:19:15,697 epoch 10 - iter 9/32 - loss 0.00173350 - samples/sec: 14.66 - lr: 0.005000\n",
+      "2021-09-21 19:19:15,875 epoch 10 - iter 12/32 - loss 0.00165127 - samples/sec: 16.97 - lr: 0.005000\n",
+      "2021-09-21 19:19:16,061 epoch 10 - iter 15/32 - loss 0.00140354 - samples/sec: 16.20 - lr: 0.005000\n",
+      "2021-09-21 19:19:16,251 epoch 10 - iter 18/32 - loss 0.00133192 - samples/sec: 15.83 - lr: 0.005000\n",
+      "2021-09-21 19:19:16,429 epoch 10 - iter 21/32 - loss 0.00123332 - samples/sec: 16.94 - lr: 0.005000\n",
+      "2021-09-21 19:19:16,609 epoch 10 - iter 24/32 - loss 0.00123111 - samples/sec: 16.76 - lr: 0.005000\n",
+      "2021-09-21 19:19:16,786 epoch 10 - iter 27/32 - loss 0.00133425 - samples/sec: 17.01 - lr: 0.005000\n",
+      "2021-09-21 19:19:16,951 epoch 10 - iter 30/32 - loss 0.00127974 - samples/sec: 18.16 - lr: 0.005000\n",
+      "2021-09-21 19:19:17,065 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:17,066 EPOCH 10 done: loss 0.0012 - lr 0.0050000\n",
+      "2021-09-21 19:19:17,283 DEV : loss 1.0185199975967407 - score 0.5\n",
+      "2021-09-21 19:19:17,284 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:19:30,616 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:19:30,617 Testing using best model ...\n",
+      "2021-09-21 19:19:30,618 loading file None1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:55:02,754 \t0.5\n",
-      "2021-09-08 10:55:02,754 \n",
+      "2021-09-21 19:19:53,058 \t0.75\n",
+      "2021-09-21 19:19:53,059 \n",
       "Results:\n",
-      "- F-score (micro) 0.5\n",
-      "- F-score (macro) 0.375\n",
-      "- Accuracy 0.5\n",
+      "- F-score (micro) 0.75\n",
+      "- F-score (macro) 0.5\n",
+      "- Accuracy 0.75\n",
       "\n",
       "By class:\n",
       "                              precision    recall  f1-score   support\n",
       "\n",
-      " this text expresses sadness     0.0000    0.0000    0.0000         2\n",
-      "this text expresses optimism     0.3333    1.0000    0.5000         1\n",
+      " this text expresses sadness     1.0000    1.0000    1.0000         1\n",
+      "this text expresses optimism     0.0000    0.0000    0.0000         1\n",
       "   this text expresses anger     0.0000    0.0000    0.0000         0\n",
-      "     this text expresses joy     1.0000    1.0000    1.0000         1\n",
+      "     this text expresses joy     1.0000    1.0000    1.0000         2\n",
       "\n",
-      "                   micro avg     0.5000    0.5000    0.5000         4\n",
-      "                   macro avg     0.3333    0.5000    0.3750         4\n",
-      "                weighted avg     0.3333    0.5000    0.3750         4\n",
-      "                 samples avg     0.5000    0.5000    0.5000         4\n",
+      "                   micro avg     0.7500    0.7500    0.7500         4\n",
+      "                   macro avg     0.5000    0.5000    0.5000         4\n",
+      "                weighted avg     0.7500    0.7500    0.7500         4\n",
+      "                 samples avg     0.7500    0.7500    0.7500         4\n",
       "\n",
-      "2021-09-08 10:55:02,755 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:10,145 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
+      "2021-09-21 19:19:53,059 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:20:03,790 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:55:14,438 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:20:08,209 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 36/36 [00:00<00:00, 42438.15it/s]"
+      "100%|██████████| 36/36 [00:00<00:00, 43240.25it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:55:14,441 [b'this text expresses sadness', b'this text expresses optimism', b'this text expresses anger', b'this text expresses joy']\n",
-      "2021-09-08 10:55:14,449 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:14,451 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:20:08,211 [b'this text expresses sadness', b'this text expresses optimism', b'this text expresses anger', b'this text expresses joy']\n",
+      "2021-09-21 19:20:08,220 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:20:08,221 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -4382,26 +4384,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:55:14,452 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:14,452 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
-      "2021-09-08 10:55:14,452 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:14,453 Parameters:\n",
-      "2021-09-08 10:55:14,453  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:55:14,454  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:55:14,454  - patience: \"3\"\n",
-      "2021-09-08 10:55:14,454  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:55:14,455  - max_epochs: \"10\"\n",
-      "2021-09-08 10:55:14,455  - shuffle: \"True\"\n",
-      "2021-09-08 10:55:14,455  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:55:14,456  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:55:14,456 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:14,456 Model training base path: \"None1\"\n",
-      "2021-09-08 10:55:14,457 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:14,457 Device: cuda:1\n",
-      "2021-09-08 10:55:14,457 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:14,458 Embeddings storage mode: cpu\n",
-      "2021-09-08 10:55:14,464 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:14,622 epoch 1 - iter 3/32 - loss 0.46711969 - samples/sec: 21.00 - lr: 0.020000\n"
+      "2021-09-21 19:20:08,222 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:20:08,222 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
+      "2021-09-21 19:20:08,222 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:20:08,223 Parameters:\n",
+      "2021-09-21 19:20:08,223  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:20:08,223  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:20:08,223  - patience: \"3\"\n",
+      "2021-09-21 19:20:08,224  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:20:08,224  - max_epochs: \"10\"\n",
+      "2021-09-21 19:20:08,224  - shuffle: \"True\"\n",
+      "2021-09-21 19:20:08,225  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:20:08,225  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:20:08,225 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:20:08,225 Model training base path: \"None1\"\n",
+      "2021-09-21 19:20:08,226 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:20:08,226 Device: cuda:0\n",
+      "2021-09-21 19:20:08,226 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:20:08,227 Embeddings storage mode: cpu\n",
+      "2021-09-21 19:20:08,233 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -4415,208 +4416,209 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:55:14,770 epoch 1 - iter 6/32 - loss 0.28032038 - samples/sec: 20.46 - lr: 0.020000\n",
-      "2021-09-08 10:55:14,923 epoch 1 - iter 9/32 - loss 0.55101004 - samples/sec: 19.67 - lr: 0.020000\n",
-      "2021-09-08 10:55:15,071 epoch 1 - iter 12/32 - loss 0.71251236 - samples/sec: 20.44 - lr: 0.020000\n",
-      "2021-09-08 10:55:15,224 epoch 1 - iter 15/32 - loss 0.72748820 - samples/sec: 19.66 - lr: 0.020000\n",
-      "2021-09-08 10:55:15,366 epoch 1 - iter 18/32 - loss 0.66561627 - samples/sec: 21.22 - lr: 0.020000\n",
-      "2021-09-08 10:55:15,506 epoch 1 - iter 21/32 - loss 0.70766834 - samples/sec: 21.58 - lr: 0.020000\n",
-      "2021-09-08 10:55:15,640 epoch 1 - iter 24/32 - loss 0.63122161 - samples/sec: 22.48 - lr: 0.020000\n",
-      "2021-09-08 10:55:15,780 epoch 1 - iter 27/32 - loss 0.61887847 - samples/sec: 21.55 - lr: 0.020000\n",
-      "2021-09-08 10:55:15,924 epoch 1 - iter 30/32 - loss 0.61484542 - samples/sec: 20.98 - lr: 0.020000\n",
-      "2021-09-08 10:55:16,020 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:16,020 EPOCH 1 done: loss 0.6054 - lr 0.0200000\n",
-      "2021-09-08 10:55:16,200 DEV : loss 0.5325462222099304 - score 0.5\n",
-      "2021-09-08 10:55:16,202 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:20:08,508 epoch 1 - iter 3/32 - loss 0.24533476 - samples/sec: 12.23 - lr: 0.020000\n",
+      "2021-09-21 19:20:08,724 epoch 1 - iter 6/32 - loss 0.21104268 - samples/sec: 13.97 - lr: 0.020000\n",
+      "2021-09-21 19:20:08,951 epoch 1 - iter 9/32 - loss 0.15428968 - samples/sec: 13.26 - lr: 0.020000\n",
+      "2021-09-21 19:20:09,225 epoch 1 - iter 12/32 - loss 0.45330132 - samples/sec: 10.95 - lr: 0.020000\n",
+      "2021-09-21 19:20:09,452 epoch 1 - iter 15/32 - loss 0.46644999 - samples/sec: 13.25 - lr: 0.020000\n",
+      "2021-09-21 19:20:09,670 epoch 1 - iter 18/32 - loss 0.54336100 - samples/sec: 13.79 - lr: 0.020000\n",
+      "2021-09-21 19:20:09,910 epoch 1 - iter 21/32 - loss 0.54624831 - samples/sec: 12.54 - lr: 0.020000\n",
+      "2021-09-21 19:20:10,147 epoch 1 - iter 24/32 - loss 0.49580346 - samples/sec: 12.71 - lr: 0.020000\n",
+      "2021-09-21 19:20:10,385 epoch 1 - iter 27/32 - loss 0.52635909 - samples/sec: 12.62 - lr: 0.020000\n",
+      "2021-09-21 19:20:10,616 epoch 1 - iter 30/32 - loss 0.60593058 - samples/sec: 13.00 - lr: 0.020000\n",
+      "2021-09-21 19:20:10,786 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:20:10,787 EPOCH 1 done: loss 0.5888 - lr 0.0200000\n",
+      "2021-09-21 19:20:10,956 DEV : loss 0.4176509976387024 - score 0.5\n",
+      "2021-09-21 19:20:10,957 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:55:24,579 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:24,737 epoch 2 - iter 3/32 - loss 0.91291676 - samples/sec: 20.88 - lr: 0.020000\n",
-      "2021-09-08 10:55:24,878 epoch 2 - iter 6/32 - loss 0.93050076 - samples/sec: 21.40 - lr: 0.020000\n",
-      "2021-09-08 10:55:25,018 epoch 2 - iter 9/32 - loss 0.79183309 - samples/sec: 21.57 - lr: 0.020000\n",
-      "2021-09-08 10:55:25,154 epoch 2 - iter 12/32 - loss 0.64108265 - samples/sec: 22.10 - lr: 0.020000\n",
-      "2021-09-08 10:55:25,293 epoch 2 - iter 15/32 - loss 0.67910171 - samples/sec: 21.69 - lr: 0.020000\n",
-      "2021-09-08 10:55:25,430 epoch 2 - iter 18/32 - loss 0.61024789 - samples/sec: 22.11 - lr: 0.020000\n",
-      "2021-09-08 10:55:25,569 epoch 2 - iter 21/32 - loss 0.61332878 - samples/sec: 21.64 - lr: 0.020000\n",
-      "2021-09-08 10:55:25,708 epoch 2 - iter 24/32 - loss 0.63592547 - samples/sec: 21.71 - lr: 0.020000\n",
-      "2021-09-08 10:55:25,852 epoch 2 - iter 27/32 - loss 0.60672540 - samples/sec: 20.88 - lr: 0.020000\n",
-      "2021-09-08 10:55:25,991 epoch 2 - iter 30/32 - loss 0.59396711 - samples/sec: 21.66 - lr: 0.020000\n",
-      "2021-09-08 10:55:26,085 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:26,086 EPOCH 2 done: loss 0.5754 - lr 0.0200000\n",
-      "2021-09-08 10:55:26,146 DEV : loss 0.6212338805198669 - score 0.5\n",
-      "2021-09-08 10:55:26,147 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:55:26,150 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:26,296 epoch 3 - iter 3/32 - loss 0.08256906 - samples/sec: 22.54 - lr: 0.020000\n",
-      "2021-09-08 10:55:26,435 epoch 3 - iter 6/32 - loss 0.45329653 - samples/sec: 21.60 - lr: 0.020000\n",
-      "2021-09-08 10:55:26,577 epoch 3 - iter 9/32 - loss 0.52808146 - samples/sec: 21.25 - lr: 0.020000\n",
-      "2021-09-08 10:55:26,713 epoch 3 - iter 12/32 - loss 0.43652944 - samples/sec: 22.24 - lr: 0.020000\n",
-      "2021-09-08 10:55:26,849 epoch 3 - iter 15/32 - loss 0.37398576 - samples/sec: 22.04 - lr: 0.020000\n",
-      "2021-09-08 10:55:26,989 epoch 3 - iter 18/32 - loss 0.43791185 - samples/sec: 21.65 - lr: 0.020000\n",
-      "2021-09-08 10:55:27,127 epoch 3 - iter 21/32 - loss 0.44898515 - samples/sec: 21.74 - lr: 0.020000\n",
-      "2021-09-08 10:55:27,269 epoch 3 - iter 24/32 - loss 0.46357383 - samples/sec: 21.31 - lr: 0.020000\n",
-      "2021-09-08 10:55:27,409 epoch 3 - iter 27/32 - loss 0.45783853 - samples/sec: 21.45 - lr: 0.020000\n",
-      "2021-09-08 10:55:27,543 epoch 3 - iter 30/32 - loss 0.45594333 - samples/sec: 22.55 - lr: 0.020000\n",
-      "2021-09-08 10:55:27,637 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:27,638 EPOCH 3 done: loss 0.4279 - lr 0.0200000\n",
-      "2021-09-08 10:55:27,700 DEV : loss 0.5905765295028687 - score 0.75\n",
-      "2021-09-08 10:55:27,701 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:20:15,609 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:20:15,829 epoch 2 - iter 3/32 - loss 0.72812390 - samples/sec: 15.31 - lr: 0.020000\n",
+      "2021-09-21 19:20:16,040 epoch 2 - iter 6/32 - loss 0.93410262 - samples/sec: 14.25 - lr: 0.020000\n",
+      "2021-09-21 19:20:16,272 epoch 2 - iter 9/32 - loss 0.85000272 - samples/sec: 12.93 - lr: 0.020000\n",
+      "2021-09-21 19:20:16,494 epoch 2 - iter 12/32 - loss 0.70022217 - samples/sec: 13.59 - lr: 0.020000\n",
+      "2021-09-21 19:20:16,720 epoch 2 - iter 15/32 - loss 0.68533153 - samples/sec: 13.27 - lr: 0.020000\n",
+      "2021-09-21 19:20:16,935 epoch 2 - iter 18/32 - loss 0.61756526 - samples/sec: 14.02 - lr: 0.020000\n",
+      "2021-09-21 19:20:17,195 epoch 2 - iter 21/32 - loss 0.60040904 - samples/sec: 11.55 - lr: 0.020000\n",
+      "2021-09-21 19:20:17,440 epoch 2 - iter 24/32 - loss 0.54882916 - samples/sec: 12.29 - lr: 0.020000\n",
+      "2021-09-21 19:20:17,639 epoch 2 - iter 27/32 - loss 0.49688718 - samples/sec: 15.11 - lr: 0.020000\n",
+      "2021-09-21 19:20:17,839 epoch 2 - iter 30/32 - loss 0.45581205 - samples/sec: 15.04 - lr: 0.020000\n",
+      "2021-09-21 19:20:17,980 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:20:17,980 EPOCH 2 done: loss 0.4423 - lr 0.0200000\n",
+      "2021-09-21 19:20:18,147 DEV : loss 1.138171911239624 - score 0.25\n",
+      "2021-09-21 19:20:18,148 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:20:18,150 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:20:18,410 epoch 3 - iter 3/32 - loss 0.00476161 - samples/sec: 13.00 - lr: 0.020000\n",
+      "2021-09-21 19:20:18,657 epoch 3 - iter 6/32 - loss 0.20185094 - samples/sec: 12.14 - lr: 0.020000\n",
+      "2021-09-21 19:20:18,885 epoch 3 - iter 9/32 - loss 0.16391596 - samples/sec: 13.21 - lr: 0.020000\n",
+      "2021-09-21 19:20:19,130 epoch 3 - iter 12/32 - loss 0.26018557 - samples/sec: 12.25 - lr: 0.020000\n",
+      "2021-09-21 19:20:19,378 epoch 3 - iter 15/32 - loss 0.31595844 - samples/sec: 12.13 - lr: 0.020000\n",
+      "2021-09-21 19:20:19,600 epoch 3 - iter 18/32 - loss 0.34655219 - samples/sec: 13.57 - lr: 0.020000\n",
+      "2021-09-21 19:20:19,850 epoch 3 - iter 21/32 - loss 0.45566669 - samples/sec: 12.00 - lr: 0.020000\n",
+      "2021-09-21 19:20:20,103 epoch 3 - iter 24/32 - loss 0.48364452 - samples/sec: 11.91 - lr: 0.020000\n",
+      "2021-09-21 19:20:20,338 epoch 3 - iter 27/32 - loss 0.46301384 - samples/sec: 12.78 - lr: 0.020000\n",
+      "2021-09-21 19:20:20,558 epoch 3 - iter 30/32 - loss 0.43658350 - samples/sec: 13.67 - lr: 0.020000\n",
+      "2021-09-21 19:20:20,713 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:20:20,714 EPOCH 3 done: loss 0.4801 - lr 0.0200000\n",
+      "2021-09-21 19:20:20,847 DEV : loss 0.7096747756004333 - score 0.25\n",
+      "2021-09-21 19:20:20,847 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:20:20,849 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:20:21,120 epoch 4 - iter 3/32 - loss 0.06874802 - samples/sec: 12.77 - lr: 0.020000\n",
+      "2021-09-21 19:20:21,366 epoch 4 - iter 6/32 - loss 0.12912076 - samples/sec: 12.23 - lr: 0.020000\n",
+      "2021-09-21 19:20:21,595 epoch 4 - iter 9/32 - loss 0.09868447 - samples/sec: 13.15 - lr: 0.020000\n",
+      "2021-09-21 19:20:21,824 epoch 4 - iter 12/32 - loss 0.14056940 - samples/sec: 13.10 - lr: 0.020000\n",
+      "2021-09-21 19:20:22,053 epoch 4 - iter 15/32 - loss 0.12097598 - samples/sec: 13.18 - lr: 0.020000\n",
+      "2021-09-21 19:20:22,274 epoch 4 - iter 18/32 - loss 0.10129493 - samples/sec: 13.58 - lr: 0.020000\n",
+      "2021-09-21 19:20:22,482 epoch 4 - iter 21/32 - loss 0.12211060 - samples/sec: 14.45 - lr: 0.020000\n",
+      "2021-09-21 19:20:22,710 epoch 4 - iter 24/32 - loss 0.14170170 - samples/sec: 13.20 - lr: 0.020000\n",
+      "2021-09-21 19:20:22,929 epoch 4 - iter 27/32 - loss 0.15766411 - samples/sec: 13.71 - lr: 0.020000\n",
+      "2021-09-21 19:20:23,151 epoch 4 - iter 30/32 - loss 0.17522878 - samples/sec: 13.56 - lr: 0.020000\n",
+      "2021-09-21 19:20:23,308 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:20:23,309 EPOCH 4 done: loss 0.2257 - lr 0.0200000\n",
+      "2021-09-21 19:20:23,464 DEV : loss 0.6337716579437256 - score 0.5\n",
+      "2021-09-21 19:20:23,468 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:20:23,470 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:20:23,760 epoch 5 - iter 3/32 - loss 0.00276420 - samples/sec: 12.42 - lr: 0.020000\n",
+      "2021-09-21 19:20:24,006 epoch 5 - iter 6/32 - loss 0.18530259 - samples/sec: 12.19 - lr: 0.020000\n",
+      "2021-09-21 19:20:24,232 epoch 5 - iter 9/32 - loss 0.30178888 - samples/sec: 13.33 - lr: 0.020000\n",
+      "2021-09-21 19:20:24,480 epoch 5 - iter 12/32 - loss 0.27518034 - samples/sec: 12.12 - lr: 0.020000\n",
+      "2021-09-21 19:20:24,732 epoch 5 - iter 15/32 - loss 0.27124165 - samples/sec: 11.90 - lr: 0.020000\n",
+      "2021-09-21 19:20:24,938 epoch 5 - iter 18/32 - loss 0.23592329 - samples/sec: 14.61 - lr: 0.020000\n",
+      "2021-09-21 19:20:25,171 epoch 5 - iter 21/32 - loss 0.20515092 - samples/sec: 12.95 - lr: 0.020000\n",
+      "2021-09-21 19:20:25,392 epoch 5 - iter 24/32 - loss 0.21426686 - samples/sec: 13.55 - lr: 0.020000\n",
+      "2021-09-21 19:20:25,624 epoch 5 - iter 27/32 - loss 0.19432988 - samples/sec: 12.96 - lr: 0.020000\n",
+      "2021-09-21 19:20:25,836 epoch 5 - iter 30/32 - loss 0.17625124 - samples/sec: 14.19 - lr: 0.020000\n",
+      "2021-09-21 19:20:25,971 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:20:25,971 EPOCH 5 done: loss 0.2114 - lr 0.0200000\n",
+      "2021-09-21 19:20:26,103 DEV : loss 0.5280801653862 - score 0.75\n",
+      "2021-09-21 19:20:26,103 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:55:31,744 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:31,923 epoch 4 - iter 3/32 - loss 0.21524060 - samples/sec: 18.57 - lr: 0.020000\n",
-      "2021-09-08 10:55:32,086 epoch 4 - iter 6/32 - loss 0.21065787 - samples/sec: 18.52 - lr: 0.020000\n",
-      "2021-09-08 10:55:32,232 epoch 4 - iter 9/32 - loss 0.17475791 - samples/sec: 20.59 - lr: 0.020000\n",
-      "2021-09-08 10:55:32,387 epoch 4 - iter 12/32 - loss 0.16088859 - samples/sec: 19.46 - lr: 0.020000\n",
-      "2021-09-08 10:55:32,539 epoch 4 - iter 15/32 - loss 0.26211232 - samples/sec: 19.83 - lr: 0.020000\n",
-      "2021-09-08 10:55:32,684 epoch 4 - iter 18/32 - loss 0.24818924 - samples/sec: 20.87 - lr: 0.020000\n",
-      "2021-09-08 10:55:32,832 epoch 4 - iter 21/32 - loss 0.24497746 - samples/sec: 20.35 - lr: 0.020000\n",
-      "2021-09-08 10:55:32,983 epoch 4 - iter 24/32 - loss 0.26544135 - samples/sec: 19.90 - lr: 0.020000\n",
-      "2021-09-08 10:55:33,140 epoch 4 - iter 27/32 - loss 0.31302256 - samples/sec: 19.23 - lr: 0.020000\n",
-      "2021-09-08 10:55:33,287 epoch 4 - iter 30/32 - loss 0.34444577 - samples/sec: 20.49 - lr: 0.020000\n",
-      "2021-09-08 10:55:33,399 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:33,400 EPOCH 4 done: loss 0.3839 - lr 0.0200000\n",
-      "2021-09-08 10:55:33,474 DEV : loss 0.3985716700553894 - score 0.75\n",
-      "2021-09-08 10:55:33,475 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:55:38,817 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:38,992 epoch 5 - iter 3/32 - loss 0.16333889 - samples/sec: 19.25 - lr: 0.020000\n",
-      "2021-09-08 10:55:39,145 epoch 5 - iter 6/32 - loss 0.14648038 - samples/sec: 19.72 - lr: 0.020000\n",
-      "2021-09-08 10:55:39,310 epoch 5 - iter 9/32 - loss 0.13390343 - samples/sec: 18.19 - lr: 0.020000\n",
-      "2021-09-08 10:55:39,476 epoch 5 - iter 12/32 - loss 0.38818247 - samples/sec: 18.14 - lr: 0.020000\n",
-      "2021-09-08 10:55:39,627 epoch 5 - iter 15/32 - loss 0.34399261 - samples/sec: 19.99 - lr: 0.020000\n",
-      "2021-09-08 10:55:39,788 epoch 5 - iter 18/32 - loss 0.32286050 - samples/sec: 18.83 - lr: 0.020000\n",
-      "2021-09-08 10:55:39,940 epoch 5 - iter 21/32 - loss 0.29217020 - samples/sec: 19.79 - lr: 0.020000\n",
-      "2021-09-08 10:55:40,104 epoch 5 - iter 24/32 - loss 0.26494976 - samples/sec: 18.42 - lr: 0.020000\n",
-      "2021-09-08 10:55:40,262 epoch 5 - iter 27/32 - loss 0.33182990 - samples/sec: 19.08 - lr: 0.020000\n",
-      "2021-09-08 10:55:40,423 epoch 5 - iter 30/32 - loss 0.30807973 - samples/sec: 18.65 - lr: 0.020000\n",
-      "2021-09-08 10:55:40,529 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:40,529 EPOCH 5 done: loss 0.2934 - lr 0.0200000\n",
-      "2021-09-08 10:55:42,201 DEV : loss 0.6170182228088379 - score 0.5\n",
-      "2021-09-08 10:55:42,202 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:55:42,205 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:42,377 epoch 6 - iter 3/32 - loss 0.05325334 - samples/sec: 19.10 - lr: 0.020000\n",
-      "2021-09-08 10:55:42,546 epoch 6 - iter 6/32 - loss 0.06894789 - samples/sec: 17.88 - lr: 0.020000\n",
-      "2021-09-08 10:55:42,686 epoch 6 - iter 9/32 - loss 0.04977321 - samples/sec: 21.56 - lr: 0.020000\n",
-      "2021-09-08 10:55:42,822 epoch 6 - iter 12/32 - loss 0.03793618 - samples/sec: 22.01 - lr: 0.020000\n",
-      "2021-09-08 10:55:42,979 epoch 6 - iter 15/32 - loss 0.13736771 - samples/sec: 19.27 - lr: 0.020000\n",
-      "2021-09-08 10:55:43,142 epoch 6 - iter 18/32 - loss 0.13219091 - samples/sec: 18.42 - lr: 0.020000\n",
-      "2021-09-08 10:55:43,293 epoch 6 - iter 21/32 - loss 0.12413836 - samples/sec: 20.00 - lr: 0.020000\n",
-      "2021-09-08 10:55:43,450 epoch 6 - iter 24/32 - loss 0.14939858 - samples/sec: 19.25 - lr: 0.020000\n",
-      "2021-09-08 10:55:43,600 epoch 6 - iter 27/32 - loss 0.15665588 - samples/sec: 19.99 - lr: 0.020000\n",
-      "2021-09-08 10:55:43,753 epoch 6 - iter 30/32 - loss 0.14509921 - samples/sec: 19.73 - lr: 0.020000\n",
-      "2021-09-08 10:55:43,848 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:43,848 EPOCH 6 done: loss 0.1391 - lr 0.0200000\n"
+      "2021-09-21 19:20:30,168 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:20:30,424 epoch 6 - iter 3/32 - loss 0.05226390 - samples/sec: 13.37 - lr: 0.020000\n",
+      "2021-09-21 19:20:30,658 epoch 6 - iter 6/32 - loss 0.02846060 - samples/sec: 12.82 - lr: 0.020000\n",
+      "2021-09-21 19:20:30,893 epoch 6 - iter 9/32 - loss 0.05429048 - samples/sec: 12.79 - lr: 0.020000\n",
+      "2021-09-21 19:20:31,097 epoch 6 - iter 12/32 - loss 0.04098381 - samples/sec: 14.77 - lr: 0.020000\n",
+      "2021-09-21 19:20:31,357 epoch 6 - iter 15/32 - loss 0.08134062 - samples/sec: 11.56 - lr: 0.020000\n",
+      "2021-09-21 19:20:31,582 epoch 6 - iter 18/32 - loss 0.07526265 - samples/sec: 13.39 - lr: 0.020000\n",
+      "2021-09-21 19:20:31,801 epoch 6 - iter 21/32 - loss 0.06613923 - samples/sec: 13.72 - lr: 0.020000\n",
+      "2021-09-21 19:20:32,042 epoch 6 - iter 24/32 - loss 0.05792993 - samples/sec: 12.48 - lr: 0.020000\n",
+      "2021-09-21 19:20:32,268 epoch 6 - iter 27/32 - loss 0.08009612 - samples/sec: 13.29 - lr: 0.020000\n",
+      "2021-09-21 19:20:32,482 epoch 6 - iter 30/32 - loss 0.07265396 - samples/sec: 14.08 - lr: 0.020000\n",
+      "2021-09-21 19:20:32,635 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:55:43,936 DEV : loss 0.6793076992034912 - score 0.5\n",
-      "2021-09-08 10:55:43,936 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:55:43,938 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:44,113 epoch 7 - iter 3/32 - loss 0.02053583 - samples/sec: 18.84 - lr: 0.020000\n",
-      "2021-09-08 10:55:44,266 epoch 7 - iter 6/32 - loss 0.01503260 - samples/sec: 19.76 - lr: 0.020000\n",
-      "2021-09-08 10:55:44,415 epoch 7 - iter 9/32 - loss 0.01670213 - samples/sec: 20.23 - lr: 0.020000\n",
-      "2021-09-08 10:55:44,565 epoch 7 - iter 12/32 - loss 0.02252964 - samples/sec: 19.99 - lr: 0.020000\n",
-      "2021-09-08 10:55:44,726 epoch 7 - iter 15/32 - loss 0.11719223 - samples/sec: 18.75 - lr: 0.020000\n",
-      "2021-09-08 10:55:44,872 epoch 7 - iter 18/32 - loss 0.09981590 - samples/sec: 20.73 - lr: 0.020000\n",
-      "2021-09-08 10:55:45,025 epoch 7 - iter 21/32 - loss 0.10321351 - samples/sec: 19.58 - lr: 0.020000\n",
-      "2021-09-08 10:55:45,180 epoch 7 - iter 24/32 - loss 0.09111760 - samples/sec: 19.45 - lr: 0.020000\n",
-      "2021-09-08 10:55:45,331 epoch 7 - iter 27/32 - loss 0.08250408 - samples/sec: 19.99 - lr: 0.020000\n",
-      "2021-09-08 10:55:45,482 epoch 7 - iter 30/32 - loss 0.07438424 - samples/sec: 20.04 - lr: 0.020000\n",
-      "2021-09-08 10:55:45,587 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:45,588 EPOCH 7 done: loss 0.0718 - lr 0.0200000\n",
-      "2021-09-08 10:55:45,822 DEV : loss 0.923184871673584 - score 0.5\n",
-      "2021-09-08 10:55:45,823 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:55:45,910 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:46,067 epoch 8 - iter 3/32 - loss 0.00243684 - samples/sec: 21.14 - lr: 0.020000\n",
-      "2021-09-08 10:55:46,216 epoch 8 - iter 6/32 - loss 0.00130100 - samples/sec: 20.21 - lr: 0.020000\n",
-      "2021-09-08 10:55:46,348 epoch 8 - iter 9/32 - loss 0.00473016 - samples/sec: 22.83 - lr: 0.020000\n",
-      "2021-09-08 10:55:46,493 epoch 8 - iter 12/32 - loss 0.00521510 - samples/sec: 20.75 - lr: 0.020000\n",
-      "2021-09-08 10:55:46,637 epoch 8 - iter 15/32 - loss 0.00490021 - samples/sec: 20.98 - lr: 0.020000\n",
-      "2021-09-08 10:55:46,776 epoch 8 - iter 18/32 - loss 0.00422477 - samples/sec: 21.77 - lr: 0.020000\n",
-      "2021-09-08 10:55:46,928 epoch 8 - iter 21/32 - loss 0.04331415 - samples/sec: 19.89 - lr: 0.020000\n",
-      "2021-09-08 10:55:47,074 epoch 8 - iter 24/32 - loss 0.03805101 - samples/sec: 20.59 - lr: 0.020000\n",
-      "2021-09-08 10:55:47,239 epoch 8 - iter 27/32 - loss 0.03644488 - samples/sec: 18.30 - lr: 0.020000\n",
-      "2021-09-08 10:55:47,390 epoch 8 - iter 30/32 - loss 0.03281094 - samples/sec: 19.84 - lr: 0.020000\n",
-      "2021-09-08 10:55:47,492 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:47,492 EPOCH 8 done: loss 0.0318 - lr 0.0200000\n",
-      "2021-09-08 10:55:47,695 DEV : loss 0.8260840177536011 - score 1.0\n",
-      "2021-09-08 10:55:47,696 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:20:32,635 EPOCH 6 done: loss 0.0682 - lr 0.0200000\n",
+      "2021-09-21 19:20:32,789 DEV : loss 0.777358889579773 - score 0.75\n",
+      "2021-09-21 19:20:32,794 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:20:32,796 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:20:33,049 epoch 7 - iter 3/32 - loss 0.00262618 - samples/sec: 13.79 - lr: 0.020000\n",
+      "2021-09-21 19:20:33,299 epoch 7 - iter 6/32 - loss 0.19452647 - samples/sec: 12.04 - lr: 0.020000\n",
+      "2021-09-21 19:20:33,530 epoch 7 - iter 9/32 - loss 0.13055711 - samples/sec: 13.05 - lr: 0.020000\n",
+      "2021-09-21 19:20:33,760 epoch 7 - iter 12/32 - loss 0.09823423 - samples/sec: 13.06 - lr: 0.020000\n",
+      "2021-09-21 19:20:33,974 epoch 7 - iter 15/32 - loss 0.07978044 - samples/sec: 14.06 - lr: 0.020000\n",
+      "2021-09-21 19:20:34,174 epoch 7 - iter 18/32 - loss 0.06945741 - samples/sec: 14.97 - lr: 0.020000\n",
+      "2021-09-21 19:20:34,364 epoch 7 - iter 21/32 - loss 0.05959263 - samples/sec: 15.85 - lr: 0.020000\n",
+      "2021-09-21 19:20:34,614 epoch 7 - iter 24/32 - loss 0.05617568 - samples/sec: 12.04 - lr: 0.020000\n",
+      "2021-09-21 19:20:34,873 epoch 7 - iter 27/32 - loss 0.05093263 - samples/sec: 11.60 - lr: 0.020000\n",
+      "2021-09-21 19:20:35,102 epoch 7 - iter 30/32 - loss 0.04590487 - samples/sec: 13.15 - lr: 0.020000\n",
+      "2021-09-21 19:20:35,249 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:20:35,250 EPOCH 7 done: loss 0.0603 - lr 0.0200000\n",
+      "2021-09-21 19:20:35,387 DEV : loss 0.21163995563983917 - score 0.75\n",
+      "2021-09-21 19:20:35,389 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:20:40,955 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:20:41,161 epoch 8 - iter 3/32 - loss 0.03707168 - samples/sec: 15.90 - lr: 0.020000\n",
+      "2021-09-21 19:20:41,332 epoch 8 - iter 6/32 - loss 0.15167972 - samples/sec: 17.62 - lr: 0.020000\n",
+      "2021-09-21 19:20:41,519 epoch 8 - iter 9/32 - loss 0.10135049 - samples/sec: 16.09 - lr: 0.020000\n",
+      "2021-09-21 19:20:41,726 epoch 8 - iter 12/32 - loss 0.07636927 - samples/sec: 14.54 - lr: 0.020000\n",
+      "2021-09-21 19:20:41,919 epoch 8 - iter 15/32 - loss 0.06131451 - samples/sec: 15.56 - lr: 0.020000\n",
+      "2021-09-21 19:20:42,092 epoch 8 - iter 18/32 - loss 0.05159080 - samples/sec: 17.43 - lr: 0.020000\n",
+      "2021-09-21 19:20:42,269 epoch 8 - iter 21/32 - loss 0.04959047 - samples/sec: 16.93 - lr: 0.020000\n",
+      "2021-09-21 19:20:42,466 epoch 8 - iter 24/32 - loss 0.04401721 - samples/sec: 15.31 - lr: 0.020000\n",
+      "2021-09-21 19:20:42,693 epoch 8 - iter 27/32 - loss 0.03917234 - samples/sec: 13.22 - lr: 0.020000\n",
+      "2021-09-21 19:20:42,923 epoch 8 - iter 30/32 - loss 0.03535193 - samples/sec: 13.08 - lr: 0.020000\n",
+      "2021-09-21 19:20:43,068 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:20:43,068 EPOCH 8 done: loss 0.0332 - lr 0.0200000\n",
+      "2021-09-21 19:20:43,243 DEV : loss 0.48258841037750244 - score 1.0\n",
+      "2021-09-21 19:20:43,244 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:55:54,493 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:54,661 epoch 9 - iter 3/32 - loss 0.04082637 - samples/sec: 20.24 - lr: 0.020000\n",
-      "2021-09-08 10:55:54,815 epoch 9 - iter 6/32 - loss 0.03465175 - samples/sec: 19.53 - lr: 0.020000\n",
-      "2021-09-08 10:55:54,957 epoch 9 - iter 9/32 - loss 0.02420879 - samples/sec: 21.27 - lr: 0.020000\n",
-      "2021-09-08 10:55:55,103 epoch 9 - iter 12/32 - loss 0.01833792 - samples/sec: 20.65 - lr: 0.020000\n",
-      "2021-09-08 10:55:55,251 epoch 9 - iter 15/32 - loss 0.01553028 - samples/sec: 20.34 - lr: 0.020000\n",
-      "2021-09-08 10:55:55,396 epoch 9 - iter 18/32 - loss 0.01315644 - samples/sec: 20.86 - lr: 0.020000\n",
-      "2021-09-08 10:55:55,548 epoch 9 - iter 21/32 - loss 0.01136258 - samples/sec: 19.79 - lr: 0.020000\n",
-      "2021-09-08 10:55:55,691 epoch 9 - iter 24/32 - loss 0.01032619 - samples/sec: 20.99 - lr: 0.020000\n",
-      "2021-09-08 10:55:55,852 epoch 9 - iter 27/32 - loss 0.01838034 - samples/sec: 18.74 - lr: 0.020000\n",
-      "2021-09-08 10:55:55,999 epoch 9 - iter 30/32 - loss 0.01660374 - samples/sec: 20.49 - lr: 0.020000\n",
-      "2021-09-08 10:55:56,102 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:56,103 EPOCH 9 done: loss 0.0167 - lr 0.0200000\n",
-      "2021-09-08 10:55:56,330 DEV : loss 1.3687644004821777 - score 0.5\n",
-      "2021-09-08 10:55:56,331 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:55:56,333 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:56,503 epoch 10 - iter 3/32 - loss 0.00035140 - samples/sec: 19.45 - lr: 0.020000\n",
-      "2021-09-08 10:55:56,655 epoch 10 - iter 6/32 - loss 0.00028606 - samples/sec: 19.83 - lr: 0.020000\n",
-      "2021-09-08 10:55:56,801 epoch 10 - iter 9/32 - loss 0.00039267 - samples/sec: 20.69 - lr: 0.020000\n",
-      "2021-09-08 10:55:56,945 epoch 10 - iter 12/32 - loss 0.00368993 - samples/sec: 20.81 - lr: 0.020000\n",
-      "2021-09-08 10:55:57,098 epoch 10 - iter 15/32 - loss 0.00301573 - samples/sec: 19.82 - lr: 0.020000\n",
-      "2021-09-08 10:55:57,243 epoch 10 - iter 18/32 - loss 0.00320592 - samples/sec: 20.66 - lr: 0.020000\n",
-      "2021-09-08 10:55:57,400 epoch 10 - iter 21/32 - loss 0.05047571 - samples/sec: 19.20 - lr: 0.020000\n",
-      "2021-09-08 10:55:57,542 epoch 10 - iter 24/32 - loss 0.04422499 - samples/sec: 21.28 - lr: 0.020000\n",
-      "2021-09-08 10:55:57,692 epoch 10 - iter 27/32 - loss 0.03938308 - samples/sec: 20.11 - lr: 0.020000\n",
-      "2021-09-08 10:55:57,837 epoch 10 - iter 30/32 - loss 0.03545769 - samples/sec: 20.74 - lr: 0.020000\n",
-      "2021-09-08 10:55:57,933 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:57,934 EPOCH 10 done: loss 0.0333 - lr 0.0200000\n",
-      "2021-09-08 10:55:58,103 DEV : loss 0.9046944379806519 - score 0.5\n",
-      "2021-09-08 10:55:58,104 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:56:03,650 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:03,650 Testing using best model ...\n",
-      "2021-09-08 10:56:03,652 loading file None1/best-model.pt\n",
+      "2021-09-21 19:20:47,395 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:20:47,671 epoch 9 - iter 3/32 - loss 0.00081928 - samples/sec: 12.23 - lr: 0.020000\n",
+      "2021-09-21 19:20:47,879 epoch 9 - iter 6/32 - loss 0.00069577 - samples/sec: 14.43 - lr: 0.020000\n",
+      "2021-09-21 19:20:48,105 epoch 9 - iter 9/32 - loss 0.00069559 - samples/sec: 13.35 - lr: 0.020000\n",
+      "2021-09-21 19:20:48,351 epoch 9 - iter 12/32 - loss 0.00072325 - samples/sec: 12.22 - lr: 0.020000\n",
+      "2021-09-21 19:20:48,576 epoch 9 - iter 15/32 - loss 0.00067477 - samples/sec: 13.36 - lr: 0.020000\n",
+      "2021-09-21 19:20:48,828 epoch 9 - iter 18/32 - loss 0.00108827 - samples/sec: 11.91 - lr: 0.020000\n",
+      "2021-09-21 19:20:49,049 epoch 9 - iter 21/32 - loss 0.00205754 - samples/sec: 13.60 - lr: 0.020000\n",
+      "2021-09-21 19:20:49,254 epoch 9 - iter 24/32 - loss 0.04277253 - samples/sec: 14.67 - lr: 0.020000\n",
+      "2021-09-21 19:20:49,497 epoch 9 - iter 27/32 - loss 0.12279726 - samples/sec: 12.38 - lr: 0.020000\n",
+      "2021-09-21 19:20:49,717 epoch 9 - iter 30/32 - loss 0.11059257 - samples/sec: 13.67 - lr: 0.020000\n",
+      "2021-09-21 19:20:49,867 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:20:49,867 EPOCH 9 done: loss 0.1037 - lr 0.0200000\n",
+      "2021-09-21 19:20:50,021 DEV : loss 0.7207078337669373 - score 0.75\n",
+      "2021-09-21 19:20:50,022 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:20:50,024 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:20:50,318 epoch 10 - iter 3/32 - loss 0.00159602 - samples/sec: 12.30 - lr: 0.020000\n",
+      "2021-09-21 19:20:50,512 epoch 10 - iter 6/32 - loss 0.00210819 - samples/sec: 15.52 - lr: 0.020000\n",
+      "2021-09-21 19:20:50,710 epoch 10 - iter 9/32 - loss 0.00182066 - samples/sec: 15.18 - lr: 0.020000\n",
+      "2021-09-21 19:20:50,895 epoch 10 - iter 12/32 - loss 0.00151928 - samples/sec: 16.27 - lr: 0.020000\n",
+      "2021-09-21 19:20:51,077 epoch 10 - iter 15/32 - loss 0.00127910 - samples/sec: 16.54 - lr: 0.020000\n",
+      "2021-09-21 19:20:51,287 epoch 10 - iter 18/32 - loss 0.00141875 - samples/sec: 14.34 - lr: 0.020000\n",
+      "2021-09-21 19:20:51,474 epoch 10 - iter 21/32 - loss 0.00627775 - samples/sec: 16.10 - lr: 0.020000\n",
+      "2021-09-21 19:20:51,655 epoch 10 - iter 24/32 - loss 0.00561230 - samples/sec: 16.61 - lr: 0.020000\n",
+      "2021-09-21 19:20:51,850 epoch 10 - iter 27/32 - loss 0.00522093 - samples/sec: 15.43 - lr: 0.020000\n",
+      "2021-09-21 19:20:52,030 epoch 10 - iter 30/32 - loss 0.00479307 - samples/sec: 16.71 - lr: 0.020000\n",
+      "2021-09-21 19:20:52,144 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:20:52,145 EPOCH 10 done: loss 0.0046 - lr 0.0200000\n",
+      "2021-09-21 19:20:52,301 DEV : loss 0.8351771831512451 - score 0.75\n",
+      "2021-09-21 19:20:52,301 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:20:56,259 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:20:56,259 Testing using best model ...\n",
+      "2021-09-21 19:20:56,261 loading file None1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:56:09,411 \t0.25\n",
-      "2021-09-08 10:56:09,412 \n",
+      "2021-09-21 19:21:01,384 \t0.75\n",
+      "2021-09-21 19:21:01,385 \n",
       "Results:\n",
-      "- F-score (micro) 0.25\n",
-      "- F-score (macro) 0.125\n",
-      "- Accuracy 0.25\n",
+      "- F-score (micro) 0.75\n",
+      "- F-score (macro) 0.5\n",
+      "- Accuracy 0.75\n",
       "\n",
       "By class:\n",
       "                              precision    recall  f1-score   support\n",
       "\n",
-      " this text expresses sadness     0.0000    0.0000    0.0000         1\n",
-      "this text expresses optimism     0.0000    0.0000    0.0000         0\n",
-      "   this text expresses anger     0.0000    0.0000    0.0000         0\n",
-      "     this text expresses joy     1.0000    0.3333    0.5000         3\n",
+      " this text expresses sadness     0.0000    0.0000    0.0000         0\n",
+      "this text expresses optimism     0.0000    0.0000    0.0000         1\n",
+      "   this text expresses anger     1.0000    1.0000    1.0000         1\n",
+      "     this text expresses joy     1.0000    1.0000    1.0000         2\n",
       "\n",
-      "                   micro avg     0.2500    0.2500    0.2500         4\n",
-      "                   macro avg     0.2500    0.0833    0.1250         4\n",
-      "                weighted avg     0.7500    0.2500    0.3750         4\n",
-      "                 samples avg     0.2500    0.2500    0.2500         4\n",
+      "                   micro avg     0.7500    0.7500    0.7500         4\n",
+      "                   macro avg     0.5000    0.5000    0.5000         4\n",
+      "                weighted avg     0.7500    0.7500    0.7500         4\n",
+      "                 samples avg     0.7500    0.7500    0.7500         4\n",
       "\n",
-      "2021-09-08 10:56:09,413 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:16,728 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
+      "2021-09-21 19:21:01,386 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:21:15,611 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:56:20,745 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:21:19,993 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 36/36 [00:00<00:00, 38806.21it/s]"
+      "100%|██████████| 36/36 [00:00<00:00, 41311.89it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:56:20,748 [b'this text expresses sadness', b'this text expresses optimism', b'this text expresses anger', b'this text expresses joy']\n",
-      "2021-09-08 10:56:20,878 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:20,880 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:21:19,995 [b'this text expresses sadness', b'this text expresses optimism', b'this text expresses anger', b'this text expresses joy']\n",
+      "2021-09-21 19:21:20,004 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:21:20,006 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -4929,24 +4931,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:56:20,881 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:20,881 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
-      "2021-09-08 10:56:20,882 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:20,882 Parameters:\n",
-      "2021-09-08 10:56:20,882  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:56:20,883  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:56:20,883  - patience: \"3\"\n",
-      "2021-09-08 10:56:20,884  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:56:20,884  - max_epochs: \"10\"\n",
-      "2021-09-08 10:56:20,884  - shuffle: \"True\"\n",
-      "2021-09-08 10:56:20,885  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:56:20,885  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:56:20,885 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:20,886 Model training base path: \"None1\"\n",
-      "2021-09-08 10:56:20,886 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:20,886 Device: cuda:1\n",
-      "2021-09-08 10:56:20,887 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:20,887 Embeddings storage mode: cpu\n"
+      "2021-09-21 19:21:20,006 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:21:20,006 Corpus: \"Corpus: 32 train + 4 dev + 4 test sentences\"\n",
+      "2021-09-21 19:21:20,007 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:21:20,007 Parameters:\n",
+      "2021-09-21 19:21:20,007  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:21:20,008  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:21:20,008  - patience: \"3\"\n",
+      "2021-09-21 19:21:20,008  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:21:20,008  - max_epochs: \"10\"\n",
+      "2021-09-21 19:21:20,009  - shuffle: \"True\"\n",
+      "2021-09-21 19:21:20,009  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:21:20,009  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:21:20,010 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:21:20,010 Model training base path: \"None1\"\n",
+      "2021-09-21 19:21:20,010 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:21:20,010 Device: cuda:0\n",
+      "2021-09-21 19:21:20,011 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:21:20,011 Embeddings storage mode: cpu\n",
+      "2021-09-21 19:21:20,017 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -4960,194 +4963,191 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:56:21,062 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:21,208 epoch 1 - iter 3/32 - loss 0.65069027 - samples/sec: 22.54 - lr: 0.020000\n",
-      "2021-09-08 10:56:21,347 epoch 1 - iter 6/32 - loss 0.47751271 - samples/sec: 21.76 - lr: 0.020000\n",
-      "2021-09-08 10:56:21,483 epoch 1 - iter 9/32 - loss 0.38105228 - samples/sec: 22.23 - lr: 0.020000\n",
-      "2021-09-08 10:56:21,623 epoch 1 - iter 12/32 - loss 0.43291309 - samples/sec: 21.55 - lr: 0.020000\n",
-      "2021-09-08 10:56:21,763 epoch 1 - iter 15/32 - loss 0.52030872 - samples/sec: 21.49 - lr: 0.020000\n",
-      "2021-09-08 10:56:21,903 epoch 1 - iter 18/32 - loss 0.48410539 - samples/sec: 21.55 - lr: 0.020000\n",
-      "2021-09-08 10:56:22,046 epoch 1 - iter 21/32 - loss 0.54097826 - samples/sec: 21.06 - lr: 0.020000\n",
-      "2021-09-08 10:56:22,185 epoch 1 - iter 24/32 - loss 0.48099767 - samples/sec: 21.62 - lr: 0.020000\n",
-      "2021-09-08 10:56:22,325 epoch 1 - iter 27/32 - loss 0.64917300 - samples/sec: 21.55 - lr: 0.020000\n",
-      "2021-09-08 10:56:22,465 epoch 1 - iter 30/32 - loss 0.69904112 - samples/sec: 21.64 - lr: 0.020000\n",
-      "2021-09-08 10:56:22,559 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:22,560 EPOCH 1 done: loss 0.6701 - lr 0.0200000\n",
-      "2021-09-08 10:56:23,203 DEV : loss 0.0402553454041481 - score 1.0\n",
-      "2021-09-08 10:56:23,204 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:21:20,229 epoch 1 - iter 3/32 - loss 0.78851294 - samples/sec: 16.39 - lr: 0.020000\n",
+      "2021-09-21 19:21:20,419 epoch 1 - iter 6/32 - loss 0.50719605 - samples/sec: 15.82 - lr: 0.020000\n",
+      "2021-09-21 19:21:20,618 epoch 1 - iter 9/32 - loss 0.74894559 - samples/sec: 15.17 - lr: 0.020000\n",
+      "2021-09-21 19:21:20,824 epoch 1 - iter 12/32 - loss 0.75393926 - samples/sec: 14.62 - lr: 0.020000\n",
+      "2021-09-21 19:21:21,035 epoch 1 - iter 15/32 - loss 0.66731286 - samples/sec: 14.20 - lr: 0.020000\n",
+      "2021-09-21 19:21:21,238 epoch 1 - iter 18/32 - loss 0.71986961 - samples/sec: 14.80 - lr: 0.020000\n",
+      "2021-09-21 19:21:21,464 epoch 1 - iter 21/32 - loss 0.68233668 - samples/sec: 13.34 - lr: 0.020000\n",
+      "2021-09-21 19:21:21,647 epoch 1 - iter 24/32 - loss 0.60343214 - samples/sec: 16.49 - lr: 0.020000\n",
+      "2021-09-21 19:21:21,835 epoch 1 - iter 27/32 - loss 0.76013957 - samples/sec: 16.00 - lr: 0.020000\n",
+      "2021-09-21 19:21:22,020 epoch 1 - iter 30/32 - loss 0.78478358 - samples/sec: 16.21 - lr: 0.020000\n",
+      "2021-09-21 19:21:22,159 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:21:22,160 EPOCH 1 done: loss 0.7747 - lr 0.0200000\n",
+      "2021-09-21 19:21:22,276 DEV : loss 0.714401125907898 - score 0.25\n",
+      "2021-09-21 19:21:22,276 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:56:29,420 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:29,573 epoch 2 - iter 3/32 - loss 0.69363105 - samples/sec: 21.85 - lr: 0.020000\n",
-      "2021-09-08 10:56:29,709 epoch 2 - iter 6/32 - loss 0.47792479 - samples/sec: 22.07 - lr: 0.020000\n",
-      "2021-09-08 10:56:29,846 epoch 2 - iter 9/32 - loss 0.45415884 - samples/sec: 22.05 - lr: 0.020000\n",
-      "2021-09-08 10:56:29,986 epoch 2 - iter 12/32 - loss 0.52096081 - samples/sec: 21.53 - lr: 0.020000\n",
-      "2021-09-08 10:56:30,122 epoch 2 - iter 15/32 - loss 0.59360608 - samples/sec: 22.19 - lr: 0.020000\n",
-      "2021-09-08 10:56:30,259 epoch 2 - iter 18/32 - loss 0.61505657 - samples/sec: 22.08 - lr: 0.020000\n",
-      "2021-09-08 10:56:30,422 epoch 2 - iter 21/32 - loss 0.65393216 - samples/sec: 18.39 - lr: 0.020000\n",
-      "2021-09-08 10:56:30,579 epoch 2 - iter 24/32 - loss 0.63707967 - samples/sec: 19.22 - lr: 0.020000\n",
-      "2021-09-08 10:56:30,745 epoch 2 - iter 27/32 - loss 0.57427451 - samples/sec: 18.17 - lr: 0.020000\n",
-      "2021-09-08 10:56:30,887 epoch 2 - iter 30/32 - loss 0.53241809 - samples/sec: 21.20 - lr: 0.020000\n",
-      "2021-09-08 10:56:30,984 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:30,985 EPOCH 2 done: loss 0.5390 - lr 0.0200000\n",
-      "2021-09-08 10:56:31,047 DEV : loss 0.00596537534147501 - score 1.0\n",
-      "2021-09-08 10:56:31,048 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:21:26,215 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:21:26,485 epoch 2 - iter 3/32 - loss 0.17469084 - samples/sec: 12.29 - lr: 0.020000\n",
+      "2021-09-21 19:21:26,719 epoch 2 - iter 6/32 - loss 0.54690185 - samples/sec: 12.86 - lr: 0.020000\n",
+      "2021-09-21 19:21:26,936 epoch 2 - iter 9/32 - loss 0.51369786 - samples/sec: 13.90 - lr: 0.020000\n",
+      "2021-09-21 19:21:27,149 epoch 2 - iter 12/32 - loss 0.50155927 - samples/sec: 14.11 - lr: 0.020000\n",
+      "2021-09-21 19:21:27,373 epoch 2 - iter 15/32 - loss 0.50893299 - samples/sec: 13.42 - lr: 0.020000\n",
+      "2021-09-21 19:21:27,621 epoch 2 - iter 18/32 - loss 0.56725017 - samples/sec: 12.14 - lr: 0.020000\n",
+      "2021-09-21 19:21:27,828 epoch 2 - iter 21/32 - loss 0.51782942 - samples/sec: 14.47 - lr: 0.020000\n",
+      "2021-09-21 19:21:28,027 epoch 2 - iter 24/32 - loss 0.59266253 - samples/sec: 15.14 - lr: 0.020000\n",
+      "2021-09-21 19:21:28,224 epoch 2 - iter 27/32 - loss 0.57995862 - samples/sec: 15.27 - lr: 0.020000\n",
+      "2021-09-21 19:21:28,439 epoch 2 - iter 30/32 - loss 0.59257388 - samples/sec: 13.96 - lr: 0.020000\n",
+      "2021-09-21 19:21:28,592 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:21:28,593 EPOCH 2 done: loss 0.5980 - lr 0.0200000\n",
+      "2021-09-21 19:21:28,714 DEV : loss 0.4491647779941559 - score 0.75\n",
+      "2021-09-21 19:21:28,715 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:56:39,318 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:39,520 epoch 3 - iter 3/32 - loss 0.72504195 - samples/sec: 16.22 - lr: 0.020000\n",
-      "2021-09-08 10:56:39,690 epoch 3 - iter 6/32 - loss 0.47242191 - samples/sec: 17.75 - lr: 0.020000\n",
-      "2021-09-08 10:56:39,843 epoch 3 - iter 9/32 - loss 0.47242244 - samples/sec: 19.69 - lr: 0.020000\n",
-      "2021-09-08 10:56:39,983 epoch 3 - iter 12/32 - loss 0.49068697 - samples/sec: 21.60 - lr: 0.020000\n",
-      "2021-09-08 10:56:40,122 epoch 3 - iter 15/32 - loss 0.53745484 - samples/sec: 21.70 - lr: 0.020000\n",
-      "2021-09-08 10:56:40,261 epoch 3 - iter 18/32 - loss 0.45398208 - samples/sec: 21.61 - lr: 0.020000\n",
-      "2021-09-08 10:56:40,406 epoch 3 - iter 21/32 - loss 0.39631169 - samples/sec: 20.90 - lr: 0.020000\n",
-      "2021-09-08 10:56:40,549 epoch 3 - iter 24/32 - loss 0.36247946 - samples/sec: 20.96 - lr: 0.020000\n",
-      "2021-09-08 10:56:40,699 epoch 3 - iter 27/32 - loss 0.33626220 - samples/sec: 20.25 - lr: 0.020000\n",
-      "2021-09-08 10:56:40,841 epoch 3 - iter 30/32 - loss 0.30385174 - samples/sec: 21.12 - lr: 0.020000\n",
-      "2021-09-08 10:56:40,939 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:40,940 EPOCH 3 done: loss 0.3197 - lr 0.0200000\n",
-      "2021-09-08 10:56:41,146 DEV : loss 0.003515427466481924 - score 1.0\n",
-      "2021-09-08 10:56:41,147 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:21:35,246 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:21:35,484 epoch 3 - iter 3/32 - loss 0.49922413 - samples/sec: 15.15 - lr: 0.020000\n",
+      "2021-09-21 19:21:35,670 epoch 3 - iter 6/32 - loss 0.31030617 - samples/sec: 16.22 - lr: 0.020000\n",
+      "2021-09-21 19:21:35,858 epoch 3 - iter 9/32 - loss 0.38300221 - samples/sec: 16.00 - lr: 0.020000\n",
+      "2021-09-21 19:21:36,094 epoch 3 - iter 12/32 - loss 0.37956184 - samples/sec: 12.71 - lr: 0.020000\n",
+      "2021-09-21 19:21:36,315 epoch 3 - iter 15/32 - loss 0.36131277 - samples/sec: 13.64 - lr: 0.020000\n",
+      "2021-09-21 19:21:36,522 epoch 3 - iter 18/32 - loss 0.32429011 - samples/sec: 14.53 - lr: 0.020000\n",
+      "2021-09-21 19:21:36,728 epoch 3 - iter 21/32 - loss 0.32236034 - samples/sec: 14.55 - lr: 0.020000\n",
+      "2021-09-21 19:21:36,930 epoch 3 - iter 24/32 - loss 0.30292134 - samples/sec: 14.94 - lr: 0.020000\n",
+      "2021-09-21 19:21:37,155 epoch 3 - iter 27/32 - loss 0.28978472 - samples/sec: 13.36 - lr: 0.020000\n",
+      "2021-09-21 19:21:37,346 epoch 3 - iter 30/32 - loss 0.26696562 - samples/sec: 15.68 - lr: 0.020000\n",
+      "2021-09-21 19:21:37,487 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:21:37,487 EPOCH 3 done: loss 0.2591 - lr 0.0200000\n",
+      "2021-09-21 19:21:37,627 DEV : loss 0.7784988880157471 - score 0.5\n",
+      "2021-09-21 19:21:37,627 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:21:37,629 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:21:37,849 epoch 4 - iter 3/32 - loss 0.60346474 - samples/sec: 15.23 - lr: 0.020000\n",
+      "2021-09-21 19:21:38,079 epoch 4 - iter 6/32 - loss 0.63246415 - samples/sec: 13.11 - lr: 0.020000\n",
+      "2021-09-21 19:21:38,265 epoch 4 - iter 9/32 - loss 0.47458154 - samples/sec: 16.16 - lr: 0.020000\n",
+      "2021-09-21 19:21:38,476 epoch 4 - iter 12/32 - loss 0.56460335 - samples/sec: 14.24 - lr: 0.020000\n",
+      "2021-09-21 19:21:38,693 epoch 4 - iter 15/32 - loss 0.50417314 - samples/sec: 13.85 - lr: 0.020000\n",
+      "2021-09-21 19:21:38,892 epoch 4 - iter 18/32 - loss 0.53445570 - samples/sec: 15.13 - lr: 0.020000\n",
+      "2021-09-21 19:21:39,128 epoch 4 - iter 21/32 - loss 0.49015180 - samples/sec: 12.74 - lr: 0.020000\n",
+      "2021-09-21 19:21:39,327 epoch 4 - iter 24/32 - loss 0.50641163 - samples/sec: 15.12 - lr: 0.020000\n",
+      "2021-09-21 19:21:39,526 epoch 4 - iter 27/32 - loss 0.45327183 - samples/sec: 15.06 - lr: 0.020000\n",
+      "2021-09-21 19:21:39,714 epoch 4 - iter 30/32 - loss 0.40845626 - samples/sec: 16.03 - lr: 0.020000\n",
+      "2021-09-21 19:21:39,842 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:21:39,843 EPOCH 4 done: loss 0.3848 - lr 0.0200000\n",
+      "2021-09-21 19:21:39,974 DEV : loss 0.8493618965148926 - score 0.5\n",
+      "2021-09-21 19:21:39,974 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:21:39,977 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:21:40,188 epoch 5 - iter 3/32 - loss 0.22571242 - samples/sec: 16.63 - lr: 0.020000\n",
+      "2021-09-21 19:21:40,386 epoch 5 - iter 6/32 - loss 0.39189748 - samples/sec: 15.19 - lr: 0.020000\n",
+      "2021-09-21 19:21:40,581 epoch 5 - iter 9/32 - loss 0.38854959 - samples/sec: 15.40 - lr: 0.020000\n",
+      "2021-09-21 19:21:40,800 epoch 5 - iter 12/32 - loss 0.49160770 - samples/sec: 13.78 - lr: 0.020000\n",
+      "2021-09-21 19:21:41,016 epoch 5 - iter 15/32 - loss 0.39504041 - samples/sec: 13.94 - lr: 0.020000\n",
+      "2021-09-21 19:21:41,206 epoch 5 - iter 18/32 - loss 0.32943140 - samples/sec: 15.77 - lr: 0.020000\n",
+      "2021-09-21 19:21:41,426 epoch 5 - iter 21/32 - loss 0.29979154 - samples/sec: 13.69 - lr: 0.020000\n",
+      "2021-09-21 19:21:41,626 epoch 5 - iter 24/32 - loss 0.29606785 - samples/sec: 15.03 - lr: 0.020000\n",
+      "2021-09-21 19:21:41,832 epoch 5 - iter 27/32 - loss 0.29507346 - samples/sec: 14.60 - lr: 0.020000\n",
+      "2021-09-21 19:21:42,052 epoch 5 - iter 30/32 - loss 0.30193267 - samples/sec: 13.65 - lr: 0.020000\n",
+      "2021-09-21 19:21:42,173 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:21:42,174 EPOCH 5 done: loss 0.2833 - lr 0.0200000\n",
+      "2021-09-21 19:21:42,310 DEV : loss 0.2815582752227783 - score 1.0\n",
+      "2021-09-21 19:21:42,311 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:56:58,224 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:58,386 epoch 4 - iter 3/32 - loss 0.66482461 - samples/sec: 20.76 - lr: 0.020000\n",
-      "2021-09-08 10:56:58,547 epoch 4 - iter 6/32 - loss 0.53248643 - samples/sec: 18.74 - lr: 0.020000\n",
-      "2021-09-08 10:56:58,686 epoch 4 - iter 9/32 - loss 0.35945320 - samples/sec: 21.72 - lr: 0.020000\n",
-      "2021-09-08 10:56:58,826 epoch 4 - iter 12/32 - loss 0.27266510 - samples/sec: 21.48 - lr: 0.020000\n",
-      "2021-09-08 10:56:58,983 epoch 4 - iter 15/32 - loss 0.22541746 - samples/sec: 19.20 - lr: 0.020000\n",
-      "2021-09-08 10:56:59,132 epoch 4 - iter 18/32 - loss 0.19056445 - samples/sec: 20.28 - lr: 0.020000\n",
-      "2021-09-08 10:56:59,290 epoch 4 - iter 21/32 - loss 0.23297072 - samples/sec: 19.05 - lr: 0.020000\n",
-      "2021-09-08 10:56:59,443 epoch 4 - iter 24/32 - loss 0.25979677 - samples/sec: 19.63 - lr: 0.020000\n",
-      "2021-09-08 10:56:59,605 epoch 4 - iter 27/32 - loss 0.28874131 - samples/sec: 18.65 - lr: 0.020000\n",
-      "2021-09-08 10:56:59,760 epoch 4 - iter 30/32 - loss 0.27952908 - samples/sec: 19.40 - lr: 0.020000\n",
-      "2021-09-08 10:56:59,865 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:59,866 EPOCH 4 done: loss 0.2663 - lr 0.0200000\n",
-      "2021-09-08 10:56:59,946 DEV : loss 0.7822226285934448 - score 1.0\n",
-      "2021-09-08 10:56:59,947 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:56:59,949 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:00,128 epoch 5 - iter 3/32 - loss 0.01817377 - samples/sec: 19.70 - lr: 0.020000\n",
-      "2021-09-08 10:57:00,268 epoch 5 - iter 6/32 - loss 0.16060539 - samples/sec: 21.61 - lr: 0.020000\n",
-      "2021-09-08 10:57:00,419 epoch 5 - iter 9/32 - loss 0.19439765 - samples/sec: 19.96 - lr: 0.020000\n",
-      "2021-09-08 10:57:00,568 epoch 5 - iter 12/32 - loss 0.18027107 - samples/sec: 20.22 - lr: 0.020000\n",
-      "2021-09-08 10:57:00,712 epoch 5 - iter 15/32 - loss 0.14480461 - samples/sec: 20.95 - lr: 0.020000\n",
-      "2021-09-08 10:57:00,881 epoch 5 - iter 18/32 - loss 0.18716319 - samples/sec: 17.77 - lr: 0.020000\n",
-      "2021-09-08 10:57:01,031 epoch 5 - iter 21/32 - loss 0.16067776 - samples/sec: 20.23 - lr: 0.020000\n",
-      "2021-09-08 10:57:01,181 epoch 5 - iter 24/32 - loss 0.14264818 - samples/sec: 19.99 - lr: 0.020000\n",
-      "2021-09-08 10:57:01,339 epoch 5 - iter 27/32 - loss 0.14248346 - samples/sec: 19.08 - lr: 0.020000\n",
-      "2021-09-08 10:57:01,493 epoch 5 - iter 30/32 - loss 0.12895324 - samples/sec: 19.66 - lr: 0.020000\n",
-      "2021-09-08 10:57:01,603 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:01,603 EPOCH 5 done: loss 0.1377 - lr 0.0200000\n",
-      "2021-09-08 10:57:01,795 DEV : loss 0.020556490868330002 - score 1.0\n",
-      "2021-09-08 10:57:01,796 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:57:01,903 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:02,089 epoch 6 - iter 3/32 - loss 0.00754259 - samples/sec: 17.88 - lr: 0.020000\n",
-      "2021-09-08 10:57:02,246 epoch 6 - iter 6/32 - loss 0.00575264 - samples/sec: 19.26 - lr: 0.020000\n",
-      "2021-09-08 10:57:02,404 epoch 6 - iter 9/32 - loss 0.21660285 - samples/sec: 19.03 - lr: 0.020000\n",
-      "2021-09-08 10:57:02,551 epoch 6 - iter 12/32 - loss 0.16297873 - samples/sec: 20.54 - lr: 0.020000\n",
-      "2021-09-08 10:57:02,702 epoch 6 - iter 15/32 - loss 0.20789776 - samples/sec: 19.88 - lr: 0.020000\n",
-      "2021-09-08 10:57:02,849 epoch 6 - iter 18/32 - loss 0.17371014 - samples/sec: 20.53 - lr: 0.020000\n",
-      "2021-09-08 10:57:03,001 epoch 6 - iter 21/32 - loss 0.17354143 - samples/sec: 19.85 - lr: 0.020000\n",
-      "2021-09-08 10:57:03,156 epoch 6 - iter 24/32 - loss 0.15224771 - samples/sec: 19.34 - lr: 0.020000\n",
-      "2021-09-08 10:57:03,308 epoch 6 - iter 27/32 - loss 0.13744103 - samples/sec: 19.98 - lr: 0.020000\n",
-      "2021-09-08 10:57:03,473 epoch 6 - iter 30/32 - loss 0.12694027 - samples/sec: 18.29 - lr: 0.020000\n"
+      "2021-09-21 19:21:46,624 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:21:46,859 epoch 6 - iter 3/32 - loss 0.02244820 - samples/sec: 16.68 - lr: 0.020000\n",
+      "2021-09-21 19:21:47,046 epoch 6 - iter 6/32 - loss 0.01464263 - samples/sec: 16.07 - lr: 0.020000\n",
+      "2021-09-21 19:21:47,261 epoch 6 - iter 9/32 - loss 0.01021428 - samples/sec: 14.01 - lr: 0.020000\n",
+      "2021-09-21 19:21:47,468 epoch 6 - iter 12/32 - loss 0.00894697 - samples/sec: 14.51 - lr: 0.020000\n",
+      "2021-09-21 19:21:47,681 epoch 6 - iter 15/32 - loss 0.02735986 - samples/sec: 14.13 - lr: 0.020000\n",
+      "2021-09-21 19:21:47,877 epoch 6 - iter 18/32 - loss 0.02320732 - samples/sec: 15.33 - lr: 0.020000\n",
+      "2021-09-21 19:21:48,063 epoch 6 - iter 21/32 - loss 0.09433274 - samples/sec: 16.19 - lr: 0.020000\n",
+      "2021-09-21 19:21:48,277 epoch 6 - iter 24/32 - loss 0.08355706 - samples/sec: 14.04 - lr: 0.020000\n",
+      "2021-09-21 19:21:48,493 epoch 6 - iter 27/32 - loss 0.10068214 - samples/sec: 13.92 - lr: 0.020000\n",
+      "2021-09-21 19:21:48,719 epoch 6 - iter 30/32 - loss 0.12323603 - samples/sec: 13.35 - lr: 0.020000\n",
+      "2021-09-21 19:21:48,850 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:57:03,573 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:03,574 EPOCH 6 done: loss 0.1206 - lr 0.0200000\n",
-      "2021-09-08 10:57:04,460 DEV : loss 0.00040039693703874946 - score 1.0\n",
-      "2021-09-08 10:57:04,461 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:57:13,229 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:13,406 epoch 7 - iter 3/32 - loss 0.00204504 - samples/sec: 18.85 - lr: 0.020000\n",
-      "2021-09-08 10:57:13,565 epoch 7 - iter 6/32 - loss 0.10100500 - samples/sec: 19.01 - lr: 0.020000\n",
-      "2021-09-08 10:57:13,715 epoch 7 - iter 9/32 - loss 0.12999857 - samples/sec: 20.09 - lr: 0.020000\n",
-      "2021-09-08 10:57:13,871 epoch 7 - iter 12/32 - loss 0.16324394 - samples/sec: 19.28 - lr: 0.020000\n",
-      "2021-09-08 10:57:14,026 epoch 7 - iter 15/32 - loss 0.17260111 - samples/sec: 19.45 - lr: 0.020000\n",
-      "2021-09-08 10:57:14,171 epoch 7 - iter 18/32 - loss 0.20337447 - samples/sec: 20.77 - lr: 0.020000\n",
-      "2021-09-08 10:57:14,319 epoch 7 - iter 21/32 - loss 0.17486995 - samples/sec: 20.28 - lr: 0.020000\n",
-      "2021-09-08 10:57:14,471 epoch 7 - iter 24/32 - loss 0.15356903 - samples/sec: 19.91 - lr: 0.020000\n",
-      "2021-09-08 10:57:14,614 epoch 7 - iter 27/32 - loss 0.13676650 - samples/sec: 20.98 - lr: 0.020000\n",
-      "2021-09-08 10:57:14,757 epoch 7 - iter 30/32 - loss 0.12329830 - samples/sec: 21.10 - lr: 0.020000\n",
-      "2021-09-08 10:57:14,857 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:14,857 EPOCH 7 done: loss 0.1280 - lr 0.0200000\n",
-      "2021-09-08 10:57:14,941 DEV : loss 0.00015190283011179417 - score 1.0\n",
-      "2021-09-08 10:57:14,943 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:57:19,215 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:19,383 epoch 8 - iter 3/32 - loss 0.01399660 - samples/sec: 19.81 - lr: 0.020000\n",
-      "2021-09-08 10:57:19,541 epoch 8 - iter 6/32 - loss 0.01355959 - samples/sec: 18.98 - lr: 0.020000\n",
-      "2021-09-08 10:57:19,691 epoch 8 - iter 9/32 - loss 0.03146007 - samples/sec: 20.16 - lr: 0.020000\n",
-      "2021-09-08 10:57:19,833 epoch 8 - iter 12/32 - loss 0.02472077 - samples/sec: 21.26 - lr: 0.020000\n",
-      "2021-09-08 10:57:19,987 epoch 8 - iter 15/32 - loss 0.01981519 - samples/sec: 19.52 - lr: 0.020000\n",
-      "2021-09-08 10:57:20,132 epoch 8 - iter 18/32 - loss 0.01664520 - samples/sec: 20.71 - lr: 0.020000\n",
-      "2021-09-08 10:57:20,284 epoch 8 - iter 21/32 - loss 0.01460941 - samples/sec: 19.83 - lr: 0.020000\n",
-      "2021-09-08 10:57:20,447 epoch 8 - iter 24/32 - loss 0.01457923 - samples/sec: 18.51 - lr: 0.020000\n",
-      "2021-09-08 10:57:20,585 epoch 8 - iter 27/32 - loss 0.01320537 - samples/sec: 21.88 - lr: 0.020000\n",
-      "2021-09-08 10:57:20,729 epoch 8 - iter 30/32 - loss 0.01315583 - samples/sec: 20.88 - lr: 0.020000\n",
-      "2021-09-08 10:57:20,834 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:20,835 EPOCH 8 done: loss 0.0123 - lr 0.0200000\n",
-      "2021-09-08 10:57:20,907 DEV : loss 6.44709070911631e-05 - score 1.0\n",
-      "2021-09-08 10:57:20,908 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:57:26,446 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:26,635 epoch 9 - iter 3/32 - loss 0.21038961 - samples/sec: 17.85 - lr: 0.020000\n",
-      "2021-09-08 10:57:26,803 epoch 9 - iter 6/32 - loss 0.10527423 - samples/sec: 17.93 - lr: 0.020000\n",
-      "2021-09-08 10:57:26,968 epoch 9 - iter 9/32 - loss 0.07966110 - samples/sec: 18.21 - lr: 0.020000\n",
-      "2021-09-08 10:57:27,131 epoch 9 - iter 12/32 - loss 0.05995611 - samples/sec: 18.48 - lr: 0.020000\n",
-      "2021-09-08 10:57:27,312 epoch 9 - iter 15/32 - loss 0.04817908 - samples/sec: 16.60 - lr: 0.020000\n",
-      "2021-09-08 10:57:27,487 epoch 9 - iter 18/32 - loss 0.04042866 - samples/sec: 17.26 - lr: 0.020000\n",
-      "2021-09-08 10:57:27,653 epoch 9 - iter 21/32 - loss 0.04140669 - samples/sec: 18.08 - lr: 0.020000\n",
-      "2021-09-08 10:57:27,824 epoch 9 - iter 24/32 - loss 0.04019143 - samples/sec: 17.70 - lr: 0.020000\n",
-      "2021-09-08 10:57:27,993 epoch 9 - iter 27/32 - loss 0.04161182 - samples/sec: 17.76 - lr: 0.020000\n",
-      "2021-09-08 10:57:28,163 epoch 9 - iter 30/32 - loss 0.03774182 - samples/sec: 17.80 - lr: 0.020000\n",
-      "2021-09-08 10:57:28,278 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:28,279 EPOCH 9 done: loss 0.0354 - lr 0.0200000\n",
-      "2021-09-08 10:57:28,372 DEV : loss 0.00016068681725300848 - score 1.0\n",
-      "2021-09-08 10:57:28,373 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:57:28,377 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:28,564 epoch 10 - iter 3/32 - loss 0.00029731 - samples/sec: 18.07 - lr: 0.020000\n",
-      "2021-09-08 10:57:28,729 epoch 10 - iter 6/32 - loss 0.00574470 - samples/sec: 18.29 - lr: 0.020000\n",
-      "2021-09-08 10:57:28,903 epoch 10 - iter 9/32 - loss 0.00401323 - samples/sec: 17.29 - lr: 0.020000\n",
-      "2021-09-08 10:57:29,072 epoch 10 - iter 12/32 - loss 0.00693917 - samples/sec: 17.82 - lr: 0.020000\n",
-      "2021-09-08 10:57:29,237 epoch 10 - iter 15/32 - loss 0.00578374 - samples/sec: 18.27 - lr: 0.020000\n",
-      "2021-09-08 10:57:29,402 epoch 10 - iter 18/32 - loss 0.00488936 - samples/sec: 18.29 - lr: 0.020000\n",
-      "2021-09-08 10:57:29,569 epoch 10 - iter 21/32 - loss 0.00433218 - samples/sec: 18.10 - lr: 0.020000\n",
-      "2021-09-08 10:57:29,741 epoch 10 - iter 24/32 - loss 0.00414072 - samples/sec: 17.48 - lr: 0.020000\n",
-      "2021-09-08 10:57:29,908 epoch 10 - iter 27/32 - loss 0.00371319 - samples/sec: 18.03 - lr: 0.020000\n",
-      "2021-09-08 10:57:30,077 epoch 10 - iter 30/32 - loss 0.00338525 - samples/sec: 17.81 - lr: 0.020000\n",
-      "2021-09-08 10:57:30,185 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:30,185 EPOCH 10 done: loss 0.0032 - lr 0.0200000\n",
-      "2021-09-08 10:57:30,280 DEV : loss 8.482612611260265e-05 - score 1.0\n",
-      "2021-09-08 10:57:30,282 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:57:34,283 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:34,283 Testing using best model ...\n",
-      "2021-09-08 10:57:34,285 loading file None1/best-model.pt\n",
+      "2021-09-21 19:21:48,850 EPOCH 6 done: loss 0.1208 - lr 0.0200000\n",
+      "2021-09-21 19:21:48,986 DEV : loss 1.0306739807128906 - score 0.5\n",
+      "2021-09-21 19:21:48,986 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:21:48,988 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:21:49,206 epoch 7 - iter 3/32 - loss 0.04205064 - samples/sec: 15.95 - lr: 0.020000\n",
+      "2021-09-21 19:21:49,396 epoch 7 - iter 6/32 - loss 0.05643687 - samples/sec: 15.83 - lr: 0.020000\n",
+      "2021-09-21 19:21:49,593 epoch 7 - iter 9/32 - loss 0.04777072 - samples/sec: 15.27 - lr: 0.020000\n",
+      "2021-09-21 19:21:49,765 epoch 7 - iter 12/32 - loss 0.03726615 - samples/sec: 17.49 - lr: 0.020000\n",
+      "2021-09-21 19:21:49,966 epoch 7 - iter 15/32 - loss 0.06207179 - samples/sec: 14.95 - lr: 0.020000\n",
+      "2021-09-21 19:21:50,152 epoch 7 - iter 18/32 - loss 0.05207392 - samples/sec: 16.15 - lr: 0.020000\n",
+      "2021-09-21 19:21:50,335 epoch 7 - iter 21/32 - loss 0.08865103 - samples/sec: 16.46 - lr: 0.020000\n",
+      "2021-09-21 19:21:50,544 epoch 7 - iter 24/32 - loss 0.08976791 - samples/sec: 14.37 - lr: 0.020000\n",
+      "2021-09-21 19:21:50,772 epoch 7 - iter 27/32 - loss 0.08204447 - samples/sec: 13.20 - lr: 0.020000\n",
+      "2021-09-21 19:21:50,956 epoch 7 - iter 30/32 - loss 0.07583055 - samples/sec: 16.32 - lr: 0.020000\n",
+      "2021-09-21 19:21:51,088 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:21:51,088 EPOCH 7 done: loss 0.0734 - lr 0.0200000\n",
+      "2021-09-21 19:21:51,206 DEV : loss 0.9229949116706848 - score 0.75\n",
+      "2021-09-21 19:21:51,206 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:21:51,209 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:21:51,443 epoch 8 - iter 3/32 - loss 0.55165039 - samples/sec: 14.76 - lr: 0.020000\n",
+      "2021-09-21 19:21:51,648 epoch 8 - iter 6/32 - loss 0.44950621 - samples/sec: 14.66 - lr: 0.020000\n",
+      "2021-09-21 19:21:51,844 epoch 8 - iter 9/32 - loss 0.30831038 - samples/sec: 15.36 - lr: 0.020000\n",
+      "2021-09-21 19:21:52,007 epoch 8 - iter 12/32 - loss 0.23214513 - samples/sec: 18.45 - lr: 0.020000\n",
+      "2021-09-21 19:21:52,157 epoch 8 - iter 15/32 - loss 0.19086818 - samples/sec: 20.12 - lr: 0.020000\n",
+      "2021-09-21 19:21:52,393 epoch 8 - iter 18/32 - loss 0.16739934 - samples/sec: 12.74 - lr: 0.020000\n",
+      "2021-09-21 19:21:52,574 epoch 8 - iter 21/32 - loss 0.14382918 - samples/sec: 16.58 - lr: 0.020000\n",
+      "2021-09-21 19:21:52,786 epoch 8 - iter 24/32 - loss 0.13267838 - samples/sec: 14.21 - lr: 0.020000\n",
+      "2021-09-21 19:21:52,957 epoch 8 - iter 27/32 - loss 0.11832144 - samples/sec: 17.58 - lr: 0.020000\n",
+      "2021-09-21 19:21:53,146 epoch 8 - iter 30/32 - loss 0.10673435 - samples/sec: 15.91 - lr: 0.020000\n",
+      "2021-09-21 19:21:53,265 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:21:53,265 EPOCH 8 done: loss 0.1012 - lr 0.0200000\n",
+      "2021-09-21 19:21:53,387 DEV : loss 1.666229009628296 - score 0.5\n",
+      "2021-09-21 19:21:53,388 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:21:53,390 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:21:53,596 epoch 9 - iter 3/32 - loss 0.05184333 - samples/sec: 16.46 - lr: 0.020000\n",
+      "2021-09-21 19:21:53,787 epoch 9 - iter 6/32 - loss 0.02697748 - samples/sec: 15.79 - lr: 0.020000\n",
+      "2021-09-21 19:21:53,965 epoch 9 - iter 9/32 - loss 0.01814146 - samples/sec: 16.82 - lr: 0.020000\n",
+      "2021-09-21 19:21:54,158 epoch 9 - iter 12/32 - loss 0.01373183 - samples/sec: 15.64 - lr: 0.020000\n",
+      "2021-09-21 19:21:54,337 epoch 9 - iter 15/32 - loss 0.02895799 - samples/sec: 16.84 - lr: 0.020000\n",
+      "2021-09-21 19:21:54,525 epoch 9 - iter 18/32 - loss 0.02417837 - samples/sec: 15.99 - lr: 0.020000\n",
+      "2021-09-21 19:21:54,696 epoch 9 - iter 21/32 - loss 0.02094196 - samples/sec: 17.56 - lr: 0.020000\n",
+      "2021-09-21 19:21:54,890 epoch 9 - iter 24/32 - loss 0.01858750 - samples/sec: 15.55 - lr: 0.020000\n",
+      "2021-09-21 19:21:55,065 epoch 9 - iter 27/32 - loss 0.02946554 - samples/sec: 17.16 - lr: 0.020000\n",
+      "2021-09-21 19:21:55,263 epoch 9 - iter 30/32 - loss 0.02677374 - samples/sec: 15.22 - lr: 0.020000\n",
+      "2021-09-21 19:21:55,391 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:21:55,391 EPOCH 9 done: loss 0.0251 - lr 0.0200000\n",
+      "2021-09-21 19:21:55,499 DEV : loss 1.6287509202957153 - score 0.5\n",
+      "Epoch     9: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 19:21:55,500 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:21:55,502 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:21:55,688 epoch 10 - iter 3/32 - loss 0.00102512 - samples/sec: 18.48 - lr: 0.010000\n",
+      "2021-09-21 19:21:55,894 epoch 10 - iter 6/32 - loss 0.00212266 - samples/sec: 14.60 - lr: 0.010000\n",
+      "2021-09-21 19:21:56,083 epoch 10 - iter 9/32 - loss 0.00169160 - samples/sec: 15.95 - lr: 0.010000\n",
+      "2021-09-21 19:21:56,248 epoch 10 - iter 12/32 - loss 0.00146711 - samples/sec: 18.20 - lr: 0.010000\n",
+      "2021-09-21 19:21:56,421 epoch 10 - iter 15/32 - loss 0.00124753 - samples/sec: 17.37 - lr: 0.010000\n",
+      "2021-09-21 19:21:56,603 epoch 10 - iter 18/32 - loss 0.00113008 - samples/sec: 16.58 - lr: 0.010000\n",
+      "2021-09-21 19:21:56,762 epoch 10 - iter 21/32 - loss 0.00104019 - samples/sec: 18.84 - lr: 0.010000\n",
+      "2021-09-21 19:21:56,928 epoch 10 - iter 24/32 - loss 0.00180787 - samples/sec: 18.12 - lr: 0.010000\n",
+      "2021-09-21 19:21:57,100 epoch 10 - iter 27/32 - loss 0.00186893 - samples/sec: 17.52 - lr: 0.010000\n",
+      "2021-09-21 19:21:57,292 epoch 10 - iter 30/32 - loss 0.00181256 - samples/sec: 15.71 - lr: 0.010000\n",
+      "2021-09-21 19:21:57,414 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:21:57,414 EPOCH 10 done: loss 0.0018 - lr 0.0100000\n",
+      "2021-09-21 19:21:57,554 DEV : loss 1.576278805732727 - score 0.5\n",
+      "2021-09-21 19:21:57,555 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:22:05,823 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:05,824 Testing using best model ...\n",
+      "2021-09-21 19:22:05,825 loading file None1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:57:39,106 \t0.25\n",
-      "2021-09-08 10:57:39,107 \n",
+      "2021-09-21 19:22:13,579 \t0.75\n",
+      "2021-09-21 19:22:13,580 \n",
       "Results:\n",
-      "- F-score (micro) 0.25\n",
-      "- F-score (macro) 0.1667\n",
-      "- Accuracy 0.25\n",
+      "- F-score (micro) 0.75\n",
+      "- F-score (macro) 0.5\n",
+      "- Accuracy 0.75\n",
       "\n",
       "By class:\n",
       "                              precision    recall  f1-score   support\n",
       "\n",
-      " this text expresses sadness     0.0000    0.0000    0.0000         0\n",
+      " this text expresses sadness     1.0000    1.0000    1.0000         1\n",
       "this text expresses optimism     0.0000    0.0000    0.0000         1\n",
-      "   this text expresses anger     0.0000    0.0000    0.0000         1\n",
-      "     this text expresses joy     1.0000    0.5000    0.6667         2\n",
+      "   this text expresses anger     0.0000    0.0000    0.0000         0\n",
+      "     this text expresses joy     1.0000    1.0000    1.0000         2\n",
       "\n",
-      "                   micro avg     0.2500    0.2500    0.2500         4\n",
-      "                   macro avg     0.2500    0.1250    0.1667         4\n",
-      "                weighted avg     0.5000    0.2500    0.3333         4\n",
-      "                 samples avg     0.2500    0.2500    0.2500         4\n",
+      "                   micro avg     0.7500    0.7500    0.7500         4\n",
+      "                   macro avg     0.5000    0.5000    0.5000         4\n",
+      "                weighted avg     0.7500    0.7500    0.7500         4\n",
+      "                 samples avg     0.7500    0.7500    0.7500         4\n",
       "\n",
-      "2021-09-08 10:57:39,107 ----------------------------------------------------------------------------------------------------\n",
-      "Accuracy Durchschnitt: 0.6227805695142379\n"
+      "2021-09-21 19:22:13,580 ----------------------------------------------------------------------------------------------------\n",
+      "Accuracy Durchschnitt: 0.6442211055276382\n"
      ]
     }
    ],
@@ -5220,11 +5220,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "id": "86988d74",
+   "execution_count": 7,
+   "id": "41564259",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.6348408710217756, 0.6231155778894473, 0.6381909547738693, 0.6850921273031826, 0.6398659966499163]\n",
+      "0.021256538799618495\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(avg_acc_list)\n",
+    "print(statistics.pstdev(avg_acc_list))"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -5236,7 +5248,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 8,
    "id": "263ee449",
    "metadata": {},
    "outputs": [
@@ -5244,25 +5256,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:57:48,949 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
+      "2021-09-21 19:22:29,890 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:57:53,394 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:22:35,922 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 4/4 [00:00<00:00, 23730.15it/s]"
+      "100%|██████████| 4/4 [00:00<00:00, 21481.71it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:57:53,396 [b'emotions experienced when not in a state of well-being', b'the optimistic feeling that all is going to turn out well', b'the state of being angry', b'the emotion of great happiness']\n",
-      "2021-09-08 10:57:53,548 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:53,550 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:22:35,924 [b'emotions experienced when not in a state of well-being', b'the optimistic feeling that all is going to turn out well', b'the state of being angry', b'the emotion of great happiness']\n",
+      "2021-09-21 19:22:35,942 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:35,944 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -5575,24 +5587,24 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:57:53,551 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:53,551 Corpus: \"Corpus: 4 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 10:57:53,552 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:53,552 Parameters:\n",
-      "2021-09-08 10:57:53,552  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:57:53,553  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:57:53,553  - patience: \"3\"\n",
-      "2021-09-08 10:57:53,553  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:57:53,553  - max_epochs: \"10\"\n",
-      "2021-09-08 10:57:53,554  - shuffle: \"True\"\n",
-      "2021-09-08 10:57:53,554  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:57:53,554  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:57:53,555 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:53,555 Model training base path: \"None1\"\n",
-      "2021-09-08 10:57:53,555 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:53,555 Device: cuda:1\n",
-      "2021-09-08 10:57:53,556 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:53,556 Embeddings storage mode: cpu\n"
+      "2021-09-21 19:22:35,945 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:35,945 Corpus: \"Corpus: 4 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 19:22:35,945 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:35,946 Parameters:\n",
+      "2021-09-21 19:22:35,946  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:22:35,946  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:22:35,946  - patience: \"3\"\n",
+      "2021-09-21 19:22:35,947  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:22:35,947  - max_epochs: \"10\"\n",
+      "2021-09-21 19:22:35,947  - shuffle: \"True\"\n",
+      "2021-09-21 19:22:35,947  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:22:35,948  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:22:35,948 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:35,948 Model training base path: \"None1\"\n",
+      "2021-09-21 19:22:35,949 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:35,949 Device: cuda:0\n",
+      "2021-09-21 19:22:35,949 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:35,949 Embeddings storage mode: cpu\n"
      ]
     },
     {
@@ -5606,109 +5618,122 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:57:53,784 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:53,847 epoch 1 - iter 1/4 - loss 0.04848063 - samples/sec: 21.45 - lr: 0.020000\n",
-      "2021-09-08 10:57:53,903 epoch 1 - iter 2/4 - loss 0.68695313 - samples/sec: 18.06 - lr: 0.020000\n",
-      "2021-09-08 10:57:53,959 epoch 1 - iter 3/4 - loss 1.07148465 - samples/sec: 17.98 - lr: 0.020000\n",
-      "2021-09-08 10:57:54,014 epoch 1 - iter 4/4 - loss 1.04297537 - samples/sec: 18.18 - lr: 0.020000\n",
-      "2021-09-08 10:57:54,015 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:54,015 EPOCH 1 done: loss 1.0430 - lr 0.0200000\n",
-      "2021-09-08 10:57:54,016 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:22:36,153 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:36,233 epoch 1 - iter 1/4 - loss 0.46727428 - samples/sec: 17.27 - lr: 0.020000\n",
+      "2021-09-21 19:22:36,296 epoch 1 - iter 2/4 - loss 0.56771366 - samples/sec: 16.03 - lr: 0.020000\n",
+      "2021-09-21 19:22:36,362 epoch 1 - iter 3/4 - loss 0.57101505 - samples/sec: 15.31 - lr: 0.020000\n",
+      "2021-09-21 19:22:36,430 epoch 1 - iter 4/4 - loss 0.67028547 - samples/sec: 14.97 - lr: 0.020000\n",
+      "2021-09-21 19:22:36,431 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:36,432 EPOCH 1 done: loss 0.6703 - lr 0.0200000\n",
+      "2021-09-21 19:22:36,432 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:58:06,133 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:06,208 epoch 2 - iter 1/4 - loss 0.26102403 - samples/sec: 17.49 - lr: 0.020000\n",
-      "2021-09-08 10:58:06,264 epoch 2 - iter 2/4 - loss 0.68396743 - samples/sec: 17.97 - lr: 0.020000\n",
-      "2021-09-08 10:58:06,320 epoch 2 - iter 3/4 - loss 0.47841118 - samples/sec: 17.85 - lr: 0.020000\n",
-      "2021-09-08 10:58:06,376 epoch 2 - iter 4/4 - loss 0.51262322 - samples/sec: 18.20 - lr: 0.020000\n",
-      "2021-09-08 10:58:06,377 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:06,377 EPOCH 2 done: loss 0.5126 - lr 0.0200000\n",
-      "2021-09-08 10:58:06,378 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:58:07,057 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:07,131 epoch 3 - iter 1/4 - loss 0.18292814 - samples/sec: 17.06 - lr: 0.020000\n",
-      "2021-09-08 10:58:07,194 epoch 3 - iter 2/4 - loss 0.15829101 - samples/sec: 16.07 - lr: 0.020000\n",
-      "2021-09-08 10:58:07,256 epoch 3 - iter 3/4 - loss 0.34305185 - samples/sec: 16.32 - lr: 0.020000\n",
-      "2021-09-08 10:58:07,321 epoch 3 - iter 4/4 - loss 0.25755466 - samples/sec: 15.40 - lr: 0.020000\n",
-      "2021-09-08 10:58:07,322 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:07,323 EPOCH 3 done: loss 0.2576 - lr 0.0200000\n",
-      "2021-09-08 10:58:07,323 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:58:07,327 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:07,408 epoch 4 - iter 1/4 - loss 0.16306518 - samples/sec: 15.56 - lr: 0.020000\n",
-      "2021-09-08 10:58:07,467 epoch 4 - iter 2/4 - loss 0.08386537 - samples/sec: 17.14 - lr: 0.020000\n",
-      "2021-09-08 10:58:07,525 epoch 4 - iter 3/4 - loss 0.06240819 - samples/sec: 17.52 - lr: 0.020000\n",
-      "2021-09-08 10:58:07,580 epoch 4 - iter 4/4 - loss 0.09993966 - samples/sec: 18.10 - lr: 0.020000\n",
-      "2021-09-08 10:58:07,581 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:07,582 EPOCH 4 done: loss 0.0999 - lr 0.0200000\n",
-      "2021-09-08 10:58:07,582 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:58:07,587 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:07,672 epoch 5 - iter 1/4 - loss 0.06900992 - samples/sec: 16.47 - lr: 0.020000\n",
-      "2021-09-08 10:58:07,732 epoch 5 - iter 2/4 - loss 0.03537716 - samples/sec: 16.83 - lr: 0.020000\n",
-      "2021-09-08 10:58:07,795 epoch 5 - iter 3/4 - loss 0.02465147 - samples/sec: 15.84 - lr: 0.020000\n",
-      "2021-09-08 10:58:07,856 epoch 5 - iter 4/4 - loss 0.10858460 - samples/sec: 16.68 - lr: 0.020000\n",
-      "2021-09-08 10:58:07,857 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:07,857 EPOCH 5 done: loss 0.1086 - lr 0.0200000\n",
+      "2021-09-21 19:22:40,629 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:40,723 epoch 2 - iter 1/4 - loss 0.81807166 - samples/sec: 14.69 - lr: 0.020000\n",
+      "2021-09-21 19:22:40,786 epoch 2 - iter 2/4 - loss 0.44090940 - samples/sec: 16.13 - lr: 0.020000\n",
+      "2021-09-21 19:22:40,838 epoch 2 - iter 3/4 - loss 0.31326505 - samples/sec: 19.32 - lr: 0.020000\n",
+      "2021-09-21 19:22:40,905 epoch 2 - iter 4/4 - loss 0.34297352 - samples/sec: 15.12 - lr: 0.020000\n",
+      "2021-09-21 19:22:40,906 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:40,907 EPOCH 2 done: loss 0.3430 - lr 0.0200000\n",
+      "2021-09-21 19:22:40,907 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:22:40,916 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:41,014 epoch 3 - iter 1/4 - loss 0.18432935 - samples/sec: 14.38 - lr: 0.020000\n",
+      "2021-09-21 19:22:41,067 epoch 3 - iter 2/4 - loss 0.09424507 - samples/sec: 19.09 - lr: 0.020000\n",
+      "2021-09-21 19:22:41,130 epoch 3 - iter 3/4 - loss 0.06798821 - samples/sec: 15.83 - lr: 0.020000\n",
+      "2021-09-21 19:22:41,185 epoch 3 - iter 4/4 - loss 0.21741174 - samples/sec: 18.37 - lr: 0.020000\n",
+      "2021-09-21 19:22:41,186 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:41,187 EPOCH 3 done: loss 0.2174 - lr 0.0200000\n",
+      "2021-09-21 19:22:41,187 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:22:41,189 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:41,297 epoch 4 - iter 1/4 - loss 0.77795529 - samples/sec: 13.58 - lr: 0.020000\n",
+      "2021-09-21 19:22:41,349 epoch 4 - iter 2/4 - loss 0.39363274 - samples/sec: 19.55 - lr: 0.020000\n",
+      "2021-09-21 19:22:41,412 epoch 4 - iter 3/4 - loss 0.26706286 - samples/sec: 16.00 - lr: 0.020000\n",
+      "2021-09-21 19:22:41,475 epoch 4 - iter 4/4 - loss 0.20180519 - samples/sec: 15.94 - lr: 0.020000\n",
+      "2021-09-21 19:22:41,476 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:41,476 EPOCH 4 done: loss 0.2018 - lr 0.0200000\n",
+      "2021-09-21 19:22:41,477 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:22:41,479 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:41,586 epoch 5 - iter 1/4 - loss 0.32254294 - samples/sec: 16.70 - lr: 0.020000\n",
+      "2021-09-21 19:22:41,643 epoch 5 - iter 2/4 - loss 0.29487368 - samples/sec: 17.63 - lr: 0.020000\n",
+      "2021-09-21 19:22:41,699 epoch 5 - iter 3/4 - loss 0.19768363 - samples/sec: 18.00 - lr: 0.020000\n",
+      "2021-09-21 19:22:41,764 epoch 5 - iter 4/4 - loss 0.23324626 - samples/sec: 15.62 - lr: 0.020000\n",
+      "2021-09-21 19:22:41,765 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:41,765 EPOCH 5 done: loss 0.2332 - lr 0.0200000\n",
       "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 10:58:07,858 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:58:07,860 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:07,945 epoch 6 - iter 1/4 - loss 0.00974273 - samples/sec: 15.98 - lr: 0.010000\n",
-      "2021-09-08 10:58:07,998 epoch 6 - iter 2/4 - loss 0.00604577 - samples/sec: 18.97 - lr: 0.010000\n",
-      "2021-09-08 10:58:08,051 epoch 6 - iter 3/4 - loss 0.01412329 - samples/sec: 18.93 - lr: 0.010000\n",
-      "2021-09-08 10:58:08,107 epoch 6 - iter 4/4 - loss 0.01317995 - samples/sec: 18.15 - lr: 0.010000\n",
-      "2021-09-08 10:58:08,108 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:08,109 EPOCH 6 done: loss 0.0132 - lr 0.0100000\n",
-      "2021-09-08 10:58:08,109 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:58:08,111 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:08,183 epoch 7 - iter 1/4 - loss 0.01985179 - samples/sec: 17.99 - lr: 0.010000\n",
-      "2021-09-08 10:58:08,239 epoch 7 - iter 2/4 - loss 0.02836045 - samples/sec: 18.27 - lr: 0.010000\n",
-      "2021-09-08 10:58:08,294 epoch 7 - iter 3/4 - loss 0.02164003 - samples/sec: 18.38 - lr: 0.010000\n",
-      "2021-09-08 10:58:08,347 epoch 7 - iter 4/4 - loss 0.01635291 - samples/sec: 19.10 - lr: 0.010000\n",
-      "2021-09-08 10:58:08,348 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:08,349 EPOCH 7 done: loss 0.0164 - lr 0.0100000\n",
-      "2021-09-08 10:58:08,349 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:58:08,437 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:08,509 epoch 8 - iter 1/4 - loss 0.01965549 - samples/sec: 18.21 - lr: 0.010000\n",
-      "2021-09-08 10:58:08,565 epoch 8 - iter 2/4 - loss 0.01453118 - samples/sec: 17.93 - lr: 0.010000\n",
-      "2021-09-08 10:58:08,626 epoch 8 - iter 3/4 - loss 0.01036723 - samples/sec: 16.79 - lr: 0.010000\n",
-      "2021-09-08 10:58:08,679 epoch 8 - iter 4/4 - loss 0.00962348 - samples/sec: 19.07 - lr: 0.010000\n",
-      "2021-09-08 10:58:08,680 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:08,680 EPOCH 8 done: loss 0.0096 - lr 0.0100000\n",
-      "2021-09-08 10:58:08,680 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:58:08,825 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:08,892 epoch 9 - iter 1/4 - loss 0.02519611 - samples/sec: 19.39 - lr: 0.010000\n",
-      "2021-09-08 10:58:08,944 epoch 9 - iter 2/4 - loss 0.01736986 - samples/sec: 19.51 - lr: 0.010000\n",
-      "2021-09-08 10:58:08,995 epoch 9 - iter 3/4 - loss 0.01596447 - samples/sec: 19.61 - lr: 0.010000\n",
-      "2021-09-08 10:58:09,047 epoch 9 - iter 4/4 - loss 0.01219802 - samples/sec: 19.65 - lr: 0.010000\n",
-      "2021-09-08 10:58:09,048 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:09,048 EPOCH 9 done: loss 0.0122 - lr 0.0100000\n",
+      "2021-09-21 19:22:41,765 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:22:41,767 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:41,889 epoch 6 - iter 1/4 - loss 0.04664413 - samples/sec: 13.23 - lr: 0.010000\n",
+      "2021-09-21 19:22:41,978 epoch 6 - iter 2/4 - loss 0.06177786 - samples/sec: 11.22 - lr: 0.010000\n",
+      "2021-09-21 19:22:42,056 epoch 6 - iter 3/4 - loss 0.04410150 - samples/sec: 12.88 - lr: 0.010000\n",
+      "2021-09-21 19:22:42,136 epoch 6 - iter 4/4 - loss 0.03325244 - samples/sec: 12.65 - lr: 0.010000\n",
+      "2021-09-21 19:22:42,137 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:42,137 EPOCH 6 done: loss 0.0333 - lr 0.0100000\n",
+      "2021-09-21 19:22:42,138 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:22:42,139 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:42,249 epoch 7 - iter 1/4 - loss 0.01912837 - samples/sec: 15.81 - lr: 0.010000\n",
+      "2021-09-21 19:22:42,344 epoch 7 - iter 2/4 - loss 0.09753470 - samples/sec: 10.59 - lr: 0.010000\n",
+      "2021-09-21 19:22:42,418 epoch 7 - iter 3/4 - loss 0.06569237 - samples/sec: 13.48 - lr: 0.010000\n",
+      "2021-09-21 19:22:42,490 epoch 7 - iter 4/4 - loss 0.05177903 - samples/sec: 14.09 - lr: 0.010000\n",
+      "2021-09-21 19:22:42,491 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:42,491 EPOCH 7 done: loss 0.0518 - lr 0.0100000\n",
+      "2021-09-21 19:22:42,492 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:22:42,493 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:42,630 epoch 8 - iter 1/4 - loss 0.00855190 - samples/sec: 12.07 - lr: 0.010000\n",
+      "2021-09-21 19:22:42,716 epoch 8 - iter 2/4 - loss 0.00682281 - samples/sec: 11.74 - lr: 0.010000\n",
+      "2021-09-21 19:22:42,796 epoch 8 - iter 3/4 - loss 0.02857593 - samples/sec: 12.59 - lr: 0.010000\n",
+      "2021-09-21 19:22:42,867 epoch 8 - iter 4/4 - loss 0.02273820 - samples/sec: 14.09 - lr: 0.010000\n",
+      "2021-09-21 19:22:42,868 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:42,868 EPOCH 8 done: loss 0.0227 - lr 0.0100000\n",
+      "2021-09-21 19:22:42,869 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:22:42,871 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:43,011 epoch 9 - iter 1/4 - loss 0.01174254 - samples/sec: 10.47 - lr: 0.010000\n",
+      "2021-09-21 19:22:43,081 epoch 9 - iter 2/4 - loss 0.00701134 - samples/sec: 14.38 - lr: 0.010000\n",
+      "2021-09-21 19:22:43,139 epoch 9 - iter 3/4 - loss 0.00795021 - samples/sec: 17.21 - lr: 0.010000\n",
+      "2021-09-21 19:22:43,204 epoch 9 - iter 4/4 - loss 0.01196478 - samples/sec: 15.49 - lr: 0.010000\n",
+      "2021-09-21 19:22:43,205 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:43,206 EPOCH 9 done: loss 0.0120 - lr 0.0100000\n",
       "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 10:58:09,048 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:58:09,193 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:09,259 epoch 10 - iter 1/4 - loss 0.01072994 - samples/sec: 19.81 - lr: 0.005000\n",
-      "2021-09-08 10:58:09,311 epoch 10 - iter 2/4 - loss 0.00767206 - samples/sec: 19.58 - lr: 0.005000\n",
-      "2021-09-08 10:58:09,362 epoch 10 - iter 3/4 - loss 0.00749535 - samples/sec: 19.63 - lr: 0.005000\n",
-      "2021-09-08 10:58:09,413 epoch 10 - iter 4/4 - loss 0.01078899 - samples/sec: 19.66 - lr: 0.005000\n",
-      "2021-09-08 10:58:09,414 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:09,415 EPOCH 10 done: loss 0.0108 - lr 0.0050000\n",
-      "2021-09-08 10:58:09,415 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:58:19,304 Test data not provided setting final score to 0\n",
-      "2021-09-08 10:58:30,191 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
+      "2021-09-21 19:22:43,206 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:22:43,208 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:43,330 epoch 10 - iter 1/4 - loss 0.00141618 - samples/sec: 12.93 - lr: 0.005000\n",
+      "2021-09-21 19:22:43,423 epoch 10 - iter 2/4 - loss 0.04128535 - samples/sec: 10.74 - lr: 0.005000\n",
+      "2021-09-21 19:22:43,507 epoch 10 - iter 3/4 - loss 0.02998729 - samples/sec: 12.03 - lr: 0.005000\n",
+      "2021-09-21 19:22:43,590 epoch 10 - iter 4/4 - loss 0.02395263 - samples/sec: 12.08 - lr: 0.005000\n",
+      "2021-09-21 19:22:43,591 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:22:43,591 EPOCH 10 done: loss 0.0240 - lr 0.0050000\n",
+      "2021-09-21 19:22:43,592 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:22:47,852 Test data not provided setting final score to 0\n",
+      "2021-09-21 19:23:00,171 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:58:34,540 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:23:04,525 Computing label dictionary. Progress:\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 4/4 [00:00<00:00, 20763.88it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 19:23:04,527 [b'emotions experienced when not in a state of well-being', b'the optimistic feeling that all is going to turn out well', b'the state of being angry', b'the emotion of great happiness']\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 4/4 [00:00<00:00, 24036.13it/s]"
+      "\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:58:34,542 [b'emotions experienced when not in a state of well-being', b'the optimistic feeling that all is going to turn out well', b'the state of being angry', b'the emotion of great happiness']\n",
-      "2021-09-08 10:58:34,677 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:34,679 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:23:04,805 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:04,807 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -6021,140 +6046,146 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:58:34,680 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:34,680 Corpus: \"Corpus: 4 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 10:58:34,680 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:34,681 Parameters:\n",
-      "2021-09-08 10:58:34,681  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:58:34,681  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:58:34,682  - patience: \"3\"\n",
-      "2021-09-08 10:58:34,682  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:58:34,682  - max_epochs: \"10\"\n",
-      "2021-09-08 10:58:34,683  - shuffle: \"True\"\n",
-      "2021-09-08 10:58:34,683  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:58:34,683  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:58:34,683 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:34,684 Model training base path: \"None1\"\n",
-      "2021-09-08 10:58:34,684 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:34,684 Device: cuda:1\n",
-      "2021-09-08 10:58:34,685 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:34,685 Embeddings storage mode: cpu\n"
+      "2021-09-21 19:23:04,808 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:04,808 Corpus: \"Corpus: 4 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 19:23:04,808 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:04,808 Parameters:\n",
+      "2021-09-21 19:23:04,809  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:23:04,809  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:23:04,809  - patience: \"3\"\n",
+      "2021-09-21 19:23:04,810  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:23:04,810  - max_epochs: \"10\"\n",
+      "2021-09-21 19:23:04,810  - shuffle: \"True\"\n",
+      "2021-09-21 19:23:04,810  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:23:04,811  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:23:04,811 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:04,811 Model training base path: \"None1\"\n",
+      "2021-09-21 19:23:04,812 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:04,812 Device: cuda:0\n",
+      "2021-09-21 19:23:04,812 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:04,812 Embeddings storage mode: cpu\n",
+      "2021-09-21 19:23:05,003 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:05,094 epoch 1 - iter 1/4 - loss 0.02118094 - samples/sec: 17.11 - lr: 0.020000\n",
+      "2021-09-21 19:23:05,159 epoch 1 - iter 2/4 - loss 0.24987308 - samples/sec: 15.31 - lr: 0.020000\n",
+      "2021-09-21 19:23:05,228 epoch 1 - iter 3/4 - loss 0.52119254 - samples/sec: 14.59 - lr: 0.020000\n",
+      "2021-09-21 19:23:05,296 epoch 1 - iter 4/4 - loss 0.64238604 - samples/sec: 15.01 - lr: 0.020000\n",
+      "2021-09-21 19:23:05,297 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:05,297 EPOCH 1 done: loss 0.6424 - lr 0.0200000\n",
+      "2021-09-21 19:23:05,297 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:23:10,558 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:10,704 epoch 2 - iter 1/4 - loss 0.06106594 - samples/sec: 10.15 - lr: 0.020000\n",
+      "2021-09-21 19:23:10,792 epoch 2 - iter 2/4 - loss 0.28626026 - samples/sec: 11.45 - lr: 0.020000\n",
+      "2021-09-21 19:23:10,882 epoch 2 - iter 3/4 - loss 0.57022394 - samples/sec: 11.17 - lr: 0.020000\n",
+      "2021-09-21 19:23:10,974 epoch 2 - iter 4/4 - loss 0.63883357 - samples/sec: 10.97 - lr: 0.020000\n",
+      "2021-09-21 19:23:10,975 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:10,975 EPOCH 2 done: loss 0.6388 - lr 0.0200000\n",
+      "2021-09-21 19:23:10,976 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:23:10,992 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:11,144 epoch 3 - iter 1/4 - loss 0.28841367 - samples/sec: 9.68 - lr: 0.020000\n",
+      "2021-09-21 19:23:11,228 epoch 3 - iter 2/4 - loss 0.46682440 - samples/sec: 11.97 - lr: 0.020000\n",
+      "2021-09-21 19:23:11,322 epoch 3 - iter 3/4 - loss 0.33439670 - samples/sec: 10.74 - lr: 0.020000\n",
+      "2021-09-21 19:23:11,411 epoch 3 - iter 4/4 - loss 0.46301281 - samples/sec: 11.30 - lr: 0.020000\n",
+      "2021-09-21 19:23:11,412 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:11,412 EPOCH 3 done: loss 0.4630 - lr 0.0200000\n",
+      "2021-09-21 19:23:11,412 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:23:11,414 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:11,516 epoch 4 - iter 1/4 - loss 0.15151949 - samples/sec: 14.39 - lr: 0.020000\n",
+      "2021-09-21 19:23:11,597 epoch 4 - iter 2/4 - loss 0.20731901 - samples/sec: 12.39 - lr: 0.020000\n",
+      "2021-09-21 19:23:11,666 epoch 4 - iter 3/4 - loss 0.14065605 - samples/sec: 14.49 - lr: 0.020000\n",
+      "2021-09-21 19:23:11,764 epoch 4 - iter 4/4 - loss 0.11916128 - samples/sec: 10.30 - lr: 0.020000\n",
+      "2021-09-21 19:23:11,765 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:11,765 EPOCH 4 done: loss 0.1192 - lr 0.0200000\n",
+      "2021-09-21 19:23:11,765 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:23:11,767 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:11,890 epoch 5 - iter 1/4 - loss 0.03754872 - samples/sec: 12.04 - lr: 0.020000\n",
+      "2021-09-21 19:23:11,981 epoch 5 - iter 2/4 - loss 0.02231593 - samples/sec: 11.12 - lr: 0.020000\n",
+      "2021-09-21 19:23:12,088 epoch 5 - iter 3/4 - loss 0.15487472 - samples/sec: 9.38 - lr: 0.020000\n",
+      "2021-09-21 19:23:12,177 epoch 5 - iter 4/4 - loss 0.12222676 - samples/sec: 11.24 - lr: 0.020000\n",
+      "2021-09-21 19:23:12,178 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:12,178 EPOCH 5 done: loss 0.1222 - lr 0.0200000\n",
+      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 19:23:12,179 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:23:12,181 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:12,322 epoch 6 - iter 1/4 - loss 0.12400919 - samples/sec: 9.97 - lr: 0.010000\n",
+      "2021-09-21 19:23:12,412 epoch 6 - iter 2/4 - loss 0.07228660 - samples/sec: 11.14 - lr: 0.010000\n",
+      "2021-09-21 19:23:12,513 epoch 6 - iter 3/4 - loss 0.05130091 - samples/sec: 9.93 - lr: 0.010000\n",
+      "2021-09-21 19:23:12,598 epoch 6 - iter 4/4 - loss 0.03989663 - samples/sec: 11.82 - lr: 0.010000\n",
+      "2021-09-21 19:23:12,599 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:12,599 EPOCH 6 done: loss 0.0399 - lr 0.0100000\n",
+      "2021-09-21 19:23:12,600 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:23:12,602 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:12,735 epoch 7 - iter 1/4 - loss 0.04029866 - samples/sec: 10.34 - lr: 0.010000\n",
+      "2021-09-21 19:23:12,816 epoch 7 - iter 2/4 - loss 0.13736513 - samples/sec: 12.41 - lr: 0.010000\n",
+      "2021-09-21 19:23:12,915 epoch 7 - iter 3/4 - loss 0.09380800 - samples/sec: 10.16 - lr: 0.010000\n",
+      "2021-09-21 19:23:12,999 epoch 7 - iter 4/4 - loss 0.07096742 - samples/sec: 12.02 - lr: 0.010000\n",
+      "2021-09-21 19:23:13,000 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:13,000 EPOCH 7 done: loss 0.0710 - lr 0.0100000\n",
+      "2021-09-21 19:23:13,000 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:23:13,002 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:13,126 epoch 8 - iter 1/4 - loss 0.03104958 - samples/sec: 11.49 - lr: 0.010000\n",
+      "2021-09-21 19:23:13,222 epoch 8 - iter 2/4 - loss 0.01818770 - samples/sec: 10.49 - lr: 0.010000\n",
+      "2021-09-21 19:23:13,317 epoch 8 - iter 3/4 - loss 0.01485936 - samples/sec: 10.55 - lr: 0.010000\n",
+      "2021-09-21 19:23:13,398 epoch 8 - iter 4/4 - loss 0.01727196 - samples/sec: 12.47 - lr: 0.010000\n",
+      "2021-09-21 19:23:13,399 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:13,399 EPOCH 8 done: loss 0.0173 - lr 0.0100000\n",
+      "2021-09-21 19:23:13,399 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:23:13,401 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:13,549 epoch 9 - iter 1/4 - loss 0.00141295 - samples/sec: 11.45 - lr: 0.010000\n",
+      "2021-09-21 19:23:13,620 epoch 9 - iter 2/4 - loss 0.00655331 - samples/sec: 14.19 - lr: 0.010000\n",
+      "2021-09-21 19:23:13,703 epoch 9 - iter 3/4 - loss 0.01068319 - samples/sec: 12.04 - lr: 0.010000\n",
+      "2021-09-21 19:23:13,798 epoch 9 - iter 4/4 - loss 0.01195057 - samples/sec: 10.57 - lr: 0.010000\n",
+      "2021-09-21 19:23:13,799 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:13,800 EPOCH 9 done: loss 0.0120 - lr 0.0100000\n",
+      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 19:23:13,800 BAD EPOCHS (no improvement): 4\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 19:23:13,802 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:13,933 epoch 10 - iter 1/4 - loss 0.00245326 - samples/sec: 12.22 - lr: 0.005000\n",
+      "2021-09-21 19:23:14,026 epoch 10 - iter 2/4 - loss 0.00767103 - samples/sec: 10.75 - lr: 0.005000\n",
+      "2021-09-21 19:23:14,125 epoch 10 - iter 3/4 - loss 0.00839596 - samples/sec: 10.15 - lr: 0.005000\n",
+      "2021-09-21 19:23:14,216 epoch 10 - iter 4/4 - loss 0.00859215 - samples/sec: 11.14 - lr: 0.005000\n",
+      "2021-09-21 19:23:14,217 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:14,217 EPOCH 10 done: loss 0.0086 - lr 0.0050000\n",
+      "2021-09-21 19:23:14,217 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:23:17,835 Test data not provided setting final score to 0\n",
+      "2021-09-21 19:23:33,783 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
+      "init TARS\n",
+      "2021-09-21 19:23:38,362 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\n"
+      "100%|██████████| 4/4 [00:00<00:00, 19761.15it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:58:34,854 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:34,918 epoch 1 - iter 1/4 - loss 1.48322010 - samples/sec: 21.06 - lr: 0.020000\n",
-      "2021-09-08 10:58:34,974 epoch 1 - iter 2/4 - loss 1.28565645 - samples/sec: 18.19 - lr: 0.020000\n",
-      "2021-09-08 10:58:35,030 epoch 1 - iter 3/4 - loss 1.06846762 - samples/sec: 17.96 - lr: 0.020000\n",
-      "2021-09-08 10:58:35,085 epoch 1 - iter 4/4 - loss 0.99616764 - samples/sec: 18.08 - lr: 0.020000\n",
-      "2021-09-08 10:58:35,086 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:35,087 EPOCH 1 done: loss 0.9962 - lr 0.0200000\n",
-      "2021-09-08 10:58:35,087 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:58:40,715 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:40,786 epoch 2 - iter 1/4 - loss 0.06734987 - samples/sec: 18.56 - lr: 0.020000\n",
-      "2021-09-08 10:58:40,843 epoch 2 - iter 2/4 - loss 0.17097360 - samples/sec: 17.69 - lr: 0.020000\n",
-      "2021-09-08 10:58:40,899 epoch 2 - iter 3/4 - loss 0.30803942 - samples/sec: 17.94 - lr: 0.020000\n",
-      "2021-09-08 10:58:40,955 epoch 2 - iter 4/4 - loss 0.28682826 - samples/sec: 18.13 - lr: 0.020000\n",
-      "2021-09-08 10:58:40,956 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:40,956 EPOCH 2 done: loss 0.2868 - lr 0.0200000\n",
-      "2021-09-08 10:58:40,956 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:58:41,301 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:41,371 epoch 3 - iter 1/4 - loss 0.56961578 - samples/sec: 18.08 - lr: 0.020000\n",
-      "2021-09-08 10:58:41,423 epoch 3 - iter 2/4 - loss 0.28868400 - samples/sec: 19.69 - lr: 0.020000\n",
-      "2021-09-08 10:58:41,478 epoch 3 - iter 3/4 - loss 0.42931991 - samples/sec: 18.10 - lr: 0.020000\n",
-      "2021-09-08 10:58:41,534 epoch 3 - iter 4/4 - loss 0.42581771 - samples/sec: 18.11 - lr: 0.020000\n",
-      "2021-09-08 10:58:41,535 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:41,535 EPOCH 3 done: loss 0.4258 - lr 0.0200000\n",
-      "2021-09-08 10:58:41,535 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:58:41,537 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:41,607 epoch 4 - iter 1/4 - loss 0.22057338 - samples/sec: 18.24 - lr: 0.020000\n",
-      "2021-09-08 10:58:41,663 epoch 4 - iter 2/4 - loss 0.28058811 - samples/sec: 18.21 - lr: 0.020000\n",
-      "2021-09-08 10:58:41,714 epoch 4 - iter 3/4 - loss 0.18994774 - samples/sec: 19.58 - lr: 0.020000\n",
-      "2021-09-08 10:58:41,772 epoch 4 - iter 4/4 - loss 0.20600296 - samples/sec: 17.42 - lr: 0.020000\n",
-      "2021-09-08 10:58:41,773 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:41,773 EPOCH 4 done: loss 0.2060 - lr 0.0200000\n",
-      "2021-09-08 10:58:41,774 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:58:41,776 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:41,846 epoch 5 - iter 1/4 - loss 0.40451100 - samples/sec: 18.00 - lr: 0.020000\n",
-      "2021-09-08 10:58:41,897 epoch 5 - iter 2/4 - loss 0.20966830 - samples/sec: 19.66 - lr: 0.020000\n",
-      "2021-09-08 10:58:41,953 epoch 5 - iter 3/4 - loss 0.19485519 - samples/sec: 18.16 - lr: 0.020000\n",
-      "2021-09-08 10:58:42,008 epoch 5 - iter 4/4 - loss 0.18124925 - samples/sec: 18.20 - lr: 0.020000\n",
-      "2021-09-08 10:58:42,009 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:42,010 EPOCH 5 done: loss 0.1812 - lr 0.0200000\n",
-      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 10:58:42,010 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:58:42,012 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:42,082 epoch 6 - iter 1/4 - loss 0.07965782 - samples/sec: 18.22 - lr: 0.010000\n",
-      "2021-09-08 10:58:42,137 epoch 6 - iter 2/4 - loss 0.06873039 - samples/sec: 18.11 - lr: 0.010000\n",
-      "2021-09-08 10:58:42,194 epoch 6 - iter 3/4 - loss 0.09677252 - samples/sec: 17.93 - lr: 0.010000\n",
-      "2021-09-08 10:58:42,250 epoch 6 - iter 4/4 - loss 0.10709289 - samples/sec: 18.01 - lr: 0.010000\n",
-      "2021-09-08 10:58:42,251 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:42,251 EPOCH 6 done: loss 0.1071 - lr 0.0100000\n",
-      "2021-09-08 10:58:42,251 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:58:42,254 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:42,320 epoch 7 - iter 1/4 - loss 0.01507854 - samples/sec: 19.48 - lr: 0.010000\n",
-      "2021-09-08 10:58:42,372 epoch 7 - iter 2/4 - loss 0.01890137 - samples/sec: 19.56 - lr: 0.010000\n",
-      "2021-09-08 10:58:42,427 epoch 7 - iter 3/4 - loss 0.07059869 - samples/sec: 18.10 - lr: 0.010000\n",
-      "2021-09-08 10:58:42,479 epoch 7 - iter 4/4 - loss 0.05431614 - samples/sec: 19.45 - lr: 0.010000\n",
-      "2021-09-08 10:58:42,480 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:42,480 EPOCH 7 done: loss 0.0543 - lr 0.0100000\n",
-      "2021-09-08 10:58:42,481 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:58:42,488 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:42,554 epoch 8 - iter 1/4 - loss 0.02709111 - samples/sec: 19.78 - lr: 0.010000\n",
-      "2021-09-08 10:58:42,606 epoch 8 - iter 2/4 - loss 0.02325772 - samples/sec: 19.55 - lr: 0.010000\n",
-      "2021-09-08 10:58:42,657 epoch 8 - iter 3/4 - loss 0.01914174 - samples/sec: 19.72 - lr: 0.010000\n",
-      "2021-09-08 10:58:42,708 epoch 8 - iter 4/4 - loss 0.01451279 - samples/sec: 19.53 - lr: 0.010000\n",
-      "2021-09-08 10:58:42,709 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:42,710 EPOCH 8 done: loss 0.0145 - lr 0.0100000\n",
-      "2021-09-08 10:58:42,710 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:58:42,713 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:42,778 epoch 9 - iter 1/4 - loss 0.01008287 - samples/sec: 19.79 - lr: 0.010000\n",
-      "2021-09-08 10:58:42,832 epoch 9 - iter 2/4 - loss 0.01046969 - samples/sec: 18.58 - lr: 0.010000\n",
-      "2021-09-08 10:58:42,884 epoch 9 - iter 3/4 - loss 0.00734425 - samples/sec: 19.58 - lr: 0.010000\n",
-      "2021-09-08 10:58:42,935 epoch 9 - iter 4/4 - loss 0.01087431 - samples/sec: 19.58 - lr: 0.010000\n",
-      "2021-09-08 10:58:42,936 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:42,937 EPOCH 9 done: loss 0.0109 - lr 0.0100000\n",
-      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 10:58:42,937 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:58:42,945 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:43,015 epoch 10 - iter 1/4 - loss 0.05560537 - samples/sec: 18.08 - lr: 0.005000\n",
-      "2021-09-08 10:58:43,067 epoch 10 - iter 2/4 - loss 0.02961029 - samples/sec: 19.53 - lr: 0.005000\n",
-      "2021-09-08 10:58:43,118 epoch 10 - iter 3/4 - loss 0.02767056 - samples/sec: 19.85 - lr: 0.005000\n",
-      "2021-09-08 10:58:43,169 epoch 10 - iter 4/4 - loss 0.02343433 - samples/sec: 19.59 - lr: 0.005000\n",
-      "2021-09-08 10:58:43,170 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:43,170 EPOCH 10 done: loss 0.0234 - lr 0.0050000\n",
-      "2021-09-08 10:58:43,171 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:58:50,118 Test data not provided setting final score to 0\n",
-      "2021-09-08 10:59:04,398 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
-      "init TARS\n",
-      "2021-09-08 10:59:08,773 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:23:38,363 [b'emotions experienced when not in a state of well-being', b'the optimistic feeling that all is going to turn out well', b'the state of being angry', b'the emotion of great happiness']\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 4/4 [00:00<00:00, 20610.83it/s]"
+      "\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:59:08,775 [b'emotions experienced when not in a state of well-being', b'the optimistic feeling that all is going to turn out well', b'the state of being angry', b'the emotion of great happiness']\n",
-      "2021-09-08 10:59:08,787 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:08,789 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:23:38,896 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:38,897 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -6467,140 +6498,133 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:59:08,790 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:08,790 Corpus: \"Corpus: 4 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 10:59:08,790 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:08,791 Parameters:\n",
-      "2021-09-08 10:59:08,791  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:59:08,792  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:59:08,792  - patience: \"3\"\n",
-      "2021-09-08 10:59:08,792  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:59:08,793  - max_epochs: \"10\"\n",
-      "2021-09-08 10:59:08,793  - shuffle: \"True\"\n",
-      "2021-09-08 10:59:08,793  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:59:08,794  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:59:08,794 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:08,794 Model training base path: \"None1\"\n",
-      "2021-09-08 10:59:08,795 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:08,795 Device: cuda:1\n",
-      "2021-09-08 10:59:08,795 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:08,796 Embeddings storage mode: cpu\n",
-      "2021-09-08 10:59:08,802 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:08,872 epoch 1 - iter 1/4 - loss 0.34360138 - samples/sec: 19.33 - lr: 0.020000\n",
-      "2021-09-08 10:59:08,935 epoch 1 - iter 2/4 - loss 0.56904338 - samples/sec: 16.03 - lr: 0.020000\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
+      "2021-09-21 19:23:38,898 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:38,899 Corpus: \"Corpus: 4 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 19:23:38,899 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:38,899 Parameters:\n",
+      "2021-09-21 19:23:38,899  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:23:38,900  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:23:38,900  - patience: \"3\"\n",
+      "2021-09-21 19:23:38,900  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:23:38,901  - max_epochs: \"10\"\n",
+      "2021-09-21 19:23:38,901  - shuffle: \"True\"\n",
+      "2021-09-21 19:23:38,901  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:23:38,901  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:23:38,902 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:38,902 Model training base path: \"None1\"\n",
+      "2021-09-21 19:23:38,902 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:38,903 Device: cuda:0\n",
+      "2021-09-21 19:23:38,903 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:38,903 Embeddings storage mode: cpu\n",
+      "2021-09-21 19:23:39,083 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:39,160 epoch 1 - iter 1/4 - loss 0.07098488 - samples/sec: 17.14 - lr: 0.020000\n",
+      "2021-09-21 19:23:39,228 epoch 1 - iter 2/4 - loss 1.79138269 - samples/sec: 14.87 - lr: 0.020000\n",
+      "2021-09-21 19:23:39,298 epoch 1 - iter 3/4 - loss 1.73832167 - samples/sec: 14.42 - lr: 0.020000\n",
+      "2021-09-21 19:23:39,366 epoch 1 - iter 4/4 - loss 1.45002096 - samples/sec: 14.82 - lr: 0.020000\n",
+      "2021-09-21 19:23:39,367 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:39,367 EPOCH 1 done: loss 1.4500 - lr 0.0200000\n",
+      "2021-09-21 19:23:39,368 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:23:44,198 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:44,332 epoch 2 - iter 1/4 - loss 0.03386107 - samples/sec: 11.38 - lr: 0.020000\n",
+      "2021-09-21 19:23:44,425 epoch 2 - iter 2/4 - loss 0.06107806 - samples/sec: 10.85 - lr: 0.020000\n",
+      "2021-09-21 19:23:44,525 epoch 2 - iter 3/4 - loss 0.08474029 - samples/sec: 10.10 - lr: 0.020000\n",
+      "2021-09-21 19:23:44,625 epoch 2 - iter 4/4 - loss 0.76393942 - samples/sec: 10.03 - lr: 0.020000\n",
+      "2021-09-21 19:23:44,626 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:44,626 EPOCH 2 done: loss 0.7639 - lr 0.0200000\n",
+      "2021-09-21 19:23:44,626 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:23:44,629 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:44,863 epoch 3 - iter 1/4 - loss 0.03754487 - samples/sec: 4.99 - lr: 0.020000\n",
+      "2021-09-21 19:23:44,956 epoch 3 - iter 2/4 - loss 0.50542946 - samples/sec: 10.78 - lr: 0.020000\n",
+      "2021-09-21 19:23:45,050 epoch 3 - iter 3/4 - loss 0.55052158 - samples/sec: 10.71 - lr: 0.020000\n",
+      "2021-09-21 19:23:45,145 epoch 3 - iter 4/4 - loss 0.48902051 - samples/sec: 10.65 - lr: 0.020000\n",
+      "2021-09-21 19:23:45,146 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:45,146 EPOCH 3 done: loss 0.4890 - lr 0.0200000\n",
+      "2021-09-21 19:23:45,146 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:23:45,148 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:45,297 epoch 4 - iter 1/4 - loss 0.57650876 - samples/sec: 11.59 - lr: 0.020000\n",
+      "2021-09-21 19:23:45,395 epoch 4 - iter 2/4 - loss 0.39590418 - samples/sec: 10.24 - lr: 0.020000\n",
+      "2021-09-21 19:23:45,480 epoch 4 - iter 3/4 - loss 0.27194956 - samples/sec: 11.71 - lr: 0.020000\n",
+      "2021-09-21 19:23:45,572 epoch 4 - iter 4/4 - loss 0.20456198 - samples/sec: 11.01 - lr: 0.020000\n",
+      "2021-09-21 19:23:45,573 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:45,573 EPOCH 4 done: loss 0.2046 - lr 0.0200000\n",
+      "2021-09-21 19:23:45,573 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:23:45,575 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:45,724 epoch 5 - iter 1/4 - loss 1.28256333 - samples/sec: 11.82 - lr: 0.020000\n",
+      "2021-09-21 19:23:45,813 epoch 5 - iter 2/4 - loss 0.64716185 - samples/sec: 11.27 - lr: 0.020000\n",
+      "2021-09-21 19:23:45,900 epoch 5 - iter 3/4 - loss 0.55375346 - samples/sec: 11.67 - lr: 0.020000\n",
+      "2021-09-21 19:23:45,987 epoch 5 - iter 4/4 - loss 0.41924964 - samples/sec: 11.56 - lr: 0.020000\n",
+      "2021-09-21 19:23:45,988 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:45,988 EPOCH 5 done: loss 0.4192 - lr 0.0200000\n",
+      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 19:23:45,988 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:23:45,990 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:46,123 epoch 6 - iter 1/4 - loss 0.00414846 - samples/sec: 14.01 - lr: 0.010000\n",
+      "2021-09-21 19:23:46,220 epoch 6 - iter 2/4 - loss 0.02021658 - samples/sec: 10.43 - lr: 0.010000\n",
+      "2021-09-21 19:23:46,320 epoch 6 - iter 3/4 - loss 0.26727573 - samples/sec: 10.00 - lr: 0.010000\n",
+      "2021-09-21 19:23:46,393 epoch 6 - iter 4/4 - loss 0.20081980 - samples/sec: 13.76 - lr: 0.010000\n",
+      "2021-09-21 19:23:46,394 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:46,395 EPOCH 6 done: loss 0.2008 - lr 0.0100000\n",
+      "2021-09-21 19:23:46,395 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:23:46,397 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:46,520 epoch 7 - iter 1/4 - loss 0.16271774 - samples/sec: 12.62 - lr: 0.010000\n",
+      "2021-09-21 19:23:46,581 epoch 7 - iter 2/4 - loss 0.08180026 - samples/sec: 16.43 - lr: 0.010000\n",
+      "2021-09-21 19:23:46,662 epoch 7 - iter 3/4 - loss 0.05547283 - samples/sec: 12.45 - lr: 0.010000\n",
+      "2021-09-21 19:23:46,755 epoch 7 - iter 4/4 - loss 0.05226666 - samples/sec: 10.76 - lr: 0.010000\n",
+      "2021-09-21 19:23:46,756 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:46,756 EPOCH 7 done: loss 0.0523 - lr 0.0100000\n",
+      "2021-09-21 19:23:46,757 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:23:46,759 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:46,895 epoch 8 - iter 1/4 - loss 0.00225818 - samples/sec: 11.97 - lr: 0.010000\n",
+      "2021-09-21 19:23:46,990 epoch 8 - iter 2/4 - loss 0.11594262 - samples/sec: 10.56 - lr: 0.010000\n",
+      "2021-09-21 19:23:47,094 epoch 8 - iter 3/4 - loss 0.07770930 - samples/sec: 9.64 - lr: 0.010000\n",
+      "2021-09-21 19:23:47,183 epoch 8 - iter 4/4 - loss 0.06062900 - samples/sec: 11.34 - lr: 0.010000\n",
+      "2021-09-21 19:23:47,184 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:47,184 EPOCH 8 done: loss 0.0606 - lr 0.0100000\n",
+      "2021-09-21 19:23:47,184 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:23:47,186 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:47,338 epoch 9 - iter 1/4 - loss 0.36990586 - samples/sec: 11.56 - lr: 0.010000\n",
+      "2021-09-21 19:23:47,424 epoch 9 - iter 2/4 - loss 0.19302938 - samples/sec: 11.71 - lr: 0.010000\n",
+      "2021-09-21 19:23:47,494 epoch 9 - iter 3/4 - loss 0.12984986 - samples/sec: 14.33 - lr: 0.010000\n",
+      "2021-09-21 19:23:47,561 epoch 9 - iter 4/4 - loss 0.09785532 - samples/sec: 15.10 - lr: 0.010000\n",
+      "2021-09-21 19:23:47,562 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:47,562 EPOCH 9 done: loss 0.0979 - lr 0.0100000\n",
+      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 19:23:47,563 BAD EPOCHS (no improvement): 4\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:59:08,991 epoch 1 - iter 3/4 - loss 0.60598072 - samples/sec: 18.20 - lr: 0.020000\n",
-      "2021-09-08 10:59:09,046 epoch 1 - iter 4/4 - loss 0.48189801 - samples/sec: 18.23 - lr: 0.020000\n",
-      "2021-09-08 10:59:09,047 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:09,048 EPOCH 1 done: loss 0.4819 - lr 0.0200000\n",
-      "2021-09-08 10:59:09,048 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:59:16,711 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:16,791 epoch 2 - iter 1/4 - loss 0.25914529 - samples/sec: 16.35 - lr: 0.020000\n",
-      "2021-09-08 10:59:16,849 epoch 2 - iter 2/4 - loss 0.35580403 - samples/sec: 17.53 - lr: 0.020000\n",
-      "2021-09-08 10:59:16,910 epoch 2 - iter 3/4 - loss 0.28878654 - samples/sec: 16.66 - lr: 0.020000\n",
-      "2021-09-08 10:59:16,966 epoch 2 - iter 4/4 - loss 0.21983299 - samples/sec: 17.93 - lr: 0.020000\n",
-      "2021-09-08 10:59:16,967 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:16,968 EPOCH 2 done: loss 0.2198 - lr 0.0200000\n",
-      "2021-09-08 10:59:16,968 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:59:18,602 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:18,676 epoch 3 - iter 1/4 - loss 0.00465409 - samples/sec: 18.64 - lr: 0.020000\n",
-      "2021-09-08 10:59:18,728 epoch 3 - iter 2/4 - loss 0.00486315 - samples/sec: 19.45 - lr: 0.020000\n",
-      "2021-09-08 10:59:18,779 epoch 3 - iter 3/4 - loss 0.02136984 - samples/sec: 19.58 - lr: 0.020000\n",
-      "2021-09-08 10:59:18,843 epoch 3 - iter 4/4 - loss 0.05053879 - samples/sec: 16.00 - lr: 0.020000\n",
-      "2021-09-08 10:59:18,844 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:18,844 EPOCH 3 done: loss 0.0505 - lr 0.0200000\n",
-      "2021-09-08 10:59:18,844 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:59:19,079 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:19,151 epoch 4 - iter 1/4 - loss 0.01040259 - samples/sec: 18.36 - lr: 0.020000\n",
-      "2021-09-08 10:59:19,206 epoch 4 - iter 2/4 - loss 0.01011814 - samples/sec: 18.44 - lr: 0.020000\n",
-      "2021-09-08 10:59:19,265 epoch 4 - iter 3/4 - loss 0.01304483 - samples/sec: 17.16 - lr: 0.020000\n",
-      "2021-09-08 10:59:19,321 epoch 4 - iter 4/4 - loss 0.01469827 - samples/sec: 18.04 - lr: 0.020000\n",
-      "2021-09-08 10:59:19,323 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:19,323 EPOCH 4 done: loss 0.0147 - lr 0.0200000\n",
-      "2021-09-08 10:59:19,324 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:59:19,326 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:19,394 epoch 5 - iter 1/4 - loss 0.00555428 - samples/sec: 19.85 - lr: 0.020000\n",
-      "2021-09-08 10:59:19,451 epoch 5 - iter 2/4 - loss 0.00927029 - samples/sec: 18.02 - lr: 0.020000\n",
-      "2021-09-08 10:59:19,512 epoch 5 - iter 3/4 - loss 0.46898701 - samples/sec: 16.68 - lr: 0.020000\n",
-      "2021-09-08 10:59:19,571 epoch 5 - iter 4/4 - loss 0.54274160 - samples/sec: 16.94 - lr: 0.020000\n",
-      "2021-09-08 10:59:19,573 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:19,574 EPOCH 5 done: loss 0.5427 - lr 0.0200000\n",
-      "2021-09-08 10:59:19,574 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:59:28,648 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:28,720 epoch 6 - iter 1/4 - loss 0.11734017 - samples/sec: 18.54 - lr: 0.020000\n",
-      "2021-09-08 10:59:28,771 epoch 6 - iter 2/4 - loss 0.05944664 - samples/sec: 19.82 - lr: 0.020000\n",
-      "2021-09-08 10:59:28,830 epoch 6 - iter 3/4 - loss 0.24583595 - samples/sec: 17.18 - lr: 0.020000\n",
-      "2021-09-08 10:59:28,884 epoch 6 - iter 4/4 - loss 0.18510083 - samples/sec: 18.53 - lr: 0.020000\n",
-      "2021-09-08 10:59:28,886 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:28,886 EPOCH 6 done: loss 0.1851 - lr 0.0200000\n",
-      "2021-09-08 10:59:28,886 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:59:28,888 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:28,953 epoch 7 - iter 1/4 - loss 0.00779923 - samples/sec: 20.77 - lr: 0.020000\n",
-      "2021-09-08 10:59:29,005 epoch 7 - iter 2/4 - loss 0.00871278 - samples/sec: 19.43 - lr: 0.020000\n",
-      "2021-09-08 10:59:29,060 epoch 7 - iter 3/4 - loss 0.02881438 - samples/sec: 18.60 - lr: 0.020000\n",
-      "2021-09-08 10:59:29,115 epoch 7 - iter 4/4 - loss 0.03223903 - samples/sec: 18.39 - lr: 0.020000\n",
-      "2021-09-08 10:59:29,117 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:29,117 EPOCH 7 done: loss 0.0322 - lr 0.0200000\n",
-      "2021-09-08 10:59:29,117 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:59:29,119 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:29,200 epoch 8 - iter 1/4 - loss 0.00230853 - samples/sec: 17.67 - lr: 0.020000\n",
-      "2021-09-08 10:59:29,256 epoch 8 - iter 2/4 - loss 0.01151264 - samples/sec: 18.23 - lr: 0.020000\n",
-      "2021-09-08 10:59:29,313 epoch 8 - iter 3/4 - loss 0.00789308 - samples/sec: 17.81 - lr: 0.020000\n",
-      "2021-09-08 10:59:29,370 epoch 8 - iter 4/4 - loss 0.01193071 - samples/sec: 17.71 - lr: 0.020000\n",
-      "2021-09-08 10:59:29,372 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:29,372 EPOCH 8 done: loss 0.0119 - lr 0.0200000\n",
-      "2021-09-08 10:59:29,372 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:59:29,374 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:29,458 epoch 9 - iter 1/4 - loss 0.00080711 - samples/sec: 15.36 - lr: 0.020000\n",
-      "2021-09-08 10:59:29,517 epoch 9 - iter 2/4 - loss 0.00443962 - samples/sec: 17.12 - lr: 0.020000\n",
-      "2021-09-08 10:59:29,576 epoch 9 - iter 3/4 - loss 0.00961803 - samples/sec: 17.28 - lr: 0.020000\n",
-      "2021-09-08 10:59:29,634 epoch 9 - iter 4/4 - loss 0.00782402 - samples/sec: 17.29 - lr: 0.020000\n",
-      "2021-09-08 10:59:29,635 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:29,636 EPOCH 9 done: loss 0.0078 - lr 0.0200000\n",
-      "Epoch     9: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 10:59:29,636 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:59:29,712 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:29,778 epoch 10 - iter 1/4 - loss 0.00128050 - samples/sec: 19.89 - lr: 0.010000\n",
-      "2021-09-08 10:59:29,829 epoch 10 - iter 2/4 - loss 0.00347954 - samples/sec: 19.58 - lr: 0.010000\n",
-      "2021-09-08 10:59:29,881 epoch 10 - iter 3/4 - loss 0.00336962 - samples/sec: 19.47 - lr: 0.010000\n",
-      "2021-09-08 10:59:29,940 epoch 10 - iter 4/4 - loss 0.00385022 - samples/sec: 17.36 - lr: 0.010000\n",
-      "2021-09-08 10:59:29,941 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:29,941 EPOCH 10 done: loss 0.0039 - lr 0.0100000\n",
-      "2021-09-08 10:59:29,942 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:59:37,610 Test data not provided setting final score to 0\n",
-      "2021-09-08 10:59:44,682 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
+      "2021-09-21 19:23:47,565 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:47,672 epoch 10 - iter 1/4 - loss 0.00316597 - samples/sec: 16.50 - lr: 0.005000\n",
+      "2021-09-21 19:23:47,747 epoch 10 - iter 2/4 - loss 0.00608198 - samples/sec: 13.39 - lr: 0.005000\n",
+      "2021-09-21 19:23:47,822 epoch 10 - iter 3/4 - loss 0.00555073 - samples/sec: 13.40 - lr: 0.005000\n",
+      "2021-09-21 19:23:47,913 epoch 10 - iter 4/4 - loss 0.03429298 - samples/sec: 10.99 - lr: 0.005000\n",
+      "2021-09-21 19:23:47,914 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:23:47,915 EPOCH 10 done: loss 0.0343 - lr 0.0050000\n",
+      "2021-09-21 19:23:47,915 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:23:52,422 Test data not provided setting final score to 0\n",
+      "2021-09-21 19:24:07,101 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:59:49,529 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:24:11,856 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 4/4 [00:00<00:00, 18829.65it/s]"
+      "100%|██████████| 4/4 [00:00<00:00, 17962.76it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:59:49,531 [b'emotions experienced when not in a state of well-being', b'the optimistic feeling that all is going to turn out well', b'the state of being angry', b'the emotion of great happiness']\n",
-      "2021-09-08 10:59:49,699 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:49,702 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:24:11,857 [b'emotions experienced when not in a state of well-being', b'the optimistic feeling that all is going to turn out well', b'the state of being angry', b'the emotion of great happiness']\n",
+      "2021-09-21 19:24:11,867 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:11,869 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -6913,25 +6937,27 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:59:49,702 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:49,702 Corpus: \"Corpus: 4 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 10:59:49,703 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:49,703 Parameters:\n",
-      "2021-09-08 10:59:49,703  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:59:49,704  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:59:49,704  - patience: \"3\"\n",
-      "2021-09-08 10:59:49,704  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:59:49,704  - max_epochs: \"10\"\n",
-      "2021-09-08 10:59:49,705  - shuffle: \"True\"\n",
-      "2021-09-08 10:59:49,705  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:59:49,705  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:59:49,705 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:49,706 Model training base path: \"None1\"\n",
-      "2021-09-08 10:59:49,706 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:49,706 Device: cuda:1\n",
-      "2021-09-08 10:59:49,707 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:49,707 Embeddings storage mode: cpu\n",
-      "2021-09-08 10:59:49,721 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 19:24:11,869 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:11,870 Corpus: \"Corpus: 4 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 19:24:11,870 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:11,870 Parameters:\n",
+      "2021-09-21 19:24:11,870  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:24:11,871  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:24:11,871  - patience: \"3\"\n",
+      "2021-09-21 19:24:11,871  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:24:11,872  - max_epochs: \"10\"\n",
+      "2021-09-21 19:24:11,872  - shuffle: \"True\"\n",
+      "2021-09-21 19:24:11,872  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:24:11,872  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:24:11,873 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:11,873 Model training base path: \"None1\"\n",
+      "2021-09-21 19:24:11,873 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:11,874 Device: cuda:0\n",
+      "2021-09-21 19:24:11,874 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:11,874 Embeddings storage mode: cpu\n",
+      "2021-09-21 19:24:11,881 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:11,974 epoch 1 - iter 1/4 - loss 0.28439629 - samples/sec: 14.73 - lr: 0.020000\n",
+      "2021-09-21 19:24:12,051 epoch 1 - iter 2/4 - loss 0.98606455 - samples/sec: 13.16 - lr: 0.020000\n"
      ]
     },
     {
@@ -6945,121 +6971,106 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:59:49,775 epoch 1 - iter 1/4 - loss 0.21038598 - samples/sec: 25.38 - lr: 0.020000\n",
-      "2021-09-08 10:59:49,822 epoch 1 - iter 2/4 - loss 0.34377214 - samples/sec: 21.52 - lr: 0.020000\n",
-      "2021-09-08 10:59:49,868 epoch 1 - iter 3/4 - loss 0.99485952 - samples/sec: 21.73 - lr: 0.020000\n",
-      "2021-09-08 10:59:49,915 epoch 1 - iter 4/4 - loss 0.97135299 - samples/sec: 21.78 - lr: 0.020000\n",
-      "2021-09-08 10:59:49,915 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:49,916 EPOCH 1 done: loss 0.9714 - lr 0.0200000\n",
-      "2021-09-08 10:59:49,916 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:24:12,114 epoch 1 - iter 3/4 - loss 0.77969817 - samples/sec: 15.97 - lr: 0.020000\n",
+      "2021-09-21 19:24:12,201 epoch 1 - iter 4/4 - loss 0.72869979 - samples/sec: 11.59 - lr: 0.020000\n",
+      "2021-09-21 19:24:12,202 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:12,202 EPOCH 1 done: loss 0.7287 - lr 0.0200000\n",
+      "2021-09-21 19:24:12,202 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:00:02,339 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:02,407 epoch 2 - iter 1/4 - loss 0.22564267 - samples/sec: 19.30 - lr: 0.020000\n",
-      "2021-09-08 11:00:02,458 epoch 2 - iter 2/4 - loss 0.14990170 - samples/sec: 20.03 - lr: 0.020000\n",
-      "2021-09-08 11:00:02,509 epoch 2 - iter 3/4 - loss 0.18922140 - samples/sec: 19.93 - lr: 0.020000\n",
-      "2021-09-08 11:00:02,560 epoch 2 - iter 4/4 - loss 0.28637744 - samples/sec: 19.96 - lr: 0.020000\n",
-      "2021-09-08 11:00:02,561 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:02,561 EPOCH 2 done: loss 0.2864 - lr 0.0200000\n",
-      "2021-09-08 11:00:02,561 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:00:03,344 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:03,408 epoch 3 - iter 1/4 - loss 0.74550515 - samples/sec: 19.86 - lr: 0.020000\n",
-      "2021-09-08 11:00:03,456 epoch 3 - iter 2/4 - loss 0.39245236 - samples/sec: 21.17 - lr: 0.020000\n",
-      "2021-09-08 11:00:03,506 epoch 3 - iter 3/4 - loss 0.33163729 - samples/sec: 20.28 - lr: 0.020000\n",
-      "2021-09-08 11:00:03,556 epoch 3 - iter 4/4 - loss 0.29956533 - samples/sec: 20.02 - lr: 0.020000\n",
-      "2021-09-08 11:00:03,557 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:03,557 EPOCH 3 done: loss 0.2996 - lr 0.0200000\n",
-      "2021-09-08 11:00:03,558 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:00:03,673 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:03,736 epoch 4 - iter 1/4 - loss 0.14071326 - samples/sec: 20.06 - lr: 0.020000\n",
-      "2021-09-08 11:00:03,784 epoch 4 - iter 2/4 - loss 0.07985457 - samples/sec: 21.42 - lr: 0.020000\n",
-      "2021-09-08 11:00:03,834 epoch 4 - iter 3/4 - loss 0.09087125 - samples/sec: 20.17 - lr: 0.020000\n",
-      "2021-09-08 11:00:03,884 epoch 4 - iter 4/4 - loss 0.08663364 - samples/sec: 19.96 - lr: 0.020000\n",
-      "2021-09-08 11:00:03,885 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:03,886 EPOCH 4 done: loss 0.0866 - lr 0.0200000\n",
-      "2021-09-08 11:00:03,886 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:00:03,927 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:03,991 epoch 5 - iter 1/4 - loss 0.05136274 - samples/sec: 19.55 - lr: 0.020000\n",
-      "2021-09-08 11:00:04,040 epoch 5 - iter 2/4 - loss 0.02947772 - samples/sec: 20.87 - lr: 0.020000\n",
-      "2021-09-08 11:00:04,091 epoch 5 - iter 3/4 - loss 0.06967156 - samples/sec: 19.94 - lr: 0.020000\n",
-      "2021-09-08 11:00:04,142 epoch 5 - iter 4/4 - loss 0.05982548 - samples/sec: 19.74 - lr: 0.020000\n",
-      "2021-09-08 11:00:04,143 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:04,144 EPOCH 5 done: loss 0.0598 - lr 0.0200000\n",
+      "2021-09-21 19:24:17,874 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:17,980 epoch 2 - iter 1/4 - loss 0.01902075 - samples/sec: 13.71 - lr: 0.020000\n",
+      "2021-09-21 19:24:18,062 epoch 2 - iter 2/4 - loss 0.16260626 - samples/sec: 12.25 - lr: 0.020000\n",
+      "2021-09-21 19:24:18,133 epoch 2 - iter 3/4 - loss 0.11445887 - samples/sec: 14.20 - lr: 0.020000\n",
+      "2021-09-21 19:24:18,218 epoch 2 - iter 4/4 - loss 0.38541915 - samples/sec: 11.72 - lr: 0.020000\n",
+      "2021-09-21 19:24:18,219 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:18,220 EPOCH 2 done: loss 0.3854 - lr 0.0200000\n",
+      "2021-09-21 19:24:18,220 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:24:18,801 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:18,913 epoch 3 - iter 1/4 - loss 0.45940614 - samples/sec: 11.04 - lr: 0.020000\n",
+      "2021-09-21 19:24:19,008 epoch 3 - iter 2/4 - loss 0.61105177 - samples/sec: 10.48 - lr: 0.020000\n",
+      "2021-09-21 19:24:19,130 epoch 3 - iter 3/4 - loss 0.53317673 - samples/sec: 8.28 - lr: 0.020000\n",
+      "2021-09-21 19:24:19,207 epoch 3 - iter 4/4 - loss 0.40136636 - samples/sec: 13.10 - lr: 0.020000\n",
+      "2021-09-21 19:24:19,208 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:19,208 EPOCH 3 done: loss 0.4014 - lr 0.0200000\n",
+      "2021-09-21 19:24:19,208 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:24:19,215 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:19,367 epoch 4 - iter 1/4 - loss 0.12806712 - samples/sec: 9.69 - lr: 0.020000\n",
+      "2021-09-21 19:24:19,466 epoch 4 - iter 2/4 - loss 0.18248295 - samples/sec: 10.23 - lr: 0.020000\n",
+      "2021-09-21 19:24:19,547 epoch 4 - iter 3/4 - loss 0.12311226 - samples/sec: 12.32 - lr: 0.020000\n",
+      "2021-09-21 19:24:19,617 epoch 4 - iter 4/4 - loss 0.09389326 - samples/sec: 14.37 - lr: 0.020000\n",
+      "2021-09-21 19:24:19,618 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:19,619 EPOCH 4 done: loss 0.0939 - lr 0.0200000\n",
+      "2021-09-21 19:24:19,619 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:24:19,622 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:19,713 epoch 5 - iter 1/4 - loss 0.00405146 - samples/sec: 15.37 - lr: 0.020000\n",
+      "2021-09-21 19:24:19,793 epoch 5 - iter 2/4 - loss 0.27792895 - samples/sec: 12.72 - lr: 0.020000\n",
+      "2021-09-21 19:24:19,867 epoch 5 - iter 3/4 - loss 0.28222948 - samples/sec: 13.47 - lr: 0.020000\n",
+      "2021-09-21 19:24:19,932 epoch 5 - iter 4/4 - loss 0.21787368 - samples/sec: 15.58 - lr: 0.020000\n",
+      "2021-09-21 19:24:19,934 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:19,934 EPOCH 5 done: loss 0.2179 - lr 0.0200000\n",
       "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 11:00:04,144 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 11:00:04,174 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:04,239 epoch 6 - iter 1/4 - loss 0.02997667 - samples/sec: 19.63 - lr: 0.010000\n",
-      "2021-09-08 11:00:04,294 epoch 6 - iter 2/4 - loss 0.04879851 - samples/sec: 18.52 - lr: 0.010000\n",
-      "2021-09-08 11:00:04,349 epoch 6 - iter 3/4 - loss 0.08819591 - samples/sec: 18.41 - lr: 0.010000\n",
-      "2021-09-08 11:00:04,398 epoch 6 - iter 4/4 - loss 0.06980315 - samples/sec: 20.70 - lr: 0.010000\n",
-      "2021-09-08 11:00:04,399 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:04,400 EPOCH 6 done: loss 0.0698 - lr 0.0100000\n",
-      "2021-09-08 11:00:04,400 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:00:04,402 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:04,461 epoch 7 - iter 1/4 - loss 0.00355621 - samples/sec: 21.56 - lr: 0.010000\n",
-      "2021-09-08 11:00:04,508 epoch 7 - iter 2/4 - loss 0.01557491 - samples/sec: 21.53 - lr: 0.010000\n",
-      "2021-09-08 11:00:04,555 epoch 7 - iter 3/4 - loss 0.01372616 - samples/sec: 21.70 - lr: 0.010000\n",
-      "2021-09-08 11:00:04,604 epoch 7 - iter 4/4 - loss 0.01125037 - samples/sec: 20.31 - lr: 0.010000\n",
-      "2021-09-08 11:00:04,605 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:04,606 EPOCH 7 done: loss 0.0113 - lr 0.0100000\n",
-      "2021-09-08 11:00:04,606 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:00:04,608 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:04,667 epoch 8 - iter 1/4 - loss 0.00451916 - samples/sec: 21.50 - lr: 0.010000\n",
-      "2021-09-08 11:00:04,714 epoch 8 - iter 2/4 - loss 0.00649600 - samples/sec: 21.57 - lr: 0.010000\n",
-      "2021-09-08 11:00:04,761 epoch 8 - iter 3/4 - loss 0.00723798 - samples/sec: 21.55 - lr: 0.010000\n",
-      "2021-09-08 11:00:04,811 epoch 8 - iter 4/4 - loss 0.02359581 - samples/sec: 19.94 - lr: 0.010000\n",
-      "2021-09-08 11:00:04,812 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:04,813 EPOCH 8 done: loss 0.0236 - lr 0.0100000\n",
-      "2021-09-08 11:00:04,813 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:00:04,890 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:04,949 epoch 9 - iter 1/4 - loss 0.00382141 - samples/sec: 21.53 - lr: 0.010000\n",
-      "2021-09-08 11:00:04,996 epoch 9 - iter 2/4 - loss 0.00425260 - samples/sec: 21.51 - lr: 0.010000\n",
-      "2021-09-08 11:00:05,043 epoch 9 - iter 3/4 - loss 0.00617115 - samples/sec: 21.54 - lr: 0.010000\n",
-      "2021-09-08 11:00:05,089 epoch 9 - iter 4/4 - loss 0.01223725 - samples/sec: 21.76 - lr: 0.010000\n",
-      "2021-09-08 11:00:05,090 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:05,090 EPOCH 9 done: loss 0.0122 - lr 0.0100000\n",
+      "2021-09-21 19:24:19,934 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:24:19,944 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:20,049 epoch 6 - iter 1/4 - loss 0.10439109 - samples/sec: 13.13 - lr: 0.010000\n",
+      "2021-09-21 19:24:20,129 epoch 6 - iter 2/4 - loss 0.09627385 - samples/sec: 12.68 - lr: 0.010000\n",
+      "2021-09-21 19:24:20,190 epoch 6 - iter 3/4 - loss 0.06526970 - samples/sec: 16.60 - lr: 0.010000\n",
+      "2021-09-21 19:24:20,250 epoch 6 - iter 4/4 - loss 0.04914697 - samples/sec: 16.82 - lr: 0.010000\n",
+      "2021-09-21 19:24:20,251 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:20,252 EPOCH 6 done: loss 0.0491 - lr 0.0100000\n",
+      "2021-09-21 19:24:20,252 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:24:20,255 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:20,340 epoch 7 - iter 1/4 - loss 0.01716205 - samples/sec: 18.83 - lr: 0.010000\n",
+      "2021-09-21 19:24:20,419 epoch 7 - iter 2/4 - loss 0.03735914 - samples/sec: 12.85 - lr: 0.010000\n",
+      "2021-09-21 19:24:20,468 epoch 7 - iter 3/4 - loss 0.02536677 - samples/sec: 20.49 - lr: 0.010000\n",
+      "2021-09-21 19:24:20,534 epoch 7 - iter 4/4 - loss 0.13853457 - samples/sec: 15.26 - lr: 0.010000\n",
+      "2021-09-21 19:24:20,535 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:20,535 EPOCH 7 done: loss 0.1385 - lr 0.0100000\n",
+      "2021-09-21 19:24:20,535 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:24:20,615 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:20,694 epoch 8 - iter 1/4 - loss 0.03447538 - samples/sec: 16.62 - lr: 0.010000\n",
+      "2021-09-21 19:24:20,768 epoch 8 - iter 2/4 - loss 0.06765173 - samples/sec: 13.69 - lr: 0.010000\n",
+      "2021-09-21 19:24:20,819 epoch 8 - iter 3/4 - loss 0.04581342 - samples/sec: 19.85 - lr: 0.010000\n",
+      "2021-09-21 19:24:20,889 epoch 8 - iter 4/4 - loss 0.03499495 - samples/sec: 14.20 - lr: 0.010000\n",
+      "2021-09-21 19:24:20,890 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:20,891 EPOCH 8 done: loss 0.0350 - lr 0.0100000\n",
+      "2021-09-21 19:24:20,891 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:24:20,970 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:21,101 epoch 9 - iter 1/4 - loss 0.01660759 - samples/sec: 9.94 - lr: 0.010000\n",
+      "2021-09-21 19:24:21,178 epoch 9 - iter 2/4 - loss 0.03602031 - samples/sec: 13.08 - lr: 0.010000\n",
+      "2021-09-21 19:24:21,254 epoch 9 - iter 3/4 - loss 0.02934844 - samples/sec: 13.36 - lr: 0.010000\n",
+      "2021-09-21 19:24:21,324 epoch 9 - iter 4/4 - loss 0.02309605 - samples/sec: 14.22 - lr: 0.010000\n",
+      "2021-09-21 19:24:21,325 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:21,326 EPOCH 9 done: loss 0.0231 - lr 0.0100000\n",
       "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 11:00:05,091 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 11:00:05,191 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:05,252 epoch 10 - iter 1/4 - loss 0.00357738 - samples/sec: 20.91 - lr: 0.005000\n",
-      "2021-09-08 11:00:05,299 epoch 10 - iter 2/4 - loss 0.00244964 - samples/sec: 21.53 - lr: 0.005000\n",
-      "2021-09-08 11:00:05,345 epoch 10 - iter 3/4 - loss 0.00286017 - samples/sec: 21.62 - lr: 0.005000\n",
-      "2021-09-08 11:00:05,396 epoch 10 - iter 4/4 - loss 0.01663900 - samples/sec: 19.95 - lr: 0.005000\n",
-      "2021-09-08 11:00:05,397 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:05,397 EPOCH 10 done: loss 0.0166 - lr 0.0050000\n",
-      "2021-09-08 11:00:05,397 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:00:21,453 Test data not provided setting final score to 0\n",
-      "2021-09-08 11:00:39,567 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
+      "2021-09-21 19:24:21,326 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:24:21,416 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:21,521 epoch 10 - iter 1/4 - loss 0.06080346 - samples/sec: 13.17 - lr: 0.005000\n",
+      "2021-09-21 19:24:21,599 epoch 10 - iter 2/4 - loss 0.04835128 - samples/sec: 12.95 - lr: 0.005000\n",
+      "2021-09-21 19:24:21,658 epoch 10 - iter 3/4 - loss 0.03262065 - samples/sec: 16.88 - lr: 0.005000\n",
+      "2021-09-21 19:24:21,727 epoch 10 - iter 4/4 - loss 0.02551316 - samples/sec: 14.64 - lr: 0.005000\n",
+      "2021-09-21 19:24:21,728 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:21,729 EPOCH 10 done: loss 0.0255 - lr 0.0050000\n",
+      "2021-09-21 19:24:21,729 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:24:31,703 Test data not provided setting final score to 0\n",
+      "2021-09-21 19:24:47,397 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/emotion_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:00:43,987 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:24:52,112 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 4/4 [00:00<00:00, 25614.07it/s]"
+      "100%|██████████| 4/4 [00:00<00:00, 23831.27it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:00:43,989 [b'emotions experienced when not in a state of well-being', b'the optimistic feeling that all is going to turn out well', b'the state of being angry', b'the emotion of great happiness']\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 11:00:45,647 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:45,649 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:24:52,114 [b'emotions experienced when not in a state of well-being', b'the optimistic feeling that all is going to turn out well', b'the state of being angry', b'the emotion of great happiness']\n",
+      "2021-09-21 19:24:52,122 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:52,124 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -7372,115 +7383,122 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:00:45,650 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:45,650 Corpus: \"Corpus: 4 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 11:00:45,650 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:45,650 Parameters:\n",
-      "2021-09-08 11:00:45,651  - learning_rate: \"0.02\"\n",
-      "2021-09-08 11:00:45,651  - mini_batch_size: \"1\"\n",
-      "2021-09-08 11:00:45,651  - patience: \"3\"\n",
-      "2021-09-08 11:00:45,652  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 11:00:45,652  - max_epochs: \"10\"\n",
-      "2021-09-08 11:00:45,652  - shuffle: \"True\"\n",
-      "2021-09-08 11:00:45,652  - train_with_dev: \"False\"\n",
-      "2021-09-08 11:00:45,653  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 11:00:45,653 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:45,653 Model training base path: \"None1\"\n",
-      "2021-09-08 11:00:45,653 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:45,654 Device: cuda:1\n",
-      "2021-09-08 11:00:45,654 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:45,654 Embeddings storage mode: cpu\n",
-      "2021-09-08 11:00:45,669 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:45,736 epoch 1 - iter 1/4 - loss 0.69140977 - samples/sec: 21.93 - lr: 0.020000\n",
-      "2021-09-08 11:00:45,793 epoch 1 - iter 2/4 - loss 0.41897870 - samples/sec: 17.74 - lr: 0.020000\n",
-      "2021-09-08 11:00:45,848 epoch 1 - iter 3/4 - loss 0.49360526 - samples/sec: 18.32 - lr: 0.020000\n",
-      "2021-09-08 11:00:45,905 epoch 1 - iter 4/4 - loss 0.39934414 - samples/sec: 17.43 - lr: 0.020000\n",
-      "2021-09-08 11:00:45,906 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:45,907 EPOCH 1 done: loss 0.3993 - lr 0.0200000\n",
-      "2021-09-08 11:00:45,907 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 11:00:53,736 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:53,802 epoch 2 - iter 1/4 - loss 0.18134527 - samples/sec: 19.23 - lr: 0.020000\n",
-      "2021-09-08 11:00:53,856 epoch 2 - iter 2/4 - loss 0.10848950 - samples/sec: 18.79 - lr: 0.020000\n",
-      "2021-09-08 11:00:53,907 epoch 2 - iter 3/4 - loss 0.21438491 - samples/sec: 19.76 - lr: 0.020000\n",
-      "2021-09-08 11:00:53,955 epoch 2 - iter 4/4 - loss 0.17421275 - samples/sec: 21.38 - lr: 0.020000\n",
-      "2021-09-08 11:00:53,956 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:53,956 EPOCH 2 done: loss 0.1742 - lr 0.0200000\n",
-      "2021-09-08 11:00:53,956 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:00:57,820 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:57,889 epoch 3 - iter 1/4 - loss 0.04245337 - samples/sec: 18.42 - lr: 0.020000\n",
-      "2021-09-08 11:00:57,943 epoch 3 - iter 2/4 - loss 0.03082917 - samples/sec: 18.97 - lr: 0.020000\n",
-      "2021-09-08 11:00:57,988 epoch 3 - iter 3/4 - loss 0.02302218 - samples/sec: 22.51 - lr: 0.020000\n",
-      "2021-09-08 11:00:58,036 epoch 3 - iter 4/4 - loss 0.04074874 - samples/sec: 21.18 - lr: 0.020000\n",
-      "2021-09-08 11:00:58,038 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:58,038 EPOCH 3 done: loss 0.0407 - lr 0.0200000\n",
-      "2021-09-08 11:00:58,038 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:00:58,041 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:58,104 epoch 4 - iter 1/4 - loss 0.14507769 - samples/sec: 19.98 - lr: 0.020000\n",
-      "2021-09-08 11:00:58,152 epoch 4 - iter 2/4 - loss 0.66978770 - samples/sec: 21.08 - lr: 0.020000\n",
-      "2021-09-08 11:00:58,197 epoch 4 - iter 3/4 - loss 0.44682619 - samples/sec: 22.51 - lr: 0.020000\n",
-      "2021-09-08 11:00:58,244 epoch 4 - iter 4/4 - loss 0.34301706 - samples/sec: 21.54 - lr: 0.020000\n",
-      "2021-09-08 11:00:58,245 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:58,245 EPOCH 4 done: loss 0.3430 - lr 0.0200000\n",
-      "2021-09-08 11:00:58,246 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:00:58,255 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:58,313 epoch 5 - iter 1/4 - loss 0.00181587 - samples/sec: 22.35 - lr: 0.020000\n",
-      "2021-09-08 11:00:58,364 epoch 5 - iter 2/4 - loss 0.00357719 - samples/sec: 19.80 - lr: 0.020000\n",
-      "2021-09-08 11:00:58,422 epoch 5 - iter 3/4 - loss 0.05841984 - samples/sec: 17.30 - lr: 0.020000\n",
-      "2021-09-08 11:00:58,472 epoch 5 - iter 4/4 - loss 0.04594108 - samples/sec: 20.20 - lr: 0.020000\n",
-      "2021-09-08 11:00:58,473 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:58,474 EPOCH 5 done: loss 0.0459 - lr 0.0200000\n",
-      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 11:00:58,474 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 11:00:58,483 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:58,550 epoch 6 - iter 1/4 - loss 0.00258925 - samples/sec: 19.20 - lr: 0.010000\n",
-      "2021-09-08 11:00:58,605 epoch 6 - iter 2/4 - loss 0.00982125 - samples/sec: 18.13 - lr: 0.010000\n",
-      "2021-09-08 11:00:58,659 epoch 6 - iter 3/4 - loss 0.00674260 - samples/sec: 18.95 - lr: 0.010000\n",
-      "2021-09-08 11:00:58,707 epoch 6 - iter 4/4 - loss 0.00576476 - samples/sec: 21.16 - lr: 0.010000\n",
-      "2021-09-08 11:00:58,708 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:58,708 EPOCH 6 done: loss 0.0058 - lr 0.0100000\n",
-      "2021-09-08 11:00:58,708 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:00:58,731 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:58,792 epoch 7 - iter 1/4 - loss 0.00134558 - samples/sec: 21.47 - lr: 0.010000\n",
-      "2021-09-08 11:00:58,837 epoch 7 - iter 2/4 - loss 0.00323491 - samples/sec: 22.45 - lr: 0.010000\n",
-      "2021-09-08 11:00:58,882 epoch 7 - iter 3/4 - loss 0.00608908 - samples/sec: 22.70 - lr: 0.010000\n",
-      "2021-09-08 11:00:58,930 epoch 7 - iter 4/4 - loss 0.00548576 - samples/sec: 21.31 - lr: 0.010000\n",
-      "2021-09-08 11:00:58,931 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:58,931 EPOCH 7 done: loss 0.0055 - lr 0.0100000\n",
-      "2021-09-08 11:00:58,932 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:00:58,935 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:58,997 epoch 8 - iter 1/4 - loss 0.00917543 - samples/sec: 21.17 - lr: 0.010000\n",
-      "2021-09-08 11:00:59,045 epoch 8 - iter 2/4 - loss 0.00927066 - samples/sec: 21.04 - lr: 0.010000\n",
-      "2021-09-08 11:00:59,093 epoch 8 - iter 3/4 - loss 0.00658482 - samples/sec: 21.20 - lr: 0.010000\n",
-      "2021-09-08 11:00:59,141 epoch 8 - iter 4/4 - loss 0.00537312 - samples/sec: 21.38 - lr: 0.010000\n",
-      "2021-09-08 11:00:59,142 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:59,142 EPOCH 8 done: loss 0.0054 - lr 0.0100000\n",
-      "2021-09-08 11:00:59,143 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:00:59,223 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:59,285 epoch 9 - iter 1/4 - loss 0.00496608 - samples/sec: 20.90 - lr: 0.010000\n",
-      "2021-09-08 11:00:59,330 epoch 9 - iter 2/4 - loss 0.00355561 - samples/sec: 22.56 - lr: 0.010000\n",
-      "2021-09-08 11:00:59,377 epoch 9 - iter 3/4 - loss 0.00312101 - samples/sec: 21.41 - lr: 0.010000\n",
-      "2021-09-08 11:00:59,425 epoch 9 - iter 4/4 - loss 0.00275342 - samples/sec: 21.10 - lr: 0.010000\n",
-      "2021-09-08 11:00:59,427 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:59,427 EPOCH 9 done: loss 0.0028 - lr 0.0100000\n",
-      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 11:00:59,427 BAD EPOCHS (no improvement): 4\n"
+      "2021-09-21 19:24:52,125 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:52,125 Corpus: \"Corpus: 4 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 19:24:52,125 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:52,125 Parameters:\n",
+      "2021-09-21 19:24:52,126  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:24:52,126  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:24:52,126  - patience: \"3\"\n",
+      "2021-09-21 19:24:52,126  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:24:52,127  - max_epochs: \"10\"\n",
+      "2021-09-21 19:24:52,127  - shuffle: \"True\"\n",
+      "2021-09-21 19:24:52,127  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:24:52,128  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:24:52,128 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:52,128 Model training base path: \"None1\"\n",
+      "2021-09-21 19:24:52,128 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:52,129 Device: cuda:0\n",
+      "2021-09-21 19:24:52,129 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:52,129 Embeddings storage mode: cpu\n",
+      "2021-09-21 19:24:52,136 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:52,224 epoch 1 - iter 1/4 - loss 0.12202934 - samples/sec: 19.83 - lr: 0.020000\n",
+      "2021-09-21 19:24:52,288 epoch 1 - iter 2/4 - loss 0.38692974 - samples/sec: 15.64 - lr: 0.020000\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:00:59,505 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:59,563 epoch 10 - iter 1/4 - loss 0.00198755 - samples/sec: 22.64 - lr: 0.005000\n",
-      "2021-09-08 11:00:59,607 epoch 10 - iter 2/4 - loss 0.00179523 - samples/sec: 23.10 - lr: 0.005000\n",
-      "2021-09-08 11:00:59,650 epoch 10 - iter 3/4 - loss 0.00308167 - samples/sec: 23.32 - lr: 0.005000\n",
-      "2021-09-08 11:00:59,694 epoch 10 - iter 4/4 - loss 0.00245187 - samples/sec: 23.04 - lr: 0.005000\n",
-      "2021-09-08 11:00:59,695 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:59,696 EPOCH 10 done: loss 0.0025 - lr 0.0050000\n",
-      "2021-09-08 11:00:59,696 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:01:12,588 Test data not provided setting final score to 0\n",
-      "Accuracy Durchschnitt: 0.5338028169014085\n"
+      "2021-09-21 19:24:52,355 epoch 1 - iter 3/4 - loss 0.58070244 - samples/sec: 15.10 - lr: 0.020000\n",
+      "2021-09-21 19:24:52,410 epoch 1 - iter 4/4 - loss 0.58436471 - samples/sec: 18.04 - lr: 0.020000\n",
+      "2021-09-21 19:24:52,411 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:52,412 EPOCH 1 done: loss 0.5844 - lr 0.0200000\n",
+      "2021-09-21 19:24:52,412 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:24:56,448 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:56,559 epoch 2 - iter 1/4 - loss 0.01437707 - samples/sec: 13.57 - lr: 0.020000\n",
+      "2021-09-21 19:24:56,626 epoch 2 - iter 2/4 - loss 0.03180324 - samples/sec: 15.10 - lr: 0.020000\n",
+      "2021-09-21 19:24:56,700 epoch 2 - iter 3/4 - loss 0.15899928 - samples/sec: 13.62 - lr: 0.020000\n",
+      "2021-09-21 19:24:56,763 epoch 2 - iter 4/4 - loss 0.18370052 - samples/sec: 15.88 - lr: 0.020000\n",
+      "2021-09-21 19:24:56,765 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:56,765 EPOCH 2 done: loss 0.1837 - lr 0.0200000\n",
+      "2021-09-21 19:24:56,765 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:24:56,768 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:56,855 epoch 3 - iter 1/4 - loss 0.04391501 - samples/sec: 15.63 - lr: 0.020000\n",
+      "2021-09-21 19:24:56,925 epoch 3 - iter 2/4 - loss 0.03443319 - samples/sec: 14.40 - lr: 0.020000\n",
+      "2021-09-21 19:24:56,989 epoch 3 - iter 3/4 - loss 0.21993518 - samples/sec: 15.91 - lr: 0.020000\n",
+      "2021-09-21 19:24:57,061 epoch 3 - iter 4/4 - loss 0.23272035 - samples/sec: 13.98 - lr: 0.020000\n",
+      "2021-09-21 19:24:57,062 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:57,063 EPOCH 3 done: loss 0.2327 - lr 0.0200000\n",
+      "2021-09-21 19:24:57,063 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:24:57,065 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:57,172 epoch 4 - iter 1/4 - loss 0.22434634 - samples/sec: 13.84 - lr: 0.020000\n",
+      "2021-09-21 19:24:57,244 epoch 4 - iter 2/4 - loss 0.11382466 - samples/sec: 14.03 - lr: 0.020000\n",
+      "2021-09-21 19:24:57,324 epoch 4 - iter 3/4 - loss 0.22362549 - samples/sec: 12.61 - lr: 0.020000\n",
+      "2021-09-21 19:24:57,390 epoch 4 - iter 4/4 - loss 0.17052301 - samples/sec: 15.26 - lr: 0.020000\n",
+      "2021-09-21 19:24:57,391 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:57,392 EPOCH 4 done: loss 0.1705 - lr 0.0200000\n",
+      "2021-09-21 19:24:57,392 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:24:57,394 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:57,499 epoch 5 - iter 1/4 - loss 0.00813064 - samples/sec: 15.64 - lr: 0.020000\n",
+      "2021-09-21 19:24:57,577 epoch 5 - iter 2/4 - loss 0.10944190 - samples/sec: 12.91 - lr: 0.020000\n",
+      "2021-09-21 19:24:57,653 epoch 5 - iter 3/4 - loss 0.07424798 - samples/sec: 13.25 - lr: 0.020000\n",
+      "2021-09-21 19:24:57,726 epoch 5 - iter 4/4 - loss 0.06107288 - samples/sec: 13.73 - lr: 0.020000\n",
+      "2021-09-21 19:24:57,728 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:57,728 EPOCH 5 done: loss 0.0611 - lr 0.0200000\n",
+      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 19:24:57,729 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:24:57,730 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:57,849 epoch 6 - iter 1/4 - loss 0.00148196 - samples/sec: 14.09 - lr: 0.010000\n",
+      "2021-09-21 19:24:57,911 epoch 6 - iter 2/4 - loss 0.00491180 - samples/sec: 16.32 - lr: 0.010000\n",
+      "2021-09-21 19:24:57,986 epoch 6 - iter 3/4 - loss 0.01655900 - samples/sec: 13.43 - lr: 0.010000\n",
+      "2021-09-21 19:24:58,050 epoch 6 - iter 4/4 - loss 0.01650179 - samples/sec: 15.83 - lr: 0.010000\n",
+      "2021-09-21 19:24:58,051 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:58,051 EPOCH 6 done: loss 0.0165 - lr 0.0100000\n",
+      "2021-09-21 19:24:58,052 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:24:58,054 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:58,164 epoch 7 - iter 1/4 - loss 0.00705310 - samples/sec: 14.94 - lr: 0.010000\n",
+      "2021-09-21 19:24:58,240 epoch 7 - iter 2/4 - loss 0.01279404 - samples/sec: 13.25 - lr: 0.010000\n",
+      "2021-09-21 19:24:58,311 epoch 7 - iter 3/4 - loss 0.01065240 - samples/sec: 14.20 - lr: 0.010000\n",
+      "2021-09-21 19:24:58,393 epoch 7 - iter 4/4 - loss 0.05523074 - samples/sec: 12.32 - lr: 0.010000\n",
+      "2021-09-21 19:24:58,394 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:58,394 EPOCH 7 done: loss 0.0552 - lr 0.0100000\n",
+      "2021-09-21 19:24:58,395 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:24:58,397 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:58,500 epoch 8 - iter 1/4 - loss 0.01824372 - samples/sec: 16.20 - lr: 0.010000\n",
+      "2021-09-21 19:24:58,588 epoch 8 - iter 2/4 - loss 0.01097582 - samples/sec: 11.39 - lr: 0.010000\n",
+      "2021-09-21 19:24:58,660 epoch 8 - iter 3/4 - loss 0.02624594 - samples/sec: 13.95 - lr: 0.010000\n",
+      "2021-09-21 19:24:58,733 epoch 8 - iter 4/4 - loss 0.02070725 - samples/sec: 13.91 - lr: 0.010000\n",
+      "2021-09-21 19:24:58,734 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:58,734 EPOCH 8 done: loss 0.0207 - lr 0.0100000\n",
+      "2021-09-21 19:24:58,735 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:24:58,737 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:58,842 epoch 9 - iter 1/4 - loss 0.00829447 - samples/sec: 14.19 - lr: 0.010000\n",
+      "2021-09-21 19:24:58,920 epoch 9 - iter 2/4 - loss 0.00475347 - samples/sec: 12.89 - lr: 0.010000\n",
+      "2021-09-21 19:24:58,992 epoch 9 - iter 3/4 - loss 0.05181199 - samples/sec: 14.00 - lr: 0.010000\n",
+      "2021-09-21 19:24:59,051 epoch 9 - iter 4/4 - loss 0.04236898 - samples/sec: 17.30 - lr: 0.010000\n",
+      "2021-09-21 19:24:59,053 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:59,053 EPOCH 9 done: loss 0.0424 - lr 0.0100000\n",
+      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 19:24:59,053 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:24:59,056 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:59,176 epoch 10 - iter 1/4 - loss 0.00431042 - samples/sec: 14.02 - lr: 0.005000\n",
+      "2021-09-21 19:24:59,238 epoch 10 - iter 2/4 - loss 0.00419291 - samples/sec: 16.46 - lr: 0.005000\n",
+      "2021-09-21 19:24:59,327 epoch 10 - iter 3/4 - loss 0.00739608 - samples/sec: 11.28 - lr: 0.005000\n",
+      "2021-09-21 19:24:59,390 epoch 10 - iter 4/4 - loss 0.01789142 - samples/sec: 15.99 - lr: 0.005000\n",
+      "2021-09-21 19:24:59,391 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:24:59,392 EPOCH 10 done: loss 0.0179 - lr 0.0050000\n",
+      "2021-09-21 19:24:59,392 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:25:03,703 Test data not provided setting final score to 0\n",
+      "Accuracy Durchschnitt: 0.5661971830985916\n"
      ]
     }
    ],
@@ -7552,11 +7570,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
    "id": "de9de0d9",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.5880281690140845, 0.5616197183098591, 0.596830985915493, 0.579225352112676, 0.5052816901408451]\n",
+      "0.03260799127546541\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(avg_acc_list)\n",
+    "print(statistics.pstdev(avg_acc_list))"
+   ]
   }
  ],
  "metadata": {
diff --git a/oneshot/sentiment_amazon_oneshot.ipynb b/oneshot/sentiment_amazon_oneshot.ipynb
index 6c837df..43d18df 100644
--- a/oneshot/sentiment_amazon_oneshot.ipynb
+++ b/oneshot/sentiment_amazon_oneshot.ipynb
@@ -70,25 +70,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:49:01,582 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
+      "2021-09-21 19:34:04,638 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:49:08,345 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:34:16,003 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 3/3 [00:00<00:00, 18396.07it/s]"
+      "100%|██████████| 3/3 [00:00<00:00, 15827.56it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:49:08,347 [b'positive', b'neutral', b'negative']\n",
-      "2021-09-08 10:49:08,491 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:08,492 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:34:16,005 [b'positive', b'neutral', b'negative']\n",
+      "2021-09-21 19:34:16,011 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:16,013 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -401,24 +401,26 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:49:08,493 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:08,493 Corpus: \"Corpus: 3 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 10:49:08,494 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:08,494 Parameters:\n",
-      "2021-09-08 10:49:08,495  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:49:08,495  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:49:08,495  - patience: \"3\"\n",
-      "2021-09-08 10:49:08,496  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:49:08,496  - max_epochs: \"10\"\n",
-      "2021-09-08 10:49:08,496  - shuffle: \"True\"\n",
-      "2021-09-08 10:49:08,496  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:49:08,497  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:49:08,497 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:08,497 Model training base path: \"temp\"\n",
-      "2021-09-08 10:49:08,497 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:08,498 Device: cuda:0\n",
-      "2021-09-08 10:49:08,498 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:08,498 Embeddings storage mode: cpu\n"
+      "2021-09-21 19:34:16,013 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:16,014 Corpus: \"Corpus: 3 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 19:34:16,014 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:16,014 Parameters:\n",
+      "2021-09-21 19:34:16,014  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:34:16,015  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:34:16,015  - patience: \"3\"\n",
+      "2021-09-21 19:34:16,015  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:34:16,015  - max_epochs: \"10\"\n",
+      "2021-09-21 19:34:16,016  - shuffle: \"True\"\n",
+      "2021-09-21 19:34:16,016  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:34:16,016  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:34:16,017 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:16,017 Model training base path: \"temp\"\n",
+      "2021-09-21 19:34:16,017 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:16,017 Device: cuda:0\n",
+      "2021-09-21 19:34:16,018 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:16,018 Embeddings storage mode: cpu\n",
+      "2021-09-21 19:34:16,025 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:16,137 epoch 1 - iter 1/3 - loss 0.63382995 - samples/sec: 12.79 - lr: 0.020000\n"
      ]
     },
     {
@@ -432,112 +434,98 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:49:08,671 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:08,734 epoch 1 - iter 1/3 - loss 0.63187546 - samples/sec: 22.17 - lr: 0.020000\n",
-      "2021-09-08 10:49:08,779 epoch 1 - iter 2/3 - loss 0.63922191 - samples/sec: 22.06 - lr: 0.020000\n",
-      "2021-09-08 10:49:08,827 epoch 1 - iter 3/3 - loss 0.64008037 - samples/sec: 21.40 - lr: 0.020000\n",
-      "2021-09-08 10:49:08,828 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:08,828 EPOCH 1 done: loss 0.6401 - lr 0.0200000\n",
-      "2021-09-08 10:49:08,828 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:34:16,210 epoch 1 - iter 2/3 - loss 0.63245144 - samples/sec: 13.89 - lr: 0.020000\n",
+      "2021-09-21 19:34:16,294 epoch 1 - iter 3/3 - loss 0.63175499 - samples/sec: 11.98 - lr: 0.020000\n",
+      "2021-09-21 19:34:16,295 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:16,296 EPOCH 1 done: loss 0.6318 - lr 0.0200000\n",
+      "2021-09-21 19:34:16,296 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:49:20,789 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:20,846 epoch 2 - iter 1/3 - loss 0.64154881 - samples/sec: 23.07 - lr: 0.020000\n",
-      "2021-09-08 10:49:20,889 epoch 2 - iter 2/3 - loss 0.62951797 - samples/sec: 23.41 - lr: 0.020000\n",
-      "2021-09-08 10:49:20,932 epoch 2 - iter 3/3 - loss 0.63157588 - samples/sec: 23.70 - lr: 0.020000\n",
-      "2021-09-08 10:49:20,933 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:20,933 EPOCH 2 done: loss 0.6316 - lr 0.0200000\n",
-      "2021-09-08 10:49:20,933 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:49:20,952 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:21,007 epoch 3 - iter 1/3 - loss 0.64197445 - samples/sec: 23.46 - lr: 0.020000\n",
-      "2021-09-08 10:49:21,050 epoch 3 - iter 2/3 - loss 0.63919351 - samples/sec: 23.20 - lr: 0.020000\n",
-      "2021-09-08 10:49:21,094 epoch 3 - iter 3/3 - loss 0.63872369 - samples/sec: 23.15 - lr: 0.020000\n",
-      "2021-09-08 10:49:21,095 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:21,095 EPOCH 3 done: loss 0.6387 - lr 0.0200000\n",
-      "2021-09-08 10:49:21,096 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:49:21,098 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:21,153 epoch 4 - iter 1/3 - loss 0.65932101 - samples/sec: 22.83 - lr: 0.020000\n",
-      "2021-09-08 10:49:21,197 epoch 4 - iter 2/3 - loss 0.65369666 - samples/sec: 23.29 - lr: 0.020000\n",
-      "2021-09-08 10:49:21,253 epoch 4 - iter 3/3 - loss 0.64656605 - samples/sec: 17.90 - lr: 0.020000\n",
-      "2021-09-08 10:49:21,255 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:21,255 EPOCH 4 done: loss 0.6466 - lr 0.0200000\n",
-      "2021-09-08 10:49:21,256 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:34:22,074 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:22,132 epoch 2 - iter 1/3 - loss 0.62140059 - samples/sec: 22.62 - lr: 0.020000\n",
+      "2021-09-21 19:34:22,175 epoch 2 - iter 2/3 - loss 0.63086647 - samples/sec: 23.38 - lr: 0.020000\n",
+      "2021-09-21 19:34:22,218 epoch 2 - iter 3/3 - loss 0.62807618 - samples/sec: 23.42 - lr: 0.020000\n",
+      "2021-09-21 19:34:22,219 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:22,220 EPOCH 2 done: loss 0.6281 - lr 0.0200000\n",
+      "2021-09-21 19:34:22,220 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:34:22,327 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:22,382 epoch 3 - iter 1/3 - loss 0.62607044 - samples/sec: 23.39 - lr: 0.020000\n",
+      "2021-09-21 19:34:22,426 epoch 3 - iter 2/3 - loss 0.63022077 - samples/sec: 23.33 - lr: 0.020000\n",
+      "2021-09-21 19:34:22,469 epoch 3 - iter 3/3 - loss 0.64111640 - samples/sec: 23.48 - lr: 0.020000\n",
+      "2021-09-21 19:34:22,470 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:22,470 EPOCH 3 done: loss 0.6411 - lr 0.0200000\n",
+      "2021-09-21 19:34:22,470 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:49:43,055 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:43,112 epoch 5 - iter 1/3 - loss 0.61964065 - samples/sec: 23.10 - lr: 0.020000\n",
-      "2021-09-08 10:49:43,156 epoch 5 - iter 2/3 - loss 0.64361230 - samples/sec: 23.33 - lr: 0.020000\n",
-      "2021-09-08 10:49:43,198 epoch 5 - iter 3/3 - loss 0.64475083 - samples/sec: 23.69 - lr: 0.020000\n",
-      "2021-09-08 10:49:43,199 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:43,200 EPOCH 5 done: loss 0.6448 - lr 0.0200000\n",
-      "2021-09-08 10:49:43,200 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:49:43,203 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:43,258 epoch 6 - iter 1/3 - loss 0.63499808 - samples/sec: 23.39 - lr: 0.020000\n",
-      "2021-09-08 10:49:43,302 epoch 6 - iter 2/3 - loss 0.63047585 - samples/sec: 22.99 - lr: 0.020000\n",
-      "2021-09-08 10:49:43,346 epoch 6 - iter 3/3 - loss 0.62997051 - samples/sec: 23.05 - lr: 0.020000\n",
-      "2021-09-08 10:49:43,347 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:43,347 EPOCH 6 done: loss 0.6300 - lr 0.0200000\n",
-      "2021-09-08 10:49:43,347 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:49:43,349 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:43,404 epoch 7 - iter 1/3 - loss 0.62903923 - samples/sec: 23.37 - lr: 0.020000\n",
-      "2021-09-08 10:49:43,447 epoch 7 - iter 2/3 - loss 0.63740462 - samples/sec: 23.70 - lr: 0.020000\n",
-      "2021-09-08 10:49:43,490 epoch 7 - iter 3/3 - loss 0.64549071 - samples/sec: 23.48 - lr: 0.020000\n",
-      "2021-09-08 10:49:43,491 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:43,491 EPOCH 7 done: loss 0.6455 - lr 0.0200000\n",
-      "2021-09-08 10:49:43,491 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:49:43,493 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:43,548 epoch 8 - iter 1/3 - loss 0.64322704 - samples/sec: 23.70 - lr: 0.020000\n",
-      "2021-09-08 10:49:43,591 epoch 8 - iter 2/3 - loss 0.63245845 - samples/sec: 23.48 - lr: 0.020000\n",
-      "2021-09-08 10:49:43,633 epoch 8 - iter 3/3 - loss 0.64225099 - samples/sec: 23.60 - lr: 0.020000\n",
-      "2021-09-08 10:49:43,634 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:43,635 EPOCH 8 done: loss 0.6423 - lr 0.0200000\n",
-      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 10:49:43,635 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:49:43,637 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:43,693 epoch 9 - iter 1/3 - loss 0.64775801 - samples/sec: 22.80 - lr: 0.010000\n",
-      "2021-09-08 10:49:43,737 epoch 9 - iter 2/3 - loss 0.64399692 - samples/sec: 23.16 - lr: 0.010000\n",
-      "2021-09-08 10:49:43,780 epoch 9 - iter 3/3 - loss 0.64615548 - samples/sec: 23.29 - lr: 0.010000\n",
-      "2021-09-08 10:49:43,781 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:43,782 EPOCH 9 done: loss 0.6462 - lr 0.0100000\n",
-      "2021-09-08 10:49:43,782 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:49:43,784 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:43,842 epoch 10 - iter 1/3 - loss 0.64425725 - samples/sec: 22.13 - lr: 0.010000\n",
-      "2021-09-08 10:49:43,885 epoch 10 - iter 2/3 - loss 0.63420421 - samples/sec: 23.39 - lr: 0.010000\n",
-      "2021-09-08 10:49:43,928 epoch 10 - iter 3/3 - loss 0.63506720 - samples/sec: 23.41 - lr: 0.010000\n",
-      "2021-09-08 10:49:43,929 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:49:43,929 EPOCH 10 done: loss 0.6351 - lr 0.0100000\n",
-      "2021-09-08 10:49:43,930 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:49:57,713 Test data not provided setting final score to 0\n",
-      "2021-09-08 10:50:09,319 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
+      "2021-09-21 19:34:35,898 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:35,968 epoch 4 - iter 1/3 - loss 0.62962538 - samples/sec: 18.69 - lr: 0.020000\n",
+      "2021-09-21 19:34:36,017 epoch 4 - iter 2/3 - loss 0.63626653 - samples/sec: 20.77 - lr: 0.020000\n",
+      "2021-09-21 19:34:36,070 epoch 4 - iter 3/3 - loss 0.64163846 - samples/sec: 19.09 - lr: 0.020000\n",
+      "2021-09-21 19:34:36,071 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:36,071 EPOCH 4 done: loss 0.6416 - lr 0.0200000\n",
+      "2021-09-21 19:34:36,071 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:34:58,563 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:58,645 epoch 5 - iter 1/3 - loss 0.64682776 - samples/sec: 15.05 - lr: 0.020000\n",
+      "2021-09-21 19:34:58,721 epoch 5 - iter 2/3 - loss 0.64173761 - samples/sec: 13.19 - lr: 0.020000\n",
+      "2021-09-21 19:34:58,809 epoch 5 - iter 3/3 - loss 0.63689200 - samples/sec: 11.53 - lr: 0.020000\n",
+      "2021-09-21 19:34:58,811 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:58,811 EPOCH 5 done: loss 0.6369 - lr 0.0200000\n",
+      "2021-09-21 19:34:58,812 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:34:58,816 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:58,921 epoch 6 - iter 1/3 - loss 0.63754678 - samples/sec: 15.04 - lr: 0.020000\n",
+      "2021-09-21 19:34:58,988 epoch 6 - iter 2/3 - loss 0.63765204 - samples/sec: 14.89 - lr: 0.020000\n",
+      "2021-09-21 19:34:59,072 epoch 6 - iter 3/3 - loss 0.62379247 - samples/sec: 12.11 - lr: 0.020000\n",
+      "2021-09-21 19:34:59,073 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:59,074 EPOCH 6 done: loss 0.6238 - lr 0.0200000\n",
+      "2021-09-21 19:34:59,074 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:34:59,084 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:59,185 epoch 7 - iter 1/3 - loss 0.64618975 - samples/sec: 14.05 - lr: 0.020000\n",
+      "2021-09-21 19:34:59,268 epoch 7 - iter 2/3 - loss 0.63650388 - samples/sec: 12.15 - lr: 0.020000\n",
+      "2021-09-21 19:34:59,337 epoch 7 - iter 3/3 - loss 0.62863640 - samples/sec: 14.61 - lr: 0.020000\n",
+      "2021-09-21 19:34:59,338 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:59,339 EPOCH 7 done: loss 0.6286 - lr 0.0200000\n",
+      "2021-09-21 19:34:59,339 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:34:59,433 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:59,522 epoch 8 - iter 1/3 - loss 0.62465167 - samples/sec: 14.60 - lr: 0.020000\n",
+      "2021-09-21 19:34:59,589 epoch 8 - iter 2/3 - loss 0.64859927 - samples/sec: 15.03 - lr: 0.020000\n",
+      "2021-09-21 19:34:59,665 epoch 8 - iter 3/3 - loss 0.64602643 - samples/sec: 13.19 - lr: 0.020000\n",
+      "2021-09-21 19:34:59,666 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:34:59,666 EPOCH 8 done: loss 0.6460 - lr 0.0200000\n",
+      "2021-09-21 19:34:59,667 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:35:07,592 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:07,662 epoch 9 - iter 1/3 - loss 0.62522703 - samples/sec: 20.01 - lr: 0.020000\n",
+      "2021-09-21 19:35:07,721 epoch 9 - iter 2/3 - loss 0.63107169 - samples/sec: 17.23 - lr: 0.020000\n",
+      "2021-09-21 19:35:07,773 epoch 9 - iter 3/3 - loss 0.63204056 - samples/sec: 19.42 - lr: 0.020000\n",
+      "2021-09-21 19:35:07,774 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:07,775 EPOCH 9 done: loss 0.6320 - lr 0.0200000\n",
+      "2021-09-21 19:35:07,775 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:35:07,860 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:07,928 epoch 10 - iter 1/3 - loss 0.62063831 - samples/sec: 18.42 - lr: 0.020000\n",
+      "2021-09-21 19:35:07,982 epoch 10 - iter 2/3 - loss 0.63426077 - samples/sec: 18.51 - lr: 0.020000\n",
+      "2021-09-21 19:35:08,033 epoch 10 - iter 3/3 - loss 0.63686645 - samples/sec: 19.78 - lr: 0.020000\n",
+      "2021-09-21 19:35:08,034 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:08,034 EPOCH 10 done: loss 0.6369 - lr 0.0200000\n",
+      "2021-09-21 19:35:08,035 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:35:25,342 Test data not provided setting final score to 0\n",
+      "2021-09-21 19:35:40,068 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:50:13,455 Computing label dictionary. Progress:\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "100%|██████████| 3/3 [00:00<00:00, 24291.34it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 10:50:13,457 [b'positive', b'neutral', b'negative']\n"
+      "2021-09-21 19:35:44,464 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\n"
+      "100%|██████████| 3/3 [00:00<00:00, 16448.25it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:50:13,709 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:13,711 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:35:44,466 [b'positive', b'neutral', b'negative']\n",
+      "2021-09-21 19:35:44,624 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:44,626 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -850,125 +838,131 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:50:13,711 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:13,712 Corpus: \"Corpus: 3 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 10:50:13,712 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:13,712 Parameters:\n",
-      "2021-09-08 10:50:13,713  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:50:13,713  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:50:13,713  - patience: \"3\"\n",
-      "2021-09-08 10:50:13,713  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:50:13,714  - max_epochs: \"10\"\n",
-      "2021-09-08 10:50:13,714  - shuffle: \"True\"\n",
-      "2021-09-08 10:50:13,714  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:50:13,715  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:50:13,715 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:13,715 Model training base path: \"temp\"\n",
-      "2021-09-08 10:50:13,715 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:13,716 Device: cuda:0\n",
-      "2021-09-08 10:50:13,716 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:13,716 Embeddings storage mode: cpu\n",
-      "2021-09-08 10:50:14,057 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:14,106 epoch 1 - iter 1/3 - loss 0.62521917 - samples/sec: 27.95 - lr: 0.020000\n",
-      "2021-09-08 10:50:14,150 epoch 1 - iter 2/3 - loss 0.62612969 - samples/sec: 23.17 - lr: 0.020000\n",
-      "2021-09-08 10:50:14,193 epoch 1 - iter 3/3 - loss 0.62843730 - samples/sec: 23.43 - lr: 0.020000\n",
-      "2021-09-08 10:50:14,194 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:14,194 EPOCH 1 done: loss 0.6284 - lr 0.0200000\n",
-      "2021-09-08 10:50:14,194 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:50:34,164 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:34,221 epoch 2 - iter 1/3 - loss 0.63807541 - samples/sec: 22.57 - lr: 0.020000\n",
-      "2021-09-08 10:50:34,266 epoch 2 - iter 2/3 - loss 0.63283387 - samples/sec: 22.87 - lr: 0.020000\n",
-      "2021-09-08 10:50:34,315 epoch 2 - iter 3/3 - loss 0.63167552 - samples/sec: 20.35 - lr: 0.020000\n",
-      "2021-09-08 10:50:34,316 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:34,317 EPOCH 2 done: loss 0.6317 - lr 0.0200000\n",
-      "2021-09-08 10:50:34,317 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:50:38,762 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:38,847 epoch 3 - iter 1/3 - loss 0.66236675 - samples/sec: 16.11 - lr: 0.020000\n",
-      "2021-09-08 10:50:38,902 epoch 3 - iter 2/3 - loss 0.65485570 - samples/sec: 18.38 - lr: 0.020000\n",
-      "2021-09-08 10:50:38,974 epoch 3 - iter 3/3 - loss 0.64678987 - samples/sec: 13.99 - lr: 0.020000\n",
-      "2021-09-08 10:50:38,975 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:38,975 EPOCH 3 done: loss 0.6468 - lr 0.0200000\n",
-      "2021-09-08 10:50:38,976 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:50:43,178 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:43,263 epoch 4 - iter 1/3 - loss 0.64924455 - samples/sec: 19.45 - lr: 0.020000\n",
-      "2021-09-08 10:50:43,311 epoch 4 - iter 2/3 - loss 0.64468226 - samples/sec: 21.00 - lr: 0.020000\n",
-      "2021-09-08 10:50:43,357 epoch 4 - iter 3/3 - loss 0.64025009 - samples/sec: 21.88 - lr: 0.020000\n",
-      "2021-09-08 10:50:43,358 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:43,358 EPOCH 4 done: loss 0.6403 - lr 0.0200000\n",
-      "2021-09-08 10:50:43,359 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:50:43,388 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:43,463 epoch 5 - iter 1/3 - loss 0.64532495 - samples/sec: 17.52 - lr: 0.020000\n",
-      "2021-09-08 10:50:43,522 epoch 5 - iter 2/3 - loss 0.64801997 - samples/sec: 17.12 - lr: 0.020000\n",
-      "2021-09-08 10:50:43,569 epoch 5 - iter 3/3 - loss 0.65352668 - samples/sec: 21.35 - lr: 0.020000\n",
-      "2021-09-08 10:50:43,570 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:43,570 EPOCH 5 done: loss 0.6535 - lr 0.0200000\n",
-      "2021-09-08 10:50:43,571 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:50:52,120 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:52,193 epoch 6 - iter 1/3 - loss 0.66267449 - samples/sec: 19.73 - lr: 0.020000\n",
-      "2021-09-08 10:50:52,243 epoch 6 - iter 2/3 - loss 0.65523279 - samples/sec: 20.36 - lr: 0.020000\n",
-      "2021-09-08 10:50:52,312 epoch 6 - iter 3/3 - loss 0.65093430 - samples/sec: 14.62 - lr: 0.020000\n",
-      "2021-09-08 10:50:52,313 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:52,313 EPOCH 6 done: loss 0.6509 - lr 0.0200000\n",
-      "2021-09-08 10:50:52,314 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:50:52,318 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:52,378 epoch 7 - iter 1/3 - loss 0.64391959 - samples/sec: 21.70 - lr: 0.020000\n",
-      "2021-09-08 10:50:52,435 epoch 7 - iter 2/3 - loss 0.64683715 - samples/sec: 17.45 - lr: 0.020000\n",
-      "2021-09-08 10:50:52,496 epoch 7 - iter 3/3 - loss 0.64448321 - samples/sec: 16.54 - lr: 0.020000\n",
-      "2021-09-08 10:50:52,497 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:52,498 EPOCH 7 done: loss 0.6445 - lr 0.0200000\n",
-      "2021-09-08 10:50:52,498 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:50:52,503 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:52,577 epoch 8 - iter 1/3 - loss 0.64888149 - samples/sec: 17.99 - lr: 0.020000\n",
-      "2021-09-08 10:50:52,627 epoch 8 - iter 2/3 - loss 0.63918212 - samples/sec: 20.16 - lr: 0.020000\n",
-      "2021-09-08 10:50:52,682 epoch 8 - iter 3/3 - loss 0.63906143 - samples/sec: 18.32 - lr: 0.020000\n",
-      "2021-09-08 10:50:52,683 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:52,684 EPOCH 8 done: loss 0.6391 - lr 0.0200000\n",
-      "2021-09-08 10:50:52,684 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:50:52,686 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:52,765 epoch 9 - iter 1/3 - loss 0.63884550 - samples/sec: 18.33 - lr: 0.020000\n",
-      "2021-09-08 10:50:52,817 epoch 9 - iter 2/3 - loss 0.63479316 - samples/sec: 19.70 - lr: 0.020000\n",
-      "2021-09-08 10:50:52,880 epoch 9 - iter 3/3 - loss 0.63302821 - samples/sec: 15.89 - lr: 0.020000\n",
-      "2021-09-08 10:50:52,881 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:52,881 EPOCH 9 done: loss 0.6330 - lr 0.0200000\n",
-      "Epoch     9: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 10:50:52,882 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:50:52,884 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:52,951 epoch 10 - iter 1/3 - loss 0.63459820 - samples/sec: 20.63 - lr: 0.010000\n",
-      "2021-09-08 10:50:53,010 epoch 10 - iter 2/3 - loss 0.64446378 - samples/sec: 16.90 - lr: 0.010000\n",
-      "2021-09-08 10:50:53,066 epoch 10 - iter 3/3 - loss 0.65041624 - samples/sec: 18.19 - lr: 0.010000\n",
-      "2021-09-08 10:50:53,067 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:53,067 EPOCH 10 done: loss 0.6504 - lr 0.0100000\n",
-      "2021-09-08 10:50:53,067 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:50:57,583 Test data not provided setting final score to 0\n",
-      "2021-09-08 10:51:11,579 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n"
+      "2021-09-21 19:35:44,626 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:44,627 Corpus: \"Corpus: 3 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 19:35:44,627 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:44,627 Parameters:\n",
+      "2021-09-21 19:35:44,628  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:35:44,628  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:35:44,628  - patience: \"3\"\n",
+      "2021-09-21 19:35:44,628  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:35:44,629  - max_epochs: \"10\"\n",
+      "2021-09-21 19:35:44,629  - shuffle: \"True\"\n",
+      "2021-09-21 19:35:44,629  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:35:44,630  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:35:44,630 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:44,630 Model training base path: \"temp\"\n",
+      "2021-09-21 19:35:44,630 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:44,631 Device: cuda:0\n",
+      "2021-09-21 19:35:44,631 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:44,631 Embeddings storage mode: cpu\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "2021-09-21 19:35:44,805 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:44,870 epoch 1 - iter 1/3 - loss 0.65456808 - samples/sec: 19.58 - lr: 0.020000\n",
+      "2021-09-21 19:35:44,925 epoch 1 - iter 2/3 - loss 0.64945936 - samples/sec: 18.52 - lr: 0.020000\n",
+      "2021-09-21 19:35:44,979 epoch 1 - iter 3/3 - loss 0.63473930 - samples/sec: 18.68 - lr: 0.020000\n",
+      "2021-09-21 19:35:44,980 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:44,980 EPOCH 1 done: loss 0.6347 - lr 0.0200000\n",
+      "2021-09-21 19:35:44,980 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:35:49,236 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:49,371 epoch 2 - iter 1/3 - loss 0.62634367 - samples/sec: 11.43 - lr: 0.020000\n",
+      "2021-09-21 19:35:49,455 epoch 2 - iter 2/3 - loss 0.62607935 - samples/sec: 12.04 - lr: 0.020000\n",
+      "2021-09-21 19:35:49,531 epoch 2 - iter 3/3 - loss 0.62947309 - samples/sec: 13.24 - lr: 0.020000\n",
+      "2021-09-21 19:35:49,532 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:49,532 EPOCH 2 done: loss 0.6295 - lr 0.0200000\n",
+      "2021-09-21 19:35:49,533 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:35:49,535 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:49,662 epoch 3 - iter 1/3 - loss 0.63022733 - samples/sec: 11.40 - lr: 0.020000\n",
+      "2021-09-21 19:35:49,743 epoch 3 - iter 2/3 - loss 0.63077393 - samples/sec: 12.40 - lr: 0.020000\n",
+      "2021-09-21 19:35:49,824 epoch 3 - iter 3/3 - loss 0.63712776 - samples/sec: 12.42 - lr: 0.020000\n",
+      "2021-09-21 19:35:49,825 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:49,826 EPOCH 3 done: loss 0.6371 - lr 0.0200000\n",
+      "2021-09-21 19:35:49,826 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:35:53,572 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:53,702 epoch 4 - iter 1/3 - loss 0.62594992 - samples/sec: 12.00 - lr: 0.020000\n",
+      "2021-09-21 19:35:53,779 epoch 4 - iter 2/3 - loss 0.63042748 - samples/sec: 13.18 - lr: 0.020000\n",
+      "2021-09-21 19:35:53,872 epoch 4 - iter 3/3 - loss 0.63207944 - samples/sec: 10.82 - lr: 0.020000\n",
+      "2021-09-21 19:35:53,873 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:53,873 EPOCH 4 done: loss 0.6321 - lr 0.0200000\n",
+      "2021-09-21 19:35:53,873 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:35:53,875 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:53,993 epoch 5 - iter 1/3 - loss 0.61311185 - samples/sec: 12.11 - lr: 0.020000\n",
+      "2021-09-21 19:35:54,082 epoch 5 - iter 2/3 - loss 0.62545344 - samples/sec: 11.25 - lr: 0.020000\n",
+      "2021-09-21 19:35:54,158 epoch 5 - iter 3/3 - loss 0.62597185 - samples/sec: 13.36 - lr: 0.020000\n",
+      "2021-09-21 19:35:54,159 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:54,159 EPOCH 5 done: loss 0.6260 - lr 0.0200000\n",
+      "2021-09-21 19:35:54,159 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:35:54,161 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:54,299 epoch 6 - iter 1/3 - loss 0.64800698 - samples/sec: 10.80 - lr: 0.020000\n",
+      "2021-09-21 19:35:54,362 epoch 6 - iter 2/3 - loss 0.64352462 - samples/sec: 16.00 - lr: 0.020000\n",
+      "2021-09-21 19:35:54,443 epoch 6 - iter 3/3 - loss 0.64645024 - samples/sec: 12.43 - lr: 0.020000\n",
+      "2021-09-21 19:35:54,444 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:54,444 EPOCH 6 done: loss 0.6465 - lr 0.0200000\n",
+      "2021-09-21 19:35:54,445 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:35:58,539 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:58,664 epoch 7 - iter 1/3 - loss 0.60473806 - samples/sec: 12.03 - lr: 0.020000\n",
+      "2021-09-21 19:35:58,741 epoch 7 - iter 2/3 - loss 0.62407541 - samples/sec: 13.00 - lr: 0.020000\n",
+      "2021-09-21 19:35:58,822 epoch 7 - iter 3/3 - loss 0.63004273 - samples/sec: 12.47 - lr: 0.020000\n",
+      "2021-09-21 19:35:58,823 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:58,823 EPOCH 7 done: loss 0.6300 - lr 0.0200000\n",
+      "2021-09-21 19:35:58,824 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:35:58,893 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:59,036 epoch 8 - iter 1/3 - loss 0.62373364 - samples/sec: 10.04 - lr: 0.020000\n",
+      "2021-09-21 19:35:59,102 epoch 8 - iter 2/3 - loss 0.61459249 - samples/sec: 15.17 - lr: 0.020000\n",
+      "2021-09-21 19:35:59,186 epoch 8 - iter 3/3 - loss 0.61850709 - samples/sec: 11.99 - lr: 0.020000\n",
+      "2021-09-21 19:35:59,187 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:59,187 EPOCH 8 done: loss 0.6185 - lr 0.0200000\n",
+      "2021-09-21 19:35:59,188 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:35:59,370 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:59,469 epoch 9 - iter 1/3 - loss 0.62476414 - samples/sec: 15.27 - lr: 0.020000\n",
+      "2021-09-21 19:35:59,538 epoch 9 - iter 2/3 - loss 0.63014686 - samples/sec: 14.72 - lr: 0.020000\n",
+      "2021-09-21 19:35:59,586 epoch 9 - iter 3/3 - loss 0.62163764 - samples/sec: 21.21 - lr: 0.020000\n",
+      "2021-09-21 19:35:59,586 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:59,587 EPOCH 9 done: loss 0.6216 - lr 0.0200000\n",
+      "2021-09-21 19:35:59,587 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:35:59,589 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:59,682 epoch 10 - iter 1/3 - loss 0.62571603 - samples/sec: 16.36 - lr: 0.020000\n",
+      "2021-09-21 19:35:59,754 epoch 10 - iter 2/3 - loss 0.63442943 - samples/sec: 14.05 - lr: 0.020000\n",
+      "2021-09-21 19:35:59,829 epoch 10 - iter 3/3 - loss 0.62638593 - samples/sec: 13.53 - lr: 0.020000\n",
+      "2021-09-21 19:35:59,830 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:35:59,830 EPOCH 10 done: loss 0.6264 - lr 0.0200000\n",
+      "Epoch    10: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 19:35:59,830 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:36:06,009 Test data not provided setting final score to 0\n",
+      "2021-09-21 19:36:26,378 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:51:16,054 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:36:30,803 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 3/3 [00:00<00:00, 18078.90it/s]"
+      "100%|██████████| 3/3 [00:00<00:00, 18921.67it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:51:16,056 [b'positive', b'neutral', b'negative']\n",
-      "2021-09-08 10:51:16,181 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:16,183 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:36:30,805 [b'positive', b'neutral', b'negative']\n",
+      "2021-09-21 19:36:30,814 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:36:30,816 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -1281,24 +1275,26 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:51:16,183 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:16,184 Corpus: \"Corpus: 3 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 10:51:16,184 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:16,184 Parameters:\n",
-      "2021-09-08 10:51:16,184  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:51:16,185  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:51:16,185  - patience: \"3\"\n",
-      "2021-09-08 10:51:16,185  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:51:16,186  - max_epochs: \"10\"\n",
-      "2021-09-08 10:51:16,186  - shuffle: \"True\"\n",
-      "2021-09-08 10:51:16,186  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:51:16,186  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:51:16,187 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:16,187 Model training base path: \"temp\"\n",
-      "2021-09-08 10:51:16,187 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:16,188 Device: cuda:0\n",
-      "2021-09-08 10:51:16,188 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:16,188 Embeddings storage mode: cpu\n"
+      "2021-09-21 19:36:30,816 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:36:30,817 Corpus: \"Corpus: 3 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 19:36:30,817 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:36:30,817 Parameters:\n",
+      "2021-09-21 19:36:30,818  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:36:30,818  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:36:30,818  - patience: \"3\"\n",
+      "2021-09-21 19:36:30,818  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:36:30,819  - max_epochs: \"10\"\n",
+      "2021-09-21 19:36:30,819  - shuffle: \"True\"\n",
+      "2021-09-21 19:36:30,819  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:36:30,820  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:36:30,820 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:36:30,820 Model training base path: \"temp\"\n",
+      "2021-09-21 19:36:30,820 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:36:30,821 Device: cuda:0\n",
+      "2021-09-21 19:36:30,821 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:36:30,821 Embeddings storage mode: cpu\n",
+      "2021-09-21 19:36:30,827 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:36:30,900 epoch 1 - iter 1/3 - loss 0.65101105 - samples/sec: 20.45 - lr: 0.020000\n"
      ]
     },
     {
@@ -1312,113 +1308,98 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:51:16,369 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:16,422 epoch 1 - iter 1/3 - loss 0.65189892 - samples/sec: 24.74 - lr: 0.020000\n",
-      "2021-09-08 10:51:16,467 epoch 1 - iter 2/3 - loss 0.63865328 - samples/sec: 22.56 - lr: 0.020000\n",
-      "2021-09-08 10:51:16,511 epoch 1 - iter 3/3 - loss 0.63549958 - samples/sec: 23.15 - lr: 0.020000\n",
-      "2021-09-08 10:51:16,512 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:16,512 EPOCH 1 done: loss 0.6355 - lr 0.0200000\n",
-      "2021-09-08 10:51:16,512 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:51:25,443 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:25,500 epoch 2 - iter 1/3 - loss 0.63915128 - samples/sec: 22.92 - lr: 0.020000\n",
-      "2021-09-08 10:51:25,550 epoch 2 - iter 2/3 - loss 0.64911065 - samples/sec: 20.17 - lr: 0.020000\n",
-      "2021-09-08 10:51:25,607 epoch 2 - iter 3/3 - loss 0.64572332 - samples/sec: 17.80 - lr: 0.020000\n",
-      "2021-09-08 10:51:25,608 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:25,608 EPOCH 2 done: loss 0.6457 - lr 0.0200000\n",
-      "2021-09-08 10:51:25,609 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:36:31,084 epoch 1 - iter 2/3 - loss 0.62948984 - samples/sec: 5.44 - lr: 0.020000\n",
+      "2021-09-21 19:36:31,142 epoch 1 - iter 3/3 - loss 0.63190830 - samples/sec: 17.61 - lr: 0.020000\n",
+      "2021-09-21 19:36:31,143 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:36:31,143 EPOCH 1 done: loss 0.6319 - lr 0.0200000\n",
+      "2021-09-21 19:36:31,144 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:51:35,403 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:35,489 epoch 3 - iter 1/3 - loss 0.65267545 - samples/sec: 17.39 - lr: 0.020000\n",
-      "2021-09-08 10:51:35,655 epoch 3 - iter 2/3 - loss 0.64095265 - samples/sec: 6.04 - lr: 0.020000\n",
-      "2021-09-08 10:51:35,707 epoch 3 - iter 3/3 - loss 0.64739941 - samples/sec: 19.59 - lr: 0.020000\n",
-      "2021-09-08 10:51:35,708 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:35,708 EPOCH 3 done: loss 0.6474 - lr 0.0200000\n",
-      "2021-09-08 10:51:35,708 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:36:35,479 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:36:35,574 epoch 2 - iter 1/3 - loss 0.64679474 - samples/sec: 17.49 - lr: 0.020000\n",
+      "2021-09-21 19:36:35,652 epoch 2 - iter 2/3 - loss 0.64712736 - samples/sec: 12.98 - lr: 0.020000\n",
+      "2021-09-21 19:36:35,727 epoch 2 - iter 3/3 - loss 0.64605894 - samples/sec: 13.45 - lr: 0.020000\n",
+      "2021-09-21 19:36:35,728 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:36:35,728 EPOCH 2 done: loss 0.6461 - lr 0.0200000\n",
+      "2021-09-21 19:36:35,728 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:51:44,944 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:45,028 epoch 4 - iter 1/3 - loss 0.64411801 - samples/sec: 14.25 - lr: 0.020000\n",
-      "2021-09-08 10:51:45,089 epoch 4 - iter 2/3 - loss 0.64340386 - samples/sec: 16.37 - lr: 0.020000\n",
-      "2021-09-08 10:51:45,153 epoch 4 - iter 3/3 - loss 0.64169834 - samples/sec: 15.66 - lr: 0.020000\n",
-      "2021-09-08 10:51:45,154 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:45,155 EPOCH 4 done: loss 0.6417 - lr 0.0200000\n",
-      "2021-09-08 10:51:45,155 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:51:45,157 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:45,232 epoch 5 - iter 1/3 - loss 0.64134055 - samples/sec: 19.40 - lr: 0.020000\n",
-      "2021-09-08 10:51:45,289 epoch 5 - iter 2/3 - loss 0.63788894 - samples/sec: 17.68 - lr: 0.020000\n",
-      "2021-09-08 10:51:45,349 epoch 5 - iter 3/3 - loss 0.63387463 - samples/sec: 16.84 - lr: 0.020000\n",
-      "2021-09-08 10:51:45,350 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:45,350 EPOCH 5 done: loss 0.6339 - lr 0.0200000\n",
-      "2021-09-08 10:51:45,350 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:51:45,352 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:45,429 epoch 6 - iter 1/3 - loss 0.63826853 - samples/sec: 16.63 - lr: 0.020000\n",
-      "2021-09-08 10:51:45,487 epoch 6 - iter 2/3 - loss 0.64482355 - samples/sec: 17.48 - lr: 0.020000\n",
-      "2021-09-08 10:51:45,536 epoch 6 - iter 3/3 - loss 0.63623106 - samples/sec: 20.53 - lr: 0.020000\n",
-      "2021-09-08 10:51:45,537 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:45,537 EPOCH 6 done: loss 0.6362 - lr 0.0200000\n",
-      "2021-09-08 10:51:45,538 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:51:45,556 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:45,637 epoch 7 - iter 1/3 - loss 0.63563794 - samples/sec: 15.58 - lr: 0.020000\n",
-      "2021-09-08 10:51:45,686 epoch 7 - iter 2/3 - loss 0.64416069 - samples/sec: 20.43 - lr: 0.020000\n",
-      "2021-09-08 10:51:45,733 epoch 7 - iter 3/3 - loss 0.64307888 - samples/sec: 21.78 - lr: 0.020000\n",
-      "2021-09-08 10:51:45,734 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:45,734 EPOCH 7 done: loss 0.6431 - lr 0.0200000\n",
-      "Epoch     7: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 10:51:45,734 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:51:45,737 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:45,821 epoch 8 - iter 1/3 - loss 0.65197283 - samples/sec: 16.30 - lr: 0.010000\n",
-      "2021-09-08 10:51:45,881 epoch 8 - iter 2/3 - loss 0.63393766 - samples/sec: 16.82 - lr: 0.010000\n",
-      "2021-09-08 10:51:45,938 epoch 8 - iter 3/3 - loss 0.63869518 - samples/sec: 17.60 - lr: 0.010000\n",
-      "2021-09-08 10:51:45,939 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:45,940 EPOCH 8 done: loss 0.6387 - lr 0.0100000\n",
-      "2021-09-08 10:51:45,940 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:51:45,942 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:46,025 epoch 9 - iter 1/3 - loss 0.63525242 - samples/sec: 18.91 - lr: 0.010000\n",
-      "2021-09-08 10:51:46,084 epoch 9 - iter 2/3 - loss 0.63809130 - samples/sec: 16.94 - lr: 0.010000\n",
-      "2021-09-08 10:51:46,141 epoch 9 - iter 3/3 - loss 0.63628624 - samples/sec: 17.68 - lr: 0.010000\n",
-      "2021-09-08 10:51:46,142 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:46,143 EPOCH 9 done: loss 0.6363 - lr 0.0100000\n",
-      "2021-09-08 10:51:46,143 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:51:46,224 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:46,294 epoch 10 - iter 1/3 - loss 0.63911599 - samples/sec: 20.19 - lr: 0.010000\n",
-      "2021-09-08 10:51:46,358 epoch 10 - iter 2/3 - loss 0.64342231 - samples/sec: 15.58 - lr: 0.010000\n",
-      "2021-09-08 10:51:46,424 epoch 10 - iter 3/3 - loss 0.64270101 - samples/sec: 15.31 - lr: 0.010000\n",
-      "2021-09-08 10:51:46,425 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:46,426 EPOCH 10 done: loss 0.6427 - lr 0.0100000\n",
-      "2021-09-08 10:51:46,426 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:51:52,053 Test data not provided setting final score to 0\n",
-      "2021-09-08 10:52:04,630 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
+      "2021-09-21 19:36:39,905 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:36:40,011 epoch 3 - iter 1/3 - loss 0.62753910 - samples/sec: 14.22 - lr: 0.020000\n",
+      "2021-09-21 19:36:40,082 epoch 3 - iter 2/3 - loss 0.63392085 - samples/sec: 14.25 - lr: 0.020000\n",
+      "2021-09-21 19:36:40,146 epoch 3 - iter 3/3 - loss 0.63732392 - samples/sec: 15.78 - lr: 0.020000\n",
+      "2021-09-21 19:36:40,147 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:36:40,147 EPOCH 3 done: loss 0.6373 - lr 0.0200000\n",
+      "2021-09-21 19:36:40,147 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:36:40,151 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:36:40,243 epoch 4 - iter 1/3 - loss 0.64191484 - samples/sec: 17.07 - lr: 0.020000\n",
+      "2021-09-21 19:36:40,297 epoch 4 - iter 2/3 - loss 0.63543907 - samples/sec: 18.68 - lr: 0.020000\n",
+      "2021-09-21 19:36:40,347 epoch 4 - iter 3/3 - loss 0.63566931 - samples/sec: 19.95 - lr: 0.020000\n",
+      "2021-09-21 19:36:40,348 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:36:40,349 EPOCH 4 done: loss 0.6357 - lr 0.0200000\n",
+      "2021-09-21 19:36:40,349 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:36:40,352 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:36:40,450 epoch 5 - iter 1/3 - loss 0.62768894 - samples/sec: 18.15 - lr: 0.020000\n",
+      "2021-09-21 19:36:40,520 epoch 5 - iter 2/3 - loss 0.62665373 - samples/sec: 14.29 - lr: 0.020000\n",
+      "2021-09-21 19:36:40,588 epoch 5 - iter 3/3 - loss 0.63234907 - samples/sec: 14.86 - lr: 0.020000\n",
+      "2021-09-21 19:36:40,589 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:36:40,589 EPOCH 5 done: loss 0.6323 - lr 0.0200000\n",
+      "2021-09-21 19:36:40,590 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:36:40,592 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:36:40,683 epoch 6 - iter 1/3 - loss 0.62337887 - samples/sec: 14.39 - lr: 0.020000\n",
+      "2021-09-21 19:36:40,750 epoch 6 - iter 2/3 - loss 0.62942144 - samples/sec: 14.87 - lr: 0.020000\n",
+      "2021-09-21 19:36:40,824 epoch 6 - iter 3/3 - loss 0.63371332 - samples/sec: 13.69 - lr: 0.020000\n",
+      "2021-09-21 19:36:40,825 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:36:40,825 EPOCH 6 done: loss 0.6337 - lr 0.0200000\n",
+      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 19:36:40,825 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:36:40,828 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:36:40,941 epoch 7 - iter 1/3 - loss 0.64143145 - samples/sec: 12.93 - lr: 0.010000\n",
+      "2021-09-21 19:36:40,999 epoch 7 - iter 2/3 - loss 0.64826778 - samples/sec: 17.32 - lr: 0.010000\n",
+      "2021-09-21 19:36:41,064 epoch 7 - iter 3/3 - loss 0.64228366 - samples/sec: 15.38 - lr: 0.010000\n",
+      "2021-09-21 19:36:41,065 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:36:41,066 EPOCH 7 done: loss 0.6423 - lr 0.0100000\n",
+      "2021-09-21 19:36:41,066 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:36:41,068 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:36:41,180 epoch 8 - iter 1/3 - loss 0.65392083 - samples/sec: 11.70 - lr: 0.010000\n",
+      "2021-09-21 19:36:41,242 epoch 8 - iter 2/3 - loss 0.64415067 - samples/sec: 16.22 - lr: 0.010000\n",
+      "2021-09-21 19:36:41,308 epoch 8 - iter 3/3 - loss 0.64477275 - samples/sec: 15.14 - lr: 0.010000\n",
+      "2021-09-21 19:36:41,309 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:36:41,309 EPOCH 8 done: loss 0.6448 - lr 0.0100000\n",
+      "2021-09-21 19:36:41,310 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:36:41,312 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:36:41,408 epoch 9 - iter 1/3 - loss 0.63331550 - samples/sec: 17.16 - lr: 0.010000\n",
+      "2021-09-21 19:36:41,490 epoch 9 - iter 2/3 - loss 0.64485645 - samples/sec: 12.27 - lr: 0.010000\n",
+      "2021-09-21 19:36:41,546 epoch 9 - iter 3/3 - loss 0.64104259 - samples/sec: 18.15 - lr: 0.010000\n",
+      "2021-09-21 19:36:41,547 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:36:41,547 EPOCH 9 done: loss 0.6410 - lr 0.0100000\n",
+      "2021-09-21 19:36:41,548 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:36:41,550 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:36:41,636 epoch 10 - iter 1/3 - loss 0.64421993 - samples/sec: 18.18 - lr: 0.010000\n",
+      "2021-09-21 19:36:41,699 epoch 10 - iter 2/3 - loss 0.63699466 - samples/sec: 15.94 - lr: 0.010000\n",
+      "2021-09-21 19:36:41,758 epoch 10 - iter 3/3 - loss 0.64294527 - samples/sec: 17.11 - lr: 0.010000\n",
+      "2021-09-21 19:36:41,759 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:36:41,760 EPOCH 10 done: loss 0.6429 - lr 0.0100000\n",
+      "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 19:36:41,760 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:36:46,082 Test data not provided setting final score to 0\n",
+      "2021-09-21 19:37:10,467 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:52:09,810 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:37:14,831 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 3/3 [00:00<00:00, 16491.37it/s]"
+      "100%|██████████| 3/3 [00:00<00:00, 18921.67it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:52:09,812 [b'positive', b'neutral', b'negative']\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 10:52:11,756 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:11,758 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:37:14,832 [b'positive', b'neutral', b'negative']\n",
+      "2021-09-21 19:37:14,842 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:14,844 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -1731,137 +1712,130 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:52:11,759 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:11,760 Corpus: \"Corpus: 3 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 10:52:11,760 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:11,760 Parameters:\n",
-      "2021-09-08 10:52:11,761  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:52:11,761  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:52:11,761  - patience: \"3\"\n",
-      "2021-09-08 10:52:11,761  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:52:11,762  - max_epochs: \"10\"\n",
-      "2021-09-08 10:52:11,762  - shuffle: \"True\"\n",
-      "2021-09-08 10:52:11,762  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:52:11,762  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:52:11,763 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:11,763 Model training base path: \"temp\"\n",
-      "2021-09-08 10:52:11,763 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:11,764 Device: cuda:0\n",
-      "2021-09-08 10:52:11,764 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:11,764 Embeddings storage mode: cpu\n",
-      "2021-09-08 10:52:12,874 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:12,925 epoch 1 - iter 1/3 - loss 0.64768392 - samples/sec: 25.96 - lr: 0.020000\n",
-      "2021-09-08 10:52:12,986 epoch 1 - iter 2/3 - loss 0.64426214 - samples/sec: 16.73 - lr: 0.020000\n",
-      "2021-09-08 10:52:13,050 epoch 1 - iter 3/3 - loss 0.64478205 - samples/sec: 15.70 - lr: 0.020000\n",
-      "2021-09-08 10:52:13,051 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:13,051 EPOCH 1 done: loss 0.6448 - lr 0.0200000\n",
-      "2021-09-08 10:52:13,051 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:52:20,213 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:20,272 epoch 2 - iter 1/3 - loss 0.63058919 - samples/sec: 22.32 - lr: 0.020000\n",
-      "2021-09-08 10:52:20,316 epoch 2 - iter 2/3 - loss 0.62398538 - samples/sec: 22.61 - lr: 0.020000\n",
-      "2021-09-08 10:52:20,360 epoch 2 - iter 3/3 - loss 0.62808748 - samples/sec: 23.01 - lr: 0.020000\n",
-      "2021-09-08 10:52:20,361 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:20,362 EPOCH 2 done: loss 0.6281 - lr 0.0200000\n",
-      "2021-09-08 10:52:20,362 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:52:28,350 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:28,426 epoch 3 - iter 1/3 - loss 0.64689583 - samples/sec: 16.48 - lr: 0.020000\n",
-      "2021-09-08 10:52:28,487 epoch 3 - iter 2/3 - loss 0.63882741 - samples/sec: 16.48 - lr: 0.020000\n",
-      "2021-09-08 10:52:28,541 epoch 3 - iter 3/3 - loss 0.63534065 - samples/sec: 18.67 - lr: 0.020000\n",
-      "2021-09-08 10:52:28,542 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:28,543 EPOCH 3 done: loss 0.6353 - lr 0.0200000\n",
-      "2021-09-08 10:52:28,543 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:52:28,601 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:28,681 epoch 4 - iter 1/3 - loss 0.63876963 - samples/sec: 15.08 - lr: 0.020000\n",
-      "2021-09-08 10:52:28,725 epoch 4 - iter 2/3 - loss 0.65509668 - samples/sec: 22.86 - lr: 0.020000\n",
-      "2021-09-08 10:52:28,780 epoch 4 - iter 3/3 - loss 0.64538942 - samples/sec: 18.30 - lr: 0.020000\n",
-      "2021-09-08 10:52:28,781 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:28,782 EPOCH 4 done: loss 0.6454 - lr 0.0200000\n",
-      "2021-09-08 10:52:28,782 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:52:35,262 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:35,320 epoch 5 - iter 1/3 - loss 0.62038815 - samples/sec: 22.42 - lr: 0.020000\n",
-      "2021-09-08 10:52:35,364 epoch 5 - iter 2/3 - loss 0.63981345 - samples/sec: 22.96 - lr: 0.020000\n",
-      "2021-09-08 10:52:35,408 epoch 5 - iter 3/3 - loss 0.63893865 - samples/sec: 22.98 - lr: 0.020000\n",
-      "2021-09-08 10:52:35,409 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:35,410 EPOCH 5 done: loss 0.6389 - lr 0.0200000\n",
-      "2021-09-08 10:52:35,410 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:52:35,534 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:35,594 epoch 6 - iter 1/3 - loss 0.62847739 - samples/sec: 21.76 - lr: 0.020000\n",
-      "2021-09-08 10:52:35,640 epoch 6 - iter 2/3 - loss 0.65267244 - samples/sec: 22.39 - lr: 0.020000\n",
-      "2021-09-08 10:52:35,685 epoch 6 - iter 3/3 - loss 0.64930395 - samples/sec: 22.44 - lr: 0.020000\n",
-      "2021-09-08 10:52:35,686 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:35,686 EPOCH 6 done: loss 0.6493 - lr 0.0200000\n",
-      "2021-09-08 10:52:35,687 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:52:43,221 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:43,305 epoch 7 - iter 1/3 - loss 0.63277191 - samples/sec: 19.05 - lr: 0.020000\n",
-      "2021-09-08 10:52:43,358 epoch 7 - iter 2/3 - loss 0.63318941 - samples/sec: 19.05 - lr: 0.020000\n",
-      "2021-09-08 10:52:43,420 epoch 7 - iter 3/3 - loss 0.62771233 - samples/sec: 16.31 - lr: 0.020000\n",
-      "2021-09-08 10:52:43,421 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:43,421 EPOCH 7 done: loss 0.6277 - lr 0.0200000\n",
-      "2021-09-08 10:52:43,422 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:52:43,440 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:43,504 epoch 8 - iter 1/3 - loss 0.65588963 - samples/sec: 21.65 - lr: 0.020000\n",
-      "2021-09-08 10:52:43,552 epoch 8 - iter 2/3 - loss 0.64386821 - samples/sec: 20.83 - lr: 0.020000\n",
-      "2021-09-08 10:52:43,604 epoch 8 - iter 3/3 - loss 0.64304235 - samples/sec: 19.62 - lr: 0.020000\n",
-      "2021-09-08 10:52:43,605 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:43,605 EPOCH 8 done: loss 0.6430 - lr 0.0200000\n",
-      "2021-09-08 10:52:43,605 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:52:43,610 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:43,692 epoch 9 - iter 1/3 - loss 0.64792627 - samples/sec: 14.31 - lr: 0.020000\n",
-      "2021-09-08 10:52:43,753 epoch 9 - iter 2/3 - loss 0.64245898 - samples/sec: 16.51 - lr: 0.020000\n",
-      "2021-09-08 10:52:43,824 epoch 9 - iter 3/3 - loss 0.64012116 - samples/sec: 14.20 - lr: 0.020000\n",
-      "2021-09-08 10:52:43,825 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:43,825 EPOCH 9 done: loss 0.6401 - lr 0.0200000\n",
-      "2021-09-08 10:52:43,825 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:52:43,828 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:43,893 epoch 10 - iter 1/3 - loss 0.63818502 - samples/sec: 19.92 - lr: 0.020000\n",
-      "2021-09-08 10:52:43,939 epoch 10 - iter 2/3 - loss 0.64953095 - samples/sec: 21.65 - lr: 0.020000\n",
-      "2021-09-08 10:52:43,993 epoch 10 - iter 3/3 - loss 0.64631951 - samples/sec: 18.63 - lr: 0.020000\n",
-      "2021-09-08 10:52:43,994 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:43,995 EPOCH 10 done: loss 0.6463 - lr 0.0200000\n",
-      "Epoch    10: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 10:52:43,995 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:52:48,289 Test data not provided setting final score to 0\n",
-      "2021-09-08 10:53:07,136 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "init TARS\n",
-      "2021-09-08 10:53:11,738 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:37:14,844 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:14,845 Corpus: \"Corpus: 3 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 19:37:14,845 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:14,845 Parameters:\n",
+      "2021-09-21 19:37:14,846  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:37:14,846  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:37:14,846  - patience: \"3\"\n",
+      "2021-09-21 19:37:14,846  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:37:14,847  - max_epochs: \"10\"\n",
+      "2021-09-21 19:37:14,847  - shuffle: \"True\"\n",
+      "2021-09-21 19:37:14,847  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:37:14,847  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:37:14,848 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:14,848 Model training base path: \"temp\"\n",
+      "2021-09-21 19:37:14,848 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:14,849 Device: cuda:0\n",
+      "2021-09-21 19:37:14,849 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:14,849 Embeddings storage mode: cpu\n",
+      "2021-09-21 19:37:14,857 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:14,941 epoch 1 - iter 1/3 - loss 0.63944799 - samples/sec: 15.95 - lr: 0.020000\n",
+      "2021-09-21 19:37:15,010 epoch 1 - iter 2/3 - loss 0.63392919 - samples/sec: 14.68 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 3/3 [00:00<00:00, 17975.59it/s]"
+      "\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:53:11,739 [b'positive', b'neutral', b'negative']\n"
+      "2021-09-21 19:37:15,077 epoch 1 - iter 3/3 - loss 0.64390198 - samples/sec: 15.00 - lr: 0.020000\n",
+      "2021-09-21 19:37:15,078 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:15,078 EPOCH 1 done: loss 0.6439 - lr 0.0200000\n",
+      "2021-09-21 19:37:15,078 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:37:19,070 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:19,153 epoch 2 - iter 1/3 - loss 0.64572144 - samples/sec: 18.72 - lr: 0.020000\n",
+      "2021-09-21 19:37:19,228 epoch 2 - iter 2/3 - loss 0.63328803 - samples/sec: 13.38 - lr: 0.020000\n",
+      "2021-09-21 19:37:19,285 epoch 2 - iter 3/3 - loss 0.63094447 - samples/sec: 17.59 - lr: 0.020000\n",
+      "2021-09-21 19:37:19,286 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:19,287 EPOCH 2 done: loss 0.6309 - lr 0.0200000\n",
+      "2021-09-21 19:37:19,287 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:37:19,289 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:19,392 epoch 3 - iter 1/3 - loss 0.63033247 - samples/sec: 14.25 - lr: 0.020000\n",
+      "2021-09-21 19:37:19,472 epoch 3 - iter 2/3 - loss 0.64252350 - samples/sec: 12.55 - lr: 0.020000\n",
+      "2021-09-21 19:37:19,549 epoch 3 - iter 3/3 - loss 0.64250974 - samples/sec: 13.02 - lr: 0.020000\n",
+      "2021-09-21 19:37:19,550 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:19,550 EPOCH 3 done: loss 0.6425 - lr 0.0200000\n",
+      "2021-09-21 19:37:19,551 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:37:19,553 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:19,649 epoch 4 - iter 1/3 - loss 0.64148241 - samples/sec: 16.06 - lr: 0.020000\n",
+      "2021-09-21 19:37:19,723 epoch 4 - iter 2/3 - loss 0.63755697 - samples/sec: 13.57 - lr: 0.020000\n",
+      "2021-09-21 19:37:19,776 epoch 4 - iter 3/3 - loss 0.63733337 - samples/sec: 19.15 - lr: 0.020000\n",
+      "2021-09-21 19:37:19,777 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:19,777 EPOCH 4 done: loss 0.6373 - lr 0.0200000\n",
+      "2021-09-21 19:37:19,777 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:37:19,780 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:19,887 epoch 5 - iter 1/3 - loss 0.63298476 - samples/sec: 15.38 - lr: 0.020000\n",
+      "2021-09-21 19:37:19,945 epoch 5 - iter 2/3 - loss 0.64130089 - samples/sec: 17.18 - lr: 0.020000\n",
+      "2021-09-21 19:37:20,015 epoch 5 - iter 3/3 - loss 0.63877686 - samples/sec: 14.37 - lr: 0.020000\n",
+      "2021-09-21 19:37:20,016 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:20,017 EPOCH 5 done: loss 0.6388 - lr 0.0200000\n",
+      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 19:37:20,017 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:37:20,019 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:20,121 epoch 6 - iter 1/3 - loss 0.64056647 - samples/sec: 14.13 - lr: 0.010000\n",
+      "2021-09-21 19:37:20,178 epoch 6 - iter 2/3 - loss 0.64224920 - samples/sec: 17.57 - lr: 0.010000\n",
+      "2021-09-21 19:37:20,236 epoch 6 - iter 3/3 - loss 0.63630992 - samples/sec: 17.60 - lr: 0.010000\n",
+      "2021-09-21 19:37:20,237 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:20,237 EPOCH 6 done: loss 0.6363 - lr 0.0100000\n",
+      "2021-09-21 19:37:20,238 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:37:20,240 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:20,331 epoch 7 - iter 1/3 - loss 0.64458889 - samples/sec: 13.95 - lr: 0.010000\n",
+      "2021-09-21 19:37:20,399 epoch 7 - iter 2/3 - loss 0.63904744 - samples/sec: 15.01 - lr: 0.010000\n",
+      "2021-09-21 19:37:20,474 epoch 7 - iter 3/3 - loss 0.63011219 - samples/sec: 13.27 - lr: 0.010000\n",
+      "2021-09-21 19:37:20,475 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:20,476 EPOCH 7 done: loss 0.6301 - lr 0.0100000\n",
+      "2021-09-21 19:37:20,476 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:37:20,478 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:20,568 epoch 8 - iter 1/3 - loss 0.62871701 - samples/sec: 16.57 - lr: 0.010000\n",
+      "2021-09-21 19:37:20,644 epoch 8 - iter 2/3 - loss 0.63267970 - samples/sec: 13.29 - lr: 0.010000\n",
+      "2021-09-21 19:37:20,723 epoch 8 - iter 3/3 - loss 0.63488978 - samples/sec: 12.71 - lr: 0.010000\n",
+      "2021-09-21 19:37:20,724 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:20,724 EPOCH 8 done: loss 0.6349 - lr 0.0100000\n",
+      "2021-09-21 19:37:20,725 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:37:20,726 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:20,823 epoch 9 - iter 1/3 - loss 0.65224046 - samples/sec: 15.64 - lr: 0.010000\n",
+      "2021-09-21 19:37:20,894 epoch 9 - iter 2/3 - loss 0.63757202 - samples/sec: 14.11 - lr: 0.010000\n",
+      "2021-09-21 19:37:20,956 epoch 9 - iter 3/3 - loss 0.64022942 - samples/sec: 16.16 - lr: 0.010000\n",
+      "2021-09-21 19:37:20,957 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:20,958 EPOCH 9 done: loss 0.6402 - lr 0.0100000\n",
+      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 19:37:20,958 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:37:20,960 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:21,058 epoch 10 - iter 1/3 - loss 0.64031631 - samples/sec: 13.67 - lr: 0.005000\n",
+      "2021-09-21 19:37:21,129 epoch 10 - iter 2/3 - loss 0.63579315 - samples/sec: 14.20 - lr: 0.005000\n",
+      "2021-09-21 19:37:21,187 epoch 10 - iter 3/3 - loss 0.63264318 - samples/sec: 17.39 - lr: 0.005000\n",
+      "2021-09-21 19:37:21,188 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:21,189 EPOCH 10 done: loss 0.6326 - lr 0.0050000\n",
+      "2021-09-21 19:37:21,189 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:37:25,319 Test data not provided setting final score to 0\n",
+      "2021-09-21 19:37:50,250 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
+      "init TARS\n",
+      "2021-09-21 19:37:54,530 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\n"
+      "100%|██████████| 3/3 [00:00<00:00, 17848.10it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:53:11,965 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:11,967 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:37:54,532 [b'positive', b'neutral', b'negative']\n",
+      "2021-09-21 19:37:54,540 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:54,542 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -2174,100 +2148,113 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:53:11,968 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:11,968 Corpus: \"Corpus: 3 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 10:53:11,968 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:11,969 Parameters:\n",
-      "2021-09-08 10:53:11,969  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:53:11,969  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:53:11,969  - patience: \"3\"\n",
-      "2021-09-08 10:53:11,970  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:53:11,970  - max_epochs: \"10\"\n",
-      "2021-09-08 10:53:11,970  - shuffle: \"True\"\n",
-      "2021-09-08 10:53:11,971  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:53:11,971  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:53:11,971 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:11,971 Model training base path: \"temp\"\n",
-      "2021-09-08 10:53:11,972 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:11,972 Device: cuda:0\n",
-      "2021-09-08 10:53:11,972 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:11,973 Embeddings storage mode: cpu\n",
-      "2021-09-08 10:53:12,243 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:12,312 epoch 1 - iter 1/3 - loss 0.64435410 - samples/sec: 21.27 - lr: 0.020000\n",
-      "2021-09-08 10:53:12,365 epoch 1 - iter 2/3 - loss 0.64772242 - samples/sec: 19.13 - lr: 0.020000\n",
-      "2021-09-08 10:53:12,427 epoch 1 - iter 3/3 - loss 0.64428322 - samples/sec: 16.32 - lr: 0.020000\n",
-      "2021-09-08 10:53:12,428 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:12,428 EPOCH 1 done: loss 0.6443 - lr 0.0200000\n",
-      "2021-09-08 10:53:12,428 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:37:54,542 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:54,543 Corpus: \"Corpus: 3 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 19:37:54,543 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:54,543 Parameters:\n",
+      "2021-09-21 19:37:54,543  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:37:54,544  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:37:54,544  - patience: \"3\"\n",
+      "2021-09-21 19:37:54,544  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:37:54,545  - max_epochs: \"10\"\n",
+      "2021-09-21 19:37:54,545  - shuffle: \"True\"\n",
+      "2021-09-21 19:37:54,545  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:37:54,545  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:37:54,546 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:54,546 Model training base path: \"temp\"\n",
+      "2021-09-21 19:37:54,546 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:54,547 Device: cuda:0\n",
+      "2021-09-21 19:37:54,547 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:54,547 Embeddings storage mode: cpu\n",
+      "2021-09-21 19:37:54,553 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:54,643 epoch 1 - iter 1/3 - loss 0.65449339 - samples/sec: 16.01 - lr: 0.020000\n",
+      "2021-09-21 19:37:54,707 epoch 1 - iter 2/3 - loss 0.63889644 - samples/sec: 15.79 - lr: 0.020000\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 19:37:54,760 epoch 1 - iter 3/3 - loss 0.63556778 - samples/sec: 18.89 - lr: 0.020000\n",
+      "2021-09-21 19:37:54,761 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:37:54,761 EPOCH 1 done: loss 0.6356 - lr 0.0200000\n",
+      "2021-09-21 19:37:54,762 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:53:19,299 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:19,356 epoch 2 - iter 1/3 - loss 0.66779822 - samples/sec: 22.63 - lr: 0.020000\n",
-      "2021-09-08 10:53:19,402 epoch 2 - iter 2/3 - loss 0.64666718 - samples/sec: 22.43 - lr: 0.020000\n",
-      "2021-09-08 10:53:19,447 epoch 2 - iter 3/3 - loss 0.63999190 - samples/sec: 22.21 - lr: 0.020000\n",
-      "2021-09-08 10:53:19,449 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:19,449 EPOCH 2 done: loss 0.6400 - lr 0.0200000\n",
-      "2021-09-08 10:53:19,449 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:53:19,452 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:19,508 epoch 3 - iter 1/3 - loss 0.62474006 - samples/sec: 22.77 - lr: 0.020000\n",
-      "2021-09-08 10:53:19,552 epoch 3 - iter 2/3 - loss 0.63191044 - samples/sec: 23.14 - lr: 0.020000\n",
-      "2021-09-08 10:53:19,596 epoch 3 - iter 3/3 - loss 0.63706066 - samples/sec: 23.12 - lr: 0.020000\n",
-      "2021-09-08 10:53:19,597 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:19,597 EPOCH 3 done: loss 0.6371 - lr 0.0200000\n",
-      "2021-09-08 10:53:19,597 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:53:19,599 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:19,655 epoch 4 - iter 1/3 - loss 0.64424109 - samples/sec: 22.84 - lr: 0.020000\n",
-      "2021-09-08 10:53:19,700 epoch 4 - iter 2/3 - loss 0.63731578 - samples/sec: 22.87 - lr: 0.020000\n",
-      "2021-09-08 10:53:19,744 epoch 4 - iter 3/3 - loss 0.63645271 - samples/sec: 22.71 - lr: 0.020000\n",
-      "2021-09-08 10:53:19,745 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:19,746 EPOCH 4 done: loss 0.6365 - lr 0.0200000\n",
-      "2021-09-08 10:53:19,746 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:53:19,750 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:19,809 epoch 5 - iter 1/3 - loss 0.61688882 - samples/sec: 21.56 - lr: 0.020000\n",
-      "2021-09-08 10:53:19,854 epoch 5 - iter 2/3 - loss 0.62938851 - samples/sec: 22.44 - lr: 0.020000\n",
-      "2021-09-08 10:53:20,027 epoch 5 - iter 3/3 - loss 0.63583974 - samples/sec: 5.81 - lr: 0.020000\n",
-      "2021-09-08 10:53:20,028 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:20,029 EPOCH 5 done: loss 0.6358 - lr 0.0200000\n",
-      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 10:53:20,029 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:53:20,127 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:20,184 epoch 6 - iter 1/3 - loss 0.64422256 - samples/sec: 22.58 - lr: 0.010000\n",
-      "2021-09-08 10:53:20,228 epoch 6 - iter 2/3 - loss 0.64807913 - samples/sec: 22.86 - lr: 0.010000\n",
-      "2021-09-08 10:53:20,272 epoch 6 - iter 3/3 - loss 0.64630566 - samples/sec: 23.02 - lr: 0.010000\n",
-      "2021-09-08 10:53:20,273 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:20,274 EPOCH 6 done: loss 0.6463 - lr 0.0100000\n",
-      "2021-09-08 10:53:20,274 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:38:00,089 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:00,293 epoch 2 - iter 1/3 - loss 0.63132387 - samples/sec: 5.27 - lr: 0.020000\n",
+      "2021-09-21 19:38:00,367 epoch 2 - iter 2/3 - loss 0.64830595 - samples/sec: 13.63 - lr: 0.020000\n",
+      "2021-09-21 19:38:00,435 epoch 2 - iter 3/3 - loss 0.64199213 - samples/sec: 14.93 - lr: 0.020000\n",
+      "2021-09-21 19:38:00,436 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:00,436 EPOCH 2 done: loss 0.6420 - lr 0.0200000\n",
+      "2021-09-21 19:38:00,436 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:53:32,046 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:32,103 epoch 7 - iter 1/3 - loss 0.65357083 - samples/sec: 22.75 - lr: 0.010000\n",
-      "2021-09-08 10:53:32,147 epoch 7 - iter 2/3 - loss 0.64360249 - samples/sec: 23.09 - lr: 0.010000\n",
-      "2021-09-08 10:53:32,190 epoch 7 - iter 3/3 - loss 0.64149900 - samples/sec: 23.53 - lr: 0.010000\n",
-      "2021-09-08 10:53:32,191 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:32,191 EPOCH 7 done: loss 0.6415 - lr 0.0100000\n",
-      "2021-09-08 10:53:32,192 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:53:34,194 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:34,253 epoch 8 - iter 1/3 - loss 0.62324780 - samples/sec: 21.82 - lr: 0.010000\n",
-      "2021-09-08 10:53:34,310 epoch 8 - iter 2/3 - loss 0.63602921 - samples/sec: 17.54 - lr: 0.010000\n",
-      "2021-09-08 10:53:34,373 epoch 8 - iter 3/3 - loss 0.63745089 - samples/sec: 16.09 - lr: 0.010000\n",
-      "2021-09-08 10:53:34,374 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:34,374 EPOCH 8 done: loss 0.6375 - lr 0.0100000\n",
-      "2021-09-08 10:53:34,375 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:53:34,377 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:34,463 epoch 9 - iter 1/3 - loss 0.66243213 - samples/sec: 14.63 - lr: 0.010000\n",
-      "2021-09-08 10:53:34,520 epoch 9 - iter 2/3 - loss 0.63476855 - samples/sec: 17.83 - lr: 0.010000\n",
-      "2021-09-08 10:53:34,576 epoch 9 - iter 3/3 - loss 0.63541273 - samples/sec: 17.87 - lr: 0.010000\n",
-      "2021-09-08 10:53:34,577 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:34,578 EPOCH 9 done: loss 0.6354 - lr 0.0100000\n",
-      "2021-09-08 10:53:34,578 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:53:34,580 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:34,657 epoch 10 - iter 1/3 - loss 0.62915778 - samples/sec: 21.63 - lr: 0.010000\n",
-      "2021-09-08 10:53:34,721 epoch 10 - iter 2/3 - loss 0.63624319 - samples/sec: 15.71 - lr: 0.010000\n",
-      "2021-09-08 10:53:34,787 epoch 10 - iter 3/3 - loss 0.63564593 - samples/sec: 15.31 - lr: 0.010000\n",
-      "2021-09-08 10:53:34,788 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:34,788 EPOCH 10 done: loss 0.6356 - lr 0.0100000\n",
-      "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 10:53:34,788 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:53:45,126 Test data not provided setting final score to 0\n",
-      "Accuracy Durchschnitt: 0.3524271844660194\n"
+      "2021-09-21 19:38:04,657 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:04,771 epoch 3 - iter 1/3 - loss 0.64602208 - samples/sec: 10.10 - lr: 0.020000\n",
+      "2021-09-21 19:38:04,830 epoch 3 - iter 2/3 - loss 0.64128360 - samples/sec: 17.06 - lr: 0.020000\n",
+      "2021-09-21 19:38:04,913 epoch 3 - iter 3/3 - loss 0.64903702 - samples/sec: 12.15 - lr: 0.020000\n",
+      "2021-09-21 19:38:04,914 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:04,914 EPOCH 3 done: loss 0.6490 - lr 0.0200000\n",
+      "2021-09-21 19:38:04,915 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:38:10,781 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:10,877 epoch 4 - iter 1/3 - loss 0.62897283 - samples/sec: 15.15 - lr: 0.020000\n",
+      "2021-09-21 19:38:10,947 epoch 4 - iter 2/3 - loss 0.63272280 - samples/sec: 14.27 - lr: 0.020000\n",
+      "2021-09-21 19:38:11,023 epoch 4 - iter 3/3 - loss 0.63229398 - samples/sec: 13.29 - lr: 0.020000\n",
+      "2021-09-21 19:38:11,024 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:11,024 EPOCH 4 done: loss 0.6323 - lr 0.0200000\n",
+      "2021-09-21 19:38:11,025 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:38:15,535 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:15,607 epoch 5 - iter 1/3 - loss 0.63404733 - samples/sec: 18.21 - lr: 0.020000\n",
+      "2021-09-21 19:38:15,681 epoch 5 - iter 2/3 - loss 0.63116410 - samples/sec: 13.70 - lr: 0.020000\n",
+      "2021-09-21 19:38:15,756 epoch 5 - iter 3/3 - loss 0.63095931 - samples/sec: 13.47 - lr: 0.020000\n",
+      "2021-09-21 19:38:15,758 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:15,759 EPOCH 5 done: loss 0.6310 - lr 0.0200000\n",
+      "2021-09-21 19:38:15,759 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:38:15,762 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:15,866 epoch 6 - iter 1/3 - loss 0.65309685 - samples/sec: 14.64 - lr: 0.020000\n",
+      "2021-09-21 19:38:15,944 epoch 6 - iter 2/3 - loss 0.64318463 - samples/sec: 12.77 - lr: 0.020000\n",
+      "2021-09-21 19:38:16,013 epoch 6 - iter 3/3 - loss 0.63880638 - samples/sec: 14.70 - lr: 0.020000\n",
+      "2021-09-21 19:38:16,014 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:16,014 EPOCH 6 done: loss 0.6388 - lr 0.0200000\n",
+      "2021-09-21 19:38:16,015 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:38:16,017 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:16,123 epoch 7 - iter 1/3 - loss 0.62267911 - samples/sec: 12.44 - lr: 0.020000\n",
+      "2021-09-21 19:38:16,192 epoch 7 - iter 2/3 - loss 0.62829301 - samples/sec: 14.76 - lr: 0.020000\n",
+      "2021-09-21 19:38:16,250 epoch 7 - iter 3/3 - loss 0.63689442 - samples/sec: 17.36 - lr: 0.020000\n",
+      "2021-09-21 19:38:16,251 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:16,251 EPOCH 7 done: loss 0.6369 - lr 0.0200000\n",
+      "Epoch     7: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 19:38:16,252 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:38:16,254 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:16,345 epoch 8 - iter 1/3 - loss 0.63336134 - samples/sec: 16.13 - lr: 0.010000\n",
+      "2021-09-21 19:38:16,403 epoch 8 - iter 2/3 - loss 0.62499356 - samples/sec: 17.24 - lr: 0.010000\n",
+      "2021-09-21 19:38:16,471 epoch 8 - iter 3/3 - loss 0.62686284 - samples/sec: 14.84 - lr: 0.010000\n",
+      "2021-09-21 19:38:16,472 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:16,473 EPOCH 8 done: loss 0.6269 - lr 0.0100000\n",
+      "2021-09-21 19:38:16,473 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:38:16,475 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:16,588 epoch 9 - iter 1/3 - loss 0.64054340 - samples/sec: 14.39 - lr: 0.010000\n",
+      "2021-09-21 19:38:16,666 epoch 9 - iter 2/3 - loss 0.64516878 - samples/sec: 12.83 - lr: 0.010000\n",
+      "2021-09-21 19:38:16,737 epoch 9 - iter 3/3 - loss 0.64210731 - samples/sec: 14.33 - lr: 0.010000\n",
+      "2021-09-21 19:38:16,738 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:16,738 EPOCH 9 done: loss 0.6421 - lr 0.0100000\n",
+      "2021-09-21 19:38:16,738 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:38:16,741 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:16,831 epoch 10 - iter 1/3 - loss 0.62638551 - samples/sec: 16.17 - lr: 0.010000\n",
+      "2021-09-21 19:38:16,890 epoch 10 - iter 2/3 - loss 0.63336718 - samples/sec: 16.92 - lr: 0.010000\n",
+      "2021-09-21 19:38:16,949 epoch 10 - iter 3/3 - loss 0.63671269 - samples/sec: 17.02 - lr: 0.010000\n",
+      "2021-09-21 19:38:16,950 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:16,951 EPOCH 10 done: loss 0.6367 - lr 0.0100000\n",
+      "2021-09-21 19:38:16,951 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:38:22,233 Test data not provided setting final score to 0\n",
+      "Accuracy Durchschnitt: 0.34893203883495144\n"
      ]
     }
    ],
@@ -2332,6 +2319,26 @@
     "print(f'Accuracy Durchschnitt: {statistics.mean(avg_acc_list)}')"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "3d6d5878",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.34951456310679613, 0.33980582524271846, 0.3572815533980582, 0.35436893203883496, 0.34368932038834954]\n",
+      "0.006486715762520401\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(avg_acc_list)\n",
+    "print(statistics.pstdev(avg_acc_list))"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "440b1d3b",
@@ -2342,7 +2349,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 6,
    "id": "ef4da272",
    "metadata": {},
    "outputs": [
@@ -2350,25 +2357,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:54:01,726 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
+      "2021-09-21 19:38:47,627 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:54:05,950 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:38:52,076 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 3/3 [00:00<00:00, 17573.90it/s]"
+      "100%|██████████| 3/3 [00:00<00:00, 14734.09it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:54:05,951 [b'This text entails a positive sentiment', b'This text entails a neutral sentiment', b'This text entails a negative sentiment']\n",
-      "2021-09-08 10:54:05,960 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:05,962 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:38:52,078 [b'This text entails a positive sentiment', b'This text entails a neutral sentiment', b'This text entails a negative sentiment']\n",
+      "2021-09-21 19:38:52,087 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:52,089 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -2681,27 +2688,27 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:54:05,963 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:05,963 Corpus: \"Corpus: 3 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 10:54:05,963 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:05,964 Parameters:\n",
-      "2021-09-08 10:54:05,964  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:54:05,964  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:54:05,964  - patience: \"3\"\n",
-      "2021-09-08 10:54:05,965  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:54:05,965  - max_epochs: \"10\"\n",
-      "2021-09-08 10:54:05,965  - shuffle: \"True\"\n",
-      "2021-09-08 10:54:05,966  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:54:05,966  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:54:05,966 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:05,966 Model training base path: \"temp\"\n",
-      "2021-09-08 10:54:05,967 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:05,967 Device: cuda:0\n",
-      "2021-09-08 10:54:05,967 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:05,968 Embeddings storage mode: cpu\n",
-      "2021-09-08 10:54:05,974 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:06,035 epoch 1 - iter 1/3 - loss 0.61871272 - samples/sec: 24.05 - lr: 0.020000\n",
-      "2021-09-08 10:54:06,106 epoch 1 - iter 2/3 - loss 0.63151446 - samples/sec: 14.16 - lr: 0.020000\n"
+      "2021-09-21 19:38:52,089 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:52,089 Corpus: \"Corpus: 3 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 19:38:52,090 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:52,090 Parameters:\n",
+      "2021-09-21 19:38:52,090  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:38:52,091  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:38:52,091  - patience: \"3\"\n",
+      "2021-09-21 19:38:52,091  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:38:52,091  - max_epochs: \"10\"\n",
+      "2021-09-21 19:38:52,092  - shuffle: \"True\"\n",
+      "2021-09-21 19:38:52,092  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:38:52,092  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:38:52,092 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:52,093 Model training base path: \"temp\"\n",
+      "2021-09-21 19:38:52,093 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:52,093 Device: cuda:0\n",
+      "2021-09-21 19:38:52,094 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:52,094 Embeddings storage mode: cpu\n",
+      "2021-09-21 19:38:52,100 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:52,194 epoch 1 - iter 1/3 - loss 0.65034777 - samples/sec: 16.59 - lr: 0.020000\n",
+      "2021-09-21 19:38:52,269 epoch 1 - iter 2/3 - loss 0.63948935 - samples/sec: 13.42 - lr: 0.020000\n"
      ]
     },
     {
@@ -2715,97 +2722,96 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:54:06,167 epoch 1 - iter 3/3 - loss 0.63481774 - samples/sec: 16.46 - lr: 0.020000\n",
-      "2021-09-08 10:54:06,168 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:06,168 EPOCH 1 done: loss 0.6348 - lr 0.0200000\n",
-      "2021-09-08 10:54:06,169 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:38:52,359 epoch 1 - iter 3/3 - loss 0.63850770 - samples/sec: 11.16 - lr: 0.020000\n",
+      "2021-09-21 19:38:52,360 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:52,360 EPOCH 1 done: loss 0.6385 - lr 0.0200000\n",
+      "2021-09-21 19:38:52,360 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:54:10,676 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:10,757 epoch 2 - iter 1/3 - loss 0.62804163 - samples/sec: 17.88 - lr: 0.020000\n",
-      "2021-09-08 10:54:10,827 epoch 2 - iter 2/3 - loss 0.63829866 - samples/sec: 14.34 - lr: 0.020000\n",
-      "2021-09-08 10:54:10,895 epoch 2 - iter 3/3 - loss 0.63960997 - samples/sec: 14.80 - lr: 0.020000\n",
-      "2021-09-08 10:54:10,896 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:10,896 EPOCH 2 done: loss 0.6396 - lr 0.0200000\n",
-      "2021-09-08 10:54:10,897 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:38:56,457 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:56,558 epoch 2 - iter 1/3 - loss 0.64136750 - samples/sec: 13.45 - lr: 0.020000\n",
+      "2021-09-21 19:38:56,635 epoch 2 - iter 2/3 - loss 0.64231539 - samples/sec: 13.03 - lr: 0.020000\n",
+      "2021-09-21 19:38:56,696 epoch 2 - iter 3/3 - loss 0.63772827 - samples/sec: 16.52 - lr: 0.020000\n",
+      "2021-09-21 19:38:56,697 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:56,698 EPOCH 2 done: loss 0.6377 - lr 0.0200000\n",
+      "2021-09-21 19:38:56,698 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:38:56,703 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:56,774 epoch 3 - iter 1/3 - loss 0.61509997 - samples/sec: 21.13 - lr: 0.020000\n",
+      "2021-09-21 19:38:56,848 epoch 3 - iter 2/3 - loss 0.63223046 - samples/sec: 13.57 - lr: 0.020000\n",
+      "2021-09-21 19:38:56,910 epoch 3 - iter 3/3 - loss 0.62502358 - samples/sec: 16.10 - lr: 0.020000\n",
+      "2021-09-21 19:38:56,911 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:56,912 EPOCH 3 done: loss 0.6250 - lr 0.0200000\n",
+      "2021-09-21 19:38:56,912 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:38:56,915 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:57,020 epoch 4 - iter 1/3 - loss 0.63477737 - samples/sec: 12.54 - lr: 0.020000\n",
+      "2021-09-21 19:38:57,077 epoch 4 - iter 2/3 - loss 0.63326016 - samples/sec: 17.47 - lr: 0.020000\n",
+      "2021-09-21 19:38:57,141 epoch 4 - iter 3/3 - loss 0.64715576 - samples/sec: 15.78 - lr: 0.020000\n",
+      "2021-09-21 19:38:57,142 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:38:57,143 EPOCH 4 done: loss 0.6472 - lr 0.0200000\n",
+      "2021-09-21 19:38:57,143 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:54:17,402 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:17,480 epoch 3 - iter 1/3 - loss 0.63286519 - samples/sec: 15.69 - lr: 0.020000\n",
-      "2021-09-08 10:54:17,547 epoch 3 - iter 2/3 - loss 0.63303027 - samples/sec: 15.18 - lr: 0.020000\n",
-      "2021-09-08 10:54:17,604 epoch 3 - iter 3/3 - loss 0.63143901 - samples/sec: 17.47 - lr: 0.020000\n",
-      "2021-09-08 10:54:17,605 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:17,606 EPOCH 3 done: loss 0.6314 - lr 0.0200000\n",
-      "2021-09-08 10:54:17,606 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:54:17,609 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:17,684 epoch 4 - iter 1/3 - loss 0.63473630 - samples/sec: 19.61 - lr: 0.020000\n",
-      "2021-09-08 10:54:17,736 epoch 4 - iter 2/3 - loss 0.63589230 - samples/sec: 19.55 - lr: 0.020000\n",
-      "2021-09-08 10:54:17,799 epoch 4 - iter 3/3 - loss 0.65131187 - samples/sec: 15.93 - lr: 0.020000\n",
-      "2021-09-08 10:54:17,800 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:17,801 EPOCH 4 done: loss 0.6513 - lr 0.0200000\n",
-      "2021-09-08 10:54:17,801 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:54:22,441 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:22,500 epoch 5 - iter 1/3 - loss 0.63564253 - samples/sec: 22.37 - lr: 0.020000\n",
-      "2021-09-08 10:54:22,543 epoch 5 - iter 2/3 - loss 0.63035032 - samples/sec: 23.17 - lr: 0.020000\n",
-      "2021-09-08 10:54:22,587 epoch 5 - iter 3/3 - loss 0.62433728 - samples/sec: 23.30 - lr: 0.020000\n",
-      "2021-09-08 10:54:22,588 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:22,588 EPOCH 5 done: loss 0.6243 - lr 0.0200000\n",
-      "2021-09-08 10:54:22,588 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:54:22,590 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:22,645 epoch 6 - iter 1/3 - loss 0.64557940 - samples/sec: 23.62 - lr: 0.020000\n",
-      "2021-09-08 10:54:22,688 epoch 6 - iter 2/3 - loss 0.63674593 - samples/sec: 23.25 - lr: 0.020000\n",
-      "2021-09-08 10:54:22,731 epoch 6 - iter 3/3 - loss 0.63198209 - samples/sec: 23.48 - lr: 0.020000\n",
-      "2021-09-08 10:54:22,732 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:22,733 EPOCH 6 done: loss 0.6320 - lr 0.0200000\n",
-      "2021-09-08 10:54:22,733 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:54:22,735 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:22,790 epoch 7 - iter 1/3 - loss 0.64283925 - samples/sec: 23.57 - lr: 0.020000\n",
-      "2021-09-08 10:54:22,833 epoch 7 - iter 2/3 - loss 0.64527833 - samples/sec: 23.35 - lr: 0.020000\n",
-      "2021-09-08 10:54:22,876 epoch 7 - iter 3/3 - loss 0.65338439 - samples/sec: 23.54 - lr: 0.020000\n",
-      "2021-09-08 10:54:22,877 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:22,877 EPOCH 7 done: loss 0.6534 - lr 0.0200000\n",
-      "2021-09-08 10:54:22,877 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:54:31,059 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:31,118 epoch 8 - iter 1/3 - loss 0.66134924 - samples/sec: 22.39 - lr: 0.020000\n",
-      "2021-09-08 10:54:31,162 epoch 8 - iter 2/3 - loss 0.65531775 - samples/sec: 22.77 - lr: 0.020000\n",
-      "2021-09-08 10:54:31,206 epoch 8 - iter 3/3 - loss 0.65205197 - samples/sec: 23.02 - lr: 0.020000\n",
-      "2021-09-08 10:54:31,207 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:31,207 EPOCH 8 done: loss 0.6521 - lr 0.0200000\n",
-      "2021-09-08 10:54:31,208 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:54:31,239 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:31,294 epoch 9 - iter 1/3 - loss 0.63761109 - samples/sec: 23.63 - lr: 0.020000\n",
-      "2021-09-08 10:54:31,340 epoch 9 - iter 2/3 - loss 0.64317083 - samples/sec: 21.80 - lr: 0.020000\n",
-      "2021-09-08 10:54:31,383 epoch 9 - iter 3/3 - loss 0.63527258 - samples/sec: 23.56 - lr: 0.020000\n",
-      "2021-09-08 10:54:31,384 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:31,384 EPOCH 9 done: loss 0.6353 - lr 0.0200000\n",
-      "2021-09-08 10:54:31,385 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:54:32,046 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:32,108 epoch 10 - iter 1/3 - loss 0.62757462 - samples/sec: 20.28 - lr: 0.020000\n",
-      "2021-09-08 10:54:32,183 epoch 10 - iter 2/3 - loss 0.63746688 - samples/sec: 13.34 - lr: 0.020000\n",
-      "2021-09-08 10:54:32,241 epoch 10 - iter 3/3 - loss 0.63168150 - samples/sec: 17.29 - lr: 0.020000\n",
-      "2021-09-08 10:54:32,242 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:32,243 EPOCH 10 done: loss 0.6317 - lr 0.0200000\n",
-      "2021-09-08 10:54:32,243 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:54:41,858 Test data not provided setting final score to 0\n",
-      "2021-09-08 10:54:59,841 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
+      "2021-09-21 19:39:05,266 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:39:05,349 epoch 5 - iter 1/3 - loss 0.63411736 - samples/sec: 17.50 - lr: 0.020000\n",
+      "2021-09-21 19:39:05,401 epoch 5 - iter 2/3 - loss 0.64458033 - samples/sec: 19.53 - lr: 0.020000\n",
+      "2021-09-21 19:39:05,459 epoch 5 - iter 3/3 - loss 0.64046260 - samples/sec: 17.47 - lr: 0.020000\n",
+      "2021-09-21 19:39:05,460 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:39:05,460 EPOCH 5 done: loss 0.6405 - lr 0.0200000\n",
+      "2021-09-21 19:39:05,461 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:39:05,462 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:39:05,540 epoch 6 - iter 1/3 - loss 0.64882904 - samples/sec: 18.94 - lr: 0.020000\n",
+      "2021-09-21 19:39:05,596 epoch 6 - iter 2/3 - loss 0.63377529 - samples/sec: 17.89 - lr: 0.020000\n",
+      "2021-09-21 19:39:05,655 epoch 6 - iter 3/3 - loss 0.62886645 - samples/sec: 17.32 - lr: 0.020000\n",
+      "2021-09-21 19:39:05,656 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:39:05,656 EPOCH 6 done: loss 0.6289 - lr 0.0200000\n",
+      "2021-09-21 19:39:05,656 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:39:05,659 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:39:05,747 epoch 7 - iter 1/3 - loss 0.62523186 - samples/sec: 16.17 - lr: 0.020000\n",
+      "2021-09-21 19:39:05,800 epoch 7 - iter 2/3 - loss 0.63840193 - samples/sec: 19.04 - lr: 0.020000\n",
+      "2021-09-21 19:39:05,850 epoch 7 - iter 3/3 - loss 0.64369394 - samples/sec: 19.96 - lr: 0.020000\n",
+      "2021-09-21 19:39:05,851 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:39:05,852 EPOCH 7 done: loss 0.6437 - lr 0.0200000\n",
+      "2021-09-21 19:39:05,852 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:39:05,854 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:39:05,936 epoch 8 - iter 1/3 - loss 0.65994215 - samples/sec: 16.91 - lr: 0.020000\n",
+      "2021-09-21 19:39:05,998 epoch 8 - iter 2/3 - loss 0.64033630 - samples/sec: 16.53 - lr: 0.020000\n",
+      "2021-09-21 19:39:06,062 epoch 8 - iter 3/3 - loss 0.63895323 - samples/sec: 15.85 - lr: 0.020000\n",
+      "2021-09-21 19:39:06,063 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:39:06,064 EPOCH 8 done: loss 0.6390 - lr 0.0200000\n",
+      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 19:39:06,064 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:39:06,066 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:39:06,154 epoch 9 - iter 1/3 - loss 0.62984776 - samples/sec: 16.53 - lr: 0.010000\n",
+      "2021-09-21 19:39:06,217 epoch 9 - iter 2/3 - loss 0.63265365 - samples/sec: 16.12 - lr: 0.010000\n",
+      "2021-09-21 19:39:06,276 epoch 9 - iter 3/3 - loss 0.63469551 - samples/sec: 17.21 - lr: 0.010000\n",
+      "2021-09-21 19:39:06,277 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:39:06,278 EPOCH 9 done: loss 0.6347 - lr 0.0100000\n",
+      "2021-09-21 19:39:06,278 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:39:06,280 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:39:06,364 epoch 10 - iter 1/3 - loss 0.64001757 - samples/sec: 17.01 - lr: 0.010000\n",
+      "2021-09-21 19:39:06,424 epoch 10 - iter 2/3 - loss 0.64553156 - samples/sec: 16.88 - lr: 0.010000\n",
+      "2021-09-21 19:39:06,485 epoch 10 - iter 3/3 - loss 0.63742010 - samples/sec: 16.67 - lr: 0.010000\n",
+      "2021-09-21 19:39:06,486 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:39:06,486 EPOCH 10 done: loss 0.6374 - lr 0.0100000\n",
+      "2021-09-21 19:39:06,487 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:39:13,862 Test data not provided setting final score to 0\n",
+      "2021-09-21 19:39:44,899 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:55:04,364 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:39:49,331 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 3/3 [00:00<00:00, 16578.28it/s]"
+      "100%|██████████| 3/3 [00:00<00:00, 14529.92it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:55:04,366 [b'This text entails a positive sentiment', b'This text entails a neutral sentiment', b'This text entails a negative sentiment']\n",
-      "2021-09-08 10:55:04,551 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:04,553 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:39:49,334 [b'This text entails a positive sentiment', b'This text entails a neutral sentiment', b'This text entails a negative sentiment']\n",
+      "2021-09-21 19:39:49,352 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:39:49,354 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -3118,24 +3124,27 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:55:04,553 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:04,554 Corpus: \"Corpus: 3 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 10:55:04,554 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:04,554 Parameters:\n",
-      "2021-09-08 10:55:04,554  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:55:04,555  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:55:04,555  - patience: \"3\"\n",
-      "2021-09-08 10:55:04,555  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:55:04,556  - max_epochs: \"10\"\n",
-      "2021-09-08 10:55:04,556  - shuffle: \"True\"\n",
-      "2021-09-08 10:55:04,556  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:55:04,556  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:55:04,557 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:04,557 Model training base path: \"temp\"\n",
-      "2021-09-08 10:55:04,557 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:04,557 Device: cuda:0\n",
-      "2021-09-08 10:55:04,558 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:04,558 Embeddings storage mode: cpu\n"
+      "2021-09-21 19:39:49,355 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:39:49,355 Corpus: \"Corpus: 3 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 19:39:49,356 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:39:49,356 Parameters:\n",
+      "2021-09-21 19:39:49,357  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:39:49,357  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:39:49,357  - patience: \"3\"\n",
+      "2021-09-21 19:39:49,358  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:39:49,358  - max_epochs: \"10\"\n",
+      "2021-09-21 19:39:49,358  - shuffle: \"True\"\n",
+      "2021-09-21 19:39:49,359  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:39:49,359  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:39:49,359 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:39:49,360 Model training base path: \"temp\"\n",
+      "2021-09-21 19:39:49,360 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:39:49,361 Device: cuda:0\n",
+      "2021-09-21 19:39:49,361 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:39:49,361 Embeddings storage mode: cpu\n",
+      "2021-09-21 19:39:49,368 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:39:49,461 epoch 1 - iter 1/3 - loss 0.63543218 - samples/sec: 15.03 - lr: 0.020000\n",
+      "2021-09-21 19:39:49,526 epoch 1 - iter 2/3 - loss 0.63907793 - samples/sec: 15.52 - lr: 0.020000\n"
      ]
     },
     {
@@ -3149,99 +3158,97 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:55:04,736 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:04,809 epoch 1 - iter 1/3 - loss 0.67470789 - samples/sec: 22.48 - lr: 0.020000\n",
-      "2021-09-08 10:55:04,876 epoch 1 - iter 2/3 - loss 0.66782865 - samples/sec: 15.04 - lr: 0.020000\n",
-      "2021-09-08 10:55:04,938 epoch 1 - iter 3/3 - loss 0.65984356 - samples/sec: 16.22 - lr: 0.020000\n",
-      "2021-09-08 10:55:04,940 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:04,940 EPOCH 1 done: loss 0.6598 - lr 0.0200000\n",
-      "2021-09-08 10:55:04,940 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:39:49,599 epoch 1 - iter 3/3 - loss 0.63449065 - samples/sec: 13.80 - lr: 0.020000\n",
+      "2021-09-21 19:39:49,601 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:39:49,601 EPOCH 1 done: loss 0.6345 - lr 0.0200000\n",
+      "2021-09-21 19:39:49,601 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:55:11,583 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:11,667 epoch 2 - iter 1/3 - loss 0.63717777 - samples/sec: 18.89 - lr: 0.020000\n",
-      "2021-09-08 10:55:11,726 epoch 2 - iter 2/3 - loss 0.64901516 - samples/sec: 17.08 - lr: 0.020000\n",
-      "2021-09-08 10:55:11,782 epoch 2 - iter 3/3 - loss 0.64169302 - samples/sec: 17.86 - lr: 0.020000\n",
-      "2021-09-08 10:55:11,783 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:11,784 EPOCH 2 done: loss 0.6417 - lr 0.0200000\n",
-      "2021-09-08 10:55:11,784 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:55:13,200 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:13,287 epoch 3 - iter 1/3 - loss 0.62855095 - samples/sec: 13.46 - lr: 0.020000\n",
-      "2021-09-08 10:55:13,346 epoch 3 - iter 2/3 - loss 0.64399624 - samples/sec: 17.17 - lr: 0.020000\n",
-      "2021-09-08 10:55:13,406 epoch 3 - iter 3/3 - loss 0.64026219 - samples/sec: 16.75 - lr: 0.020000\n",
-      "2021-09-08 10:55:13,407 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:13,408 EPOCH 3 done: loss 0.6403 - lr 0.0200000\n",
-      "2021-09-08 10:55:13,408 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:55:13,410 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:13,502 epoch 4 - iter 1/3 - loss 0.65306860 - samples/sec: 16.09 - lr: 0.020000\n",
-      "2021-09-08 10:55:13,569 epoch 4 - iter 2/3 - loss 0.65163821 - samples/sec: 15.13 - lr: 0.020000\n",
-      "2021-09-08 10:55:13,638 epoch 4 - iter 3/3 - loss 0.65165073 - samples/sec: 14.57 - lr: 0.020000\n",
-      "2021-09-08 10:55:13,639 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:13,639 EPOCH 4 done: loss 0.6517 - lr 0.0200000\n",
-      "2021-09-08 10:55:13,639 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:55:13,642 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:13,717 epoch 5 - iter 1/3 - loss 0.62939376 - samples/sec: 17.48 - lr: 0.020000\n",
-      "2021-09-08 10:55:13,761 epoch 5 - iter 2/3 - loss 0.64416912 - samples/sec: 22.70 - lr: 0.020000\n",
-      "2021-09-08 10:55:13,835 epoch 5 - iter 3/3 - loss 0.64348900 - samples/sec: 13.60 - lr: 0.020000\n",
-      "2021-09-08 10:55:13,836 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:13,836 EPOCH 5 done: loss 0.6435 - lr 0.0200000\n",
-      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 10:55:13,837 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:55:13,839 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:13,914 epoch 6 - iter 1/3 - loss 0.64949435 - samples/sec: 20.59 - lr: 0.010000\n",
-      "2021-09-08 10:55:13,988 epoch 6 - iter 2/3 - loss 0.63474157 - samples/sec: 13.63 - lr: 0.010000\n",
-      "2021-09-08 10:55:14,049 epoch 6 - iter 3/3 - loss 0.63686609 - samples/sec: 16.48 - lr: 0.010000\n",
-      "2021-09-08 10:55:14,050 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:14,051 EPOCH 6 done: loss 0.6369 - lr 0.0100000\n",
-      "2021-09-08 10:55:14,051 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:55:14,053 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:14,137 epoch 7 - iter 1/3 - loss 0.65361112 - samples/sec: 15.92 - lr: 0.010000\n",
-      "2021-09-08 10:55:14,200 epoch 7 - iter 2/3 - loss 0.64459550 - samples/sec: 16.13 - lr: 0.010000\n",
-      "2021-09-08 10:55:14,261 epoch 7 - iter 3/3 - loss 0.64315263 - samples/sec: 16.54 - lr: 0.010000\n",
-      "2021-09-08 10:55:14,262 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:14,262 EPOCH 7 done: loss 0.6432 - lr 0.0100000\n",
-      "2021-09-08 10:55:14,263 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:55:14,265 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:14,344 epoch 8 - iter 1/3 - loss 0.61841142 - samples/sec: 16.41 - lr: 0.010000\n",
-      "2021-09-08 10:55:14,391 epoch 8 - iter 2/3 - loss 0.64404029 - samples/sec: 21.45 - lr: 0.010000\n",
-      "2021-09-08 10:55:14,453 epoch 8 - iter 3/3 - loss 0.63284196 - samples/sec: 16.39 - lr: 0.010000\n",
-      "2021-09-08 10:55:14,454 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:14,454 EPOCH 8 done: loss 0.6328 - lr 0.0100000\n",
-      "2021-09-08 10:55:14,455 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:55:14,457 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:14,530 epoch 9 - iter 1/3 - loss 0.63595963 - samples/sec: 20.58 - lr: 0.010000\n",
-      "2021-09-08 10:55:14,582 epoch 9 - iter 2/3 - loss 0.61755520 - samples/sec: 19.27 - lr: 0.010000\n",
-      "2021-09-08 10:55:14,637 epoch 9 - iter 3/3 - loss 0.62194699 - samples/sec: 18.59 - lr: 0.010000\n",
-      "2021-09-08 10:55:14,638 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:14,638 EPOCH 9 done: loss 0.6219 - lr 0.0100000\n",
-      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 10:55:14,639 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:55:14,641 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:14,710 epoch 10 - iter 1/3 - loss 0.63483024 - samples/sec: 22.57 - lr: 0.005000\n",
-      "2021-09-08 10:55:14,783 epoch 10 - iter 2/3 - loss 0.63319865 - samples/sec: 13.75 - lr: 0.005000\n",
-      "2021-09-08 10:55:14,833 epoch 10 - iter 3/3 - loss 0.63698677 - samples/sec: 20.22 - lr: 0.005000\n",
-      "2021-09-08 10:55:14,834 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:14,835 EPOCH 10 done: loss 0.6370 - lr 0.0050000\n",
-      "2021-09-08 10:55:14,835 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:55:23,017 Test data not provided setting final score to 0\n",
-      "2021-09-08 10:55:39,179 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
+      "2021-09-21 19:39:53,487 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:39:53,595 epoch 2 - iter 1/3 - loss 0.64231402 - samples/sec: 12.57 - lr: 0.020000\n",
+      "2021-09-21 19:39:53,669 epoch 2 - iter 2/3 - loss 0.63878444 - samples/sec: 13.52 - lr: 0.020000\n",
+      "2021-09-21 19:39:53,735 epoch 2 - iter 3/3 - loss 0.64683723 - samples/sec: 15.47 - lr: 0.020000\n",
+      "2021-09-21 19:39:53,736 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:39:53,736 EPOCH 2 done: loss 0.6468 - lr 0.0200000\n",
+      "2021-09-21 19:39:53,737 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:39:57,503 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:39:57,597 epoch 3 - iter 1/3 - loss 0.63179636 - samples/sec: 15.47 - lr: 0.020000\n",
+      "2021-09-21 19:39:57,669 epoch 3 - iter 2/3 - loss 0.63896665 - samples/sec: 14.02 - lr: 0.020000\n",
+      "2021-09-21 19:39:57,748 epoch 3 - iter 3/3 - loss 0.63417502 - samples/sec: 12.73 - lr: 0.020000\n",
+      "2021-09-21 19:39:57,749 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:39:57,750 EPOCH 3 done: loss 0.6342 - lr 0.0200000\n",
+      "2021-09-21 19:39:57,750 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:39:57,758 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:39:57,864 epoch 4 - iter 1/3 - loss 0.63538104 - samples/sec: 14.09 - lr: 0.020000\n",
+      "2021-09-21 19:39:57,937 epoch 4 - iter 2/3 - loss 0.63138628 - samples/sec: 13.73 - lr: 0.020000\n",
+      "2021-09-21 19:39:58,006 epoch 4 - iter 3/3 - loss 0.63215629 - samples/sec: 14.71 - lr: 0.020000\n",
+      "2021-09-21 19:39:58,007 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:39:58,007 EPOCH 4 done: loss 0.6322 - lr 0.0200000\n",
+      "2021-09-21 19:39:58,007 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:39:58,010 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:39:58,098 epoch 5 - iter 1/3 - loss 0.65251750 - samples/sec: 15.44 - lr: 0.020000\n",
+      "2021-09-21 19:39:58,176 epoch 5 - iter 2/3 - loss 0.64084074 - samples/sec: 12.87 - lr: 0.020000\n",
+      "2021-09-21 19:39:58,245 epoch 5 - iter 3/3 - loss 0.64233045 - samples/sec: 14.68 - lr: 0.020000\n",
+      "2021-09-21 19:39:58,247 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:39:58,247 EPOCH 5 done: loss 0.6423 - lr 0.0200000\n",
+      "2021-09-21 19:39:58,248 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:39:58,250 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:39:58,341 epoch 6 - iter 1/3 - loss 0.62997466 - samples/sec: 13.95 - lr: 0.020000\n",
+      "2021-09-21 19:39:58,411 epoch 6 - iter 2/3 - loss 0.63637787 - samples/sec: 14.53 - lr: 0.020000\n",
+      "2021-09-21 19:39:58,482 epoch 6 - iter 3/3 - loss 0.64058594 - samples/sec: 14.04 - lr: 0.020000\n",
+      "2021-09-21 19:39:58,483 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:39:58,484 EPOCH 6 done: loss 0.6406 - lr 0.0200000\n",
+      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 19:39:58,484 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:39:58,486 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:39:58,578 epoch 7 - iter 1/3 - loss 0.62889594 - samples/sec: 15.31 - lr: 0.010000\n",
+      "2021-09-21 19:39:58,649 epoch 7 - iter 2/3 - loss 0.62611675 - samples/sec: 14.11 - lr: 0.010000\n",
+      "2021-09-21 19:39:58,714 epoch 7 - iter 3/3 - loss 0.62983070 - samples/sec: 15.65 - lr: 0.010000\n",
+      "2021-09-21 19:39:58,715 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:39:58,715 EPOCH 7 done: loss 0.6298 - lr 0.0100000\n",
+      "2021-09-21 19:39:58,716 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:39:58,718 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:39:58,804 epoch 8 - iter 1/3 - loss 0.63511223 - samples/sec: 15.41 - lr: 0.010000\n",
+      "2021-09-21 19:39:58,873 epoch 8 - iter 2/3 - loss 0.64528787 - samples/sec: 14.57 - lr: 0.010000\n",
+      "2021-09-21 19:39:58,947 epoch 8 - iter 3/3 - loss 0.63892976 - samples/sec: 13.68 - lr: 0.010000\n",
+      "2021-09-21 19:39:58,949 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:39:58,949 EPOCH 8 done: loss 0.6389 - lr 0.0100000\n",
+      "2021-09-21 19:39:58,949 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:39:58,951 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:39:59,044 epoch 9 - iter 1/3 - loss 0.65299302 - samples/sec: 14.84 - lr: 0.010000\n",
+      "2021-09-21 19:39:59,116 epoch 9 - iter 2/3 - loss 0.65517563 - samples/sec: 14.06 - lr: 0.010000\n",
+      "2021-09-21 19:39:59,187 epoch 9 - iter 3/3 - loss 0.64942777 - samples/sec: 14.06 - lr: 0.010000\n",
+      "2021-09-21 19:39:59,189 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:39:59,189 EPOCH 9 done: loss 0.6494 - lr 0.0100000\n",
+      "2021-09-21 19:39:59,189 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:40:05,049 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:05,113 epoch 10 - iter 1/3 - loss 0.66234791 - samples/sec: 20.03 - lr: 0.010000\n",
+      "2021-09-21 19:40:05,162 epoch 10 - iter 2/3 - loss 0.65490100 - samples/sec: 20.81 - lr: 0.010000\n",
+      "2021-09-21 19:40:05,210 epoch 10 - iter 3/3 - loss 0.64648022 - samples/sec: 21.07 - lr: 0.010000\n",
+      "2021-09-21 19:40:05,211 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:05,212 EPOCH 10 done: loss 0.6465 - lr 0.0100000\n",
+      "2021-09-21 19:40:05,212 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:40:21,086 Test data not provided setting final score to 0\n",
+      "2021-09-21 19:40:49,477 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:55:43,361 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:40:54,224 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 3/3 [00:00<00:00, 19569.07it/s]"
+      "100%|██████████| 3/3 [00:00<00:00, 11607.85it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:55:43,363 [b'This text entails a positive sentiment', b'This text entails a neutral sentiment', b'This text entails a negative sentiment']\n",
-      "2021-09-08 10:55:43,371 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:43,373 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:40:54,227 [b'This text entails a positive sentiment', b'This text entails a neutral sentiment', b'This text entails a negative sentiment']\n",
+      "2021-09-21 19:40:54,265 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:54,267 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -3554,32 +3561,26 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:55:43,374 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:43,374 Corpus: \"Corpus: 3 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 10:55:43,374 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:43,375 Parameters:\n",
-      "2021-09-08 10:55:43,375  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:55:43,375  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:55:43,375  - patience: \"3\"\n",
-      "2021-09-08 10:55:43,376  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:55:43,376  - max_epochs: \"10\"\n",
-      "2021-09-08 10:55:43,376  - shuffle: \"True\"\n",
-      "2021-09-08 10:55:43,376  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:55:43,377  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:55:43,377 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:43,377 Model training base path: \"temp\"\n",
-      "2021-09-08 10:55:43,378 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:43,378 Device: cuda:0\n",
-      "2021-09-08 10:55:43,378 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:43,378 Embeddings storage mode: cpu\n",
-      "2021-09-08 10:55:43,385 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:43,441 epoch 1 - iter 1/3 - loss 0.63684648 - samples/sec: 25.03 - lr: 0.020000\n",
-      "2021-09-08 10:55:43,492 epoch 1 - iter 2/3 - loss 0.64001733 - samples/sec: 19.79 - lr: 0.020000\n",
-      "2021-09-08 10:55:43,549 epoch 1 - iter 3/3 - loss 0.64407899 - samples/sec: 17.66 - lr: 0.020000\n",
-      "2021-09-08 10:55:43,550 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:43,550 EPOCH 1 done: loss 0.6441 - lr 0.0200000\n",
-      "2021-09-08 10:55:43,551 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n"
+      "2021-09-21 19:40:54,268 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:54,268 Corpus: \"Corpus: 3 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 19:40:54,268 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:54,268 Parameters:\n",
+      "2021-09-21 19:40:54,269  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:40:54,269  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:40:54,269  - patience: \"3\"\n",
+      "2021-09-21 19:40:54,270  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:40:54,270  - max_epochs: \"10\"\n",
+      "2021-09-21 19:40:54,270  - shuffle: \"True\"\n",
+      "2021-09-21 19:40:54,270  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:40:54,271  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:40:54,271 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:54,271 Model training base path: \"temp\"\n",
+      "2021-09-21 19:40:54,272 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:54,272 Device: cuda:0\n",
+      "2021-09-21 19:40:54,272 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:54,273 Embeddings storage mode: cpu\n",
+      "2021-09-21 19:40:54,279 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:54,367 epoch 1 - iter 1/3 - loss 0.62585634 - samples/sec: 16.58 - lr: 0.020000\n"
      ]
     },
     {
@@ -3593,105 +3594,98 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:55:49,045 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:49,103 epoch 2 - iter 1/3 - loss 0.64325410 - samples/sec: 22.51 - lr: 0.020000\n",
-      "2021-09-08 10:55:49,148 epoch 2 - iter 2/3 - loss 0.64400592 - samples/sec: 22.85 - lr: 0.020000\n",
-      "2021-09-08 10:55:49,191 epoch 2 - iter 3/3 - loss 0.64377292 - samples/sec: 23.49 - lr: 0.020000\n",
-      "2021-09-08 10:55:49,192 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:49,192 EPOCH 2 done: loss 0.6438 - lr 0.0200000\n",
-      "2021-09-08 10:55:49,192 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:55:49,355 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:49,410 epoch 3 - iter 1/3 - loss 0.62772530 - samples/sec: 23.33 - lr: 0.020000\n",
-      "2021-09-08 10:55:49,453 epoch 3 - iter 2/3 - loss 0.64321178 - samples/sec: 23.42 - lr: 0.020000\n",
-      "2021-09-08 10:55:49,496 epoch 3 - iter 3/3 - loss 0.63866291 - samples/sec: 23.52 - lr: 0.020000\n",
-      "2021-09-08 10:55:49,497 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:49,497 EPOCH 3 done: loss 0.6387 - lr 0.0200000\n",
-      "2021-09-08 10:55:49,498 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:55:49,607 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:49,662 epoch 4 - iter 1/3 - loss 0.66376579 - samples/sec: 23.30 - lr: 0.020000\n",
-      "2021-09-08 10:55:49,706 epoch 4 - iter 2/3 - loss 0.65165040 - samples/sec: 23.26 - lr: 0.020000\n",
-      "2021-09-08 10:55:49,748 epoch 4 - iter 3/3 - loss 0.64519254 - samples/sec: 23.69 - lr: 0.020000\n",
-      "2021-09-08 10:55:49,749 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:49,750 EPOCH 4 done: loss 0.6452 - lr 0.0200000\n",
-      "2021-09-08 10:55:49,750 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:40:54,434 epoch 1 - iter 2/3 - loss 0.64109391 - samples/sec: 14.92 - lr: 0.020000\n",
+      "2021-09-21 19:40:54,506 epoch 1 - iter 3/3 - loss 0.64063636 - samples/sec: 13.97 - lr: 0.020000\n",
+      "2021-09-21 19:40:54,507 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:54,508 EPOCH 1 done: loss 0.6406 - lr 0.0200000\n",
+      "2021-09-21 19:40:54,508 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:55:56,339 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:56,413 epoch 5 - iter 1/3 - loss 0.66491979 - samples/sec: 16.72 - lr: 0.020000\n",
-      "2021-09-08 10:55:56,475 epoch 5 - iter 2/3 - loss 0.65405425 - samples/sec: 16.47 - lr: 0.020000\n",
-      "2021-09-08 10:55:56,531 epoch 5 - iter 3/3 - loss 0.64650273 - samples/sec: 17.97 - lr: 0.020000\n",
-      "2021-09-08 10:55:56,532 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:56,532 EPOCH 5 done: loss 0.6465 - lr 0.0200000\n",
-      "2021-09-08 10:55:56,532 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:40:59,206 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:59,300 epoch 2 - iter 1/3 - loss 0.64406371 - samples/sec: 14.72 - lr: 0.020000\n",
+      "2021-09-21 19:40:59,371 epoch 2 - iter 2/3 - loss 0.63421938 - samples/sec: 14.27 - lr: 0.020000\n",
+      "2021-09-21 19:40:59,440 epoch 2 - iter 3/3 - loss 0.64083974 - samples/sec: 14.58 - lr: 0.020000\n",
+      "2021-09-21 19:40:59,441 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:40:59,441 EPOCH 2 done: loss 0.6408 - lr 0.0200000\n",
+      "2021-09-21 19:40:59,442 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:56:01,926 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:02,012 epoch 6 - iter 1/3 - loss 0.61597270 - samples/sec: 14.90 - lr: 0.020000\n",
-      "2021-09-08 10:56:02,069 epoch 6 - iter 2/3 - loss 0.63328826 - samples/sec: 17.85 - lr: 0.020000\n",
-      "2021-09-08 10:56:02,114 epoch 6 - iter 3/3 - loss 0.63752065 - samples/sec: 22.43 - lr: 0.020000\n",
-      "2021-09-08 10:56:02,115 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:02,115 EPOCH 6 done: loss 0.6375 - lr 0.0200000\n",
-      "2021-09-08 10:56:02,115 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:56:02,195 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:02,276 epoch 7 - iter 1/3 - loss 0.62377608 - samples/sec: 16.81 - lr: 0.020000\n",
-      "2021-09-08 10:56:02,331 epoch 7 - iter 2/3 - loss 0.63246313 - samples/sec: 18.40 - lr: 0.020000\n",
-      "2021-09-08 10:56:02,385 epoch 7 - iter 3/3 - loss 0.62618774 - samples/sec: 18.71 - lr: 0.020000\n",
-      "2021-09-08 10:56:02,386 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:02,386 EPOCH 7 done: loss 0.6262 - lr 0.0200000\n",
-      "2021-09-08 10:56:02,387 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:56:02,463 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:02,555 epoch 8 - iter 1/3 - loss 0.64102626 - samples/sec: 15.80 - lr: 0.020000\n",
-      "2021-09-08 10:56:02,614 epoch 8 - iter 2/3 - loss 0.63837677 - samples/sec: 17.26 - lr: 0.020000\n",
-      "2021-09-08 10:56:02,684 epoch 8 - iter 3/3 - loss 0.63488388 - samples/sec: 14.40 - lr: 0.020000\n",
-      "2021-09-08 10:56:02,685 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:02,685 EPOCH 8 done: loss 0.6349 - lr 0.0200000\n",
-      "2021-09-08 10:56:02,685 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:56:02,764 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:02,843 epoch 9 - iter 1/3 - loss 0.63098687 - samples/sec: 16.49 - lr: 0.020000\n",
-      "2021-09-08 10:56:02,914 epoch 9 - iter 2/3 - loss 0.63242421 - samples/sec: 14.17 - lr: 0.020000\n",
-      "2021-09-08 10:56:02,967 epoch 9 - iter 3/3 - loss 0.63471840 - samples/sec: 18.80 - lr: 0.020000\n",
-      "2021-09-08 10:56:02,968 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:02,969 EPOCH 9 done: loss 0.6347 - lr 0.0200000\n",
-      "Epoch     9: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 10:56:02,969 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:56:03,623 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:03,714 epoch 10 - iter 1/3 - loss 0.64387858 - samples/sec: 16.20 - lr: 0.010000\n",
-      "2021-09-08 10:56:03,761 epoch 10 - iter 2/3 - loss 0.64259657 - samples/sec: 21.29 - lr: 0.010000\n",
-      "2021-09-08 10:56:03,826 epoch 10 - iter 3/3 - loss 0.64209147 - samples/sec: 15.55 - lr: 0.010000\n",
-      "2021-09-08 10:56:03,828 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:03,828 EPOCH 10 done: loss 0.6421 - lr 0.0100000\n",
-      "2021-09-08 10:56:03,828 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:56:08,895 Test data not provided setting final score to 0\n",
-      "2021-09-08 10:56:31,387 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
+      "2021-09-21 19:41:03,524 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:03,616 epoch 3 - iter 1/3 - loss 0.64011747 - samples/sec: 15.61 - lr: 0.020000\n",
+      "2021-09-21 19:41:03,683 epoch 3 - iter 2/3 - loss 0.63300288 - samples/sec: 15.17 - lr: 0.020000\n",
+      "2021-09-21 19:41:03,747 epoch 3 - iter 3/3 - loss 0.63584958 - samples/sec: 15.55 - lr: 0.020000\n",
+      "2021-09-21 19:41:03,748 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:03,749 EPOCH 3 done: loss 0.6358 - lr 0.0200000\n",
+      "2021-09-21 19:41:03,749 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:41:03,751 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:03,849 epoch 4 - iter 1/3 - loss 0.64617431 - samples/sec: 13.68 - lr: 0.020000\n",
+      "2021-09-21 19:41:03,918 epoch 4 - iter 2/3 - loss 0.64681983 - samples/sec: 14.68 - lr: 0.020000\n",
+      "2021-09-21 19:41:03,991 epoch 4 - iter 3/3 - loss 0.64009519 - samples/sec: 13.77 - lr: 0.020000\n",
+      "2021-09-21 19:41:03,992 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:03,992 EPOCH 4 done: loss 0.6401 - lr 0.0200000\n",
+      "2021-09-21 19:41:03,993 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:41:03,995 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:04,083 epoch 5 - iter 1/3 - loss 0.64173633 - samples/sec: 15.89 - lr: 0.020000\n",
+      "2021-09-21 19:41:04,139 epoch 5 - iter 2/3 - loss 0.64119700 - samples/sec: 18.05 - lr: 0.020000\n",
+      "2021-09-21 19:41:04,199 epoch 5 - iter 3/3 - loss 0.64004310 - samples/sec: 16.94 - lr: 0.020000\n",
+      "2021-09-21 19:41:04,200 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:04,200 EPOCH 5 done: loss 0.6400 - lr 0.0200000\n",
+      "2021-09-21 19:41:04,200 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:41:04,202 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:04,278 epoch 6 - iter 1/3 - loss 0.62048715 - samples/sec: 18.33 - lr: 0.020000\n",
+      "2021-09-21 19:41:04,334 epoch 6 - iter 2/3 - loss 0.63004798 - samples/sec: 17.79 - lr: 0.020000\n",
+      "2021-09-21 19:41:04,395 epoch 6 - iter 3/3 - loss 0.63380309 - samples/sec: 16.57 - lr: 0.020000\n",
+      "2021-09-21 19:41:04,396 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:04,396 EPOCH 6 done: loss 0.6338 - lr 0.0200000\n",
+      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 19:41:04,397 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:41:04,399 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:04,489 epoch 7 - iter 1/3 - loss 0.64668721 - samples/sec: 14.74 - lr: 0.010000\n",
+      "2021-09-21 19:41:04,562 epoch 7 - iter 2/3 - loss 0.64756349 - samples/sec: 13.86 - lr: 0.010000\n",
+      "2021-09-21 19:41:04,634 epoch 7 - iter 3/3 - loss 0.64220432 - samples/sec: 13.97 - lr: 0.010000\n",
+      "2021-09-21 19:41:04,635 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:04,635 EPOCH 7 done: loss 0.6422 - lr 0.0100000\n",
+      "2021-09-21 19:41:04,635 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:41:08,811 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:08,904 epoch 8 - iter 1/3 - loss 0.64647472 - samples/sec: 15.18 - lr: 0.010000\n",
+      "2021-09-21 19:41:08,963 epoch 8 - iter 2/3 - loss 0.64125669 - samples/sec: 17.21 - lr: 0.010000\n",
+      "2021-09-21 19:41:09,023 epoch 8 - iter 3/3 - loss 0.64131874 - samples/sec: 16.63 - lr: 0.010000\n",
+      "2021-09-21 19:41:09,024 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:09,024 EPOCH 8 done: loss 0.6413 - lr 0.0100000\n",
+      "2021-09-21 19:41:09,025 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:41:09,027 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:09,126 epoch 9 - iter 1/3 - loss 0.61689943 - samples/sec: 17.62 - lr: 0.010000\n",
+      "2021-09-21 19:41:09,189 epoch 9 - iter 2/3 - loss 0.63101643 - samples/sec: 15.97 - lr: 0.010000\n",
+      "2021-09-21 19:41:09,258 epoch 9 - iter 3/3 - loss 0.64140155 - samples/sec: 14.60 - lr: 0.010000\n",
+      "2021-09-21 19:41:09,259 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:09,259 EPOCH 9 done: loss 0.6414 - lr 0.0100000\n",
+      "2021-09-21 19:41:09,260 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:41:09,264 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:09,360 epoch 10 - iter 1/3 - loss 0.60718209 - samples/sec: 16.59 - lr: 0.010000\n",
+      "2021-09-21 19:41:09,422 epoch 10 - iter 2/3 - loss 0.62347367 - samples/sec: 16.26 - lr: 0.010000\n",
+      "2021-09-21 19:41:09,481 epoch 10 - iter 3/3 - loss 0.62381490 - samples/sec: 16.89 - lr: 0.010000\n",
+      "2021-09-21 19:41:09,482 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:09,483 EPOCH 10 done: loss 0.6238 - lr 0.0100000\n",
+      "2021-09-21 19:41:09,483 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:41:13,667 Test data not provided setting final score to 0\n",
+      "2021-09-21 19:41:35,548 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:56:36,518 Computing label dictionary. Progress:\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "100%|██████████| 3/3 [00:00<00:00, 18209.71it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 10:56:36,519 [b'This text entails a positive sentiment', b'This text entails a neutral sentiment', b'This text entails a negative sentiment']\n"
+      "2021-09-21 19:41:39,681 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\n"
+      "100%|██████████| 3/3 [00:00<00:00, 10271.76it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:56:39,292 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:39,294 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:41:39,683 [b'This text entails a positive sentiment', b'This text entails a neutral sentiment', b'This text entails a negative sentiment']\n",
+      "2021-09-21 19:41:39,698 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:39,700 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -4004,123 +3998,131 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:56:39,294 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:39,295 Corpus: \"Corpus: 3 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 10:56:39,295 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:39,295 Parameters:\n",
-      "2021-09-08 10:56:39,296  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:56:39,296  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:56:39,296  - patience: \"3\"\n",
-      "2021-09-08 10:56:39,297  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:56:39,297  - max_epochs: \"10\"\n",
-      "2021-09-08 10:56:39,297  - shuffle: \"True\"\n",
-      "2021-09-08 10:56:39,297  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:56:39,298  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:56:39,298 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:39,298 Model training base path: \"temp\"\n",
-      "2021-09-08 10:56:39,298 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:39,299 Device: cuda:0\n",
-      "2021-09-08 10:56:39,299 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:39,299 Embeddings storage mode: cpu\n",
-      "2021-09-08 10:56:40,154 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:40,222 epoch 1 - iter 1/3 - loss 0.67455381 - samples/sec: 18.73 - lr: 0.020000\n",
-      "2021-09-08 10:56:40,280 epoch 1 - iter 2/3 - loss 0.65970537 - samples/sec: 17.49 - lr: 0.020000\n",
-      "2021-09-08 10:56:40,338 epoch 1 - iter 3/3 - loss 0.64542254 - samples/sec: 17.39 - lr: 0.020000\n",
-      "2021-09-08 10:56:40,339 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:40,339 EPOCH 1 done: loss 0.6454 - lr 0.0200000\n",
-      "2021-09-08 10:56:40,339 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:56:56,974 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:57,033 epoch 2 - iter 1/3 - loss 0.62392187 - samples/sec: 22.33 - lr: 0.020000\n",
-      "2021-09-08 10:56:57,077 epoch 2 - iter 2/3 - loss 0.62870809 - samples/sec: 23.15 - lr: 0.020000\n",
-      "2021-09-08 10:56:57,120 epoch 2 - iter 3/3 - loss 0.63509444 - samples/sec: 23.07 - lr: 0.020000\n",
-      "2021-09-08 10:56:57,122 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:57,122 EPOCH 2 done: loss 0.6351 - lr 0.0200000\n",
-      "2021-09-08 10:56:57,122 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:56:58,197 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:58,268 epoch 3 - iter 1/3 - loss 0.63430113 - samples/sec: 17.38 - lr: 0.020000\n",
-      "2021-09-08 10:56:58,323 epoch 3 - iter 2/3 - loss 0.62984845 - samples/sec: 18.20 - lr: 0.020000\n",
-      "2021-09-08 10:56:58,376 epoch 3 - iter 3/3 - loss 0.63362507 - samples/sec: 19.18 - lr: 0.020000\n",
-      "2021-09-08 10:56:58,377 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:58,377 EPOCH 3 done: loss 0.6336 - lr 0.0200000\n",
-      "2021-09-08 10:56:58,377 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:56:58,379 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:58,449 epoch 4 - iter 1/3 - loss 0.62434793 - samples/sec: 20.08 - lr: 0.020000\n",
-      "2021-09-08 10:56:58,504 epoch 4 - iter 2/3 - loss 0.62559506 - samples/sec: 18.24 - lr: 0.020000\n",
-      "2021-09-08 10:56:58,569 epoch 4 - iter 3/3 - loss 0.62420980 - samples/sec: 15.66 - lr: 0.020000\n",
-      "2021-09-08 10:56:58,569 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:58,570 EPOCH 4 done: loss 0.6242 - lr 0.0200000\n",
-      "2021-09-08 10:56:58,570 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:56:58,578 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:58,650 epoch 5 - iter 1/3 - loss 0.61899221 - samples/sec: 16.94 - lr: 0.020000\n",
-      "2021-09-08 10:56:58,708 epoch 5 - iter 2/3 - loss 0.62707123 - samples/sec: 17.45 - lr: 0.020000\n",
-      "2021-09-08 10:56:58,763 epoch 5 - iter 3/3 - loss 0.62570584 - samples/sec: 18.24 - lr: 0.020000\n",
-      "2021-09-08 10:56:58,764 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:58,765 EPOCH 5 done: loss 0.6257 - lr 0.0200000\n",
-      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 10:56:58,765 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:56:58,767 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:58,851 epoch 6 - iter 1/3 - loss 0.62776732 - samples/sec: 16.63 - lr: 0.010000\n",
-      "2021-09-08 10:56:58,900 epoch 6 - iter 2/3 - loss 0.63587075 - samples/sec: 20.76 - lr: 0.010000\n",
-      "2021-09-08 10:56:58,952 epoch 6 - iter 3/3 - loss 0.63150905 - samples/sec: 19.21 - lr: 0.010000\n",
-      "2021-09-08 10:56:58,953 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:58,954 EPOCH 6 done: loss 0.6315 - lr 0.0100000\n",
-      "2021-09-08 10:56:58,954 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:56:58,956 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:59,031 epoch 7 - iter 1/3 - loss 0.63964111 - samples/sec: 16.42 - lr: 0.010000\n",
-      "2021-09-08 10:56:59,090 epoch 7 - iter 2/3 - loss 0.64407784 - samples/sec: 16.86 - lr: 0.010000\n",
-      "2021-09-08 10:56:59,150 epoch 7 - iter 3/3 - loss 0.64422421 - samples/sec: 16.83 - lr: 0.010000\n",
-      "2021-09-08 10:56:59,151 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:59,152 EPOCH 7 done: loss 0.6442 - lr 0.0100000\n",
-      "2021-09-08 10:56:59,152 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:56:59,154 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:59,233 epoch 8 - iter 1/3 - loss 0.66986418 - samples/sec: 16.66 - lr: 0.010000\n",
-      "2021-09-08 10:56:59,290 epoch 8 - iter 2/3 - loss 0.65326747 - samples/sec: 17.55 - lr: 0.010000\n",
-      "2021-09-08 10:56:59,342 epoch 8 - iter 3/3 - loss 0.65710318 - samples/sec: 19.34 - lr: 0.010000\n",
-      "2021-09-08 10:56:59,343 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:59,343 EPOCH 8 done: loss 0.6571 - lr 0.0100000\n",
-      "2021-09-08 10:56:59,344 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:57:04,424 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:04,483 epoch 9 - iter 1/3 - loss 0.61492258 - samples/sec: 22.70 - lr: 0.010000\n",
-      "2021-09-08 10:57:04,526 epoch 9 - iter 2/3 - loss 0.62749350 - samples/sec: 23.16 - lr: 0.010000\n",
-      "2021-09-08 10:57:04,570 epoch 9 - iter 3/3 - loss 0.62629650 - samples/sec: 22.97 - lr: 0.010000\n",
-      "2021-09-08 10:57:04,571 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:04,572 EPOCH 9 done: loss 0.6263 - lr 0.0100000\n",
-      "2021-09-08 10:57:04,572 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:57:04,652 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:04,707 epoch 10 - iter 1/3 - loss 0.63075346 - samples/sec: 23.18 - lr: 0.010000\n",
-      "2021-09-08 10:57:04,751 epoch 10 - iter 2/3 - loss 0.63375399 - samples/sec: 23.05 - lr: 0.010000\n",
-      "2021-09-08 10:57:04,797 epoch 10 - iter 3/3 - loss 0.63253170 - samples/sec: 21.84 - lr: 0.010000\n",
-      "2021-09-08 10:57:04,798 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:04,799 EPOCH 10 done: loss 0.6325 - lr 0.0100000\n",
-      "2021-09-08 10:57:04,799 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:57:13,229 Test data not provided setting final score to 0\n",
-      "2021-09-08 10:57:36,826 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
-      "init TARS\n"
+      "2021-09-21 19:41:39,701 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:39,701 Corpus: \"Corpus: 3 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 19:41:39,701 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:39,702 Parameters:\n",
+      "2021-09-21 19:41:39,702  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:41:39,702  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:41:39,702  - patience: \"3\"\n",
+      "2021-09-21 19:41:39,703  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:41:39,703  - max_epochs: \"10\"\n",
+      "2021-09-21 19:41:39,703  - shuffle: \"True\"\n",
+      "2021-09-21 19:41:39,704  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:41:39,704  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:41:39,704 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:39,705 Model training base path: \"temp\"\n",
+      "2021-09-21 19:41:39,705 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:39,705 Device: cuda:0\n",
+      "2021-09-21 19:41:39,705 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:39,706 Embeddings storage mode: cpu\n",
+      "2021-09-21 19:41:39,712 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:39,793 epoch 1 - iter 1/3 - loss 0.63191801 - samples/sec: 19.26 - lr: 0.020000\n",
+      "2021-09-21 19:41:39,849 epoch 1 - iter 2/3 - loss 0.63300228 - samples/sec: 18.87 - lr: 0.020000\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:57:40,970 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:41:39,903 epoch 1 - iter 3/3 - loss 0.63257456 - samples/sec: 18.60 - lr: 0.020000\n",
+      "2021-09-21 19:41:39,904 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:39,904 EPOCH 1 done: loss 0.6326 - lr 0.0200000\n",
+      "2021-09-21 19:41:39,905 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:41:43,892 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:43,983 epoch 2 - iter 1/3 - loss 0.63040334 - samples/sec: 14.64 - lr: 0.020000\n",
+      "2021-09-21 19:41:44,043 epoch 2 - iter 2/3 - loss 0.63017318 - samples/sec: 16.74 - lr: 0.020000\n",
+      "2021-09-21 19:41:44,107 epoch 2 - iter 3/3 - loss 0.63237951 - samples/sec: 15.87 - lr: 0.020000\n",
+      "2021-09-21 19:41:44,108 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:44,108 EPOCH 2 done: loss 0.6324 - lr 0.0200000\n",
+      "2021-09-21 19:41:44,109 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:41:44,112 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:44,193 epoch 3 - iter 1/3 - loss 0.61990434 - samples/sec: 16.33 - lr: 0.020000\n",
+      "2021-09-21 19:41:44,253 epoch 3 - iter 2/3 - loss 0.63433784 - samples/sec: 16.76 - lr: 0.020000\n",
+      "2021-09-21 19:41:44,315 epoch 3 - iter 3/3 - loss 0.64436915 - samples/sec: 16.23 - lr: 0.020000\n",
+      "2021-09-21 19:41:44,316 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:44,316 EPOCH 3 done: loss 0.6444 - lr 0.0200000\n",
+      "2021-09-21 19:41:44,317 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:41:50,869 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:50,949 epoch 4 - iter 1/3 - loss 0.64728302 - samples/sec: 16.82 - lr: 0.020000\n",
+      "2021-09-21 19:41:51,010 epoch 4 - iter 2/3 - loss 0.64057714 - samples/sec: 16.55 - lr: 0.020000\n",
+      "2021-09-21 19:41:51,068 epoch 4 - iter 3/3 - loss 0.63834083 - samples/sec: 17.22 - lr: 0.020000\n",
+      "2021-09-21 19:41:51,069 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:51,070 EPOCH 4 done: loss 0.6383 - lr 0.0200000\n",
+      "2021-09-21 19:41:51,070 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:41:51,072 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:51,161 epoch 5 - iter 1/3 - loss 0.65488887 - samples/sec: 16.10 - lr: 0.020000\n",
+      "2021-09-21 19:41:51,221 epoch 5 - iter 2/3 - loss 0.65224484 - samples/sec: 16.96 - lr: 0.020000\n",
+      "2021-09-21 19:41:51,285 epoch 5 - iter 3/3 - loss 0.65274930 - samples/sec: 15.54 - lr: 0.020000\n",
+      "2021-09-21 19:41:51,286 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:51,287 EPOCH 5 done: loss 0.6527 - lr 0.0200000\n",
+      "2021-09-21 19:41:51,287 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:41:55,253 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:55,327 epoch 6 - iter 1/3 - loss 0.65314770 - samples/sec: 17.53 - lr: 0.020000\n",
+      "2021-09-21 19:41:55,381 epoch 6 - iter 2/3 - loss 0.64166895 - samples/sec: 18.51 - lr: 0.020000\n",
+      "2021-09-21 19:41:55,436 epoch 6 - iter 3/3 - loss 0.63883444 - samples/sec: 18.31 - lr: 0.020000\n",
+      "2021-09-21 19:41:55,438 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:55,438 EPOCH 6 done: loss 0.6388 - lr 0.0200000\n",
+      "2021-09-21 19:41:55,438 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:41:55,440 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:55,509 epoch 7 - iter 1/3 - loss 0.63280225 - samples/sec: 18.60 - lr: 0.020000\n",
+      "2021-09-21 19:41:55,562 epoch 7 - iter 2/3 - loss 0.63835615 - samples/sec: 18.69 - lr: 0.020000\n",
+      "2021-09-21 19:41:55,617 epoch 7 - iter 3/3 - loss 0.63175988 - samples/sec: 18.62 - lr: 0.020000\n",
+      "2021-09-21 19:41:55,618 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:55,618 EPOCH 7 done: loss 0.6318 - lr 0.0200000\n",
+      "2021-09-21 19:41:55,618 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:41:55,620 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:55,689 epoch 8 - iter 1/3 - loss 0.64183599 - samples/sec: 18.55 - lr: 0.020000\n",
+      "2021-09-21 19:41:55,743 epoch 8 - iter 2/3 - loss 0.62503511 - samples/sec: 18.69 - lr: 0.020000\n",
+      "2021-09-21 19:41:55,796 epoch 8 - iter 3/3 - loss 0.63634183 - samples/sec: 18.90 - lr: 0.020000\n",
+      "2021-09-21 19:41:55,797 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:55,798 EPOCH 8 done: loss 0.6363 - lr 0.0200000\n",
+      "2021-09-21 19:41:55,798 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:41:55,800 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:55,868 epoch 9 - iter 1/3 - loss 0.65034312 - samples/sec: 18.60 - lr: 0.020000\n",
+      "2021-09-21 19:41:55,923 epoch 9 - iter 2/3 - loss 0.63556284 - samples/sec: 18.54 - lr: 0.020000\n",
+      "2021-09-21 19:41:55,976 epoch 9 - iter 3/3 - loss 0.63841428 - samples/sec: 18.73 - lr: 0.020000\n",
+      "2021-09-21 19:41:55,977 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:55,978 EPOCH 9 done: loss 0.6384 - lr 0.0200000\n",
+      "Epoch     9: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 19:41:55,978 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:41:55,980 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:56,048 epoch 10 - iter 1/3 - loss 0.63052577 - samples/sec: 18.64 - lr: 0.010000\n",
+      "2021-09-21 19:41:56,103 epoch 10 - iter 2/3 - loss 0.63614395 - samples/sec: 18.56 - lr: 0.010000\n",
+      "2021-09-21 19:41:56,157 epoch 10 - iter 3/3 - loss 0.63785062 - samples/sec: 18.58 - lr: 0.010000\n",
+      "2021-09-21 19:41:56,158 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:41:56,158 EPOCH 10 done: loss 0.6379 - lr 0.0100000\n",
+      "2021-09-21 19:41:56,159 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:42:00,391 Test data not provided setting final score to 0\n",
+      "2021-09-21 19:42:15,651 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
+      "init TARS\n",
+      "2021-09-21 19:42:19,761 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 3/3 [00:00<00:00, 18183.40it/s]"
+      "100%|██████████| 3/3 [00:00<00:00, 18921.67it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:57:40,972 [b'This text entails a positive sentiment', b'This text entails a neutral sentiment', b'This text entails a negative sentiment']\n",
-      "2021-09-08 10:57:40,981 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:40,982 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:42:19,763 [b'This text entails a positive sentiment', b'This text entails a neutral sentiment', b'This text entails a negative sentiment']\n",
+      "2021-09-21 19:42:19,774 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:19,776 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -4433,27 +4435,31 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:57:40,983 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:40,983 Corpus: \"Corpus: 3 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 10:57:40,983 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:40,984 Parameters:\n",
-      "2021-09-08 10:57:40,984  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:57:40,984  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:57:40,985  - patience: \"3\"\n",
-      "2021-09-08 10:57:40,985  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:57:40,985  - max_epochs: \"10\"\n",
-      "2021-09-08 10:57:40,985  - shuffle: \"True\"\n",
-      "2021-09-08 10:57:40,986  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:57:40,986  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:57:40,986 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:40,987 Model training base path: \"temp\"\n",
-      "2021-09-08 10:57:40,987 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:40,987 Device: cuda:0\n",
-      "2021-09-08 10:57:40,987 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:40,988 Embeddings storage mode: cpu\n",
-      "2021-09-08 10:57:40,994 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:41,053 epoch 1 - iter 1/3 - loss 0.62604290 - samples/sec: 21.47 - lr: 0.020000\n",
-      "2021-09-08 10:57:41,127 epoch 1 - iter 2/3 - loss 0.63218904 - samples/sec: 13.71 - lr: 0.020000\n"
+      "2021-09-21 19:42:19,776 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:19,777 Corpus: \"Corpus: 3 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 19:42:19,777 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:19,777 Parameters:\n",
+      "2021-09-21 19:42:19,778  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:42:19,778  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:42:19,778  - patience: \"3\"\n",
+      "2021-09-21 19:42:19,779  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:42:19,779  - max_epochs: \"10\"\n",
+      "2021-09-21 19:42:19,779  - shuffle: \"True\"\n",
+      "2021-09-21 19:42:19,779  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:42:19,780  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:42:19,780 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:19,780 Model training base path: \"temp\"\n",
+      "2021-09-21 19:42:19,780 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:19,781 Device: cuda:0\n",
+      "2021-09-21 19:42:19,781 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:19,781 Embeddings storage mode: cpu\n",
+      "2021-09-21 19:42:19,788 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:19,839 epoch 1 - iter 1/3 - loss 0.62438768 - samples/sec: 25.93 - lr: 0.020000\n",
+      "2021-09-21 19:42:19,885 epoch 1 - iter 2/3 - loss 0.63541502 - samples/sec: 21.89 - lr: 0.020000\n",
+      "2021-09-21 19:42:19,932 epoch 1 - iter 3/3 - loss 0.64113782 - samples/sec: 21.63 - lr: 0.020000\n",
+      "2021-09-21 19:42:19,933 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:19,933 EPOCH 1 done: loss 0.6411 - lr 0.0200000\n",
+      "2021-09-21 19:42:19,934 BAD EPOCHS (no improvement): 0\n"
      ]
     },
     {
@@ -4467,79 +4473,75 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:57:41,184 epoch 1 - iter 3/3 - loss 0.63537016 - samples/sec: 17.48 - lr: 0.020000\n",
-      "2021-09-08 10:57:41,185 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:41,186 EPOCH 1 done: loss 0.6354 - lr 0.0200000\n",
-      "2021-09-08 10:57:41,186 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:57:47,078 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:47,158 epoch 2 - iter 1/3 - loss 0.61542362 - samples/sec: 16.49 - lr: 0.020000\n",
-      "2021-09-08 10:57:47,221 epoch 2 - iter 2/3 - loss 0.62419003 - samples/sec: 16.21 - lr: 0.020000\n",
-      "2021-09-08 10:57:47,280 epoch 2 - iter 3/3 - loss 0.63296046 - samples/sec: 16.83 - lr: 0.020000\n",
-      "2021-09-08 10:57:47,281 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:47,282 EPOCH 2 done: loss 0.6330 - lr 0.0200000\n",
-      "2021-09-08 10:57:47,282 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:57:47,835 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:47,924 epoch 3 - iter 1/3 - loss 0.62887555 - samples/sec: 17.38 - lr: 0.020000\n",
-      "2021-09-08 10:57:47,971 epoch 3 - iter 2/3 - loss 0.62639633 - samples/sec: 21.31 - lr: 0.020000\n",
-      "2021-09-08 10:57:48,026 epoch 3 - iter 3/3 - loss 0.62949439 - samples/sec: 18.55 - lr: 0.020000\n",
-      "2021-09-08 10:57:48,027 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:48,027 EPOCH 3 done: loss 0.6295 - lr 0.0200000\n",
-      "2021-09-08 10:57:48,027 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:57:48,031 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:48,110 epoch 4 - iter 1/3 - loss 0.64181244 - samples/sec: 17.37 - lr: 0.020000\n",
-      "2021-09-08 10:57:48,166 epoch 4 - iter 2/3 - loss 0.64944169 - samples/sec: 18.07 - lr: 0.020000\n",
-      "2021-09-08 10:57:48,209 epoch 4 - iter 3/3 - loss 0.64314018 - samples/sec: 23.26 - lr: 0.020000\n",
-      "2021-09-08 10:57:48,210 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:48,211 EPOCH 4 done: loss 0.6431 - lr 0.0200000\n",
-      "2021-09-08 10:57:48,211 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:57:52,257 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:52,314 epoch 5 - iter 1/3 - loss 0.64844400 - samples/sec: 22.71 - lr: 0.020000\n",
-      "2021-09-08 10:57:52,357 epoch 5 - iter 2/3 - loss 0.65474498 - samples/sec: 23.52 - lr: 0.020000\n",
-      "2021-09-08 10:57:52,403 epoch 5 - iter 3/3 - loss 0.65170693 - samples/sec: 22.27 - lr: 0.020000\n",
-      "2021-09-08 10:57:52,404 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:52,404 EPOCH 5 done: loss 0.6517 - lr 0.0200000\n",
-      "2021-09-08 10:57:52,404 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:42:23,787 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:23,859 epoch 2 - iter 1/3 - loss 0.64540356 - samples/sec: 18.67 - lr: 0.020000\n",
+      "2021-09-21 19:42:23,917 epoch 2 - iter 2/3 - loss 0.65144509 - samples/sec: 17.66 - lr: 0.020000\n",
+      "2021-09-21 19:42:23,970 epoch 2 - iter 3/3 - loss 0.64980664 - samples/sec: 18.94 - lr: 0.020000\n",
+      "2021-09-21 19:42:23,971 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:23,971 EPOCH 2 done: loss 0.6498 - lr 0.0200000\n",
+      "2021-09-21 19:42:23,972 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:58:00,750 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:00,809 epoch 6 - iter 1/3 - loss 0.63796896 - samples/sec: 22.75 - lr: 0.020000\n",
-      "2021-09-08 10:58:00,853 epoch 6 - iter 2/3 - loss 0.64207429 - samples/sec: 23.09 - lr: 0.020000\n",
-      "2021-09-08 10:58:00,896 epoch 6 - iter 3/3 - loss 0.63859441 - samples/sec: 23.33 - lr: 0.020000\n",
-      "2021-09-08 10:58:00,897 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:00,897 EPOCH 6 done: loss 0.6386 - lr 0.0200000\n",
-      "2021-09-08 10:58:00,898 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:58:04,482 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:04,540 epoch 7 - iter 1/3 - loss 0.63850302 - samples/sec: 22.44 - lr: 0.020000\n",
-      "2021-09-08 10:58:04,586 epoch 7 - iter 2/3 - loss 0.64143637 - samples/sec: 21.97 - lr: 0.020000\n",
-      "2021-09-08 10:58:04,631 epoch 7 - iter 3/3 - loss 0.63691660 - samples/sec: 22.81 - lr: 0.020000\n",
-      "2021-09-08 10:58:04,632 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:04,632 EPOCH 7 done: loss 0.6369 - lr 0.0200000\n",
-      "2021-09-08 10:58:04,633 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:58:07,054 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:07,112 epoch 8 - iter 1/3 - loss 0.63792950 - samples/sec: 22.06 - lr: 0.020000\n",
-      "2021-09-08 10:58:07,181 epoch 8 - iter 2/3 - loss 0.63239560 - samples/sec: 14.68 - lr: 0.020000\n",
-      "2021-09-08 10:58:07,244 epoch 8 - iter 3/3 - loss 0.63332621 - samples/sec: 15.79 - lr: 0.020000\n",
-      "2021-09-08 10:58:07,246 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:07,246 EPOCH 8 done: loss 0.6333 - lr 0.0200000\n",
-      "2021-09-08 10:58:07,246 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:58:07,249 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:07,325 epoch 9 - iter 1/3 - loss 0.63462812 - samples/sec: 16.01 - lr: 0.020000\n",
-      "2021-09-08 10:58:07,388 epoch 9 - iter 2/3 - loss 0.63708979 - samples/sec: 15.95 - lr: 0.020000\n",
-      "2021-09-08 10:58:07,456 epoch 9 - iter 3/3 - loss 0.63262906 - samples/sec: 14.84 - lr: 0.020000\n",
-      "2021-09-08 10:58:07,457 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:07,457 EPOCH 9 done: loss 0.6326 - lr 0.0200000\n",
-      "Epoch     9: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 10:58:07,458 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:58:07,460 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:07,525 epoch 10 - iter 1/3 - loss 0.62578100 - samples/sec: 22.76 - lr: 0.010000\n",
-      "2021-09-08 10:58:07,573 epoch 10 - iter 2/3 - loss 0.63230777 - samples/sec: 21.41 - lr: 0.010000\n",
-      "2021-09-08 10:58:07,620 epoch 10 - iter 3/3 - loss 0.62718878 - samples/sec: 21.45 - lr: 0.010000\n",
-      "2021-09-08 10:58:07,621 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:07,622 EPOCH 10 done: loss 0.6272 - lr 0.0100000\n",
-      "2021-09-08 10:58:07,622 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:58:18,644 Test data not provided setting final score to 0\n",
-      "Accuracy Durchschnitt: 0.34199203187251\n"
+      "2021-09-21 19:42:28,026 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:28,100 epoch 3 - iter 1/3 - loss 0.64427328 - samples/sec: 18.43 - lr: 0.020000\n",
+      "2021-09-21 19:42:28,156 epoch 3 - iter 2/3 - loss 0.62811759 - samples/sec: 18.13 - lr: 0.020000\n",
+      "2021-09-21 19:42:28,214 epoch 3 - iter 3/3 - loss 0.63009743 - samples/sec: 17.33 - lr: 0.020000\n",
+      "2021-09-21 19:42:28,215 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:28,215 EPOCH 3 done: loss 0.6301 - lr 0.0200000\n",
+      "2021-09-21 19:42:28,215 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:42:28,219 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:28,301 epoch 4 - iter 1/3 - loss 0.63770109 - samples/sec: 16.57 - lr: 0.020000\n",
+      "2021-09-21 19:42:28,354 epoch 4 - iter 2/3 - loss 0.63253608 - samples/sec: 18.77 - lr: 0.020000\n",
+      "2021-09-21 19:42:28,407 epoch 4 - iter 3/3 - loss 0.63290401 - samples/sec: 19.10 - lr: 0.020000\n",
+      "2021-09-21 19:42:28,408 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:28,409 EPOCH 4 done: loss 0.6329 - lr 0.0200000\n",
+      "2021-09-21 19:42:28,409 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:42:28,411 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:28,483 epoch 5 - iter 1/3 - loss 0.62483567 - samples/sec: 18.37 - lr: 0.020000\n",
+      "2021-09-21 19:42:28,541 epoch 5 - iter 2/3 - loss 0.63379642 - samples/sec: 17.45 - lr: 0.020000\n",
+      "2021-09-21 19:42:28,599 epoch 5 - iter 3/3 - loss 0.63401248 - samples/sec: 17.39 - lr: 0.020000\n",
+      "2021-09-21 19:42:28,600 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:28,600 EPOCH 5 done: loss 0.6340 - lr 0.0200000\n",
+      "2021-09-21 19:42:28,601 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:42:28,603 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:28,675 epoch 6 - iter 1/3 - loss 0.62813467 - samples/sec: 19.92 - lr: 0.020000\n",
+      "2021-09-21 19:42:28,725 epoch 6 - iter 2/3 - loss 0.63471639 - samples/sec: 20.09 - lr: 0.020000\n",
+      "2021-09-21 19:42:28,773 epoch 6 - iter 3/3 - loss 0.63652458 - samples/sec: 20.98 - lr: 0.020000\n",
+      "2021-09-21 19:42:28,774 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:28,774 EPOCH 6 done: loss 0.6365 - lr 0.0200000\n",
+      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 19:42:28,775 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:42:28,921 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:28,997 epoch 7 - iter 1/3 - loss 0.64446276 - samples/sec: 18.54 - lr: 0.010000\n",
+      "2021-09-21 19:42:29,049 epoch 7 - iter 2/3 - loss 0.63128459 - samples/sec: 19.24 - lr: 0.010000\n",
+      "2021-09-21 19:42:29,100 epoch 7 - iter 3/3 - loss 0.62821110 - samples/sec: 19.67 - lr: 0.010000\n",
+      "2021-09-21 19:42:29,101 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:29,102 EPOCH 7 done: loss 0.6282 - lr 0.0100000\n",
+      "2021-09-21 19:42:29,102 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:42:29,104 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:29,186 epoch 8 - iter 1/3 - loss 0.63477981 - samples/sec: 17.41 - lr: 0.010000\n",
+      "2021-09-21 19:42:29,244 epoch 8 - iter 2/3 - loss 0.63956359 - samples/sec: 17.32 - lr: 0.010000\n",
+      "2021-09-21 19:42:29,296 epoch 8 - iter 3/3 - loss 0.64415328 - samples/sec: 19.42 - lr: 0.010000\n",
+      "2021-09-21 19:42:29,297 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:29,298 EPOCH 8 done: loss 0.6442 - lr 0.0100000\n",
+      "2021-09-21 19:42:29,298 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:42:29,300 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:29,378 epoch 9 - iter 1/3 - loss 0.63623923 - samples/sec: 18.02 - lr: 0.010000\n",
+      "2021-09-21 19:42:29,432 epoch 9 - iter 2/3 - loss 0.63473681 - samples/sec: 18.51 - lr: 0.010000\n",
+      "2021-09-21 19:42:29,487 epoch 9 - iter 3/3 - loss 0.64507715 - samples/sec: 18.50 - lr: 0.010000\n",
+      "2021-09-21 19:42:29,488 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:29,488 EPOCH 9 done: loss 0.6451 - lr 0.0100000\n",
+      "2021-09-21 19:42:29,488 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:42:29,490 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:29,568 epoch 10 - iter 1/3 - loss 0.65008759 - samples/sec: 19.68 - lr: 0.010000\n",
+      "2021-09-21 19:42:29,635 epoch 10 - iter 2/3 - loss 0.64846560 - samples/sec: 14.89 - lr: 0.010000\n",
+      "2021-09-21 19:42:29,695 epoch 10 - iter 3/3 - loss 0.64891410 - samples/sec: 16.88 - lr: 0.010000\n",
+      "2021-09-21 19:42:29,696 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:29,696 EPOCH 10 done: loss 0.6489 - lr 0.0100000\n",
+      "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 19:42:29,697 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:42:33,787 Test data not provided setting final score to 0\n",
+      "Accuracy Durchschnitt: 0.34677290836653385\n"
      ]
     }
    ],
@@ -4607,11 +4609,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "id": "86988d74",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.32669322709163345, 0.34422310756972113, 0.3593625498007968, 0.3561752988047809, 0.34741035856573704]\n",
+      "0.011463031399747853\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(avg_acc_list)\n",
+    "print(statistics.pstdev(avg_acc_list))"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -4623,7 +4637,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 8,
    "id": "263ee449",
    "metadata": {},
    "outputs": [
@@ -4631,25 +4645,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:58:37,478 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
+      "2021-09-21 19:42:55,842 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:58:42,619 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:42:59,827 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 3/3 [00:00<00:00, 18504.28it/s]"
+      "100%|██████████| 3/3 [00:00<00:00, 16173.41it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:58:42,621 [b'having the quality of something harmful or unpleasant', b'one who does not side with any party in a war or dispute', b'involving advantage or good']\n",
-      "2021-09-08 10:58:42,635 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:42,636 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:42:59,828 [b'having the quality of something harmful or unpleasant', b'one who does not side with any party in a war or dispute', b'involving advantage or good']\n",
+      "2021-09-21 19:42:59,837 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:59,839 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -4962,27 +4976,31 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:58:42,637 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:42,637 Corpus: \"Corpus: 3 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 10:58:42,637 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:42,638 Parameters:\n",
-      "2021-09-08 10:58:42,638  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:58:42,638  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:58:42,638  - patience: \"3\"\n",
-      "2021-09-08 10:58:42,639  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:58:42,639  - max_epochs: \"10\"\n",
-      "2021-09-08 10:58:42,639  - shuffle: \"True\"\n",
-      "2021-09-08 10:58:42,640  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:58:42,640  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:58:42,640 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:42,640 Model training base path: \"temp\"\n",
-      "2021-09-08 10:58:42,641 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:42,641 Device: cuda:0\n",
-      "2021-09-08 10:58:42,641 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:42,642 Embeddings storage mode: cpu\n",
-      "2021-09-08 10:58:42,648 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:42,723 epoch 1 - iter 1/3 - loss 0.62035543 - samples/sec: 18.24 - lr: 0.020000\n",
-      "2021-09-08 10:58:42,796 epoch 1 - iter 2/3 - loss 0.63529897 - samples/sec: 13.66 - lr: 0.020000\n"
+      "2021-09-21 19:42:59,839 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:59,839 Corpus: \"Corpus: 3 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 19:42:59,840 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:59,840 Parameters:\n",
+      "2021-09-21 19:42:59,840  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:42:59,841  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:42:59,841  - patience: \"3\"\n",
+      "2021-09-21 19:42:59,841  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:42:59,841  - max_epochs: \"10\"\n",
+      "2021-09-21 19:42:59,842  - shuffle: \"True\"\n",
+      "2021-09-21 19:42:59,842  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:42:59,842  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:42:59,843 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:59,843 Model training base path: \"temp\"\n",
+      "2021-09-21 19:42:59,843 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:59,843 Device: cuda:0\n",
+      "2021-09-21 19:42:59,844 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:59,844 Embeddings storage mode: cpu\n",
+      "2021-09-21 19:42:59,850 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:42:59,914 epoch 1 - iter 1/3 - loss 0.65942693 - samples/sec: 23.23 - lr: 0.020000\n",
+      "2021-09-21 19:42:59,970 epoch 1 - iter 2/3 - loss 0.65397570 - samples/sec: 18.09 - lr: 0.020000\n",
+      "2021-09-21 19:43:00,020 epoch 1 - iter 3/3 - loss 0.64643180 - samples/sec: 20.23 - lr: 0.020000\n",
+      "2021-09-21 19:43:00,021 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:00,021 EPOCH 1 done: loss 0.6464 - lr 0.0200000\n",
+      "2021-09-21 19:43:00,021 BAD EPOCHS (no improvement): 0\n"
      ]
     },
     {
@@ -4996,109 +5014,93 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:58:42,847 epoch 1 - iter 3/3 - loss 0.63681684 - samples/sec: 19.84 - lr: 0.020000\n",
-      "2021-09-08 10:58:42,848 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:42,848 EPOCH 1 done: loss 0.6368 - lr 0.0200000\n",
-      "2021-09-08 10:58:42,849 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:58:49,952 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:50,038 epoch 2 - iter 1/3 - loss 0.62670594 - samples/sec: 17.26 - lr: 0.020000\n",
-      "2021-09-08 10:58:50,090 epoch 2 - iter 2/3 - loss 0.63008732 - samples/sec: 19.45 - lr: 0.020000\n",
-      "2021-09-08 10:58:50,149 epoch 2 - iter 3/3 - loss 0.63382618 - samples/sec: 16.91 - lr: 0.020000\n",
-      "2021-09-08 10:58:50,150 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:50,151 EPOCH 2 done: loss 0.6338 - lr 0.0200000\n",
-      "2021-09-08 10:58:50,151 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:58:54,193 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:54,272 epoch 3 - iter 1/3 - loss 0.63696557 - samples/sec: 17.87 - lr: 0.020000\n",
-      "2021-09-08 10:58:54,333 epoch 3 - iter 2/3 - loss 0.63197738 - samples/sec: 16.32 - lr: 0.020000\n",
-      "2021-09-08 10:58:54,396 epoch 3 - iter 3/3 - loss 0.63583330 - samples/sec: 16.11 - lr: 0.020000\n",
-      "2021-09-08 10:58:54,397 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:54,397 EPOCH 3 done: loss 0.6358 - lr 0.0200000\n",
-      "2021-09-08 10:58:54,398 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:58:54,462 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:54,546 epoch 4 - iter 1/3 - loss 0.67968017 - samples/sec: 18.86 - lr: 0.020000\n",
-      "2021-09-08 10:58:54,603 epoch 4 - iter 2/3 - loss 0.66075796 - samples/sec: 17.78 - lr: 0.020000\n",
-      "2021-09-08 10:58:54,649 epoch 4 - iter 3/3 - loss 0.64917368 - samples/sec: 21.74 - lr: 0.020000\n",
-      "2021-09-08 10:58:54,650 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:54,650 EPOCH 4 done: loss 0.6492 - lr 0.0200000\n",
-      "2021-09-08 10:58:54,651 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:43:04,504 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:04,576 epoch 2 - iter 1/3 - loss 0.65769929 - samples/sec: 19.72 - lr: 0.020000\n",
+      "2021-09-21 19:43:04,626 epoch 2 - iter 2/3 - loss 0.64711726 - samples/sec: 20.22 - lr: 0.020000\n",
+      "2021-09-21 19:43:04,679 epoch 2 - iter 3/3 - loss 0.64448833 - samples/sec: 19.11 - lr: 0.020000\n",
+      "2021-09-21 19:43:04,680 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:04,681 EPOCH 2 done: loss 0.6445 - lr 0.0200000\n",
+      "2021-09-21 19:43:04,681 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:43:04,683 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:04,754 epoch 3 - iter 1/3 - loss 0.63204128 - samples/sec: 18.04 - lr: 0.020000\n",
+      "2021-09-21 19:43:04,804 epoch 3 - iter 2/3 - loss 0.63287437 - samples/sec: 20.15 - lr: 0.020000\n",
+      "2021-09-21 19:43:04,852 epoch 3 - iter 3/3 - loss 0.63199633 - samples/sec: 21.10 - lr: 0.020000\n",
+      "2021-09-21 19:43:04,853 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:04,853 EPOCH 3 done: loss 0.6320 - lr 0.0200000\n",
+      "2021-09-21 19:43:04,854 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:43:04,856 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:04,932 epoch 4 - iter 1/3 - loss 0.64280230 - samples/sec: 17.94 - lr: 0.020000\n",
+      "2021-09-21 19:43:04,990 epoch 4 - iter 2/3 - loss 0.63476288 - samples/sec: 17.35 - lr: 0.020000\n",
+      "2021-09-21 19:43:05,040 epoch 4 - iter 3/3 - loss 0.63191162 - samples/sec: 20.28 - lr: 0.020000\n",
+      "2021-09-21 19:43:05,041 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:05,041 EPOCH 4 done: loss 0.6319 - lr 0.0200000\n",
+      "2021-09-21 19:43:05,041 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:43:05,045 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:05,121 epoch 5 - iter 1/3 - loss 0.64624077 - samples/sec: 19.12 - lr: 0.020000\n",
+      "2021-09-21 19:43:05,173 epoch 5 - iter 2/3 - loss 0.63646001 - samples/sec: 19.35 - lr: 0.020000\n",
+      "2021-09-21 19:43:05,227 epoch 5 - iter 3/3 - loss 0.63703726 - samples/sec: 18.67 - lr: 0.020000\n",
+      "2021-09-21 19:43:05,228 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:05,228 EPOCH 5 done: loss 0.6370 - lr 0.0200000\n",
+      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 19:43:05,229 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:43:05,231 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:05,303 epoch 6 - iter 1/3 - loss 0.62049270 - samples/sec: 20.81 - lr: 0.010000\n",
+      "2021-09-21 19:43:05,351 epoch 6 - iter 2/3 - loss 0.62148574 - samples/sec: 21.03 - lr: 0.010000\n",
+      "2021-09-21 19:43:05,406 epoch 6 - iter 3/3 - loss 0.62231765 - samples/sec: 18.33 - lr: 0.010000\n",
+      "2021-09-21 19:43:05,407 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:05,407 EPOCH 6 done: loss 0.6223 - lr 0.0100000\n",
+      "2021-09-21 19:43:05,408 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:43:05,410 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:05,478 epoch 7 - iter 1/3 - loss 0.61804569 - samples/sec: 18.96 - lr: 0.010000\n",
+      "2021-09-21 19:43:05,531 epoch 7 - iter 2/3 - loss 0.64193982 - samples/sec: 19.02 - lr: 0.010000\n",
+      "2021-09-21 19:43:05,587 epoch 7 - iter 3/3 - loss 0.65209299 - samples/sec: 18.22 - lr: 0.010000\n",
+      "2021-09-21 19:43:05,588 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:05,588 EPOCH 7 done: loss 0.6521 - lr 0.0100000\n",
+      "2021-09-21 19:43:05,588 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:58:58,756 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:58,848 epoch 5 - iter 1/3 - loss 0.62864602 - samples/sec: 13.51 - lr: 0.020000\n",
-      "2021-09-08 10:58:58,902 epoch 5 - iter 2/3 - loss 0.63439879 - samples/sec: 18.53 - lr: 0.020000\n",
-      "2021-09-08 10:58:58,963 epoch 5 - iter 3/3 - loss 0.63457000 - samples/sec: 16.46 - lr: 0.020000\n",
-      "2021-09-08 10:58:58,964 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:58,965 EPOCH 5 done: loss 0.6346 - lr 0.0200000\n",
-      "2021-09-08 10:58:58,965 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:58:58,967 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:59,040 epoch 6 - iter 1/3 - loss 0.63164526 - samples/sec: 18.42 - lr: 0.020000\n",
-      "2021-09-08 10:58:59,089 epoch 6 - iter 2/3 - loss 0.62726223 - samples/sec: 20.59 - lr: 0.020000\n",
-      "2021-09-08 10:58:59,147 epoch 6 - iter 3/3 - loss 0.62948944 - samples/sec: 17.25 - lr: 0.020000\n",
-      "2021-09-08 10:58:59,148 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:59,148 EPOCH 6 done: loss 0.6295 - lr 0.0200000\n",
-      "2021-09-08 10:58:59,149 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:58:59,151 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:59,226 epoch 7 - iter 1/3 - loss 0.62795991 - samples/sec: 16.57 - lr: 0.020000\n",
-      "2021-09-08 10:58:59,290 epoch 7 - iter 2/3 - loss 0.62430811 - samples/sec: 15.76 - lr: 0.020000\n",
-      "2021-09-08 10:58:59,352 epoch 7 - iter 3/3 - loss 0.63432656 - samples/sec: 16.23 - lr: 0.020000\n",
-      "2021-09-08 10:58:59,353 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:59,353 EPOCH 7 done: loss 0.6343 - lr 0.0200000\n",
-      "2021-09-08 10:58:59,353 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:58:59,355 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:59,430 epoch 8 - iter 1/3 - loss 0.66723317 - samples/sec: 16.16 - lr: 0.020000\n",
-      "2021-09-08 10:58:59,478 epoch 8 - iter 2/3 - loss 0.65131015 - samples/sec: 21.01 - lr: 0.020000\n",
-      "2021-09-08 10:58:59,543 epoch 8 - iter 3/3 - loss 0.65184663 - samples/sec: 15.48 - lr: 0.020000\n",
-      "2021-09-08 10:58:59,544 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:59,544 EPOCH 8 done: loss 0.6518 - lr 0.0200000\n",
-      "2021-09-08 10:58:59,545 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:43:09,589 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:09,665 epoch 8 - iter 1/3 - loss 0.67169094 - samples/sec: 18.75 - lr: 0.010000\n",
+      "2021-09-21 19:43:09,714 epoch 8 - iter 2/3 - loss 0.66228235 - samples/sec: 20.67 - lr: 0.010000\n",
+      "2021-09-21 19:43:09,761 epoch 8 - iter 3/3 - loss 0.65820577 - samples/sec: 21.53 - lr: 0.010000\n",
+      "2021-09-21 19:43:09,762 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:09,762 EPOCH 8 done: loss 0.6582 - lr 0.0100000\n",
+      "2021-09-21 19:43:09,763 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:59:03,477 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:03,550 epoch 9 - iter 1/3 - loss 0.64548081 - samples/sec: 18.52 - lr: 0.020000\n",
-      "2021-09-08 10:59:03,601 epoch 9 - iter 2/3 - loss 0.63402402 - samples/sec: 19.59 - lr: 0.020000\n",
-      "2021-09-08 10:59:03,666 epoch 9 - iter 3/3 - loss 0.63559045 - samples/sec: 15.58 - lr: 0.020000\n",
-      "2021-09-08 10:59:03,667 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:03,667 EPOCH 9 done: loss 0.6356 - lr 0.0200000\n",
-      "2021-09-08 10:59:03,667 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:59:03,669 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:03,744 epoch 10 - iter 1/3 - loss 0.66729969 - samples/sec: 16.60 - lr: 0.020000\n",
-      "2021-09-08 10:59:03,807 epoch 10 - iter 2/3 - loss 0.65644598 - samples/sec: 15.94 - lr: 0.020000\n",
-      "2021-09-08 10:59:03,855 epoch 10 - iter 3/3 - loss 0.64871997 - samples/sec: 20.83 - lr: 0.020000\n",
-      "2021-09-08 10:59:03,856 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:03,857 EPOCH 10 done: loss 0.6487 - lr 0.0200000\n",
-      "2021-09-08 10:59:03,857 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:59:07,759 Test data not provided setting final score to 0\n",
-      "2021-09-08 10:59:22,095 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
+      "2021-09-21 19:43:17,550 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:17,627 epoch 9 - iter 1/3 - loss 0.65790945 - samples/sec: 15.99 - lr: 0.010000\n",
+      "2021-09-21 19:43:17,693 epoch 9 - iter 2/3 - loss 0.64967164 - samples/sec: 15.39 - lr: 0.010000\n",
+      "2021-09-21 19:43:17,751 epoch 9 - iter 3/3 - loss 0.65402067 - samples/sec: 17.16 - lr: 0.010000\n",
+      "2021-09-21 19:43:17,752 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:17,753 EPOCH 9 done: loss 0.6540 - lr 0.0100000\n",
+      "2021-09-21 19:43:17,753 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:43:17,762 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:17,832 epoch 10 - iter 1/3 - loss 0.61726367 - samples/sec: 17.93 - lr: 0.010000\n",
+      "2021-09-21 19:43:17,889 epoch 10 - iter 2/3 - loss 0.62261432 - samples/sec: 17.83 - lr: 0.010000\n",
+      "2021-09-21 19:43:17,946 epoch 10 - iter 3/3 - loss 0.62630258 - samples/sec: 17.55 - lr: 0.010000\n",
+      "2021-09-21 19:43:17,947 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:17,948 EPOCH 10 done: loss 0.6263 - lr 0.0100000\n",
+      "2021-09-21 19:43:17,948 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:43:29,336 Test data not provided setting final score to 0\n",
+      "2021-09-21 19:43:44,136 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:59:26,475 Computing label dictionary. Progress:\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "100%|██████████| 3/3 [00:00<00:00, 18131.00it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 10:59:26,476 [b'having the quality of something harmful or unpleasant', b'one who does not side with any party in a war or dispute', b'involving advantage or good']\n"
+      "2021-09-21 19:43:49,010 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\n"
+      "100%|██████████| 3/3 [00:00<00:00, 19508.39it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:59:28,631 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:28,633 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:43:49,012 [b'having the quality of something harmful or unpleasant', b'one who does not side with any party in a war or dispute', b'involving advantage or good']\n",
+      "2021-09-21 19:43:49,146 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:49,148 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -5411,119 +5413,130 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:59:28,633 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:28,633 Corpus: \"Corpus: 3 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 10:59:28,634 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:28,634 Parameters:\n",
-      "2021-09-08 10:59:28,634  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:59:28,635  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:59:28,635  - patience: \"3\"\n",
-      "2021-09-08 10:59:28,635  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:59:28,635  - max_epochs: \"10\"\n",
-      "2021-09-08 10:59:28,636  - shuffle: \"True\"\n",
-      "2021-09-08 10:59:28,636  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:59:28,636  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:59:28,637 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:28,637 Model training base path: \"temp\"\n",
-      "2021-09-08 10:59:28,637 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:28,637 Device: cuda:0\n",
-      "2021-09-08 10:59:28,638 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:28,638 Embeddings storage mode: cpu\n",
-      "2021-09-08 10:59:28,689 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:28,767 epoch 1 - iter 1/3 - loss 0.62661690 - samples/sec: 19.45 - lr: 0.020000\n",
-      "2021-09-08 10:59:28,818 epoch 1 - iter 2/3 - loss 0.63240188 - samples/sec: 19.82 - lr: 0.020000\n",
-      "2021-09-08 10:59:28,890 epoch 1 - iter 3/3 - loss 0.63267430 - samples/sec: 13.82 - lr: 0.020000\n",
-      "2021-09-08 10:59:28,891 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:28,892 EPOCH 1 done: loss 0.6327 - lr 0.0200000\n",
-      "2021-09-08 10:59:28,892 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:59:35,135 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:35,193 epoch 2 - iter 1/3 - loss 0.63987184 - samples/sec: 22.76 - lr: 0.020000\n",
-      "2021-09-08 10:59:35,238 epoch 2 - iter 2/3 - loss 0.63440242 - samples/sec: 22.43 - lr: 0.020000\n",
-      "2021-09-08 10:59:35,282 epoch 2 - iter 3/3 - loss 0.63269653 - samples/sec: 23.06 - lr: 0.020000\n",
-      "2021-09-08 10:59:35,283 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:35,283 EPOCH 2 done: loss 0.6327 - lr 0.0200000\n",
-      "2021-09-08 10:59:35,284 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:59:42,492 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:42,564 epoch 3 - iter 1/3 - loss 0.64523178 - samples/sec: 19.19 - lr: 0.020000\n",
-      "2021-09-08 10:59:42,616 epoch 3 - iter 2/3 - loss 0.64292306 - samples/sec: 19.51 - lr: 0.020000\n",
-      "2021-09-08 10:59:42,678 epoch 3 - iter 3/3 - loss 0.64266906 - samples/sec: 16.06 - lr: 0.020000\n",
-      "2021-09-08 10:59:42,679 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:42,680 EPOCH 3 done: loss 0.6427 - lr 0.0200000\n",
-      "2021-09-08 10:59:42,680 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:43:49,148 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:49,149 Corpus: \"Corpus: 3 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 19:43:49,149 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:49,149 Parameters:\n",
+      "2021-09-21 19:43:49,150  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:43:49,150  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:43:49,150  - patience: \"3\"\n",
+      "2021-09-21 19:43:49,150  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:43:49,151  - max_epochs: \"10\"\n",
+      "2021-09-21 19:43:49,151  - shuffle: \"True\"\n",
+      "2021-09-21 19:43:49,151  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:43:49,152  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:43:49,152 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:49,152 Model training base path: \"temp\"\n",
+      "2021-09-21 19:43:49,152 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:49,153 Device: cuda:0\n",
+      "2021-09-21 19:43:49,153 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:49,153 Embeddings storage mode: cpu\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 19:43:49,327 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:49,379 epoch 1 - iter 1/3 - loss 0.64057273 - samples/sec: 25.82 - lr: 0.020000\n",
+      "2021-09-21 19:43:49,425 epoch 1 - iter 2/3 - loss 0.63027430 - samples/sec: 22.20 - lr: 0.020000\n",
+      "2021-09-21 19:43:49,471 epoch 1 - iter 3/3 - loss 0.63276623 - samples/sec: 21.99 - lr: 0.020000\n",
+      "2021-09-21 19:43:49,472 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:49,472 EPOCH 1 done: loss 0.6328 - lr 0.0200000\n",
+      "2021-09-21 19:43:49,472 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:59:48,973 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:49,065 epoch 4 - iter 1/3 - loss 0.63082534 - samples/sec: 13.39 - lr: 0.020000\n",
-      "2021-09-08 10:59:49,111 epoch 4 - iter 2/3 - loss 0.64131847 - samples/sec: 22.18 - lr: 0.020000\n",
-      "2021-09-08 10:59:49,182 epoch 4 - iter 3/3 - loss 0.64122804 - samples/sec: 14.08 - lr: 0.020000\n",
-      "2021-09-08 10:59:49,184 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:49,184 EPOCH 4 done: loss 0.6412 - lr 0.0200000\n",
-      "2021-09-08 10:59:49,184 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:59:49,270 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:49,355 epoch 5 - iter 1/3 - loss 0.63671398 - samples/sec: 17.96 - lr: 0.020000\n",
-      "2021-09-08 10:59:49,420 epoch 5 - iter 2/3 - loss 0.63424525 - samples/sec: 15.45 - lr: 0.020000\n",
-      "2021-09-08 10:59:49,477 epoch 5 - iter 3/3 - loss 0.63968523 - samples/sec: 17.55 - lr: 0.020000\n",
-      "2021-09-08 10:59:49,478 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:49,479 EPOCH 5 done: loss 0.6397 - lr 0.0200000\n",
-      "2021-09-08 10:59:49,479 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:59:49,554 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:49,651 epoch 6 - iter 1/3 - loss 0.64900023 - samples/sec: 15.35 - lr: 0.020000\n",
-      "2021-09-08 10:59:49,706 epoch 6 - iter 2/3 - loss 0.65486592 - samples/sec: 18.31 - lr: 0.020000\n",
-      "2021-09-08 10:59:49,756 epoch 6 - iter 3/3 - loss 0.64570536 - samples/sec: 20.06 - lr: 0.020000\n",
-      "2021-09-08 10:59:49,757 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:49,757 EPOCH 6 done: loss 0.6457 - lr 0.0200000\n",
-      "2021-09-08 10:59:49,758 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:43:56,182 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:56,243 epoch 2 - iter 1/3 - loss 0.64869779 - samples/sec: 21.50 - lr: 0.020000\n",
+      "2021-09-21 19:43:56,289 epoch 2 - iter 2/3 - loss 0.63677835 - samples/sec: 21.91 - lr: 0.020000\n",
+      "2021-09-21 19:43:56,335 epoch 2 - iter 3/3 - loss 0.63074913 - samples/sec: 22.01 - lr: 0.020000\n",
+      "2021-09-21 19:43:56,336 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:56,337 EPOCH 2 done: loss 0.6307 - lr 0.0200000\n",
+      "2021-09-21 19:43:56,337 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:43:56,390 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:56,448 epoch 3 - iter 1/3 - loss 0.62507004 - samples/sec: 22.33 - lr: 0.020000\n",
+      "2021-09-21 19:43:56,494 epoch 3 - iter 2/3 - loss 0.62682408 - samples/sec: 21.95 - lr: 0.020000\n",
+      "2021-09-21 19:43:56,540 epoch 3 - iter 3/3 - loss 0.63129145 - samples/sec: 21.89 - lr: 0.020000\n",
+      "2021-09-21 19:43:56,541 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:56,541 EPOCH 3 done: loss 0.6313 - lr 0.0200000\n",
+      "2021-09-21 19:43:56,541 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:43:56,545 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:56,603 epoch 4 - iter 1/3 - loss 0.63583636 - samples/sec: 22.07 - lr: 0.020000\n",
+      "2021-09-21 19:43:56,649 epoch 4 - iter 2/3 - loss 0.63232654 - samples/sec: 21.95 - lr: 0.020000\n",
+      "2021-09-21 19:43:56,695 epoch 4 - iter 3/3 - loss 0.62948277 - samples/sec: 21.88 - lr: 0.020000\n",
+      "2021-09-21 19:43:56,696 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:56,696 EPOCH 4 done: loss 0.6295 - lr 0.0200000\n",
+      "2021-09-21 19:43:56,697 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:43:56,721 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:56,779 epoch 5 - iter 1/3 - loss 0.64135420 - samples/sec: 22.14 - lr: 0.020000\n",
+      "2021-09-21 19:43:56,825 epoch 5 - iter 2/3 - loss 0.64338067 - samples/sec: 22.00 - lr: 0.020000\n",
+      "2021-09-21 19:43:56,874 epoch 5 - iter 3/3 - loss 0.63553959 - samples/sec: 20.53 - lr: 0.020000\n",
+      "2021-09-21 19:43:56,875 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:43:56,876 EPOCH 5 done: loss 0.6355 - lr 0.0200000\n",
+      "2021-09-21 19:43:56,876 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:00:02,336 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:03,404 epoch 7 - iter 1/3 - loss 0.63299328 - samples/sec: 21.67 - lr: 0.020000\n",
-      "2021-09-08 11:00:03,457 epoch 7 - iter 2/3 - loss 0.63262609 - samples/sec: 19.08 - lr: 0.020000\n",
-      "2021-09-08 11:00:03,503 epoch 7 - iter 3/3 - loss 0.64479828 - samples/sec: 22.21 - lr: 0.020000\n",
-      "2021-09-08 11:00:03,504 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:03,504 EPOCH 7 done: loss 0.6448 - lr 0.0200000\n",
-      "2021-09-08 11:00:03,505 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:00:03,631 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:03,714 epoch 8 - iter 1/3 - loss 0.62497693 - samples/sec: 18.55 - lr: 0.020000\n",
-      "2021-09-08 11:00:03,765 epoch 8 - iter 2/3 - loss 0.62443757 - samples/sec: 19.80 - lr: 0.020000\n",
-      "2021-09-08 11:00:03,833 epoch 8 - iter 3/3 - loss 0.62857411 - samples/sec: 14.88 - lr: 0.020000\n",
-      "2021-09-08 11:00:03,834 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:03,835 EPOCH 8 done: loss 0.6286 - lr 0.0200000\n",
-      "2021-09-08 11:00:03,835 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:00:03,899 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:03,979 epoch 9 - iter 1/3 - loss 0.66312277 - samples/sec: 16.22 - lr: 0.020000\n",
-      "2021-09-08 11:00:04,050 epoch 9 - iter 2/3 - loss 0.65391988 - samples/sec: 14.18 - lr: 0.020000\n",
-      "2021-09-08 11:00:04,111 epoch 9 - iter 3/3 - loss 0.65358178 - samples/sec: 16.38 - lr: 0.020000\n",
-      "2021-09-08 11:00:04,112 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:04,113 EPOCH 9 done: loss 0.6536 - lr 0.0200000\n",
-      "2021-09-08 11:00:04,113 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:44:01,069 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:01,159 epoch 6 - iter 1/3 - loss 0.61800039 - samples/sec: 15.40 - lr: 0.020000\n",
+      "2021-09-21 19:44:01,215 epoch 6 - iter 2/3 - loss 0.62356538 - samples/sec: 18.20 - lr: 0.020000\n",
+      "2021-09-21 19:44:01,264 epoch 6 - iter 3/3 - loss 0.63243598 - samples/sec: 20.51 - lr: 0.020000\n",
+      "2021-09-21 19:44:01,265 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:01,266 EPOCH 6 done: loss 0.6324 - lr 0.0200000\n",
+      "2021-09-21 19:44:01,266 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:44:01,268 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:01,342 epoch 7 - iter 1/3 - loss 0.62660462 - samples/sec: 19.84 - lr: 0.020000\n",
+      "2021-09-21 19:44:01,389 epoch 7 - iter 2/3 - loss 0.62702173 - samples/sec: 21.14 - lr: 0.020000\n",
+      "2021-09-21 19:44:01,438 epoch 7 - iter 3/3 - loss 0.63225516 - samples/sec: 20.76 - lr: 0.020000\n",
+      "2021-09-21 19:44:01,439 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:01,439 EPOCH 7 done: loss 0.6323 - lr 0.0200000\n",
+      "2021-09-21 19:44:01,440 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:44:01,442 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:01,516 epoch 8 - iter 1/3 - loss 0.63830382 - samples/sec: 19.38 - lr: 0.020000\n",
+      "2021-09-21 19:44:01,571 epoch 8 - iter 2/3 - loss 0.64535618 - samples/sec: 18.04 - lr: 0.020000\n",
+      "2021-09-21 19:44:01,630 epoch 8 - iter 3/3 - loss 0.64022011 - samples/sec: 17.14 - lr: 0.020000\n",
+      "2021-09-21 19:44:01,631 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:01,632 EPOCH 8 done: loss 0.6402 - lr 0.0200000\n",
+      "2021-09-21 19:44:01,632 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:00:19,646 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:19,704 epoch 10 - iter 1/3 - loss 0.64181417 - samples/sec: 22.67 - lr: 0.020000\n",
-      "2021-09-08 11:00:19,748 epoch 10 - iter 2/3 - loss 0.64232451 - samples/sec: 23.32 - lr: 0.020000\n",
-      "2021-09-08 11:00:19,791 epoch 10 - iter 3/3 - loss 0.64376789 - samples/sec: 23.57 - lr: 0.020000\n",
-      "2021-09-08 11:00:19,792 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:19,792 EPOCH 10 done: loss 0.6438 - lr 0.0200000\n",
-      "2021-09-08 11:00:19,792 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:00:39,397 Test data not provided setting final score to 0\n",
-      "2021-09-08 11:00:54,136 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
+      "2021-09-21 19:44:05,998 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:06,070 epoch 9 - iter 1/3 - loss 0.64845115 - samples/sec: 19.77 - lr: 0.020000\n",
+      "2021-09-21 19:44:06,127 epoch 9 - iter 2/3 - loss 0.63560882 - samples/sec: 17.63 - lr: 0.020000\n",
+      "2021-09-21 19:44:06,182 epoch 9 - iter 3/3 - loss 0.63869842 - samples/sec: 18.34 - lr: 0.020000\n",
+      "2021-09-21 19:44:06,183 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:06,184 EPOCH 9 done: loss 0.6387 - lr 0.0200000\n",
+      "2021-09-21 19:44:06,184 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:44:06,186 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:06,251 epoch 10 - iter 1/3 - loss 0.63004571 - samples/sec: 21.21 - lr: 0.020000\n",
+      "2021-09-21 19:44:06,308 epoch 10 - iter 2/3 - loss 0.63082793 - samples/sec: 17.74 - lr: 0.020000\n",
+      "2021-09-21 19:44:06,368 epoch 10 - iter 3/3 - loss 0.63963528 - samples/sec: 16.79 - lr: 0.020000\n",
+      "2021-09-21 19:44:06,369 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:06,369 EPOCH 10 done: loss 0.6396 - lr 0.0200000\n",
+      "2021-09-21 19:44:06,370 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:44:12,642 Test data not provided setting final score to 0\n",
+      "2021-09-21 19:44:33,484 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:00:58,217 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:44:37,427 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 3/3 [00:00<00:00, 13873.11it/s]"
+      "100%|██████████| 3/3 [00:00<00:00, 18950.17it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:00:58,219 [b'having the quality of something harmful or unpleasant', b'one who does not side with any party in a war or dispute', b'involving advantage or good']\n",
-      "2021-09-08 11:00:58,232 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:58,234 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:44:37,429 [b'having the quality of something harmful or unpleasant', b'one who does not side with any party in a war or dispute', b'involving advantage or good']\n",
+      "2021-09-21 19:44:37,437 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:37,439 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -5836,27 +5849,31 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:00:58,235 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:58,235 Corpus: \"Corpus: 3 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 11:00:58,236 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:58,236 Parameters:\n",
-      "2021-09-08 11:00:58,236  - learning_rate: \"0.02\"\n",
-      "2021-09-08 11:00:58,237  - mini_batch_size: \"1\"\n",
-      "2021-09-08 11:00:58,237  - patience: \"3\"\n",
-      "2021-09-08 11:00:58,237  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 11:00:58,238  - max_epochs: \"10\"\n",
-      "2021-09-08 11:00:58,238  - shuffle: \"True\"\n",
-      "2021-09-08 11:00:58,239  - train_with_dev: \"False\"\n",
-      "2021-09-08 11:00:58,239  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 11:00:58,239 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:58,240 Model training base path: \"temp\"\n",
-      "2021-09-08 11:00:58,240 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:58,240 Device: cuda:0\n",
-      "2021-09-08 11:00:58,241 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:58,241 Embeddings storage mode: cpu\n",
-      "2021-09-08 11:00:58,255 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:58,335 epoch 1 - iter 1/3 - loss 0.64974397 - samples/sec: 18.60 - lr: 0.020000\n",
-      "2021-09-08 11:00:58,399 epoch 1 - iter 2/3 - loss 0.65145263 - samples/sec: 16.04 - lr: 0.020000\n"
+      "2021-09-21 19:44:37,440 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:37,440 Corpus: \"Corpus: 3 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 19:44:37,440 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:37,441 Parameters:\n",
+      "2021-09-21 19:44:37,441  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:44:37,441  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:44:37,441  - patience: \"3\"\n",
+      "2021-09-21 19:44:37,442  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:44:37,442  - max_epochs: \"10\"\n",
+      "2021-09-21 19:44:37,442  - shuffle: \"True\"\n",
+      "2021-09-21 19:44:37,442  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:44:37,443  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:44:37,443 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:37,443 Model training base path: \"temp\"\n",
+      "2021-09-21 19:44:37,444 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:37,444 Device: cuda:0\n",
+      "2021-09-21 19:44:37,444 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:37,444 Embeddings storage mode: cpu\n",
+      "2021-09-21 19:44:37,451 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:37,516 epoch 1 - iter 1/3 - loss 0.62224442 - samples/sec: 21.65 - lr: 0.020000\n",
+      "2021-09-21 19:44:37,569 epoch 1 - iter 2/3 - loss 0.61603409 - samples/sec: 19.08 - lr: 0.020000\n",
+      "2021-09-21 19:44:37,626 epoch 1 - iter 3/3 - loss 0.62078748 - samples/sec: 17.80 - lr: 0.020000\n",
+      "2021-09-21 19:44:37,627 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:37,627 EPOCH 1 done: loss 0.6208 - lr 0.0200000\n",
+      "2021-09-21 19:44:37,627 BAD EPOCHS (no improvement): 0\n"
      ]
     },
     {
@@ -5870,96 +5887,95 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:00:58,457 epoch 1 - iter 3/3 - loss 0.64602518 - samples/sec: 17.39 - lr: 0.020000\n",
-      "2021-09-08 11:00:58,458 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:58,459 EPOCH 1 done: loss 0.6460 - lr 0.0200000\n",
-      "2021-09-08 11:00:58,459 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:01:10,175 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:10,260 epoch 2 - iter 1/3 - loss 0.62915164 - samples/sec: 15.82 - lr: 0.020000\n",
-      "2021-09-08 11:01:10,321 epoch 2 - iter 2/3 - loss 0.64057082 - samples/sec: 16.64 - lr: 0.020000\n",
-      "2021-09-08 11:01:10,373 epoch 2 - iter 3/3 - loss 0.63504394 - samples/sec: 19.60 - lr: 0.020000\n",
-      "2021-09-08 11:01:10,374 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:10,375 EPOCH 2 done: loss 0.6350 - lr 0.0200000\n",
-      "2021-09-08 11:01:10,375 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:01:12,542 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:12,625 epoch 3 - iter 1/3 - loss 0.63797599 - samples/sec: 16.23 - lr: 0.020000\n",
-      "2021-09-08 11:01:12,674 epoch 3 - iter 2/3 - loss 0.63896310 - samples/sec: 20.43 - lr: 0.020000\n",
-      "2021-09-08 11:01:12,730 epoch 3 - iter 3/3 - loss 0.63379151 - samples/sec: 18.08 - lr: 0.020000\n",
-      "2021-09-08 11:01:12,731 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:12,732 EPOCH 3 done: loss 0.6338 - lr 0.0200000\n",
-      "2021-09-08 11:01:12,732 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:01:12,744 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:12,832 epoch 4 - iter 1/3 - loss 0.61303061 - samples/sec: 17.20 - lr: 0.020000\n",
-      "2021-09-08 11:01:12,898 epoch 4 - iter 2/3 - loss 0.61950606 - samples/sec: 15.13 - lr: 0.020000\n",
-      "2021-09-08 11:01:12,959 epoch 4 - iter 3/3 - loss 0.62210387 - samples/sec: 16.49 - lr: 0.020000\n",
-      "2021-09-08 11:01:12,960 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:12,961 EPOCH 4 done: loss 0.6221 - lr 0.0200000\n",
-      "2021-09-08 11:01:12,961 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:01:12,971 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:13,047 epoch 5 - iter 1/3 - loss 0.61641186 - samples/sec: 17.56 - lr: 0.020000\n",
-      "2021-09-08 11:01:13,111 epoch 5 - iter 2/3 - loss 0.62099779 - samples/sec: 15.70 - lr: 0.020000\n",
-      "2021-09-08 11:01:13,180 epoch 5 - iter 3/3 - loss 0.62840782 - samples/sec: 14.46 - lr: 0.020000\n",
-      "2021-09-08 11:01:13,181 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:13,182 EPOCH 5 done: loss 0.6284 - lr 0.0200000\n",
-      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 11:01:13,182 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 11:01:13,185 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:13,259 epoch 6 - iter 1/3 - loss 0.63951236 - samples/sec: 16.97 - lr: 0.010000\n",
-      "2021-09-08 11:01:13,308 epoch 6 - iter 2/3 - loss 0.63706827 - samples/sec: 20.78 - lr: 0.010000\n",
-      "2021-09-08 11:01:13,364 epoch 6 - iter 3/3 - loss 0.63854581 - samples/sec: 17.90 - lr: 0.010000\n",
-      "2021-09-08 11:01:13,365 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:13,365 EPOCH 6 done: loss 0.6385 - lr 0.0100000\n",
-      "2021-09-08 11:01:13,366 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:01:13,403 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:13,489 epoch 7 - iter 1/3 - loss 0.61334699 - samples/sec: 15.78 - lr: 0.010000\n",
-      "2021-09-08 11:01:13,549 epoch 7 - iter 2/3 - loss 0.62963212 - samples/sec: 16.79 - lr: 0.010000\n",
-      "2021-09-08 11:01:13,601 epoch 7 - iter 3/3 - loss 0.64182065 - samples/sec: 19.70 - lr: 0.010000\n",
-      "2021-09-08 11:01:13,602 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:13,602 EPOCH 7 done: loss 0.6418 - lr 0.0100000\n",
-      "2021-09-08 11:01:13,602 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:01:13,605 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:13,678 epoch 8 - iter 1/3 - loss 0.63124794 - samples/sec: 18.55 - lr: 0.010000\n",
-      "2021-09-08 11:01:13,733 epoch 8 - iter 2/3 - loss 0.63516003 - samples/sec: 18.27 - lr: 0.010000\n",
-      "2021-09-08 11:01:13,795 epoch 8 - iter 3/3 - loss 0.63775327 - samples/sec: 16.27 - lr: 0.010000\n",
-      "2021-09-08 11:01:13,796 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:13,797 EPOCH 8 done: loss 0.6378 - lr 0.0100000\n",
-      "2021-09-08 11:01:13,797 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:01:13,877 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:13,960 epoch 9 - iter 1/3 - loss 0.63255018 - samples/sec: 16.30 - lr: 0.010000\n",
-      "2021-09-08 11:01:14,033 epoch 9 - iter 2/3 - loss 0.63659471 - samples/sec: 13.92 - lr: 0.010000\n",
-      "2021-09-08 11:01:14,100 epoch 9 - iter 3/3 - loss 0.63665448 - samples/sec: 14.88 - lr: 0.010000\n",
-      "2021-09-08 11:01:14,102 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:14,102 EPOCH 9 done: loss 0.6367 - lr 0.0100000\n",
-      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 11:01:14,102 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 11:01:14,192 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:14,291 epoch 10 - iter 1/3 - loss 0.63087350 - samples/sec: 14.76 - lr: 0.005000\n",
-      "2021-09-08 11:01:14,347 epoch 10 - iter 2/3 - loss 0.63539869 - samples/sec: 17.92 - lr: 0.005000\n",
-      "2021-09-08 11:01:14,410 epoch 10 - iter 3/3 - loss 0.63481520 - samples/sec: 15.93 - lr: 0.005000\n",
-      "2021-09-08 11:01:14,411 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:14,412 EPOCH 10 done: loss 0.6348 - lr 0.0050000\n",
-      "2021-09-08 11:01:14,412 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:01:23,605 Test data not provided setting final score to 0\n",
-      "2021-09-08 11:01:42,377 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
+      "2021-09-21 19:44:41,730 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:41,803 epoch 2 - iter 1/3 - loss 0.64885408 - samples/sec: 18.89 - lr: 0.020000\n",
+      "2021-09-21 19:44:41,863 epoch 2 - iter 2/3 - loss 0.63570857 - samples/sec: 16.90 - lr: 0.020000\n",
+      "2021-09-21 19:44:41,917 epoch 2 - iter 3/3 - loss 0.62763300 - samples/sec: 18.79 - lr: 0.020000\n",
+      "2021-09-21 19:44:41,918 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:41,918 EPOCH 2 done: loss 0.6276 - lr 0.0200000\n",
+      "2021-09-21 19:44:41,918 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:44:45,763 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:45,844 epoch 3 - iter 1/3 - loss 0.62479472 - samples/sec: 18.06 - lr: 0.020000\n",
+      "2021-09-21 19:44:45,896 epoch 3 - iter 2/3 - loss 0.63569191 - samples/sec: 19.51 - lr: 0.020000\n",
+      "2021-09-21 19:44:45,954 epoch 3 - iter 3/3 - loss 0.63805280 - samples/sec: 17.47 - lr: 0.020000\n",
+      "2021-09-21 19:44:45,955 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:45,955 EPOCH 3 done: loss 0.6381 - lr 0.0200000\n",
+      "2021-09-21 19:44:45,956 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:44:54,030 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:54,091 epoch 4 - iter 1/3 - loss 0.63696206 - samples/sec: 21.56 - lr: 0.020000\n",
+      "2021-09-21 19:44:54,137 epoch 4 - iter 2/3 - loss 0.65215632 - samples/sec: 21.78 - lr: 0.020000\n",
+      "2021-09-21 19:44:54,184 epoch 4 - iter 3/3 - loss 0.64459952 - samples/sec: 21.84 - lr: 0.020000\n",
+      "2021-09-21 19:44:54,185 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:54,185 EPOCH 4 done: loss 0.6446 - lr 0.0200000\n",
+      "2021-09-21 19:44:54,185 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:44:59,071 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:59,132 epoch 5 - iter 1/3 - loss 0.62662500 - samples/sec: 21.47 - lr: 0.020000\n",
+      "2021-09-21 19:44:59,179 epoch 5 - iter 2/3 - loss 0.64703739 - samples/sec: 21.85 - lr: 0.020000\n",
+      "2021-09-21 19:44:59,226 epoch 5 - iter 3/3 - loss 0.64083523 - samples/sec: 21.30 - lr: 0.020000\n",
+      "2021-09-21 19:44:59,227 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:59,227 EPOCH 5 done: loss 0.6408 - lr 0.0200000\n",
+      "2021-09-21 19:44:59,228 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:44:59,304 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:59,362 epoch 6 - iter 1/3 - loss 0.61550325 - samples/sec: 22.01 - lr: 0.020000\n",
+      "2021-09-21 19:44:59,407 epoch 6 - iter 2/3 - loss 0.61803851 - samples/sec: 22.24 - lr: 0.020000\n",
+      "2021-09-21 19:44:59,453 epoch 6 - iter 3/3 - loss 0.62816509 - samples/sec: 21.93 - lr: 0.020000\n",
+      "2021-09-21 19:44:59,454 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:59,454 EPOCH 6 done: loss 0.6282 - lr 0.0200000\n",
+      "2021-09-21 19:44:59,455 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:44:59,534 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:59,592 epoch 7 - iter 1/3 - loss 0.63454109 - samples/sec: 22.18 - lr: 0.020000\n",
+      "2021-09-21 19:44:59,637 epoch 7 - iter 2/3 - loss 0.63722509 - samples/sec: 22.22 - lr: 0.020000\n",
+      "2021-09-21 19:44:59,684 epoch 7 - iter 3/3 - loss 0.63653400 - samples/sec: 21.90 - lr: 0.020000\n",
+      "2021-09-21 19:44:59,685 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:59,685 EPOCH 7 done: loss 0.6365 - lr 0.0200000\n",
+      "2021-09-21 19:44:59,685 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:44:59,761 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:59,819 epoch 8 - iter 1/3 - loss 0.63624328 - samples/sec: 22.13 - lr: 0.020000\n",
+      "2021-09-21 19:44:59,864 epoch 8 - iter 2/3 - loss 0.63712901 - samples/sec: 22.18 - lr: 0.020000\n",
+      "2021-09-21 19:44:59,910 epoch 8 - iter 3/3 - loss 0.63861682 - samples/sec: 21.99 - lr: 0.020000\n",
+      "2021-09-21 19:44:59,911 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:44:59,911 EPOCH 8 done: loss 0.6386 - lr 0.0200000\n",
+      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 19:44:59,912 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:44:59,958 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:00,015 epoch 9 - iter 1/3 - loss 0.63546079 - samples/sec: 22.34 - lr: 0.010000\n",
+      "2021-09-21 19:45:00,060 epoch 9 - iter 2/3 - loss 0.63774335 - samples/sec: 22.32 - lr: 0.010000\n",
+      "2021-09-21 19:45:00,107 epoch 9 - iter 3/3 - loss 0.63119982 - samples/sec: 21.77 - lr: 0.010000\n",
+      "2021-09-21 19:45:00,108 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:00,108 EPOCH 9 done: loss 0.6312 - lr 0.0100000\n",
+      "2021-09-21 19:45:00,109 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:45:01,086 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:01,151 epoch 10 - iter 1/3 - loss 0.67461419 - samples/sec: 19.66 - lr: 0.010000\n",
+      "2021-09-21 19:45:01,212 epoch 10 - iter 2/3 - loss 0.66935104 - samples/sec: 16.47 - lr: 0.010000\n",
+      "2021-09-21 19:45:01,268 epoch 10 - iter 3/3 - loss 0.66787750 - samples/sec: 18.30 - lr: 0.010000\n",
+      "2021-09-21 19:45:01,269 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:01,269 EPOCH 10 done: loss 0.6679 - lr 0.0100000\n",
+      "2021-09-21 19:45:01,269 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:45:12,744 Test data not provided setting final score to 0\n",
+      "2021-09-21 19:45:28,864 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:01:46,956 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:45:32,792 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 3/3 [00:00<00:00, 16666.11it/s]"
+      "100%|██████████| 3/3 [00:00<00:00, 17096.35it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:01:46,958 [b'having the quality of something harmful or unpleasant', b'one who does not side with any party in a war or dispute', b'involving advantage or good']\n",
-      "2021-09-08 11:01:47,150 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:47,152 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:45:32,794 [b'having the quality of something harmful or unpleasant', b'one who does not side with any party in a war or dispute', b'involving advantage or good']\n",
+      "2021-09-21 19:45:32,804 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:32,806 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -6272,23 +6288,31 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:01:47,152 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:47,153 Corpus: \"Corpus: 3 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 11:01:47,153 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:47,153 Parameters:\n",
-      "2021-09-08 11:01:47,154  - learning_rate: \"0.02\"\n",
-      "2021-09-08 11:01:47,154  - mini_batch_size: \"1\"\n",
-      "2021-09-08 11:01:47,154  - patience: \"3\"\n",
-      "2021-09-08 11:01:47,155  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 11:01:47,155  - max_epochs: \"10\"\n",
-      "2021-09-08 11:01:47,155  - shuffle: \"True\"\n",
-      "2021-09-08 11:01:47,155  - train_with_dev: \"False\"\n",
-      "2021-09-08 11:01:47,156  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 11:01:47,156 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:47,156 Model training base path: \"temp\"\n",
-      "2021-09-08 11:01:47,157 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:47,157 Device: cuda:0\n",
-      "2021-09-08 11:01:47,157 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 19:45:32,806 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:32,807 Corpus: \"Corpus: 3 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 19:45:32,807 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:32,807 Parameters:\n",
+      "2021-09-21 19:45:32,807  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:45:32,808  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:45:32,808  - patience: \"3\"\n",
+      "2021-09-21 19:45:32,808  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:45:32,809  - max_epochs: \"10\"\n",
+      "2021-09-21 19:45:32,809  - shuffle: \"True\"\n",
+      "2021-09-21 19:45:32,809  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:45:32,809  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:45:32,810 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:32,810 Model training base path: \"temp\"\n",
+      "2021-09-21 19:45:32,810 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:32,811 Device: cuda:0\n",
+      "2021-09-21 19:45:32,811 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:32,811 Embeddings storage mode: cpu\n",
+      "2021-09-21 19:45:32,817 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:32,877 epoch 1 - iter 1/3 - loss 0.63758999 - samples/sec: 21.50 - lr: 0.020000\n",
+      "2021-09-21 19:45:32,924 epoch 1 - iter 2/3 - loss 0.63170806 - samples/sec: 21.82 - lr: 0.020000\n",
+      "2021-09-21 19:45:32,969 epoch 1 - iter 3/3 - loss 0.63232199 - samples/sec: 22.08 - lr: 0.020000\n",
+      "2021-09-21 19:45:32,970 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:32,971 EPOCH 1 done: loss 0.6323 - lr 0.0200000\n",
+      "2021-09-21 19:45:32,971 BAD EPOCHS (no improvement): 0\n"
      ]
     },
     {
@@ -6302,101 +6326,94 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:01:47,157 Embeddings storage mode: cpu\n",
-      "2021-09-08 11:01:47,374 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:47,450 epoch 1 - iter 1/3 - loss 0.63242036 - samples/sec: 20.83 - lr: 0.020000\n",
-      "2021-09-08 11:01:47,524 epoch 1 - iter 2/3 - loss 0.63593376 - samples/sec: 13.72 - lr: 0.020000\n",
-      "2021-09-08 11:01:47,587 epoch 1 - iter 3/3 - loss 0.63647894 - samples/sec: 15.93 - lr: 0.020000\n",
-      "2021-09-08 11:01:47,588 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:47,588 EPOCH 1 done: loss 0.6365 - lr 0.0200000\n",
-      "2021-09-08 11:01:47,589 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:01:56,387 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:56,448 epoch 2 - iter 1/3 - loss 0.62316638 - samples/sec: 21.73 - lr: 0.020000\n",
-      "2021-09-08 11:01:56,493 epoch 2 - iter 2/3 - loss 0.61453998 - samples/sec: 22.44 - lr: 0.020000\n",
-      "2021-09-08 11:01:56,537 epoch 2 - iter 3/3 - loss 0.62162554 - samples/sec: 22.97 - lr: 0.020000\n",
-      "2021-09-08 11:01:56,539 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:56,539 EPOCH 2 done: loss 0.6216 - lr 0.0200000\n",
-      "2021-09-08 11:01:56,539 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:01:56,778 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:56,835 epoch 3 - iter 1/3 - loss 0.65733689 - samples/sec: 22.74 - lr: 0.020000\n",
-      "2021-09-08 11:01:56,878 epoch 3 - iter 2/3 - loss 0.63518989 - samples/sec: 23.41 - lr: 0.020000\n",
-      "2021-09-08 11:01:56,921 epoch 3 - iter 3/3 - loss 0.62989320 - samples/sec: 23.47 - lr: 0.020000\n",
-      "2021-09-08 11:01:56,922 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:56,923 EPOCH 3 done: loss 0.6299 - lr 0.0200000\n",
-      "2021-09-08 11:01:56,923 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:01:57,897 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:57,952 epoch 4 - iter 1/3 - loss 0.65307754 - samples/sec: 23.16 - lr: 0.020000\n",
-      "2021-09-08 11:01:58,013 epoch 4 - iter 2/3 - loss 0.64227229 - samples/sec: 16.51 - lr: 0.020000\n",
-      "2021-09-08 11:01:58,083 epoch 4 - iter 3/3 - loss 0.64281076 - samples/sec: 14.44 - lr: 0.020000\n",
-      "2021-09-08 11:01:58,084 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:58,084 EPOCH 4 done: loss 0.6428 - lr 0.0200000\n",
-      "2021-09-08 11:01:58,085 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:45:37,027 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:37,118 epoch 2 - iter 1/3 - loss 0.63402754 - samples/sec: 16.11 - lr: 0.020000\n",
+      "2021-09-21 19:45:37,175 epoch 2 - iter 2/3 - loss 0.63764486 - samples/sec: 18.02 - lr: 0.020000\n",
+      "2021-09-21 19:45:37,228 epoch 2 - iter 3/3 - loss 0.63382904 - samples/sec: 18.82 - lr: 0.020000\n",
+      "2021-09-21 19:45:37,229 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:37,230 EPOCH 2 done: loss 0.6338 - lr 0.0200000\n",
+      "2021-09-21 19:45:37,230 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:02:02,876 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:02,948 epoch 5 - iter 1/3 - loss 0.63717318 - samples/sec: 18.87 - lr: 0.020000\n",
-      "2021-09-08 11:02:02,997 epoch 5 - iter 2/3 - loss 0.63312337 - samples/sec: 20.64 - lr: 0.020000\n",
-      "2021-09-08 11:02:03,071 epoch 5 - iter 3/3 - loss 0.62775453 - samples/sec: 13.75 - lr: 0.020000\n",
-      "2021-09-08 11:02:03,072 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:03,072 EPOCH 5 done: loss 0.6278 - lr 0.0200000\n",
-      "2021-09-08 11:02:03,073 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:02:03,074 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:03,168 epoch 6 - iter 1/3 - loss 0.63767099 - samples/sec: 15.76 - lr: 0.020000\n",
-      "2021-09-08 11:02:03,211 epoch 6 - iter 2/3 - loss 0.63698950 - samples/sec: 23.16 - lr: 0.020000\n",
-      "2021-09-08 11:02:03,255 epoch 6 - iter 3/3 - loss 0.64959472 - samples/sec: 23.07 - lr: 0.020000\n",
-      "2021-09-08 11:02:03,256 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:03,257 EPOCH 6 done: loss 0.6496 - lr 0.0200000\n",
-      "2021-09-08 11:02:03,257 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:45:41,483 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:41,558 epoch 3 - iter 1/3 - loss 0.62479258 - samples/sec: 19.37 - lr: 0.020000\n",
+      "2021-09-21 19:45:41,605 epoch 3 - iter 2/3 - loss 0.62018597 - samples/sec: 21.46 - lr: 0.020000\n",
+      "2021-09-21 19:45:41,660 epoch 3 - iter 3/3 - loss 0.62758082 - samples/sec: 18.53 - lr: 0.020000\n",
+      "2021-09-21 19:45:41,661 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:41,661 EPOCH 3 done: loss 0.6276 - lr 0.0200000\n",
+      "2021-09-21 19:45:41,661 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:45:41,665 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:41,736 epoch 4 - iter 1/3 - loss 0.63024205 - samples/sec: 19.87 - lr: 0.020000\n",
+      "2021-09-21 19:45:41,785 epoch 4 - iter 2/3 - loss 0.63179135 - samples/sec: 20.61 - lr: 0.020000\n",
+      "2021-09-21 19:45:41,836 epoch 4 - iter 3/3 - loss 0.63702510 - samples/sec: 19.66 - lr: 0.020000\n",
+      "2021-09-21 19:45:41,837 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:41,838 EPOCH 4 done: loss 0.6370 - lr 0.0200000\n",
+      "2021-09-21 19:45:41,838 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:02:07,496 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:07,579 epoch 7 - iter 1/3 - loss 0.62947619 - samples/sec: 16.33 - lr: 0.020000\n",
-      "2021-09-08 11:02:07,631 epoch 7 - iter 2/3 - loss 0.63050574 - samples/sec: 19.28 - lr: 0.020000\n",
-      "2021-09-08 11:02:07,687 epoch 7 - iter 3/3 - loss 0.62341617 - samples/sec: 18.02 - lr: 0.020000\n",
-      "2021-09-08 11:02:07,688 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:07,689 EPOCH 7 done: loss 0.6234 - lr 0.0200000\n",
-      "2021-09-08 11:02:07,689 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:02:07,692 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:07,777 epoch 8 - iter 1/3 - loss 0.62544066 - samples/sec: 16.93 - lr: 0.020000\n",
-      "2021-09-08 11:02:07,825 epoch 8 - iter 2/3 - loss 0.64167875 - samples/sec: 20.93 - lr: 0.020000\n",
-      "2021-09-08 11:02:07,887 epoch 8 - iter 3/3 - loss 0.64226902 - samples/sec: 16.28 - lr: 0.020000\n",
-      "2021-09-08 11:02:07,888 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:07,889 EPOCH 8 done: loss 0.6423 - lr 0.0200000\n",
-      "2021-09-08 11:02:07,889 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:02:07,891 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:07,976 epoch 9 - iter 1/3 - loss 0.59103119 - samples/sec: 13.80 - lr: 0.020000\n",
-      "2021-09-08 11:02:08,032 epoch 9 - iter 2/3 - loss 0.62563547 - samples/sec: 18.01 - lr: 0.020000\n",
-      "2021-09-08 11:02:08,097 epoch 9 - iter 3/3 - loss 0.62858566 - samples/sec: 15.57 - lr: 0.020000\n",
-      "2021-09-08 11:02:08,098 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:08,098 EPOCH 9 done: loss 0.6286 - lr 0.0200000\n",
-      "2021-09-08 11:02:08,099 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:02:08,101 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:08,188 epoch 10 - iter 1/3 - loss 0.60785556 - samples/sec: 17.33 - lr: 0.020000\n",
-      "2021-09-08 11:02:08,240 epoch 10 - iter 2/3 - loss 0.61610317 - samples/sec: 19.14 - lr: 0.020000\n",
-      "2021-09-08 11:02:08,298 epoch 10 - iter 3/3 - loss 0.62495973 - samples/sec: 17.48 - lr: 0.020000\n",
-      "2021-09-08 11:02:08,299 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:08,299 EPOCH 10 done: loss 0.6250 - lr 0.0200000\n",
-      "Epoch    10: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 11:02:08,300 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 11:02:13,012 Test data not provided setting final score to 0\n",
-      "2021-09-08 11:02:35,718 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
+      "2021-09-21 19:45:47,107 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:47,185 epoch 5 - iter 1/3 - loss 0.62696588 - samples/sec: 16.71 - lr: 0.020000\n",
+      "2021-09-21 19:45:47,237 epoch 5 - iter 2/3 - loss 0.64304098 - samples/sec: 19.24 - lr: 0.020000\n",
+      "2021-09-21 19:45:47,295 epoch 5 - iter 3/3 - loss 0.63669389 - samples/sec: 17.51 - lr: 0.020000\n",
+      "2021-09-21 19:45:47,296 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:47,296 EPOCH 5 done: loss 0.6367 - lr 0.0200000\n",
+      "2021-09-21 19:45:47,297 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:45:47,299 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:47,366 epoch 6 - iter 1/3 - loss 0.64383048 - samples/sec: 20.45 - lr: 0.020000\n",
+      "2021-09-21 19:45:47,418 epoch 6 - iter 2/3 - loss 0.64538521 - samples/sec: 19.58 - lr: 0.020000\n",
+      "2021-09-21 19:45:47,468 epoch 6 - iter 3/3 - loss 0.64506900 - samples/sec: 19.86 - lr: 0.020000\n",
+      "2021-09-21 19:45:47,469 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:47,470 EPOCH 6 done: loss 0.6451 - lr 0.0200000\n",
+      "2021-09-21 19:45:47,470 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:45:51,502 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:51,573 epoch 7 - iter 1/3 - loss 0.63507837 - samples/sec: 18.92 - lr: 0.020000\n",
+      "2021-09-21 19:45:51,626 epoch 7 - iter 2/3 - loss 0.64084575 - samples/sec: 19.20 - lr: 0.020000\n",
+      "2021-09-21 19:45:51,677 epoch 7 - iter 3/3 - loss 0.64941045 - samples/sec: 19.75 - lr: 0.020000\n",
+      "2021-09-21 19:45:51,678 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:51,678 EPOCH 7 done: loss 0.6494 - lr 0.0200000\n",
+      "2021-09-21 19:45:51,679 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 19:45:55,856 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:55,936 epoch 8 - iter 1/3 - loss 0.62963861 - samples/sec: 17.88 - lr: 0.020000\n",
+      "2021-09-21 19:45:55,990 epoch 8 - iter 2/3 - loss 0.64712039 - samples/sec: 18.52 - lr: 0.020000\n",
+      "2021-09-21 19:45:56,041 epoch 8 - iter 3/3 - loss 0.64324498 - samples/sec: 19.84 - lr: 0.020000\n",
+      "2021-09-21 19:45:56,042 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:56,042 EPOCH 8 done: loss 0.6432 - lr 0.0200000\n",
+      "2021-09-21 19:45:56,042 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:45:56,045 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:56,116 epoch 9 - iter 1/3 - loss 0.64214772 - samples/sec: 20.33 - lr: 0.020000\n",
+      "2021-09-21 19:45:56,169 epoch 9 - iter 2/3 - loss 0.64104363 - samples/sec: 19.32 - lr: 0.020000\n",
+      "2021-09-21 19:45:56,221 epoch 9 - iter 3/3 - loss 0.64814532 - samples/sec: 19.38 - lr: 0.020000\n",
+      "2021-09-21 19:45:56,222 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:56,222 EPOCH 9 done: loss 0.6481 - lr 0.0200000\n",
+      "2021-09-21 19:45:56,222 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:45:56,224 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:56,304 epoch 10 - iter 1/3 - loss 0.63278103 - samples/sec: 17.20 - lr: 0.020000\n",
+      "2021-09-21 19:45:56,353 epoch 10 - iter 2/3 - loss 0.63417345 - samples/sec: 20.58 - lr: 0.020000\n",
+      "2021-09-21 19:45:56,407 epoch 10 - iter 3/3 - loss 0.62728443 - samples/sec: 18.75 - lr: 0.020000\n",
+      "2021-09-21 19:45:56,408 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:45:56,408 EPOCH 10 done: loss 0.6273 - lr 0.0200000\n",
+      "2021-09-21 19:45:56,408 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:46:00,511 Test data not provided setting final score to 0\n",
+      "2021-09-21 19:46:21,277 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_amazon/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:02:39,677 Computing label dictionary. Progress:\n"
+      "2021-09-21 19:46:25,127 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 3/3 [00:00<00:00, 15927.74it/s]"
+      "100%|██████████| 3/3 [00:00<00:00, 18289.12it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:02:39,679 [b'having the quality of something harmful or unpleasant', b'one who does not side with any party in a war or dispute', b'involving advantage or good']\n",
-      "2021-09-08 11:02:39,689 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:39,691 Model: \"TARSClassifier(\n",
+      "2021-09-21 19:46:25,129 [b'having the quality of something harmful or unpleasant', b'one who does not side with any party in a war or dispute', b'involving advantage or good']\n",
+      "2021-09-21 19:46:25,137 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:46:25,139 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -6709,31 +6726,31 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:02:39,691 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:39,692 Corpus: \"Corpus: 3 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 11:02:39,692 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:39,692 Parameters:\n",
-      "2021-09-08 11:02:39,693  - learning_rate: \"0.02\"\n",
-      "2021-09-08 11:02:39,693  - mini_batch_size: \"1\"\n",
-      "2021-09-08 11:02:39,693  - patience: \"3\"\n",
-      "2021-09-08 11:02:39,693  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 11:02:39,694  - max_epochs: \"10\"\n",
-      "2021-09-08 11:02:39,694  - shuffle: \"True\"\n",
-      "2021-09-08 11:02:39,694  - train_with_dev: \"False\"\n",
-      "2021-09-08 11:02:39,694  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 11:02:39,695 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:39,695 Model training base path: \"temp\"\n",
-      "2021-09-08 11:02:39,695 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:39,696 Device: cuda:0\n",
-      "2021-09-08 11:02:39,696 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:39,696 Embeddings storage mode: cpu\n",
-      "2021-09-08 11:02:39,703 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:39,753 epoch 1 - iter 1/3 - loss 0.63665849 - samples/sec: 27.28 - lr: 0.020000\n",
-      "2021-09-08 11:02:39,797 epoch 1 - iter 2/3 - loss 0.64079732 - samples/sec: 22.97 - lr: 0.020000\n",
-      "2021-09-08 11:02:39,841 epoch 1 - iter 3/3 - loss 0.64114775 - samples/sec: 22.94 - lr: 0.020000\n",
-      "2021-09-08 11:02:39,842 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:39,842 EPOCH 1 done: loss 0.6411 - lr 0.0200000\n",
-      "2021-09-08 11:02:39,842 BAD EPOCHS (no improvement): 0\n"
+      "2021-09-21 19:46:25,140 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:46:25,140 Corpus: \"Corpus: 3 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 19:46:25,140 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:46:25,141 Parameters:\n",
+      "2021-09-21 19:46:25,141  - learning_rate: \"0.02\"\n",
+      "2021-09-21 19:46:25,141  - mini_batch_size: \"1\"\n",
+      "2021-09-21 19:46:25,142  - patience: \"3\"\n",
+      "2021-09-21 19:46:25,142  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 19:46:25,142  - max_epochs: \"10\"\n",
+      "2021-09-21 19:46:25,142  - shuffle: \"True\"\n",
+      "2021-09-21 19:46:25,143  - train_with_dev: \"False\"\n",
+      "2021-09-21 19:46:25,143  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 19:46:25,143 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:46:25,144 Model training base path: \"temp\"\n",
+      "2021-09-21 19:46:25,144 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:46:25,144 Device: cuda:0\n",
+      "2021-09-21 19:46:25,144 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:46:25,145 Embeddings storage mode: cpu\n",
+      "2021-09-21 19:46:25,151 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:46:25,202 epoch 1 - iter 1/3 - loss 0.64793366 - samples/sec: 26.24 - lr: 0.020000\n",
+      "2021-09-21 19:46:25,248 epoch 1 - iter 2/3 - loss 0.64025134 - samples/sec: 22.12 - lr: 0.020000\n",
+      "2021-09-21 19:46:25,294 epoch 1 - iter 3/3 - loss 0.64262064 - samples/sec: 21.86 - lr: 0.020000\n",
+      "2021-09-21 19:46:25,295 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:46:25,295 EPOCH 1 done: loss 0.6426 - lr 0.0200000\n",
+      "2021-09-21 19:46:25,296 BAD EPOCHS (no improvement): 0\n"
      ]
     },
     {
@@ -6748,74 +6765,74 @@
      "output_type": "stream",
      "text": [
       "saving best model\n",
-      "2021-09-08 11:02:45,280 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:45,339 epoch 2 - iter 1/3 - loss 0.63094407 - samples/sec: 22.49 - lr: 0.020000\n",
-      "2021-09-08 11:02:45,382 epoch 2 - iter 2/3 - loss 0.62521151 - samples/sec: 23.25 - lr: 0.020000\n",
-      "2021-09-08 11:02:45,427 epoch 2 - iter 3/3 - loss 0.63144477 - samples/sec: 22.73 - lr: 0.020000\n",
-      "2021-09-08 11:02:45,428 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:45,428 EPOCH 2 done: loss 0.6314 - lr 0.0200000\n",
-      "2021-09-08 11:02:45,428 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:02:45,505 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:45,560 epoch 3 - iter 1/3 - loss 0.63860351 - samples/sec: 23.30 - lr: 0.020000\n",
-      "2021-09-08 11:02:45,606 epoch 3 - iter 2/3 - loss 0.63036948 - samples/sec: 22.03 - lr: 0.020000\n",
-      "2021-09-08 11:02:45,649 epoch 3 - iter 3/3 - loss 0.64165646 - samples/sec: 23.38 - lr: 0.020000\n",
-      "2021-09-08 11:02:45,650 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:45,650 EPOCH 3 done: loss 0.6417 - lr 0.0200000\n",
-      "2021-09-08 11:02:45,651 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 11:02:55,146 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:55,229 epoch 4 - iter 1/3 - loss 0.62097025 - samples/sec: 14.91 - lr: 0.020000\n",
-      "2021-09-08 11:02:55,292 epoch 4 - iter 2/3 - loss 0.63835046 - samples/sec: 15.84 - lr: 0.020000\n",
-      "2021-09-08 11:02:55,351 epoch 4 - iter 3/3 - loss 0.64059293 - samples/sec: 17.25 - lr: 0.020000\n",
-      "2021-09-08 11:02:55,352 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:55,352 EPOCH 4 done: loss 0.6406 - lr 0.0200000\n",
-      "2021-09-08 11:02:55,352 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:02:55,365 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:55,448 epoch 5 - iter 1/3 - loss 0.64141077 - samples/sec: 18.28 - lr: 0.020000\n",
-      "2021-09-08 11:02:55,506 epoch 5 - iter 2/3 - loss 0.63322538 - samples/sec: 17.31 - lr: 0.020000\n",
-      "2021-09-08 11:02:55,555 epoch 5 - iter 3/3 - loss 0.62929006 - samples/sec: 20.81 - lr: 0.020000\n",
-      "2021-09-08 11:02:55,556 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:55,556 EPOCH 5 done: loss 0.6293 - lr 0.0200000\n",
-      "2021-09-08 11:02:55,556 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:02:55,558 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:55,633 epoch 6 - iter 1/3 - loss 0.66527683 - samples/sec: 16.74 - lr: 0.020000\n",
-      "2021-09-08 11:02:55,692 epoch 6 - iter 2/3 - loss 0.64884073 - samples/sec: 16.83 - lr: 0.020000\n",
-      "2021-09-08 11:02:55,755 epoch 6 - iter 3/3 - loss 0.64443090 - samples/sec: 16.02 - lr: 0.020000\n",
-      "2021-09-08 11:02:55,756 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:55,757 EPOCH 6 done: loss 0.6444 - lr 0.0200000\n",
-      "2021-09-08 11:02:55,757 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 19:46:29,159 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:46:29,231 epoch 2 - iter 1/3 - loss 0.63795501 - samples/sec: 18.10 - lr: 0.020000\n",
+      "2021-09-21 19:46:29,285 epoch 2 - iter 2/3 - loss 0.64464062 - samples/sec: 18.53 - lr: 0.020000\n",
+      "2021-09-21 19:46:29,340 epoch 2 - iter 3/3 - loss 0.64839764 - samples/sec: 18.47 - lr: 0.020000\n",
+      "2021-09-21 19:46:29,341 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:46:29,341 EPOCH 2 done: loss 0.6484 - lr 0.0200000\n",
+      "2021-09-21 19:46:29,342 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:03:08,154 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:08,213 epoch 7 - iter 1/3 - loss 0.63739353 - samples/sec: 22.16 - lr: 0.020000\n",
-      "2021-09-08 11:03:08,257 epoch 7 - iter 2/3 - loss 0.63934904 - samples/sec: 23.22 - lr: 0.020000\n",
-      "2021-09-08 11:03:08,301 epoch 7 - iter 3/3 - loss 0.63821691 - samples/sec: 23.14 - lr: 0.020000\n",
-      "2021-09-08 11:03:08,302 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:08,302 EPOCH 7 done: loss 0.6382 - lr 0.0200000\n",
-      "2021-09-08 11:03:08,302 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:03:10,758 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:10,820 epoch 8 - iter 1/3 - loss 0.66568971 - samples/sec: 20.18 - lr: 0.020000\n",
-      "2021-09-08 11:03:10,888 epoch 8 - iter 2/3 - loss 0.65293029 - samples/sec: 14.82 - lr: 0.020000\n",
-      "2021-09-08 11:03:10,958 epoch 8 - iter 3/3 - loss 0.64116267 - samples/sec: 14.27 - lr: 0.020000\n",
-      "2021-09-08 11:03:10,959 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:10,959 EPOCH 8 done: loss 0.6412 - lr 0.0200000\n",
-      "2021-09-08 11:03:10,960 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:03:10,962 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:11,041 epoch 9 - iter 1/3 - loss 0.62065530 - samples/sec: 19.71 - lr: 0.020000\n",
-      "2021-09-08 11:03:11,111 epoch 9 - iter 2/3 - loss 0.62083450 - samples/sec: 14.52 - lr: 0.020000\n",
-      "2021-09-08 11:03:11,164 epoch 9 - iter 3/3 - loss 0.62768483 - samples/sec: 18.85 - lr: 0.020000\n",
-      "2021-09-08 11:03:11,165 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:11,165 EPOCH 9 done: loss 0.6277 - lr 0.0200000\n",
-      "2021-09-08 11:03:11,166 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:03:11,168 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:11,250 epoch 10 - iter 1/3 - loss 0.62837243 - samples/sec: 16.39 - lr: 0.020000\n",
-      "2021-09-08 11:03:11,299 epoch 10 - iter 2/3 - loss 0.64039344 - samples/sec: 20.55 - lr: 0.020000\n",
-      "2021-09-08 11:03:11,347 epoch 10 - iter 3/3 - loss 0.64344909 - samples/sec: 20.79 - lr: 0.020000\n",
-      "2021-09-08 11:03:11,348 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:11,348 EPOCH 10 done: loss 0.6434 - lr 0.0200000\n",
-      "Epoch    10: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 11:03:11,349 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 11:03:20,123 Test data not provided setting final score to 0\n",
-      "Accuracy Durchschnitt: 0.34299917149958575\n"
+      "2021-09-21 19:46:36,947 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:46:37,020 epoch 3 - iter 1/3 - loss 0.62797898 - samples/sec: 18.26 - lr: 0.020000\n",
+      "2021-09-21 19:46:37,076 epoch 3 - iter 2/3 - loss 0.63041919 - samples/sec: 17.92 - lr: 0.020000\n",
+      "2021-09-21 19:46:37,134 epoch 3 - iter 3/3 - loss 0.63248680 - samples/sec: 17.54 - lr: 0.020000\n",
+      "2021-09-21 19:46:37,135 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:46:37,135 EPOCH 3 done: loss 0.6325 - lr 0.0200000\n",
+      "2021-09-21 19:46:37,135 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:46:37,226 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:46:37,294 epoch 4 - iter 1/3 - loss 0.65554816 - samples/sec: 18.74 - lr: 0.020000\n",
+      "2021-09-21 19:46:37,348 epoch 4 - iter 2/3 - loss 0.64669657 - samples/sec: 18.61 - lr: 0.020000\n",
+      "2021-09-21 19:46:37,403 epoch 4 - iter 3/3 - loss 0.64334681 - samples/sec: 18.16 - lr: 0.020000\n",
+      "2021-09-21 19:46:37,404 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:46:37,405 EPOCH 4 done: loss 0.6433 - lr 0.0200000\n",
+      "2021-09-21 19:46:37,405 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:46:37,416 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:46:37,486 epoch 5 - iter 1/3 - loss 0.63904804 - samples/sec: 18.50 - lr: 0.020000\n",
+      "2021-09-21 19:46:37,541 epoch 5 - iter 2/3 - loss 0.63642767 - samples/sec: 18.11 - lr: 0.020000\n",
+      "2021-09-21 19:46:37,595 epoch 5 - iter 3/3 - loss 0.62975037 - samples/sec: 18.68 - lr: 0.020000\n",
+      "2021-09-21 19:46:37,596 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:46:37,596 EPOCH 5 done: loss 0.6298 - lr 0.0200000\n",
+      "2021-09-21 19:46:37,597 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:46:37,600 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:46:37,669 epoch 6 - iter 1/3 - loss 0.62993437 - samples/sec: 18.16 - lr: 0.020000\n",
+      "2021-09-21 19:46:37,724 epoch 6 - iter 2/3 - loss 0.63896158 - samples/sec: 18.57 - lr: 0.020000\n",
+      "2021-09-21 19:46:37,778 epoch 6 - iter 3/3 - loss 0.63550723 - samples/sec: 18.54 - lr: 0.020000\n",
+      "2021-09-21 19:46:37,779 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:46:37,779 EPOCH 6 done: loss 0.6355 - lr 0.0200000\n",
+      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 19:46:37,780 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:46:37,844 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:46:37,912 epoch 7 - iter 1/3 - loss 0.64385581 - samples/sec: 18.68 - lr: 0.010000\n",
+      "2021-09-21 19:46:37,966 epoch 7 - iter 2/3 - loss 0.62299731 - samples/sec: 18.72 - lr: 0.010000\n",
+      "2021-09-21 19:46:38,020 epoch 7 - iter 3/3 - loss 0.62898807 - samples/sec: 18.60 - lr: 0.010000\n",
+      "2021-09-21 19:46:38,021 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:46:38,022 EPOCH 7 done: loss 0.6290 - lr 0.0100000\n",
+      "2021-09-21 19:46:38,022 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 19:46:38,041 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:46:38,109 epoch 8 - iter 1/3 - loss 0.63110751 - samples/sec: 18.74 - lr: 0.010000\n",
+      "2021-09-21 19:46:38,163 epoch 8 - iter 2/3 - loss 0.63364574 - samples/sec: 18.80 - lr: 0.010000\n",
+      "2021-09-21 19:46:38,221 epoch 8 - iter 3/3 - loss 0.64286415 - samples/sec: 17.23 - lr: 0.010000\n",
+      "2021-09-21 19:46:38,222 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:46:38,223 EPOCH 8 done: loss 0.6429 - lr 0.0100000\n",
+      "2021-09-21 19:46:38,223 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 19:46:38,236 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:46:38,305 epoch 9 - iter 1/3 - loss 0.63897383 - samples/sec: 18.31 - lr: 0.010000\n",
+      "2021-09-21 19:46:38,359 epoch 9 - iter 2/3 - loss 0.64188832 - samples/sec: 18.72 - lr: 0.010000\n",
+      "2021-09-21 19:46:38,414 epoch 9 - iter 3/3 - loss 0.64204003 - samples/sec: 18.51 - lr: 0.010000\n",
+      "2021-09-21 19:46:38,415 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:46:38,415 EPOCH 9 done: loss 0.6420 - lr 0.0100000\n",
+      "2021-09-21 19:46:38,415 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 19:46:38,441 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:46:38,509 epoch 10 - iter 1/3 - loss 0.62591279 - samples/sec: 18.79 - lr: 0.010000\n",
+      "2021-09-21 19:46:38,566 epoch 10 - iter 2/3 - loss 0.62678063 - samples/sec: 17.68 - lr: 0.010000\n",
+      "2021-09-21 19:46:38,625 epoch 10 - iter 3/3 - loss 0.63183641 - samples/sec: 17.15 - lr: 0.010000\n",
+      "2021-09-21 19:46:38,626 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 19:46:38,626 EPOCH 10 done: loss 0.6318 - lr 0.0100000\n",
+      "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 19:46:38,626 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 19:46:45,424 Test data not provided setting final score to 0\n",
+      "Accuracy Durchschnitt: 0.35376967688483846\n"
      ]
     }
    ],
@@ -6884,11 +6901,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
    "id": "a310936c",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.3512841756420878, 0.3496271748135874, 0.3620546810273405, 0.35791217895608946, 0.347970173985087]\n",
+      "0.005343667107405826\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(avg_acc_list)\n",
+    "print(statistics.pstdev(avg_acc_list))"
+   ]
   }
  ],
  "metadata": {
diff --git a/oneshot/sentiment_twitter_oneshot.ipynb b/oneshot/sentiment_twitter_oneshot.ipynb
index be04869..2149950 100644
--- a/oneshot/sentiment_twitter_oneshot.ipynb
+++ b/oneshot/sentiment_twitter_oneshot.ipynb
@@ -70,25 +70,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:49:45,039 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
+      "2021-09-21 20:16:21,597 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:50:00,528 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:16:29,562 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 5/5 [00:00<00:00, 21076.90it/s]"
+      "100%|██████████| 5/5 [00:00<00:00, 11149.13it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:50:00,530 [b'awful', b'bad', b'neutral', b'good', b'great']\n",
-      "2021-09-08 10:50:00,722 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:00,724 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:16:29,565 [b'awful', b'bad', b'neutral', b'good', b'great']\n",
+      "2021-09-21 20:16:29,569 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:16:29,571 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -401,24 +401,28 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:50:00,725 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:00,725 Corpus: \"Corpus: 5 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 10:50:00,725 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:00,726 Parameters:\n",
-      "2021-09-08 10:50:00,726  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:50:00,726  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:50:00,726  - patience: \"3\"\n",
-      "2021-09-08 10:50:00,727  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:50:00,727  - max_epochs: \"10\"\n",
-      "2021-09-08 10:50:00,727  - shuffle: \"True\"\n",
-      "2021-09-08 10:50:00,727  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:50:00,728  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:50:00,728 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:00,728 Model training base path: \"temp1\"\n",
-      "2021-09-08 10:50:00,729 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:00,729 Device: cuda:0\n",
-      "2021-09-08 10:50:00,729 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:00,729 Embeddings storage mode: cpu\n"
+      "2021-09-21 20:16:29,571 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:16:29,572 Corpus: \"Corpus: 5 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 20:16:29,572 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:16:29,572 Parameters:\n",
+      "2021-09-21 20:16:29,573  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:16:29,573  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:16:29,573  - patience: \"3\"\n",
+      "2021-09-21 20:16:29,573  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:16:29,574  - max_epochs: \"10\"\n",
+      "2021-09-21 20:16:29,574  - shuffle: \"True\"\n",
+      "2021-09-21 20:16:29,574  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:16:29,575  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:16:29,575 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:16:29,575 Model training base path: \"temp1\"\n",
+      "2021-09-21 20:16:29,575 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:16:29,576 Device: cuda:0\n",
+      "2021-09-21 20:16:29,576 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:16:29,576 Embeddings storage mode: cpu\n",
+      "2021-09-21 20:16:29,583 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:16:29,650 epoch 1 - iter 1/5 - loss 0.58580673 - samples/sec: 19.50 - lr: 0.020000\n",
+      "2021-09-21 20:16:29,707 epoch 1 - iter 2/5 - loss 0.54923257 - samples/sec: 17.81 - lr: 0.020000\n",
+      "2021-09-21 20:16:29,763 epoch 1 - iter 3/5 - loss 0.54769258 - samples/sec: 18.05 - lr: 0.020000\n"
      ]
     },
     {
@@ -432,125 +436,122 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:50:00,902 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:00,967 epoch 1 - iter 1/5 - loss 1.27811778 - samples/sec: 20.71 - lr: 0.020000\n",
-      "2021-09-08 10:50:01,026 epoch 1 - iter 2/5 - loss 0.99617156 - samples/sec: 17.20 - lr: 0.020000\n",
-      "2021-09-08 10:50:01,091 epoch 1 - iter 3/5 - loss 0.92684825 - samples/sec: 15.47 - lr: 0.020000\n",
-      "2021-09-08 10:50:01,142 epoch 1 - iter 4/5 - loss 0.79983026 - samples/sec: 19.60 - lr: 0.020000\n",
-      "2021-09-08 10:50:01,192 epoch 1 - iter 5/5 - loss 0.77341318 - samples/sec: 20.43 - lr: 0.020000\n",
-      "2021-09-08 10:50:01,193 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:01,193 EPOCH 1 done: loss 0.7734 - lr 0.0200000\n",
-      "2021-09-08 10:50:01,193 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:16:29,870 epoch 1 - iter 4/5 - loss 0.42097576 - samples/sec: 9.34 - lr: 0.020000\n",
+      "2021-09-21 20:16:29,920 epoch 1 - iter 5/5 - loss 0.45060666 - samples/sec: 20.46 - lr: 0.020000\n",
+      "2021-09-21 20:16:29,921 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:16:29,921 EPOCH 1 done: loss 0.4506 - lr 0.0200000\n",
+      "2021-09-21 20:16:29,921 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:50:09,829 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:09,891 epoch 2 - iter 1/5 - loss 0.65540868 - samples/sec: 20.77 - lr: 0.020000\n",
-      "2021-09-08 10:50:09,940 epoch 2 - iter 2/5 - loss 0.49948232 - samples/sec: 20.78 - lr: 0.020000\n",
-      "2021-09-08 10:50:09,987 epoch 2 - iter 3/5 - loss 0.56823845 - samples/sec: 21.66 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,033 epoch 2 - iter 4/5 - loss 0.69261044 - samples/sec: 21.67 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,085 epoch 2 - iter 5/5 - loss 0.68523791 - samples/sec: 19.62 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,086 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:10,086 EPOCH 2 done: loss 0.6852 - lr 0.0200000\n",
-      "2021-09-08 10:50:10,086 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:50:10,088 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:10,146 epoch 3 - iter 1/5 - loss 0.77356172 - samples/sec: 21.81 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,194 epoch 3 - iter 2/5 - loss 0.63226235 - samples/sec: 21.32 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,240 epoch 3 - iter 3/5 - loss 0.50821854 - samples/sec: 21.87 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,291 epoch 3 - iter 4/5 - loss 0.53037500 - samples/sec: 19.62 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,337 epoch 3 - iter 5/5 - loss 0.55171917 - samples/sec: 21.83 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,338 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:10,339 EPOCH 3 done: loss 0.5517 - lr 0.0200000\n",
-      "2021-09-08 10:50:10,339 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:50:10,341 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:10,400 epoch 4 - iter 1/5 - loss 0.65170068 - samples/sec: 21.37 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,453 epoch 4 - iter 2/5 - loss 0.67468220 - samples/sec: 19.16 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,499 epoch 4 - iter 3/5 - loss 0.58896781 - samples/sec: 21.92 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,545 epoch 4 - iter 4/5 - loss 0.59774860 - samples/sec: 21.72 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,591 epoch 4 - iter 5/5 - loss 0.54193033 - samples/sec: 21.88 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,592 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:10,593 EPOCH 4 done: loss 0.5419 - lr 0.0200000\n",
-      "2021-09-08 10:50:10,593 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:50:10,595 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:10,653 epoch 5 - iter 1/5 - loss 0.33677569 - samples/sec: 21.78 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,700 epoch 5 - iter 2/5 - loss 0.50718112 - samples/sec: 21.40 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,746 epoch 5 - iter 3/5 - loss 0.37631067 - samples/sec: 21.86 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,798 epoch 5 - iter 4/5 - loss 0.38886216 - samples/sec: 19.65 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,844 epoch 5 - iter 5/5 - loss 0.37789294 - samples/sec: 21.84 - lr: 0.020000\n",
-      "2021-09-08 10:50:10,845 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:10,845 EPOCH 5 done: loss 0.3779 - lr 0.0200000\n",
-      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 10:50:10,845 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:50:10,847 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:10,903 epoch 6 - iter 1/5 - loss 0.04534568 - samples/sec: 23.25 - lr: 0.010000\n",
-      "2021-09-08 10:50:10,954 epoch 6 - iter 2/5 - loss 0.19233655 - samples/sec: 19.70 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,001 epoch 6 - iter 3/5 - loss 0.45366795 - samples/sec: 21.27 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,048 epoch 6 - iter 4/5 - loss 0.51382641 - samples/sec: 21.62 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,091 epoch 6 - iter 5/5 - loss 0.41502295 - samples/sec: 23.19 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,092 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:11,093 EPOCH 6 done: loss 0.4150 - lr 0.0100000\n",
-      "2021-09-08 10:50:11,093 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:50:11,095 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:11,156 epoch 7 - iter 1/5 - loss 0.72972280 - samples/sec: 20.59 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,199 epoch 7 - iter 2/5 - loss 0.37753609 - samples/sec: 23.43 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,250 epoch 7 - iter 3/5 - loss 0.31459249 - samples/sec: 19.62 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,293 epoch 7 - iter 4/5 - loss 0.24969397 - samples/sec: 23.52 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,341 epoch 7 - iter 5/5 - loss 0.27676123 - samples/sec: 21.33 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,341 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:11,342 EPOCH 7 done: loss 0.2768 - lr 0.0100000\n",
-      "2021-09-08 10:50:11,342 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:50:11,344 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:11,404 epoch 8 - iter 1/5 - loss 0.08210202 - samples/sec: 21.10 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,450 epoch 8 - iter 2/5 - loss 0.23500752 - samples/sec: 21.87 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,493 epoch 8 - iter 3/5 - loss 0.16260858 - samples/sec: 23.41 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,536 epoch 8 - iter 4/5 - loss 0.12316844 - samples/sec: 23.50 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,584 epoch 8 - iter 5/5 - loss 0.24454740 - samples/sec: 21.26 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,585 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:11,585 EPOCH 8 done: loss 0.2445 - lr 0.0100000\n",
-      "2021-09-08 10:50:11,585 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:50:11,587 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:11,646 epoch 9 - iter 1/5 - loss 0.71592569 - samples/sec: 21.40 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,692 epoch 9 - iter 2/5 - loss 0.67274997 - samples/sec: 21.82 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,747 epoch 9 - iter 3/5 - loss 0.47831037 - samples/sec: 18.30 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,791 epoch 9 - iter 4/5 - loss 0.36556075 - samples/sec: 23.44 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,834 epoch 9 - iter 5/5 - loss 0.29397366 - samples/sec: 23.49 - lr: 0.010000\n",
-      "2021-09-08 10:50:11,835 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:11,835 EPOCH 9 done: loss 0.2940 - lr 0.0100000\n",
-      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 10:50:11,835 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:50:11,837 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:11,896 epoch 10 - iter 1/5 - loss 0.39210522 - samples/sec: 21.40 - lr: 0.005000\n",
-      "2021-09-08 10:50:11,944 epoch 10 - iter 2/5 - loss 0.22181765 - samples/sec: 21.10 - lr: 0.005000\n",
-      "2021-09-08 10:50:11,990 epoch 10 - iter 3/5 - loss 0.30434380 - samples/sec: 21.79 - lr: 0.005000\n"
+      "2021-09-21 20:16:38,904 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:16:38,974 epoch 2 - iter 1/5 - loss 0.17647113 - samples/sec: 18.10 - lr: 0.020000\n",
+      "2021-09-21 20:16:39,078 epoch 2 - iter 2/5 - loss 0.10391180 - samples/sec: 9.66 - lr: 0.020000\n",
+      "2021-09-21 20:16:39,130 epoch 2 - iter 3/5 - loss 0.24540306 - samples/sec: 19.12 - lr: 0.020000\n",
+      "2021-09-21 20:16:39,178 epoch 2 - iter 4/5 - loss 0.23837589 - samples/sec: 21.43 - lr: 0.020000\n",
+      "2021-09-21 20:16:39,228 epoch 2 - iter 5/5 - loss 0.74125689 - samples/sec: 19.82 - lr: 0.020000\n",
+      "2021-09-21 20:16:39,229 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:16:39,230 EPOCH 2 done: loss 0.7413 - lr 0.0200000\n",
+      "2021-09-21 20:16:39,230 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:16:56,945 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:16:57,014 epoch 3 - iter 1/5 - loss 1.19411922 - samples/sec: 18.12 - lr: 0.020000\n",
+      "2021-09-21 20:16:57,121 epoch 3 - iter 2/5 - loss 0.82255723 - samples/sec: 9.36 - lr: 0.020000\n",
+      "2021-09-21 20:16:57,175 epoch 3 - iter 3/5 - loss 0.71120167 - samples/sec: 18.79 - lr: 0.020000\n",
+      "2021-09-21 20:16:57,222 epoch 3 - iter 4/5 - loss 0.59064693 - samples/sec: 21.65 - lr: 0.020000\n",
+      "2021-09-21 20:16:57,272 epoch 3 - iter 5/5 - loss 0.50643138 - samples/sec: 20.09 - lr: 0.020000\n",
+      "2021-09-21 20:16:57,273 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:16:57,273 EPOCH 3 done: loss 0.5064 - lr 0.0200000\n",
+      "2021-09-21 20:16:57,273 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:16:57,681 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:16:57,740 epoch 4 - iter 1/5 - loss 0.05958305 - samples/sec: 21.65 - lr: 0.020000\n",
+      "2021-09-21 20:16:57,784 epoch 4 - iter 2/5 - loss 0.03778808 - samples/sec: 23.04 - lr: 0.020000\n",
+      "2021-09-21 20:16:57,879 epoch 4 - iter 3/5 - loss 0.07651272 - samples/sec: 10.49 - lr: 0.020000\n",
+      "2021-09-21 20:16:57,928 epoch 4 - iter 4/5 - loss 0.19643251 - samples/sec: 20.64 - lr: 0.020000\n",
+      "2021-09-21 20:16:57,979 epoch 4 - iter 5/5 - loss 0.27960965 - samples/sec: 20.08 - lr: 0.020000\n",
+      "2021-09-21 20:16:57,979 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:16:57,980 EPOCH 4 done: loss 0.2796 - lr 0.0200000\n",
+      "2021-09-21 20:16:57,980 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:16:58,120 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:16:58,179 epoch 5 - iter 1/5 - loss 0.01856309 - samples/sec: 21.60 - lr: 0.020000\n",
+      "2021-09-21 20:16:58,230 epoch 5 - iter 2/5 - loss 0.15146292 - samples/sec: 19.83 - lr: 0.020000\n",
+      "2021-09-21 20:16:58,274 epoch 5 - iter 3/5 - loss 0.10560902 - samples/sec: 22.91 - lr: 0.020000\n",
+      "2021-09-21 20:16:58,371 epoch 5 - iter 4/5 - loss 0.22136619 - samples/sec: 10.43 - lr: 0.020000\n",
+      "2021-09-21 20:16:58,419 epoch 5 - iter 5/5 - loss 0.30219018 - samples/sec: 20.63 - lr: 0.020000\n",
+      "2021-09-21 20:16:58,420 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:16:58,421 EPOCH 5 done: loss 0.3022 - lr 0.0200000\n",
+      "2021-09-21 20:16:58,421 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:16:58,438 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:16:58,497 epoch 6 - iter 1/5 - loss 0.02529115 - samples/sec: 21.50 - lr: 0.020000\n",
+      "2021-09-21 20:16:58,593 epoch 6 - iter 2/5 - loss 0.15448620 - samples/sec: 10.47 - lr: 0.020000\n",
+      "2021-09-21 20:16:58,636 epoch 6 - iter 3/5 - loss 0.10499111 - samples/sec: 23.17 - lr: 0.020000\n",
+      "2021-09-21 20:16:58,685 epoch 6 - iter 4/5 - loss 0.23326323 - samples/sec: 20.67 - lr: 0.020000\n",
+      "2021-09-21 20:16:58,736 epoch 6 - iter 5/5 - loss 0.46047257 - samples/sec: 20.01 - lr: 0.020000\n",
+      "2021-09-21 20:16:58,737 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:16:58,737 EPOCH 6 done: loss 0.4605 - lr 0.0200000\n",
+      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 20:16:58,737 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:16:58,847 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:16:58,908 epoch 7 - iter 1/5 - loss 0.01769291 - samples/sec: 20.88 - lr: 0.010000\n",
+      "2021-09-21 20:16:58,960 epoch 7 - iter 2/5 - loss 0.22587312 - samples/sec: 19.53 - lr: 0.010000\n",
+      "2021-09-21 20:16:59,003 epoch 7 - iter 3/5 - loss 0.15533778 - samples/sec: 23.23 - lr: 0.010000\n",
+      "2021-09-21 20:16:59,095 epoch 7 - iter 4/5 - loss 0.12839587 - samples/sec: 11.00 - lr: 0.010000\n",
+      "2021-09-21 20:16:59,144 epoch 7 - iter 5/5 - loss 0.42497400 - samples/sec: 20.66 - lr: 0.010000\n",
+      "2021-09-21 20:16:59,145 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:16:59,145 EPOCH 7 done: loss 0.4250 - lr 0.0100000\n",
+      "2021-09-21 20:16:59,145 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:16:59,159 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:16:59,219 epoch 8 - iter 1/5 - loss 0.01128806 - samples/sec: 20.96 - lr: 0.010000\n",
+      "2021-09-21 20:16:59,263 epoch 8 - iter 2/5 - loss 0.00750754 - samples/sec: 23.15 - lr: 0.010000\n",
+      "2021-09-21 20:16:59,312 epoch 8 - iter 3/5 - loss 0.28122326 - samples/sec: 20.57 - lr: 0.010000\n",
+      "2021-09-21 20:16:59,363 epoch 8 - iter 4/5 - loss 0.28516654 - samples/sec: 19.84 - lr: 0.010000\n",
+      "2021-09-21 20:16:59,458 epoch 8 - iter 5/5 - loss 0.24565908 - samples/sec: 10.49 - lr: 0.010000\n",
+      "2021-09-21 20:16:59,459 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:16:59,460 EPOCH 8 done: loss 0.2457 - lr 0.0100000\n",
+      "2021-09-21 20:16:59,460 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:16:59,495 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:16:59,556 epoch 9 - iter 1/5 - loss 0.56478232 - samples/sec: 20.62 - lr: 0.010000\n",
+      "2021-09-21 20:16:59,603 epoch 9 - iter 2/5 - loss 0.32099501 - samples/sec: 21.43 - lr: 0.010000\n",
+      "2021-09-21 20:16:59,694 epoch 9 - iter 3/5 - loss 0.22098087 - samples/sec: 11.01 - lr: 0.010000\n",
+      "2021-09-21 20:16:59,737 epoch 9 - iter 4/5 - loss 0.16654604 - samples/sec: 23.22 - lr: 0.010000\n",
+      "2021-09-21 20:16:59,786 epoch 9 - iter 5/5 - loss 0.13460595 - samples/sec: 20.91 - lr: 0.010000\n",
+      "2021-09-21 20:16:59,787 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:16:59,787 EPOCH 9 done: loss 0.1346 - lr 0.0100000\n",
+      "2021-09-21 20:16:59,787 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:16:59,807 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:16:59,865 epoch 10 - iter 1/5 - loss 0.00572173 - samples/sec: 21.68 - lr: 0.010000\n",
+      "2021-09-21 20:16:59,908 epoch 10 - iter 2/5 - loss 0.00541874 - samples/sec: 23.24 - lr: 0.010000\n",
+      "2021-09-21 20:16:59,959 epoch 10 - iter 3/5 - loss 0.04829608 - samples/sec: 20.10 - lr: 0.010000\n",
+      "2021-09-21 20:17:00,008 epoch 10 - iter 4/5 - loss 0.09625951 - samples/sec: 20.53 - lr: 0.010000\n",
+      "2021-09-21 20:17:00,100 epoch 10 - iter 5/5 - loss 0.07950802 - samples/sec: 10.85 - lr: 0.010000\n",
+      "2021-09-21 20:17:00,101 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:17:00,102 EPOCH 10 done: loss 0.0795 - lr 0.0100000\n",
+      "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 20:17:00,102 BAD EPOCHS (no improvement): 4\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:50:12,034 epoch 10 - iter 4/5 - loss 0.23080670 - samples/sec: 23.04 - lr: 0.005000\n",
-      "2021-09-08 10:50:12,079 epoch 10 - iter 5/5 - loss 0.18869687 - samples/sec: 22.68 - lr: 0.005000\n",
-      "2021-09-08 10:50:12,080 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:50:12,080 EPOCH 10 done: loss 0.1887 - lr 0.0050000\n",
-      "2021-09-08 10:50:12,081 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:50:34,163 Test data not provided setting final score to 0\n",
-      "2021-09-08 10:51:04,805 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
+      "2021-09-21 20:17:07,306 Test data not provided setting final score to 0\n",
+      "2021-09-21 20:17:36,322 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:51:08,946 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:17:40,425 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 5/5 [00:00<00:00, 10063.11it/s]"
+      "100%|██████████| 5/5 [00:00<00:00, 18110.12it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:51:08,948 [b'awful', b'bad', b'neutral', b'good', b'great']\n",
-      "2021-09-08 10:51:08,959 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:08,960 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:17:40,427 [b'awful', b'bad', b'neutral', b'good', b'great']\n",
+      "2021-09-21 20:17:40,436 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:17:40,438 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -863,27 +864,28 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:51:08,961 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:08,961 Corpus: \"Corpus: 5 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 10:51:08,962 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:08,962 Parameters:\n",
-      "2021-09-08 10:51:08,962  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:51:08,962  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:51:08,963  - patience: \"3\"\n",
-      "2021-09-08 10:51:08,963  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:51:08,963  - max_epochs: \"10\"\n",
-      "2021-09-08 10:51:08,964  - shuffle: \"True\"\n",
-      "2021-09-08 10:51:08,964  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:51:08,964  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:51:08,964 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:08,965 Model training base path: \"temp1\"\n",
-      "2021-09-08 10:51:08,965 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:08,965 Device: cuda:0\n",
-      "2021-09-08 10:51:08,965 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:08,966 Embeddings storage mode: cpu\n",
-      "2021-09-08 10:51:08,972 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:09,032 epoch 1 - iter 1/5 - loss 0.74151629 - samples/sec: 22.08 - lr: 0.020000\n",
-      "2021-09-08 10:51:09,083 epoch 1 - iter 2/5 - loss 0.75561517 - samples/sec: 19.99 - lr: 0.020000\n"
+      "2021-09-21 20:17:40,439 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:17:40,439 Corpus: \"Corpus: 5 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 20:17:40,439 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:17:40,439 Parameters:\n",
+      "2021-09-21 20:17:40,440  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:17:40,440  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:17:40,440  - patience: \"3\"\n",
+      "2021-09-21 20:17:40,441  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:17:40,441  - max_epochs: \"10\"\n",
+      "2021-09-21 20:17:40,441  - shuffle: \"True\"\n",
+      "2021-09-21 20:17:40,441  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:17:40,442  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:17:40,442 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:17:40,442 Model training base path: \"temp1\"\n",
+      "2021-09-21 20:17:40,442 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:17:40,443 Device: cuda:0\n",
+      "2021-09-21 20:17:40,443 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:17:40,443 Embeddings storage mode: cpu\n",
+      "2021-09-21 20:17:40,450 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:17:40,503 epoch 1 - iter 1/5 - loss 0.41537464 - samples/sec: 25.02 - lr: 0.020000\n",
+      "2021-09-21 20:17:40,574 epoch 1 - iter 2/5 - loss 0.61964467 - samples/sec: 14.10 - lr: 0.020000\n",
+      "2021-09-21 20:17:40,620 epoch 1 - iter 3/5 - loss 0.73947815 - samples/sec: 21.91 - lr: 0.020000\n"
      ]
     },
     {
@@ -897,122 +899,122 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:51:09,218 epoch 1 - iter 3/5 - loss 0.84460266 - samples/sec: 7.41 - lr: 0.020000\n",
-      "2021-09-08 10:51:09,273 epoch 1 - iter 4/5 - loss 0.81735291 - samples/sec: 18.34 - lr: 0.020000\n",
-      "2021-09-08 10:51:09,351 epoch 1 - iter 5/5 - loss 0.81986171 - samples/sec: 12.96 - lr: 0.020000\n",
-      "2021-09-08 10:51:09,352 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:09,352 EPOCH 1 done: loss 0.8199 - lr 0.0200000\n",
-      "2021-09-08 10:51:09,352 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:17:40,667 epoch 1 - iter 4/5 - loss 0.67110490 - samples/sec: 21.64 - lr: 0.020000\n",
+      "2021-09-21 20:17:40,713 epoch 1 - iter 5/5 - loss 0.72289622 - samples/sec: 22.00 - lr: 0.020000\n",
+      "2021-09-21 20:17:40,714 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:17:40,714 EPOCH 1 done: loss 0.7229 - lr 0.0200000\n",
+      "2021-09-21 20:17:40,714 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:51:13,259 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:13,324 epoch 2 - iter 1/5 - loss 0.30308953 - samples/sec: 19.82 - lr: 0.020000\n",
-      "2021-09-08 10:51:13,393 epoch 2 - iter 2/5 - loss 0.57159434 - samples/sec: 14.47 - lr: 0.020000\n",
-      "2021-09-08 10:51:13,509 epoch 2 - iter 3/5 - loss 0.61418270 - samples/sec: 8.69 - lr: 0.020000\n",
-      "2021-09-08 10:51:13,556 epoch 2 - iter 4/5 - loss 0.61481271 - samples/sec: 21.32 - lr: 0.020000\n",
-      "2021-09-08 10:51:13,604 epoch 2 - iter 5/5 - loss 0.56151837 - samples/sec: 21.06 - lr: 0.020000\n",
-      "2021-09-08 10:51:13,605 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:13,605 EPOCH 2 done: loss 0.5615 - lr 0.0200000\n",
-      "2021-09-08 10:51:13,606 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:51:13,608 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:13,667 epoch 3 - iter 1/5 - loss 0.42025504 - samples/sec: 21.78 - lr: 0.020000\n",
-      "2021-09-08 10:51:13,770 epoch 3 - iter 2/5 - loss 0.66077258 - samples/sec: 9.76 - lr: 0.020000\n",
-      "2021-09-08 10:51:13,817 epoch 3 - iter 3/5 - loss 0.49563462 - samples/sec: 21.16 - lr: 0.020000\n",
-      "2021-09-08 10:51:13,880 epoch 3 - iter 4/5 - loss 0.50758385 - samples/sec: 15.97 - lr: 0.020000\n",
-      "2021-09-08 10:51:13,929 epoch 3 - iter 5/5 - loss 0.52326049 - samples/sec: 20.66 - lr: 0.020000\n",
-      "2021-09-08 10:51:13,930 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:13,930 EPOCH 3 done: loss 0.5233 - lr 0.0200000\n",
-      "2021-09-08 10:51:13,931 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:51:13,933 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:13,992 epoch 4 - iter 1/5 - loss 0.23467524 - samples/sec: 21.74 - lr: 0.020000\n",
-      "2021-09-08 10:51:14,053 epoch 4 - iter 2/5 - loss 0.45953584 - samples/sec: 16.41 - lr: 0.020000\n",
-      "2021-09-08 10:51:14,157 epoch 4 - iter 3/5 - loss 0.47198324 - samples/sec: 9.65 - lr: 0.020000\n",
-      "2021-09-08 10:51:14,204 epoch 4 - iter 4/5 - loss 0.44356407 - samples/sec: 21.37 - lr: 0.020000\n",
-      "2021-09-08 10:51:14,252 epoch 4 - iter 5/5 - loss 0.40024242 - samples/sec: 21.33 - lr: 0.020000\n",
-      "2021-09-08 10:51:14,253 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:14,253 EPOCH 4 done: loss 0.4002 - lr 0.0200000\n",
-      "2021-09-08 10:51:14,254 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:51:14,256 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:14,312 epoch 5 - iter 1/5 - loss 0.05795778 - samples/sec: 22.76 - lr: 0.020000\n",
-      "2021-09-08 10:51:14,415 epoch 5 - iter 2/5 - loss 0.31400732 - samples/sec: 9.73 - lr: 0.020000\n",
-      "2021-09-08 10:51:14,464 epoch 5 - iter 3/5 - loss 0.37084770 - samples/sec: 20.68 - lr: 0.020000\n",
-      "2021-09-08 10:51:14,513 epoch 5 - iter 4/5 - loss 0.31888347 - samples/sec: 20.49 - lr: 0.020000\n",
-      "2021-09-08 10:51:14,575 epoch 5 - iter 5/5 - loss 0.40177917 - samples/sec: 16.27 - lr: 0.020000\n",
-      "2021-09-08 10:51:14,576 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:14,576 EPOCH 5 done: loss 0.4018 - lr 0.0200000\n",
-      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 10:51:14,577 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:51:14,579 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:14,652 epoch 6 - iter 1/5 - loss 0.66568136 - samples/sec: 16.35 - lr: 0.010000\n",
-      "2021-09-08 10:51:14,756 epoch 6 - iter 2/5 - loss 0.44467872 - samples/sec: 9.68 - lr: 0.010000\n",
-      "2021-09-08 10:51:14,802 epoch 6 - iter 3/5 - loss 0.32207675 - samples/sec: 21.65 - lr: 0.010000\n",
-      "2021-09-08 10:51:14,853 epoch 6 - iter 4/5 - loss 0.24316756 - samples/sec: 19.85 - lr: 0.010000\n",
-      "2021-09-08 10:51:14,929 epoch 6 - iter 5/5 - loss 0.32661292 - samples/sec: 13.23 - lr: 0.010000\n",
-      "2021-09-08 10:51:14,932 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:14,932 EPOCH 6 done: loss 0.3266 - lr 0.0100000\n",
-      "2021-09-08 10:51:14,933 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:51:14,935 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:15,002 epoch 7 - iter 1/5 - loss 0.00478988 - samples/sec: 21.37 - lr: 0.010000\n",
-      "2021-09-08 10:51:15,052 epoch 7 - iter 2/5 - loss 0.25637358 - samples/sec: 20.04 - lr: 0.010000\n",
-      "2021-09-08 10:51:15,098 epoch 7 - iter 3/5 - loss 0.17416776 - samples/sec: 22.29 - lr: 0.010000\n",
-      "2021-09-08 10:51:15,205 epoch 7 - iter 4/5 - loss 0.20773280 - samples/sec: 9.37 - lr: 0.010000\n",
-      "2021-09-08 10:51:15,269 epoch 7 - iter 5/5 - loss 0.27453785 - samples/sec: 15.85 - lr: 0.010000\n",
-      "2021-09-08 10:51:15,270 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:15,270 EPOCH 7 done: loss 0.2745 - lr 0.0100000\n",
-      "2021-09-08 10:51:15,270 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:51:15,272 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:15,391 epoch 8 - iter 1/5 - loss 0.13863075 - samples/sec: 9.48 - lr: 0.010000\n",
-      "2021-09-08 10:51:15,437 epoch 8 - iter 2/5 - loss 0.08543990 - samples/sec: 22.20 - lr: 0.010000\n",
-      "2021-09-08 10:51:15,500 epoch 8 - iter 3/5 - loss 0.29051038 - samples/sec: 16.02 - lr: 0.010000\n",
-      "2021-09-08 10:51:15,546 epoch 8 - iter 4/5 - loss 0.22033175 - samples/sec: 22.02 - lr: 0.010000\n",
-      "2021-09-08 10:51:15,594 epoch 8 - iter 5/5 - loss 0.24179691 - samples/sec: 20.69 - lr: 0.010000\n",
-      "2021-09-08 10:51:15,596 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:15,596 EPOCH 8 done: loss 0.2418 - lr 0.0100000\n",
-      "2021-09-08 10:51:15,596 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:51:15,598 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:15,720 epoch 9 - iter 1/5 - loss 0.10134868 - samples/sec: 9.24 - lr: 0.010000\n",
-      "2021-09-08 10:51:15,767 epoch 9 - iter 2/5 - loss 0.05525517 - samples/sec: 21.54 - lr: 0.010000\n",
-      "2021-09-08 10:51:15,816 epoch 9 - iter 3/5 - loss 0.11619996 - samples/sec: 20.52 - lr: 0.010000\n",
-      "2021-09-08 10:51:15,885 epoch 9 - iter 4/5 - loss 0.24326137 - samples/sec: 14.61 - lr: 0.010000\n",
-      "2021-09-08 10:51:15,931 epoch 9 - iter 5/5 - loss 0.19588992 - samples/sec: 21.87 - lr: 0.010000\n",
-      "2021-09-08 10:51:15,932 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:15,933 EPOCH 9 done: loss 0.1959 - lr 0.0100000\n",
-      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 10:51:15,933 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:51:16,014 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:51:16,076 epoch 10 - iter 1/5 - loss 0.07843979 - samples/sec: 20.90 - lr: 0.005000\n",
-      "2021-09-08 10:51:16,141 epoch 10 - iter 2/5 - loss 0.21659216 - samples/sec: 15.53 - lr: 0.005000\n",
-      "2021-09-08 10:51:16,193 epoch 10 - iter 3/5 - loss 0.17357988 - samples/sec: 19.42 - lr: 0.005000\n",
-      "2021-09-08 10:51:16,244 epoch 10 - iter 4/5 - loss 0.13795198 - samples/sec: 19.46 - lr: 0.005000\n",
-      "2021-09-08 10:51:16,346 epoch 10 - iter 5/5 - loss 0.11913444 - samples/sec: 9.86 - lr: 0.005000\n",
-      "2021-09-08 10:51:16,347 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 20:17:47,229 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:17:47,290 epoch 2 - iter 1/5 - loss 1.44298458 - samples/sec: 21.11 - lr: 0.020000\n",
+      "2021-09-21 20:17:47,337 epoch 2 - iter 2/5 - loss 1.00261864 - samples/sec: 21.85 - lr: 0.020000\n",
+      "2021-09-21 20:17:47,410 epoch 2 - iter 3/5 - loss 0.85533319 - samples/sec: 13.72 - lr: 0.020000\n",
+      "2021-09-21 20:17:47,456 epoch 2 - iter 4/5 - loss 0.67867164 - samples/sec: 21.95 - lr: 0.020000\n",
+      "2021-09-21 20:17:47,502 epoch 2 - iter 5/5 - loss 0.79993468 - samples/sec: 21.80 - lr: 0.020000\n",
+      "2021-09-21 20:17:47,503 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:17:47,503 EPOCH 2 done: loss 0.7999 - lr 0.0200000\n",
+      "2021-09-21 20:17:47,504 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:17:55,287 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:17:55,350 epoch 3 - iter 1/5 - loss 0.43464997 - samples/sec: 20.95 - lr: 0.020000\n",
+      "2021-09-21 20:17:55,422 epoch 3 - iter 2/5 - loss 0.61133973 - samples/sec: 13.97 - lr: 0.020000\n",
+      "2021-09-21 20:17:55,468 epoch 3 - iter 3/5 - loss 0.46648245 - samples/sec: 21.86 - lr: 0.020000\n",
+      "2021-09-21 20:17:55,515 epoch 3 - iter 4/5 - loss 0.48495338 - samples/sec: 21.52 - lr: 0.020000\n",
+      "2021-09-21 20:17:55,561 epoch 3 - iter 5/5 - loss 0.44868982 - samples/sec: 21.88 - lr: 0.020000\n",
+      "2021-09-21 20:17:55,562 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:17:55,562 EPOCH 3 done: loss 0.4487 - lr 0.0200000\n",
+      "2021-09-21 20:17:55,563 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:17:55,565 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:17:55,623 epoch 4 - iter 1/5 - loss 0.68136621 - samples/sec: 21.73 - lr: 0.020000\n",
+      "2021-09-21 20:17:55,671 epoch 4 - iter 2/5 - loss 0.43958813 - samples/sec: 21.08 - lr: 0.020000\n",
+      "2021-09-21 20:17:55,731 epoch 4 - iter 3/5 - loss 0.30152478 - samples/sec: 16.86 - lr: 0.020000\n",
+      "2021-09-21 20:17:55,777 epoch 4 - iter 4/5 - loss 0.49112798 - samples/sec: 21.92 - lr: 0.020000\n",
+      "2021-09-21 20:17:55,823 epoch 4 - iter 5/5 - loss 0.56916266 - samples/sec: 22.00 - lr: 0.020000\n",
+      "2021-09-21 20:17:55,824 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:17:55,824 EPOCH 4 done: loss 0.5692 - lr 0.0200000\n",
+      "2021-09-21 20:17:55,825 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:17:55,827 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:17:55,885 epoch 5 - iter 1/5 - loss 0.49251926 - samples/sec: 21.92 - lr: 0.020000\n",
+      "2021-09-21 20:17:55,931 epoch 5 - iter 2/5 - loss 0.55742452 - samples/sec: 21.85 - lr: 0.020000\n",
+      "2021-09-21 20:17:55,994 epoch 5 - iter 3/5 - loss 0.72103661 - samples/sec: 15.93 - lr: 0.020000\n",
+      "2021-09-21 20:17:56,040 epoch 5 - iter 4/5 - loss 0.57173727 - samples/sec: 21.92 - lr: 0.020000\n",
+      "2021-09-21 20:17:56,087 epoch 5 - iter 5/5 - loss 0.50917937 - samples/sec: 21.67 - lr: 0.020000\n",
+      "2021-09-21 20:17:56,087 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:17:56,088 EPOCH 5 done: loss 0.5092 - lr 0.0200000\n",
+      "2021-09-21 20:17:56,088 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:17:56,090 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:17:56,150 epoch 6 - iter 1/5 - loss 0.81968802 - samples/sec: 20.90 - lr: 0.020000\n",
+      "2021-09-21 20:17:56,197 epoch 6 - iter 2/5 - loss 0.66821331 - samples/sec: 21.76 - lr: 0.020000\n",
+      "2021-09-21 20:17:56,243 epoch 6 - iter 3/5 - loss 0.50651985 - samples/sec: 21.83 - lr: 0.020000\n",
+      "2021-09-21 20:17:56,306 epoch 6 - iter 4/5 - loss 0.50908405 - samples/sec: 16.01 - lr: 0.020000\n",
+      "2021-09-21 20:17:56,352 epoch 6 - iter 5/5 - loss 0.52911738 - samples/sec: 21.82 - lr: 0.020000\n",
+      "2021-09-21 20:17:56,353 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:17:56,353 EPOCH 6 done: loss 0.5291 - lr 0.0200000\n",
+      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 20:17:56,354 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:17:56,356 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:17:56,410 epoch 7 - iter 1/5 - loss 0.10097047 - samples/sec: 23.56 - lr: 0.010000\n",
+      "2021-09-21 20:17:56,453 epoch 7 - iter 2/5 - loss 0.07047999 - samples/sec: 23.61 - lr: 0.010000\n",
+      "2021-09-21 20:17:56,516 epoch 7 - iter 3/5 - loss 0.10461356 - samples/sec: 16.02 - lr: 0.010000\n",
+      "2021-09-21 20:17:56,562 epoch 7 - iter 4/5 - loss 0.12340903 - samples/sec: 21.89 - lr: 0.010000\n",
+      "2021-09-21 20:17:56,610 epoch 7 - iter 5/5 - loss 0.17579143 - samples/sec: 20.86 - lr: 0.010000\n",
+      "2021-09-21 20:17:56,611 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:17:56,612 EPOCH 7 done: loss 0.1758 - lr 0.0100000\n",
+      "2021-09-21 20:17:56,612 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:17:56,616 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:17:56,674 epoch 8 - iter 1/5 - loss 0.09204919 - samples/sec: 21.84 - lr: 0.010000\n",
+      "2021-09-21 20:17:56,737 epoch 8 - iter 2/5 - loss 0.09023535 - samples/sec: 15.93 - lr: 0.010000\n",
+      "2021-09-21 20:17:56,783 epoch 8 - iter 3/5 - loss 0.08524602 - samples/sec: 21.99 - lr: 0.010000\n",
+      "2021-09-21 20:17:56,829 epoch 8 - iter 4/5 - loss 0.29025486 - samples/sec: 21.81 - lr: 0.010000\n",
+      "2021-09-21 20:17:56,872 epoch 8 - iter 5/5 - loss 0.23435949 - samples/sec: 23.48 - lr: 0.010000\n",
+      "2021-09-21 20:17:56,873 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:17:56,873 EPOCH 8 done: loss 0.2344 - lr 0.0100000\n",
+      "2021-09-21 20:17:56,874 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:17:56,876 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:17:56,930 epoch 9 - iter 1/5 - loss 0.00738552 - samples/sec: 23.61 - lr: 0.010000\n",
+      "2021-09-21 20:17:56,973 epoch 9 - iter 2/5 - loss 0.02862018 - samples/sec: 23.64 - lr: 0.010000\n",
+      "2021-09-21 20:17:57,018 epoch 9 - iter 3/5 - loss 0.02561698 - samples/sec: 22.51 - lr: 0.010000\n",
+      "2021-09-21 20:17:57,078 epoch 9 - iter 4/5 - loss 0.02490869 - samples/sec: 16.84 - lr: 0.010000\n",
+      "2021-09-21 20:17:57,126 epoch 9 - iter 5/5 - loss 0.06522868 - samples/sec: 20.97 - lr: 0.010000\n",
+      "2021-09-21 20:17:57,127 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:17:57,127 EPOCH 9 done: loss 0.0652 - lr 0.0100000\n",
+      "2021-09-21 20:17:57,127 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:17:57,129 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:17:57,187 epoch 10 - iter 1/5 - loss 0.36578920 - samples/sec: 21.95 - lr: 0.010000\n",
+      "2021-09-21 20:17:57,230 epoch 10 - iter 2/5 - loss 0.19244222 - samples/sec: 23.50 - lr: 0.010000\n",
+      "2021-09-21 20:17:57,273 epoch 10 - iter 3/5 - loss 0.13435782 - samples/sec: 23.33 - lr: 0.010000\n",
+      "2021-09-21 20:17:57,333 epoch 10 - iter 4/5 - loss 0.10552966 - samples/sec: 16.91 - lr: 0.010000\n",
+      "2021-09-21 20:17:57,376 epoch 10 - iter 5/5 - loss 0.09467089 - samples/sec: 23.57 - lr: 0.010000\n",
+      "2021-09-21 20:17:57,377 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:17:57,377 EPOCH 10 done: loss 0.0947 - lr 0.0100000\n",
+      "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 20:17:57,378 BAD EPOCHS (no improvement): 4\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:51:16,348 EPOCH 10 done: loss 0.1191 - lr 0.0050000\n",
-      "2021-09-08 10:51:16,348 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:51:25,470 Test data not provided setting final score to 0\n",
-      "2021-09-08 10:51:55,692 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
+      "2021-09-21 20:18:03,279 Test data not provided setting final score to 0\n",
+      "2021-09-21 20:18:33,563 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:52:00,375 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:18:37,573 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 5/5 [00:00<00:00, 13981.01it/s]"
+      "100%|██████████| 5/5 [00:00<00:00, 13644.45it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:52:00,377 [b'awful', b'bad', b'neutral', b'good', b'great']\n",
-      "2021-09-08 10:52:00,513 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:00,515 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:18:37,575 [b'awful', b'bad', b'neutral', b'good', b'great']\n",
+      "2021-09-21 20:18:37,584 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:18:37,585 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -1325,24 +1327,28 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:52:00,515 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:00,515 Corpus: \"Corpus: 5 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 10:52:00,516 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:00,516 Parameters:\n",
-      "2021-09-08 10:52:00,516  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:52:00,517  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:52:00,517  - patience: \"3\"\n",
-      "2021-09-08 10:52:00,517  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:52:00,517  - max_epochs: \"10\"\n",
-      "2021-09-08 10:52:00,518  - shuffle: \"True\"\n",
-      "2021-09-08 10:52:00,518  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:52:00,518  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:52:00,519 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:00,519 Model training base path: \"temp1\"\n",
-      "2021-09-08 10:52:00,519 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:00,519 Device: cuda:0\n",
-      "2021-09-08 10:52:00,520 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:00,520 Embeddings storage mode: cpu\n"
+      "2021-09-21 20:18:37,586 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:18:37,586 Corpus: \"Corpus: 5 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 20:18:37,587 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:18:37,587 Parameters:\n",
+      "2021-09-21 20:18:37,587  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:18:37,587  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:18:37,588  - patience: \"3\"\n",
+      "2021-09-21 20:18:37,588  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:18:37,588  - max_epochs: \"10\"\n",
+      "2021-09-21 20:18:37,589  - shuffle: \"True\"\n",
+      "2021-09-21 20:18:37,589  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:18:37,589  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:18:37,589 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:18:37,590 Model training base path: \"temp1\"\n",
+      "2021-09-21 20:18:37,590 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:18:37,590 Device: cuda:0\n",
+      "2021-09-21 20:18:37,590 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:18:37,591 Embeddings storage mode: cpu\n",
+      "2021-09-21 20:18:37,597 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:18:37,656 epoch 1 - iter 1/5 - loss 0.63101697 - samples/sec: 21.81 - lr: 0.020000\n",
+      "2021-09-21 20:18:37,703 epoch 1 - iter 2/5 - loss 0.56027211 - samples/sec: 21.64 - lr: 0.020000\n",
+      "2021-09-21 20:18:37,766 epoch 1 - iter 3/5 - loss 0.84181175 - samples/sec: 15.91 - lr: 0.020000\n"
      ]
     },
     {
@@ -1356,126 +1362,123 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:52:00,751 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:00,811 epoch 1 - iter 1/5 - loss 0.16422738 - samples/sec: 23.86 - lr: 0.020000\n",
-      "2021-09-08 10:52:00,878 epoch 1 - iter 2/5 - loss 0.55375985 - samples/sec: 14.91 - lr: 0.020000\n",
-      "2021-09-08 10:52:00,932 epoch 1 - iter 3/5 - loss 0.74990589 - samples/sec: 18.83 - lr: 0.020000\n",
-      "2021-09-08 10:52:00,980 epoch 1 - iter 4/5 - loss 0.57254801 - samples/sec: 20.84 - lr: 0.020000\n",
-      "2021-09-08 10:52:01,039 epoch 1 - iter 5/5 - loss 0.49401435 - samples/sec: 17.11 - lr: 0.020000\n",
-      "2021-09-08 10:52:01,040 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:01,040 EPOCH 1 done: loss 0.4940 - lr 0.0200000\n",
-      "2021-09-08 10:52:01,041 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:18:37,813 epoch 1 - iter 4/5 - loss 0.71687800 - samples/sec: 21.33 - lr: 0.020000\n",
+      "2021-09-21 20:18:37,862 epoch 1 - iter 5/5 - loss 0.61039341 - samples/sec: 20.80 - lr: 0.020000\n",
+      "2021-09-21 20:18:37,863 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:18:37,863 EPOCH 1 done: loss 0.6104 - lr 0.0200000\n",
+      "2021-09-21 20:18:37,864 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:18:42,418 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:18:42,496 epoch 2 - iter 1/5 - loss 0.72073644 - samples/sec: 15.72 - lr: 0.020000\n",
+      "2021-09-21 20:18:42,545 epoch 2 - iter 2/5 - loss 0.96645531 - samples/sec: 20.37 - lr: 0.020000\n",
+      "2021-09-21 20:18:42,591 epoch 2 - iter 3/5 - loss 0.76419100 - samples/sec: 21.89 - lr: 0.020000\n",
+      "2021-09-21 20:18:42,643 epoch 2 - iter 4/5 - loss 0.75785264 - samples/sec: 19.40 - lr: 0.020000\n",
+      "2021-09-21 20:18:42,690 epoch 2 - iter 5/5 - loss 0.63697202 - samples/sec: 21.63 - lr: 0.020000\n",
+      "2021-09-21 20:18:42,691 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:18:42,691 EPOCH 2 done: loss 0.6370 - lr 0.0200000\n",
+      "2021-09-21 20:18:42,692 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:52:07,400 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:07,465 epoch 2 - iter 1/5 - loss 0.05141036 - samples/sec: 19.96 - lr: 0.020000\n",
-      "2021-09-08 10:52:07,513 epoch 2 - iter 2/5 - loss 0.31729560 - samples/sec: 20.76 - lr: 0.020000\n",
-      "2021-09-08 10:52:07,560 epoch 2 - iter 3/5 - loss 0.43217883 - samples/sec: 21.58 - lr: 0.020000\n",
-      "2021-09-08 10:52:07,622 epoch 2 - iter 4/5 - loss 0.38379015 - samples/sec: 16.30 - lr: 0.020000\n",
-      "2021-09-08 10:52:07,671 epoch 2 - iter 5/5 - loss 0.51368261 - samples/sec: 20.71 - lr: 0.020000\n",
-      "2021-09-08 10:52:07,671 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:07,672 EPOCH 2 done: loss 0.5137 - lr 0.0200000\n",
-      "2021-09-08 10:52:07,672 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:18:55,490 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:18:55,560 epoch 3 - iter 1/5 - loss 0.73642701 - samples/sec: 17.83 - lr: 0.020000\n",
+      "2021-09-21 20:18:55,610 epoch 3 - iter 2/5 - loss 0.76998603 - samples/sec: 20.35 - lr: 0.020000\n",
+      "2021-09-21 20:18:55,656 epoch 3 - iter 3/5 - loss 0.56811066 - samples/sec: 21.75 - lr: 0.020000\n",
+      "2021-09-21 20:18:55,717 epoch 3 - iter 4/5 - loss 0.76363891 - samples/sec: 16.62 - lr: 0.020000\n",
+      "2021-09-21 20:18:55,764 epoch 3 - iter 5/5 - loss 0.70335011 - samples/sec: 21.51 - lr: 0.020000\n",
+      "2021-09-21 20:18:55,765 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:18:55,765 EPOCH 3 done: loss 0.7034 - lr 0.0200000\n",
+      "2021-09-21 20:18:55,765 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:52:12,891 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:12,982 epoch 3 - iter 1/5 - loss 0.41854474 - samples/sec: 15.37 - lr: 0.020000\n",
-      "2021-09-08 10:52:13,051 epoch 3 - iter 2/5 - loss 0.25414625 - samples/sec: 14.45 - lr: 0.020000\n",
-      "2021-09-08 10:52:13,109 epoch 3 - iter 3/5 - loss 0.42243103 - samples/sec: 17.61 - lr: 0.020000\n",
-      "2021-09-08 10:52:13,153 epoch 3 - iter 4/5 - loss 0.32002418 - samples/sec: 22.48 - lr: 0.020000\n",
-      "2021-09-08 10:52:13,202 epoch 3 - iter 5/5 - loss 0.45032500 - samples/sec: 20.81 - lr: 0.020000\n",
-      "2021-09-08 10:52:13,203 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:13,204 EPOCH 3 done: loss 0.4503 - lr 0.0200000\n",
-      "2021-09-08 10:52:13,204 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:52:13,206 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:13,270 epoch 4 - iter 1/5 - loss 1.39928091 - samples/sec: 20.08 - lr: 0.020000\n",
-      "2021-09-08 10:52:13,330 epoch 4 - iter 2/5 - loss 1.09092909 - samples/sec: 16.67 - lr: 0.020000\n",
-      "2021-09-08 10:52:13,382 epoch 4 - iter 3/5 - loss 1.07755836 - samples/sec: 19.68 - lr: 0.020000\n",
-      "2021-09-08 10:52:13,433 epoch 4 - iter 4/5 - loss 0.90097475 - samples/sec: 19.68 - lr: 0.020000\n",
-      "2021-09-08 10:52:13,484 epoch 4 - iter 5/5 - loss 0.76934597 - samples/sec: 20.01 - lr: 0.020000\n",
-      "2021-09-08 10:52:13,485 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:13,485 EPOCH 4 done: loss 0.7693 - lr 0.0200000\n",
-      "2021-09-08 10:52:13,486 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:18:59,865 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:18:59,944 epoch 4 - iter 1/5 - loss 0.51650929 - samples/sec: 15.58 - lr: 0.020000\n",
+      "2021-09-21 20:19:00,000 epoch 4 - iter 2/5 - loss 0.64176866 - samples/sec: 18.16 - lr: 0.020000\n",
+      "2021-09-21 20:19:00,048 epoch 4 - iter 3/5 - loss 0.54106977 - samples/sec: 20.72 - lr: 0.020000\n",
+      "2021-09-21 20:19:00,092 epoch 4 - iter 4/5 - loss 0.41869693 - samples/sec: 23.07 - lr: 0.020000\n",
+      "2021-09-21 20:19:00,135 epoch 4 - iter 5/5 - loss 0.33634724 - samples/sec: 23.34 - lr: 0.020000\n",
+      "2021-09-21 20:19:00,137 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:00,137 EPOCH 4 done: loss 0.3363 - lr 0.0200000\n",
+      "2021-09-21 20:19:00,137 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:19:00,140 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:00,200 epoch 5 - iter 1/5 - loss 0.50690579 - samples/sec: 21.44 - lr: 0.020000\n",
+      "2021-09-21 20:19:00,258 epoch 5 - iter 2/5 - loss 0.45292170 - samples/sec: 17.32 - lr: 0.020000\n",
+      "2021-09-21 20:19:00,309 epoch 5 - iter 3/5 - loss 0.49438952 - samples/sec: 19.70 - lr: 0.020000\n",
+      "2021-09-21 20:19:00,355 epoch 5 - iter 4/5 - loss 0.61027824 - samples/sec: 21.83 - lr: 0.020000\n",
+      "2021-09-21 20:19:00,402 epoch 5 - iter 5/5 - loss 0.77005090 - samples/sec: 21.58 - lr: 0.020000\n",
+      "2021-09-21 20:19:00,403 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:00,403 EPOCH 5 done: loss 0.7701 - lr 0.0200000\n",
+      "2021-09-21 20:19:00,404 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:52:20,414 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:20,476 epoch 5 - iter 1/5 - loss 0.28023306 - samples/sec: 21.12 - lr: 0.020000\n",
-      "2021-09-08 10:52:20,521 epoch 5 - iter 2/5 - loss 0.16183762 - samples/sec: 22.22 - lr: 0.020000\n",
-      "2021-09-08 10:52:20,585 epoch 5 - iter 3/5 - loss 0.16861655 - samples/sec: 15.87 - lr: 0.020000\n",
-      "2021-09-08 10:52:20,634 epoch 5 - iter 4/5 - loss 0.19049374 - samples/sec: 20.40 - lr: 0.020000\n",
-      "2021-09-08 10:52:20,681 epoch 5 - iter 5/5 - loss 0.19988657 - samples/sec: 21.49 - lr: 0.020000\n",
-      "2021-09-08 10:52:20,682 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:20,682 EPOCH 5 done: loss 0.1999 - lr 0.0200000\n",
-      "2021-09-08 10:52:20,683 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:52:28,350 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:28,440 epoch 6 - iter 1/5 - loss 1.16733444 - samples/sec: 13.43 - lr: 0.020000\n",
-      "2021-09-08 10:52:28,506 epoch 6 - iter 2/5 - loss 0.90663165 - samples/sec: 15.35 - lr: 0.020000\n",
-      "2021-09-08 10:52:28,561 epoch 6 - iter 3/5 - loss 0.62543542 - samples/sec: 18.45 - lr: 0.020000\n",
-      "2021-09-08 10:52:28,610 epoch 6 - iter 4/5 - loss 0.69614078 - samples/sec: 20.49 - lr: 0.020000\n",
-      "2021-09-08 10:52:28,690 epoch 6 - iter 5/5 - loss 0.57737721 - samples/sec: 12.62 - lr: 0.020000\n",
-      "2021-09-08 10:52:28,691 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:28,691 EPOCH 6 done: loss 0.5774 - lr 0.0200000\n",
-      "2021-09-08 10:52:28,691 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:52:28,695 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:28,776 epoch 7 - iter 1/5 - loss 0.02057446 - samples/sec: 17.20 - lr: 0.020000\n",
-      "2021-09-08 10:52:28,827 epoch 7 - iter 2/5 - loss 0.30694148 - samples/sec: 19.89 - lr: 0.020000\n",
-      "2021-09-08 10:52:28,878 epoch 7 - iter 3/5 - loss 0.28698379 - samples/sec: 19.74 - lr: 0.020000\n",
-      "2021-09-08 10:52:28,926 epoch 7 - iter 4/5 - loss 0.22538951 - samples/sec: 21.35 - lr: 0.020000\n",
-      "2021-09-08 10:52:28,984 epoch 7 - iter 5/5 - loss 0.46522131 - samples/sec: 17.47 - lr: 0.020000\n",
-      "2021-09-08 10:52:28,985 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:28,986 EPOCH 7 done: loss 0.4652 - lr 0.0200000\n",
-      "2021-09-08 10:52:28,986 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:52:28,988 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:29,059 epoch 8 - iter 1/5 - loss 0.28798041 - samples/sec: 17.57 - lr: 0.020000\n",
-      "2021-09-08 10:52:29,106 epoch 8 - iter 2/5 - loss 0.16675889 - samples/sec: 21.48 - lr: 0.020000\n",
-      "2021-09-08 10:52:29,157 epoch 8 - iter 3/5 - loss 0.26891188 - samples/sec: 19.74 - lr: 0.020000\n",
-      "2021-09-08 10:52:29,204 epoch 8 - iter 4/5 - loss 0.20304662 - samples/sec: 21.63 - lr: 0.020000\n",
-      "2021-09-08 10:52:29,259 epoch 8 - iter 5/5 - loss 0.40055786 - samples/sec: 18.56 - lr: 0.020000\n",
-      "2021-09-08 10:52:29,260 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:29,261 EPOCH 8 done: loss 0.4006 - lr 0.0200000\n",
-      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 10:52:29,261 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:52:29,265 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:29,326 epoch 9 - iter 1/5 - loss 0.67449600 - samples/sec: 20.84 - lr: 0.010000\n",
-      "2021-09-08 10:52:29,374 epoch 9 - iter 2/5 - loss 0.62775898 - samples/sec: 20.92 - lr: 0.010000\n",
-      "2021-09-08 10:52:29,424 epoch 9 - iter 3/5 - loss 0.56207584 - samples/sec: 20.43 - lr: 0.010000\n",
-      "2021-09-08 10:52:29,476 epoch 9 - iter 4/5 - loss 0.42660274 - samples/sec: 19.21 - lr: 0.010000\n",
-      "2021-09-08 10:52:29,520 epoch 9 - iter 5/5 - loss 0.34667015 - samples/sec: 23.04 - lr: 0.010000\n",
-      "2021-09-08 10:52:29,521 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:29,522 EPOCH 9 done: loss 0.3467 - lr 0.0100000\n",
-      "2021-09-08 10:52:29,522 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:52:29,599 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:29,661 epoch 10 - iter 1/5 - loss 0.23174746 - samples/sec: 20.63 - lr: 0.010000\n",
-      "2021-09-08 10:52:29,715 epoch 10 - iter 2/5 - loss 0.12982339 - samples/sec: 18.46 - lr: 0.010000\n",
-      "2021-09-08 10:52:29,760 epoch 10 - iter 3/5 - loss 0.09796890 - samples/sec: 22.77 - lr: 0.010000\n"
+      "2021-09-21 20:19:04,403 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:04,464 epoch 6 - iter 1/5 - loss 0.44553983 - samples/sec: 21.27 - lr: 0.020000\n",
+      "2021-09-21 20:19:04,513 epoch 6 - iter 2/5 - loss 0.31759205 - samples/sec: 20.77 - lr: 0.020000\n",
+      "2021-09-21 20:19:04,575 epoch 6 - iter 3/5 - loss 0.27292194 - samples/sec: 16.16 - lr: 0.020000\n",
+      "2021-09-21 20:19:04,624 epoch 6 - iter 4/5 - loss 0.30591525 - samples/sec: 20.48 - lr: 0.020000\n",
+      "2021-09-21 20:19:04,679 epoch 6 - iter 5/5 - loss 0.38867379 - samples/sec: 18.36 - lr: 0.020000\n",
+      "2021-09-21 20:19:04,680 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:04,681 EPOCH 6 done: loss 0.3887 - lr 0.0200000\n",
+      "2021-09-21 20:19:04,681 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:19:04,683 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:04,741 epoch 7 - iter 1/5 - loss 0.39670691 - samples/sec: 21.96 - lr: 0.020000\n",
+      "2021-09-21 20:19:04,794 epoch 7 - iter 2/5 - loss 0.24950513 - samples/sec: 18.94 - lr: 0.020000\n",
+      "2021-09-21 20:19:04,841 epoch 7 - iter 3/5 - loss 0.23549730 - samples/sec: 21.55 - lr: 0.020000\n",
+      "2021-09-21 20:19:04,893 epoch 7 - iter 4/5 - loss 0.40207017 - samples/sec: 19.13 - lr: 0.020000\n",
+      "2021-09-21 20:19:04,937 epoch 7 - iter 5/5 - loss 0.32769412 - samples/sec: 23.34 - lr: 0.020000\n",
+      "2021-09-21 20:19:04,938 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:04,938 EPOCH 7 done: loss 0.3277 - lr 0.0200000\n",
+      "2021-09-21 20:19:04,939 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:19:04,940 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:04,995 epoch 8 - iter 1/5 - loss 0.03532154 - samples/sec: 23.67 - lr: 0.020000\n",
+      "2021-09-21 20:19:05,039 epoch 8 - iter 2/5 - loss 0.01916747 - samples/sec: 23.18 - lr: 0.020000\n",
+      "2021-09-21 20:19:05,089 epoch 8 - iter 3/5 - loss 0.25422798 - samples/sec: 19.84 - lr: 0.020000\n",
+      "2021-09-21 20:19:05,146 epoch 8 - iter 4/5 - loss 0.21380654 - samples/sec: 17.83 - lr: 0.020000\n",
+      "2021-09-21 20:19:05,189 epoch 8 - iter 5/5 - loss 0.17229971 - samples/sec: 23.23 - lr: 0.020000\n",
+      "2021-09-21 20:19:05,190 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:05,191 EPOCH 8 done: loss 0.1723 - lr 0.0200000\n",
+      "2021-09-21 20:19:05,191 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:19:05,193 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:05,250 epoch 9 - iter 1/5 - loss 0.00114195 - samples/sec: 22.05 - lr: 0.020000\n",
+      "2021-09-21 20:19:05,306 epoch 9 - iter 2/5 - loss 0.10829586 - samples/sec: 18.01 - lr: 0.020000\n",
+      "2021-09-21 20:19:05,353 epoch 9 - iter 3/5 - loss 0.59507791 - samples/sec: 21.71 - lr: 0.020000\n",
+      "2021-09-21 20:19:05,404 epoch 9 - iter 4/5 - loss 0.60428356 - samples/sec: 19.81 - lr: 0.020000\n",
+      "2021-09-21 20:19:05,447 epoch 9 - iter 5/5 - loss 0.48684857 - samples/sec: 23.57 - lr: 0.020000\n",
+      "2021-09-21 20:19:05,448 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:05,448 EPOCH 9 done: loss 0.4868 - lr 0.0200000\n",
+      "Epoch     9: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 20:19:05,448 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:19:05,450 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:05,505 epoch 10 - iter 1/5 - loss 0.00657638 - samples/sec: 23.72 - lr: 0.010000\n",
+      "2021-09-21 20:19:05,552 epoch 10 - iter 2/5 - loss 0.11924902 - samples/sec: 21.49 - lr: 0.010000\n",
+      "2021-09-21 20:19:05,601 epoch 10 - iter 3/5 - loss 0.19701607 - samples/sec: 20.64 - lr: 0.010000\n",
+      "2021-09-21 20:19:05,654 epoch 10 - iter 4/5 - loss 0.15395844 - samples/sec: 18.74 - lr: 0.010000\n",
+      "2021-09-21 20:19:05,705 epoch 10 - iter 5/5 - loss 0.26874923 - samples/sec: 19.73 - lr: 0.010000\n",
+      "2021-09-21 20:19:05,706 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:05,707 EPOCH 10 done: loss 0.2687 - lr 0.0100000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:52:29,807 epoch 10 - iter 4/5 - loss 0.15211714 - samples/sec: 21.16 - lr: 0.010000\n",
-      "2021-09-08 10:52:29,851 epoch 10 - iter 5/5 - loss 0.12964598 - samples/sec: 23.09 - lr: 0.010000\n",
-      "2021-09-08 10:52:29,852 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:52:29,852 EPOCH 10 done: loss 0.1296 - lr 0.0100000\n",
-      "2021-09-08 10:52:29,853 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:52:43,197 Test data not provided setting final score to 0\n",
-      "2021-09-08 10:53:16,217 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
+      "2021-09-21 20:19:05,707 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:19:09,742 Test data not provided setting final score to 0\n",
+      "2021-09-21 20:19:39,882 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:53:20,231 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:19:44,399 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 5/5 [00:00<00:00, 12648.69it/s]"
+      "100%|██████████| 5/5 [00:00<00:00, 15442.95it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:53:20,233 [b'awful', b'bad', b'neutral', b'good', b'great']\n",
-      "2021-09-08 10:53:20,424 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:20,426 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:19:44,400 [b'awful', b'bad', b'neutral', b'good', b'great']\n",
+      "2021-09-21 20:19:44,409 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:44,411 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -1788,24 +1791,26 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:53:20,427 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:20,427 Corpus: \"Corpus: 5 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 10:53:20,427 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:20,427 Parameters:\n",
-      "2021-09-08 10:53:20,428  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:53:20,428  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:53:20,428  - patience: \"3\"\n",
-      "2021-09-08 10:53:20,429  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:53:20,429  - max_epochs: \"10\"\n",
-      "2021-09-08 10:53:20,429  - shuffle: \"True\"\n",
-      "2021-09-08 10:53:20,429  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:53:20,430  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:53:20,430 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:20,430 Model training base path: \"temp1\"\n",
-      "2021-09-08 10:53:20,430 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:20,431 Device: cuda:0\n",
-      "2021-09-08 10:53:20,431 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:20,431 Embeddings storage mode: cpu\n"
+      "2021-09-21 20:19:44,411 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:44,412 Corpus: \"Corpus: 5 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 20:19:44,412 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:44,412 Parameters:\n",
+      "2021-09-21 20:19:44,412  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:19:44,413  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:19:44,413  - patience: \"3\"\n",
+      "2021-09-21 20:19:44,413  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:19:44,414  - max_epochs: \"10\"\n",
+      "2021-09-21 20:19:44,414  - shuffle: \"True\"\n",
+      "2021-09-21 20:19:44,414  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:19:44,414  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:19:44,415 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:44,415 Model training base path: \"temp1\"\n",
+      "2021-09-21 20:19:44,415 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:44,416 Device: cuda:0\n",
+      "2021-09-21 20:19:44,416 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:44,416 Embeddings storage mode: cpu\n",
+      "2021-09-21 20:19:44,422 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:44,534 epoch 1 - iter 1/5 - loss 0.52977234 - samples/sec: 14.89 - lr: 0.020000\n"
      ]
     },
     {
@@ -1819,125 +1824,137 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:53:20,638 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:20,695 epoch 1 - iter 1/5 - loss 0.59035867 - samples/sec: 23.22 - lr: 0.020000\n",
-      "2021-09-08 10:53:20,757 epoch 1 - iter 2/5 - loss 0.78867671 - samples/sec: 16.38 - lr: 0.020000\n",
-      "2021-09-08 10:53:20,808 epoch 1 - iter 3/5 - loss 0.72998971 - samples/sec: 19.88 - lr: 0.020000\n",
-      "2021-09-08 10:53:20,865 epoch 1 - iter 4/5 - loss 0.72250566 - samples/sec: 17.56 - lr: 0.020000\n",
-      "2021-09-08 10:53:20,919 epoch 1 - iter 5/5 - loss 0.75916907 - samples/sec: 18.71 - lr: 0.020000\n",
-      "2021-09-08 10:53:20,921 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:20,921 EPOCH 1 done: loss 0.7592 - lr 0.0200000\n",
-      "2021-09-08 10:53:20,921 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:19:44,600 epoch 1 - iter 2/5 - loss 0.77040693 - samples/sec: 15.25 - lr: 0.020000\n",
+      "2021-09-21 20:19:44,654 epoch 1 - iter 3/5 - loss 0.70614664 - samples/sec: 18.52 - lr: 0.020000\n",
+      "2021-09-21 20:19:44,737 epoch 1 - iter 4/5 - loss 0.69629769 - samples/sec: 12.20 - lr: 0.020000\n",
+      "2021-09-21 20:19:44,806 epoch 1 - iter 5/5 - loss 0.74078341 - samples/sec: 14.56 - lr: 0.020000\n",
+      "2021-09-21 20:19:44,807 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:44,807 EPOCH 1 done: loss 0.7408 - lr 0.0200000\n",
+      "2021-09-21 20:19:44,807 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:53:34,217 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:34,309 epoch 2 - iter 1/5 - loss 0.63472140 - samples/sec: 15.55 - lr: 0.020000\n",
-      "2021-09-08 10:53:34,383 epoch 2 - iter 2/5 - loss 0.88915461 - samples/sec: 13.64 - lr: 0.020000\n",
-      "2021-09-08 10:53:34,475 epoch 2 - iter 3/5 - loss 0.70161228 - samples/sec: 10.96 - lr: 0.020000\n",
-      "2021-09-08 10:53:34,537 epoch 2 - iter 4/5 - loss 0.70929465 - samples/sec: 16.26 - lr: 0.020000\n",
-      "2021-09-08 10:53:34,617 epoch 2 - iter 5/5 - loss 0.58108556 - samples/sec: 12.59 - lr: 0.020000\n",
-      "2021-09-08 10:53:34,618 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:34,618 EPOCH 2 done: loss 0.5811 - lr 0.0200000\n",
-      "2021-09-08 10:53:34,619 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:53:34,621 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:34,717 epoch 3 - iter 1/5 - loss 0.24991019 - samples/sec: 12.26 - lr: 0.020000\n",
-      "2021-09-08 10:53:34,799 epoch 3 - iter 2/5 - loss 0.44019148 - samples/sec: 12.32 - lr: 0.020000\n",
-      "2021-09-08 10:53:34,847 epoch 3 - iter 3/5 - loss 0.53253695 - samples/sec: 20.88 - lr: 0.020000\n",
-      "2021-09-08 10:53:34,898 epoch 3 - iter 4/5 - loss 0.47545492 - samples/sec: 19.95 - lr: 0.020000\n",
-      "2021-09-08 10:53:34,949 epoch 3 - iter 5/5 - loss 0.56071862 - samples/sec: 19.61 - lr: 0.020000\n",
-      "2021-09-08 10:53:34,951 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:34,951 EPOCH 3 done: loss 0.5607 - lr 0.0200000\n",
-      "2021-09-08 10:53:34,951 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:53:35,018 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:35,091 epoch 4 - iter 1/5 - loss 0.07671461 - samples/sec: 16.84 - lr: 0.020000\n",
-      "2021-09-08 10:53:35,143 epoch 4 - iter 2/5 - loss 0.15964102 - samples/sec: 19.62 - lr: 0.020000\n",
-      "2021-09-08 10:53:35,205 epoch 4 - iter 3/5 - loss 0.29229573 - samples/sec: 16.25 - lr: 0.020000\n",
-      "2021-09-08 10:53:35,256 epoch 4 - iter 4/5 - loss 0.31520336 - samples/sec: 19.90 - lr: 0.020000\n",
-      "2021-09-08 10:53:35,305 epoch 4 - iter 5/5 - loss 0.29775982 - samples/sec: 20.73 - lr: 0.020000\n",
-      "2021-09-08 10:53:35,306 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:35,306 EPOCH 4 done: loss 0.2978 - lr 0.0200000\n",
-      "2021-09-08 10:53:35,307 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:53:35,381 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:35,441 epoch 5 - iter 1/5 - loss 0.01469757 - samples/sec: 21.55 - lr: 0.020000\n",
-      "2021-09-08 10:53:35,486 epoch 5 - iter 2/5 - loss 0.03598190 - samples/sec: 22.52 - lr: 0.020000\n",
-      "2021-09-08 10:53:35,546 epoch 5 - iter 3/5 - loss 0.02485176 - samples/sec: 17.04 - lr: 0.020000\n",
-      "2021-09-08 10:53:35,601 epoch 5 - iter 4/5 - loss 0.09499571 - samples/sec: 18.31 - lr: 0.020000\n",
-      "2021-09-08 10:53:35,648 epoch 5 - iter 5/5 - loss 0.20229133 - samples/sec: 21.13 - lr: 0.020000\n",
-      "2021-09-08 10:53:35,649 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:35,650 EPOCH 5 done: loss 0.2023 - lr 0.0200000\n",
+      "2021-09-21 20:19:49,063 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:49,125 epoch 2 - iter 1/5 - loss 0.69179183 - samples/sec: 20.98 - lr: 0.020000\n",
+      "2021-09-21 20:19:49,173 epoch 2 - iter 2/5 - loss 0.50124599 - samples/sec: 21.00 - lr: 0.020000\n",
+      "2021-09-21 20:19:49,221 epoch 2 - iter 3/5 - loss 0.44069084 - samples/sec: 21.39 - lr: 0.020000\n",
+      "2021-09-21 20:19:49,273 epoch 2 - iter 4/5 - loss 0.48155122 - samples/sec: 19.35 - lr: 0.020000\n",
+      "2021-09-21 20:19:49,328 epoch 2 - iter 5/5 - loss 0.50795147 - samples/sec: 18.50 - lr: 0.020000\n",
+      "2021-09-21 20:19:49,329 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:49,329 EPOCH 2 done: loss 0.5080 - lr 0.0200000\n",
+      "2021-09-21 20:19:49,329 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:19:49,398 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:49,460 epoch 3 - iter 1/5 - loss 0.09960949 - samples/sec: 20.91 - lr: 0.020000\n",
+      "2021-09-21 20:19:49,508 epoch 3 - iter 2/5 - loss 0.21669520 - samples/sec: 21.08 - lr: 0.020000\n",
+      "2021-09-21 20:19:49,560 epoch 3 - iter 3/5 - loss 0.25691261 - samples/sec: 19.72 - lr: 0.020000\n",
+      "2021-09-21 20:19:49,609 epoch 3 - iter 4/5 - loss 0.30911971 - samples/sec: 20.60 - lr: 0.020000\n",
+      "2021-09-21 20:19:49,661 epoch 3 - iter 5/5 - loss 0.37521693 - samples/sec: 19.05 - lr: 0.020000\n",
+      "2021-09-21 20:19:49,663 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:49,663 EPOCH 3 done: loss 0.3752 - lr 0.0200000\n",
+      "2021-09-21 20:19:49,663 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:19:49,735 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:49,797 epoch 4 - iter 1/5 - loss 0.30386367 - samples/sec: 20.37 - lr: 0.020000\n",
+      "2021-09-21 20:19:49,844 epoch 4 - iter 2/5 - loss 0.16012739 - samples/sec: 21.54 - lr: 0.020000\n",
+      "2021-09-21 20:19:49,891 epoch 4 - iter 3/5 - loss 0.60008843 - samples/sec: 21.67 - lr: 0.020000\n",
+      "2021-09-21 20:19:49,936 epoch 4 - iter 4/5 - loss 0.45145358 - samples/sec: 22.28 - lr: 0.020000\n",
+      "2021-09-21 20:19:49,984 epoch 4 - iter 5/5 - loss 0.43867820 - samples/sec: 21.00 - lr: 0.020000\n",
+      "2021-09-21 20:19:49,985 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:49,985 EPOCH 4 done: loss 0.4387 - lr 0.0200000\n",
+      "2021-09-21 20:19:49,986 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:19:50,053 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:50,115 epoch 5 - iter 1/5 - loss 0.88890129 - samples/sec: 20.53 - lr: 0.020000\n",
+      "2021-09-21 20:19:50,166 epoch 5 - iter 2/5 - loss 0.52096581 - samples/sec: 20.00 - lr: 0.020000\n",
+      "2021-09-21 20:19:50,216 epoch 5 - iter 3/5 - loss 0.38207864 - samples/sec: 20.17 - lr: 0.020000\n",
+      "2021-09-21 20:19:50,260 epoch 5 - iter 4/5 - loss 0.29760381 - samples/sec: 23.02 - lr: 0.020000\n",
+      "2021-09-21 20:19:50,306 epoch 5 - iter 5/5 - loss 0.29284886 - samples/sec: 21.77 - lr: 0.020000\n",
+      "2021-09-21 20:19:50,307 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:50,307 EPOCH 5 done: loss 0.2928 - lr 0.0200000\n",
       "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 10:53:35,650 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:53:35,752 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:35,811 epoch 6 - iter 1/5 - loss 0.91895002 - samples/sec: 21.29 - lr: 0.010000\n",
-      "2021-09-08 10:53:35,858 epoch 6 - iter 2/5 - loss 0.49291677 - samples/sec: 21.47 - lr: 0.010000\n",
-      "2021-09-08 10:53:35,918 epoch 6 - iter 3/5 - loss 0.66700348 - samples/sec: 16.86 - lr: 0.010000\n",
-      "2021-09-08 10:53:35,970 epoch 6 - iter 4/5 - loss 0.50116666 - samples/sec: 19.53 - lr: 0.010000\n",
-      "2021-09-08 10:53:36,018 epoch 6 - iter 5/5 - loss 0.65083722 - samples/sec: 21.07 - lr: 0.010000\n",
-      "2021-09-08 10:53:36,019 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:36,019 EPOCH 6 done: loss 0.6508 - lr 0.0100000\n",
-      "2021-09-08 10:53:36,019 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:53:36,113 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:36,173 epoch 7 - iter 1/5 - loss 0.38687956 - samples/sec: 21.33 - lr: 0.010000\n",
-      "2021-09-08 10:53:36,221 epoch 7 - iter 2/5 - loss 0.59539816 - samples/sec: 21.14 - lr: 0.010000\n",
-      "2021-09-08 10:53:36,272 epoch 7 - iter 3/5 - loss 0.40700482 - samples/sec: 19.52 - lr: 0.010000\n",
-      "2021-09-08 10:53:36,316 epoch 7 - iter 4/5 - loss 0.31130297 - samples/sec: 23.17 - lr: 0.010000\n",
-      "2021-09-08 10:53:36,375 epoch 7 - iter 5/5 - loss 0.32622709 - samples/sec: 16.87 - lr: 0.010000\n",
-      "2021-09-08 10:53:36,376 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:36,377 EPOCH 7 done: loss 0.3262 - lr 0.0100000\n",
-      "2021-09-08 10:53:36,377 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:53:36,476 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:36,551 epoch 8 - iter 1/5 - loss 0.25319791 - samples/sec: 16.07 - lr: 0.010000\n",
-      "2021-09-08 10:53:36,603 epoch 8 - iter 2/5 - loss 0.14272746 - samples/sec: 19.50 - lr: 0.010000\n",
-      "2021-09-08 10:53:36,650 epoch 8 - iter 3/5 - loss 0.14160472 - samples/sec: 21.15 - lr: 0.010000\n",
-      "2021-09-08 10:53:36,698 epoch 8 - iter 4/5 - loss 0.13599756 - samples/sec: 21.37 - lr: 0.010000\n",
-      "2021-09-08 10:53:36,741 epoch 8 - iter 5/5 - loss 0.11060945 - samples/sec: 23.20 - lr: 0.010000\n",
-      "2021-09-08 10:53:36,742 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:36,743 EPOCH 8 done: loss 0.1106 - lr 0.0100000\n",
-      "2021-09-08 10:53:36,743 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:53:36,857 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:36,924 epoch 9 - iter 1/5 - loss 0.07542677 - samples/sec: 18.38 - lr: 0.010000\n",
-      "2021-09-08 10:53:36,972 epoch 9 - iter 2/5 - loss 0.39602990 - samples/sec: 21.13 - lr: 0.010000\n",
-      "2021-09-08 10:53:37,031 epoch 9 - iter 3/5 - loss 0.26584038 - samples/sec: 17.06 - lr: 0.010000\n",
-      "2021-09-08 10:53:37,078 epoch 9 - iter 4/5 - loss 0.46361474 - samples/sec: 21.38 - lr: 0.010000\n",
-      "2021-09-08 10:53:37,122 epoch 9 - iter 5/5 - loss 0.37424885 - samples/sec: 23.21 - lr: 0.010000\n",
-      "2021-09-08 10:53:37,123 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:37,123 EPOCH 9 done: loss 0.3742 - lr 0.0100000\n",
-      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 10:53:37,124 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:53:37,262 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:37,317 epoch 10 - iter 1/5 - loss 0.01036552 - samples/sec: 23.12 - lr: 0.005000\n",
-      "2021-09-08 10:53:37,365 epoch 10 - iter 2/5 - loss 0.27023544 - samples/sec: 21.18 - lr: 0.005000\n",
-      "2021-09-08 10:53:37,425 epoch 10 - iter 3/5 - loss 0.36697822 - samples/sec: 16.86 - lr: 0.005000\n"
+      "2021-09-21 20:19:50,308 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:19:50,368 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:50,425 epoch 6 - iter 1/5 - loss 0.01190674 - samples/sec: 22.98 - lr: 0.010000\n",
+      "2021-09-21 20:19:50,471 epoch 6 - iter 2/5 - loss 0.54852318 - samples/sec: 21.84 - lr: 0.010000\n",
+      "2021-09-21 20:19:50,518 epoch 6 - iter 3/5 - loss 0.76257527 - samples/sec: 21.43 - lr: 0.010000\n",
+      "2021-09-21 20:19:50,567 epoch 6 - iter 4/5 - loss 0.86203998 - samples/sec: 20.69 - lr: 0.010000\n",
+      "2021-09-21 20:19:50,615 epoch 6 - iter 5/5 - loss 0.76026783 - samples/sec: 20.85 - lr: 0.010000\n",
+      "2021-09-21 20:19:50,616 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:50,617 EPOCH 6 done: loss 0.7603 - lr 0.0100000\n",
+      "2021-09-21 20:19:50,617 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:20:02,062 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:02,134 epoch 7 - iter 1/5 - loss 0.08245702 - samples/sec: 17.34 - lr: 0.010000\n",
+      "2021-09-21 20:20:02,185 epoch 7 - iter 2/5 - loss 0.07031591 - samples/sec: 20.03 - lr: 0.010000\n",
+      "2021-09-21 20:20:02,234 epoch 7 - iter 3/5 - loss 0.37748793 - samples/sec: 20.93 - lr: 0.010000\n",
+      "2021-09-21 20:20:02,283 epoch 7 - iter 4/5 - loss 0.53750514 - samples/sec: 20.68 - lr: 0.010000\n",
+      "2021-09-21 20:20:02,329 epoch 7 - iter 5/5 - loss 0.43317782 - samples/sec: 22.35 - lr: 0.010000\n",
+      "2021-09-21 20:20:02,330 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:02,331 EPOCH 7 done: loss 0.4332 - lr 0.0100000\n",
+      "2021-09-21 20:20:02,331 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:20:02,335 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:02,400 epoch 8 - iter 1/5 - loss 0.87819809 - samples/sec: 19.51 - lr: 0.010000\n",
+      "2021-09-21 20:20:02,449 epoch 8 - iter 2/5 - loss 0.51534145 - samples/sec: 20.78 - lr: 0.010000\n",
+      "2021-09-21 20:20:02,495 epoch 8 - iter 3/5 - loss 0.34963877 - samples/sec: 22.25 - lr: 0.010000\n",
+      "2021-09-21 20:20:02,540 epoch 8 - iter 4/5 - loss 0.27437142 - samples/sec: 22.63 - lr: 0.010000\n",
+      "2021-09-21 20:20:02,593 epoch 8 - iter 5/5 - loss 0.30899839 - samples/sec: 19.17 - lr: 0.010000\n",
+      "2021-09-21 20:20:02,595 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:02,595 EPOCH 8 done: loss 0.3090 - lr 0.0100000\n",
+      "2021-09-21 20:20:02,596 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:20:02,598 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:02,656 epoch 9 - iter 1/5 - loss 0.05718693 - samples/sec: 22.11 - lr: 0.010000\n",
+      "2021-09-21 20:20:02,705 epoch 9 - iter 2/5 - loss 0.09963684 - samples/sec: 21.08 - lr: 0.010000\n",
+      "2021-09-21 20:20:02,751 epoch 9 - iter 3/5 - loss 0.07038192 - samples/sec: 21.93 - lr: 0.010000\n",
+      "2021-09-21 20:20:02,812 epoch 9 - iter 4/5 - loss 0.08030871 - samples/sec: 16.55 - lr: 0.010000\n",
+      "2021-09-21 20:20:02,866 epoch 9 - iter 5/5 - loss 0.20804642 - samples/sec: 18.90 - lr: 0.010000\n",
+      "2021-09-21 20:20:02,867 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:02,868 EPOCH 9 done: loss 0.2080 - lr 0.0100000\n",
+      "2021-09-21 20:20:02,868 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:20:02,870 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:02,932 epoch 10 - iter 1/5 - loss 0.03095348 - samples/sec: 20.61 - lr: 0.010000\n",
+      "2021-09-21 20:20:02,983 epoch 10 - iter 2/5 - loss 0.24361614 - samples/sec: 19.80 - lr: 0.010000\n",
+      "2021-09-21 20:20:03,031 epoch 10 - iter 3/5 - loss 0.18567501 - samples/sec: 21.00 - lr: 0.010000\n",
+      "2021-09-21 20:20:03,080 epoch 10 - iter 4/5 - loss 0.14214650 - samples/sec: 20.81 - lr: 0.010000\n",
+      "2021-09-21 20:20:03,128 epoch 10 - iter 5/5 - loss 0.32758348 - samples/sec: 21.28 - lr: 0.010000\n",
+      "2021-09-21 20:20:03,131 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:53:37,472 epoch 10 - iter 4/5 - loss 0.34095885 - samples/sec: 21.33 - lr: 0.005000\n",
-      "2021-09-08 10:53:37,530 epoch 10 - iter 5/5 - loss 0.28855971 - samples/sec: 17.51 - lr: 0.005000\n",
-      "2021-09-08 10:53:37,531 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:53:37,531 EPOCH 10 done: loss 0.2886 - lr 0.0050000\n",
-      "2021-09-08 10:53:37,532 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:53:48,948 Test data not provided setting final score to 0\n",
-      "2021-09-08 10:54:21,057 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
+      "2021-09-21 20:20:03,131 EPOCH 10 done: loss 0.3276 - lr 0.0100000\n",
+      "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 20:20:03,132 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:20:07,303 Test data not provided setting final score to 0\n",
+      "2021-09-21 20:20:44,086 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:54:25,595 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:20:48,037 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 5/5 [00:00<00:00, 9379.03it/s]"
+      "100%|██████████| 5/5 [00:00<00:00, 14685.94it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:54:25,597 [b'awful', b'bad', b'neutral', b'good', b'great']\n",
-      "2021-09-08 10:54:25,771 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:25,773 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:20:48,039 [b'awful', b'bad', b'neutral', b'good', b'great']\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 20:20:54,513 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:54,515 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -2250,138 +2267,125 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:54:25,774 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:25,774 Corpus: \"Corpus: 5 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 10:54:25,774 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:25,774 Parameters:\n",
-      "2021-09-08 10:54:25,775  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:54:25,775  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:54:25,775  - patience: \"3\"\n",
-      "2021-09-08 10:54:25,776  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:54:25,776  - max_epochs: \"10\"\n",
-      "2021-09-08 10:54:25,776  - shuffle: \"True\"\n",
-      "2021-09-08 10:54:25,776  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:54:25,777  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:54:25,777 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:25,777 Model training base path: \"temp1\"\n",
-      "2021-09-08 10:54:25,777 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:25,778 Device: cuda:0\n",
-      "2021-09-08 10:54:25,778 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:25,778 Embeddings storage mode: cpu\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 10:54:26,014 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:26,069 epoch 1 - iter 1/5 - loss 0.61306483 - samples/sec: 24.33 - lr: 0.020000\n",
-      "2021-09-08 10:54:26,116 epoch 1 - iter 2/5 - loss 0.71130624 - samples/sec: 21.42 - lr: 0.020000\n",
-      "2021-09-08 10:54:26,197 epoch 1 - iter 3/5 - loss 0.66850772 - samples/sec: 12.44 - lr: 0.020000\n",
-      "2021-09-08 10:54:26,306 epoch 1 - iter 4/5 - loss 0.75142999 - samples/sec: 9.18 - lr: 0.020000\n",
-      "2021-09-08 10:54:26,353 epoch 1 - iter 5/5 - loss 0.70736454 - samples/sec: 21.60 - lr: 0.020000\n",
-      "2021-09-08 10:54:26,354 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:26,354 EPOCH 1 done: loss 0.7074 - lr 0.0200000\n",
-      "2021-09-08 10:54:26,355 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 10:54:32,053 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:32,227 epoch 2 - iter 1/5 - loss 0.72936624 - samples/sec: 6.32 - lr: 0.020000\n",
-      "2021-09-08 10:54:32,278 epoch 2 - iter 2/5 - loss 1.13213256 - samples/sec: 20.09 - lr: 0.020000\n",
-      "2021-09-08 10:54:32,324 epoch 2 - iter 3/5 - loss 0.79129885 - samples/sec: 21.72 - lr: 0.020000\n",
-      "2021-09-08 10:54:32,367 epoch 2 - iter 4/5 - loss 0.60429456 - samples/sec: 23.36 - lr: 0.020000\n",
-      "2021-09-08 10:54:32,439 epoch 2 - iter 5/5 - loss 0.57027212 - samples/sec: 14.10 - lr: 0.020000\n",
-      "2021-09-08 10:54:32,440 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:32,440 EPOCH 2 done: loss 0.5703 - lr 0.0200000\n",
-      "2021-09-08 10:54:32,440 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:54:34,182 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:34,278 epoch 3 - iter 1/5 - loss 0.35839811 - samples/sec: 12.07 - lr: 0.020000\n",
-      "2021-09-08 10:54:34,329 epoch 3 - iter 2/5 - loss 0.97216965 - samples/sec: 20.18 - lr: 0.020000\n",
-      "2021-09-08 10:54:34,381 epoch 3 - iter 3/5 - loss 0.92507322 - samples/sec: 19.32 - lr: 0.020000\n",
-      "2021-09-08 10:54:34,479 epoch 3 - iter 4/5 - loss 0.81340944 - samples/sec: 10.30 - lr: 0.020000\n",
-      "2021-09-08 10:54:34,527 epoch 3 - iter 5/5 - loss 0.94742300 - samples/sec: 20.98 - lr: 0.020000\n",
-      "2021-09-08 10:54:34,528 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:34,529 EPOCH 3 done: loss 0.9474 - lr 0.0200000\n",
-      "2021-09-08 10:54:34,529 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:20:54,515 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:54,516 Corpus: \"Corpus: 5 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 20:20:54,516 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:54,516 Parameters:\n",
+      "2021-09-21 20:20:54,516  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:20:54,517  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:20:54,517  - patience: \"3\"\n",
+      "2021-09-21 20:20:54,517  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:20:54,518  - max_epochs: \"10\"\n",
+      "2021-09-21 20:20:54,518  - shuffle: \"True\"\n",
+      "2021-09-21 20:20:54,518  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:20:54,518  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:20:54,519 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:54,519 Model training base path: \"temp1\"\n",
+      "2021-09-21 20:20:54,519 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:54,519 Device: cuda:0\n",
+      "2021-09-21 20:20:54,520 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:54,520 Embeddings storage mode: cpu\n",
+      "2021-09-21 20:20:54,531 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:54,602 epoch 1 - iter 1/5 - loss 0.69685441 - samples/sec: 17.74 - lr: 0.020000\n",
+      "2021-09-21 20:20:54,657 epoch 1 - iter 2/5 - loss 0.76062274 - samples/sec: 18.33 - lr: 0.020000\n",
+      "2021-09-21 20:20:54,715 epoch 1 - iter 3/5 - loss 0.96046642 - samples/sec: 17.70 - lr: 0.020000\n",
+      "2021-09-21 20:20:54,776 epoch 1 - iter 4/5 - loss 0.89589712 - samples/sec: 16.40 - lr: 0.020000\n",
+      "2021-09-21 20:20:54,830 epoch 1 - iter 5/5 - loss 0.84655803 - samples/sec: 18.91 - lr: 0.020000\n",
+      "2021-09-21 20:20:54,831 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:54,832 EPOCH 1 done: loss 0.8466 - lr 0.0200000\n",
+      "2021-09-21 20:20:54,832 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:54:41,881 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:41,942 epoch 4 - iter 1/5 - loss 0.51002043 - samples/sec: 21.08 - lr: 0.020000\n",
-      "2021-09-08 10:54:42,022 epoch 4 - iter 2/5 - loss 0.32654417 - samples/sec: 12.57 - lr: 0.020000\n",
-      "2021-09-08 10:54:42,131 epoch 4 - iter 3/5 - loss 0.29139252 - samples/sec: 9.21 - lr: 0.020000\n",
-      "2021-09-08 10:54:42,182 epoch 4 - iter 4/5 - loss 0.35825539 - samples/sec: 20.10 - lr: 0.020000\n",
-      "2021-09-08 10:54:42,229 epoch 4 - iter 5/5 - loss 0.37388085 - samples/sec: 21.34 - lr: 0.020000\n",
-      "2021-09-08 10:54:42,230 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:42,230 EPOCH 4 done: loss 0.3739 - lr 0.0200000\n",
-      "2021-09-08 10:54:42,230 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:54:44,464 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:44,530 epoch 5 - iter 1/5 - loss 0.12922262 - samples/sec: 19.56 - lr: 0.020000\n",
-      "2021-09-08 10:54:44,638 epoch 5 - iter 2/5 - loss 0.12040314 - samples/sec: 9.35 - lr: 0.020000\n",
-      "2021-09-08 10:54:44,762 epoch 5 - iter 3/5 - loss 0.09999429 - samples/sec: 8.09 - lr: 0.020000\n",
-      "2021-09-08 10:54:44,812 epoch 5 - iter 4/5 - loss 0.25282208 - samples/sec: 19.90 - lr: 0.020000\n",
-      "2021-09-08 10:54:44,873 epoch 5 - iter 5/5 - loss 0.33529909 - samples/sec: 16.73 - lr: 0.020000\n",
-      "2021-09-08 10:54:44,874 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:44,874 EPOCH 5 done: loss 0.3353 - lr 0.0200000\n",
-      "2021-09-08 10:54:44,874 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:54:44,879 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:44,982 epoch 6 - iter 1/5 - loss 0.05210604 - samples/sec: 11.70 - lr: 0.020000\n",
-      "2021-09-08 10:54:45,038 epoch 6 - iter 2/5 - loss 0.22523245 - samples/sec: 17.73 - lr: 0.020000\n",
-      "2021-09-08 10:54:45,164 epoch 6 - iter 3/5 - loss 0.44678082 - samples/sec: 8.01 - lr: 0.020000\n",
-      "2021-09-08 10:54:45,214 epoch 6 - iter 4/5 - loss 0.47158730 - samples/sec: 19.95 - lr: 0.020000\n",
-      "2021-09-08 10:54:45,258 epoch 6 - iter 5/5 - loss 0.38139565 - samples/sec: 23.07 - lr: 0.020000\n",
-      "2021-09-08 10:54:45,259 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:45,259 EPOCH 6 done: loss 0.3814 - lr 0.0200000\n",
-      "2021-09-08 10:54:45,260 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:54:45,271 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:45,337 epoch 7 - iter 1/5 - loss 0.41117015 - samples/sec: 20.75 - lr: 0.020000\n",
-      "2021-09-08 10:54:45,464 epoch 7 - iter 2/5 - loss 0.28872972 - samples/sec: 7.92 - lr: 0.020000\n",
-      "2021-09-08 10:54:45,551 epoch 7 - iter 3/5 - loss 0.21652325 - samples/sec: 11.43 - lr: 0.020000\n",
-      "2021-09-08 10:54:45,608 epoch 7 - iter 4/5 - loss 0.30982809 - samples/sec: 17.98 - lr: 0.020000\n",
-      "2021-09-08 10:54:45,652 epoch 7 - iter 5/5 - loss 0.24933403 - samples/sec: 22.47 - lr: 0.020000\n",
-      "2021-09-08 10:54:45,654 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:45,654 EPOCH 7 done: loss 0.2493 - lr 0.0200000\n",
-      "Epoch     7: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 10:54:45,654 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:54:45,733 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:45,871 epoch 8 - iter 1/5 - loss 0.02769687 - samples/sec: 8.19 - lr: 0.010000\n",
-      "2021-09-08 10:54:45,930 epoch 8 - iter 2/5 - loss 0.06364089 - samples/sec: 17.24 - lr: 0.010000\n",
-      "2021-09-08 10:54:45,981 epoch 8 - iter 3/5 - loss 0.14157928 - samples/sec: 19.56 - lr: 0.010000\n",
-      "2021-09-08 10:54:46,071 epoch 8 - iter 4/5 - loss 0.11126794 - samples/sec: 11.23 - lr: 0.010000\n",
-      "2021-09-08 10:54:46,123 epoch 8 - iter 5/5 - loss 0.18355893 - samples/sec: 19.55 - lr: 0.010000\n",
-      "2021-09-08 10:54:46,124 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:46,124 EPOCH 8 done: loss 0.1836 - lr 0.0100000\n",
-      "2021-09-08 10:54:46,124 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:54:46,219 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:46,281 epoch 9 - iter 1/5 - loss 0.02520454 - samples/sec: 20.93 - lr: 0.010000\n",
-      "2021-09-08 10:54:46,333 epoch 9 - iter 2/5 - loss 0.18756657 - samples/sec: 19.62 - lr: 0.010000\n",
-      "2021-09-08 10:54:46,422 epoch 9 - iter 3/5 - loss 0.13417629 - samples/sec: 11.23 - lr: 0.010000\n",
-      "2021-09-08 10:54:46,482 epoch 9 - iter 4/5 - loss 0.11872119 - samples/sec: 16.98 - lr: 0.010000\n",
-      "2021-09-08 10:54:46,611 epoch 9 - iter 5/5 - loss 0.12178599 - samples/sec: 7.76 - lr: 0.010000\n",
-      "2021-09-08 10:54:46,612 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:46,612 EPOCH 9 done: loss 0.1218 - lr 0.0100000\n",
-      "2021-09-08 10:54:46,613 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:54:46,690 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:46,797 epoch 10 - iter 1/5 - loss 0.02574043 - samples/sec: 11.16 - lr: 0.010000\n",
-      "2021-09-08 10:54:46,849 epoch 10 - iter 2/5 - loss 0.13189000 - samples/sec: 19.19 - lr: 0.010000\n",
-      "2021-09-08 10:54:46,894 epoch 10 - iter 3/5 - loss 0.09406913 - samples/sec: 22.70 - lr: 0.010000\n"
+      "2021-09-21 20:21:03,487 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:03,589 epoch 2 - iter 1/5 - loss 0.45827842 - samples/sec: 13.05 - lr: 0.020000\n",
+      "2021-09-21 20:21:03,661 epoch 2 - iter 2/5 - loss 0.60303974 - samples/sec: 14.03 - lr: 0.020000\n",
+      "2021-09-21 20:21:03,751 epoch 2 - iter 3/5 - loss 0.58252690 - samples/sec: 11.22 - lr: 0.020000\n",
+      "2021-09-21 20:21:03,848 epoch 2 - iter 4/5 - loss 0.62085015 - samples/sec: 10.37 - lr: 0.020000\n",
+      "2021-09-21 20:21:03,931 epoch 2 - iter 5/5 - loss 0.61379597 - samples/sec: 12.26 - lr: 0.020000\n",
+      "2021-09-21 20:21:03,932 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:03,932 EPOCH 2 done: loss 0.6138 - lr 0.0200000\n",
+      "2021-09-21 20:21:03,933 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:21:04,444 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:04,521 epoch 3 - iter 1/5 - loss 0.54122192 - samples/sec: 16.58 - lr: 0.020000\n",
+      "2021-09-21 20:21:04,587 epoch 3 - iter 2/5 - loss 0.61398488 - samples/sec: 15.39 - lr: 0.020000\n",
+      "2021-09-21 20:21:04,647 epoch 3 - iter 3/5 - loss 0.57596343 - samples/sec: 16.83 - lr: 0.020000\n",
+      "2021-09-21 20:21:04,708 epoch 3 - iter 4/5 - loss 0.58591416 - samples/sec: 16.63 - lr: 0.020000\n",
+      "2021-09-21 20:21:04,767 epoch 3 - iter 5/5 - loss 0.56139152 - samples/sec: 17.21 - lr: 0.020000\n",
+      "2021-09-21 20:21:04,769 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:04,770 EPOCH 3 done: loss 0.5614 - lr 0.0200000\n",
+      "2021-09-21 20:21:04,770 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:21:10,975 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:11,055 epoch 4 - iter 1/5 - loss 0.76891136 - samples/sec: 16.41 - lr: 0.020000\n",
+      "2021-09-21 20:21:11,118 epoch 4 - iter 2/5 - loss 0.53577439 - samples/sec: 15.93 - lr: 0.020000\n",
+      "2021-09-21 20:21:11,195 epoch 4 - iter 3/5 - loss 0.52754940 - samples/sec: 13.26 - lr: 0.020000\n",
+      "2021-09-21 20:21:11,247 epoch 4 - iter 4/5 - loss 0.53896213 - samples/sec: 19.64 - lr: 0.020000\n",
+      "2021-09-21 20:21:11,298 epoch 4 - iter 5/5 - loss 0.46517279 - samples/sec: 19.96 - lr: 0.020000\n",
+      "2021-09-21 20:21:11,300 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:11,300 EPOCH 4 done: loss 0.4652 - lr 0.0200000\n",
+      "2021-09-21 20:21:11,300 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:21:11,383 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:11,448 epoch 5 - iter 1/5 - loss 0.16503108 - samples/sec: 19.96 - lr: 0.020000\n",
+      "2021-09-21 20:21:11,502 epoch 5 - iter 2/5 - loss 0.31967300 - samples/sec: 18.68 - lr: 0.020000\n",
+      "2021-09-21 20:21:11,551 epoch 5 - iter 3/5 - loss 0.38244722 - samples/sec: 20.74 - lr: 0.020000\n",
+      "2021-09-21 20:21:11,599 epoch 5 - iter 4/5 - loss 0.48587009 - samples/sec: 21.07 - lr: 0.020000\n",
+      "2021-09-21 20:21:11,658 epoch 5 - iter 5/5 - loss 0.46690434 - samples/sec: 17.26 - lr: 0.020000\n",
+      "2021-09-21 20:21:11,659 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:11,659 EPOCH 5 done: loss 0.4669 - lr 0.0200000\n",
+      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 20:21:11,660 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:21:11,759 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:11,816 epoch 6 - iter 1/5 - loss 0.04584508 - samples/sec: 22.46 - lr: 0.010000\n",
+      "2021-09-21 20:21:11,869 epoch 6 - iter 2/5 - loss 0.38956978 - samples/sec: 19.10 - lr: 0.010000\n",
+      "2021-09-21 20:21:11,926 epoch 6 - iter 3/5 - loss 0.39004408 - samples/sec: 17.73 - lr: 0.010000\n",
+      "2021-09-21 20:21:11,973 epoch 6 - iter 4/5 - loss 0.46828890 - samples/sec: 21.45 - lr: 0.010000\n",
+      "2021-09-21 20:21:12,020 epoch 6 - iter 5/5 - loss 0.48056978 - samples/sec: 21.75 - lr: 0.010000\n",
+      "2021-09-21 20:21:12,021 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:12,021 EPOCH 6 done: loss 0.4806 - lr 0.0100000\n",
+      "2021-09-21 20:21:12,022 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:21:12,124 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:12,186 epoch 7 - iter 1/5 - loss 0.44619784 - samples/sec: 20.22 - lr: 0.010000\n",
+      "2021-09-21 20:21:12,233 epoch 7 - iter 2/5 - loss 0.59416927 - samples/sec: 21.30 - lr: 0.010000\n",
+      "2021-09-21 20:21:12,280 epoch 7 - iter 3/5 - loss 0.42956774 - samples/sec: 21.60 - lr: 0.010000\n",
+      "2021-09-21 20:21:12,324 epoch 7 - iter 4/5 - loss 0.33580769 - samples/sec: 23.02 - lr: 0.010000\n",
+      "2021-09-21 20:21:12,383 epoch 7 - iter 5/5 - loss 0.30385874 - samples/sec: 17.08 - lr: 0.010000\n",
+      "2021-09-21 20:21:12,384 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:12,385 EPOCH 7 done: loss 0.3039 - lr 0.0100000\n",
+      "2021-09-21 20:21:12,385 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:21:12,472 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:12,529 epoch 8 - iter 1/5 - loss 0.03717216 - samples/sec: 22.89 - lr: 0.010000\n",
+      "2021-09-21 20:21:12,572 epoch 8 - iter 2/5 - loss 0.03306312 - samples/sec: 23.04 - lr: 0.010000\n",
+      "2021-09-21 20:21:12,623 epoch 8 - iter 3/5 - loss 0.16880955 - samples/sec: 20.13 - lr: 0.010000\n",
+      "2021-09-21 20:21:12,670 epoch 8 - iter 4/5 - loss 0.23772257 - samples/sec: 21.52 - lr: 0.010000\n",
+      "2021-09-21 20:21:12,727 epoch 8 - iter 5/5 - loss 0.23031679 - samples/sec: 17.75 - lr: 0.010000\n",
+      "2021-09-21 20:21:12,728 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:12,728 EPOCH 8 done: loss 0.2303 - lr 0.0100000\n",
+      "2021-09-21 20:21:12,728 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:21:12,849 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:54:47,019 epoch 10 - iter 4/5 - loss 0.09050147 - samples/sec: 8.02 - lr: 0.010000\n",
-      "2021-09-08 10:54:47,077 epoch 10 - iter 5/5 - loss 0.10358969 - samples/sec: 17.22 - lr: 0.010000\n",
-      "2021-09-08 10:54:47,078 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:54:47,079 EPOCH 10 done: loss 0.1036 - lr 0.0100000\n",
-      "2021-09-08 10:54:47,079 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:54:58,133 Test data not provided setting final score to 0\n",
-      "Accuracy Durchschnitt: 0.3575609756097561\n"
+      "2021-09-21 20:21:12,909 epoch 9 - iter 1/5 - loss 0.13525091 - samples/sec: 21.40 - lr: 0.010000\n",
+      "2021-09-21 20:21:12,953 epoch 9 - iter 2/5 - loss 0.08137684 - samples/sec: 22.95 - lr: 0.010000\n",
+      "2021-09-21 20:21:13,004 epoch 9 - iter 3/5 - loss 0.08584386 - samples/sec: 19.90 - lr: 0.010000\n",
+      "2021-09-21 20:21:13,047 epoch 9 - iter 4/5 - loss 0.06612217 - samples/sec: 23.19 - lr: 0.010000\n",
+      "2021-09-21 20:21:13,101 epoch 9 - iter 5/5 - loss 0.05831282 - samples/sec: 18.71 - lr: 0.010000\n",
+      "2021-09-21 20:21:13,102 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:13,103 EPOCH 9 done: loss 0.0583 - lr 0.0100000\n",
+      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 20:21:13,103 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:21:13,249 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:13,305 epoch 10 - iter 1/5 - loss 0.00540346 - samples/sec: 23.19 - lr: 0.005000\n",
+      "2021-09-21 20:21:13,349 epoch 10 - iter 2/5 - loss 0.01213585 - samples/sec: 23.05 - lr: 0.005000\n",
+      "2021-09-21 20:21:13,393 epoch 10 - iter 3/5 - loss 0.02179905 - samples/sec: 22.97 - lr: 0.005000\n",
+      "2021-09-21 20:21:13,450 epoch 10 - iter 4/5 - loss 0.31528628 - samples/sec: 17.76 - lr: 0.005000\n",
+      "2021-09-21 20:21:13,497 epoch 10 - iter 5/5 - loss 0.25911985 - samples/sec: 21.64 - lr: 0.005000\n",
+      "2021-09-21 20:21:13,498 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:13,498 EPOCH 10 done: loss 0.2591 - lr 0.0050000\n",
+      "2021-09-21 20:21:13,499 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:21:21,766 Test data not provided setting final score to 0\n",
+      "Accuracy Durchschnitt: 0.35073170731707315\n"
      ]
     }
    ],
@@ -2448,11 +2452,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "id": "0c4025f0",
+   "execution_count": 5,
+   "id": "922bbf5c",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.34227642276422765, 0.34796747967479674, 0.2943089430894309, 0.37886178861788616, 0.3902439024390244]\n",
+      "0.033511717806646434\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(avg_acc_list)\n",
+    "print(statistics.pstdev(avg_acc_list))"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -2464,7 +2480,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 6,
    "id": "ef4da272",
    "metadata": {},
    "outputs": [
@@ -2472,25 +2488,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:55:29,710 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
+      "2021-09-21 20:22:03,664 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:55:34,179 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:22:08,125 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 5/5 [00:00<00:00, 10438.79it/s]"
+      "100%|██████████| 5/5 [00:00<00:00, 11729.04it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:55:34,182 [b'The product has been reviewed as awful', b'The product has been reviewed as bad', b'The product has been reviewed as neutral', b'The product has been reviewed as good', b'The product has been reviewed as great']\n",
-      "2021-09-08 10:55:34,349 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:34,351 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:22:08,127 [b'The product has been reviewed as awful', b'The product has been reviewed as bad', b'The product has been reviewed as neutral', b'The product has been reviewed as good', b'The product has been reviewed as great']\n",
+      "2021-09-21 20:22:08,136 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:08,138 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -2803,24 +2819,27 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:55:34,352 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:34,352 Corpus: \"Corpus: 5 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 10:55:34,352 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:34,353 Parameters:\n",
-      "2021-09-08 10:55:34,353  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:55:34,353  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:55:34,354  - patience: \"3\"\n",
-      "2021-09-08 10:55:34,354  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:55:34,354  - max_epochs: \"10\"\n",
-      "2021-09-08 10:55:34,355  - shuffle: \"True\"\n",
-      "2021-09-08 10:55:34,355  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:55:34,355  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:55:34,356 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:34,356 Model training base path: \"temp1\"\n",
-      "2021-09-08 10:55:34,356 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:34,357 Device: cuda:0\n",
-      "2021-09-08 10:55:34,357 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:34,357 Embeddings storage mode: cpu\n"
+      "2021-09-21 20:22:08,138 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:08,139 Corpus: \"Corpus: 5 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 20:22:08,139 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:08,139 Parameters:\n",
+      "2021-09-21 20:22:08,140  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:22:08,140  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:22:08,140  - patience: \"3\"\n",
+      "2021-09-21 20:22:08,140  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:22:08,141  - max_epochs: \"10\"\n",
+      "2021-09-21 20:22:08,141  - shuffle: \"True\"\n",
+      "2021-09-21 20:22:08,141  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:22:08,141  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:22:08,142 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:08,142 Model training base path: \"temp1\"\n",
+      "2021-09-21 20:22:08,142 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:08,143 Device: cuda:0\n",
+      "2021-09-21 20:22:08,143 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:08,143 Embeddings storage mode: cpu\n",
+      "2021-09-21 20:22:08,150 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:08,214 epoch 1 - iter 1/5 - loss 1.64375257 - samples/sec: 21.13 - lr: 0.020000\n",
+      "2021-09-21 20:22:08,294 epoch 1 - iter 2/5 - loss 1.11507121 - samples/sec: 12.68 - lr: 0.020000\n"
      ]
     },
     {
@@ -2834,138 +2853,122 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:55:34,533 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:34,605 epoch 1 - iter 1/5 - loss 0.67214364 - samples/sec: 17.99 - lr: 0.020000\n",
-      "2021-09-08 10:55:34,660 epoch 1 - iter 2/5 - loss 1.03889874 - samples/sec: 18.39 - lr: 0.020000\n",
-      "2021-09-08 10:55:34,735 epoch 1 - iter 3/5 - loss 1.00012900 - samples/sec: 13.52 - lr: 0.020000\n",
-      "2021-09-08 10:55:34,803 epoch 1 - iter 4/5 - loss 0.89248812 - samples/sec: 14.71 - lr: 0.020000\n",
-      "2021-09-08 10:55:34,860 epoch 1 - iter 5/5 - loss 0.99558115 - samples/sec: 17.79 - lr: 0.020000\n",
-      "2021-09-08 10:55:34,862 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:34,862 EPOCH 1 done: loss 0.9956 - lr 0.0200000\n",
-      "2021-09-08 10:55:34,863 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:22:08,359 epoch 1 - iter 3/5 - loss 0.92708480 - samples/sec: 15.53 - lr: 0.020000\n",
+      "2021-09-21 20:22:08,428 epoch 1 - iter 4/5 - loss 0.79171852 - samples/sec: 14.48 - lr: 0.020000\n",
+      "2021-09-21 20:22:08,488 epoch 1 - iter 5/5 - loss 0.84849818 - samples/sec: 16.78 - lr: 0.020000\n",
+      "2021-09-21 20:22:08,489 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:08,489 EPOCH 1 done: loss 0.8485 - lr 0.0200000\n",
+      "2021-09-21 20:22:08,490 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:55:41,971 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:42,053 epoch 2 - iter 1/5 - loss 0.88782936 - samples/sec: 15.75 - lr: 0.020000\n",
-      "2021-09-08 10:55:42,101 epoch 2 - iter 2/5 - loss 0.64618497 - samples/sec: 21.21 - lr: 0.020000\n",
-      "2021-09-08 10:55:42,163 epoch 2 - iter 3/5 - loss 0.67229463 - samples/sec: 16.25 - lr: 0.020000\n",
-      "2021-09-08 10:55:42,210 epoch 2 - iter 4/5 - loss 0.67508968 - samples/sec: 21.33 - lr: 0.020000\n",
-      "2021-09-08 10:55:42,263 epoch 2 - iter 5/5 - loss 0.64980511 - samples/sec: 19.15 - lr: 0.020000\n",
-      "2021-09-08 10:55:42,264 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:42,264 EPOCH 2 done: loss 0.6498 - lr 0.0200000\n",
-      "2021-09-08 10:55:42,265 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:55:42,267 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:42,332 epoch 3 - iter 1/5 - loss 0.60070914 - samples/sec: 19.41 - lr: 0.020000\n",
-      "2021-09-08 10:55:42,380 epoch 3 - iter 2/5 - loss 0.55400538 - samples/sec: 21.10 - lr: 0.020000\n",
-      "2021-09-08 10:55:42,436 epoch 3 - iter 3/5 - loss 0.75635167 - samples/sec: 17.89 - lr: 0.020000\n",
-      "2021-09-08 10:55:42,494 epoch 3 - iter 4/5 - loss 0.67681717 - samples/sec: 17.43 - lr: 0.020000\n",
-      "2021-09-08 10:55:42,542 epoch 3 - iter 5/5 - loss 0.70642223 - samples/sec: 21.23 - lr: 0.020000\n",
-      "2021-09-08 10:55:42,543 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:42,543 EPOCH 3 done: loss 0.7064 - lr 0.0200000\n",
-      "2021-09-08 10:55:42,543 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:55:42,547 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:42,611 epoch 4 - iter 1/5 - loss 0.92973304 - samples/sec: 19.52 - lr: 0.020000\n",
-      "2021-09-08 10:55:42,659 epoch 4 - iter 2/5 - loss 0.73880813 - samples/sec: 21.27 - lr: 0.020000\n",
-      "2021-09-08 10:55:42,706 epoch 4 - iter 3/5 - loss 0.64207094 - samples/sec: 21.33 - lr: 0.020000\n",
-      "2021-09-08 10:55:42,762 epoch 4 - iter 4/5 - loss 0.63603223 - samples/sec: 17.96 - lr: 0.020000\n",
-      "2021-09-08 10:55:42,819 epoch 4 - iter 5/5 - loss 0.62458498 - samples/sec: 17.74 - lr: 0.020000\n",
-      "2021-09-08 10:55:42,820 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:42,821 EPOCH 4 done: loss 0.6246 - lr 0.0200000\n",
-      "2021-09-08 10:55:42,821 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:55:42,823 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:42,892 epoch 5 - iter 1/5 - loss 0.85116982 - samples/sec: 18.11 - lr: 0.020000\n",
-      "2021-09-08 10:55:42,954 epoch 5 - iter 2/5 - loss 0.60684584 - samples/sec: 16.51 - lr: 0.020000\n",
-      "2021-09-08 10:55:43,007 epoch 5 - iter 3/5 - loss 0.60107029 - samples/sec: 19.41 - lr: 0.020000\n",
-      "2021-09-08 10:55:43,056 epoch 5 - iter 4/5 - loss 0.49494147 - samples/sec: 20.61 - lr: 0.020000\n",
-      "2021-09-08 10:55:43,116 epoch 5 - iter 5/5 - loss 0.52348540 - samples/sec: 16.89 - lr: 0.020000\n",
-      "2021-09-08 10:55:43,117 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:43,117 EPOCH 5 done: loss 0.5235 - lr 0.0200000\n",
+      "2021-09-21 20:22:17,446 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:17,513 epoch 2 - iter 1/5 - loss 0.78303051 - samples/sec: 19.33 - lr: 0.020000\n",
+      "2021-09-21 20:22:17,568 epoch 2 - iter 2/5 - loss 0.69056055 - samples/sec: 18.32 - lr: 0.020000\n",
+      "2021-09-21 20:22:17,620 epoch 2 - iter 3/5 - loss 0.55209970 - samples/sec: 19.46 - lr: 0.020000\n",
+      "2021-09-21 20:22:17,669 epoch 2 - iter 4/5 - loss 0.57000844 - samples/sec: 20.40 - lr: 0.020000\n",
+      "2021-09-21 20:22:17,730 epoch 2 - iter 5/5 - loss 0.57923825 - samples/sec: 16.58 - lr: 0.020000\n",
+      "2021-09-21 20:22:17,731 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:17,731 EPOCH 2 done: loss 0.5792 - lr 0.0200000\n",
+      "2021-09-21 20:22:17,731 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:22:19,178 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:19,248 epoch 3 - iter 1/5 - loss 0.25956878 - samples/sec: 18.05 - lr: 0.020000\n",
+      "2021-09-21 20:22:19,350 epoch 3 - iter 2/5 - loss 0.46606715 - samples/sec: 9.85 - lr: 0.020000\n",
+      "2021-09-21 20:22:19,475 epoch 3 - iter 3/5 - loss 0.57961391 - samples/sec: 8.00 - lr: 0.020000\n",
+      "2021-09-21 20:22:19,572 epoch 3 - iter 4/5 - loss 0.67440199 - samples/sec: 10.40 - lr: 0.020000\n",
+      "2021-09-21 20:22:19,658 epoch 3 - iter 5/5 - loss 0.67668875 - samples/sec: 11.65 - lr: 0.020000\n",
+      "2021-09-21 20:22:19,659 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:19,659 EPOCH 3 done: loss 0.6767 - lr 0.0200000\n",
+      "2021-09-21 20:22:19,660 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:22:19,662 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:19,798 epoch 4 - iter 1/5 - loss 0.56577879 - samples/sec: 11.20 - lr: 0.020000\n",
+      "2021-09-21 20:22:19,928 epoch 4 - iter 2/5 - loss 0.75876832 - samples/sec: 7.72 - lr: 0.020000\n",
+      "2021-09-21 20:22:20,029 epoch 4 - iter 3/5 - loss 0.67469951 - samples/sec: 9.87 - lr: 0.020000\n",
+      "2021-09-21 20:22:20,144 epoch 4 - iter 4/5 - loss 0.59777539 - samples/sec: 8.75 - lr: 0.020000\n",
+      "2021-09-21 20:22:20,219 epoch 4 - iter 5/5 - loss 0.53285213 - samples/sec: 13.55 - lr: 0.020000\n",
+      "2021-09-21 20:22:20,220 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:20,220 EPOCH 4 done: loss 0.5329 - lr 0.0200000\n",
+      "2021-09-21 20:22:20,220 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:22:20,222 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:20,318 epoch 5 - iter 1/5 - loss 1.29641032 - samples/sec: 16.99 - lr: 0.020000\n",
+      "2021-09-21 20:22:20,429 epoch 5 - iter 2/5 - loss 1.01465210 - samples/sec: 9.08 - lr: 0.020000\n",
+      "2021-09-21 20:22:20,502 epoch 5 - iter 3/5 - loss 0.75887108 - samples/sec: 13.65 - lr: 0.020000\n",
+      "2021-09-21 20:22:20,590 epoch 5 - iter 4/5 - loss 0.70558650 - samples/sec: 11.46 - lr: 0.020000\n",
+      "2021-09-21 20:22:20,669 epoch 5 - iter 5/5 - loss 0.68750527 - samples/sec: 12.69 - lr: 0.020000\n",
+      "2021-09-21 20:22:20,670 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:20,671 EPOCH 5 done: loss 0.6875 - lr 0.0200000\n",
       "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 10:55:43,118 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:55:43,120 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:43,181 epoch 6 - iter 1/5 - loss 0.38881993 - samples/sec: 21.18 - lr: 0.010000\n",
-      "2021-09-08 10:55:43,242 epoch 6 - iter 2/5 - loss 0.47808242 - samples/sec: 16.44 - lr: 0.010000\n",
-      "2021-09-08 10:55:43,298 epoch 6 - iter 3/5 - loss 0.34741159 - samples/sec: 18.01 - lr: 0.010000\n",
-      "2021-09-08 10:55:43,352 epoch 6 - iter 4/5 - loss 0.40447946 - samples/sec: 18.61 - lr: 0.010000\n",
-      "2021-09-08 10:55:43,398 epoch 6 - iter 5/5 - loss 0.32751867 - samples/sec: 22.29 - lr: 0.010000\n",
-      "2021-09-08 10:55:43,399 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:43,399 EPOCH 6 done: loss 0.3275 - lr 0.0100000\n",
-      "2021-09-08 10:55:43,399 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:55:43,401 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:43,471 epoch 7 - iter 1/5 - loss 0.38621774 - samples/sec: 17.88 - lr: 0.010000\n",
-      "2021-09-08 10:55:43,553 epoch 7 - iter 2/5 - loss 0.46496178 - samples/sec: 12.38 - lr: 0.010000\n",
-      "2021-09-08 10:55:43,597 epoch 7 - iter 3/5 - loss 0.32320931 - samples/sec: 22.95 - lr: 0.010000\n",
-      "2021-09-08 10:55:43,658 epoch 7 - iter 4/5 - loss 0.36440811 - samples/sec: 16.49 - lr: 0.010000\n",
-      "2021-09-08 10:55:43,716 epoch 7 - iter 5/5 - loss 0.52089637 - samples/sec: 17.36 - lr: 0.010000\n",
-      "2021-09-08 10:55:43,718 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:43,718 EPOCH 7 done: loss 0.5209 - lr 0.0100000\n",
-      "2021-09-08 10:55:43,718 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:55:43,720 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:43,787 epoch 8 - iter 1/5 - loss 0.42939970 - samples/sec: 19.24 - lr: 0.010000\n",
-      "2021-09-08 10:55:43,835 epoch 8 - iter 2/5 - loss 0.44102481 - samples/sec: 20.90 - lr: 0.010000\n",
-      "2021-09-08 10:55:43,892 epoch 8 - iter 3/5 - loss 0.42312513 - samples/sec: 17.77 - lr: 0.010000\n",
-      "2021-09-08 10:55:43,953 epoch 8 - iter 4/5 - loss 0.41448698 - samples/sec: 16.62 - lr: 0.010000\n",
-      "2021-09-08 10:55:44,011 epoch 8 - iter 5/5 - loss 0.33341184 - samples/sec: 17.25 - lr: 0.010000\n",
-      "2021-09-08 10:55:44,013 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:44,013 EPOCH 8 done: loss 0.3334 - lr 0.0100000\n",
-      "2021-09-08 10:55:44,013 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:55:44,015 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:44,077 epoch 9 - iter 1/5 - loss 0.00336547 - samples/sec: 21.40 - lr: 0.010000\n",
-      "2021-09-08 10:55:44,131 epoch 9 - iter 2/5 - loss 0.33982826 - samples/sec: 18.68 - lr: 0.010000\n",
-      "2021-09-08 10:55:44,191 epoch 9 - iter 3/5 - loss 0.34874830 - samples/sec: 17.04 - lr: 0.010000\n",
-      "2021-09-08 10:55:44,240 epoch 9 - iter 4/5 - loss 0.49932318 - samples/sec: 20.70 - lr: 0.010000\n",
-      "2021-09-08 10:55:44,296 epoch 9 - iter 5/5 - loss 0.46117319 - samples/sec: 18.01 - lr: 0.010000\n",
-      "2021-09-08 10:55:44,297 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:44,297 EPOCH 9 done: loss 0.4612 - lr 0.0100000\n",
+      "2021-09-21 20:22:20,671 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:22:20,673 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:20,822 epoch 6 - iter 1/5 - loss 0.23105110 - samples/sec: 11.52 - lr: 0.010000\n",
+      "2021-09-21 20:22:20,946 epoch 6 - iter 2/5 - loss 0.27277615 - samples/sec: 8.12 - lr: 0.010000\n",
+      "2021-09-21 20:22:21,043 epoch 6 - iter 3/5 - loss 0.22294244 - samples/sec: 10.37 - lr: 0.010000\n",
+      "2021-09-21 20:22:21,135 epoch 6 - iter 4/5 - loss 0.32262014 - samples/sec: 10.84 - lr: 0.010000\n",
+      "2021-09-21 20:22:21,222 epoch 6 - iter 5/5 - loss 0.36193914 - samples/sec: 11.61 - lr: 0.010000\n",
+      "2021-09-21 20:22:21,223 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:21,223 EPOCH 6 done: loss 0.3619 - lr 0.0100000\n",
+      "2021-09-21 20:22:21,224 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:22:21,226 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:21,358 epoch 7 - iter 1/5 - loss 0.06180995 - samples/sec: 12.72 - lr: 0.010000\n",
+      "2021-09-21 20:22:21,447 epoch 7 - iter 2/5 - loss 0.14574107 - samples/sec: 11.22 - lr: 0.010000\n",
+      "2021-09-21 20:22:21,530 epoch 7 - iter 3/5 - loss 0.44301619 - samples/sec: 12.14 - lr: 0.010000\n",
+      "2021-09-21 20:22:21,624 epoch 7 - iter 4/5 - loss 0.46605185 - samples/sec: 10.74 - lr: 0.010000\n",
+      "2021-09-21 20:22:21,674 epoch 7 - iter 5/5 - loss 0.45166914 - samples/sec: 20.01 - lr: 0.010000\n",
+      "2021-09-21 20:22:21,676 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:21,676 EPOCH 7 done: loss 0.4517 - lr 0.0100000\n",
+      "2021-09-21 20:22:21,676 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:22:21,680 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:21,791 epoch 8 - iter 1/5 - loss 0.17323290 - samples/sec: 10.56 - lr: 0.010000\n",
+      "2021-09-21 20:22:21,851 epoch 8 - iter 2/5 - loss 0.34359439 - samples/sec: 16.95 - lr: 0.010000\n",
+      "2021-09-21 20:22:21,943 epoch 8 - iter 3/5 - loss 0.34422442 - samples/sec: 10.95 - lr: 0.010000\n",
+      "2021-09-21 20:22:22,012 epoch 8 - iter 4/5 - loss 0.38685601 - samples/sec: 14.57 - lr: 0.010000\n",
+      "2021-09-21 20:22:22,092 epoch 8 - iter 5/5 - loss 0.32188670 - samples/sec: 12.70 - lr: 0.010000\n",
+      "2021-09-21 20:22:22,093 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:22,093 EPOCH 8 done: loss 0.3219 - lr 0.0100000\n",
+      "2021-09-21 20:22:22,094 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:22:22,097 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:22,197 epoch 9 - iter 1/5 - loss 0.28266284 - samples/sec: 13.67 - lr: 0.010000\n",
+      "2021-09-21 20:22:22,280 epoch 9 - iter 2/5 - loss 0.37476163 - samples/sec: 12.05 - lr: 0.010000\n",
+      "2021-09-21 20:22:22,334 epoch 9 - iter 3/5 - loss 0.25718738 - samples/sec: 18.76 - lr: 0.010000\n",
+      "2021-09-21 20:22:22,412 epoch 9 - iter 4/5 - loss 0.25010146 - samples/sec: 12.83 - lr: 0.010000\n",
+      "2021-09-21 20:22:22,480 epoch 9 - iter 5/5 - loss 0.21267884 - samples/sec: 14.95 - lr: 0.010000\n",
+      "2021-09-21 20:22:22,481 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:22,481 EPOCH 9 done: loss 0.2127 - lr 0.0100000\n",
       "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 10:55:44,298 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:55:44,368 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:44,437 epoch 10 - iter 1/5 - loss 0.01975068 - samples/sec: 18.07 - lr: 0.005000\n",
-      "2021-09-08 10:55:44,489 epoch 10 - iter 2/5 - loss 0.64335631 - samples/sec: 19.65 - lr: 0.005000\n",
-      "2021-09-08 10:55:44,536 epoch 10 - iter 3/5 - loss 0.43018403 - samples/sec: 21.41 - lr: 0.005000\n"
+      "2021-09-21 20:22:22,481 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:22:22,582 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:22,652 epoch 10 - iter 1/5 - loss 0.54693794 - samples/sec: 17.90 - lr: 0.005000\n",
+      "2021-09-21 20:22:22,699 epoch 10 - iter 2/5 - loss 0.62090990 - samples/sec: 21.28 - lr: 0.005000\n",
+      "2021-09-21 20:22:22,752 epoch 10 - iter 3/5 - loss 0.44264690 - samples/sec: 19.11 - lr: 0.005000\n",
+      "2021-09-21 20:22:22,801 epoch 10 - iter 4/5 - loss 0.33863028 - samples/sec: 20.69 - lr: 0.005000\n",
+      "2021-09-21 20:22:22,876 epoch 10 - iter 5/5 - loss 0.30903378 - samples/sec: 13.29 - lr: 0.005000\n",
+      "2021-09-21 20:22:22,877 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:55:44,585 epoch 10 - iter 4/5 - loss 0.55989269 - samples/sec: 20.48 - lr: 0.005000\n",
-      "2021-09-08 10:55:44,641 epoch 10 - iter 5/5 - loss 0.49856021 - samples/sec: 18.15 - lr: 0.005000\n",
-      "2021-09-08 10:55:44,642 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:55:44,642 EPOCH 10 done: loss 0.4986 - lr 0.0050000\n",
-      "2021-09-08 10:55:44,642 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:55:50,928 Test data not provided setting final score to 0\n",
-      "2021-09-08 10:56:31,349 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
+      "2021-09-21 20:22:22,878 EPOCH 10 done: loss 0.3090 - lr 0.0050000\n",
+      "2021-09-21 20:22:22,878 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:22:28,642 Test data not provided setting final score to 0\n",
+      "2021-09-21 20:23:16,730 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:56:36,409 Computing label dictionary. Progress:\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "100%|██████████| 5/5 [00:00<00:00, 4971.91it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 10:56:36,412 [b'The product has been reviewed as awful', b'The product has been reviewed as bad', b'The product has been reviewed as neutral', b'The product has been reviewed as good', b'The product has been reviewed as great']\n"
+      "2021-09-21 20:23:30,284 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\n"
+      "100%|██████████| 5/5 [00:00<00:00, 3788.21it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:56:39,291 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:39,294 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:23:30,288 [b'The product has been reviewed as awful', b'The product has been reviewed as bad', b'The product has been reviewed as neutral', b'The product has been reviewed as good', b'The product has been reviewed as great']\n",
+      "2021-09-21 20:23:30,462 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:30,463 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -3278,143 +3281,169 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:56:39,294 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:39,295 Corpus: \"Corpus: 5 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 10:56:39,295 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:39,296 Parameters:\n",
-      "2021-09-08 10:56:39,296  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:56:39,296  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:56:39,297  - patience: \"3\"\n",
-      "2021-09-08 10:56:39,297  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:56:39,297  - max_epochs: \"10\"\n",
-      "2021-09-08 10:56:39,298  - shuffle: \"True\"\n",
-      "2021-09-08 10:56:39,298  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:56:39,298  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:56:39,299 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:39,299 Model training base path: \"temp1\"\n",
-      "2021-09-08 10:56:39,299 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:39,300 Device: cuda:0\n",
-      "2021-09-08 10:56:39,300 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:39,300 Embeddings storage mode: cpu\n",
-      "2021-09-08 10:56:40,153 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:40,229 epoch 1 - iter 1/5 - loss 1.41443014 - samples/sec: 16.83 - lr: 0.020000\n",
-      "2021-09-08 10:56:40,317 epoch 1 - iter 2/5 - loss 1.07379779 - samples/sec: 11.58 - lr: 0.020000\n",
-      "2021-09-08 10:56:40,378 epoch 1 - iter 3/5 - loss 0.93179586 - samples/sec: 16.51 - lr: 0.020000\n",
-      "2021-09-08 10:56:40,433 epoch 1 - iter 4/5 - loss 0.84244336 - samples/sec: 18.39 - lr: 0.020000\n",
-      "2021-09-08 10:56:40,485 epoch 1 - iter 5/5 - loss 0.90988568 - samples/sec: 19.64 - lr: 0.020000\n",
-      "2021-09-08 10:56:40,487 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:40,487 EPOCH 1 done: loss 0.9099 - lr 0.0200000\n",
-      "2021-09-08 10:56:40,487 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:23:30,464 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:30,464 Corpus: \"Corpus: 5 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 20:23:30,465 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:30,465 Parameters:\n",
+      "2021-09-21 20:23:30,465  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:23:30,465  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:23:30,466  - patience: \"3\"\n",
+      "2021-09-21 20:23:30,466  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:23:30,466  - max_epochs: \"10\"\n",
+      "2021-09-21 20:23:30,467  - shuffle: \"True\"\n",
+      "2021-09-21 20:23:30,467  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:23:30,467  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:23:30,467 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:30,468 Model training base path: \"temp1\"\n",
+      "2021-09-21 20:23:30,468 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:30,468 Device: cuda:0\n",
+      "2021-09-21 20:23:30,468 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:30,469 Embeddings storage mode: cpu\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 20:23:30,684 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:30,743 epoch 1 - iter 1/5 - loss 1.20276117 - samples/sec: 22.62 - lr: 0.020000\n",
+      "2021-09-21 20:23:30,844 epoch 1 - iter 2/5 - loss 0.82606047 - samples/sec: 9.91 - lr: 0.020000\n",
+      "2021-09-21 20:23:30,923 epoch 1 - iter 3/5 - loss 0.84363774 - samples/sec: 12.69 - lr: 0.020000\n",
+      "2021-09-21 20:23:31,047 epoch 1 - iter 4/5 - loss 0.87088536 - samples/sec: 8.14 - lr: 0.020000\n",
+      "2021-09-21 20:23:31,124 epoch 1 - iter 5/5 - loss 0.83238076 - samples/sec: 13.06 - lr: 0.020000\n",
+      "2021-09-21 20:23:31,125 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:31,125 EPOCH 1 done: loss 0.8324 - lr 0.0200000\n",
+      "2021-09-21 20:23:31,125 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:56:57,505 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:57,574 epoch 2 - iter 1/5 - loss 0.82534379 - samples/sec: 19.25 - lr: 0.020000\n",
-      "2021-09-08 10:56:57,635 epoch 2 - iter 2/5 - loss 0.72219527 - samples/sec: 16.74 - lr: 0.020000\n",
-      "2021-09-08 10:56:57,686 epoch 2 - iter 3/5 - loss 0.80558612 - samples/sec: 19.79 - lr: 0.020000\n",
-      "2021-09-08 10:56:57,743 epoch 2 - iter 4/5 - loss 0.77966972 - samples/sec: 17.84 - lr: 0.020000\n",
-      "2021-09-08 10:56:57,795 epoch 2 - iter 5/5 - loss 0.70343040 - samples/sec: 19.90 - lr: 0.020000\n",
-      "2021-09-08 10:56:57,796 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:57,796 EPOCH 2 done: loss 0.7034 - lr 0.0200000\n",
-      "2021-09-08 10:56:57,797 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:56:58,197 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:58,272 epoch 3 - iter 1/5 - loss 0.70769054 - samples/sec: 17.08 - lr: 0.020000\n",
-      "2021-09-08 10:56:58,338 epoch 3 - iter 2/5 - loss 0.53815922 - samples/sec: 15.34 - lr: 0.020000\n",
-      "2021-09-08 10:56:58,420 epoch 3 - iter 3/5 - loss 0.56581676 - samples/sec: 12.32 - lr: 0.020000\n",
-      "2021-09-08 10:56:58,482 epoch 3 - iter 4/5 - loss 0.56724118 - samples/sec: 16.25 - lr: 0.020000\n",
-      "2021-09-08 10:56:58,568 epoch 3 - iter 5/5 - loss 0.60239482 - samples/sec: 11.77 - lr: 0.020000\n",
-      "2021-09-08 10:56:58,569 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:58,569 EPOCH 3 done: loss 0.6024 - lr 0.0200000\n",
-      "2021-09-08 10:56:58,570 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:56:58,572 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:58,646 epoch 4 - iter 1/5 - loss 0.69639856 - samples/sec: 16.74 - lr: 0.020000\n",
-      "2021-09-08 10:56:58,715 epoch 4 - iter 2/5 - loss 0.69293967 - samples/sec: 14.64 - lr: 0.020000\n",
-      "2021-09-08 10:56:58,787 epoch 4 - iter 3/5 - loss 0.60280207 - samples/sec: 14.16 - lr: 0.020000\n",
-      "2021-09-08 10:56:58,875 epoch 4 - iter 4/5 - loss 0.57807528 - samples/sec: 11.46 - lr: 0.020000\n",
-      "2021-09-08 10:56:58,948 epoch 4 - iter 5/5 - loss 0.47912900 - samples/sec: 13.76 - lr: 0.020000\n",
-      "2021-09-08 10:56:58,949 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:58,950 EPOCH 4 done: loss 0.4791 - lr 0.0200000\n",
-      "2021-09-08 10:56:58,950 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:56:58,952 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:59,030 epoch 5 - iter 1/5 - loss 1.22692525 - samples/sec: 15.93 - lr: 0.020000\n",
-      "2021-09-08 10:56:59,092 epoch 5 - iter 2/5 - loss 1.01406863 - samples/sec: 16.25 - lr: 0.020000\n",
-      "2021-09-08 10:56:59,167 epoch 5 - iter 3/5 - loss 0.85585600 - samples/sec: 13.45 - lr: 0.020000\n",
-      "2021-09-08 10:56:59,235 epoch 5 - iter 4/5 - loss 0.68984903 - samples/sec: 14.88 - lr: 0.020000\n",
-      "2021-09-08 10:56:59,316 epoch 5 - iter 5/5 - loss 0.67766180 - samples/sec: 12.47 - lr: 0.020000\n",
-      "2021-09-08 10:56:59,317 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:59,317 EPOCH 5 done: loss 0.6777 - lr 0.0200000\n",
+      "2021-09-21 20:23:41,289 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:41,405 epoch 2 - iter 1/5 - loss 0.84037703 - samples/sec: 9.87 - lr: 0.020000\n",
+      "2021-09-21 20:23:41,542 epoch 2 - iter 2/5 - loss 0.75348389 - samples/sec: 7.33 - lr: 0.020000\n",
+      "2021-09-21 20:23:41,616 epoch 2 - iter 3/5 - loss 0.74880940 - samples/sec: 13.49 - lr: 0.020000\n",
+      "2021-09-21 20:23:41,688 epoch 2 - iter 4/5 - loss 0.75656281 - samples/sec: 13.97 - lr: 0.020000\n",
+      "2021-09-21 20:23:41,738 epoch 2 - iter 5/5 - loss 0.69420692 - samples/sec: 20.20 - lr: 0.020000\n",
+      "2021-09-21 20:23:41,739 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:41,739 EPOCH 2 done: loss 0.6942 - lr 0.0200000\n",
+      "2021-09-21 20:23:41,740 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:23:43,488 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:43,600 epoch 3 - iter 1/5 - loss 0.69951111 - samples/sec: 10.05 - lr: 0.020000\n",
+      "2021-09-21 20:23:43,786 epoch 3 - iter 2/5 - loss 0.70181537 - samples/sec: 5.42 - lr: 0.020000\n",
+      "2021-09-21 20:23:43,845 epoch 3 - iter 3/5 - loss 0.85928261 - samples/sec: 17.00 - lr: 0.020000\n",
+      "2021-09-21 20:23:43,982 epoch 3 - iter 4/5 - loss 0.79986890 - samples/sec: 7.35 - lr: 0.020000\n",
+      "2021-09-21 20:23:44,086 epoch 3 - iter 5/5 - loss 0.80512857 - samples/sec: 9.66 - lr: 0.020000\n",
+      "2021-09-21 20:23:44,087 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:44,087 EPOCH 3 done: loss 0.8051 - lr 0.0200000\n",
+      "2021-09-21 20:23:44,088 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:23:44,090 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:44,233 epoch 4 - iter 1/5 - loss 0.32922050 - samples/sec: 7.74 - lr: 0.020000\n",
+      "2021-09-21 20:23:44,414 epoch 4 - iter 2/5 - loss 0.51173069 - samples/sec: 5.55 - lr: 0.020000\n",
+      "2021-09-21 20:23:44,468 epoch 4 - iter 3/5 - loss 0.36112630 - samples/sec: 18.70 - lr: 0.020000\n",
+      "2021-09-21 20:23:44,542 epoch 4 - iter 4/5 - loss 0.40578273 - samples/sec: 13.64 - lr: 0.020000\n",
+      "2021-09-21 20:23:44,616 epoch 4 - iter 5/5 - loss 0.55918334 - samples/sec: 13.48 - lr: 0.020000\n",
+      "2021-09-21 20:23:44,617 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:44,618 EPOCH 4 done: loss 0.5592 - lr 0.0200000\n",
+      "2021-09-21 20:23:44,618 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:23:44,710 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:44,886 epoch 5 - iter 1/5 - loss 0.75124699 - samples/sec: 6.25 - lr: 0.020000\n",
+      "2021-09-21 20:23:44,989 epoch 5 - iter 2/5 - loss 0.65572110 - samples/sec: 9.70 - lr: 0.020000\n",
+      "2021-09-21 20:23:45,053 epoch 5 - iter 3/5 - loss 0.88917174 - samples/sec: 15.90 - lr: 0.020000\n",
+      "2021-09-21 20:23:45,158 epoch 5 - iter 4/5 - loss 0.81231946 - samples/sec: 9.59 - lr: 0.020000\n",
+      "2021-09-21 20:23:45,289 epoch 5 - iter 5/5 - loss 0.82785252 - samples/sec: 7.67 - lr: 0.020000\n",
+      "2021-09-21 20:23:45,290 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:45,290 EPOCH 5 done: loss 0.8279 - lr 0.0200000\n",
       "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 10:56:59,317 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:56:59,319 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:59,405 epoch 6 - iter 1/5 - loss 0.65938842 - samples/sec: 17.30 - lr: 0.010000\n",
-      "2021-09-08 10:56:59,457 epoch 6 - iter 2/5 - loss 0.57990974 - samples/sec: 19.43 - lr: 0.010000\n",
-      "2021-09-08 10:56:59,513 epoch 6 - iter 3/5 - loss 0.54658191 - samples/sec: 18.34 - lr: 0.010000\n",
-      "2021-09-08 10:56:59,561 epoch 6 - iter 4/5 - loss 0.42764525 - samples/sec: 20.86 - lr: 0.010000\n",
-      "2021-09-08 10:56:59,612 epoch 6 - iter 5/5 - loss 0.44305258 - samples/sec: 19.95 - lr: 0.010000\n",
-      "2021-09-08 10:56:59,613 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:59,614 EPOCH 6 done: loss 0.4431 - lr 0.0100000\n",
-      "2021-09-08 10:56:59,614 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:56:59,616 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:59,686 epoch 7 - iter 1/5 - loss 0.40231299 - samples/sec: 17.80 - lr: 0.010000\n",
-      "2021-09-08 10:56:59,738 epoch 7 - iter 2/5 - loss 0.32732056 - samples/sec: 19.60 - lr: 0.010000\n",
-      "2021-09-08 10:56:59,789 epoch 7 - iter 3/5 - loss 0.23046357 - samples/sec: 19.72 - lr: 0.010000\n",
-      "2021-09-08 10:56:59,844 epoch 7 - iter 4/5 - loss 0.28834452 - samples/sec: 18.34 - lr: 0.010000\n",
-      "2021-09-08 10:56:59,893 epoch 7 - iter 5/5 - loss 0.33662036 - samples/sec: 20.68 - lr: 0.010000\n",
-      "2021-09-08 10:56:59,895 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:59,895 EPOCH 7 done: loss 0.3366 - lr 0.0100000\n",
-      "2021-09-08 10:56:59,895 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:56:59,897 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:56:59,964 epoch 8 - iter 1/5 - loss 0.62008369 - samples/sec: 18.76 - lr: 0.010000\n",
-      "2021-09-08 10:57:00,011 epoch 8 - iter 2/5 - loss 0.33214389 - samples/sec: 21.68 - lr: 0.010000\n",
-      "2021-09-08 10:57:00,066 epoch 8 - iter 3/5 - loss 0.40146117 - samples/sec: 18.29 - lr: 0.010000\n",
-      "2021-09-08 10:57:00,113 epoch 8 - iter 4/5 - loss 0.42362125 - samples/sec: 21.33 - lr: 0.010000\n",
-      "2021-09-08 10:57:00,165 epoch 8 - iter 5/5 - loss 0.47192166 - samples/sec: 19.57 - lr: 0.010000\n",
-      "2021-09-08 10:57:00,166 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:00,167 EPOCH 8 done: loss 0.4719 - lr 0.0100000\n",
-      "2021-09-08 10:57:00,167 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:57:00,242 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 20:23:45,290 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:23:45,371 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:45,484 epoch 6 - iter 1/5 - loss 0.36872706 - samples/sec: 10.80 - lr: 0.010000\n",
+      "2021-09-21 20:23:45,539 epoch 6 - iter 2/5 - loss 0.39658114 - samples/sec: 18.31 - lr: 0.010000\n",
+      "2021-09-21 20:23:45,612 epoch 6 - iter 3/5 - loss 0.46053567 - samples/sec: 13.71 - lr: 0.010000\n",
+      "2021-09-21 20:23:45,740 epoch 6 - iter 4/5 - loss 0.48533440 - samples/sec: 7.88 - lr: 0.010000\n",
+      "2021-09-21 20:23:45,876 epoch 6 - iter 5/5 - loss 0.47599066 - samples/sec: 7.35 - lr: 0.010000\n",
+      "2021-09-21 20:23:45,877 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:45,878 EPOCH 6 done: loss 0.4760 - lr 0.0100000\n",
+      "2021-09-21 20:23:45,878 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:23:45,953 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:46,075 epoch 7 - iter 1/5 - loss 0.36858448 - samples/sec: 9.44 - lr: 0.010000\n",
+      "2021-09-21 20:23:46,182 epoch 7 - iter 2/5 - loss 0.34752308 - samples/sec: 9.40 - lr: 0.010000\n",
+      "2021-09-21 20:23:46,253 epoch 7 - iter 3/5 - loss 0.31237682 - samples/sec: 14.25 - lr: 0.010000\n",
+      "2021-09-21 20:23:46,386 epoch 7 - iter 4/5 - loss 0.35539421 - samples/sec: 7.53 - lr: 0.010000\n",
+      "2021-09-21 20:23:46,555 epoch 7 - iter 5/5 - loss 0.38044710 - samples/sec: 5.95 - lr: 0.010000\n",
+      "2021-09-21 20:23:46,556 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:46,556 EPOCH 7 done: loss 0.3804 - lr 0.0100000\n",
+      "2021-09-21 20:23:46,557 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:23:46,668 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:46,753 epoch 8 - iter 1/5 - loss 0.26985222 - samples/sec: 14.06 - lr: 0.010000\n",
+      "2021-09-21 20:23:46,852 epoch 8 - iter 2/5 - loss 0.41568911 - samples/sec: 10.15 - lr: 0.010000\n",
+      "2021-09-21 20:23:47,041 epoch 8 - iter 3/5 - loss 0.52820182 - samples/sec: 5.30 - lr: 0.010000\n",
+      "2021-09-21 20:23:47,108 epoch 8 - iter 4/5 - loss 0.44167092 - samples/sec: 15.08 - lr: 0.010000\n",
+      "2021-09-21 20:23:47,216 epoch 8 - iter 5/5 - loss 0.41273308 - samples/sec: 9.30 - lr: 0.010000\n",
+      "2021-09-21 20:23:47,217 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:47,217 EPOCH 8 done: loss 0.4127 - lr 0.0100000\n",
+      "2021-09-21 20:23:47,218 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:23:48,703 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:48,792 epoch 9 - iter 1/5 - loss 0.11856428 - samples/sec: 13.18 - lr: 0.010000\n",
+      "2021-09-21 20:23:48,960 epoch 9 - iter 2/5 - loss 0.25825579 - samples/sec: 5.99 - lr: 0.010000\n",
+      "2021-09-21 20:23:49,152 epoch 9 - iter 3/5 - loss 0.38983761 - samples/sec: 5.22 - lr: 0.010000\n",
+      "2021-09-21 20:23:49,231 epoch 9 - iter 4/5 - loss 0.37804123 - samples/sec: 12.89 - lr: 0.010000\n",
+      "2021-09-21 20:23:49,372 epoch 9 - iter 5/5 - loss 0.32687976 - samples/sec: 7.08 - lr: 0.010000\n",
+      "2021-09-21 20:23:49,374 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:49,374 EPOCH 9 done: loss 0.3269 - lr 0.0100000\n",
+      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 20:23:49,374 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:23:49,377 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:49,467 epoch 10 - iter 1/5 - loss 0.10544339 - samples/sec: 13.65 - lr: 0.005000\n",
+      "2021-09-21 20:23:49,610 epoch 10 - iter 2/5 - loss 0.19259138 - samples/sec: 7.04 - lr: 0.005000\n",
+      "2021-09-21 20:23:49,738 epoch 10 - iter 3/5 - loss 0.19516320 - samples/sec: 7.87 - lr: 0.005000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:57:00,308 epoch 9 - iter 1/5 - loss 0.43946314 - samples/sec: 19.13 - lr: 0.010000\n",
-      "2021-09-08 10:57:00,363 epoch 9 - iter 2/5 - loss 0.44772537 - samples/sec: 18.45 - lr: 0.010000\n",
-      "2021-09-08 10:57:00,410 epoch 9 - iter 3/5 - loss 0.58822555 - samples/sec: 21.31 - lr: 0.010000\n",
-      "2021-09-08 10:57:00,458 epoch 9 - iter 4/5 - loss 0.55885924 - samples/sec: 21.09 - lr: 0.010000\n",
-      "2021-09-08 10:57:00,503 epoch 9 - iter 5/5 - loss 0.45683023 - samples/sec: 22.48 - lr: 0.010000\n",
-      "2021-09-08 10:57:00,504 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:00,504 EPOCH 9 done: loss 0.4568 - lr 0.0100000\n",
-      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 10:57:00,504 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:57:00,574 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:00,642 epoch 10 - iter 1/5 - loss 0.35697547 - samples/sec: 18.51 - lr: 0.005000\n",
-      "2021-09-08 10:57:00,697 epoch 10 - iter 2/5 - loss 0.41936725 - samples/sec: 18.18 - lr: 0.005000\n",
-      "2021-09-08 10:57:00,744 epoch 10 - iter 3/5 - loss 0.55781702 - samples/sec: 21.39 - lr: 0.005000\n",
-      "2021-09-08 10:57:00,792 epoch 10 - iter 4/5 - loss 0.57523482 - samples/sec: 20.92 - lr: 0.005000\n",
-      "2021-09-08 10:57:00,837 epoch 10 - iter 5/5 - loss 0.46421293 - samples/sec: 22.59 - lr: 0.005000\n",
-      "2021-09-08 10:57:00,838 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:00,839 EPOCH 10 done: loss 0.4642 - lr 0.0050000\n",
-      "2021-09-08 10:57:00,839 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:57:06,213 Test data not provided setting final score to 0\n",
-      "2021-09-08 10:57:50,831 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
+      "2021-09-21 20:23:49,884 epoch 10 - iter 4/5 - loss 0.15663071 - samples/sec: 6.86 - lr: 0.005000\n",
+      "2021-09-21 20:23:50,009 epoch 10 - iter 5/5 - loss 0.16941384 - samples/sec: 8.03 - lr: 0.005000\n",
+      "2021-09-21 20:23:50,010 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:50,010 EPOCH 10 done: loss 0.1694 - lr 0.0050000\n",
+      "2021-09-21 20:23:50,011 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:23:58,867 Test data not provided setting final score to 0\n",
+      "2021-09-21 20:24:49,069 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:57:55,287 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:24:53,568 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 5/5 [00:00<00:00, 12595.51it/s]"
+      "100%|██████████| 5/5 [00:00<00:00, 11631.46it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:57:55,290 [b'The product has been reviewed as awful', b'The product has been reviewed as bad', b'The product has been reviewed as neutral', b'The product has been reviewed as good', b'The product has been reviewed as great']\n",
-      "2021-09-08 10:57:55,463 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:55,465 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:24:53,570 [b'The product has been reviewed as awful', b'The product has been reviewed as bad', b'The product has been reviewed as neutral', b'The product has been reviewed as good', b'The product has been reviewed as great']\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 20:24:55,600 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:55,602 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -3727,157 +3756,144 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:57:55,466 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:55,466 Corpus: \"Corpus: 5 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 10:57:55,466 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:55,466 Parameters:\n",
-      "2021-09-08 10:57:55,467  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:57:55,467  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:57:55,467  - patience: \"3\"\n",
-      "2021-09-08 10:57:55,468  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:57:55,468  - max_epochs: \"10\"\n",
-      "2021-09-08 10:57:55,468  - shuffle: \"True\"\n",
-      "2021-09-08 10:57:55,468  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:57:55,469  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:57:55,469 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:55,469 Model training base path: \"temp1\"\n",
-      "2021-09-08 10:57:55,469 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:55,470 Device: cuda:0\n",
-      "2021-09-08 10:57:55,470 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:55,470 Embeddings storage mode: cpu\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 10:57:55,703 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:55,758 epoch 1 - iter 1/5 - loss 1.06520116 - samples/sec: 24.39 - lr: 0.020000\n",
-      "2021-09-08 10:57:55,806 epoch 1 - iter 2/5 - loss 0.78776452 - samples/sec: 21.21 - lr: 0.020000\n",
-      "2021-09-08 10:57:55,853 epoch 1 - iter 3/5 - loss 1.04741249 - samples/sec: 21.35 - lr: 0.020000\n",
-      "2021-09-08 10:57:55,923 epoch 1 - iter 4/5 - loss 0.91216880 - samples/sec: 14.36 - lr: 0.020000\n",
-      "2021-09-08 10:57:55,974 epoch 1 - iter 5/5 - loss 1.01528728 - samples/sec: 19.75 - lr: 0.020000\n",
-      "2021-09-08 10:57:55,975 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:57:55,976 EPOCH 1 done: loss 1.0153 - lr 0.0200000\n",
-      "2021-09-08 10:57:55,976 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:24:55,603 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:55,603 Corpus: \"Corpus: 5 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 20:24:55,603 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:55,603 Parameters:\n",
+      "2021-09-21 20:24:55,604  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:24:55,604  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:24:55,604  - patience: \"3\"\n",
+      "2021-09-21 20:24:55,605  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:24:55,605  - max_epochs: \"10\"\n",
+      "2021-09-21 20:24:55,605  - shuffle: \"True\"\n",
+      "2021-09-21 20:24:55,605  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:24:55,606  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:24:55,606 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:55,606 Model training base path: \"temp1\"\n",
+      "2021-09-21 20:24:55,607 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:55,607 Device: cuda:0\n",
+      "2021-09-21 20:24:55,607 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:55,607 Embeddings storage mode: cpu\n",
+      "2021-09-21 20:24:55,620 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:55,697 epoch 1 - iter 1/5 - loss 0.90898538 - samples/sec: 16.38 - lr: 0.020000\n",
+      "2021-09-21 20:24:55,917 epoch 1 - iter 2/5 - loss 0.79371116 - samples/sec: 4.55 - lr: 0.020000\n",
+      "2021-09-21 20:24:55,999 epoch 1 - iter 3/5 - loss 0.78662481 - samples/sec: 12.32 - lr: 0.020000\n",
+      "2021-09-21 20:24:56,075 epoch 1 - iter 4/5 - loss 0.75779465 - samples/sec: 13.32 - lr: 0.020000\n",
+      "2021-09-21 20:24:56,214 epoch 1 - iter 5/5 - loss 0.74677304 - samples/sec: 7.21 - lr: 0.020000\n",
+      "2021-09-21 20:24:56,215 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:56,215 EPOCH 1 done: loss 0.7468 - lr 0.0200000\n",
+      "2021-09-21 20:24:56,216 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:58:07,078 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:07,182 epoch 2 - iter 1/5 - loss 1.08645523 - samples/sec: 12.90 - lr: 0.020000\n",
-      "2021-09-08 10:58:07,249 epoch 2 - iter 2/5 - loss 1.39758515 - samples/sec: 15.14 - lr: 0.020000\n",
-      "2021-09-08 10:58:07,313 epoch 2 - iter 3/5 - loss 1.10000660 - samples/sec: 15.78 - lr: 0.020000\n",
-      "2021-09-08 10:58:07,384 epoch 2 - iter 4/5 - loss 1.10992263 - samples/sec: 14.22 - lr: 0.020000\n",
-      "2021-09-08 10:58:07,478 epoch 2 - iter 5/5 - loss 1.05400895 - samples/sec: 10.69 - lr: 0.020000\n",
-      "2021-09-08 10:58:07,479 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:07,479 EPOCH 2 done: loss 1.0540 - lr 0.0200000\n",
-      "2021-09-08 10:58:07,480 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:25:02,595 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:02,660 epoch 2 - iter 1/5 - loss 0.77774650 - samples/sec: 19.96 - lr: 0.020000\n",
+      "2021-09-21 20:25:02,762 epoch 2 - iter 2/5 - loss 0.73166779 - samples/sec: 9.85 - lr: 0.020000\n",
+      "2021-09-21 20:25:02,809 epoch 2 - iter 3/5 - loss 0.81305989 - samples/sec: 21.71 - lr: 0.020000\n",
+      "2021-09-21 20:25:02,855 epoch 2 - iter 4/5 - loss 0.85673945 - samples/sec: 21.64 - lr: 0.020000\n",
+      "2021-09-21 20:25:02,911 epoch 2 - iter 5/5 - loss 0.83498809 - samples/sec: 18.00 - lr: 0.020000\n",
+      "2021-09-21 20:25:02,912 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:02,913 EPOCH 2 done: loss 0.8350 - lr 0.0200000\n",
+      "2021-09-21 20:25:02,913 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:58:17,591 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:17,654 epoch 3 - iter 1/5 - loss 1.20341253 - samples/sec: 20.76 - lr: 0.020000\n",
-      "2021-09-08 10:58:17,707 epoch 3 - iter 2/5 - loss 1.18130475 - samples/sec: 19.22 - lr: 0.020000\n",
-      "2021-09-08 10:58:17,757 epoch 3 - iter 3/5 - loss 0.86436365 - samples/sec: 20.15 - lr: 0.020000\n",
-      "2021-09-08 10:58:17,804 epoch 3 - iter 4/5 - loss 0.83338700 - samples/sec: 21.35 - lr: 0.020000\n",
-      "2021-09-08 10:58:17,873 epoch 3 - iter 5/5 - loss 0.76955161 - samples/sec: 14.71 - lr: 0.020000\n",
-      "2021-09-08 10:58:17,874 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:17,874 EPOCH 3 done: loss 0.7696 - lr 0.0200000\n",
-      "2021-09-08 10:58:17,874 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:58:18,167 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:18,231 epoch 4 - iter 1/5 - loss 0.26190573 - samples/sec: 19.90 - lr: 0.020000\n",
-      "2021-09-08 10:58:18,278 epoch 4 - iter 2/5 - loss 1.25962678 - samples/sec: 21.27 - lr: 0.020000\n",
-      "2021-09-08 10:58:18,327 epoch 4 - iter 3/5 - loss 0.99634607 - samples/sec: 20.58 - lr: 0.020000\n",
-      "2021-09-08 10:58:18,395 epoch 4 - iter 4/5 - loss 0.79857245 - samples/sec: 14.83 - lr: 0.020000\n",
-      "2021-09-08 10:58:18,442 epoch 4 - iter 5/5 - loss 0.82641276 - samples/sec: 21.37 - lr: 0.020000\n",
-      "2021-09-08 10:58:18,443 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:18,444 EPOCH 4 done: loss 0.8264 - lr 0.0200000\n",
-      "2021-09-08 10:58:18,444 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:58:18,620 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:18,680 epoch 5 - iter 1/5 - loss 0.48138642 - samples/sec: 21.33 - lr: 0.020000\n",
-      "2021-09-08 10:58:18,728 epoch 5 - iter 2/5 - loss 0.68754765 - samples/sec: 21.19 - lr: 0.020000\n",
-      "2021-09-08 10:58:18,775 epoch 5 - iter 3/5 - loss 0.67691350 - samples/sec: 21.56 - lr: 0.020000\n",
-      "2021-09-08 10:58:18,845 epoch 5 - iter 4/5 - loss 0.56265070 - samples/sec: 14.38 - lr: 0.020000\n",
-      "2021-09-08 10:58:18,893 epoch 5 - iter 5/5 - loss 0.47488963 - samples/sec: 20.82 - lr: 0.020000\n",
-      "2021-09-08 10:58:18,894 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:18,894 EPOCH 5 done: loss 0.4749 - lr 0.0200000\n",
-      "2021-09-08 10:58:18,895 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:58:19,276 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:19,342 epoch 6 - iter 1/5 - loss 0.67225152 - samples/sec: 19.56 - lr: 0.020000\n",
-      "2021-09-08 10:58:19,399 epoch 6 - iter 2/5 - loss 0.34892676 - samples/sec: 17.52 - lr: 0.020000\n",
-      "2021-09-08 10:58:19,453 epoch 6 - iter 3/5 - loss 0.47587217 - samples/sec: 18.96 - lr: 0.020000\n",
-      "2021-09-08 10:58:19,507 epoch 6 - iter 4/5 - loss 0.40155019 - samples/sec: 18.46 - lr: 0.020000\n",
-      "2021-09-08 10:58:19,596 epoch 6 - iter 5/5 - loss 0.48075378 - samples/sec: 11.27 - lr: 0.020000\n",
-      "2021-09-08 10:58:19,597 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:19,598 EPOCH 6 done: loss 0.4808 - lr 0.0200000\n",
+      "2021-09-21 20:25:10,991 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:11,121 epoch 3 - iter 1/5 - loss 0.87577552 - samples/sec: 8.71 - lr: 0.020000\n",
+      "2021-09-21 20:25:11,169 epoch 3 - iter 2/5 - loss 0.73857152 - samples/sec: 21.22 - lr: 0.020000\n",
+      "2021-09-21 20:25:11,226 epoch 3 - iter 3/5 - loss 0.73411576 - samples/sec: 17.76 - lr: 0.020000\n",
+      "2021-09-21 20:25:11,276 epoch 3 - iter 4/5 - loss 0.72419506 - samples/sec: 20.16 - lr: 0.020000\n",
+      "2021-09-21 20:25:11,323 epoch 3 - iter 5/5 - loss 0.72636780 - samples/sec: 21.41 - lr: 0.020000\n",
+      "2021-09-21 20:25:11,324 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:11,324 EPOCH 3 done: loss 0.7264 - lr 0.0200000\n",
+      "2021-09-21 20:25:11,325 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:25:15,113 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:15,181 epoch 4 - iter 1/5 - loss 0.73244286 - samples/sec: 18.79 - lr: 0.020000\n",
+      "2021-09-21 20:25:15,229 epoch 4 - iter 2/5 - loss 0.60879751 - samples/sec: 20.78 - lr: 0.020000\n",
+      "2021-09-21 20:25:15,292 epoch 4 - iter 3/5 - loss 0.65909430 - samples/sec: 16.16 - lr: 0.020000\n",
+      "2021-09-21 20:25:15,338 epoch 4 - iter 4/5 - loss 0.63000410 - samples/sec: 21.80 - lr: 0.020000\n",
+      "2021-09-21 20:25:15,446 epoch 4 - iter 5/5 - loss 0.62294340 - samples/sec: 9.33 - lr: 0.020000\n",
+      "2021-09-21 20:25:15,447 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:15,447 EPOCH 4 done: loss 0.6229 - lr 0.0200000\n",
+      "2021-09-21 20:25:15,447 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:25:17,784 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:17,850 epoch 5 - iter 1/5 - loss 0.50076193 - samples/sec: 18.93 - lr: 0.020000\n",
+      "2021-09-21 20:25:17,938 epoch 5 - iter 2/5 - loss 0.60600528 - samples/sec: 11.48 - lr: 0.020000\n",
+      "2021-09-21 20:25:18,116 epoch 5 - iter 3/5 - loss 0.69802187 - samples/sec: 5.64 - lr: 0.020000\n",
+      "2021-09-21 20:25:18,212 epoch 5 - iter 4/5 - loss 0.69172767 - samples/sec: 10.57 - lr: 0.020000\n",
+      "2021-09-21 20:25:18,275 epoch 5 - iter 5/5 - loss 0.66731064 - samples/sec: 15.99 - lr: 0.020000\n",
+      "2021-09-21 20:25:18,276 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:18,277 EPOCH 5 done: loss 0.6673 - lr 0.0200000\n",
+      "2021-09-21 20:25:18,277 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:25:18,310 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:18,530 epoch 6 - iter 1/5 - loss 0.74536306 - samples/sec: 5.09 - lr: 0.020000\n",
+      "2021-09-21 20:25:18,607 epoch 6 - iter 2/5 - loss 0.69285649 - samples/sec: 13.25 - lr: 0.020000\n",
+      "2021-09-21 20:25:18,674 epoch 6 - iter 3/5 - loss 0.53525887 - samples/sec: 14.91 - lr: 0.020000\n",
+      "2021-09-21 20:25:18,736 epoch 6 - iter 4/5 - loss 0.54455632 - samples/sec: 16.33 - lr: 0.020000\n",
+      "2021-09-21 20:25:18,792 epoch 6 - iter 5/5 - loss 0.52568921 - samples/sec: 18.19 - lr: 0.020000\n",
+      "2021-09-21 20:25:18,793 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:18,794 EPOCH 6 done: loss 0.5257 - lr 0.0200000\n",
       "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 10:58:19,598 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:58:21,776 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:21,842 epoch 7 - iter 1/5 - loss 0.83904475 - samples/sec: 19.86 - lr: 0.010000\n",
-      "2021-09-08 10:58:21,895 epoch 7 - iter 2/5 - loss 0.59246346 - samples/sec: 19.13 - lr: 0.010000\n",
-      "2021-09-08 10:58:21,954 epoch 7 - iter 3/5 - loss 0.42521450 - samples/sec: 17.01 - lr: 0.010000\n",
-      "2021-09-08 10:58:22,044 epoch 7 - iter 4/5 - loss 0.32530100 - samples/sec: 11.20 - lr: 0.010000\n",
-      "2021-09-08 10:58:22,100 epoch 7 - iter 5/5 - loss 0.40665183 - samples/sec: 17.95 - lr: 0.010000\n",
-      "2021-09-08 10:58:22,101 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:22,101 EPOCH 7 done: loss 0.4067 - lr 0.0100000\n",
-      "2021-09-08 10:58:22,101 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:58:22,109 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:22,181 epoch 8 - iter 1/5 - loss 0.34211126 - samples/sec: 18.17 - lr: 0.010000\n",
-      "2021-09-08 10:58:22,240 epoch 8 - iter 2/5 - loss 0.17364061 - samples/sec: 16.99 - lr: 0.010000\n",
-      "2021-09-08 10:58:22,292 epoch 8 - iter 3/5 - loss 0.14817417 - samples/sec: 19.42 - lr: 0.010000\n",
-      "2021-09-08 10:58:22,351 epoch 8 - iter 4/5 - loss 0.26622387 - samples/sec: 17.09 - lr: 0.010000\n",
-      "2021-09-08 10:58:22,434 epoch 8 - iter 5/5 - loss 0.31055816 - samples/sec: 12.04 - lr: 0.010000\n",
-      "2021-09-08 10:58:22,435 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:22,436 EPOCH 8 done: loss 0.3106 - lr 0.0100000\n",
-      "2021-09-08 10:58:22,436 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:58:22,438 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:22,549 epoch 9 - iter 1/5 - loss 0.02964082 - samples/sec: 11.00 - lr: 0.010000\n",
-      "2021-09-08 10:58:22,597 epoch 9 - iter 2/5 - loss 0.07116606 - samples/sec: 21.09 - lr: 0.010000\n",
-      "2021-09-08 10:58:22,652 epoch 9 - iter 3/5 - loss 0.11222388 - samples/sec: 18.65 - lr: 0.010000\n",
-      "2021-09-08 10:58:22,710 epoch 9 - iter 4/5 - loss 0.44783340 - samples/sec: 17.34 - lr: 0.010000\n",
-      "2021-09-08 10:58:22,762 epoch 9 - iter 5/5 - loss 0.41486593 - samples/sec: 19.44 - lr: 0.010000\n",
-      "2021-09-08 10:58:22,763 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:22,763 EPOCH 9 done: loss 0.4149 - lr 0.0100000\n",
-      "2021-09-08 10:58:22,763 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:58:22,765 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:22,843 epoch 10 - iter 1/5 - loss 0.55964857 - samples/sec: 16.98 - lr: 0.010000\n",
-      "2021-09-08 10:58:22,902 epoch 10 - iter 2/5 - loss 0.29592175 - samples/sec: 17.05 - lr: 0.010000\n",
-      "2021-09-08 10:58:22,950 epoch 10 - iter 3/5 - loss 0.26470428 - samples/sec: 21.04 - lr: 0.010000\n"
+      "2021-09-21 20:25:18,794 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:25:18,796 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:18,867 epoch 7 - iter 1/5 - loss 1.18541086 - samples/sec: 17.29 - lr: 0.010000\n",
+      "2021-09-21 20:25:18,917 epoch 7 - iter 2/5 - loss 0.94447383 - samples/sec: 20.19 - lr: 0.010000\n",
+      "2021-09-21 20:25:18,965 epoch 7 - iter 3/5 - loss 0.78325726 - samples/sec: 21.22 - lr: 0.010000\n",
+      "2021-09-21 20:25:19,066 epoch 7 - iter 4/5 - loss 0.76327492 - samples/sec: 9.97 - lr: 0.010000\n",
+      "2021-09-21 20:25:19,112 epoch 7 - iter 5/5 - loss 0.71494839 - samples/sec: 21.78 - lr: 0.010000\n",
+      "2021-09-21 20:25:19,113 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:19,114 EPOCH 7 done: loss 0.7149 - lr 0.0100000\n",
+      "2021-09-21 20:25:19,114 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:25:19,133 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:19,245 epoch 8 - iter 1/5 - loss 0.58653444 - samples/sec: 10.13 - lr: 0.010000\n",
+      "2021-09-21 20:25:19,301 epoch 8 - iter 2/5 - loss 0.57038355 - samples/sec: 17.80 - lr: 0.010000\n",
+      "2021-09-21 20:25:19,348 epoch 8 - iter 3/5 - loss 0.46447045 - samples/sec: 21.44 - lr: 0.010000\n",
+      "2021-09-21 20:25:19,399 epoch 8 - iter 4/5 - loss 0.50280106 - samples/sec: 20.11 - lr: 0.010000\n",
+      "2021-09-21 20:25:19,447 epoch 8 - iter 5/5 - loss 0.47684881 - samples/sec: 20.76 - lr: 0.010000\n",
+      "2021-09-21 20:25:19,448 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:19,449 EPOCH 8 done: loss 0.4768 - lr 0.0100000\n",
+      "2021-09-21 20:25:19,449 BAD EPOCHS (no improvement): 2\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:58:23,004 epoch 10 - iter 4/5 - loss 0.41581752 - samples/sec: 18.50 - lr: 0.010000\n",
-      "2021-09-08 10:58:23,092 epoch 10 - iter 5/5 - loss 0.33477306 - samples/sec: 11.43 - lr: 0.010000\n",
-      "2021-09-08 10:58:23,094 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:58:23,094 EPOCH 10 done: loss 0.3348 - lr 0.0100000\n",
+      "2021-09-21 20:25:19,524 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:19,596 epoch 9 - iter 1/5 - loss 0.49917522 - samples/sec: 17.29 - lr: 0.010000\n",
+      "2021-09-21 20:25:19,697 epoch 9 - iter 2/5 - loss 0.58346029 - samples/sec: 9.92 - lr: 0.010000\n",
+      "2021-09-21 20:25:19,746 epoch 9 - iter 3/5 - loss 0.47024028 - samples/sec: 20.62 - lr: 0.010000\n",
+      "2021-09-21 20:25:19,798 epoch 9 - iter 4/5 - loss 0.46266498 - samples/sec: 19.66 - lr: 0.010000\n",
+      "2021-09-21 20:25:19,843 epoch 9 - iter 5/5 - loss 0.37433149 - samples/sec: 22.12 - lr: 0.010000\n",
+      "2021-09-21 20:25:19,844 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:19,845 EPOCH 9 done: loss 0.3743 - lr 0.0100000\n",
+      "2021-09-21 20:25:19,845 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:25:19,924 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:20,039 epoch 10 - iter 1/5 - loss 0.63561195 - samples/sec: 9.83 - lr: 0.010000\n",
+      "2021-09-21 20:25:20,097 epoch 10 - iter 2/5 - loss 0.67132288 - samples/sec: 17.53 - lr: 0.010000\n",
+      "2021-09-21 20:25:20,142 epoch 10 - iter 3/5 - loss 0.48680762 - samples/sec: 22.23 - lr: 0.010000\n",
+      "2021-09-21 20:25:20,187 epoch 10 - iter 4/5 - loss 0.36978380 - samples/sec: 22.36 - lr: 0.010000\n",
+      "2021-09-21 20:25:20,239 epoch 10 - iter 5/5 - loss 0.43907122 - samples/sec: 19.76 - lr: 0.010000\n",
+      "2021-09-21 20:25:20,240 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:20,240 EPOCH 10 done: loss 0.4391 - lr 0.0100000\n",
       "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 10:58:23,094 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:58:28,568 Test data not provided setting final score to 0\n",
-      "2021-09-08 10:59:06,388 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
+      "2021-09-21 20:25:20,240 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:25:25,703 Test data not provided setting final score to 0\n",
+      "2021-09-21 20:26:14,484 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 10:59:10,622 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:26:18,917 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 5/5 [00:00<00:00, 11008.67it/s]"
+      "100%|██████████| 5/5 [00:00<00:00, 17246.32it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:59:10,624 [b'The product has been reviewed as awful', b'The product has been reviewed as bad', b'The product has been reviewed as neutral', b'The product has been reviewed as good', b'The product has been reviewed as great']\n",
-      "2021-09-08 10:59:10,758 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:10,760 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:26:18,919 [b'The product has been reviewed as awful', b'The product has been reviewed as bad', b'The product has been reviewed as neutral', b'The product has been reviewed as good', b'The product has been reviewed as great']\n",
+      "2021-09-21 20:26:18,929 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:18,931 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -4190,24 +4206,28 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:59:10,761 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:10,761 Corpus: \"Corpus: 5 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 10:59:10,761 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:10,762 Parameters:\n",
-      "2021-09-08 10:59:10,762  - learning_rate: \"0.02\"\n",
-      "2021-09-08 10:59:10,762  - mini_batch_size: \"1\"\n",
-      "2021-09-08 10:59:10,762  - patience: \"3\"\n",
-      "2021-09-08 10:59:10,763  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 10:59:10,763  - max_epochs: \"10\"\n",
-      "2021-09-08 10:59:10,763  - shuffle: \"True\"\n",
-      "2021-09-08 10:59:10,764  - train_with_dev: \"False\"\n",
-      "2021-09-08 10:59:10,764  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 10:59:10,764 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:10,764 Model training base path: \"temp1\"\n",
-      "2021-09-08 10:59:10,765 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:10,765 Device: cuda:0\n",
-      "2021-09-08 10:59:10,765 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:10,766 Embeddings storage mode: cpu\n"
+      "2021-09-21 20:26:18,931 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:18,931 Corpus: \"Corpus: 5 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 20:26:18,932 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:18,932 Parameters:\n",
+      "2021-09-21 20:26:18,932  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:26:18,933  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:26:18,933  - patience: \"3\"\n",
+      "2021-09-21 20:26:18,934  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:26:18,934  - max_epochs: \"10\"\n",
+      "2021-09-21 20:26:18,935  - shuffle: \"True\"\n",
+      "2021-09-21 20:26:18,935  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:26:18,935  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:26:18,935 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:18,936 Model training base path: \"temp1\"\n",
+      "2021-09-21 20:26:18,936 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:18,936 Device: cuda:0\n",
+      "2021-09-21 20:26:18,937 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:18,937 Embeddings storage mode: cpu\n",
+      "2021-09-21 20:26:18,943 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:19,005 epoch 1 - iter 1/5 - loss 1.01485968 - samples/sec: 21.19 - lr: 0.020000\n",
+      "2021-09-21 20:26:19,056 epoch 1 - iter 2/5 - loss 0.67799301 - samples/sec: 19.68 - lr: 0.020000\n",
+      "2021-09-21 20:26:19,108 epoch 1 - iter 3/5 - loss 0.87812925 - samples/sec: 19.83 - lr: 0.020000\n"
      ]
     },
     {
@@ -4221,124 +4241,120 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:59:10,948 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:11,016 epoch 1 - iter 1/5 - loss 0.63021511 - samples/sec: 21.28 - lr: 0.020000\n",
-      "2021-09-08 10:59:11,173 epoch 1 - iter 2/5 - loss 0.94430646 - samples/sec: 6.39 - lr: 0.020000\n",
-      "2021-09-08 10:59:11,229 epoch 1 - iter 3/5 - loss 0.94699407 - samples/sec: 18.02 - lr: 0.020000\n",
-      "2021-09-08 10:59:11,284 epoch 1 - iter 4/5 - loss 0.85576709 - samples/sec: 18.26 - lr: 0.020000\n",
-      "2021-09-08 10:59:11,356 epoch 1 - iter 5/5 - loss 0.83798146 - samples/sec: 14.11 - lr: 0.020000\n",
-      "2021-09-08 10:59:11,357 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:11,357 EPOCH 1 done: loss 0.8380 - lr 0.0200000\n",
-      "2021-09-08 10:59:11,358 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:26:19,159 epoch 1 - iter 4/5 - loss 0.73367870 - samples/sec: 19.89 - lr: 0.020000\n",
+      "2021-09-21 20:26:19,217 epoch 1 - iter 5/5 - loss 0.71572212 - samples/sec: 17.29 - lr: 0.020000\n",
+      "2021-09-21 20:26:19,218 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:19,219 EPOCH 1 done: loss 0.7157 - lr 0.0200000\n",
+      "2021-09-21 20:26:19,219 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:59:18,609 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:18,685 epoch 2 - iter 1/5 - loss 0.57808805 - samples/sec: 16.81 - lr: 0.020000\n",
-      "2021-09-08 10:59:18,736 epoch 2 - iter 2/5 - loss 1.21709639 - samples/sec: 19.89 - lr: 0.020000\n",
-      "2021-09-08 10:59:18,895 epoch 2 - iter 3/5 - loss 1.04794500 - samples/sec: 6.28 - lr: 0.020000\n",
-      "2021-09-08 10:59:18,968 epoch 2 - iter 4/5 - loss 1.07490580 - samples/sec: 13.90 - lr: 0.020000\n",
-      "2021-09-08 10:59:19,023 epoch 2 - iter 5/5 - loss 0.98117284 - samples/sec: 18.38 - lr: 0.020000\n",
-      "2021-09-08 10:59:19,024 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:19,024 EPOCH 2 done: loss 0.9812 - lr 0.0200000\n",
-      "2021-09-08 10:59:19,024 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:26:32,090 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:32,153 epoch 2 - iter 1/5 - loss 1.28375793 - samples/sec: 20.82 - lr: 0.020000\n",
+      "2021-09-21 20:26:32,200 epoch 2 - iter 2/5 - loss 1.02681372 - samples/sec: 21.63 - lr: 0.020000\n",
+      "2021-09-21 20:26:32,251 epoch 2 - iter 3/5 - loss 0.88080704 - samples/sec: 19.74 - lr: 0.020000\n",
+      "2021-09-21 20:26:32,314 epoch 2 - iter 4/5 - loss 0.95167720 - samples/sec: 16.02 - lr: 0.020000\n",
+      "2021-09-21 20:26:32,362 epoch 2 - iter 5/5 - loss 0.94102073 - samples/sec: 21.00 - lr: 0.020000\n",
+      "2021-09-21 20:26:32,363 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:32,363 EPOCH 2 done: loss 0.9410 - lr 0.0200000\n",
+      "2021-09-21 20:26:32,363 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 10:59:24,639 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:24,702 epoch 3 - iter 1/5 - loss 0.57814533 - samples/sec: 20.75 - lr: 0.020000\n",
-      "2021-09-08 10:59:24,752 epoch 3 - iter 2/5 - loss 0.64667383 - samples/sec: 20.20 - lr: 0.020000\n",
-      "2021-09-08 10:59:24,886 epoch 3 - iter 3/5 - loss 0.61945848 - samples/sec: 7.48 - lr: 0.020000\n",
-      "2021-09-08 10:59:24,933 epoch 3 - iter 4/5 - loss 0.68236612 - samples/sec: 21.45 - lr: 0.020000\n",
-      "2021-09-08 10:59:24,990 epoch 3 - iter 5/5 - loss 0.66495326 - samples/sec: 17.59 - lr: 0.020000\n",
-      "2021-09-08 10:59:24,991 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:24,992 EPOCH 3 done: loss 0.6650 - lr 0.0200000\n",
-      "2021-09-08 10:59:24,992 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:59:28,630 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:28,713 epoch 4 - iter 1/5 - loss 0.72480631 - samples/sec: 14.68 - lr: 0.020000\n",
-      "2021-09-08 10:59:28,778 epoch 4 - iter 2/5 - loss 0.69987014 - samples/sec: 15.61 - lr: 0.020000\n",
-      "2021-09-08 10:59:28,941 epoch 4 - iter 3/5 - loss 0.69441005 - samples/sec: 6.17 - lr: 0.020000\n",
-      "2021-09-08 10:59:28,991 epoch 4 - iter 4/5 - loss 0.63244256 - samples/sec: 19.88 - lr: 0.020000\n",
-      "2021-09-08 10:59:29,043 epoch 4 - iter 5/5 - loss 0.73816757 - samples/sec: 19.64 - lr: 0.020000\n",
-      "2021-09-08 10:59:29,044 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:29,045 EPOCH 4 done: loss 0.7382 - lr 0.0200000\n",
-      "2021-09-08 10:59:29,045 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:59:29,048 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:29,185 epoch 5 - iter 1/5 - loss 0.62178600 - samples/sec: 8.19 - lr: 0.020000\n",
-      "2021-09-08 10:59:29,236 epoch 5 - iter 2/5 - loss 0.58477733 - samples/sec: 19.67 - lr: 0.020000\n",
-      "2021-09-08 10:59:29,296 epoch 5 - iter 3/5 - loss 0.63501457 - samples/sec: 17.09 - lr: 0.020000\n",
-      "2021-09-08 10:59:29,347 epoch 5 - iter 4/5 - loss 0.58352234 - samples/sec: 19.91 - lr: 0.020000\n",
-      "2021-09-08 10:59:29,397 epoch 5 - iter 5/5 - loss 0.55104855 - samples/sec: 19.99 - lr: 0.020000\n",
-      "2021-09-08 10:59:29,398 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:29,399 EPOCH 5 done: loss 0.5510 - lr 0.0200000\n",
-      "2021-09-08 10:59:29,399 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:59:29,401 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:29,463 epoch 6 - iter 1/5 - loss 0.71150285 - samples/sec: 20.85 - lr: 0.020000\n",
-      "2021-09-08 10:59:29,511 epoch 6 - iter 2/5 - loss 0.53860433 - samples/sec: 21.04 - lr: 0.020000\n",
-      "2021-09-08 10:59:29,633 epoch 6 - iter 3/5 - loss 0.54471977 - samples/sec: 8.19 - lr: 0.020000\n",
-      "2021-09-08 10:59:29,681 epoch 6 - iter 4/5 - loss 0.47740979 - samples/sec: 21.06 - lr: 0.020000\n",
-      "2021-09-08 10:59:29,738 epoch 6 - iter 5/5 - loss 0.50534725 - samples/sec: 17.74 - lr: 0.020000\n",
-      "2021-09-08 10:59:29,739 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:29,740 EPOCH 6 done: loss 0.5053 - lr 0.0200000\n",
+      "2021-09-21 20:26:52,844 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:58,518 epoch 3 - iter 1/5 - loss 0.61091667 - samples/sec: 12.73 - lr: 0.020000\n",
+      "2021-09-21 20:26:58,589 epoch 3 - iter 2/5 - loss 0.60943702 - samples/sec: 14.28 - lr: 0.020000\n",
+      "2021-09-21 20:26:58,664 epoch 3 - iter 3/5 - loss 0.55267840 - samples/sec: 13.57 - lr: 0.020000\n",
+      "2021-09-21 20:26:58,734 epoch 3 - iter 4/5 - loss 0.48371154 - samples/sec: 14.46 - lr: 0.020000\n",
+      "2021-09-21 20:26:58,807 epoch 3 - iter 5/5 - loss 0.47248862 - samples/sec: 13.96 - lr: 0.020000\n",
+      "2021-09-21 20:26:58,809 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:58,810 EPOCH 3 done: loss 0.4725 - lr 0.0200000\n",
+      "2021-09-21 20:26:58,810 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:26:58,835 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:58,956 epoch 4 - iter 1/5 - loss 0.95857602 - samples/sec: 12.01 - lr: 0.020000\n",
+      "2021-09-21 20:26:59,025 epoch 4 - iter 2/5 - loss 0.81950787 - samples/sec: 14.87 - lr: 0.020000\n",
+      "2021-09-21 20:26:59,103 epoch 4 - iter 3/5 - loss 0.75344733 - samples/sec: 13.07 - lr: 0.020000\n",
+      "2021-09-21 20:26:59,160 epoch 4 - iter 4/5 - loss 0.72551738 - samples/sec: 17.78 - lr: 0.020000\n",
+      "2021-09-21 20:26:59,207 epoch 4 - iter 5/5 - loss 0.66314722 - samples/sec: 21.31 - lr: 0.020000\n",
+      "2021-09-21 20:26:59,208 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:59,209 EPOCH 4 done: loss 0.6631 - lr 0.0200000\n",
+      "2021-09-21 20:26:59,209 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:26:59,297 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:59,363 epoch 5 - iter 1/5 - loss 0.18810292 - samples/sec: 19.00 - lr: 0.020000\n",
+      "2021-09-21 20:26:59,462 epoch 5 - iter 2/5 - loss 0.46677537 - samples/sec: 10.15 - lr: 0.020000\n",
+      "2021-09-21 20:26:59,522 epoch 5 - iter 3/5 - loss 0.32985146 - samples/sec: 16.67 - lr: 0.020000\n",
+      "2021-09-21 20:26:59,587 epoch 5 - iter 4/5 - loss 0.44943969 - samples/sec: 15.43 - lr: 0.020000\n",
+      "2021-09-21 20:26:59,669 epoch 5 - iter 5/5 - loss 0.48144062 - samples/sec: 12.33 - lr: 0.020000\n",
+      "2021-09-21 20:26:59,670 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:59,670 EPOCH 5 done: loss 0.4814 - lr 0.0200000\n",
+      "2021-09-21 20:26:59,671 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:26:59,762 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:59,859 epoch 6 - iter 1/5 - loss 0.60604709 - samples/sec: 14.55 - lr: 0.020000\n",
+      "2021-09-21 20:26:59,921 epoch 6 - iter 2/5 - loss 0.59881818 - samples/sec: 16.16 - lr: 0.020000\n",
+      "2021-09-21 20:26:59,980 epoch 6 - iter 3/5 - loss 0.67496318 - samples/sec: 17.09 - lr: 0.020000\n",
+      "2021-09-21 20:27:00,027 epoch 6 - iter 4/5 - loss 0.59335875 - samples/sec: 21.35 - lr: 0.020000\n",
+      "2021-09-21 20:27:00,075 epoch 6 - iter 5/5 - loss 0.56238236 - samples/sec: 21.25 - lr: 0.020000\n",
+      "2021-09-21 20:27:00,076 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:00,076 EPOCH 6 done: loss 0.5624 - lr 0.0200000\n",
       "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 10:59:29,740 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:59:29,806 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:29,877 epoch 7 - iter 1/5 - loss 0.52418071 - samples/sec: 17.31 - lr: 0.010000\n",
-      "2021-09-08 10:59:29,925 epoch 7 - iter 2/5 - loss 0.41068156 - samples/sec: 21.06 - lr: 0.010000\n",
-      "2021-09-08 10:59:29,974 epoch 7 - iter 3/5 - loss 0.41720360 - samples/sec: 20.65 - lr: 0.010000\n",
-      "2021-09-08 10:59:30,095 epoch 7 - iter 4/5 - loss 0.42761172 - samples/sec: 8.30 - lr: 0.010000\n",
-      "2021-09-08 10:59:30,149 epoch 7 - iter 5/5 - loss 0.38242557 - samples/sec: 18.73 - lr: 0.010000\n",
-      "2021-09-08 10:59:30,150 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:30,151 EPOCH 7 done: loss 0.3824 - lr 0.0100000\n",
-      "2021-09-08 10:59:30,151 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 10:59:30,234 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:30,375 epoch 8 - iter 1/5 - loss 0.21625160 - samples/sec: 8.11 - lr: 0.010000\n",
-      "2021-09-08 10:59:30,423 epoch 8 - iter 2/5 - loss 0.33269472 - samples/sec: 20.99 - lr: 0.010000\n",
-      "2021-09-08 10:59:30,479 epoch 8 - iter 3/5 - loss 0.49723346 - samples/sec: 17.84 - lr: 0.010000\n",
-      "2021-09-08 10:59:30,524 epoch 8 - iter 4/5 - loss 0.38339468 - samples/sec: 22.42 - lr: 0.010000\n",
-      "2021-09-08 10:59:30,571 epoch 8 - iter 5/5 - loss 0.35295762 - samples/sec: 21.49 - lr: 0.010000\n",
-      "2021-09-08 10:59:30,572 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:30,573 EPOCH 8 done: loss 0.3530 - lr 0.0100000\n",
-      "2021-09-08 10:59:30,573 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 10:59:30,644 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:30,713 epoch 9 - iter 1/5 - loss 0.68490285 - samples/sec: 17.93 - lr: 0.010000\n",
-      "2021-09-08 10:59:30,761 epoch 9 - iter 2/5 - loss 0.40902131 - samples/sec: 21.08 - lr: 0.010000\n",
-      "2021-09-08 10:59:30,881 epoch 9 - iter 3/5 - loss 0.33463716 - samples/sec: 8.36 - lr: 0.010000\n",
-      "2021-09-08 10:59:30,925 epoch 9 - iter 4/5 - loss 0.28299825 - samples/sec: 23.05 - lr: 0.010000\n",
-      "2021-09-08 10:59:30,969 epoch 9 - iter 5/5 - loss 0.22951339 - samples/sec: 23.04 - lr: 0.010000\n",
-      "2021-09-08 10:59:30,970 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:30,970 EPOCH 9 done: loss 0.2295 - lr 0.0100000\n",
-      "2021-09-08 10:59:30,971 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 10:59:31,047 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:31,104 epoch 10 - iter 1/5 - loss 0.04611866 - samples/sec: 22.97 - lr: 0.010000\n",
-      "2021-09-08 10:59:31,148 epoch 10 - iter 2/5 - loss 0.06379385 - samples/sec: 23.03 - lr: 0.010000\n",
-      "2021-09-08 10:59:31,195 epoch 10 - iter 3/5 - loss 0.10659521 - samples/sec: 21.10 - lr: 0.010000\n"
+      "2021-09-21 20:27:00,077 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:27:00,166 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:00,228 epoch 7 - iter 1/5 - loss 0.44717589 - samples/sec: 20.58 - lr: 0.010000\n",
+      "2021-09-21 20:27:00,290 epoch 7 - iter 2/5 - loss 0.23195049 - samples/sec: 16.30 - lr: 0.010000\n",
+      "2021-09-21 20:27:00,380 epoch 7 - iter 3/5 - loss 0.27174643 - samples/sec: 11.19 - lr: 0.010000\n",
+      "2021-09-21 20:27:00,450 epoch 7 - iter 4/5 - loss 0.37323023 - samples/sec: 14.33 - lr: 0.010000\n",
+      "2021-09-21 20:27:00,524 epoch 7 - iter 5/5 - loss 0.40400917 - samples/sec: 13.63 - lr: 0.010000\n",
+      "2021-09-21 20:27:00,525 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:00,525 EPOCH 7 done: loss 0.4040 - lr 0.0100000\n",
+      "2021-09-21 20:27:00,526 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:27:00,601 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:00,712 epoch 8 - iter 1/5 - loss 0.12798944 - samples/sec: 11.15 - lr: 0.010000\n",
+      "2021-09-21 20:27:00,768 epoch 8 - iter 2/5 - loss 0.15837020 - samples/sec: 18.08 - lr: 0.010000\n",
+      "2021-09-21 20:27:00,827 epoch 8 - iter 3/5 - loss 0.51580831 - samples/sec: 17.07 - lr: 0.010000\n",
+      "2021-09-21 20:27:00,879 epoch 8 - iter 4/5 - loss 0.53239815 - samples/sec: 19.54 - lr: 0.010000\n",
+      "2021-09-21 20:27:00,926 epoch 8 - iter 5/5 - loss 0.45840676 - samples/sec: 21.14 - lr: 0.010000\n",
+      "2021-09-21 20:27:00,927 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:00,928 EPOCH 8 done: loss 0.4584 - lr 0.0100000\n",
+      "2021-09-21 20:27:00,928 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:27:01,027 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:01,086 epoch 9 - iter 1/5 - loss 0.12212277 - samples/sec: 21.83 - lr: 0.010000\n",
+      "2021-09-21 20:27:01,152 epoch 9 - iter 2/5 - loss 0.29482842 - samples/sec: 15.26 - lr: 0.010000\n",
+      "2021-09-21 20:27:01,219 epoch 9 - iter 3/5 - loss 0.45117508 - samples/sec: 15.19 - lr: 0.010000\n",
+      "2021-09-21 20:27:01,320 epoch 9 - iter 4/5 - loss 0.60820793 - samples/sec: 9.92 - lr: 0.010000\n",
+      "2021-09-21 20:27:01,388 epoch 9 - iter 5/5 - loss 0.58431884 - samples/sec: 14.84 - lr: 0.010000\n",
+      "2021-09-21 20:27:01,390 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:01,390 EPOCH 9 done: loss 0.5843 - lr 0.0100000\n",
+      "2021-09-21 20:27:01,390 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:27:01,465 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:01,563 epoch 10 - iter 1/5 - loss 0.49924889 - samples/sec: 14.96 - lr: 0.010000\n",
+      "2021-09-21 20:27:01,634 epoch 10 - iter 2/5 - loss 0.29088499 - samples/sec: 14.17 - lr: 0.010000\n",
+      "2021-09-21 20:27:01,692 epoch 10 - iter 3/5 - loss 0.34808501 - samples/sec: 17.47 - lr: 0.010000\n",
+      "2021-09-21 20:27:01,735 epoch 10 - iter 4/5 - loss 0.27341008 - samples/sec: 23.11 - lr: 0.010000\n",
+      "2021-09-21 20:27:01,794 epoch 10 - iter 5/5 - loss 0.39474458 - samples/sec: 17.16 - lr: 0.010000\n",
+      "2021-09-21 20:27:01,795 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:01,795 EPOCH 10 done: loss 0.3947 - lr 0.0100000\n",
+      "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 20:27:01,796 BAD EPOCHS (no improvement): 4\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 10:59:31,313 epoch 10 - iter 4/5 - loss 0.08958627 - samples/sec: 8.55 - lr: 0.010000\n",
-      "2021-09-08 10:59:31,369 epoch 10 - iter 5/5 - loss 0.16478022 - samples/sec: 17.84 - lr: 0.010000\n",
-      "2021-09-08 10:59:31,370 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 10:59:31,371 EPOCH 10 done: loss 0.1648 - lr 0.0100000\n",
-      "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 10:59:31,371 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 10:59:41,322 Test data not provided setting final score to 0\n",
-      "2021-09-08 11:00:16,869 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
+      "2021-09-21 20:27:07,277 Test data not provided setting final score to 0\n",
+      "2021-09-21 20:27:56,427 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:00:20,825 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:28:01,203 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 5/5 [00:00<00:00, 16033.27it/s]"
+      "100%|██████████| 5/5 [00:00<00:00, 14675.66it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:00:20,827 [b'The product has been reviewed as awful', b'The product has been reviewed as bad', b'The product has been reviewed as neutral', b'The product has been reviewed as good', b'The product has been reviewed as great']\n"
+      "2021-09-21 20:28:01,205 [b'The product has been reviewed as awful', b'The product has been reviewed as bad', b'The product has been reviewed as neutral', b'The product has been reviewed as good', b'The product has been reviewed as great']\n"
      ]
     },
     {
@@ -4352,8 +4368,8 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:00:21,466 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:21,468 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:28:11,289 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:11,291 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -4666,125 +4682,126 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:00:21,468 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:21,469 Corpus: \"Corpus: 5 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 11:00:21,469 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:21,469 Parameters:\n",
-      "2021-09-08 11:00:21,469  - learning_rate: \"0.02\"\n",
-      "2021-09-08 11:00:21,470  - mini_batch_size: \"1\"\n",
-      "2021-09-08 11:00:21,470  - patience: \"3\"\n",
-      "2021-09-08 11:00:21,470  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 11:00:21,471  - max_epochs: \"10\"\n",
-      "2021-09-08 11:00:21,471  - shuffle: \"True\"\n",
-      "2021-09-08 11:00:21,471  - train_with_dev: \"False\"\n",
-      "2021-09-08 11:00:21,471  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 11:00:21,472 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:21,472 Model training base path: \"temp1\"\n",
-      "2021-09-08 11:00:21,472 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:21,473 Device: cuda:0\n",
-      "2021-09-08 11:00:21,473 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:21,473 Embeddings storage mode: cpu\n",
-      "2021-09-08 11:00:21,580 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:21,637 epoch 1 - iter 1/5 - loss 1.50917590 - samples/sec: 22.83 - lr: 0.020000\n",
-      "2021-09-08 11:00:21,701 epoch 1 - iter 2/5 - loss 0.85781446 - samples/sec: 15.69 - lr: 0.020000\n",
-      "2021-09-08 11:00:21,753 epoch 1 - iter 3/5 - loss 1.05971795 - samples/sec: 19.78 - lr: 0.020000\n",
-      "2021-09-08 11:00:21,804 epoch 1 - iter 4/5 - loss 0.87886865 - samples/sec: 19.65 - lr: 0.020000\n",
-      "2021-09-08 11:00:21,855 epoch 1 - iter 5/5 - loss 0.86791858 - samples/sec: 19.88 - lr: 0.020000\n",
-      "2021-09-08 11:00:21,857 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:21,857 EPOCH 1 done: loss 0.8679 - lr 0.0200000\n",
-      "2021-09-08 11:00:21,858 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:28:11,291 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:11,292 Corpus: \"Corpus: 5 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 20:28:11,292 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:11,292 Parameters:\n",
+      "2021-09-21 20:28:11,293  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:28:11,293  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:28:11,293  - patience: \"3\"\n",
+      "2021-09-21 20:28:11,293  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:28:11,294  - max_epochs: \"10\"\n",
+      "2021-09-21 20:28:11,294  - shuffle: \"True\"\n",
+      "2021-09-21 20:28:11,294  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:28:11,295  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:28:11,295 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:11,295 Model training base path: \"temp1\"\n",
+      "2021-09-21 20:28:11,295 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:11,296 Device: cuda:0\n",
+      "2021-09-21 20:28:11,296 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:11,296 Embeddings storage mode: cpu\n",
+      "2021-09-21 20:28:12,907 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:12,967 epoch 1 - iter 1/5 - loss 0.88901401 - samples/sec: 21.81 - lr: 0.020000\n",
+      "2021-09-21 20:28:13,044 epoch 1 - iter 2/5 - loss 0.85255274 - samples/sec: 13.15 - lr: 0.020000\n",
+      "2021-09-21 20:28:13,199 epoch 1 - iter 3/5 - loss 0.79281851 - samples/sec: 6.44 - lr: 0.020000\n",
+      "2021-09-21 20:28:13,278 epoch 1 - iter 4/5 - loss 0.72898306 - samples/sec: 12.82 - lr: 0.020000\n",
+      "2021-09-21 20:28:13,356 epoch 1 - iter 5/5 - loss 0.62499335 - samples/sec: 12.95 - lr: 0.020000\n",
+      "2021-09-21 20:28:13,357 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:13,357 EPOCH 1 done: loss 0.6250 - lr 0.0200000\n",
+      "2021-09-21 20:28:13,357 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:28:28,748 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:28,809 epoch 2 - iter 1/5 - loss 0.42922816 - samples/sec: 21.38 - lr: 0.020000\n",
+      "2021-09-21 20:28:28,863 epoch 2 - iter 2/5 - loss 0.46753006 - samples/sec: 18.82 - lr: 0.020000\n",
+      "2021-09-21 20:28:28,909 epoch 2 - iter 3/5 - loss 0.66129127 - samples/sec: 21.85 - lr: 0.020000\n",
+      "2021-09-21 20:28:28,990 epoch 2 - iter 4/5 - loss 0.61408805 - samples/sec: 12.35 - lr: 0.020000\n",
+      "2021-09-21 20:28:29,037 epoch 2 - iter 5/5 - loss 0.67737773 - samples/sec: 21.88 - lr: 0.020000\n",
+      "2021-09-21 20:28:29,038 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:29,038 EPOCH 2 done: loss 0.6774 - lr 0.0200000\n",
+      "2021-09-21 20:28:29,038 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:00:39,397 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:39,468 epoch 2 - iter 1/5 - loss 0.53625709 - samples/sec: 17.88 - lr: 0.020000\n",
-      "2021-09-08 11:00:39,532 epoch 2 - iter 2/5 - loss 0.50302330 - samples/sec: 15.75 - lr: 0.020000\n",
-      "2021-09-08 11:00:39,608 epoch 2 - iter 3/5 - loss 0.64013666 - samples/sec: 13.26 - lr: 0.020000\n",
-      "2021-09-08 11:00:39,683 epoch 2 - iter 4/5 - loss 0.56586661 - samples/sec: 13.35 - lr: 0.020000\n",
-      "2021-09-08 11:00:39,740 epoch 2 - iter 5/5 - loss 0.47674437 - samples/sec: 17.69 - lr: 0.020000\n",
-      "2021-09-08 11:00:39,741 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:39,741 EPOCH 2 done: loss 0.4767 - lr 0.0200000\n",
-      "2021-09-08 11:00:39,742 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:00:39,745 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:39,817 epoch 3 - iter 1/5 - loss 1.59204495 - samples/sec: 17.87 - lr: 0.020000\n",
-      "2021-09-08 11:00:39,890 epoch 3 - iter 2/5 - loss 1.42562395 - samples/sec: 13.72 - lr: 0.020000\n",
-      "2021-09-08 11:00:39,943 epoch 3 - iter 3/5 - loss 1.12208494 - samples/sec: 19.24 - lr: 0.020000\n",
-      "2021-09-08 11:00:40,007 epoch 3 - iter 4/5 - loss 0.97709320 - samples/sec: 15.58 - lr: 0.020000\n",
-      "2021-09-08 11:00:40,064 epoch 3 - iter 5/5 - loss 0.91106802 - samples/sec: 17.94 - lr: 0.020000\n",
-      "2021-09-08 11:00:40,065 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:40,065 EPOCH 3 done: loss 0.9111 - lr 0.0200000\n",
-      "2021-09-08 11:00:40,065 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:28:36,490 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:36,552 epoch 3 - iter 1/5 - loss 0.53317720 - samples/sec: 21.10 - lr: 0.020000\n",
+      "2021-09-21 20:28:36,606 epoch 3 - iter 2/5 - loss 0.67124572 - samples/sec: 18.72 - lr: 0.020000\n",
+      "2021-09-21 20:28:36,686 epoch 3 - iter 3/5 - loss 0.71153110 - samples/sec: 12.58 - lr: 0.020000\n",
+      "2021-09-21 20:28:36,732 epoch 3 - iter 4/5 - loss 0.66839848 - samples/sec: 21.82 - lr: 0.020000\n",
+      "2021-09-21 20:28:36,778 epoch 3 - iter 5/5 - loss 0.64707098 - samples/sec: 21.80 - lr: 0.020000\n",
+      "2021-09-21 20:28:36,779 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:36,780 EPOCH 3 done: loss 0.6471 - lr 0.0200000\n",
+      "2021-09-21 20:28:36,780 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:28:36,860 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:36,920 epoch 4 - iter 1/5 - loss 0.12572914 - samples/sec: 21.71 - lr: 0.020000\n",
+      "2021-09-21 20:28:36,970 epoch 4 - iter 2/5 - loss 0.32411665 - samples/sec: 20.15 - lr: 0.020000\n",
+      "2021-09-21 20:28:37,016 epoch 4 - iter 3/5 - loss 0.36743713 - samples/sec: 21.78 - lr: 0.020000\n",
+      "2021-09-21 20:28:37,086 epoch 4 - iter 4/5 - loss 0.41740864 - samples/sec: 14.46 - lr: 0.020000\n",
+      "2021-09-21 20:28:37,134 epoch 4 - iter 5/5 - loss 0.50000495 - samples/sec: 20.76 - lr: 0.020000\n",
+      "2021-09-21 20:28:37,135 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:37,136 EPOCH 4 done: loss 0.5000 - lr 0.0200000\n",
+      "2021-09-21 20:28:37,136 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:28:41,894 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:41,978 epoch 5 - iter 1/5 - loss 0.97208071 - samples/sec: 15.07 - lr: 0.020000\n",
+      "2021-09-21 20:28:42,101 epoch 5 - iter 2/5 - loss 0.69460042 - samples/sec: 8.20 - lr: 0.020000\n",
+      "2021-09-21 20:28:42,170 epoch 5 - iter 3/5 - loss 0.81380062 - samples/sec: 14.47 - lr: 0.020000\n",
+      "2021-09-21 20:28:42,243 epoch 5 - iter 4/5 - loss 1.02257536 - samples/sec: 13.77 - lr: 0.020000\n",
+      "2021-09-21 20:28:42,299 epoch 5 - iter 5/5 - loss 0.88899472 - samples/sec: 18.18 - lr: 0.020000\n",
+      "2021-09-21 20:28:42,300 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:42,300 EPOCH 5 done: loss 0.8890 - lr 0.0200000\n",
+      "2021-09-21 20:28:42,300 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:00:45,666 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:45,745 epoch 4 - iter 1/5 - loss 0.53005081 - samples/sec: 16.20 - lr: 0.020000\n",
-      "2021-09-08 11:00:45,820 epoch 4 - iter 2/5 - loss 0.42183518 - samples/sec: 13.59 - lr: 0.020000\n",
-      "2021-09-08 11:00:45,869 epoch 4 - iter 3/5 - loss 0.29216204 - samples/sec: 20.56 - lr: 0.020000\n",
-      "2021-09-08 11:00:45,921 epoch 4 - iter 4/5 - loss 0.28049638 - samples/sec: 19.22 - lr: 0.020000\n",
-      "2021-09-08 11:00:45,978 epoch 4 - iter 5/5 - loss 0.38194671 - samples/sec: 17.69 - lr: 0.020000\n",
-      "2021-09-08 11:00:45,979 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:45,979 EPOCH 4 done: loss 0.3819 - lr 0.0200000\n",
-      "2021-09-08 11:00:45,980 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:00:45,985 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:46,079 epoch 5 - iter 1/5 - loss 0.39367840 - samples/sec: 13.52 - lr: 0.020000\n",
-      "2021-09-08 11:00:46,132 epoch 5 - iter 2/5 - loss 0.54053892 - samples/sec: 18.84 - lr: 0.020000\n",
-      "2021-09-08 11:00:46,190 epoch 5 - iter 3/5 - loss 0.55349424 - samples/sec: 17.54 - lr: 0.020000\n",
-      "2021-09-08 11:00:46,246 epoch 5 - iter 4/5 - loss 0.55639998 - samples/sec: 17.81 - lr: 0.020000\n",
-      "2021-09-08 11:00:46,298 epoch 5 - iter 5/5 - loss 0.50609409 - samples/sec: 19.27 - lr: 0.020000\n",
-      "2021-09-08 11:00:46,299 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:46,300 EPOCH 5 done: loss 0.5061 - lr 0.0200000\n",
-      "2021-09-08 11:00:46,300 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:00:46,310 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:46,401 epoch 6 - iter 1/5 - loss 0.26015523 - samples/sec: 13.33 - lr: 0.020000\n",
-      "2021-09-08 11:00:46,452 epoch 6 - iter 2/5 - loss 0.32557657 - samples/sec: 19.66 - lr: 0.020000\n",
-      "2021-09-08 11:00:46,504 epoch 6 - iter 3/5 - loss 0.39794674 - samples/sec: 19.51 - lr: 0.020000\n",
-      "2021-09-08 11:00:46,558 epoch 6 - iter 4/5 - loss 0.33578297 - samples/sec: 18.74 - lr: 0.020000\n",
-      "2021-09-08 11:00:46,617 epoch 6 - iter 5/5 - loss 0.34507194 - samples/sec: 17.04 - lr: 0.020000\n",
-      "2021-09-08 11:00:46,618 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:46,618 EPOCH 6 done: loss 0.3451 - lr 0.0200000\n",
-      "2021-09-08 11:00:46,619 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:00:46,692 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:46,761 epoch 7 - iter 1/5 - loss 0.21853828 - samples/sec: 18.88 - lr: 0.020000\n",
-      "2021-09-08 11:00:46,811 epoch 7 - iter 2/5 - loss 0.69013524 - samples/sec: 20.24 - lr: 0.020000\n",
-      "2021-09-08 11:00:46,869 epoch 7 - iter 3/5 - loss 0.48767227 - samples/sec: 17.46 - lr: 0.020000\n",
-      "2021-09-08 11:00:46,946 epoch 7 - iter 4/5 - loss 0.41600379 - samples/sec: 13.06 - lr: 0.020000\n",
-      "2021-09-08 11:00:47,002 epoch 7 - iter 5/5 - loss 0.36696165 - samples/sec: 18.03 - lr: 0.020000\n",
-      "2021-09-08 11:00:47,003 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:47,003 EPOCH 7 done: loss 0.3670 - lr 0.0200000\n",
-      "Epoch     7: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 11:00:47,003 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 11:00:47,073 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:47,149 epoch 8 - iter 1/5 - loss 2.02279472 - samples/sec: 17.26 - lr: 0.010000\n",
-      "2021-09-08 11:00:47,223 epoch 8 - iter 2/5 - loss 1.06207852 - samples/sec: 13.59 - lr: 0.010000\n",
-      "2021-09-08 11:00:47,280 epoch 8 - iter 3/5 - loss 0.74564374 - samples/sec: 17.92 - lr: 0.010000\n",
-      "2021-09-08 11:00:47,332 epoch 8 - iter 4/5 - loss 0.72378417 - samples/sec: 19.37 - lr: 0.010000\n",
-      "2021-09-08 11:00:47,392 epoch 8 - iter 5/5 - loss 0.61813211 - samples/sec: 16.89 - lr: 0.010000\n",
-      "2021-09-08 11:00:47,393 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:47,393 EPOCH 8 done: loss 0.6181 - lr 0.0100000\n",
-      "2021-09-08 11:00:47,393 BAD EPOCHS (no improvement): 1\n"
+      "2021-09-21 20:28:49,919 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:50,011 epoch 6 - iter 1/5 - loss 0.21768303 - samples/sec: 12.81 - lr: 0.020000\n",
+      "2021-09-21 20:28:50,058 epoch 6 - iter 2/5 - loss 0.37760139 - samples/sec: 21.69 - lr: 0.020000\n",
+      "2021-09-21 20:28:50,101 epoch 6 - iter 3/5 - loss 0.26933590 - samples/sec: 23.51 - lr: 0.020000\n",
+      "2021-09-21 20:28:50,147 epoch 6 - iter 4/5 - loss 0.25140626 - samples/sec: 21.86 - lr: 0.020000\n",
+      "2021-09-21 20:28:50,197 epoch 6 - iter 5/5 - loss 0.27580855 - samples/sec: 20.07 - lr: 0.020000\n",
+      "2021-09-21 20:28:50,198 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:50,199 EPOCH 6 done: loss 0.2758 - lr 0.0200000\n",
+      "2021-09-21 20:28:50,199 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:28:51,035 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:51,097 epoch 7 - iter 1/5 - loss 0.01151805 - samples/sec: 21.05 - lr: 0.020000\n",
+      "2021-09-21 20:28:51,213 epoch 7 - iter 2/5 - loss 0.08608731 - samples/sec: 8.70 - lr: 0.020000\n",
+      "2021-09-21 20:28:51,288 epoch 7 - iter 3/5 - loss 0.51260584 - samples/sec: 13.28 - lr: 0.020000\n",
+      "2021-09-21 20:28:51,362 epoch 7 - iter 4/5 - loss 0.40219321 - samples/sec: 13.72 - lr: 0.020000\n",
+      "2021-09-21 20:28:51,439 epoch 7 - iter 5/5 - loss 0.33929801 - samples/sec: 12.98 - lr: 0.020000\n",
+      "2021-09-21 20:28:51,440 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:51,441 EPOCH 7 done: loss 0.3393 - lr 0.0200000\n",
+      "2021-09-21 20:28:51,441 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:28:52,988 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:53,047 epoch 8 - iter 1/5 - loss 0.83098334 - samples/sec: 21.87 - lr: 0.020000\n",
+      "2021-09-21 20:28:53,093 epoch 8 - iter 2/5 - loss 0.53279903 - samples/sec: 21.92 - lr: 0.020000\n",
+      "2021-09-21 20:28:53,143 epoch 8 - iter 3/5 - loss 0.86102741 - samples/sec: 20.18 - lr: 0.020000\n",
+      "2021-09-21 20:28:53,211 epoch 8 - iter 4/5 - loss 0.65946440 - samples/sec: 14.84 - lr: 0.020000\n",
+      "2021-09-21 20:28:53,257 epoch 8 - iter 5/5 - loss 0.55273330 - samples/sec: 21.94 - lr: 0.020000\n",
+      "2021-09-21 20:28:53,258 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:53,258 EPOCH 8 done: loss 0.5527 - lr 0.0200000\n",
+      "2021-09-21 20:28:53,258 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:28:57,723 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:00:47,474 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:47,547 epoch 9 - iter 1/5 - loss 0.37427500 - samples/sec: 18.19 - lr: 0.010000\n",
-      "2021-09-08 11:00:47,607 epoch 9 - iter 2/5 - loss 0.23869887 - samples/sec: 17.02 - lr: 0.010000\n",
-      "2021-09-08 11:00:47,678 epoch 9 - iter 3/5 - loss 0.17728720 - samples/sec: 14.02 - lr: 0.010000\n",
-      "2021-09-08 11:00:47,732 epoch 9 - iter 4/5 - loss 0.14808558 - samples/sec: 18.64 - lr: 0.010000\n",
-      "2021-09-08 11:00:47,792 epoch 9 - iter 5/5 - loss 0.45721146 - samples/sec: 17.02 - lr: 0.010000\n",
-      "2021-09-08 11:00:47,793 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:47,793 EPOCH 9 done: loss 0.4572 - lr 0.0100000\n",
-      "2021-09-08 11:00:47,793 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:00:47,872 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:47,942 epoch 10 - iter 1/5 - loss 0.04554083 - samples/sec: 19.51 - lr: 0.010000\n",
-      "2021-09-08 11:00:48,016 epoch 10 - iter 2/5 - loss 0.22920061 - samples/sec: 13.51 - lr: 0.010000\n",
-      "2021-09-08 11:00:48,069 epoch 10 - iter 3/5 - loss 0.35842870 - samples/sec: 19.21 - lr: 0.010000\n",
-      "2021-09-08 11:00:48,117 epoch 10 - iter 4/5 - loss 0.27090691 - samples/sec: 20.87 - lr: 0.010000\n",
-      "2021-09-08 11:00:48,179 epoch 10 - iter 5/5 - loss 0.50781256 - samples/sec: 16.23 - lr: 0.010000\n",
-      "2021-09-08 11:00:48,180 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:00:48,181 EPOCH 10 done: loss 0.5078 - lr 0.0100000\n",
-      "2021-09-08 11:00:48,181 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:00:57,478 Test data not provided setting final score to 0\n",
-      "Accuracy Durchschnitt: 0.29049271339347676\n"
+      "2021-09-21 20:28:57,834 epoch 9 - iter 1/5 - loss 0.01442855 - samples/sec: 10.21 - lr: 0.020000\n",
+      "2021-09-21 20:28:57,894 epoch 9 - iter 2/5 - loss 0.53244075 - samples/sec: 16.72 - lr: 0.020000\n",
+      "2021-09-21 20:28:57,952 epoch 9 - iter 3/5 - loss 0.52078388 - samples/sec: 17.23 - lr: 0.020000\n",
+      "2021-09-21 20:28:58,007 epoch 9 - iter 4/5 - loss 0.51325826 - samples/sec: 18.37 - lr: 0.020000\n",
+      "2021-09-21 20:28:58,062 epoch 9 - iter 5/5 - loss 0.45757995 - samples/sec: 18.51 - lr: 0.020000\n",
+      "2021-09-21 20:28:58,063 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:58,063 EPOCH 9 done: loss 0.4576 - lr 0.0200000\n",
+      "Epoch     9: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 20:28:58,064 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:28:58,066 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:58,143 epoch 10 - iter 1/5 - loss 0.88358808 - samples/sec: 18.61 - lr: 0.010000\n",
+      "2021-09-21 20:28:58,196 epoch 10 - iter 2/5 - loss 0.50237542 - samples/sec: 19.12 - lr: 0.010000\n",
+      "2021-09-21 20:28:58,254 epoch 10 - iter 3/5 - loss 0.47834685 - samples/sec: 17.22 - lr: 0.010000\n",
+      "2021-09-21 20:28:58,320 epoch 10 - iter 4/5 - loss 0.54762826 - samples/sec: 15.37 - lr: 0.010000\n",
+      "2021-09-21 20:28:58,396 epoch 10 - iter 5/5 - loss 0.44147368 - samples/sec: 13.26 - lr: 0.010000\n",
+      "2021-09-21 20:28:58,397 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:58,397 EPOCH 10 done: loss 0.4415 - lr 0.0100000\n",
+      "2021-09-21 20:28:58,398 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:29:04,895 Test data not provided setting final score to 0\n",
+      "Accuracy Durchschnitt: 0.3331020124913255\n"
      ]
     }
    ],
@@ -4853,11 +4870,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "id": "86988d74",
+   "execution_count": 7,
+   "id": "6e7286eb",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.3525329632199861, 0.3386537126995142, 0.30811936155447606, 0.3435114503816794, 0.32269257460097156]\n",
+      "0.015806510453860306\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(avg_acc_list)\n",
+    "print(statistics.pstdev(avg_acc_list))"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -4869,7 +4898,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 8,
    "id": "263ee449",
    "metadata": {},
    "outputs": [
@@ -4877,25 +4906,38 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:01:35,473 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
+      "2021-09-21 20:29:54,858 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:01:39,736 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:29:59,271 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 5/5 [00:00<00:00, 11729.04it/s]"
+      "100%|██████████| 5/5 [00:00<00:00, 12490.48it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:01:39,738 [b'one who does not side with any party in a war or dispute', b'that which is below standard or expectations as of ethics or decency', b'that which is pleasing or valuable or useful', b'remarkable or out of the ordinary in degree or magnitude or effect', b'exceptionally bad or displeasing']\n",
-      "2021-09-08 11:01:39,875 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:39,877 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:29:59,273 [b'one who does not side with any party in a war or dispute', b'that which is below standard or expectations as of ethics or decency', b'that which is pleasing or valuable or useful', b'remarkable or out of the ordinary in degree or magnitude or effect', b'exceptionally bad or displeasing']\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 20:29:59,833 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:59,835 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -5208,156 +5250,143 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:01:39,877 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:39,878 Corpus: \"Corpus: 5 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 11:01:39,878 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:39,878 Parameters:\n",
-      "2021-09-08 11:01:39,879  - learning_rate: \"0.02\"\n",
-      "2021-09-08 11:01:39,879  - mini_batch_size: \"1\"\n",
-      "2021-09-08 11:01:39,879  - patience: \"3\"\n",
-      "2021-09-08 11:01:39,879  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 11:01:39,880  - max_epochs: \"10\"\n",
-      "2021-09-08 11:01:39,880  - shuffle: \"True\"\n",
-      "2021-09-08 11:01:39,880  - train_with_dev: \"False\"\n",
-      "2021-09-08 11:01:39,881  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 11:01:39,881 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:39,881 Model training base path: \"temp1\"\n",
-      "2021-09-08 11:01:39,881 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:39,882 Device: cuda:0\n",
-      "2021-09-08 11:01:39,882 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:39,882 Embeddings storage mode: cpu\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 11:01:40,076 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:40,141 epoch 1 - iter 1/5 - loss 0.69598532 - samples/sec: 21.49 - lr: 0.020000\n",
-      "2021-09-08 11:01:40,206 epoch 1 - iter 2/5 - loss 1.14815468 - samples/sec: 15.41 - lr: 0.020000\n",
-      "2021-09-08 11:01:40,262 epoch 1 - iter 3/5 - loss 0.92342211 - samples/sec: 18.05 - lr: 0.020000\n",
-      "2021-09-08 11:01:40,346 epoch 1 - iter 4/5 - loss 0.82427789 - samples/sec: 12.03 - lr: 0.020000\n",
-      "2021-09-08 11:01:40,460 epoch 1 - iter 5/5 - loss 0.95321425 - samples/sec: 8.74 - lr: 0.020000\n",
-      "2021-09-08 11:01:40,462 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:40,462 EPOCH 1 done: loss 0.9532 - lr 0.0200000\n",
-      "2021-09-08 11:01:40,462 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:29:59,836 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:59,836 Corpus: \"Corpus: 5 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 20:29:59,836 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:59,837 Parameters:\n",
+      "2021-09-21 20:29:59,837  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:29:59,837  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:29:59,837  - patience: \"3\"\n",
+      "2021-09-21 20:29:59,838  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:29:59,838  - max_epochs: \"10\"\n",
+      "2021-09-21 20:29:59,838  - shuffle: \"True\"\n",
+      "2021-09-21 20:29:59,839  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:29:59,839  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:29:59,839 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:59,840 Model training base path: \"temp1\"\n",
+      "2021-09-21 20:29:59,840 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:59,840 Device: cuda:0\n",
+      "2021-09-21 20:29:59,841 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:59,841 Embeddings storage mode: cpu\n",
+      "2021-09-21 20:30:03,994 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:04,050 epoch 1 - iter 1/5 - loss 1.09064651 - samples/sec: 24.36 - lr: 0.020000\n",
+      "2021-09-21 20:30:04,108 epoch 1 - iter 2/5 - loss 1.05241996 - samples/sec: 17.50 - lr: 0.020000\n",
+      "2021-09-21 20:30:04,184 epoch 1 - iter 3/5 - loss 0.89896697 - samples/sec: 13.12 - lr: 0.020000\n",
+      "2021-09-21 20:30:04,327 epoch 1 - iter 4/5 - loss 0.83077262 - samples/sec: 7.06 - lr: 0.020000\n",
+      "2021-09-21 20:30:04,394 epoch 1 - iter 5/5 - loss 0.78119758 - samples/sec: 15.10 - lr: 0.020000\n",
+      "2021-09-21 20:30:04,395 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:04,396 EPOCH 1 done: loss 0.7812 - lr 0.0200000\n",
+      "2021-09-21 20:30:04,396 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:01:45,229 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:45,296 epoch 2 - iter 1/5 - loss 0.55698645 - samples/sec: 19.31 - lr: 0.020000\n",
-      "2021-09-08 11:01:45,343 epoch 2 - iter 2/5 - loss 0.81386393 - samples/sec: 21.30 - lr: 0.020000\n",
-      "2021-09-08 11:01:45,439 epoch 2 - iter 3/5 - loss 0.91083737 - samples/sec: 10.47 - lr: 0.020000\n",
-      "2021-09-08 11:01:45,508 epoch 2 - iter 4/5 - loss 0.81085472 - samples/sec: 14.62 - lr: 0.020000\n",
-      "2021-09-08 11:01:45,561 epoch 2 - iter 5/5 - loss 0.75571327 - samples/sec: 19.08 - lr: 0.020000\n",
-      "2021-09-08 11:01:45,562 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:45,562 EPOCH 2 done: loss 0.7557 - lr 0.0200000\n",
-      "2021-09-08 11:01:45,562 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:01:45,630 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:45,694 epoch 3 - iter 1/5 - loss 0.69516498 - samples/sec: 19.78 - lr: 0.020000\n",
-      "2021-09-08 11:01:45,742 epoch 3 - iter 2/5 - loss 0.57408465 - samples/sec: 21.10 - lr: 0.020000\n",
-      "2021-09-08 11:01:45,805 epoch 3 - iter 3/5 - loss 0.59112200 - samples/sec: 15.91 - lr: 0.020000\n",
-      "2021-09-08 11:01:45,855 epoch 3 - iter 4/5 - loss 0.54721074 - samples/sec: 20.26 - lr: 0.020000\n",
-      "2021-09-08 11:01:45,948 epoch 3 - iter 5/5 - loss 0.56807120 - samples/sec: 10.85 - lr: 0.020000\n",
-      "2021-09-08 11:01:45,949 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:45,950 EPOCH 3 done: loss 0.5681 - lr 0.0200000\n",
-      "2021-09-08 11:01:45,950 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:01:46,064 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:46,128 epoch 4 - iter 1/5 - loss 0.23046350 - samples/sec: 20.32 - lr: 0.020000\n",
-      "2021-09-08 11:01:46,192 epoch 4 - iter 2/5 - loss 0.51533556 - samples/sec: 15.73 - lr: 0.020000\n",
-      "2021-09-08 11:01:46,282 epoch 4 - iter 3/5 - loss 0.62654934 - samples/sec: 11.17 - lr: 0.020000\n",
-      "2021-09-08 11:01:46,334 epoch 4 - iter 4/5 - loss 0.60592116 - samples/sec: 19.46 - lr: 0.020000\n",
-      "2021-09-08 11:01:46,382 epoch 4 - iter 5/5 - loss 0.60689216 - samples/sec: 20.87 - lr: 0.020000\n",
-      "2021-09-08 11:01:46,384 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:46,384 EPOCH 4 done: loss 0.6069 - lr 0.0200000\n",
-      "2021-09-08 11:01:46,384 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:01:46,482 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:46,595 epoch 5 - iter 1/5 - loss 0.65021145 - samples/sec: 10.15 - lr: 0.020000\n",
-      "2021-09-08 11:01:46,653 epoch 5 - iter 2/5 - loss 0.51740547 - samples/sec: 17.41 - lr: 0.020000\n",
-      "2021-09-08 11:01:46,722 epoch 5 - iter 3/5 - loss 0.52747310 - samples/sec: 14.64 - lr: 0.020000\n",
-      "2021-09-08 11:01:46,776 epoch 5 - iter 4/5 - loss 0.44532840 - samples/sec: 18.89 - lr: 0.020000\n",
-      "2021-09-08 11:01:46,832 epoch 5 - iter 5/5 - loss 0.41484415 - samples/sec: 18.04 - lr: 0.020000\n",
-      "2021-09-08 11:01:46,834 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:46,834 EPOCH 5 done: loss 0.4148 - lr 0.0200000\n",
+      "2021-09-21 20:30:10,662 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:10,735 epoch 2 - iter 1/5 - loss 0.79464555 - samples/sec: 17.41 - lr: 0.020000\n",
+      "2021-09-21 20:30:10,785 epoch 2 - iter 2/5 - loss 0.73279998 - samples/sec: 20.33 - lr: 0.020000\n",
+      "2021-09-21 20:30:10,839 epoch 2 - iter 3/5 - loss 0.54094692 - samples/sec: 18.46 - lr: 0.020000\n",
+      "2021-09-21 20:30:10,886 epoch 2 - iter 4/5 - loss 0.45786501 - samples/sec: 21.70 - lr: 0.020000\n",
+      "2021-09-21 20:30:10,953 epoch 2 - iter 5/5 - loss 0.57577275 - samples/sec: 15.00 - lr: 0.020000\n",
+      "2021-09-21 20:30:10,954 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:10,954 EPOCH 2 done: loss 0.5758 - lr 0.0200000\n",
+      "2021-09-21 20:30:10,955 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:30:11,034 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:11,100 epoch 3 - iter 1/5 - loss 0.62686187 - samples/sec: 18.90 - lr: 0.020000\n",
+      "2021-09-21 20:30:11,151 epoch 3 - iter 2/5 - loss 0.82669780 - samples/sec: 19.66 - lr: 0.020000\n",
+      "2021-09-21 20:30:11,218 epoch 3 - iter 3/5 - loss 0.92500105 - samples/sec: 15.15 - lr: 0.020000\n",
+      "2021-09-21 20:30:11,264 epoch 3 - iter 4/5 - loss 0.71964245 - samples/sec: 21.82 - lr: 0.020000\n",
+      "2021-09-21 20:30:11,311 epoch 3 - iter 5/5 - loss 0.71347297 - samples/sec: 21.36 - lr: 0.020000\n",
+      "2021-09-21 20:30:11,312 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:11,313 EPOCH 3 done: loss 0.7135 - lr 0.0200000\n",
+      "2021-09-21 20:30:11,313 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:30:11,443 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:11,499 epoch 4 - iter 1/5 - loss 0.01831517 - samples/sec: 23.42 - lr: 0.020000\n",
+      "2021-09-21 20:30:11,549 epoch 4 - iter 2/5 - loss 0.10620302 - samples/sec: 20.41 - lr: 0.020000\n",
+      "2021-09-21 20:30:11,616 epoch 4 - iter 3/5 - loss 0.49848652 - samples/sec: 14.93 - lr: 0.020000\n",
+      "2021-09-21 20:30:11,668 epoch 4 - iter 4/5 - loss 0.49814653 - samples/sec: 19.51 - lr: 0.020000\n",
+      "2021-09-21 20:30:11,719 epoch 4 - iter 5/5 - loss 0.43017361 - samples/sec: 19.72 - lr: 0.020000\n",
+      "2021-09-21 20:30:11,720 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:11,720 EPOCH 4 done: loss 0.4302 - lr 0.0200000\n",
+      "2021-09-21 20:30:11,721 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:30:11,936 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:11,997 epoch 5 - iter 1/5 - loss 0.00638759 - samples/sec: 21.28 - lr: 0.020000\n",
+      "2021-09-21 20:30:12,042 epoch 5 - iter 2/5 - loss 0.01413874 - samples/sec: 22.33 - lr: 0.020000\n",
+      "2021-09-21 20:30:12,089 epoch 5 - iter 3/5 - loss 0.05252241 - samples/sec: 21.33 - lr: 0.020000\n",
+      "2021-09-21 20:30:12,156 epoch 5 - iter 4/5 - loss 0.21580697 - samples/sec: 15.16 - lr: 0.020000\n",
+      "2021-09-21 20:30:12,207 epoch 5 - iter 5/5 - loss 0.21466361 - samples/sec: 19.54 - lr: 0.020000\n",
+      "2021-09-21 20:30:12,208 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:12,209 EPOCH 5 done: loss 0.2147 - lr 0.0200000\n",
       "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 11:01:46,835 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 11:01:46,914 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:46,994 epoch 6 - iter 1/5 - loss 0.56301683 - samples/sec: 15.32 - lr: 0.010000\n",
-      "2021-09-08 11:01:47,043 epoch 6 - iter 2/5 - loss 0.55520555 - samples/sec: 20.55 - lr: 0.010000\n",
-      "2021-09-08 11:01:47,092 epoch 6 - iter 3/5 - loss 0.42148683 - samples/sec: 20.88 - lr: 0.010000\n",
-      "2021-09-08 11:01:47,144 epoch 6 - iter 4/5 - loss 0.36464544 - samples/sec: 19.59 - lr: 0.010000\n",
-      "2021-09-08 11:01:47,233 epoch 6 - iter 5/5 - loss 0.36186010 - samples/sec: 11.31 - lr: 0.010000\n",
-      "2021-09-08 11:01:47,235 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:47,235 EPOCH 6 done: loss 0.3619 - lr 0.0100000\n",
-      "2021-09-08 11:01:47,236 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:01:47,317 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:47,400 epoch 7 - iter 1/5 - loss 0.28923327 - samples/sec: 14.45 - lr: 0.010000\n",
-      "2021-09-08 11:01:47,538 epoch 7 - iter 2/5 - loss 0.32971135 - samples/sec: 7.33 - lr: 0.010000\n",
-      "2021-09-08 11:01:47,597 epoch 7 - iter 3/5 - loss 0.27132414 - samples/sec: 17.21 - lr: 0.010000\n",
-      "2021-09-08 11:01:47,646 epoch 7 - iter 4/5 - loss 0.21704859 - samples/sec: 20.84 - lr: 0.010000\n",
-      "2021-09-08 11:01:47,693 epoch 7 - iter 5/5 - loss 0.17778261 - samples/sec: 21.43 - lr: 0.010000\n",
-      "2021-09-08 11:01:47,695 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:47,696 EPOCH 7 done: loss 0.1778 - lr 0.0100000\n",
-      "2021-09-08 11:01:47,696 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:01:47,794 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:47,900 epoch 8 - iter 1/5 - loss 0.44147456 - samples/sec: 11.11 - lr: 0.010000\n",
-      "2021-09-08 11:01:47,956 epoch 8 - iter 2/5 - loss 0.26416944 - samples/sec: 18.08 - lr: 0.010000\n",
-      "2021-09-08 11:01:48,015 epoch 8 - iter 3/5 - loss 0.26032201 - samples/sec: 17.16 - lr: 0.010000\n",
-      "2021-09-08 11:01:48,083 epoch 8 - iter 4/5 - loss 0.35254028 - samples/sec: 14.75 - lr: 0.010000\n",
-      "2021-09-08 11:01:48,140 epoch 8 - iter 5/5 - loss 0.28606745 - samples/sec: 17.78 - lr: 0.010000\n",
-      "2021-09-08 11:01:48,142 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:48,142 EPOCH 8 done: loss 0.2861 - lr 0.0100000\n",
-      "2021-09-08 11:01:48,143 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:01:48,220 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:48,292 epoch 9 - iter 1/5 - loss 0.02625341 - samples/sec: 17.73 - lr: 0.010000\n",
-      "2021-09-08 11:01:48,384 epoch 9 - iter 2/5 - loss 0.22257057 - samples/sec: 10.87 - lr: 0.010000\n",
-      "2021-09-08 11:01:48,452 epoch 9 - iter 3/5 - loss 0.32972229 - samples/sec: 14.99 - lr: 0.010000\n",
-      "2021-09-08 11:01:48,500 epoch 9 - iter 4/5 - loss 0.28384165 - samples/sec: 21.01 - lr: 0.010000\n",
-      "2021-09-08 11:01:48,548 epoch 9 - iter 5/5 - loss 0.22958435 - samples/sec: 20.90 - lr: 0.010000\n",
-      "2021-09-08 11:01:48,549 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:48,550 EPOCH 9 done: loss 0.2296 - lr 0.0100000\n",
-      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 11:01:48,550 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 11:01:48,638 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:48,700 epoch 10 - iter 1/5 - loss 0.02551093 - samples/sec: 21.02 - lr: 0.005000\n",
-      "2021-09-08 11:01:48,744 epoch 10 - iter 2/5 - loss 0.02056285 - samples/sec: 22.57 - lr: 0.005000\n",
-      "2021-09-08 11:01:48,809 epoch 10 - iter 3/5 - loss 0.07285011 - samples/sec: 15.48 - lr: 0.005000\n"
+      "2021-09-21 20:30:12,209 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:30:12,330 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:12,386 epoch 6 - iter 1/5 - loss 0.00813769 - samples/sec: 23.48 - lr: 0.010000\n",
+      "2021-09-21 20:30:12,454 epoch 6 - iter 2/5 - loss 0.18921349 - samples/sec: 14.70 - lr: 0.010000\n",
+      "2021-09-21 20:30:12,505 epoch 6 - iter 3/5 - loss 0.20814212 - samples/sec: 19.88 - lr: 0.010000\n",
+      "2021-09-21 20:30:12,552 epoch 6 - iter 4/5 - loss 0.19249001 - samples/sec: 21.33 - lr: 0.010000\n",
+      "2021-09-21 20:30:12,603 epoch 6 - iter 5/5 - loss 0.22761393 - samples/sec: 19.71 - lr: 0.010000\n",
+      "2021-09-21 20:30:12,604 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:12,605 EPOCH 6 done: loss 0.2276 - lr 0.0100000\n",
+      "2021-09-21 20:30:12,605 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:30:12,698 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:12,755 epoch 7 - iter 1/5 - loss 0.04556751 - samples/sec: 22.83 - lr: 0.010000\n",
+      "2021-09-21 20:30:12,802 epoch 7 - iter 2/5 - loss 0.14335540 - samples/sec: 21.62 - lr: 0.010000\n",
+      "2021-09-21 20:30:12,871 epoch 7 - iter 3/5 - loss 0.22161109 - samples/sec: 14.70 - lr: 0.010000\n",
+      "2021-09-21 20:30:12,918 epoch 7 - iter 4/5 - loss 0.16833517 - samples/sec: 21.32 - lr: 0.010000\n",
+      "2021-09-21 20:30:12,969 epoch 7 - iter 5/5 - loss 0.14761559 - samples/sec: 19.73 - lr: 0.010000\n",
+      "2021-09-21 20:30:12,970 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:12,970 EPOCH 7 done: loss 0.1476 - lr 0.0100000\n",
+      "2021-09-21 20:30:12,971 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:30:15,994 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:16,053 epoch 8 - iter 1/5 - loss 0.01605252 - samples/sec: 22.16 - lr: 0.010000\n",
+      "2021-09-21 20:30:16,115 epoch 8 - iter 2/5 - loss 0.03343131 - samples/sec: 16.20 - lr: 0.010000\n",
+      "2021-09-21 20:30:16,193 epoch 8 - iter 3/5 - loss 0.04552655 - samples/sec: 12.92 - lr: 0.010000\n",
+      "2021-09-21 20:30:16,275 epoch 8 - iter 4/5 - loss 0.20708661 - samples/sec: 12.28 - lr: 0.010000\n",
+      "2021-09-21 20:30:16,377 epoch 8 - iter 5/5 - loss 0.28168965 - samples/sec: 9.89 - lr: 0.010000\n",
+      "2021-09-21 20:30:16,378 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:16,378 EPOCH 8 done: loss 0.2817 - lr 0.0100000\n",
+      "2021-09-21 20:30:16,378 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:30:16,428 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:01:48,898 epoch 10 - iter 4/5 - loss 0.11751337 - samples/sec: 11.38 - lr: 0.005000\n",
-      "2021-09-08 11:01:48,942 epoch 10 - iter 5/5 - loss 0.10298727 - samples/sec: 22.93 - lr: 0.005000\n",
-      "2021-09-08 11:01:48,943 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:01:48,943 EPOCH 10 done: loss 0.1030 - lr 0.0050000\n",
-      "2021-09-08 11:01:48,943 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:01:57,920 Test data not provided setting final score to 0\n",
-      "2021-09-08 11:02:36,760 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
+      "2021-09-21 20:30:16,544 epoch 9 - iter 1/5 - loss 0.34637415 - samples/sec: 9.96 - lr: 0.010000\n",
+      "2021-09-21 20:30:16,605 epoch 9 - iter 2/5 - loss 0.23904270 - samples/sec: 16.54 - lr: 0.010000\n",
+      "2021-09-21 20:30:16,669 epoch 9 - iter 3/5 - loss 0.17658624 - samples/sec: 15.86 - lr: 0.010000\n",
+      "2021-09-21 20:30:16,742 epoch 9 - iter 4/5 - loss 0.19434573 - samples/sec: 13.67 - lr: 0.010000\n",
+      "2021-09-21 20:30:16,815 epoch 9 - iter 5/5 - loss 0.17327531 - samples/sec: 13.78 - lr: 0.010000\n",
+      "2021-09-21 20:30:16,816 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:16,817 EPOCH 9 done: loss 0.1733 - lr 0.0100000\n",
+      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 20:30:16,817 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:30:16,819 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:16,901 epoch 10 - iter 1/5 - loss 0.06360737 - samples/sec: 16.43 - lr: 0.005000\n",
+      "2021-09-21 20:30:16,968 epoch 10 - iter 2/5 - loss 0.18703108 - samples/sec: 15.11 - lr: 0.005000\n",
+      "2021-09-21 20:30:17,013 epoch 10 - iter 3/5 - loss 0.12994344 - samples/sec: 22.05 - lr: 0.005000\n",
+      "2021-09-21 20:30:17,057 epoch 10 - iter 4/5 - loss 0.09862937 - samples/sec: 23.32 - lr: 0.005000\n",
+      "2021-09-21 20:30:17,106 epoch 10 - iter 5/5 - loss 0.08379027 - samples/sec: 20.57 - lr: 0.005000\n",
+      "2021-09-21 20:30:17,107 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:17,108 EPOCH 10 done: loss 0.0838 - lr 0.0050000\n",
+      "2021-09-21 20:30:17,108 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:30:27,527 Test data not provided setting final score to 0\n",
+      "2021-09-21 20:31:10,981 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:02:40,758 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:31:14,985 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 5/5 [00:00<00:00, 15330.06it/s]"
+      "100%|██████████| 5/5 [00:00<00:00, 12094.30it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:02:40,760 [b'one who does not side with any party in a war or dispute', b'that which is below standard or expectations as of ethics or decency', b'that which is pleasing or valuable or useful', b'remarkable or out of the ordinary in degree or magnitude or effect', b'exceptionally bad or displeasing']\n",
-      "2021-09-08 11:02:40,898 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:40,900 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:31:14,987 [b'one who does not side with any party in a war or dispute', b'that which is below standard or expectations as of ethics or decency', b'that which is pleasing or valuable or useful', b'remarkable or out of the ordinary in degree or magnitude or effect', b'exceptionally bad or displeasing']\n",
+      "2021-09-21 20:31:14,995 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:14,997 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -5670,24 +5699,26 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:02:40,901 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:40,901 Corpus: \"Corpus: 5 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 11:02:40,901 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:40,901 Parameters:\n",
-      "2021-09-08 11:02:40,902  - learning_rate: \"0.02\"\n",
-      "2021-09-08 11:02:40,902  - mini_batch_size: \"1\"\n",
-      "2021-09-08 11:02:40,902  - patience: \"3\"\n",
-      "2021-09-08 11:02:40,903  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 11:02:40,903  - max_epochs: \"10\"\n",
-      "2021-09-08 11:02:40,903  - shuffle: \"True\"\n",
-      "2021-09-08 11:02:40,903  - train_with_dev: \"False\"\n",
-      "2021-09-08 11:02:40,904  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 11:02:40,904 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:40,904 Model training base path: \"temp1\"\n",
-      "2021-09-08 11:02:40,905 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:40,905 Device: cuda:0\n",
-      "2021-09-08 11:02:40,905 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:40,905 Embeddings storage mode: cpu\n"
+      "2021-09-21 20:31:14,998 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:14,998 Corpus: \"Corpus: 5 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 20:31:14,998 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:14,999 Parameters:\n",
+      "2021-09-21 20:31:14,999  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:31:14,999  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:31:15,000  - patience: \"3\"\n",
+      "2021-09-21 20:31:15,000  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:31:15,000  - max_epochs: \"10\"\n",
+      "2021-09-21 20:31:15,000  - shuffle: \"True\"\n",
+      "2021-09-21 20:31:15,001  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:31:15,001  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:31:15,001 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:15,001 Model training base path: \"temp1\"\n",
+      "2021-09-21 20:31:15,002 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:15,002 Device: cuda:0\n",
+      "2021-09-21 20:31:15,002 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:15,003 Embeddings storage mode: cpu\n",
+      "2021-09-21 20:31:15,009 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:15,136 epoch 1 - iter 1/5 - loss 0.77857596 - samples/sec: 9.25 - lr: 0.020000\n"
      ]
     },
     {
@@ -5701,125 +5732,123 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:02:41,093 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:41,152 epoch 1 - iter 1/5 - loss 0.65271634 - samples/sec: 22.89 - lr: 0.020000\n",
-      "2021-09-08 11:02:41,207 epoch 1 - iter 2/5 - loss 1.15945563 - samples/sec: 18.25 - lr: 0.020000\n",
-      "2021-09-08 11:02:41,256 epoch 1 - iter 3/5 - loss 1.06801246 - samples/sec: 20.82 - lr: 0.020000\n",
-      "2021-09-08 11:02:41,307 epoch 1 - iter 4/5 - loss 0.99666879 - samples/sec: 19.78 - lr: 0.020000\n",
-      "2021-09-08 11:02:41,358 epoch 1 - iter 5/5 - loss 1.08235157 - samples/sec: 19.51 - lr: 0.020000\n",
-      "2021-09-08 11:02:41,360 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:41,360 EPOCH 1 done: loss 1.0824 - lr 0.0200000\n",
-      "2021-09-08 11:02:41,360 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:31:15,207 epoch 1 - iter 2/5 - loss 0.96038267 - samples/sec: 14.12 - lr: 0.020000\n",
+      "2021-09-21 20:31:15,259 epoch 1 - iter 3/5 - loss 0.88990241 - samples/sec: 19.57 - lr: 0.020000\n",
+      "2021-09-21 20:31:15,319 epoch 1 - iter 4/5 - loss 0.87711862 - samples/sec: 16.76 - lr: 0.020000\n",
+      "2021-09-21 20:31:15,370 epoch 1 - iter 5/5 - loss 0.86952312 - samples/sec: 19.77 - lr: 0.020000\n",
+      "2021-09-21 20:31:15,371 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:15,371 EPOCH 1 done: loss 0.8695 - lr 0.0200000\n",
+      "2021-09-21 20:31:15,371 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:02:55,145 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:55,247 epoch 2 - iter 1/5 - loss 0.42879272 - samples/sec: 11.78 - lr: 0.020000\n",
-      "2021-09-08 11:02:55,328 epoch 2 - iter 2/5 - loss 0.68053269 - samples/sec: 12.32 - lr: 0.020000\n",
-      "2021-09-08 11:02:55,396 epoch 2 - iter 3/5 - loss 0.72433231 - samples/sec: 14.96 - lr: 0.020000\n",
-      "2021-09-08 11:02:55,456 epoch 2 - iter 4/5 - loss 0.66001491 - samples/sec: 16.73 - lr: 0.020000\n",
-      "2021-09-08 11:02:55,521 epoch 2 - iter 5/5 - loss 0.66685670 - samples/sec: 15.49 - lr: 0.020000\n",
-      "2021-09-08 11:02:55,522 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:55,522 EPOCH 2 done: loss 0.6669 - lr 0.0200000\n",
-      "2021-09-08 11:02:55,523 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:02:55,525 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:55,604 epoch 3 - iter 1/5 - loss 0.45641264 - samples/sec: 18.90 - lr: 0.020000\n",
-      "2021-09-08 11:02:55,674 epoch 3 - iter 2/5 - loss 0.41504905 - samples/sec: 14.48 - lr: 0.020000\n",
-      "2021-09-08 11:02:55,750 epoch 3 - iter 3/5 - loss 0.45280202 - samples/sec: 13.19 - lr: 0.020000\n",
-      "2021-09-08 11:02:55,797 epoch 3 - iter 4/5 - loss 0.39286414 - samples/sec: 21.46 - lr: 0.020000\n",
-      "2021-09-08 11:02:55,849 epoch 3 - iter 5/5 - loss 0.44872500 - samples/sec: 19.40 - lr: 0.020000\n",
-      "2021-09-08 11:02:55,850 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:55,851 EPOCH 3 done: loss 0.4487 - lr 0.0200000\n",
-      "2021-09-08 11:02:55,851 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:02:55,854 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:55,923 epoch 4 - iter 1/5 - loss 0.57949287 - samples/sec: 18.35 - lr: 0.020000\n",
-      "2021-09-08 11:02:55,973 epoch 4 - iter 2/5 - loss 0.33230864 - samples/sec: 20.10 - lr: 0.020000\n",
-      "2021-09-08 11:02:56,021 epoch 4 - iter 3/5 - loss 0.22915437 - samples/sec: 21.23 - lr: 0.020000\n",
-      "2021-09-08 11:02:56,074 epoch 4 - iter 4/5 - loss 0.28975964 - samples/sec: 19.10 - lr: 0.020000\n",
-      "2021-09-08 11:02:56,126 epoch 4 - iter 5/5 - loss 0.36809775 - samples/sec: 19.58 - lr: 0.020000\n",
-      "2021-09-08 11:02:56,127 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:56,128 EPOCH 4 done: loss 0.3681 - lr 0.0200000\n",
-      "2021-09-08 11:02:56,128 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:02:56,147 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:56,209 epoch 5 - iter 1/5 - loss 0.13957222 - samples/sec: 20.75 - lr: 0.020000\n",
-      "2021-09-08 11:02:56,257 epoch 5 - iter 2/5 - loss 0.10885819 - samples/sec: 21.14 - lr: 0.020000\n",
-      "2021-09-08 11:02:56,309 epoch 5 - iter 3/5 - loss 0.52852779 - samples/sec: 19.57 - lr: 0.020000\n",
-      "2021-09-08 11:02:56,364 epoch 5 - iter 4/5 - loss 0.57611226 - samples/sec: 18.49 - lr: 0.020000\n",
-      "2021-09-08 11:02:56,416 epoch 5 - iter 5/5 - loss 0.52936961 - samples/sec: 19.41 - lr: 0.020000\n",
-      "2021-09-08 11:02:56,418 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:56,418 EPOCH 5 done: loss 0.5294 - lr 0.0200000\n",
+      "2021-09-21 20:31:19,323 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:19,436 epoch 2 - iter 1/5 - loss 0.72422355 - samples/sec: 10.14 - lr: 0.020000\n",
+      "2021-09-21 20:31:19,484 epoch 2 - iter 2/5 - loss 0.54885612 - samples/sec: 21.17 - lr: 0.020000\n",
+      "2021-09-21 20:31:19,531 epoch 2 - iter 3/5 - loss 0.54467679 - samples/sec: 21.18 - lr: 0.020000\n",
+      "2021-09-21 20:31:19,582 epoch 2 - iter 4/5 - loss 0.52402232 - samples/sec: 19.92 - lr: 0.020000\n",
+      "2021-09-21 20:31:19,641 epoch 2 - iter 5/5 - loss 0.61994107 - samples/sec: 17.07 - lr: 0.020000\n",
+      "2021-09-21 20:31:19,642 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:19,642 EPOCH 2 done: loss 0.6199 - lr 0.0200000\n",
+      "2021-09-21 20:31:19,643 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:31:19,645 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:19,714 epoch 3 - iter 1/5 - loss 0.72253817 - samples/sec: 17.67 - lr: 0.020000\n",
+      "2021-09-21 20:31:19,762 epoch 3 - iter 2/5 - loss 0.84257030 - samples/sec: 21.36 - lr: 0.020000\n",
+      "2021-09-21 20:31:19,859 epoch 3 - iter 3/5 - loss 0.71012905 - samples/sec: 10.33 - lr: 0.020000\n",
+      "2021-09-21 20:31:19,906 epoch 3 - iter 4/5 - loss 0.63484180 - samples/sec: 21.32 - lr: 0.020000\n",
+      "2021-09-21 20:31:19,959 epoch 3 - iter 5/5 - loss 0.58219414 - samples/sec: 19.08 - lr: 0.020000\n",
+      "2021-09-21 20:31:19,960 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:19,961 EPOCH 3 done: loss 0.5822 - lr 0.0200000\n",
+      "2021-09-21 20:31:19,961 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:31:19,964 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:20,034 epoch 4 - iter 1/5 - loss 0.76219726 - samples/sec: 17.47 - lr: 0.020000\n",
+      "2021-09-21 20:31:20,081 epoch 4 - iter 2/5 - loss 0.67989123 - samples/sec: 21.79 - lr: 0.020000\n",
+      "2021-09-21 20:31:20,127 epoch 4 - iter 3/5 - loss 0.62866884 - samples/sec: 21.85 - lr: 0.020000\n",
+      "2021-09-21 20:31:20,224 epoch 4 - iter 4/5 - loss 0.53412833 - samples/sec: 10.34 - lr: 0.020000\n",
+      "2021-09-21 20:31:20,275 epoch 4 - iter 5/5 - loss 0.45716780 - samples/sec: 19.91 - lr: 0.020000\n",
+      "2021-09-21 20:31:20,276 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:20,276 EPOCH 4 done: loss 0.4572 - lr 0.0200000\n",
+      "2021-09-21 20:31:20,276 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:31:20,366 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:20,429 epoch 5 - iter 1/5 - loss 0.12051222 - samples/sec: 20.07 - lr: 0.020000\n",
+      "2021-09-21 20:31:20,475 epoch 5 - iter 2/5 - loss 0.60773944 - samples/sec: 21.80 - lr: 0.020000\n",
+      "2021-09-21 20:31:20,522 epoch 5 - iter 3/5 - loss 0.62843380 - samples/sec: 21.87 - lr: 0.020000\n",
+      "2021-09-21 20:31:20,579 epoch 5 - iter 4/5 - loss 0.63626636 - samples/sec: 17.58 - lr: 0.020000\n",
+      "2021-09-21 20:31:20,677 epoch 5 - iter 5/5 - loss 0.54648148 - samples/sec: 10.19 - lr: 0.020000\n",
+      "2021-09-21 20:31:20,679 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:20,679 EPOCH 5 done: loss 0.5465 - lr 0.0200000\n",
       "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 11:02:56,418 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 11:02:56,547 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:56,606 epoch 6 - iter 1/5 - loss 0.02547208 - samples/sec: 22.60 - lr: 0.010000\n",
-      "2021-09-08 11:02:56,657 epoch 6 - iter 2/5 - loss 0.36222671 - samples/sec: 19.93 - lr: 0.010000\n",
-      "2021-09-08 11:02:56,707 epoch 6 - iter 3/5 - loss 0.32923543 - samples/sec: 20.20 - lr: 0.010000\n",
-      "2021-09-08 11:02:56,757 epoch 6 - iter 4/5 - loss 0.32394028 - samples/sec: 20.31 - lr: 0.010000\n",
-      "2021-09-08 11:02:56,805 epoch 6 - iter 5/5 - loss 0.34979564 - samples/sec: 21.13 - lr: 0.010000\n",
-      "2021-09-08 11:02:56,806 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:56,806 EPOCH 6 done: loss 0.3498 - lr 0.0100000\n",
-      "2021-09-08 11:02:56,806 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:02:56,913 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:56,977 epoch 7 - iter 1/5 - loss 0.16952790 - samples/sec: 19.74 - lr: 0.010000\n",
-      "2021-09-08 11:02:57,021 epoch 7 - iter 2/5 - loss 0.08973478 - samples/sec: 22.94 - lr: 0.010000\n",
-      "2021-09-08 11:02:57,068 epoch 7 - iter 3/5 - loss 0.08405371 - samples/sec: 21.52 - lr: 0.010000\n",
-      "2021-09-08 11:02:57,118 epoch 7 - iter 4/5 - loss 0.09975957 - samples/sec: 20.34 - lr: 0.010000\n",
-      "2021-09-08 11:02:57,165 epoch 7 - iter 5/5 - loss 0.18243254 - samples/sec: 21.17 - lr: 0.010000\n",
-      "2021-09-08 11:02:57,166 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:57,167 EPOCH 7 done: loss 0.1824 - lr 0.0100000\n",
-      "2021-09-08 11:02:57,167 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:02:57,259 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:57,320 epoch 8 - iter 1/5 - loss 0.42717138 - samples/sec: 21.07 - lr: 0.010000\n",
-      "2021-09-08 11:02:57,364 epoch 8 - iter 2/5 - loss 0.21941545 - samples/sec: 23.02 - lr: 0.010000\n",
-      "2021-09-08 11:02:57,414 epoch 8 - iter 3/5 - loss 0.47697776 - samples/sec: 19.97 - lr: 0.010000\n",
-      "2021-09-08 11:02:57,460 epoch 8 - iter 4/5 - loss 0.38361079 - samples/sec: 21.84 - lr: 0.010000\n",
-      "2021-09-08 11:02:57,513 epoch 8 - iter 5/5 - loss 0.32943391 - samples/sec: 19.19 - lr: 0.010000\n",
-      "2021-09-08 11:02:57,514 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:57,514 EPOCH 8 done: loss 0.3294 - lr 0.0100000\n",
-      "2021-09-08 11:02:57,515 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:02:57,604 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:57,668 epoch 9 - iter 1/5 - loss 0.13052516 - samples/sec: 19.78 - lr: 0.010000\n",
-      "2021-09-08 11:02:57,719 epoch 9 - iter 2/5 - loss 0.14412819 - samples/sec: 19.98 - lr: 0.010000\n",
-      "2021-09-08 11:02:57,763 epoch 9 - iter 3/5 - loss 0.10032829 - samples/sec: 23.05 - lr: 0.010000\n",
-      "2021-09-08 11:02:57,810 epoch 9 - iter 4/5 - loss 0.15496836 - samples/sec: 21.15 - lr: 0.010000\n",
-      "2021-09-08 11:02:57,857 epoch 9 - iter 5/5 - loss 0.12975249 - samples/sec: 21.65 - lr: 0.010000\n",
-      "2021-09-08 11:02:57,858 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:57,858 EPOCH 9 done: loss 0.1298 - lr 0.0100000\n",
+      "2021-09-21 20:31:20,679 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:31:20,753 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:20,823 epoch 6 - iter 1/5 - loss 0.87705308 - samples/sec: 17.50 - lr: 0.010000\n",
+      "2021-09-21 20:31:20,869 epoch 6 - iter 2/5 - loss 0.62849264 - samples/sec: 21.77 - lr: 0.010000\n",
+      "2021-09-21 20:31:20,916 epoch 6 - iter 3/5 - loss 0.43282503 - samples/sec: 21.51 - lr: 0.010000\n",
+      "2021-09-21 20:31:20,962 epoch 6 - iter 4/5 - loss 0.34643744 - samples/sec: 21.91 - lr: 0.010000\n",
+      "2021-09-21 20:31:21,060 epoch 6 - iter 5/5 - loss 0.31149256 - samples/sec: 10.25 - lr: 0.010000\n",
+      "2021-09-21 20:31:21,061 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:21,062 EPOCH 6 done: loss 0.3115 - lr 0.0100000\n",
+      "2021-09-21 20:31:21,062 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:31:21,139 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:21,199 epoch 7 - iter 1/5 - loss 0.18006442 - samples/sec: 21.66 - lr: 0.010000\n",
+      "2021-09-21 20:31:21,245 epoch 7 - iter 2/5 - loss 0.16306247 - samples/sec: 21.73 - lr: 0.010000\n",
+      "2021-09-21 20:31:21,293 epoch 7 - iter 3/5 - loss 0.11188125 - samples/sec: 21.37 - lr: 0.010000\n",
+      "2021-09-21 20:31:21,388 epoch 7 - iter 4/5 - loss 0.09748040 - samples/sec: 10.58 - lr: 0.010000\n",
+      "2021-09-21 20:31:21,446 epoch 7 - iter 5/5 - loss 0.23288703 - samples/sec: 17.37 - lr: 0.010000\n",
+      "2021-09-21 20:31:21,447 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:21,447 EPOCH 7 done: loss 0.2329 - lr 0.0100000\n",
+      "2021-09-21 20:31:21,448 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:31:21,533 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:21,589 epoch 8 - iter 1/5 - loss 0.03273086 - samples/sec: 23.43 - lr: 0.010000\n",
+      "2021-09-21 20:31:21,635 epoch 8 - iter 2/5 - loss 0.04697097 - samples/sec: 21.82 - lr: 0.010000\n",
+      "2021-09-21 20:31:21,693 epoch 8 - iter 3/5 - loss 0.29725574 - samples/sec: 17.58 - lr: 0.010000\n",
+      "2021-09-21 20:31:21,745 epoch 8 - iter 4/5 - loss 0.23171277 - samples/sec: 19.41 - lr: 0.010000\n",
+      "2021-09-21 20:31:21,841 epoch 8 - iter 5/5 - loss 0.20400533 - samples/sec: 10.46 - lr: 0.010000\n",
+      "2021-09-21 20:31:21,842 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:21,842 EPOCH 8 done: loss 0.2040 - lr 0.0100000\n",
+      "2021-09-21 20:31:21,842 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:31:21,917 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:21,977 epoch 9 - iter 1/5 - loss 0.00878468 - samples/sec: 21.43 - lr: 0.010000\n",
+      "2021-09-21 20:31:22,034 epoch 9 - iter 2/5 - loss 0.31413202 - samples/sec: 17.49 - lr: 0.010000\n",
+      "2021-09-21 20:31:22,079 epoch 9 - iter 3/5 - loss 0.22395432 - samples/sec: 22.68 - lr: 0.010000\n",
+      "2021-09-21 20:31:22,125 epoch 9 - iter 4/5 - loss 0.38161076 - samples/sec: 21.90 - lr: 0.010000\n",
+      "2021-09-21 20:31:22,218 epoch 9 - iter 5/5 - loss 0.30925589 - samples/sec: 10.82 - lr: 0.010000\n",
+      "2021-09-21 20:31:22,219 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:22,219 EPOCH 9 done: loss 0.3093 - lr 0.0100000\n",
       "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 11:02:57,859 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 11:02:57,956 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:58,015 epoch 10 - iter 1/5 - loss 0.00255153 - samples/sec: 23.07 - lr: 0.005000\n",
-      "2021-09-08 11:02:58,063 epoch 10 - iter 2/5 - loss 0.04420956 - samples/sec: 21.08 - lr: 0.005000\n",
-      "2021-09-08 11:02:58,112 epoch 10 - iter 3/5 - loss 0.04028338 - samples/sec: 20.39 - lr: 0.005000\n"
+      "2021-09-21 20:31:22,219 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:31:22,299 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:22,355 epoch 10 - iter 1/5 - loss 0.03701346 - samples/sec: 23.42 - lr: 0.005000\n",
+      "2021-09-21 20:31:22,412 epoch 10 - iter 2/5 - loss 0.28403651 - samples/sec: 17.51 - lr: 0.005000\n",
+      "2021-09-21 20:31:22,458 epoch 10 - iter 3/5 - loss 0.21147584 - samples/sec: 22.26 - lr: 0.005000\n",
+      "2021-09-21 20:31:22,551 epoch 10 - iter 4/5 - loss 0.16517476 - samples/sec: 10.75 - lr: 0.005000\n",
+      "2021-09-21 20:31:22,600 epoch 10 - iter 5/5 - loss 0.13507464 - samples/sec: 20.89 - lr: 0.005000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:02:58,159 epoch 10 - iter 4/5 - loss 0.03951361 - samples/sec: 21.44 - lr: 0.005000\n",
-      "2021-09-08 11:02:58,209 epoch 10 - iter 5/5 - loss 0.08776858 - samples/sec: 20.17 - lr: 0.005000\n",
-      "2021-09-08 11:02:58,210 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:02:58,211 EPOCH 10 done: loss 0.0878 - lr 0.0050000\n",
-      "2021-09-08 11:02:58,211 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:03:10,782 Test data not provided setting final score to 0\n",
-      "2021-09-08 11:03:49,433 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
+      "2021-09-21 20:31:22,601 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:22,602 EPOCH 10 done: loss 0.1351 - lr 0.0050000\n",
+      "2021-09-21 20:31:22,602 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:31:30,966 Test data not provided setting final score to 0\n",
+      "2021-09-21 20:32:10,999 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:03:53,365 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:32:15,258 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 5/5 [00:00<00:00, 14841.84it/s]"
+      "100%|██████████| 5/5 [00:00<00:00, 9804.36it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:03:53,367 [b'one who does not side with any party in a war or dispute', b'that which is below standard or expectations as of ethics or decency', b'that which is pleasing or valuable or useful', b'remarkable or out of the ordinary in degree or magnitude or effect', b'exceptionally bad or displeasing']\n",
-      "2021-09-08 11:03:53,377 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:53,379 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:32:15,260 [b'one who does not side with any party in a war or dispute', b'that which is below standard or expectations as of ethics or decency', b'that which is pleasing or valuable or useful', b'remarkable or out of the ordinary in degree or magnitude or effect', b'exceptionally bad or displeasing']\n",
+      "2021-09-21 20:32:15,268 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:15,270 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -6132,28 +6161,26 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:03:53,379 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:53,380 Corpus: \"Corpus: 5 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 11:03:53,380 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:53,380 Parameters:\n",
-      "2021-09-08 11:03:53,380  - learning_rate: \"0.02\"\n",
-      "2021-09-08 11:03:53,381  - mini_batch_size: \"1\"\n",
-      "2021-09-08 11:03:53,381  - patience: \"3\"\n",
-      "2021-09-08 11:03:53,381  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 11:03:53,382  - max_epochs: \"10\"\n",
-      "2021-09-08 11:03:53,382  - shuffle: \"True\"\n",
-      "2021-09-08 11:03:53,382  - train_with_dev: \"False\"\n",
-      "2021-09-08 11:03:53,382  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 11:03:53,383 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:53,383 Model training base path: \"temp1\"\n",
-      "2021-09-08 11:03:53,383 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:53,384 Device: cuda:0\n",
-      "2021-09-08 11:03:53,384 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:53,384 Embeddings storage mode: cpu\n",
-      "2021-09-08 11:03:53,391 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:53,445 epoch 1 - iter 1/5 - loss 1.55410957 - samples/sec: 24.86 - lr: 0.020000\n",
-      "2021-09-08 11:03:53,496 epoch 1 - iter 2/5 - loss 1.35552520 - samples/sec: 19.79 - lr: 0.020000\n",
-      "2021-09-08 11:03:53,544 epoch 1 - iter 3/5 - loss 1.20445238 - samples/sec: 20.72 - lr: 0.020000\n"
+      "2021-09-21 20:32:15,271 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:15,271 Corpus: \"Corpus: 5 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 20:32:15,271 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:15,272 Parameters:\n",
+      "2021-09-21 20:32:15,272  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:32:15,272  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:32:15,272  - patience: \"3\"\n",
+      "2021-09-21 20:32:15,273  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:32:15,273  - max_epochs: \"10\"\n",
+      "2021-09-21 20:32:15,273  - shuffle: \"True\"\n",
+      "2021-09-21 20:32:15,274  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:32:15,274  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:32:15,274 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:15,274 Model training base path: \"temp1\"\n",
+      "2021-09-21 20:32:15,275 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:15,275 Device: cuda:0\n",
+      "2021-09-21 20:32:15,275 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:15,275 Embeddings storage mode: cpu\n",
+      "2021-09-21 20:32:15,282 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:15,398 epoch 1 - iter 1/5 - loss 0.61602813 - samples/sec: 10.27 - lr: 0.020000\n"
      ]
     },
     {
@@ -6167,121 +6194,123 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:03:53,615 epoch 1 - iter 4/5 - loss 1.06630349 - samples/sec: 14.14 - lr: 0.020000\n",
-      "2021-09-08 11:03:53,664 epoch 1 - iter 5/5 - loss 0.91721554 - samples/sec: 20.78 - lr: 0.020000\n",
-      "2021-09-08 11:03:53,665 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:53,665 EPOCH 1 done: loss 0.9172 - lr 0.0200000\n",
-      "2021-09-08 11:03:53,666 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:32:15,476 epoch 1 - iter 2/5 - loss 1.01677838 - samples/sec: 12.81 - lr: 0.020000\n",
+      "2021-09-21 20:32:15,562 epoch 1 - iter 3/5 - loss 0.86793782 - samples/sec: 11.70 - lr: 0.020000\n",
+      "2021-09-21 20:32:15,618 epoch 1 - iter 4/5 - loss 0.82067585 - samples/sec: 18.09 - lr: 0.020000\n",
+      "2021-09-21 20:32:15,772 epoch 1 - iter 5/5 - loss 0.78043373 - samples/sec: 6.52 - lr: 0.020000\n",
+      "2021-09-21 20:32:15,773 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:15,773 EPOCH 1 done: loss 0.7804 - lr 0.0200000\n",
+      "2021-09-21 20:32:15,774 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:03:57,755 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:57,821 epoch 2 - iter 1/5 - loss 0.19431478 - samples/sec: 19.65 - lr: 0.020000\n",
-      "2021-09-08 11:03:57,870 epoch 2 - iter 2/5 - loss 0.57107693 - samples/sec: 20.64 - lr: 0.020000\n",
-      "2021-09-08 11:03:57,948 epoch 2 - iter 3/5 - loss 0.69144269 - samples/sec: 12.95 - lr: 0.020000\n",
-      "2021-09-08 11:03:57,995 epoch 2 - iter 4/5 - loss 0.62870643 - samples/sec: 21.24 - lr: 0.020000\n",
-      "2021-09-08 11:03:58,043 epoch 2 - iter 5/5 - loss 0.66438190 - samples/sec: 21.06 - lr: 0.020000\n",
-      "2021-09-08 11:03:58,044 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:58,044 EPOCH 2 done: loss 0.6644 - lr 0.0200000\n",
-      "2021-09-08 11:03:58,045 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:03:58,047 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:58,107 epoch 3 - iter 1/5 - loss 0.14906041 - samples/sec: 21.34 - lr: 0.020000\n",
-      "2021-09-08 11:03:58,154 epoch 3 - iter 2/5 - loss 0.65424659 - samples/sec: 21.20 - lr: 0.020000\n",
-      "2021-09-08 11:03:58,222 epoch 3 - iter 3/5 - loss 0.62977872 - samples/sec: 14.75 - lr: 0.020000\n",
-      "2021-09-08 11:03:58,270 epoch 3 - iter 4/5 - loss 0.50011941 - samples/sec: 21.20 - lr: 0.020000\n",
-      "2021-09-08 11:03:58,318 epoch 3 - iter 5/5 - loss 0.72176473 - samples/sec: 21.21 - lr: 0.020000\n",
-      "2021-09-08 11:03:58,319 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:58,319 EPOCH 3 done: loss 0.7218 - lr 0.0200000\n",
-      "2021-09-08 11:03:58,319 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:03:58,321 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:58,400 epoch 4 - iter 1/5 - loss 0.39420727 - samples/sec: 15.12 - lr: 0.020000\n",
-      "2021-09-08 11:03:58,445 epoch 4 - iter 2/5 - loss 0.22281007 - samples/sec: 22.64 - lr: 0.020000\n",
-      "2021-09-08 11:03:58,493 epoch 4 - iter 3/5 - loss 0.23378617 - samples/sec: 21.14 - lr: 0.020000\n",
-      "2021-09-08 11:03:58,540 epoch 4 - iter 4/5 - loss 0.50602444 - samples/sec: 21.19 - lr: 0.020000\n",
-      "2021-09-08 11:03:58,587 epoch 4 - iter 5/5 - loss 0.41967546 - samples/sec: 21.83 - lr: 0.020000\n",
-      "2021-09-08 11:03:58,588 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:58,588 EPOCH 4 done: loss 0.4197 - lr 0.0200000\n",
-      "2021-09-08 11:03:58,588 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:03:58,590 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:58,670 epoch 5 - iter 1/5 - loss 0.19600229 - samples/sec: 15.02 - lr: 0.020000\n",
-      "2021-09-08 11:03:58,717 epoch 5 - iter 2/5 - loss 1.11997821 - samples/sec: 21.32 - lr: 0.020000\n",
-      "2021-09-08 11:03:58,765 epoch 5 - iter 3/5 - loss 0.76900875 - samples/sec: 21.23 - lr: 0.020000\n",
-      "2021-09-08 11:03:58,812 epoch 5 - iter 4/5 - loss 0.89144155 - samples/sec: 21.34 - lr: 0.020000\n",
-      "2021-09-08 11:03:58,859 epoch 5 - iter 5/5 - loss 0.87405594 - samples/sec: 21.17 - lr: 0.020000\n",
-      "2021-09-08 11:03:58,860 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:58,861 EPOCH 5 done: loss 0.8741 - lr 0.0200000\n",
+      "2021-09-21 20:32:20,009 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:20,124 epoch 2 - iter 1/5 - loss 0.42669436 - samples/sec: 13.49 - lr: 0.020000\n",
+      "2021-09-21 20:32:20,206 epoch 2 - iter 2/5 - loss 0.23355074 - samples/sec: 12.36 - lr: 0.020000\n",
+      "2021-09-21 20:32:20,312 epoch 2 - iter 3/5 - loss 0.49545710 - samples/sec: 9.41 - lr: 0.020000\n",
+      "2021-09-21 20:32:20,439 epoch 2 - iter 4/5 - loss 0.51916809 - samples/sec: 7.90 - lr: 0.020000\n",
+      "2021-09-21 20:32:20,590 epoch 2 - iter 5/5 - loss 0.43756130 - samples/sec: 6.65 - lr: 0.020000\n",
+      "2021-09-21 20:32:20,591 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:20,592 EPOCH 2 done: loss 0.4376 - lr 0.0200000\n",
+      "2021-09-21 20:32:20,592 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:32:20,598 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:20,703 epoch 3 - iter 1/5 - loss 0.14845097 - samples/sec: 16.90 - lr: 0.020000\n",
+      "2021-09-21 20:32:20,774 epoch 3 - iter 2/5 - loss 0.71239740 - samples/sec: 14.22 - lr: 0.020000\n",
+      "2021-09-21 20:32:20,875 epoch 3 - iter 3/5 - loss 0.61388255 - samples/sec: 9.99 - lr: 0.020000\n",
+      "2021-09-21 20:32:20,981 epoch 3 - iter 4/5 - loss 0.60115456 - samples/sec: 9.48 - lr: 0.020000\n",
+      "2021-09-21 20:32:21,102 epoch 3 - iter 5/5 - loss 0.53238202 - samples/sec: 8.28 - lr: 0.020000\n",
+      "2021-09-21 20:32:21,103 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:21,104 EPOCH 3 done: loss 0.5324 - lr 0.0200000\n",
+      "2021-09-21 20:32:21,104 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:32:21,106 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:21,211 epoch 4 - iter 1/5 - loss 0.92121500 - samples/sec: 14.46 - lr: 0.020000\n",
+      "2021-09-21 20:32:21,312 epoch 4 - iter 2/5 - loss 0.53460207 - samples/sec: 9.98 - lr: 0.020000\n",
+      "2021-09-21 20:32:21,442 epoch 4 - iter 3/5 - loss 0.36529133 - samples/sec: 7.70 - lr: 0.020000\n",
+      "2021-09-21 20:32:21,558 epoch 4 - iter 4/5 - loss 0.52515159 - samples/sec: 8.70 - lr: 0.020000\n",
+      "2021-09-21 20:32:21,624 epoch 4 - iter 5/5 - loss 0.49451971 - samples/sec: 15.13 - lr: 0.020000\n",
+      "2021-09-21 20:32:21,625 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:21,625 EPOCH 4 done: loss 0.4945 - lr 0.0200000\n",
+      "2021-09-21 20:32:21,626 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:32:21,628 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:21,726 epoch 5 - iter 1/5 - loss 0.06373935 - samples/sec: 13.30 - lr: 0.020000\n",
+      "2021-09-21 20:32:21,853 epoch 5 - iter 2/5 - loss 0.17323638 - samples/sec: 7.92 - lr: 0.020000\n",
+      "2021-09-21 20:32:21,967 epoch 5 - iter 3/5 - loss 0.26656962 - samples/sec: 8.82 - lr: 0.020000\n",
+      "2021-09-21 20:32:22,037 epoch 5 - iter 4/5 - loss 0.43509758 - samples/sec: 14.38 - lr: 0.020000\n",
+      "2021-09-21 20:32:22,165 epoch 5 - iter 5/5 - loss 0.38192764 - samples/sec: 7.80 - lr: 0.020000\n",
+      "2021-09-21 20:32:22,166 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:22,166 EPOCH 5 done: loss 0.3819 - lr 0.0200000\n",
       "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 11:03:58,861 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 11:03:58,863 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:58,922 epoch 6 - iter 1/5 - loss 0.01058261 - samples/sec: 22.05 - lr: 0.010000\n",
-      "2021-09-08 11:03:58,969 epoch 6 - iter 2/5 - loss 0.60034897 - samples/sec: 21.21 - lr: 0.010000\n",
-      "2021-09-08 11:03:59,036 epoch 6 - iter 3/5 - loss 0.55551467 - samples/sec: 15.14 - lr: 0.010000\n",
-      "2021-09-08 11:03:59,080 epoch 6 - iter 4/5 - loss 0.43210599 - samples/sec: 22.95 - lr: 0.010000\n",
-      "2021-09-08 11:03:59,127 epoch 6 - iter 5/5 - loss 0.43805993 - samples/sec: 21.33 - lr: 0.010000\n",
-      "2021-09-08 11:03:59,128 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:59,128 EPOCH 6 done: loss 0.4381 - lr 0.0100000\n",
-      "2021-09-08 11:03:59,128 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:03:59,131 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:59,191 epoch 7 - iter 1/5 - loss 0.37137151 - samples/sec: 21.17 - lr: 0.010000\n",
-      "2021-09-08 11:03:59,235 epoch 7 - iter 2/5 - loss 0.19055956 - samples/sec: 22.79 - lr: 0.010000\n",
-      "2021-09-08 11:03:59,281 epoch 7 - iter 3/5 - loss 0.12874224 - samples/sec: 21.82 - lr: 0.010000\n",
-      "2021-09-08 11:03:59,325 epoch 7 - iter 4/5 - loss 0.13232212 - samples/sec: 22.85 - lr: 0.010000\n",
-      "2021-09-08 11:03:59,393 epoch 7 - iter 5/5 - loss 0.19195360 - samples/sec: 14.98 - lr: 0.010000\n",
-      "2021-09-08 11:03:59,394 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:59,394 EPOCH 7 done: loss 0.1920 - lr 0.0100000\n",
-      "2021-09-08 11:03:59,394 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:03:59,396 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:59,476 epoch 8 - iter 1/5 - loss 0.25009048 - samples/sec: 15.01 - lr: 0.010000\n",
-      "2021-09-08 11:03:59,520 epoch 8 - iter 2/5 - loss 0.12913581 - samples/sec: 22.87 - lr: 0.010000\n",
-      "2021-09-08 11:03:59,566 epoch 8 - iter 3/5 - loss 0.08756727 - samples/sec: 22.08 - lr: 0.010000\n",
-      "2021-09-08 11:03:59,613 epoch 8 - iter 4/5 - loss 0.16412198 - samples/sec: 21.16 - lr: 0.010000\n",
-      "2021-09-08 11:03:59,661 epoch 8 - iter 5/5 - loss 0.19138642 - samples/sec: 21.33 - lr: 0.010000\n",
-      "2021-09-08 11:03:59,661 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:59,662 EPOCH 8 done: loss 0.1914 - lr 0.0100000\n",
-      "2021-09-08 11:03:59,662 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:03:59,664 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:59,724 epoch 9 - iter 1/5 - loss 0.15158848 - samples/sec: 21.22 - lr: 0.010000\n",
-      "2021-09-08 11:03:59,772 epoch 9 - iter 2/5 - loss 0.27293482 - samples/sec: 21.27 - lr: 0.010000\n",
-      "2021-09-08 11:03:59,819 epoch 9 - iter 3/5 - loss 0.28484039 - samples/sec: 21.18 - lr: 0.010000\n",
-      "2021-09-08 11:03:59,864 epoch 9 - iter 4/5 - loss 0.21402193 - samples/sec: 22.77 - lr: 0.010000\n",
-      "2021-09-08 11:03:59,933 epoch 9 - iter 5/5 - loss 0.20578846 - samples/sec: 14.59 - lr: 0.010000\n",
-      "2021-09-08 11:03:59,934 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:59,934 EPOCH 9 done: loss 0.2058 - lr 0.0100000\n",
+      "2021-09-21 20:32:22,167 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:32:22,169 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:22,256 epoch 6 - iter 1/5 - loss 0.21673107 - samples/sec: 13.41 - lr: 0.010000\n",
+      "2021-09-21 20:32:22,336 epoch 6 - iter 2/5 - loss 0.11365523 - samples/sec: 12.68 - lr: 0.010000\n",
+      "2021-09-21 20:32:22,507 epoch 6 - iter 3/5 - loss 0.07778617 - samples/sec: 5.85 - lr: 0.010000\n",
+      "2021-09-21 20:32:22,577 epoch 6 - iter 4/5 - loss 0.13659605 - samples/sec: 14.32 - lr: 0.010000\n",
+      "2021-09-21 20:32:22,658 epoch 6 - iter 5/5 - loss 0.36587310 - samples/sec: 12.45 - lr: 0.010000\n",
+      "2021-09-21 20:32:22,659 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:22,659 EPOCH 6 done: loss 0.3659 - lr 0.0100000\n",
+      "2021-09-21 20:32:22,660 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:32:22,662 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:22,788 epoch 7 - iter 1/5 - loss 0.19737953 - samples/sec: 12.44 - lr: 0.010000\n",
+      "2021-09-21 20:32:22,928 epoch 7 - iter 2/5 - loss 0.10027859 - samples/sec: 7.18 - lr: 0.010000\n",
+      "2021-09-21 20:32:23,022 epoch 7 - iter 3/5 - loss 0.11678570 - samples/sec: 10.62 - lr: 0.010000\n",
+      "2021-09-21 20:32:23,105 epoch 7 - iter 4/5 - loss 0.29473570 - samples/sec: 12.15 - lr: 0.010000\n",
+      "2021-09-21 20:32:23,173 epoch 7 - iter 5/5 - loss 0.23708368 - samples/sec: 14.77 - lr: 0.010000\n",
+      "2021-09-21 20:32:23,174 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:23,174 EPOCH 7 done: loss 0.2371 - lr 0.0100000\n",
+      "2021-09-21 20:32:23,175 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:32:23,177 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:23,335 epoch 8 - iter 1/5 - loss 0.00377939 - samples/sec: 6.90 - lr: 0.010000\n",
+      "2021-09-21 20:32:23,413 epoch 8 - iter 2/5 - loss 0.12530547 - samples/sec: 12.83 - lr: 0.010000\n",
+      "2021-09-21 20:32:23,492 epoch 8 - iter 3/5 - loss 0.23460439 - samples/sec: 12.82 - lr: 0.010000\n",
+      "2021-09-21 20:32:23,591 epoch 8 - iter 4/5 - loss 0.19333499 - samples/sec: 10.16 - lr: 0.010000\n",
+      "2021-09-21 20:32:23,653 epoch 8 - iter 5/5 - loss 0.23756679 - samples/sec: 16.18 - lr: 0.010000\n",
+      "2021-09-21 20:32:23,655 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:23,655 EPOCH 8 done: loss 0.2376 - lr 0.0100000\n",
+      "2021-09-21 20:32:23,655 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:32:23,657 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:23,771 epoch 9 - iter 1/5 - loss 0.40600371 - samples/sec: 13.17 - lr: 0.010000\n",
+      "2021-09-21 20:32:23,821 epoch 9 - iter 2/5 - loss 0.20865835 - samples/sec: 20.38 - lr: 0.010000\n",
+      "2021-09-21 20:32:23,974 epoch 9 - iter 3/5 - loss 0.15653553 - samples/sec: 6.54 - lr: 0.010000\n",
+      "2021-09-21 20:32:24,056 epoch 9 - iter 4/5 - loss 0.19410587 - samples/sec: 12.33 - lr: 0.010000\n",
+      "2021-09-21 20:32:24,193 epoch 9 - iter 5/5 - loss 0.15595910 - samples/sec: 7.35 - lr: 0.010000\n",
+      "2021-09-21 20:32:24,194 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:24,194 EPOCH 9 done: loss 0.1560 - lr 0.0100000\n",
       "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 11:03:59,934 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 11:03:59,936 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:03:59,997 epoch 10 - iter 1/5 - loss 0.27422825 - samples/sec: 21.20 - lr: 0.005000\n",
-      "2021-09-08 11:04:00,044 epoch 10 - iter 2/5 - loss 0.27834211 - samples/sec: 21.27 - lr: 0.005000\n",
-      "2021-09-08 11:04:00,108 epoch 10 - iter 3/5 - loss 0.21085028 - samples/sec: 15.74 - lr: 0.005000\n",
-      "2021-09-08 11:04:00,155 epoch 10 - iter 4/5 - loss 0.17307806 - samples/sec: 21.28 - lr: 0.005000\n",
-      "2021-09-08 11:04:00,200 epoch 10 - iter 5/5 - loss 0.14004326 - samples/sec: 22.78 - lr: 0.005000\n",
-      "2021-09-08 11:04:00,201 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:00,201 EPOCH 10 done: loss 0.1400 - lr 0.0050000\n"
+      "2021-09-21 20:32:24,195 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:32:24,197 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:24,301 epoch 10 - iter 1/5 - loss 0.26447499 - samples/sec: 13.77 - lr: 0.005000\n",
+      "2021-09-21 20:32:24,453 epoch 10 - iter 2/5 - loss 0.13386126 - samples/sec: 6.62 - lr: 0.005000\n",
+      "2021-09-21 20:32:24,554 epoch 10 - iter 3/5 - loss 0.09886107 - samples/sec: 10.00 - lr: 0.005000\n",
+      "2021-09-21 20:32:24,672 epoch 10 - iter 4/5 - loss 0.18431368 - samples/sec: 8.50 - lr: 0.005000\n",
+      "2021-09-21 20:32:24,760 epoch 10 - iter 5/5 - loss 0.14867095 - samples/sec: 11.39 - lr: 0.005000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:04:00,202 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:04:04,149 Test data not provided setting final score to 0\n",
-      "2021-09-08 11:04:37,560 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
+      "2021-09-21 20:32:24,761 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:24,762 EPOCH 10 done: loss 0.1487 - lr 0.0050000\n",
+      "2021-09-21 20:32:24,762 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:32:37,524 Test data not provided setting final score to 0\n",
+      "2021-09-21 20:33:17,990 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:04:41,465 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:33:22,120 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 5/5 [00:00<00:00, 12961.38it/s]"
+      "100%|██████████| 5/5 [00:00<00:00, 14810.40it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:04:41,467 [b'one who does not side with any party in a war or dispute', b'that which is below standard or expectations as of ethics or decency', b'that which is pleasing or valuable or useful', b'remarkable or out of the ordinary in degree or magnitude or effect', b'exceptionally bad or displeasing']\n",
-      "2021-09-08 11:04:41,604 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:41,606 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:33:22,122 [b'one who does not side with any party in a war or dispute', b'that which is below standard or expectations as of ethics or decency', b'that which is pleasing or valuable or useful', b'remarkable or out of the ordinary in degree or magnitude or effect', b'exceptionally bad or displeasing']\n",
+      "2021-09-21 20:33:22,131 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:22,133 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -6594,24 +6623,26 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:04:41,607 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:41,607 Corpus: \"Corpus: 5 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 11:04:41,607 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:41,608 Parameters:\n",
-      "2021-09-08 11:04:41,608  - learning_rate: \"0.02\"\n",
-      "2021-09-08 11:04:41,608  - mini_batch_size: \"1\"\n",
-      "2021-09-08 11:04:41,609  - patience: \"3\"\n",
-      "2021-09-08 11:04:41,609  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 11:04:41,609  - max_epochs: \"10\"\n",
-      "2021-09-08 11:04:41,609  - shuffle: \"True\"\n",
-      "2021-09-08 11:04:41,610  - train_with_dev: \"False\"\n",
-      "2021-09-08 11:04:41,610  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 11:04:41,610 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:41,611 Model training base path: \"temp1\"\n",
-      "2021-09-08 11:04:41,611 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:41,611 Device: cuda:0\n",
-      "2021-09-08 11:04:41,611 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:41,612 Embeddings storage mode: cpu\n"
+      "2021-09-21 20:33:22,133 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:22,134 Corpus: \"Corpus: 5 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 20:33:22,134 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:22,134 Parameters:\n",
+      "2021-09-21 20:33:22,134  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:33:22,135  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:33:22,135  - patience: \"3\"\n",
+      "2021-09-21 20:33:22,135  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:33:22,136  - max_epochs: \"10\"\n",
+      "2021-09-21 20:33:22,136  - shuffle: \"True\"\n",
+      "2021-09-21 20:33:22,136  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:33:22,136  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:33:22,137 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:22,137 Model training base path: \"temp1\"\n",
+      "2021-09-21 20:33:22,137 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:22,138 Device: cuda:0\n",
+      "2021-09-21 20:33:22,138 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:22,138 Embeddings storage mode: cpu\n",
+      "2021-09-21 20:33:22,144 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:22,255 epoch 1 - iter 1/5 - loss 0.40445438 - samples/sec: 15.58 - lr: 0.020000\n"
      ]
     },
     {
@@ -6625,125 +6656,123 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:04:41,782 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:41,842 epoch 1 - iter 1/5 - loss 0.50212318 - samples/sec: 22.12 - lr: 0.020000\n",
-      "2021-09-08 11:04:41,890 epoch 1 - iter 2/5 - loss 0.92630991 - samples/sec: 21.11 - lr: 0.020000\n",
-      "2021-09-08 11:04:41,944 epoch 1 - iter 3/5 - loss 0.85456916 - samples/sec: 18.56 - lr: 0.020000\n",
-      "2021-09-08 11:04:42,006 epoch 1 - iter 4/5 - loss 0.79625596 - samples/sec: 16.13 - lr: 0.020000\n",
-      "2021-09-08 11:04:42,072 epoch 1 - iter 5/5 - loss 0.80244231 - samples/sec: 15.30 - lr: 0.020000\n",
-      "2021-09-08 11:04:42,073 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:42,073 EPOCH 1 done: loss 0.8024 - lr 0.0200000\n",
-      "2021-09-08 11:04:42,074 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:33:22,334 epoch 1 - iter 2/5 - loss 0.84261508 - samples/sec: 12.77 - lr: 0.020000\n",
+      "2021-09-21 20:33:22,403 epoch 1 - iter 3/5 - loss 0.74055263 - samples/sec: 14.56 - lr: 0.020000\n",
+      "2021-09-21 20:33:22,491 epoch 1 - iter 4/5 - loss 0.69433834 - samples/sec: 11.30 - lr: 0.020000\n",
+      "2021-09-21 20:33:22,567 epoch 1 - iter 5/5 - loss 0.72153223 - samples/sec: 13.25 - lr: 0.020000\n",
+      "2021-09-21 20:33:22,568 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:22,569 EPOCH 1 done: loss 0.7215 - lr 0.0200000\n",
+      "2021-09-21 20:33:22,569 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:04:46,809 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:46,891 epoch 2 - iter 1/5 - loss 0.20973170 - samples/sec: 15.05 - lr: 0.020000\n",
-      "2021-09-08 11:04:46,945 epoch 2 - iter 2/5 - loss 0.40168482 - samples/sec: 18.61 - lr: 0.020000\n",
-      "2021-09-08 11:04:47,008 epoch 2 - iter 3/5 - loss 0.54183265 - samples/sec: 15.97 - lr: 0.020000\n",
-      "2021-09-08 11:04:47,063 epoch 2 - iter 4/5 - loss 0.51311017 - samples/sec: 18.24 - lr: 0.020000\n",
-      "2021-09-08 11:04:47,113 epoch 2 - iter 5/5 - loss 0.52691686 - samples/sec: 20.30 - lr: 0.020000\n",
-      "2021-09-08 11:04:47,114 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:47,115 EPOCH 2 done: loss 0.5269 - lr 0.0200000\n",
-      "2021-09-08 11:04:47,115 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:04:47,191 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:47,253 epoch 3 - iter 1/5 - loss 0.42268440 - samples/sec: 20.49 - lr: 0.020000\n",
-      "2021-09-08 11:04:47,307 epoch 3 - iter 2/5 - loss 0.52501009 - samples/sec: 19.00 - lr: 0.020000\n",
-      "2021-09-08 11:04:47,358 epoch 3 - iter 3/5 - loss 0.54147686 - samples/sec: 19.55 - lr: 0.020000\n",
-      "2021-09-08 11:04:47,413 epoch 3 - iter 4/5 - loss 0.41514304 - samples/sec: 18.23 - lr: 0.020000\n",
-      "2021-09-08 11:04:47,466 epoch 3 - iter 5/5 - loss 0.37785077 - samples/sec: 19.36 - lr: 0.020000\n",
-      "2021-09-08 11:04:47,467 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:47,467 EPOCH 3 done: loss 0.3779 - lr 0.0200000\n",
-      "2021-09-08 11:04:47,467 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:04:47,548 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:47,613 epoch 4 - iter 1/5 - loss 0.63136053 - samples/sec: 19.48 - lr: 0.020000\n",
-      "2021-09-08 11:04:47,669 epoch 4 - iter 2/5 - loss 0.32646870 - samples/sec: 18.18 - lr: 0.020000\n",
-      "2021-09-08 11:04:47,720 epoch 4 - iter 3/5 - loss 0.34561041 - samples/sec: 19.58 - lr: 0.020000\n",
-      "2021-09-08 11:04:47,769 epoch 4 - iter 4/5 - loss 0.34659323 - samples/sec: 20.77 - lr: 0.020000\n",
-      "2021-09-08 11:04:47,825 epoch 4 - iter 5/5 - loss 0.39694113 - samples/sec: 17.95 - lr: 0.020000\n",
-      "2021-09-08 11:04:47,826 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:47,827 EPOCH 4 done: loss 0.3969 - lr 0.0200000\n",
-      "2021-09-08 11:04:47,827 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:04:47,894 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:47,957 epoch 5 - iter 1/5 - loss 0.24149126 - samples/sec: 20.01 - lr: 0.020000\n",
-      "2021-09-08 11:04:48,007 epoch 5 - iter 2/5 - loss 0.34940359 - samples/sec: 20.22 - lr: 0.020000\n",
-      "2021-09-08 11:04:48,058 epoch 5 - iter 3/5 - loss 0.44568006 - samples/sec: 19.84 - lr: 0.020000\n",
-      "2021-09-08 11:04:48,117 epoch 5 - iter 4/5 - loss 0.37133975 - samples/sec: 17.11 - lr: 0.020000\n",
-      "2021-09-08 11:04:48,172 epoch 5 - iter 5/5 - loss 0.33502122 - samples/sec: 18.15 - lr: 0.020000\n",
-      "2021-09-08 11:04:48,173 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:48,174 EPOCH 5 done: loss 0.3350 - lr 0.0200000\n",
+      "2021-09-21 20:33:26,704 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:26,810 epoch 2 - iter 1/5 - loss 1.29624641 - samples/sec: 13.14 - lr: 0.020000\n",
+      "2021-09-21 20:33:26,884 epoch 2 - iter 2/5 - loss 0.90676707 - samples/sec: 13.69 - lr: 0.020000\n",
+      "2021-09-21 20:33:26,952 epoch 2 - iter 3/5 - loss 0.82258886 - samples/sec: 14.73 - lr: 0.020000\n",
+      "2021-09-21 20:33:27,015 epoch 2 - iter 4/5 - loss 0.68615861 - samples/sec: 16.08 - lr: 0.020000\n",
+      "2021-09-21 20:33:27,079 epoch 2 - iter 5/5 - loss 0.64637086 - samples/sec: 15.61 - lr: 0.020000\n",
+      "2021-09-21 20:33:27,080 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:27,080 EPOCH 2 done: loss 0.6464 - lr 0.0200000\n",
+      "2021-09-21 20:33:27,081 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:33:27,083 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:27,224 epoch 3 - iter 1/5 - loss 0.21515422 - samples/sec: 11.72 - lr: 0.020000\n",
+      "2021-09-21 20:33:27,294 epoch 3 - iter 2/5 - loss 0.16388177 - samples/sec: 14.26 - lr: 0.020000\n",
+      "2021-09-21 20:33:27,364 epoch 3 - iter 3/5 - loss 0.24229574 - samples/sec: 14.56 - lr: 0.020000\n",
+      "2021-09-21 20:33:27,431 epoch 3 - iter 4/5 - loss 0.27917465 - samples/sec: 14.92 - lr: 0.020000\n",
+      "2021-09-21 20:33:27,537 epoch 3 - iter 5/5 - loss 0.44025254 - samples/sec: 9.47 - lr: 0.020000\n",
+      "2021-09-21 20:33:27,538 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:27,538 EPOCH 3 done: loss 0.4403 - lr 0.0200000\n",
+      "2021-09-21 20:33:27,539 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:33:27,541 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:27,665 epoch 4 - iter 1/5 - loss 0.39387676 - samples/sec: 11.74 - lr: 0.020000\n",
+      "2021-09-21 20:33:27,740 epoch 4 - iter 2/5 - loss 0.52199970 - samples/sec: 13.49 - lr: 0.020000\n",
+      "2021-09-21 20:33:27,807 epoch 4 - iter 3/5 - loss 0.35630331 - samples/sec: 14.91 - lr: 0.020000\n",
+      "2021-09-21 20:33:27,868 epoch 4 - iter 4/5 - loss 0.31945754 - samples/sec: 16.60 - lr: 0.020000\n",
+      "2021-09-21 20:33:27,941 epoch 4 - iter 5/5 - loss 0.38998433 - samples/sec: 13.83 - lr: 0.020000\n",
+      "2021-09-21 20:33:27,943 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:27,943 EPOCH 4 done: loss 0.3900 - lr 0.0200000\n",
+      "2021-09-21 20:33:27,943 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:33:27,945 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:28,037 epoch 5 - iter 1/5 - loss 0.19477670 - samples/sec: 12.83 - lr: 0.020000\n",
+      "2021-09-21 20:33:28,110 epoch 5 - iter 2/5 - loss 0.37697833 - samples/sec: 13.75 - lr: 0.020000\n",
+      "2021-09-21 20:33:28,154 epoch 5 - iter 3/5 - loss 0.25911925 - samples/sec: 22.94 - lr: 0.020000\n",
+      "2021-09-21 20:33:28,207 epoch 5 - iter 4/5 - loss 0.38630084 - samples/sec: 19.24 - lr: 0.020000\n",
+      "2021-09-21 20:33:28,259 epoch 5 - iter 5/5 - loss 0.34426652 - samples/sec: 19.22 - lr: 0.020000\n",
+      "2021-09-21 20:33:28,260 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:28,260 EPOCH 5 done: loss 0.3443 - lr 0.0200000\n",
       "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 11:04:48,174 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 11:04:48,251 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:48,322 epoch 6 - iter 1/5 - loss 0.00503149 - samples/sec: 17.37 - lr: 0.010000\n",
-      "2021-09-08 11:04:48,370 epoch 6 - iter 2/5 - loss 0.04103212 - samples/sec: 21.06 - lr: 0.010000\n",
-      "2021-09-08 11:04:48,421 epoch 6 - iter 3/5 - loss 0.07066927 - samples/sec: 19.88 - lr: 0.010000\n",
-      "2021-09-08 11:04:48,477 epoch 6 - iter 4/5 - loss 0.32506232 - samples/sec: 17.94 - lr: 0.010000\n",
-      "2021-09-08 11:04:48,527 epoch 6 - iter 5/5 - loss 0.34252707 - samples/sec: 20.41 - lr: 0.010000\n",
-      "2021-09-08 11:04:48,528 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:48,528 EPOCH 6 done: loss 0.3425 - lr 0.0100000\n",
-      "2021-09-08 11:04:48,528 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:04:48,599 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:48,665 epoch 7 - iter 1/5 - loss 0.27672711 - samples/sec: 19.50 - lr: 0.010000\n",
-      "2021-09-08 11:04:48,723 epoch 7 - iter 2/5 - loss 0.22533019 - samples/sec: 17.24 - lr: 0.010000\n",
-      "2021-09-08 11:04:48,771 epoch 7 - iter 3/5 - loss 0.16881585 - samples/sec: 21.13 - lr: 0.010000\n",
-      "2021-09-08 11:04:48,819 epoch 7 - iter 4/5 - loss 0.18090243 - samples/sec: 20.90 - lr: 0.010000\n",
-      "2021-09-08 11:04:48,874 epoch 7 - iter 5/5 - loss 0.14634838 - samples/sec: 18.35 - lr: 0.010000\n",
-      "2021-09-08 11:04:48,875 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:48,875 EPOCH 7 done: loss 0.1463 - lr 0.0100000\n",
-      "2021-09-08 11:04:48,876 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:04:52,753 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:52,829 epoch 8 - iter 1/5 - loss 0.01014116 - samples/sec: 16.17 - lr: 0.010000\n",
-      "2021-09-08 11:04:52,878 epoch 8 - iter 2/5 - loss 0.43388809 - samples/sec: 20.60 - lr: 0.010000\n",
-      "2021-09-08 11:04:52,926 epoch 8 - iter 3/5 - loss 0.29683586 - samples/sec: 20.98 - lr: 0.010000\n",
-      "2021-09-08 11:04:52,978 epoch 8 - iter 4/5 - loss 0.35560769 - samples/sec: 19.30 - lr: 0.010000\n",
-      "2021-09-08 11:04:53,025 epoch 8 - iter 5/5 - loss 0.30423534 - samples/sec: 21.31 - lr: 0.010000\n",
-      "2021-09-08 11:04:53,026 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:53,027 EPOCH 8 done: loss 0.3042 - lr 0.0100000\n",
-      "2021-09-08 11:04:53,027 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:04:53,029 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:53,092 epoch 9 - iter 1/5 - loss 0.20285802 - samples/sec: 19.94 - lr: 0.010000\n",
-      "2021-09-08 11:04:53,147 epoch 9 - iter 2/5 - loss 0.17015319 - samples/sec: 18.29 - lr: 0.010000\n",
-      "2021-09-08 11:04:53,197 epoch 9 - iter 3/5 - loss 0.48989819 - samples/sec: 20.30 - lr: 0.010000\n",
-      "2021-09-08 11:04:53,252 epoch 9 - iter 4/5 - loss 0.36792231 - samples/sec: 18.47 - lr: 0.010000\n",
-      "2021-09-08 11:04:53,298 epoch 9 - iter 5/5 - loss 0.34096504 - samples/sec: 21.52 - lr: 0.010000\n",
-      "2021-09-08 11:04:53,299 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:53,300 EPOCH 9 done: loss 0.3410 - lr 0.0100000\n",
+      "2021-09-21 20:33:28,261 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:33:28,263 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:28,391 epoch 6 - iter 1/5 - loss 0.01111351 - samples/sec: 12.12 - lr: 0.010000\n",
+      "2021-09-21 20:33:28,457 epoch 6 - iter 2/5 - loss 0.29381194 - samples/sec: 15.39 - lr: 0.010000\n",
+      "2021-09-21 20:33:28,523 epoch 6 - iter 3/5 - loss 0.20287108 - samples/sec: 15.29 - lr: 0.010000\n",
+      "2021-09-21 20:33:28,588 epoch 6 - iter 4/5 - loss 0.21199599 - samples/sec: 15.42 - lr: 0.010000\n",
+      "2021-09-21 20:33:28,669 epoch 6 - iter 5/5 - loss 0.23350774 - samples/sec: 12.36 - lr: 0.010000\n",
+      "2021-09-21 20:33:28,670 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:28,671 EPOCH 6 done: loss 0.2335 - lr 0.0100000\n",
+      "2021-09-21 20:33:28,671 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:33:28,673 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:28,779 epoch 7 - iter 1/5 - loss 0.06419181 - samples/sec: 12.88 - lr: 0.010000\n",
+      "2021-09-21 20:33:28,871 epoch 7 - iter 2/5 - loss 0.04337131 - samples/sec: 10.96 - lr: 0.010000\n",
+      "2021-09-21 20:33:28,938 epoch 7 - iter 3/5 - loss 0.02974342 - samples/sec: 14.99 - lr: 0.010000\n",
+      "2021-09-21 20:33:29,009 epoch 7 - iter 4/5 - loss 0.09165755 - samples/sec: 14.27 - lr: 0.010000\n",
+      "2021-09-21 20:33:29,087 epoch 7 - iter 5/5 - loss 0.07384072 - samples/sec: 12.77 - lr: 0.010000\n",
+      "2021-09-21 20:33:29,088 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:29,089 EPOCH 7 done: loss 0.0738 - lr 0.0100000\n",
+      "2021-09-21 20:33:29,089 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:33:29,091 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:29,199 epoch 8 - iter 1/5 - loss 0.04045656 - samples/sec: 10.53 - lr: 0.010000\n",
+      "2021-09-21 20:33:29,271 epoch 8 - iter 2/5 - loss 0.31703047 - samples/sec: 14.01 - lr: 0.010000\n",
+      "2021-09-21 20:33:29,363 epoch 8 - iter 3/5 - loss 0.22695671 - samples/sec: 10.84 - lr: 0.010000\n",
+      "2021-09-21 20:33:29,416 epoch 8 - iter 4/5 - loss 0.17227503 - samples/sec: 19.14 - lr: 0.010000\n",
+      "2021-09-21 20:33:29,478 epoch 8 - iter 5/5 - loss 0.48125729 - samples/sec: 16.28 - lr: 0.010000\n",
+      "2021-09-21 20:33:29,479 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:29,479 EPOCH 8 done: loss 0.4813 - lr 0.0100000\n",
+      "2021-09-21 20:33:29,479 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:33:29,481 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:29,561 epoch 9 - iter 1/5 - loss 0.03719404 - samples/sec: 18.00 - lr: 0.010000\n",
+      "2021-09-21 20:33:29,613 epoch 9 - iter 2/5 - loss 0.02418664 - samples/sec: 19.47 - lr: 0.010000\n",
+      "2021-09-21 20:33:29,685 epoch 9 - iter 3/5 - loss 0.02596346 - samples/sec: 13.95 - lr: 0.010000\n",
+      "2021-09-21 20:33:29,739 epoch 9 - iter 4/5 - loss 0.26025426 - samples/sec: 18.74 - lr: 0.010000\n",
+      "2021-09-21 20:33:29,809 epoch 9 - iter 5/5 - loss 0.26876494 - samples/sec: 14.41 - lr: 0.010000\n",
+      "2021-09-21 20:33:29,810 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:29,810 EPOCH 9 done: loss 0.2688 - lr 0.0100000\n",
       "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 11:04:53,300 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 11:04:53,303 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:53,365 epoch 10 - iter 1/5 - loss 0.40518013 - samples/sec: 20.40 - lr: 0.005000\n",
-      "2021-09-08 11:04:53,416 epoch 10 - iter 2/5 - loss 0.20834803 - samples/sec: 19.49 - lr: 0.005000\n",
-      "2021-09-08 11:04:53,462 epoch 10 - iter 3/5 - loss 0.15767495 - samples/sec: 22.09 - lr: 0.005000\n"
+      "2021-09-21 20:33:29,810 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:33:29,813 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:29,913 epoch 10 - iter 1/5 - loss 0.07738264 - samples/sec: 18.07 - lr: 0.005000\n",
+      "2021-09-21 20:33:30,011 epoch 10 - iter 2/5 - loss 0.30336273 - samples/sec: 10.30 - lr: 0.005000\n",
+      "2021-09-21 20:33:30,079 epoch 10 - iter 3/5 - loss 0.35419318 - samples/sec: 14.81 - lr: 0.005000\n",
+      "2021-09-21 20:33:30,144 epoch 10 - iter 4/5 - loss 0.26962977 - samples/sec: 15.48 - lr: 0.005000\n",
+      "2021-09-21 20:33:30,204 epoch 10 - iter 5/5 - loss 0.21667055 - samples/sec: 16.77 - lr: 0.005000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:04:53,517 epoch 10 - iter 4/5 - loss 0.12022594 - samples/sec: 18.34 - lr: 0.005000\n",
-      "2021-09-08 11:04:53,564 epoch 10 - iter 5/5 - loss 0.10469292 - samples/sec: 21.36 - lr: 0.005000\n",
-      "2021-09-08 11:04:53,565 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:04:53,566 EPOCH 10 done: loss 0.1047 - lr 0.0050000\n",
-      "2021-09-08 11:04:53,566 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:05:03,958 Test data not provided setting final score to 0\n",
-      "2021-09-08 11:05:37,456 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
+      "2021-09-21 20:33:30,205 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:30,205 EPOCH 10 done: loss 0.2167 - lr 0.0050000\n",
+      "2021-09-21 20:33:30,206 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:33:34,299 Test data not provided setting final score to 0\n",
+      "2021-09-21 20:34:15,427 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/sentiment_twitter/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:05:41,386 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:34:20,587 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 5/5 [00:00<00:00, 12953.38it/s]"
+      "100%|██████████| 5/5 [00:00<00:00, 10882.99it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:05:41,388 [b'one who does not side with any party in a war or dispute', b'that which is below standard or expectations as of ethics or decency', b'that which is pleasing or valuable or useful', b'remarkable or out of the ordinary in degree or magnitude or effect', b'exceptionally bad or displeasing']\n",
-      "2021-09-08 11:05:41,397 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:05:41,399 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:34:20,589 [b'one who does not side with any party in a war or dispute', b'that which is below standard or expectations as of ethics or decency', b'that which is pleasing or valuable or useful', b'remarkable or out of the ordinary in degree or magnitude or effect', b'exceptionally bad or displeasing']\n",
+      "2021-09-21 20:34:20,599 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:20,600 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -7056,27 +7085,27 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:05:41,400 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:05:41,400 Corpus: \"Corpus: 5 train + 0 dev + 0 test sentences\"\n",
-      "2021-09-08 11:05:41,400 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:05:41,401 Parameters:\n",
-      "2021-09-08 11:05:41,401  - learning_rate: \"0.02\"\n",
-      "2021-09-08 11:05:41,401  - mini_batch_size: \"1\"\n",
-      "2021-09-08 11:05:41,401  - patience: \"3\"\n",
-      "2021-09-08 11:05:41,402  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 11:05:41,402  - max_epochs: \"10\"\n",
-      "2021-09-08 11:05:41,402  - shuffle: \"True\"\n",
-      "2021-09-08 11:05:41,403  - train_with_dev: \"False\"\n",
-      "2021-09-08 11:05:41,403  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 11:05:41,403 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:05:41,403 Model training base path: \"temp1\"\n",
-      "2021-09-08 11:05:41,404 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:05:41,404 Device: cuda:0\n",
-      "2021-09-08 11:05:41,404 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:05:41,405 Embeddings storage mode: cpu\n",
-      "2021-09-08 11:05:41,411 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:05:41,491 epoch 1 - iter 1/5 - loss 0.49820539 - samples/sec: 15.09 - lr: 0.020000\n",
-      "2021-09-08 11:05:41,545 epoch 1 - iter 2/5 - loss 0.65268631 - samples/sec: 18.69 - lr: 0.020000\n"
+      "2021-09-21 20:34:20,601 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:20,601 Corpus: \"Corpus: 5 train + 0 dev + 0 test sentences\"\n",
+      "2021-09-21 20:34:20,601 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:20,602 Parameters:\n",
+      "2021-09-21 20:34:20,602  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:34:20,602  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:34:20,603  - patience: \"3\"\n",
+      "2021-09-21 20:34:20,603  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:34:20,603  - max_epochs: \"10\"\n",
+      "2021-09-21 20:34:20,604  - shuffle: \"True\"\n",
+      "2021-09-21 20:34:20,604  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:34:20,604  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:34:20,604 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:20,605 Model training base path: \"temp1\"\n",
+      "2021-09-21 20:34:20,605 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:20,605 Device: cuda:0\n",
+      "2021-09-21 20:34:20,605 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:20,606 Embeddings storage mode: cpu\n",
+      "2021-09-21 20:34:20,612 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:20,675 epoch 1 - iter 1/5 - loss 0.50550860 - samples/sec: 21.01 - lr: 0.020000\n",
+      "2021-09-21 20:34:20,723 epoch 1 - iter 2/5 - loss 0.54833180 - samples/sec: 20.81 - lr: 0.020000\n"
      ]
     },
     {
@@ -7090,104 +7119,105 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:05:41,593 epoch 1 - iter 3/5 - loss 0.65441319 - samples/sec: 21.12 - lr: 0.020000\n",
-      "2021-09-08 11:05:41,646 epoch 1 - iter 4/5 - loss 0.64410857 - samples/sec: 19.03 - lr: 0.020000\n",
-      "2021-09-08 11:05:41,704 epoch 1 - iter 5/5 - loss 0.73732975 - samples/sec: 17.33 - lr: 0.020000\n",
-      "2021-09-08 11:05:41,705 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:05:41,705 EPOCH 1 done: loss 0.7373 - lr 0.0200000\n",
-      "2021-09-08 11:05:41,705 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:34:20,836 epoch 1 - iter 3/5 - loss 0.56272393 - samples/sec: 8.88 - lr: 0.020000\n",
+      "2021-09-21 20:34:20,887 epoch 1 - iter 4/5 - loss 0.45678656 - samples/sec: 19.90 - lr: 0.020000\n",
+      "2021-09-21 20:34:20,938 epoch 1 - iter 5/5 - loss 0.55526154 - samples/sec: 19.82 - lr: 0.020000\n",
+      "2021-09-21 20:34:20,940 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:20,940 EPOCH 1 done: loss 0.5553 - lr 0.0200000\n",
+      "2021-09-21 20:34:20,941 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:05:45,805 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:05:45,898 epoch 2 - iter 1/5 - loss 1.03003144 - samples/sec: 12.78 - lr: 0.020000\n",
-      "2021-09-08 11:05:45,951 epoch 2 - iter 2/5 - loss 0.82643861 - samples/sec: 18.91 - lr: 0.020000\n",
-      "2021-09-08 11:05:46,016 epoch 2 - iter 3/5 - loss 0.66597951 - samples/sec: 15.47 - lr: 0.020000\n",
-      "2021-09-08 11:05:46,069 epoch 2 - iter 4/5 - loss 0.69456165 - samples/sec: 19.23 - lr: 0.020000\n",
-      "2021-09-08 11:05:46,116 epoch 2 - iter 5/5 - loss 0.61341540 - samples/sec: 21.20 - lr: 0.020000\n",
-      "2021-09-08 11:05:46,117 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:05:46,118 EPOCH 2 done: loss 0.6134 - lr 0.0200000\n",
-      "2021-09-08 11:05:46,118 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:05:46,129 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:05:46,211 epoch 3 - iter 1/5 - loss 0.46486819 - samples/sec: 14.58 - lr: 0.020000\n",
-      "2021-09-08 11:05:46,261 epoch 3 - iter 2/5 - loss 0.50484332 - samples/sec: 20.39 - lr: 0.020000\n",
-      "2021-09-08 11:05:46,318 epoch 3 - iter 3/5 - loss 0.36969804 - samples/sec: 17.51 - lr: 0.020000\n",
-      "2021-09-08 11:05:46,368 epoch 3 - iter 4/5 - loss 0.41034822 - samples/sec: 20.17 - lr: 0.020000\n",
-      "2021-09-08 11:05:46,415 epoch 3 - iter 5/5 - loss 0.37920408 - samples/sec: 21.29 - lr: 0.020000\n",
-      "2021-09-08 11:05:46,416 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:05:46,417 EPOCH 3 done: loss 0.3792 - lr 0.0200000\n",
-      "2021-09-08 11:05:46,417 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:05:46,419 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:05:46,490 epoch 4 - iter 1/5 - loss 0.06984521 - samples/sec: 17.25 - lr: 0.020000\n",
-      "2021-09-08 11:05:46,558 epoch 4 - iter 2/5 - loss 0.78925854 - samples/sec: 14.97 - lr: 0.020000\n",
-      "2021-09-08 11:05:46,607 epoch 4 - iter 3/5 - loss 0.67213996 - samples/sec: 20.40 - lr: 0.020000\n",
-      "2021-09-08 11:05:46,657 epoch 4 - iter 4/5 - loss 1.07583659 - samples/sec: 20.15 - lr: 0.020000\n",
-      "2021-09-08 11:05:46,705 epoch 4 - iter 5/5 - loss 0.92806647 - samples/sec: 21.25 - lr: 0.020000\n",
-      "2021-09-08 11:05:46,706 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:05:46,706 EPOCH 4 done: loss 0.9281 - lr 0.0200000\n",
-      "2021-09-08 11:05:46,706 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:34:28,200 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:28,279 epoch 2 - iter 1/5 - loss 0.41061243 - samples/sec: 16.48 - lr: 0.020000\n",
+      "2021-09-21 20:34:28,379 epoch 2 - iter 2/5 - loss 0.38262817 - samples/sec: 10.09 - lr: 0.020000\n",
+      "2021-09-21 20:34:28,456 epoch 2 - iter 3/5 - loss 0.78114233 - samples/sec: 13.08 - lr: 0.020000\n",
+      "2021-09-21 20:34:28,607 epoch 2 - iter 4/5 - loss 0.73361897 - samples/sec: 6.65 - lr: 0.020000\n",
+      "2021-09-21 20:34:28,673 epoch 2 - iter 5/5 - loss 0.66424611 - samples/sec: 15.27 - lr: 0.020000\n",
+      "2021-09-21 20:34:28,674 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:28,674 EPOCH 2 done: loss 0.6642 - lr 0.0200000\n",
+      "2021-09-21 20:34:28,674 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:05:51,913 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:05:51,982 epoch 5 - iter 1/5 - loss 0.49464789 - samples/sec: 18.36 - lr: 0.020000\n",
-      "2021-09-08 11:05:52,045 epoch 5 - iter 2/5 - loss 0.27463908 - samples/sec: 16.03 - lr: 0.020000\n",
-      "2021-09-08 11:05:52,098 epoch 5 - iter 3/5 - loss 0.32081441 - samples/sec: 19.02 - lr: 0.020000\n",
-      "2021-09-08 11:05:52,166 epoch 5 - iter 4/5 - loss 0.29214394 - samples/sec: 14.84 - lr: 0.020000\n",
-      "2021-09-08 11:05:52,214 epoch 5 - iter 5/5 - loss 0.24749657 - samples/sec: 21.15 - lr: 0.020000\n",
-      "2021-09-08 11:05:52,215 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:05:52,215 EPOCH 5 done: loss 0.2475 - lr 0.0200000\n",
-      "2021-09-08 11:05:52,216 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:05:52,223 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:05:52,303 epoch 6 - iter 1/5 - loss 0.80875105 - samples/sec: 14.96 - lr: 0.020000\n",
-      "2021-09-08 11:05:52,347 epoch 6 - iter 2/5 - loss 0.41362088 - samples/sec: 22.95 - lr: 0.020000\n",
-      "2021-09-08 11:05:52,397 epoch 6 - iter 3/5 - loss 0.75305405 - samples/sec: 20.19 - lr: 0.020000\n",
-      "2021-09-08 11:05:52,452 epoch 6 - iter 4/5 - loss 0.57864271 - samples/sec: 18.55 - lr: 0.020000\n",
-      "2021-09-08 11:05:52,503 epoch 6 - iter 5/5 - loss 0.57575743 - samples/sec: 19.68 - lr: 0.020000\n",
-      "2021-09-08 11:05:52,504 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:05:52,504 EPOCH 6 done: loss 0.5758 - lr 0.0200000\n",
-      "2021-09-08 11:05:52,504 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:05:52,520 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:05:52,583 epoch 7 - iter 1/5 - loss 0.74707013 - samples/sec: 20.19 - lr: 0.020000\n",
-      "2021-09-08 11:05:52,637 epoch 7 - iter 2/5 - loss 0.37734626 - samples/sec: 18.55 - lr: 0.020000\n",
-      "2021-09-08 11:05:52,686 epoch 7 - iter 3/5 - loss 0.45920448 - samples/sec: 20.45 - lr: 0.020000\n",
-      "2021-09-08 11:05:52,754 epoch 7 - iter 4/5 - loss 0.38095358 - samples/sec: 14.98 - lr: 0.020000\n",
-      "2021-09-08 11:05:52,802 epoch 7 - iter 5/5 - loss 0.40465748 - samples/sec: 20.73 - lr: 0.020000\n",
-      "2021-09-08 11:05:52,803 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:05:52,804 EPOCH 7 done: loss 0.4047 - lr 0.0200000\n",
-      "2021-09-08 11:05:52,804 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:05:52,866 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:05:52,933 epoch 8 - iter 1/5 - loss 0.00338702 - samples/sec: 18.61 - lr: 0.020000\n",
-      "2021-09-08 11:05:52,981 epoch 8 - iter 2/5 - loss 0.14606933 - samples/sec: 21.35 - lr: 0.020000\n",
-      "2021-09-08 11:05:53,044 epoch 8 - iter 3/5 - loss 0.11493451 - samples/sec: 15.75 - lr: 0.020000\n",
-      "2021-09-08 11:05:53,094 epoch 8 - iter 4/5 - loss 0.27347396 - samples/sec: 20.34 - lr: 0.020000\n",
-      "2021-09-08 11:05:53,142 epoch 8 - iter 5/5 - loss 0.22341454 - samples/sec: 20.93 - lr: 0.020000\n",
-      "2021-09-08 11:05:53,143 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:05:53,143 EPOCH 8 done: loss 0.2234 - lr 0.0200000\n",
-      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 11:05:53,144 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 11:05:53,146 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:05:53,203 epoch 9 - iter 1/5 - loss 0.02449685 - samples/sec: 22.98 - lr: 0.010000\n",
-      "2021-09-08 11:05:53,249 epoch 9 - iter 2/5 - loss 0.02919330 - samples/sec: 21.59 - lr: 0.010000\n",
-      "2021-09-08 11:05:53,313 epoch 9 - iter 3/5 - loss 0.03168813 - samples/sec: 15.73 - lr: 0.010000\n",
-      "2021-09-08 11:05:53,368 epoch 9 - iter 4/5 - loss 0.02461841 - samples/sec: 18.38 - lr: 0.010000\n",
-      "2021-09-08 11:05:53,418 epoch 9 - iter 5/5 - loss 0.19051455 - samples/sec: 20.19 - lr: 0.010000\n",
-      "2021-09-08 11:05:53,419 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:05:53,420 EPOCH 9 done: loss 0.1905 - lr 0.0100000\n",
-      "2021-09-08 11:05:53,420 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:05:53,426 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:05:53,485 epoch 10 - iter 1/5 - loss 0.00786083 - samples/sec: 22.83 - lr: 0.010000\n",
-      "2021-09-08 11:05:53,552 epoch 10 - iter 2/5 - loss 0.04383058 - samples/sec: 14.95 - lr: 0.010000\n",
-      "2021-09-08 11:05:53,602 epoch 10 - iter 3/5 - loss 0.09667289 - samples/sec: 20.37 - lr: 0.010000\n",
-      "2021-09-08 11:05:53,656 epoch 10 - iter 4/5 - loss 0.07338653 - samples/sec: 18.59 - lr: 0.010000\n",
-      "2021-09-08 11:05:53,706 epoch 10 - iter 5/5 - loss 0.26023115 - samples/sec: 20.23 - lr: 0.010000\n",
-      "2021-09-08 11:05:53,707 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 20:34:46,146 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:46,286 epoch 3 - iter 1/5 - loss 0.94665283 - samples/sec: 8.02 - lr: 0.020000\n",
+      "2021-09-21 20:34:46,336 epoch 3 - iter 2/5 - loss 0.72211799 - samples/sec: 20.38 - lr: 0.020000\n",
+      "2021-09-21 20:34:46,394 epoch 3 - iter 3/5 - loss 0.75490022 - samples/sec: 17.31 - lr: 0.020000\n",
+      "2021-09-21 20:34:46,443 epoch 3 - iter 4/5 - loss 0.75497521 - samples/sec: 20.62 - lr: 0.020000\n",
+      "2021-09-21 20:34:46,494 epoch 3 - iter 5/5 - loss 0.62640733 - samples/sec: 19.93 - lr: 0.020000\n",
+      "2021-09-21 20:34:46,495 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:46,496 EPOCH 3 done: loss 0.6264 - lr 0.0200000\n",
+      "2021-09-21 20:34:46,496 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:34:46,499 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:46,607 epoch 4 - iter 1/5 - loss 0.20662659 - samples/sec: 10.83 - lr: 0.020000\n",
+      "2021-09-21 20:34:46,808 epoch 4 - iter 2/5 - loss 0.46170688 - samples/sec: 5.00 - lr: 0.020000\n",
+      "2021-09-21 20:34:46,874 epoch 4 - iter 3/5 - loss 0.37670857 - samples/sec: 15.20 - lr: 0.020000\n",
+      "2021-09-21 20:34:46,942 epoch 4 - iter 4/5 - loss 0.42471688 - samples/sec: 14.82 - lr: 0.020000\n",
+      "2021-09-21 20:34:47,010 epoch 4 - iter 5/5 - loss 0.49492908 - samples/sec: 14.94 - lr: 0.020000\n",
+      "2021-09-21 20:34:47,011 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:47,012 EPOCH 4 done: loss 0.4949 - lr 0.0200000\n",
+      "2021-09-21 20:34:47,012 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:34:47,030 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:47,132 epoch 5 - iter 1/5 - loss 0.13611902 - samples/sec: 14.28 - lr: 0.020000\n",
+      "2021-09-21 20:34:47,213 epoch 5 - iter 2/5 - loss 0.36489401 - samples/sec: 12.45 - lr: 0.020000\n",
+      "2021-09-21 20:34:47,324 epoch 5 - iter 3/5 - loss 0.70398162 - samples/sec: 9.11 - lr: 0.020000\n",
+      "2021-09-21 20:34:47,373 epoch 5 - iter 4/5 - loss 0.72348051 - samples/sec: 20.25 - lr: 0.020000\n",
+      "2021-09-21 20:34:47,418 epoch 5 - iter 5/5 - loss 0.58320208 - samples/sec: 22.64 - lr: 0.020000\n",
+      "2021-09-21 20:34:47,419 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:47,420 EPOCH 5 done: loss 0.5832 - lr 0.0200000\n",
+      "2021-09-21 20:34:47,420 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:34:47,422 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:47,480 epoch 6 - iter 1/5 - loss 0.00427972 - samples/sec: 22.76 - lr: 0.020000\n",
+      "2021-09-21 20:34:47,536 epoch 6 - iter 2/5 - loss 0.20990587 - samples/sec: 17.96 - lr: 0.020000\n",
+      "2021-09-21 20:34:47,583 epoch 6 - iter 3/5 - loss 0.25649909 - samples/sec: 21.33 - lr: 0.020000\n",
+      "2021-09-21 20:34:47,694 epoch 6 - iter 4/5 - loss 0.34370419 - samples/sec: 9.07 - lr: 0.020000\n",
+      "2021-09-21 20:34:47,743 epoch 6 - iter 5/5 - loss 0.33178954 - samples/sec: 20.82 - lr: 0.020000\n",
+      "2021-09-21 20:34:47,744 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:47,744 EPOCH 6 done: loss 0.3318 - lr 0.0200000\n",
+      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 20:34:47,745 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:34:47,751 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:47,812 epoch 7 - iter 1/5 - loss 1.03502476 - samples/sec: 21.04 - lr: 0.010000\n",
+      "2021-09-21 20:34:47,856 epoch 7 - iter 2/5 - loss 0.51904324 - samples/sec: 22.82 - lr: 0.010000\n",
+      "2021-09-21 20:34:47,968 epoch 7 - iter 3/5 - loss 0.59377277 - samples/sec: 9.01 - lr: 0.010000\n",
+      "2021-09-21 20:34:48,013 epoch 7 - iter 4/5 - loss 0.46717293 - samples/sec: 22.58 - lr: 0.010000\n",
+      "2021-09-21 20:34:48,066 epoch 7 - iter 5/5 - loss 0.39168432 - samples/sec: 19.14 - lr: 0.010000\n",
+      "2021-09-21 20:34:48,067 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:48,067 EPOCH 7 done: loss 0.3917 - lr 0.0100000\n",
+      "2021-09-21 20:34:48,067 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:34:48,144 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:48,214 epoch 8 - iter 1/5 - loss 0.20862477 - samples/sec: 18.02 - lr: 0.010000\n",
+      "2021-09-21 20:34:48,325 epoch 8 - iter 2/5 - loss 0.46413644 - samples/sec: 9.06 - lr: 0.010000\n",
+      "2021-09-21 20:34:48,373 epoch 8 - iter 3/5 - loss 0.48324290 - samples/sec: 21.28 - lr: 0.010000\n",
+      "2021-09-21 20:34:48,420 epoch 8 - iter 4/5 - loss 0.42030519 - samples/sec: 21.24 - lr: 0.010000\n",
+      "2021-09-21 20:34:48,464 epoch 8 - iter 5/5 - loss 0.33692252 - samples/sec: 23.23 - lr: 0.010000\n",
+      "2021-09-21 20:34:48,465 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:48,465 EPOCH 8 done: loss 0.3369 - lr 0.0100000\n",
+      "2021-09-21 20:34:48,465 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:34:48,537 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:48,597 epoch 9 - iter 1/5 - loss 0.10239143 - samples/sec: 21.13 - lr: 0.010000\n",
+      "2021-09-21 20:34:48,650 epoch 9 - iter 2/5 - loss 0.10361590 - samples/sec: 19.20 - lr: 0.010000\n",
+      "2021-09-21 20:34:48,761 epoch 9 - iter 3/5 - loss 0.28570234 - samples/sec: 9.02 - lr: 0.010000\n",
+      "2021-09-21 20:34:48,805 epoch 9 - iter 4/5 - loss 0.21507300 - samples/sec: 23.01 - lr: 0.010000\n",
+      "2021-09-21 20:34:48,853 epoch 9 - iter 5/5 - loss 0.19267902 - samples/sec: 21.28 - lr: 0.010000\n",
+      "2021-09-21 20:34:48,854 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:48,854 EPOCH 9 done: loss 0.1927 - lr 0.0100000\n",
+      "2021-09-21 20:34:48,854 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:34:48,933 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:48,989 epoch 10 - iter 1/5 - loss 0.00425639 - samples/sec: 23.28 - lr: 0.010000\n",
+      "2021-09-21 20:34:49,042 epoch 10 - iter 2/5 - loss 0.03089584 - samples/sec: 19.24 - lr: 0.010000\n",
+      "2021-09-21 20:34:49,091 epoch 10 - iter 3/5 - loss 0.18582917 - samples/sec: 20.50 - lr: 0.010000\n",
+      "2021-09-21 20:34:49,200 epoch 10 - iter 4/5 - loss 0.23900163 - samples/sec: 9.15 - lr: 0.010000\n",
+      "2021-09-21 20:34:49,245 epoch 10 - iter 5/5 - loss 0.19838532 - samples/sec: 22.57 - lr: 0.010000\n",
+      "2021-09-21 20:34:49,246 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:05:53,707 EPOCH 10 done: loss 0.2602 - lr 0.0100000\n",
-      "2021-09-08 11:05:53,707 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:05:57,702 Test data not provided setting final score to 0\n",
-      "Accuracy Durchschnitt: 0.34283614988978695\n"
+      "2021-09-21 20:34:49,247 EPOCH 10 done: loss 0.1984 - lr 0.0100000\n",
+      "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 20:34:49,247 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:34:53,909 Test data not provided setting final score to 0\n",
+      "Accuracy Durchschnitt: 0.34592211609110946\n"
      ]
     }
    ],
@@ -7258,11 +7288,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
    "id": "a310936c",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.28361498897869214, 0.4011756061719324, 0.32916972814107276, 0.3115356355620867, 0.4041146216017634]\n",
+      "0.048548051449926216\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(avg_acc_list)\n",
+    "print(statistics.pstdev(avg_acc_list))"
+   ]
   }
  ],
  "metadata": {
diff --git a/oneshot/topic_huffpost_oneshot.ipynb b/oneshot/topic_huffpost_oneshot.ipynb
index d7f79d2..0ee8aa8 100644
--- a/oneshot/topic_huffpost_oneshot.ipynb
+++ b/oneshot/topic_huffpost_oneshot.ipynb
@@ -37,7 +37,7 @@
    "source": [
     "# GRAKA auswählen\n",
     "import flair, torch\n",
-    "flair.device = torch.device('cuda:1') "
+    "flair.device = torch.device('cuda:0') "
    ]
   },
   {
@@ -70,38 +70,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:47:44,650 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
+      "2021-09-21 20:19:24,132 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:47:51,433 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:19:32,816 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 9/9 [00:00<00:00, 16563.73it/s]"
+      "100%|██████████| 9/9 [00:00<00:00, 16754.88it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:47:51,436 [b'Family & Relationships', b'Health', b'Entertainment & Music', b'Education & Reference', b'Sports', b'Society & Culture', b'Business & Finance', b'Politics & Government', b'Computers & Internet']\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 11:47:51,675 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:47:51,677 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:19:32,819 [b'Family & Relationships', b'Health', b'Entertainment & Music', b'Computers & Internet', b'Sports', b'Society & Culture', b'Business & Finance', b'Politics & Government', b'Education & Reference']\n",
+      "2021-09-21 20:19:32,822 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:32,823 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -414,171 +401,182 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:47:51,677 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:47:51,677 Corpus: \"Corpus: 8 train + 1 dev + 1 test sentences\"\n",
-      "2021-09-08 11:47:51,678 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:47:51,678 Parameters:\n",
-      "2021-09-08 11:47:51,678  - learning_rate: \"0.02\"\n",
-      "2021-09-08 11:47:51,679  - mini_batch_size: \"1\"\n",
-      "2021-09-08 11:47:51,679  - patience: \"3\"\n",
-      "2021-09-08 11:47:51,679  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 11:47:51,679  - max_epochs: \"10\"\n",
-      "2021-09-08 11:47:51,680  - shuffle: \"True\"\n",
-      "2021-09-08 11:47:51,680  - train_with_dev: \"False\"\n",
-      "2021-09-08 11:47:51,680  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 11:47:51,681 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:47:51,681 Model training base path: \"None\"\n",
-      "2021-09-08 11:47:51,681 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:47:51,681 Device: cuda:1\n",
-      "2021-09-08 11:47:51,682 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:47:51,682 Embeddings storage mode: cpu\n",
-      "2021-09-08 11:47:51,689 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:47:51,767 epoch 1 - iter 1/8 - loss 2.16907382 - samples/sec: 16.98 - lr: 0.020000\n",
-      "2021-09-08 11:47:51,814 epoch 1 - iter 2/8 - loss 1.10267113 - samples/sec: 21.49 - lr: 0.020000\n",
-      "2021-09-08 11:47:51,863 epoch 1 - iter 3/8 - loss 0.75641841 - samples/sec: 20.83 - lr: 0.020000\n",
-      "2021-09-08 11:47:51,919 epoch 1 - iter 4/8 - loss 1.07777996 - samples/sec: 18.01 - lr: 0.020000\n",
-      "2021-09-08 11:47:51,972 epoch 1 - iter 5/8 - loss 0.92554935 - samples/sec: 18.94 - lr: 0.020000\n",
-      "2021-09-08 11:47:52,070 epoch 1 - iter 6/8 - loss 0.87322715 - samples/sec: 10.20 - lr: 0.020000\n",
-      "2021-09-08 11:47:52,121 epoch 1 - iter 7/8 - loss 0.93726090 - samples/sec: 19.76 - lr: 0.020000\n",
-      "2021-09-08 11:47:52,170 epoch 1 - iter 8/8 - loss 0.89805487 - samples/sec: 20.92 - lr: 0.020000\n",
-      "2021-09-08 11:47:52,171 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:47:52,171 EPOCH 1 done: loss 0.8981 - lr 0.0200000\n",
-      "2021-09-08 11:47:52,227 DEV : loss 0.7570549845695496 - score 0.0\n",
-      "2021-09-08 11:47:52,228 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 11:47:59,869 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:47:59,938 epoch 2 - iter 1/8 - loss 0.17430715 - samples/sec: 18.61 - lr: 0.020000\n",
-      "2021-09-08 11:47:59,984 epoch 2 - iter 2/8 - loss 0.10024045 - samples/sec: 21.95 - lr: 0.020000\n",
-      "2021-09-08 11:48:00,036 epoch 2 - iter 3/8 - loss 0.18973431 - samples/sec: 19.64 - lr: 0.020000\n",
-      "2021-09-08 11:48:00,088 epoch 2 - iter 4/8 - loss 0.43209962 - samples/sec: 19.06 - lr: 0.020000\n",
-      "2021-09-08 11:48:00,137 epoch 2 - iter 5/8 - loss 0.40915971 - samples/sec: 20.94 - lr: 0.020000\n",
-      "2021-09-08 11:48:00,185 epoch 2 - iter 6/8 - loss 0.43355339 - samples/sec: 20.72 - lr: 0.020000\n",
-      "2021-09-08 11:48:00,233 epoch 2 - iter 7/8 - loss 0.39371163 - samples/sec: 21.01 - lr: 0.020000\n",
-      "2021-09-08 11:48:00,331 epoch 2 - iter 8/8 - loss 0.55830879 - samples/sec: 10.27 - lr: 0.020000\n",
-      "2021-09-08 11:48:00,332 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:48:00,332 EPOCH 2 done: loss 0.5583 - lr 0.0200000\n",
-      "2021-09-08 11:48:02,161 DEV : loss 0.06215842440724373 - score 0.0\n",
-      "2021-09-08 11:48:02,162 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 11:48:07,964 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:48:08,031 epoch 3 - iter 1/8 - loss 0.71455932 - samples/sec: 19.68 - lr: 0.020000\n",
-      "2021-09-08 11:48:08,076 epoch 3 - iter 2/8 - loss 0.37632071 - samples/sec: 22.46 - lr: 0.020000\n",
-      "2021-09-08 11:48:08,127 epoch 3 - iter 3/8 - loss 0.48209315 - samples/sec: 19.74 - lr: 0.020000\n",
-      "2021-09-08 11:48:08,181 epoch 3 - iter 4/8 - loss 0.44322653 - samples/sec: 18.42 - lr: 0.020000\n",
-      "2021-09-08 11:48:08,287 epoch 3 - iter 5/8 - loss 0.58500818 - samples/sec: 9.55 - lr: 0.020000\n",
-      "2021-09-08 11:48:08,335 epoch 3 - iter 6/8 - loss 0.49217454 - samples/sec: 20.86 - lr: 0.020000\n",
-      "2021-09-08 11:48:08,381 epoch 3 - iter 7/8 - loss 0.42277247 - samples/sec: 21.84 - lr: 0.020000\n",
-      "2021-09-08 11:48:08,426 epoch 3 - iter 8/8 - loss 0.37070469 - samples/sec: 22.65 - lr: 0.020000\n",
-      "2021-09-08 11:48:08,427 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:48:08,427 EPOCH 3 done: loss 0.3707 - lr 0.0200000\n",
-      "2021-09-08 11:48:10,448 DEV : loss 0.12343708425760269 - score 0.0\n",
-      "2021-09-08 11:48:10,449 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:48:10,455 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:48:10,577 epoch 4 - iter 1/8 - loss 0.42188218 - samples/sec: 9.25 - lr: 0.020000\n",
-      "2021-09-08 11:48:10,628 epoch 4 - iter 2/8 - loss 0.69027628 - samples/sec: 19.95 - lr: 0.020000\n",
-      "2021-09-08 11:48:10,682 epoch 4 - iter 3/8 - loss 0.69689299 - samples/sec: 18.74 - lr: 0.020000\n",
-      "2021-09-08 11:48:10,732 epoch 4 - iter 4/8 - loss 0.58992127 - samples/sec: 20.09 - lr: 0.020000\n",
-      "2021-09-08 11:48:10,777 epoch 4 - iter 5/8 - loss 0.47433110 - samples/sec: 22.44 - lr: 0.020000\n",
-      "2021-09-08 11:48:10,822 epoch 4 - iter 6/8 - loss 0.39650009 - samples/sec: 22.60 - lr: 0.020000\n",
-      "2021-09-08 11:48:10,868 epoch 4 - iter 7/8 - loss 0.34127998 - samples/sec: 21.79 - lr: 0.020000\n",
-      "2021-09-08 11:48:10,913 epoch 4 - iter 8/8 - loss 0.30067890 - samples/sec: 22.53 - lr: 0.020000\n",
-      "2021-09-08 11:48:10,914 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:48:10,914 EPOCH 4 done: loss 0.3007 - lr 0.0200000\n",
-      "2021-09-08 11:48:10,983 DEV : loss 0.10664058476686478 - score 0.0\n",
-      "2021-09-08 11:48:10,984 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:48:10,987 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:48:11,050 epoch 5 - iter 1/8 - loss 0.21842377 - samples/sec: 20.70 - lr: 0.020000\n",
-      "2021-09-08 11:48:11,097 epoch 5 - iter 2/8 - loss 0.11086561 - samples/sec: 21.50 - lr: 0.020000\n",
-      "2021-09-08 11:48:11,146 epoch 5 - iter 3/8 - loss 0.07471176 - samples/sec: 20.68 - lr: 0.020000\n",
-      "2021-09-08 11:48:11,199 epoch 5 - iter 4/8 - loss 0.13575507 - samples/sec: 18.91 - lr: 0.020000\n",
-      "2021-09-08 11:48:11,249 epoch 5 - iter 5/8 - loss 0.11170995 - samples/sec: 20.60 - lr: 0.020000\n",
-      "2021-09-08 11:48:11,350 epoch 5 - iter 6/8 - loss 0.17822887 - samples/sec: 9.96 - lr: 0.020000\n",
-      "2021-09-08 11:48:11,399 epoch 5 - iter 7/8 - loss 0.15299484 - samples/sec: 20.75 - lr: 0.020000\n",
-      "2021-09-08 11:48:11,447 epoch 5 - iter 8/8 - loss 0.13404820 - samples/sec: 20.81 - lr: 0.020000\n",
-      "2021-09-08 11:48:11,449 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:48:11,449 EPOCH 5 done: loss 0.1340 - lr 0.0200000\n",
-      "2021-09-08 11:48:11,884 DEV : loss 0.009758314117789268 - score 0.0\n",
-      "2021-09-08 11:48:11,884 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 11:48:17,598 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:48:17,659 epoch 6 - iter 1/8 - loss 0.00194070 - samples/sec: 21.89 - lr: 0.020000\n",
-      "2021-09-08 11:48:17,704 epoch 6 - iter 2/8 - loss 0.00174737 - samples/sec: 22.44 - lr: 0.020000\n",
-      "2021-09-08 11:48:17,754 epoch 6 - iter 3/8 - loss 0.00143837 - samples/sec: 20.20 - lr: 0.020000\n",
-      "2021-09-08 11:48:17,809 epoch 6 - iter 4/8 - loss 0.15458436 - samples/sec: 18.38 - lr: 0.020000\n",
-      "2021-09-08 11:48:17,858 epoch 6 - iter 5/8 - loss 0.12534402 - samples/sec: 20.46 - lr: 0.020000\n",
-      "2021-09-08 11:48:17,908 epoch 6 - iter 6/8 - loss 0.23433206 - samples/sec: 20.14 - lr: 0.020000\n",
-      "2021-09-08 11:48:18,005 epoch 6 - iter 7/8 - loss 0.31694694 - samples/sec: 10.42 - lr: 0.020000\n",
-      "2021-09-08 11:48:18,049 epoch 6 - iter 8/8 - loss 0.27745768 - samples/sec: 22.53 - lr: 0.020000\n"
+      "2021-09-21 20:19:32,824 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:32,824 Corpus: \"Corpus: 8 train + 1 dev + 1 test sentences\"\n",
+      "2021-09-21 20:19:32,824 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:32,825 Parameters:\n",
+      "2021-09-21 20:19:32,825  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:19:32,825  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:19:32,825  - patience: \"3\"\n",
+      "2021-09-21 20:19:32,826  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:19:32,826  - max_epochs: \"10\"\n",
+      "2021-09-21 20:19:32,826  - shuffle: \"True\"\n",
+      "2021-09-21 20:19:32,827  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:19:32,827  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:19:32,827 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:32,827 Model training base path: \"None\"\n",
+      "2021-09-21 20:19:32,828 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:32,828 Device: cuda:0\n",
+      "2021-09-21 20:19:32,828 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:32,829 Embeddings storage mode: cpu\n",
+      "2021-09-21 20:19:32,835 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:32,942 epoch 1 - iter 1/8 - loss 1.78477299 - samples/sec: 14.60 - lr: 0.020000\n",
+      "2021-09-21 20:19:33,010 epoch 1 - iter 2/8 - loss 1.14425519 - samples/sec: 14.78 - lr: 0.020000\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:48:18,050 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:48:18,051 EPOCH 6 done: loss 0.2775 - lr 0.0200000\n",
-      "2021-09-08 11:48:18,128 DEV : loss 0.041015032678842545 - score 0.0\n",
-      "2021-09-08 11:48:18,129 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:48:18,137 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:48:18,247 epoch 7 - iter 1/8 - loss 0.46194541 - samples/sec: 10.45 - lr: 0.020000\n",
-      "2021-09-08 11:48:18,298 epoch 7 - iter 2/8 - loss 0.39706247 - samples/sec: 19.72 - lr: 0.020000\n",
-      "2021-09-08 11:48:18,347 epoch 7 - iter 3/8 - loss 0.30784037 - samples/sec: 20.99 - lr: 0.020000\n",
-      "2021-09-08 11:48:18,395 epoch 7 - iter 4/8 - loss 0.23116454 - samples/sec: 20.79 - lr: 0.020000\n",
-      "2021-09-08 11:48:18,444 epoch 7 - iter 5/8 - loss 0.18719969 - samples/sec: 20.77 - lr: 0.020000\n",
-      "2021-09-08 11:48:18,493 epoch 7 - iter 6/8 - loss 0.15724694 - samples/sec: 20.70 - lr: 0.020000\n",
-      "2021-09-08 11:48:18,542 epoch 7 - iter 7/8 - loss 0.13486985 - samples/sec: 20.63 - lr: 0.020000\n",
-      "2021-09-08 11:48:18,591 epoch 7 - iter 8/8 - loss 0.11824762 - samples/sec: 20.72 - lr: 0.020000\n",
-      "2021-09-08 11:48:18,593 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:48:18,593 EPOCH 7 done: loss 0.1182 - lr 0.0200000\n",
-      "2021-09-08 11:48:18,653 DEV : loss 0.17123302817344666 - score 0.0\n",
-      "2021-09-08 11:48:18,654 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:48:18,657 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:48:18,721 epoch 8 - iter 1/8 - loss 0.00101167 - samples/sec: 20.60 - lr: 0.020000\n",
-      "2021-09-08 11:48:18,775 epoch 8 - iter 2/8 - loss 0.36061621 - samples/sec: 18.80 - lr: 0.020000\n",
-      "2021-09-08 11:48:18,824 epoch 8 - iter 3/8 - loss 0.24116716 - samples/sec: 20.62 - lr: 0.020000\n",
-      "2021-09-08 11:48:18,871 epoch 8 - iter 4/8 - loss 0.18116298 - samples/sec: 21.75 - lr: 0.020000\n",
-      "2021-09-08 11:48:18,917 epoch 8 - iter 5/8 - loss 0.14586267 - samples/sec: 22.03 - lr: 0.020000\n",
-      "2021-09-08 11:48:19,014 epoch 8 - iter 6/8 - loss 0.14755740 - samples/sec: 10.36 - lr: 0.020000\n",
-      "2021-09-08 11:48:19,064 epoch 8 - iter 7/8 - loss 0.13578078 - samples/sec: 20.40 - lr: 0.020000\n",
-      "2021-09-08 11:48:19,111 epoch 8 - iter 8/8 - loss 0.11898273 - samples/sec: 21.25 - lr: 0.020000\n",
-      "2021-09-08 11:48:19,112 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:48:19,113 EPOCH 8 done: loss 0.1190 - lr 0.0200000\n",
-      "2021-09-08 11:48:19,296 DEV : loss 0.007861610502004623 - score 0.0\n",
-      "2021-09-08 11:48:19,296 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:19:33,058 epoch 1 - iter 3/8 - loss 1.32782092 - samples/sec: 20.87 - lr: 0.020000\n",
+      "2021-09-21 20:19:33,105 epoch 1 - iter 4/8 - loss 1.00809822 - samples/sec: 21.86 - lr: 0.020000\n",
+      "2021-09-21 20:19:33,165 epoch 1 - iter 5/8 - loss 0.80863237 - samples/sec: 16.63 - lr: 0.020000\n",
+      "2021-09-21 20:19:33,233 epoch 1 - iter 6/8 - loss 1.00293002 - samples/sec: 14.99 - lr: 0.020000\n",
+      "2021-09-21 20:19:33,303 epoch 1 - iter 7/8 - loss 0.92564543 - samples/sec: 14.19 - lr: 0.020000\n",
+      "2021-09-21 20:19:33,368 epoch 1 - iter 8/8 - loss 0.93463913 - samples/sec: 15.52 - lr: 0.020000\n",
+      "2021-09-21 20:19:33,369 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:33,370 EPOCH 1 done: loss 0.9346 - lr 0.0200000\n",
+      "2021-09-21 20:19:33,640 DEV : loss 0.5161831974983215 - score 0.0\n",
+      "2021-09-21 20:19:33,641 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:48:25,393 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:48:25,517 epoch 9 - iter 1/8 - loss 0.71844840 - samples/sec: 9.22 - lr: 0.020000\n",
-      "2021-09-08 11:48:25,570 epoch 9 - iter 2/8 - loss 0.49617682 - samples/sec: 19.18 - lr: 0.020000\n",
-      "2021-09-08 11:48:25,615 epoch 9 - iter 3/8 - loss 0.33124475 - samples/sec: 22.40 - lr: 0.020000\n",
-      "2021-09-08 11:48:25,660 epoch 9 - iter 4/8 - loss 0.24872619 - samples/sec: 22.49 - lr: 0.020000\n",
-      "2021-09-08 11:48:25,708 epoch 9 - iter 5/8 - loss 0.19928576 - samples/sec: 21.34 - lr: 0.020000\n",
-      "2021-09-08 11:48:25,755 epoch 9 - iter 6/8 - loss 0.16627207 - samples/sec: 21.53 - lr: 0.020000\n",
-      "2021-09-08 11:48:25,800 epoch 9 - iter 7/8 - loss 0.14349881 - samples/sec: 22.36 - lr: 0.020000\n",
-      "2021-09-08 11:48:25,850 epoch 9 - iter 8/8 - loss 0.13503619 - samples/sec: 20.05 - lr: 0.020000\n",
-      "2021-09-08 11:48:25,851 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:48:25,851 EPOCH 9 done: loss 0.1350 - lr 0.0200000\n",
-      "2021-09-08 11:48:29,313 DEV : loss 0.004403925500810146 - score 0.0\n",
-      "2021-09-08 11:48:29,314 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:19:36,771 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:36,855 epoch 2 - iter 1/8 - loss 0.02189321 - samples/sec: 20.29 - lr: 0.020000\n",
+      "2021-09-21 20:19:36,921 epoch 2 - iter 2/8 - loss 0.03672730 - samples/sec: 15.32 - lr: 0.020000\n",
+      "2021-09-21 20:19:36,993 epoch 2 - iter 3/8 - loss 0.14635704 - samples/sec: 13.92 - lr: 0.020000\n",
+      "2021-09-21 20:19:37,055 epoch 2 - iter 4/8 - loss 0.24336983 - samples/sec: 16.35 - lr: 0.020000\n",
+      "2021-09-21 20:19:37,109 epoch 2 - iter 5/8 - loss 0.21014479 - samples/sec: 18.58 - lr: 0.020000\n",
+      "2021-09-21 20:19:37,182 epoch 2 - iter 6/8 - loss 0.18782502 - samples/sec: 13.67 - lr: 0.020000\n",
+      "2021-09-21 20:19:37,272 epoch 2 - iter 7/8 - loss 0.16878512 - samples/sec: 11.16 - lr: 0.020000\n",
+      "2021-09-21 20:19:37,348 epoch 2 - iter 8/8 - loss 0.15131214 - samples/sec: 13.32 - lr: 0.020000\n",
+      "2021-09-21 20:19:37,349 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:37,349 EPOCH 2 done: loss 0.1513 - lr 0.0200000\n",
+      "2021-09-21 20:19:37,580 DEV : loss 1.3160991668701172 - score 0.0\n",
+      "2021-09-21 20:19:37,582 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:19:37,584 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:37,661 epoch 3 - iter 1/8 - loss 0.00393513 - samples/sec: 17.02 - lr: 0.020000\n",
+      "2021-09-21 20:19:37,730 epoch 3 - iter 2/8 - loss 0.02414272 - samples/sec: 14.52 - lr: 0.020000\n",
+      "2021-09-21 20:19:37,784 epoch 3 - iter 3/8 - loss 0.40707297 - samples/sec: 18.66 - lr: 0.020000\n",
+      "2021-09-21 20:19:37,870 epoch 3 - iter 4/8 - loss 0.30583600 - samples/sec: 11.67 - lr: 0.020000\n",
+      "2021-09-21 20:19:37,938 epoch 3 - iter 5/8 - loss 0.25330698 - samples/sec: 14.85 - lr: 0.020000\n",
+      "2021-09-21 20:19:37,994 epoch 3 - iter 6/8 - loss 0.21169342 - samples/sec: 18.07 - lr: 0.020000\n",
+      "2021-09-21 20:19:38,056 epoch 3 - iter 7/8 - loss 0.18209781 - samples/sec: 16.16 - lr: 0.020000\n",
+      "2021-09-21 20:19:38,103 epoch 3 - iter 8/8 - loss 0.18257359 - samples/sec: 21.64 - lr: 0.020000\n",
+      "2021-09-21 20:19:38,104 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:38,104 EPOCH 3 done: loss 0.1826 - lr 0.0200000\n",
+      "2021-09-21 20:19:38,271 DEV : loss 0.6335484981536865 - score 0.0\n",
+      "2021-09-21 20:19:38,272 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:19:38,274 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:38,348 epoch 4 - iter 1/8 - loss 0.00093729 - samples/sec: 17.00 - lr: 0.020000\n",
+      "2021-09-21 20:19:38,397 epoch 4 - iter 2/8 - loss 0.07624222 - samples/sec: 20.75 - lr: 0.020000\n",
+      "2021-09-21 20:19:38,461 epoch 4 - iter 3/8 - loss 0.05215959 - samples/sec: 15.63 - lr: 0.020000\n",
+      "2021-09-21 20:19:38,509 epoch 4 - iter 4/8 - loss 0.03960193 - samples/sec: 21.31 - lr: 0.020000\n",
+      "2021-09-21 20:19:38,562 epoch 4 - iter 5/8 - loss 0.03267737 - samples/sec: 18.94 - lr: 0.020000\n",
+      "2021-09-21 20:19:38,612 epoch 4 - iter 6/8 - loss 0.05571129 - samples/sec: 20.10 - lr: 0.020000\n",
+      "2021-09-21 20:19:38,659 epoch 4 - iter 7/8 - loss 0.12430936 - samples/sec: 21.59 - lr: 0.020000\n",
+      "2021-09-21 20:19:38,702 epoch 4 - iter 8/8 - loss 0.10961898 - samples/sec: 23.12 - lr: 0.020000\n",
+      "2021-09-21 20:19:38,703 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:38,704 EPOCH 4 done: loss 0.1096 - lr 0.0200000\n",
+      "2021-09-21 20:19:38,977 DEV : loss 0.3733237683773041 - score 0.0\n",
+      "2021-09-21 20:19:38,978 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:48:39,457 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:48:39,578 epoch 10 - iter 1/8 - loss 0.00305848 - samples/sec: 9.47 - lr: 0.020000\n",
-      "2021-09-08 11:48:39,625 epoch 10 - iter 2/8 - loss 0.00186806 - samples/sec: 21.22 - lr: 0.020000\n",
-      "2021-09-08 11:48:39,672 epoch 10 - iter 3/8 - loss 0.00274308 - samples/sec: 21.63 - lr: 0.020000\n",
-      "2021-09-08 11:48:39,717 epoch 10 - iter 4/8 - loss 0.00221126 - samples/sec: 22.50 - lr: 0.020000\n",
-      "2021-09-08 11:48:39,769 epoch 10 - iter 5/8 - loss 0.31944126 - samples/sec: 19.52 - lr: 0.020000\n",
-      "2021-09-08 11:48:39,814 epoch 10 - iter 6/8 - loss 0.26643930 - samples/sec: 22.49 - lr: 0.020000\n",
-      "2021-09-08 11:48:39,859 epoch 10 - iter 7/8 - loss 0.22848764 - samples/sec: 22.24 - lr: 0.020000\n",
-      "2021-09-08 11:48:39,906 epoch 10 - iter 8/8 - loss 0.20004929 - samples/sec: 21.49 - lr: 0.020000\n",
-      "2021-09-08 11:48:39,907 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:48:39,907 EPOCH 10 done: loss 0.2000 - lr 0.0200000\n",
-      "2021-09-08 11:48:42,192 DEV : loss 0.00953616388142109 - score 0.0\n",
-      "2021-09-08 11:48:42,193 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:48:49,299 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:48:49,300 Testing using best model ...\n",
-      "2021-09-08 11:48:49,302 loading file None/best-model.pt\n",
+      "2021-09-21 20:19:42,717 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:42,804 epoch 5 - iter 1/8 - loss 0.00365363 - samples/sec: 13.90 - lr: 0.020000\n",
+      "2021-09-21 20:19:42,859 epoch 5 - iter 2/8 - loss 0.02459593 - samples/sec: 18.68 - lr: 0.020000\n",
+      "2021-09-21 20:19:42,923 epoch 5 - iter 3/8 - loss 0.01691734 - samples/sec: 15.51 - lr: 0.020000\n",
+      "2021-09-21 20:19:42,971 epoch 5 - iter 4/8 - loss 0.01330948 - samples/sec: 21.42 - lr: 0.020000\n",
+      "2021-09-21 20:19:43,014 epoch 5 - iter 5/8 - loss 0.01090960 - samples/sec: 23.30 - lr: 0.020000\n",
+      "2021-09-21 20:19:43,061 epoch 5 - iter 6/8 - loss 0.04174296 - samples/sec: 21.58 - lr: 0.020000\n",
+      "2021-09-21 20:19:43,104 epoch 5 - iter 7/8 - loss 0.03599517 - samples/sec: 23.43 - lr: 0.020000\n",
+      "2021-09-21 20:19:43,158 epoch 5 - iter 8/8 - loss 0.03193395 - samples/sec: 18.40 - lr: 0.020000\n",
+      "2021-09-21 20:19:43,159 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:43,160 EPOCH 5 done: loss 0.0319 - lr 0.0200000\n",
+      "2021-09-21 20:19:43,325 DEV : loss 0.47309207916259766 - score 0.0\n",
+      "2021-09-21 20:19:43,326 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:19:43,329 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:43,388 epoch 6 - iter 1/8 - loss 0.00669497 - samples/sec: 22.27 - lr: 0.020000\n",
+      "2021-09-21 20:19:43,432 epoch 6 - iter 2/8 - loss 0.00499434 - samples/sec: 23.03 - lr: 0.020000\n",
+      "2021-09-21 20:19:43,486 epoch 6 - iter 3/8 - loss 0.00414054 - samples/sec: 18.52 - lr: 0.020000\n",
+      "2021-09-21 20:19:43,549 epoch 6 - iter 4/8 - loss 0.00360285 - samples/sec: 16.05 - lr: 0.020000\n",
+      "2021-09-21 20:19:43,593 epoch 6 - iter 5/8 - loss 0.00420004 - samples/sec: 23.09 - lr: 0.020000\n",
+      "2021-09-21 20:19:43,640 epoch 6 - iter 6/8 - loss 0.00380314 - samples/sec: 21.41 - lr: 0.020000\n",
+      "2021-09-21 20:19:43,699 epoch 6 - iter 7/8 - loss 0.00331024 - samples/sec: 17.04 - lr: 0.020000\n",
+      "2021-09-21 20:19:43,747 epoch 6 - iter 8/8 - loss 0.00297514 - samples/sec: 20.97 - lr: 0.020000\n",
+      "2021-09-21 20:19:43,748 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:43,749 EPOCH 6 done: loss 0.0030 - lr 0.0200000\n",
+      "2021-09-21 20:19:43,916 DEV : loss 0.5046802759170532 - score 0.0\n",
+      "2021-09-21 20:19:43,917 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:19:43,919 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:43,982 epoch 7 - iter 1/8 - loss 0.00044741 - samples/sec: 20.58 - lr: 0.020000\n",
+      "2021-09-21 20:19:44,059 epoch 7 - iter 2/8 - loss 0.00091432 - samples/sec: 13.17 - lr: 0.020000\n",
+      "2021-09-21 20:19:44,106 epoch 7 - iter 3/8 - loss 0.00119982 - samples/sec: 21.40 - lr: 0.020000\n",
+      "2021-09-21 20:19:44,153 epoch 7 - iter 4/8 - loss 0.00121159 - samples/sec: 21.39 - lr: 0.020000\n",
+      "2021-09-21 20:19:44,200 epoch 7 - iter 5/8 - loss 0.00116797 - samples/sec: 21.76 - lr: 0.020000\n",
+      "2021-09-21 20:19:44,260 epoch 7 - iter 6/8 - loss 0.00215853 - samples/sec: 16.77 - lr: 0.020000\n",
+      "2021-09-21 20:19:44,310 epoch 7 - iter 7/8 - loss 0.00200554 - samples/sec: 20.13 - lr: 0.020000\n",
+      "2021-09-21 20:19:44,373 epoch 7 - iter 8/8 - loss 0.00181872 - samples/sec: 16.06 - lr: 0.020000\n",
+      "2021-09-21 20:19:44,374 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:44,374 EPOCH 7 done: loss 0.0018 - lr 0.0200000\n",
+      "2021-09-21 20:19:44,592 DEV : loss 0.5108273029327393 - score 0.0\n",
+      "2021-09-21 20:19:44,593 BAD EPOCHS (no improvement): 3\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 20:19:44,595 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:44,732 epoch 8 - iter 1/8 - loss 0.00326640 - samples/sec: 8.59 - lr: 0.020000\n",
+      "2021-09-21 20:19:44,811 epoch 8 - iter 2/8 - loss 0.00332069 - samples/sec: 12.85 - lr: 0.020000\n",
+      "2021-09-21 20:19:44,854 epoch 8 - iter 3/8 - loss 0.00254874 - samples/sec: 23.04 - lr: 0.020000\n",
+      "2021-09-21 20:19:44,902 epoch 8 - iter 4/8 - loss 0.00234539 - samples/sec: 21.46 - lr: 0.020000\n",
+      "2021-09-21 20:19:44,953 epoch 8 - iter 5/8 - loss 0.00199670 - samples/sec: 19.54 - lr: 0.020000\n",
+      "2021-09-21 20:19:45,003 epoch 8 - iter 6/8 - loss 0.00176070 - samples/sec: 20.43 - lr: 0.020000\n",
+      "2021-09-21 20:19:45,051 epoch 8 - iter 7/8 - loss 0.00161162 - samples/sec: 21.32 - lr: 0.020000\n",
+      "2021-09-21 20:19:45,110 epoch 8 - iter 8/8 - loss 0.00151605 - samples/sec: 17.03 - lr: 0.020000\n",
+      "2021-09-21 20:19:45,111 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:45,112 EPOCH 8 done: loss 0.0015 - lr 0.0200000\n",
+      "2021-09-21 20:19:45,281 DEV : loss 0.5497645139694214 - score 0.0\n",
+      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 20:19:45,282 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:19:45,284 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:45,348 epoch 9 - iter 1/8 - loss 0.00094513 - samples/sec: 20.37 - lr: 0.010000\n",
+      "2021-09-21 20:19:45,396 epoch 9 - iter 2/8 - loss 0.00113483 - samples/sec: 21.24 - lr: 0.010000\n",
+      "2021-09-21 20:19:45,461 epoch 9 - iter 3/8 - loss 0.00110518 - samples/sec: 15.62 - lr: 0.010000\n",
+      "2021-09-21 20:19:45,506 epoch 9 - iter 4/8 - loss 0.00103912 - samples/sec: 22.43 - lr: 0.010000\n",
+      "2021-09-21 20:19:45,566 epoch 9 - iter 5/8 - loss 0.00114978 - samples/sec: 16.93 - lr: 0.010000\n",
+      "2021-09-21 20:19:45,620 epoch 9 - iter 6/8 - loss 0.00112757 - samples/sec: 18.69 - lr: 0.010000\n",
+      "2021-09-21 20:19:45,667 epoch 9 - iter 7/8 - loss 0.00100571 - samples/sec: 21.24 - lr: 0.010000\n",
+      "2021-09-21 20:19:45,713 epoch 9 - iter 8/8 - loss 0.00112001 - samples/sec: 22.28 - lr: 0.010000\n",
+      "2021-09-21 20:19:45,714 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:45,714 EPOCH 9 done: loss 0.0011 - lr 0.0100000\n",
+      "2021-09-21 20:19:45,975 DEV : loss 0.5546301007270813 - score 0.0\n",
+      "2021-09-21 20:19:45,976 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:19:46,049 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:46,112 epoch 10 - iter 1/8 - loss 0.00061062 - samples/sec: 21.09 - lr: 0.010000\n",
+      "2021-09-21 20:19:46,156 epoch 10 - iter 2/8 - loss 0.00091394 - samples/sec: 22.82 - lr: 0.010000\n",
+      "2021-09-21 20:19:46,210 epoch 10 - iter 3/8 - loss 0.00110148 - samples/sec: 18.73 - lr: 0.010000\n",
+      "2021-09-21 20:19:46,255 epoch 10 - iter 4/8 - loss 0.00121072 - samples/sec: 22.42 - lr: 0.010000\n",
+      "2021-09-21 20:19:46,312 epoch 10 - iter 5/8 - loss 0.00110468 - samples/sec: 17.63 - lr: 0.010000\n",
+      "2021-09-21 20:19:46,374 epoch 10 - iter 6/8 - loss 0.00112178 - samples/sec: 16.23 - lr: 0.010000\n",
+      "2021-09-21 20:19:46,418 epoch 10 - iter 7/8 - loss 0.00121757 - samples/sec: 23.04 - lr: 0.010000\n",
+      "2021-09-21 20:19:46,465 epoch 10 - iter 8/8 - loss 0.00114574 - samples/sec: 21.44 - lr: 0.010000\n",
+      "2021-09-21 20:19:46,466 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:46,466 EPOCH 10 done: loss 0.0011 - lr 0.0100000\n",
+      "2021-09-21 20:19:46,844 DEV : loss 0.5599662661552429 - score 0.0\n",
+      "2021-09-21 20:19:46,845 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:19:57,145 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:19:57,145 Testing using best model ...\n",
+      "2021-09-21 20:19:57,147 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:48:54,699 \t1.0\n",
-      "2021-09-08 11:48:54,700 \n",
+      "2021-09-21 20:20:03,175 \t1.0\n",
+      "2021-09-21 20:20:03,176 \n",
       "Results:\n",
       "- F-score (micro) 1.0\n",
       "- F-score (macro) 0.1111\n",
@@ -590,38 +588,38 @@
       "Family & Relationships     0.0000    0.0000    0.0000         0\n",
       "                Health     0.0000    0.0000    0.0000         0\n",
       " Entertainment & Music     0.0000    0.0000    0.0000         0\n",
-      " Education & Reference     0.0000    0.0000    0.0000         0\n",
+      "  Computers & Internet     0.0000    0.0000    0.0000         0\n",
       "                Sports     0.0000    0.0000    0.0000         0\n",
       "     Society & Culture     0.0000    0.0000    0.0000         0\n",
       "    Business & Finance     0.0000    0.0000    0.0000         0\n",
       " Politics & Government     0.0000    0.0000    0.0000         0\n",
-      "  Computers & Internet     1.0000    1.0000    1.0000         1\n",
+      " Education & Reference     1.0000    1.0000    1.0000         1\n",
       "\n",
       "             micro avg     1.0000    1.0000    1.0000         1\n",
       "             macro avg     0.1111    0.1111    0.1111         1\n",
       "          weighted avg     1.0000    1.0000    1.0000         1\n",
       "           samples avg     1.0000    1.0000    1.0000         1\n",
       "\n",
-      "2021-09-08 11:48:54,700 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:06,882 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
+      "2021-09-21 20:20:03,176 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:18,775 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:49:11,401 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:20:23,396 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 9/9 [00:00<00:00, 10390.51it/s]"
+      "100%|██████████| 9/9 [00:00<00:00, 6605.20it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:49:11,403 [b'Health', b'Science & Mathematics', b'Entertainment & Music', b'Computers & Internet', b'Sports', b'Society & Culture', b'Business & Finance', b'Politics & Government', b'Family & Relationships']\n",
-      "2021-09-08 11:49:11,413 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:11,415 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:20:23,400 [b'Health', b'Entertainment & Music', b'Computers & Internet', b'Education & Reference', b'Sports', b'Society & Culture', b'Business & Finance', b'Politics & Government', b'Family & Relationships']\n",
+      "2021-09-21 20:20:23,426 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:23,429 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -934,27 +932,26 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:49:11,415 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:11,416 Corpus: \"Corpus: 8 train + 1 dev + 1 test sentences\"\n",
-      "2021-09-08 11:49:11,416 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:11,416 Parameters:\n",
-      "2021-09-08 11:49:11,416  - learning_rate: \"0.02\"\n",
-      "2021-09-08 11:49:11,417  - mini_batch_size: \"1\"\n",
-      "2021-09-08 11:49:11,417  - patience: \"3\"\n",
-      "2021-09-08 11:49:11,417  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 11:49:11,418  - max_epochs: \"10\"\n",
-      "2021-09-08 11:49:11,418  - shuffle: \"True\"\n",
-      "2021-09-08 11:49:11,418  - train_with_dev: \"False\"\n",
-      "2021-09-08 11:49:11,418  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 11:49:11,419 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:11,419 Model training base path: \"None\"\n",
-      "2021-09-08 11:49:11,419 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:11,420 Device: cuda:1\n",
-      "2021-09-08 11:49:11,420 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:11,420 Embeddings storage mode: cpu\n",
-      "2021-09-08 11:49:11,450 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:11,508 epoch 1 - iter 1/8 - loss 0.26623678 - samples/sec: 23.11 - lr: 0.020000\n",
-      "2021-09-08 11:49:11,562 epoch 1 - iter 2/8 - loss 0.23499467 - samples/sec: 18.91 - lr: 0.020000\n"
+      "2021-09-21 20:20:23,429 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:23,430 Corpus: \"Corpus: 8 train + 1 dev + 1 test sentences\"\n",
+      "2021-09-21 20:20:23,430 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:23,431 Parameters:\n",
+      "2021-09-21 20:20:23,431  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:20:23,431  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:20:23,432  - patience: \"3\"\n",
+      "2021-09-21 20:20:23,432  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:20:23,432  - max_epochs: \"10\"\n",
+      "2021-09-21 20:20:23,433  - shuffle: \"True\"\n",
+      "2021-09-21 20:20:23,433  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:20:23,433  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:20:23,434 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:23,434 Model training base path: \"None\"\n",
+      "2021-09-21 20:20:23,434 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:23,435 Device: cuda:0\n",
+      "2021-09-21 20:20:23,435 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:23,436 Embeddings storage mode: cpu\n",
+      "2021-09-21 20:20:23,442 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:23,563 epoch 1 - iter 1/8 - loss 0.01920271 - samples/sec: 17.64 - lr: 0.020000\n"
      ]
     },
     {
@@ -968,205 +965,196 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:49:11,610 epoch 1 - iter 3/8 - loss 0.33620683 - samples/sec: 21.18 - lr: 0.020000\n",
-      "2021-09-08 11:49:11,655 epoch 1 - iter 4/8 - loss 0.26048617 - samples/sec: 22.04 - lr: 0.020000\n",
-      "2021-09-08 11:49:11,699 epoch 1 - iter 5/8 - loss 0.20914380 - samples/sec: 22.95 - lr: 0.020000\n",
-      "2021-09-08 11:49:11,747 epoch 1 - iter 6/8 - loss 0.26509482 - samples/sec: 21.06 - lr: 0.020000\n",
-      "2021-09-08 11:49:11,797 epoch 1 - iter 7/8 - loss 0.47965472 - samples/sec: 20.32 - lr: 0.020000\n",
-      "2021-09-08 11:49:11,946 epoch 1 - iter 8/8 - loss 0.67006195 - samples/sec: 6.74 - lr: 0.020000\n",
-      "2021-09-08 11:49:11,947 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:11,947 EPOCH 1 done: loss 0.6701 - lr 0.0200000\n",
-      "2021-09-08 11:49:11,990 DEV : loss 0.13643470406532288 - score 0.0\n",
-      "2021-09-08 11:49:11,990 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:20:23,647 epoch 1 - iter 2/8 - loss 0.11332437 - samples/sec: 11.92 - lr: 0.020000\n",
+      "2021-09-21 20:20:23,717 epoch 1 - iter 3/8 - loss 0.07757569 - samples/sec: 14.50 - lr: 0.020000\n",
+      "2021-09-21 20:20:23,800 epoch 1 - iter 4/8 - loss 0.69098664 - samples/sec: 12.16 - lr: 0.020000\n",
+      "2021-09-21 20:20:23,876 epoch 1 - iter 5/8 - loss 0.58545082 - samples/sec: 13.22 - lr: 0.020000\n",
+      "2021-09-21 20:20:23,955 epoch 1 - iter 6/8 - loss 0.74822376 - samples/sec: 12.74 - lr: 0.020000\n",
+      "2021-09-21 20:20:24,049 epoch 1 - iter 7/8 - loss 0.83927780 - samples/sec: 10.69 - lr: 0.020000\n",
+      "2021-09-21 20:20:24,115 epoch 1 - iter 8/8 - loss 0.89708027 - samples/sec: 15.46 - lr: 0.020000\n",
+      "2021-09-21 20:20:24,116 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:24,117 EPOCH 1 done: loss 0.8971 - lr 0.0200000\n",
+      "2021-09-21 20:20:24,188 DEV : loss 0.1658076047897339 - score 0.0\n",
+      "2021-09-21 20:20:24,189 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:20:36,823 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:36,943 epoch 2 - iter 1/8 - loss 1.01121867 - samples/sec: 15.68 - lr: 0.020000\n",
+      "2021-09-21 20:20:36,995 epoch 2 - iter 2/8 - loss 0.57626782 - samples/sec: 19.38 - lr: 0.020000\n",
+      "2021-09-21 20:20:37,067 epoch 2 - iter 3/8 - loss 0.38797981 - samples/sec: 13.98 - lr: 0.020000\n",
+      "2021-09-21 20:20:37,148 epoch 2 - iter 4/8 - loss 0.31006249 - samples/sec: 12.46 - lr: 0.020000\n",
+      "2021-09-21 20:20:37,267 epoch 2 - iter 5/8 - loss 0.34613940 - samples/sec: 8.38 - lr: 0.020000\n",
+      "2021-09-21 20:20:37,396 epoch 2 - iter 6/8 - loss 0.38159300 - samples/sec: 7.79 - lr: 0.020000\n",
+      "2021-09-21 20:20:37,519 epoch 2 - iter 7/8 - loss 0.41827837 - samples/sec: 8.20 - lr: 0.020000\n",
+      "2021-09-21 20:20:37,609 epoch 2 - iter 8/8 - loss 0.37922493 - samples/sec: 11.14 - lr: 0.020000\n",
+      "2021-09-21 20:20:37,610 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:37,611 EPOCH 2 done: loss 0.3792 - lr 0.0200000\n",
+      "2021-09-21 20:20:37,685 DEV : loss 0.03461692854762077 - score 0.0\n",
+      "2021-09-21 20:20:37,690 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:49:16,747 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:16,813 epoch 2 - iter 1/8 - loss 0.01748699 - samples/sec: 19.94 - lr: 0.020000\n",
-      "2021-09-08 11:49:16,863 epoch 2 - iter 2/8 - loss 0.01372855 - samples/sec: 19.83 - lr: 0.020000\n",
-      "2021-09-08 11:49:16,912 epoch 2 - iter 3/8 - loss 0.03701132 - samples/sec: 21.02 - lr: 0.020000\n",
-      "2021-09-08 11:49:16,961 epoch 2 - iter 4/8 - loss 0.14488097 - samples/sec: 20.55 - lr: 0.020000\n",
-      "2021-09-08 11:49:17,009 epoch 2 - iter 5/8 - loss 0.14493037 - samples/sec: 21.09 - lr: 0.020000\n",
-      "2021-09-08 11:49:17,057 epoch 2 - iter 6/8 - loss 0.22535083 - samples/sec: 20.80 - lr: 0.020000\n",
-      "2021-09-08 11:49:17,101 epoch 2 - iter 7/8 - loss 0.19615506 - samples/sec: 22.72 - lr: 0.020000\n",
-      "2021-09-08 11:49:17,251 epoch 2 - iter 8/8 - loss 0.22442118 - samples/sec: 6.69 - lr: 0.020000\n",
-      "2021-09-08 11:49:17,252 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:17,253 EPOCH 2 done: loss 0.2244 - lr 0.0200000\n",
-      "2021-09-08 11:49:17,413 DEV : loss 0.0927758514881134 - score 0.0\n",
-      "2021-09-08 11:49:17,414 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:20:53,139 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:53,216 epoch 3 - iter 1/8 - loss 0.05501922 - samples/sec: 17.16 - lr: 0.020000\n",
+      "2021-09-21 20:20:53,265 epoch 3 - iter 2/8 - loss 0.52412344 - samples/sec: 20.63 - lr: 0.020000\n",
+      "2021-09-21 20:20:53,315 epoch 3 - iter 3/8 - loss 0.35309683 - samples/sec: 19.98 - lr: 0.020000\n",
+      "2021-09-21 20:20:53,377 epoch 3 - iter 4/8 - loss 0.52768933 - samples/sec: 16.46 - lr: 0.020000\n",
+      "2021-09-21 20:20:53,436 epoch 3 - iter 5/8 - loss 0.46215261 - samples/sec: 17.08 - lr: 0.020000\n",
+      "2021-09-21 20:20:53,479 epoch 3 - iter 6/8 - loss 0.38807036 - samples/sec: 23.22 - lr: 0.020000\n",
+      "2021-09-21 20:20:53,523 epoch 3 - iter 7/8 - loss 0.33303164 - samples/sec: 22.77 - lr: 0.020000\n",
+      "2021-09-21 20:20:53,575 epoch 3 - iter 8/8 - loss 0.29802493 - samples/sec: 19.44 - lr: 0.020000\n",
+      "2021-09-21 20:20:53,576 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:53,577 EPOCH 3 done: loss 0.2980 - lr 0.0200000\n",
+      "2021-09-21 20:20:54,536 DEV : loss 0.0144829535856843 - score 0.0\n",
+      "2021-09-21 20:20:54,536 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:49:23,738 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:23,799 epoch 3 - iter 1/8 - loss 0.02379320 - samples/sec: 22.28 - lr: 0.020000\n",
-      "2021-09-08 11:49:23,851 epoch 3 - iter 2/8 - loss 0.06076338 - samples/sec: 19.34 - lr: 0.020000\n",
-      "2021-09-08 11:49:24,018 epoch 3 - iter 3/8 - loss 0.34641814 - samples/sec: 6.01 - lr: 0.020000\n",
-      "2021-09-08 11:49:24,070 epoch 3 - iter 4/8 - loss 0.26743395 - samples/sec: 19.32 - lr: 0.020000\n",
-      "2021-09-08 11:49:24,116 epoch 3 - iter 5/8 - loss 0.21421077 - samples/sec: 22.14 - lr: 0.020000\n",
-      "2021-09-08 11:49:24,162 epoch 3 - iter 6/8 - loss 0.18030548 - samples/sec: 21.59 - lr: 0.020000\n",
-      "2021-09-08 11:49:24,212 epoch 3 - iter 7/8 - loss 0.16736144 - samples/sec: 20.40 - lr: 0.020000\n",
-      "2021-09-08 11:49:24,256 epoch 3 - iter 8/8 - loss 0.14711646 - samples/sec: 22.74 - lr: 0.020000\n",
-      "2021-09-08 11:49:24,257 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:24,258 EPOCH 3 done: loss 0.1471 - lr 0.0200000\n",
-      "2021-09-08 11:49:24,420 DEV : loss 0.01121446117758751 - score 0.0\n",
-      "2021-09-08 11:49:24,421 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:21:03,462 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:03,549 epoch 4 - iter 1/8 - loss 0.01063694 - samples/sec: 14.34 - lr: 0.020000\n",
+      "2021-09-21 20:21:03,609 epoch 4 - iter 2/8 - loss 0.00708495 - samples/sec: 16.91 - lr: 0.020000\n",
+      "2021-09-21 20:21:03,685 epoch 4 - iter 3/8 - loss 0.00550958 - samples/sec: 13.25 - lr: 0.020000\n",
+      "2021-09-21 20:21:03,771 epoch 4 - iter 4/8 - loss 0.01122990 - samples/sec: 11.68 - lr: 0.020000\n",
+      "2021-09-21 20:21:03,847 epoch 4 - iter 5/8 - loss 0.03271115 - samples/sec: 13.20 - lr: 0.020000\n",
+      "2021-09-21 20:21:03,927 epoch 4 - iter 6/8 - loss 0.28299834 - samples/sec: 12.63 - lr: 0.020000\n",
+      "2021-09-21 20:21:03,972 epoch 4 - iter 7/8 - loss 0.24282270 - samples/sec: 22.36 - lr: 0.020000\n",
+      "2021-09-21 20:21:04,022 epoch 4 - iter 8/8 - loss 0.21393933 - samples/sec: 20.24 - lr: 0.020000\n",
+      "2021-09-21 20:21:04,023 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:04,023 EPOCH 4 done: loss 0.2139 - lr 0.0200000\n",
+      "2021-09-21 20:21:04,490 DEV : loss 0.00558426883071661 - score 0.0\n",
+      "2021-09-21 20:21:04,491 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:49:28,837 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:28,899 epoch 4 - iter 1/8 - loss 0.00497925 - samples/sec: 21.97 - lr: 0.020000\n",
-      "2021-09-08 11:49:28,945 epoch 4 - iter 2/8 - loss 0.00544638 - samples/sec: 21.98 - lr: 0.020000\n",
-      "2021-09-08 11:49:28,990 epoch 4 - iter 3/8 - loss 0.00423924 - samples/sec: 22.59 - lr: 0.020000\n",
-      "2021-09-08 11:49:29,146 epoch 4 - iter 4/8 - loss 0.05666647 - samples/sec: 6.42 - lr: 0.020000\n",
-      "2021-09-08 11:49:29,193 epoch 4 - iter 5/8 - loss 0.04586637 - samples/sec: 21.55 - lr: 0.020000\n",
-      "2021-09-08 11:49:29,239 epoch 4 - iter 6/8 - loss 0.03972727 - samples/sec: 22.02 - lr: 0.020000\n",
-      "2021-09-08 11:49:29,288 epoch 4 - iter 7/8 - loss 0.04100971 - samples/sec: 20.43 - lr: 0.020000\n",
-      "2021-09-08 11:49:29,335 epoch 4 - iter 8/8 - loss 0.03606402 - samples/sec: 21.68 - lr: 0.020000\n",
-      "2021-09-08 11:49:29,336 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:29,336 EPOCH 4 done: loss 0.0361 - lr 0.0200000\n",
-      "2021-09-08 11:49:29,485 DEV : loss 0.17179735004901886 - score 0.0\n",
-      "2021-09-08 11:49:29,485 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:49:29,638 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:29,698 epoch 5 - iter 1/8 - loss 0.01047934 - samples/sec: 22.23 - lr: 0.020000\n",
-      "2021-09-08 11:49:29,744 epoch 5 - iter 2/8 - loss 0.00687320 - samples/sec: 21.91 - lr: 0.020000\n",
-      "2021-09-08 11:49:29,791 epoch 5 - iter 3/8 - loss 0.00542134 - samples/sec: 21.64 - lr: 0.020000\n",
-      "2021-09-08 11:49:29,836 epoch 5 - iter 4/8 - loss 0.00448100 - samples/sec: 22.13 - lr: 0.020000\n",
-      "2021-09-08 11:49:29,881 epoch 5 - iter 5/8 - loss 0.00393984 - samples/sec: 22.80 - lr: 0.020000\n",
-      "2021-09-08 11:49:30,027 epoch 5 - iter 6/8 - loss 0.00655210 - samples/sec: 6.85 - lr: 0.020000\n",
-      "2021-09-08 11:49:30,072 epoch 5 - iter 7/8 - loss 0.00566154 - samples/sec: 22.74 - lr: 0.020000\n",
-      "2021-09-08 11:49:30,119 epoch 5 - iter 8/8 - loss 0.00502063 - samples/sec: 21.48 - lr: 0.020000\n",
-      "2021-09-08 11:49:30,120 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:30,120 EPOCH 5 done: loss 0.0050 - lr 0.0200000\n",
-      "2021-09-08 11:49:30,267 DEV : loss 0.23926721513271332 - score 0.0\n",
-      "2021-09-08 11:49:30,268 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:49:30,352 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:30,411 epoch 6 - iter 1/8 - loss 0.00119792 - samples/sec: 22.54 - lr: 0.020000\n",
-      "2021-09-08 11:49:30,458 epoch 6 - iter 2/8 - loss 0.00114508 - samples/sec: 21.60 - lr: 0.020000\n",
-      "2021-09-08 11:49:30,503 epoch 6 - iter 3/8 - loss 0.00092592 - samples/sec: 22.20 - lr: 0.020000\n",
-      "2021-09-08 11:49:30,551 epoch 6 - iter 4/8 - loss 0.00187727 - samples/sec: 21.37 - lr: 0.020000\n",
-      "2021-09-08 11:49:30,596 epoch 6 - iter 5/8 - loss 0.00174772 - samples/sec: 22.14 - lr: 0.020000\n",
-      "2021-09-08 11:49:30,640 epoch 6 - iter 6/8 - loss 0.00196745 - samples/sec: 22.80 - lr: 0.020000\n",
-      "2021-09-08 11:49:30,786 epoch 6 - iter 7/8 - loss 0.00439112 - samples/sec: 6.89 - lr: 0.020000\n",
-      "2021-09-08 11:49:30,833 epoch 6 - iter 8/8 - loss 0.00388900 - samples/sec: 21.43 - lr: 0.020000\n",
-      "2021-09-08 11:49:30,834 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:30,835 EPOCH 6 done: loss 0.0039 - lr 0.0200000\n",
-      "2021-09-08 11:49:31,862 DEV : loss 0.23242804408073425 - score 0.0\n",
-      "2021-09-08 11:49:31,863 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:49:31,865 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:31,924 epoch 7 - iter 1/8 - loss 0.00194957 - samples/sec: 22.65 - lr: 0.020000\n",
-      "2021-09-08 11:49:32,072 epoch 7 - iter 2/8 - loss 0.27559866 - samples/sec: 6.78 - lr: 0.020000\n",
-      "2021-09-08 11:49:32,117 epoch 7 - iter 3/8 - loss 0.18407844 - samples/sec: 22.51 - lr: 0.020000\n",
-      "2021-09-08 11:49:32,161 epoch 7 - iter 4/8 - loss 0.13812682 - samples/sec: 22.87 - lr: 0.020000\n",
-      "2021-09-08 11:49:32,206 epoch 7 - iter 5/8 - loss 0.11094965 - samples/sec: 22.34 - lr: 0.020000\n",
-      "2021-09-08 11:49:32,252 epoch 7 - iter 6/8 - loss 0.09256842 - samples/sec: 21.77 - lr: 0.020000\n",
-      "2021-09-08 11:49:32,298 epoch 7 - iter 7/8 - loss 0.08008341 - samples/sec: 22.03 - lr: 0.020000\n",
-      "2021-09-08 11:49:32,345 epoch 7 - iter 8/8 - loss 0.07014149 - samples/sec: 21.74 - lr: 0.020000\n",
-      "2021-09-08 11:49:32,346 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:32,346 EPOCH 7 done: loss 0.0701 - lr 0.0200000\n",
-      "2021-09-08 11:49:32,416 DEV : loss 0.1661098599433899 - score 0.0\n",
-      "Epoch     7: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 11:49:32,417 BAD EPOCHS (no improvement): 4\n"
+      "2021-09-21 20:21:16,615 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:16,693 epoch 5 - iter 1/8 - loss 0.06869321 - samples/sec: 16.15 - lr: 0.020000\n",
+      "2021-09-21 20:21:16,740 epoch 5 - iter 2/8 - loss 0.03571864 - samples/sec: 21.81 - lr: 0.020000\n",
+      "2021-09-21 20:21:16,787 epoch 5 - iter 3/8 - loss 0.02405420 - samples/sec: 21.20 - lr: 0.020000\n",
+      "2021-09-21 20:21:16,838 epoch 5 - iter 4/8 - loss 0.01880566 - samples/sec: 19.94 - lr: 0.020000\n",
+      "2021-09-21 20:21:16,901 epoch 5 - iter 5/8 - loss 0.02231937 - samples/sec: 16.04 - lr: 0.020000\n",
+      "2021-09-21 20:21:16,953 epoch 5 - iter 6/8 - loss 0.01926787 - samples/sec: 19.31 - lr: 0.020000\n",
+      "2021-09-21 20:21:16,998 epoch 5 - iter 7/8 - loss 0.01679484 - samples/sec: 22.62 - lr: 0.020000\n",
+      "2021-09-21 20:21:17,047 epoch 5 - iter 8/8 - loss 0.01491576 - samples/sec: 20.58 - lr: 0.020000\n",
+      "2021-09-21 20:21:17,048 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:17,048 EPOCH 5 done: loss 0.0149 - lr 0.0200000\n",
+      "2021-09-21 20:21:17,199 DEV : loss 0.000955466995947063 - score 0.0\n",
+      "2021-09-21 20:21:17,200 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:21:23,770 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:23,884 epoch 6 - iter 1/8 - loss 0.01463541 - samples/sec: 11.67 - lr: 0.020000\n",
+      "2021-09-21 20:21:23,937 epoch 6 - iter 2/8 - loss 0.00784674 - samples/sec: 19.09 - lr: 0.020000\n",
+      "2021-09-21 20:21:24,016 epoch 6 - iter 3/8 - loss 0.00676370 - samples/sec: 12.79 - lr: 0.020000\n",
+      "2021-09-21 20:21:24,087 epoch 6 - iter 4/8 - loss 0.01008943 - samples/sec: 14.60 - lr: 0.020000\n",
+      "2021-09-21 20:21:24,148 epoch 6 - iter 5/8 - loss 0.00839592 - samples/sec: 16.35 - lr: 0.020000\n",
+      "2021-09-21 20:21:24,214 epoch 6 - iter 6/8 - loss 0.18289229 - samples/sec: 15.30 - lr: 0.020000\n",
+      "2021-09-21 20:21:24,271 epoch 6 - iter 7/8 - loss 0.15704585 - samples/sec: 17.64 - lr: 0.020000\n",
+      "2021-09-21 20:21:24,325 epoch 6 - iter 8/8 - loss 0.13780671 - samples/sec: 18.72 - lr: 0.020000\n",
+      "2021-09-21 20:21:24,327 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:24,327 EPOCH 6 done: loss 0.1378 - lr 0.0200000\n",
+      "2021-09-21 20:21:24,371 DEV : loss 0.0005458680097945035 - score 0.0\n",
+      "2021-09-21 20:21:24,374 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:21:28,541 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:28,651 epoch 7 - iter 1/8 - loss 0.00468829 - samples/sec: 16.04 - lr: 0.020000\n",
+      "2021-09-21 20:21:28,723 epoch 7 - iter 2/8 - loss 0.00287214 - samples/sec: 14.01 - lr: 0.020000\n",
+      "2021-09-21 20:21:28,786 epoch 7 - iter 3/8 - loss 0.01207143 - samples/sec: 16.38 - lr: 0.020000\n",
+      "2021-09-21 20:21:28,863 epoch 7 - iter 4/8 - loss 0.02557968 - samples/sec: 13.06 - lr: 0.020000\n",
+      "2021-09-21 20:21:28,945 epoch 7 - iter 5/8 - loss 0.34077901 - samples/sec: 12.22 - lr: 0.020000\n",
+      "2021-09-21 20:21:29,024 epoch 7 - iter 6/8 - loss 0.28454248 - samples/sec: 12.69 - lr: 0.020000\n",
+      "2021-09-21 20:21:29,098 epoch 7 - iter 7/8 - loss 0.24404821 - samples/sec: 13.60 - lr: 0.020000\n",
+      "2021-09-21 20:21:29,170 epoch 7 - iter 8/8 - loss 0.21386719 - samples/sec: 13.94 - lr: 0.020000\n",
+      "2021-09-21 20:21:29,171 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:29,172 EPOCH 7 done: loss 0.2139 - lr 0.0200000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:49:32,419 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:32,477 epoch 8 - iter 1/8 - loss 0.00089569 - samples/sec: 22.96 - lr: 0.010000\n",
-      "2021-09-08 11:49:32,521 epoch 8 - iter 2/8 - loss 0.00102448 - samples/sec: 22.77 - lr: 0.010000\n",
-      "2021-09-08 11:49:32,568 epoch 8 - iter 3/8 - loss 0.00116063 - samples/sec: 21.73 - lr: 0.010000\n",
-      "2021-09-08 11:49:32,713 epoch 8 - iter 4/8 - loss 0.00252185 - samples/sec: 6.89 - lr: 0.010000\n",
-      "2021-09-08 11:49:32,758 epoch 8 - iter 5/8 - loss 0.00211054 - samples/sec: 22.52 - lr: 0.010000\n",
-      "2021-09-08 11:49:32,805 epoch 8 - iter 6/8 - loss 0.00189894 - samples/sec: 21.57 - lr: 0.010000\n",
-      "2021-09-08 11:49:32,850 epoch 8 - iter 7/8 - loss 0.00222835 - samples/sec: 22.50 - lr: 0.010000\n",
-      "2021-09-08 11:49:32,894 epoch 8 - iter 8/8 - loss 0.00198227 - samples/sec: 22.80 - lr: 0.010000\n",
-      "2021-09-08 11:49:32,895 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:32,896 EPOCH 8 done: loss 0.0020 - lr 0.0100000\n",
-      "2021-09-08 11:49:32,946 DEV : loss 0.16629239916801453 - score 0.0\n",
-      "2021-09-08 11:49:32,947 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:49:32,949 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:33,007 epoch 9 - iter 1/8 - loss 0.00707603 - samples/sec: 22.40 - lr: 0.010000\n",
-      "2021-09-08 11:49:33,052 epoch 9 - iter 2/8 - loss 0.00380738 - samples/sec: 22.79 - lr: 0.010000\n",
-      "2021-09-08 11:49:33,096 epoch 9 - iter 3/8 - loss 0.00354187 - samples/sec: 22.82 - lr: 0.010000\n",
-      "2021-09-08 11:49:33,245 epoch 9 - iter 4/8 - loss 0.00405906 - samples/sec: 6.75 - lr: 0.010000\n",
-      "2021-09-08 11:49:33,293 epoch 9 - iter 5/8 - loss 0.00343321 - samples/sec: 20.95 - lr: 0.010000\n",
-      "2021-09-08 11:49:33,342 epoch 9 - iter 6/8 - loss 0.00312281 - samples/sec: 20.82 - lr: 0.010000\n",
-      "2021-09-08 11:49:33,389 epoch 9 - iter 7/8 - loss 0.00272106 - samples/sec: 21.28 - lr: 0.010000\n",
-      "2021-09-08 11:49:33,437 epoch 9 - iter 8/8 - loss 0.00247299 - samples/sec: 21.40 - lr: 0.010000\n",
-      "2021-09-08 11:49:33,438 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:33,438 EPOCH 9 done: loss 0.0025 - lr 0.0100000\n",
-      "2021-09-08 11:49:33,469 DEV : loss 0.1519452929496765 - score 0.0\n",
-      "2021-09-08 11:49:33,470 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:49:33,472 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:33,636 epoch 10 - iter 1/8 - loss 0.00302278 - samples/sec: 6.72 - lr: 0.010000\n",
-      "2021-09-08 11:49:33,685 epoch 10 - iter 2/8 - loss 0.00182641 - samples/sec: 20.60 - lr: 0.010000\n",
-      "2021-09-08 11:49:33,734 epoch 10 - iter 3/8 - loss 0.00158882 - samples/sec: 20.69 - lr: 0.010000\n",
-      "2021-09-08 11:49:33,780 epoch 10 - iter 4/8 - loss 0.00132869 - samples/sec: 22.29 - lr: 0.010000\n",
-      "2021-09-08 11:49:33,826 epoch 10 - iter 5/8 - loss 0.00229425 - samples/sec: 22.09 - lr: 0.010000\n",
-      "2021-09-08 11:49:33,870 epoch 10 - iter 6/8 - loss 0.00194897 - samples/sec: 22.78 - lr: 0.010000\n",
-      "2021-09-08 11:49:33,919 epoch 10 - iter 7/8 - loss 0.00178196 - samples/sec: 20.77 - lr: 0.010000\n",
-      "2021-09-08 11:49:33,963 epoch 10 - iter 8/8 - loss 0.00185880 - samples/sec: 22.78 - lr: 0.010000\n",
-      "2021-09-08 11:49:33,964 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:33,964 EPOCH 10 done: loss 0.0019 - lr 0.0100000\n",
-      "2021-09-08 11:49:34,095 DEV : loss 0.1351539045572281 - score 0.0\n",
-      "2021-09-08 11:49:34,096 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:49:39,766 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:39,767 Testing using best model ...\n",
-      "2021-09-08 11:49:39,768 loading file None/best-model.pt\n",
+      "2021-09-21 20:21:29,238 DEV : loss 0.0006203249795362353 - score 0.0\n",
+      "2021-09-21 20:21:29,239 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:21:29,242 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:29,389 epoch 8 - iter 1/8 - loss 0.00208826 - samples/sec: 11.09 - lr: 0.020000\n",
+      "2021-09-21 20:21:29,456 epoch 8 - iter 2/8 - loss 0.00244130 - samples/sec: 14.98 - lr: 0.020000\n",
+      "2021-09-21 20:21:29,521 epoch 8 - iter 3/8 - loss 0.00187079 - samples/sec: 15.68 - lr: 0.020000\n",
+      "2021-09-21 20:21:29,590 epoch 8 - iter 4/8 - loss 0.00190929 - samples/sec: 14.51 - lr: 0.020000\n",
+      "2021-09-21 20:21:29,660 epoch 8 - iter 5/8 - loss 0.01334532 - samples/sec: 14.34 - lr: 0.020000\n",
+      "2021-09-21 20:21:29,754 epoch 8 - iter 6/8 - loss 0.01131182 - samples/sec: 10.76 - lr: 0.020000\n",
+      "2021-09-21 20:21:29,831 epoch 8 - iter 7/8 - loss 0.01002868 - samples/sec: 13.07 - lr: 0.020000\n",
+      "2021-09-21 20:21:29,898 epoch 8 - iter 8/8 - loss 0.00894268 - samples/sec: 14.98 - lr: 0.020000\n",
+      "2021-09-21 20:21:29,899 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:29,900 EPOCH 8 done: loss 0.0089 - lr 0.0200000\n",
+      "2021-09-21 20:21:29,945 DEV : loss 0.001274835434742272 - score 0.0\n",
+      "2021-09-21 20:21:29,947 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:21:29,950 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:30,082 epoch 9 - iter 1/8 - loss 0.00061381 - samples/sec: 12.65 - lr: 0.020000\n",
+      "2021-09-21 20:21:30,164 epoch 9 - iter 2/8 - loss 0.00066984 - samples/sec: 12.34 - lr: 0.020000\n",
+      "2021-09-21 20:21:30,235 epoch 9 - iter 3/8 - loss 0.00077134 - samples/sec: 14.17 - lr: 0.020000\n",
+      "2021-09-21 20:21:30,296 epoch 9 - iter 4/8 - loss 0.00065407 - samples/sec: 16.60 - lr: 0.020000\n",
+      "2021-09-21 20:21:30,363 epoch 9 - iter 5/8 - loss 0.00081390 - samples/sec: 15.23 - lr: 0.020000\n",
+      "2021-09-21 20:21:30,467 epoch 9 - iter 6/8 - loss 0.23576320 - samples/sec: 9.61 - lr: 0.020000\n",
+      "2021-09-21 20:21:30,545 epoch 9 - iter 7/8 - loss 0.20224924 - samples/sec: 12.90 - lr: 0.020000\n",
+      "2021-09-21 20:21:30,641 epoch 9 - iter 8/8 - loss 0.17749393 - samples/sec: 10.49 - lr: 0.020000\n",
+      "2021-09-21 20:21:30,643 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:30,643 EPOCH 9 done: loss 0.1775 - lr 0.0200000\n",
+      "2021-09-21 20:21:30,706 DEV : loss 0.0006818916881456971 - score 0.0\n",
+      "2021-09-21 20:21:30,707 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:21:30,744 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:30,845 epoch 10 - iter 1/8 - loss 0.00180092 - samples/sec: 14.14 - lr: 0.020000\n",
+      "2021-09-21 20:21:30,922 epoch 10 - iter 2/8 - loss 0.00140932 - samples/sec: 13.07 - lr: 0.020000\n",
+      "2021-09-21 20:21:30,991 epoch 10 - iter 3/8 - loss 0.00108137 - samples/sec: 14.62 - lr: 0.020000\n",
+      "2021-09-21 20:21:31,062 epoch 10 - iter 4/8 - loss 0.00098162 - samples/sec: 14.25 - lr: 0.020000\n",
+      "2021-09-21 20:21:31,147 epoch 10 - iter 5/8 - loss 0.00316357 - samples/sec: 11.82 - lr: 0.020000\n",
+      "2021-09-21 20:21:31,220 epoch 10 - iter 6/8 - loss 0.00278218 - samples/sec: 13.76 - lr: 0.020000\n",
+      "2021-09-21 20:21:31,308 epoch 10 - iter 7/8 - loss 0.00344823 - samples/sec: 11.57 - lr: 0.020000\n",
+      "2021-09-21 20:21:31,387 epoch 10 - iter 8/8 - loss 0.00316320 - samples/sec: 12.73 - lr: 0.020000\n",
+      "2021-09-21 20:21:31,388 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:31,389 EPOCH 10 done: loss 0.0032 - lr 0.0200000\n",
+      "2021-09-21 20:21:31,449 DEV : loss 0.00032001949148252606 - score 0.0\n",
+      "2021-09-21 20:21:31,451 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:21:39,534 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:39,535 Testing using best model ...\n",
+      "2021-09-21 20:21:39,537 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:49:44,518 \t1.0\n",
-      "2021-09-08 11:49:44,519 \n",
+      "2021-09-21 20:21:44,654 \t0.0\n",
+      "2021-09-21 20:21:44,655 \n",
       "Results:\n",
-      "- F-score (micro) 1.0\n",
-      "- F-score (macro) 0.1111\n",
-      "- Accuracy 1.0\n",
+      "- F-score (micro) 0.0\n",
+      "- F-score (macro) 0.0\n",
+      "- Accuracy 0.0\n",
       "\n",
       "By class:\n",
       "                        precision    recall  f1-score   support\n",
       "\n",
       "                Health     0.0000    0.0000    0.0000         0\n",
-      " Science & Mathematics     0.0000    0.0000    0.0000         0\n",
       " Entertainment & Music     0.0000    0.0000    0.0000         0\n",
       "  Computers & Internet     0.0000    0.0000    0.0000         0\n",
+      " Education & Reference     0.0000    0.0000    0.0000         0\n",
       "                Sports     0.0000    0.0000    0.0000         0\n",
       "     Society & Culture     0.0000    0.0000    0.0000         0\n",
       "    Business & Finance     0.0000    0.0000    0.0000         0\n",
       " Politics & Government     0.0000    0.0000    0.0000         0\n",
-      "Family & Relationships     1.0000    1.0000    1.0000         1\n",
+      "Family & Relationships     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "             micro avg     1.0000    1.0000    1.0000         1\n",
-      "             macro avg     0.1111    0.1111    0.1111         1\n",
-      "          weighted avg     1.0000    1.0000    1.0000         1\n",
-      "           samples avg     1.0000    1.0000    1.0000         1\n",
+      "             micro avg     0.0000    0.0000    0.0000         1\n",
+      "             macro avg     0.0000    0.0000    0.0000         1\n",
+      "          weighted avg     0.0000    0.0000    0.0000         1\n",
+      "           samples avg     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "2021-09-08 11:49:44,519 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:56,759 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
+      "2021-09-21 20:21:44,655 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:03,728 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:50:01,102 Computing label dictionary. Progress:\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "100%|██████████| 9/9 [00:00<00:00, 12212.47it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 11:50:01,104 [b'Family & Relationships', b'Health', b'Science & Mathematics', b'Computers & Internet', b'Education & Reference', b'Society & Culture', b'Business & Finance', b'Politics & Government', b'Entertainment & Music']\n"
+      "2021-09-21 20:22:08,439 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\n"
+      "100%|██████████| 9/9 [00:00<00:00, 11697.78it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:50:03,026 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:03,028 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:22:08,442 [b'Family & Relationships', b'Health', b'Science & Mathematics', b'Entertainment & Music', b'Sports', b'Society & Culture', b'Business & Finance', b'Politics & Government', b'Computers & Internet']\n",
+      "2021-09-21 20:22:08,471 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:08,473 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -1479,174 +1467,186 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:50:03,028 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:03,029 Corpus: \"Corpus: 8 train + 1 dev + 1 test sentences\"\n",
-      "2021-09-08 11:50:03,029 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:03,029 Parameters:\n",
-      "2021-09-08 11:50:03,030  - learning_rate: \"0.02\"\n",
-      "2021-09-08 11:50:03,030  - mini_batch_size: \"1\"\n",
-      "2021-09-08 11:50:03,030  - patience: \"3\"\n",
-      "2021-09-08 11:50:03,030  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 11:50:03,031  - max_epochs: \"10\"\n",
-      "2021-09-08 11:50:03,031  - shuffle: \"True\"\n",
-      "2021-09-08 11:50:03,031  - train_with_dev: \"False\"\n",
-      "2021-09-08 11:50:03,032  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 11:50:03,032 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:03,032 Model training base path: \"None\"\n",
-      "2021-09-08 11:50:03,032 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:03,033 Device: cuda:1\n",
-      "2021-09-08 11:50:03,033 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:03,033 Embeddings storage mode: cpu\n",
-      "2021-09-08 11:50:03,071 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:03,132 epoch 1 - iter 1/8 - loss 1.44145203 - samples/sec: 22.17 - lr: 0.020000\n",
-      "2021-09-08 11:50:03,192 epoch 1 - iter 2/8 - loss 0.73393452 - samples/sec: 16.84 - lr: 0.020000\n",
-      "2021-09-08 11:50:03,252 epoch 1 - iter 3/8 - loss 0.50857702 - samples/sec: 16.86 - lr: 0.020000\n",
-      "2021-09-08 11:50:03,306 epoch 1 - iter 4/8 - loss 0.47475012 - samples/sec: 18.84 - lr: 0.020000\n",
-      "2021-09-08 11:50:03,368 epoch 1 - iter 5/8 - loss 0.69403056 - samples/sec: 16.09 - lr: 0.020000\n",
-      "2021-09-08 11:50:03,466 epoch 1 - iter 6/8 - loss 0.74174152 - samples/sec: 10.32 - lr: 0.020000\n",
-      "2021-09-08 11:50:03,538 epoch 1 - iter 7/8 - loss 0.73949686 - samples/sec: 13.92 - lr: 0.020000\n",
-      "2021-09-08 11:50:03,590 epoch 1 - iter 8/8 - loss 0.87976181 - samples/sec: 19.76 - lr: 0.020000\n",
-      "2021-09-08 11:50:03,591 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:03,592 EPOCH 1 done: loss 0.8798 - lr 0.0200000\n",
-      "2021-09-08 11:50:03,632 DEV : loss 0.15859083831310272 - score 0.0\n",
-      "2021-09-08 11:50:03,633 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 11:50:12,408 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:12,534 epoch 2 - iter 1/8 - loss 0.33463970 - samples/sec: 9.19 - lr: 0.020000\n",
-      "2021-09-08 11:50:12,591 epoch 2 - iter 2/8 - loss 0.16993271 - samples/sec: 17.66 - lr: 0.020000\n",
-      "2021-09-08 11:50:12,643 epoch 2 - iter 3/8 - loss 0.43967308 - samples/sec: 19.25 - lr: 0.020000\n",
-      "2021-09-08 11:50:12,705 epoch 2 - iter 4/8 - loss 0.63329283 - samples/sec: 16.33 - lr: 0.020000\n",
-      "2021-09-08 11:50:12,778 epoch 2 - iter 5/8 - loss 0.53607559 - samples/sec: 13.68 - lr: 0.020000\n",
-      "2021-09-08 11:50:12,831 epoch 2 - iter 6/8 - loss 0.44757866 - samples/sec: 19.24 - lr: 0.020000\n",
-      "2021-09-08 11:50:12,877 epoch 2 - iter 7/8 - loss 0.38614003 - samples/sec: 21.84 - lr: 0.020000\n",
-      "2021-09-08 11:50:12,924 epoch 2 - iter 8/8 - loss 0.39238109 - samples/sec: 21.34 - lr: 0.020000\n",
-      "2021-09-08 11:50:12,925 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:12,925 EPOCH 2 done: loss 0.3924 - lr 0.0200000\n",
-      "2021-09-08 11:50:15,055 DEV : loss 0.39001479744911194 - score 0.0\n",
-      "2021-09-08 11:50:15,056 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:50:15,067 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:15,220 epoch 3 - iter 1/8 - loss 1.05089426 - samples/sec: 7.25 - lr: 0.020000\n",
-      "2021-09-08 11:50:15,269 epoch 3 - iter 2/8 - loss 0.56506130 - samples/sec: 20.75 - lr: 0.020000\n",
-      "2021-09-08 11:50:15,346 epoch 3 - iter 3/8 - loss 0.38973607 - samples/sec: 13.06 - lr: 0.020000\n",
-      "2021-09-08 11:50:15,395 epoch 3 - iter 4/8 - loss 0.29337336 - samples/sec: 20.63 - lr: 0.020000\n",
-      "2021-09-08 11:50:15,447 epoch 3 - iter 5/8 - loss 0.26208486 - samples/sec: 19.38 - lr: 0.020000\n",
-      "2021-09-08 11:50:15,551 epoch 3 - iter 6/8 - loss 0.29155504 - samples/sec: 9.63 - lr: 0.020000\n",
-      "2021-09-08 11:50:15,609 epoch 3 - iter 7/8 - loss 0.36979807 - samples/sec: 17.56 - lr: 0.020000\n",
-      "2021-09-08 11:50:15,661 epoch 3 - iter 8/8 - loss 0.32438723 - samples/sec: 19.27 - lr: 0.020000\n",
-      "2021-09-08 11:50:15,662 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:15,663 EPOCH 3 done: loss 0.3244 - lr 0.0200000\n",
-      "2021-09-08 11:50:15,725 DEV : loss 0.2857881188392639 - score 0.0\n",
-      "2021-09-08 11:50:15,726 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:50:15,731 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:15,791 epoch 4 - iter 1/8 - loss 0.00748132 - samples/sec: 21.66 - lr: 0.020000\n",
-      "2021-09-08 11:50:15,843 epoch 4 - iter 2/8 - loss 0.00553132 - samples/sec: 19.37 - lr: 0.020000\n",
-      "2021-09-08 11:50:15,887 epoch 4 - iter 3/8 - loss 0.00551899 - samples/sec: 22.96 - lr: 0.020000\n",
-      "2021-09-08 11:50:15,984 epoch 4 - iter 4/8 - loss 0.14608850 - samples/sec: 10.37 - lr: 0.020000\n",
-      "2021-09-08 11:50:16,042 epoch 4 - iter 5/8 - loss 0.31091353 - samples/sec: 17.57 - lr: 0.020000\n",
-      "2021-09-08 11:50:16,110 epoch 4 - iter 6/8 - loss 0.25951648 - samples/sec: 14.74 - lr: 0.020000\n",
-      "2021-09-08 11:50:16,163 epoch 4 - iter 7/8 - loss 0.22588668 - samples/sec: 19.19 - lr: 0.020000\n",
-      "2021-09-08 11:50:16,211 epoch 4 - iter 8/8 - loss 0.19837273 - samples/sec: 21.01 - lr: 0.020000\n",
-      "2021-09-08 11:50:16,212 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:16,212 EPOCH 4 done: loss 0.1984 - lr 0.0200000\n",
-      "2021-09-08 11:50:16,282 DEV : loss 0.043082449585199356 - score 0.0\n",
-      "2021-09-08 11:50:16,283 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 11:50:27,918 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:27,984 epoch 5 - iter 1/8 - loss 0.00849123 - samples/sec: 19.98 - lr: 0.020000\n",
-      "2021-09-08 11:50:28,043 epoch 5 - iter 2/8 - loss 0.00662210 - samples/sec: 16.99 - lr: 0.020000\n",
-      "2021-09-08 11:50:28,087 epoch 5 - iter 3/8 - loss 0.00584339 - samples/sec: 23.02 - lr: 0.020000\n",
-      "2021-09-08 11:50:28,195 epoch 5 - iter 4/8 - loss 0.07023436 - samples/sec: 9.30 - lr: 0.020000\n",
-      "2021-09-08 11:50:28,242 epoch 5 - iter 5/8 - loss 0.05769139 - samples/sec: 21.55 - lr: 0.020000\n",
-      "2021-09-08 11:50:28,296 epoch 5 - iter 6/8 - loss 0.04853987 - samples/sec: 18.69 - lr: 0.020000\n",
-      "2021-09-08 11:50:28,353 epoch 5 - iter 7/8 - loss 0.05694669 - samples/sec: 17.62 - lr: 0.020000\n",
-      "2021-09-08 11:50:28,422 epoch 5 - iter 8/8 - loss 0.04999109 - samples/sec: 14.77 - lr: 0.020000\n",
-      "2021-09-08 11:50:28,423 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:28,423 EPOCH 5 done: loss 0.0500 - lr 0.0200000\n",
-      "2021-09-08 11:50:28,489 DEV : loss 0.1608383059501648 - score 0.0\n",
-      "2021-09-08 11:50:28,490 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:50:28,507 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:28,573 epoch 6 - iter 1/8 - loss 0.00789120 - samples/sec: 19.32 - lr: 0.020000\n",
-      "2021-09-08 11:50:28,641 epoch 6 - iter 2/8 - loss 0.00424915 - samples/sec: 14.89 - lr: 0.020000\n",
-      "2021-09-08 11:50:28,738 epoch 6 - iter 3/8 - loss 0.42650477 - samples/sec: 10.38 - lr: 0.020000\n",
-      "2021-09-08 11:50:28,788 epoch 6 - iter 4/8 - loss 0.33784231 - samples/sec: 20.23 - lr: 0.020000\n",
-      "2021-09-08 11:50:28,845 epoch 6 - iter 5/8 - loss 0.28260645 - samples/sec: 17.58 - lr: 0.020000\n",
-      "2021-09-08 11:50:28,898 epoch 6 - iter 6/8 - loss 0.23561289 - samples/sec: 19.17 - lr: 0.020000\n",
-      "2021-09-08 11:50:28,945 epoch 6 - iter 7/8 - loss 0.20227467 - samples/sec: 21.51 - lr: 0.020000\n",
-      "2021-09-08 11:50:28,989 epoch 6 - iter 8/8 - loss 0.17738452 - samples/sec: 22.82 - lr: 0.020000\n"
+      "2021-09-21 20:22:08,473 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:08,474 Corpus: \"Corpus: 8 train + 1 dev + 1 test sentences\"\n",
+      "2021-09-21 20:22:08,474 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:08,474 Parameters:\n",
+      "2021-09-21 20:22:08,475  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:22:08,475  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:22:08,475  - patience: \"3\"\n",
+      "2021-09-21 20:22:08,476  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:22:08,476  - max_epochs: \"10\"\n",
+      "2021-09-21 20:22:08,477  - shuffle: \"True\"\n",
+      "2021-09-21 20:22:08,477  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:22:08,477  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:22:08,478 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:08,478 Model training base path: \"None\"\n",
+      "2021-09-21 20:22:08,478 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:08,479 Device: cuda:0\n",
+      "2021-09-21 20:22:08,479 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:08,479 Embeddings storage mode: cpu\n",
+      "2021-09-21 20:22:08,486 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:08,582 epoch 1 - iter 1/8 - loss 0.31350997 - samples/sec: 15.01 - lr: 0.020000\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:50:28,990 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:28,990 EPOCH 6 done: loss 0.1774 - lr 0.0200000\n",
-      "2021-09-08 11:50:29,041 DEV : loss 0.18514616787433624 - score 0.0\n",
-      "2021-09-08 11:50:29,041 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:50:29,055 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:29,116 epoch 7 - iter 1/8 - loss 0.00482680 - samples/sec: 21.27 - lr: 0.020000\n",
-      "2021-09-08 11:50:29,161 epoch 7 - iter 2/8 - loss 0.00377288 - samples/sec: 22.88 - lr: 0.020000\n",
-      "2021-09-08 11:50:29,207 epoch 7 - iter 3/8 - loss 0.00464006 - samples/sec: 21.61 - lr: 0.020000\n",
-      "2021-09-08 11:50:29,263 epoch 7 - iter 4/8 - loss 0.00852643 - samples/sec: 18.15 - lr: 0.020000\n",
-      "2021-09-08 11:50:29,317 epoch 7 - iter 5/8 - loss 0.00728418 - samples/sec: 18.67 - lr: 0.020000\n",
-      "2021-09-08 11:50:29,370 epoch 7 - iter 6/8 - loss 0.00623496 - samples/sec: 19.28 - lr: 0.020000\n",
-      "2021-09-08 11:50:29,465 epoch 7 - iter 7/8 - loss 0.02596022 - samples/sec: 10.49 - lr: 0.020000\n",
-      "2021-09-08 11:50:29,535 epoch 7 - iter 8/8 - loss 0.02288206 - samples/sec: 14.34 - lr: 0.020000\n",
-      "2021-09-08 11:50:29,537 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:29,537 EPOCH 7 done: loss 0.0229 - lr 0.0200000\n",
-      "2021-09-08 11:50:29,572 DEV : loss 0.36212214827537537 - score 0.0\n",
-      "2021-09-08 11:50:29,573 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:50:29,575 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:29,685 epoch 8 - iter 1/8 - loss 0.05200035 - samples/sec: 10.39 - lr: 0.020000\n",
-      "2021-09-08 11:50:29,738 epoch 8 - iter 2/8 - loss 0.02624952 - samples/sec: 19.21 - lr: 0.020000\n",
-      "2021-09-08 11:50:29,788 epoch 8 - iter 3/8 - loss 0.36600315 - samples/sec: 20.21 - lr: 0.020000\n",
-      "2021-09-08 11:50:29,842 epoch 8 - iter 4/8 - loss 0.27494614 - samples/sec: 18.67 - lr: 0.020000\n",
-      "2021-09-08 11:50:29,890 epoch 8 - iter 5/8 - loss 0.22095561 - samples/sec: 21.13 - lr: 0.020000\n",
-      "2021-09-08 11:50:29,937 epoch 8 - iter 6/8 - loss 0.18453490 - samples/sec: 21.41 - lr: 0.020000\n",
-      "2021-09-08 11:50:30,007 epoch 8 - iter 7/8 - loss 0.15853190 - samples/sec: 14.37 - lr: 0.020000\n",
-      "2021-09-08 11:50:30,062 epoch 8 - iter 8/8 - loss 0.13892974 - samples/sec: 18.38 - lr: 0.020000\n",
-      "2021-09-08 11:50:30,064 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:30,064 EPOCH 8 done: loss 0.1389 - lr 0.0200000\n",
-      "2021-09-08 11:50:30,102 DEV : loss 0.009803185239434242 - score 0.0\n",
-      "2021-09-08 11:50:30,103 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:22:08,655 epoch 1 - iter 2/8 - loss 0.33373381 - samples/sec: 13.68 - lr: 0.020000\n",
+      "2021-09-21 20:22:08,800 epoch 1 - iter 3/8 - loss 0.60741155 - samples/sec: 6.96 - lr: 0.020000\n",
+      "2021-09-21 20:22:08,889 epoch 1 - iter 4/8 - loss 0.64705598 - samples/sec: 11.23 - lr: 0.020000\n",
+      "2021-09-21 20:22:08,951 epoch 1 - iter 5/8 - loss 0.55174509 - samples/sec: 16.43 - lr: 0.020000\n",
+      "2021-09-21 20:22:09,043 epoch 1 - iter 6/8 - loss 0.62876001 - samples/sec: 10.90 - lr: 0.020000\n",
+      "2021-09-21 20:22:09,104 epoch 1 - iter 7/8 - loss 0.73708528 - samples/sec: 16.64 - lr: 0.020000\n",
+      "2021-09-21 20:22:09,165 epoch 1 - iter 8/8 - loss 0.79045295 - samples/sec: 16.65 - lr: 0.020000\n",
+      "2021-09-21 20:22:09,166 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:09,166 EPOCH 1 done: loss 0.7905 - lr 0.0200000\n",
+      "2021-09-21 20:22:09,349 DEV : loss 0.5062081813812256 - score 0.0\n",
+      "2021-09-21 20:22:09,350 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:50:36,495 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:36,588 epoch 9 - iter 1/8 - loss 0.00202049 - samples/sec: 12.93 - lr: 0.020000\n",
-      "2021-09-08 11:50:36,638 epoch 9 - iter 2/8 - loss 0.00292715 - samples/sec: 20.23 - lr: 0.020000\n",
-      "2021-09-08 11:50:36,692 epoch 9 - iter 3/8 - loss 0.00234112 - samples/sec: 18.74 - lr: 0.020000\n",
-      "2021-09-08 11:50:36,749 epoch 9 - iter 4/8 - loss 0.01589142 - samples/sec: 17.57 - lr: 0.020000\n",
-      "2021-09-08 11:50:36,842 epoch 9 - iter 5/8 - loss 0.01293883 - samples/sec: 10.85 - lr: 0.020000\n",
-      "2021-09-08 11:50:36,889 epoch 9 - iter 6/8 - loss 0.01099772 - samples/sec: 21.61 - lr: 0.020000\n",
-      "2021-09-08 11:50:36,943 epoch 9 - iter 7/8 - loss 0.00955392 - samples/sec: 18.69 - lr: 0.020000\n",
-      "2021-09-08 11:50:36,987 epoch 9 - iter 8/8 - loss 0.00843711 - samples/sec: 23.02 - lr: 0.020000\n",
-      "2021-09-08 11:50:36,988 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:36,988 EPOCH 9 done: loss 0.0084 - lr 0.0200000\n",
-      "2021-09-08 11:50:37,023 DEV : loss 0.0068962909281253815 - score 0.0\n",
-      "2021-09-08 11:50:37,024 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 11:50:43,509 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:45,433 epoch 10 - iter 1/8 - loss 0.00058113 - samples/sec: 16.33 - lr: 0.020000\n",
-      "2021-09-08 11:50:45,486 epoch 10 - iter 2/8 - loss 0.00172491 - samples/sec: 19.20 - lr: 0.020000\n",
-      "2021-09-08 11:50:45,546 epoch 10 - iter 3/8 - loss 0.00129755 - samples/sec: 16.87 - lr: 0.020000\n",
-      "2021-09-08 11:50:45,653 epoch 10 - iter 4/8 - loss 0.00125453 - samples/sec: 9.38 - lr: 0.020000\n",
-      "2021-09-08 11:50:45,727 epoch 10 - iter 5/8 - loss 0.00114068 - samples/sec: 13.78 - lr: 0.020000\n",
-      "2021-09-08 11:50:45,781 epoch 10 - iter 6/8 - loss 0.00106304 - samples/sec: 18.69 - lr: 0.020000\n",
-      "2021-09-08 11:50:45,828 epoch 10 - iter 7/8 - loss 0.00112842 - samples/sec: 21.23 - lr: 0.020000\n",
-      "2021-09-08 11:50:45,876 epoch 10 - iter 8/8 - loss 0.00111690 - samples/sec: 21.33 - lr: 0.020000\n",
-      "2021-09-08 11:50:45,877 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:45,878 EPOCH 10 done: loss 0.0011 - lr 0.0200000\n",
-      "2021-09-08 11:50:45,916 DEV : loss 0.00799597892910242 - score 0.0\n",
-      "2021-09-08 11:50:45,917 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:50:52,780 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:52,781 Testing using best model ...\n",
-      "2021-09-08 11:50:52,782 loading file None/best-model.pt\n",
+      "2021-09-21 20:22:19,208 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:19,404 epoch 2 - iter 1/8 - loss 0.51137829 - samples/sec: 6.55 - lr: 0.020000\n",
+      "2021-09-21 20:22:19,508 epoch 2 - iter 2/8 - loss 0.43975978 - samples/sec: 9.75 - lr: 0.020000\n",
+      "2021-09-21 20:22:19,590 epoch 2 - iter 3/8 - loss 0.29881678 - samples/sec: 12.26 - lr: 0.020000\n",
+      "2021-09-21 20:22:19,704 epoch 2 - iter 4/8 - loss 0.28092694 - samples/sec: 8.77 - lr: 0.020000\n",
+      "2021-09-21 20:22:19,816 epoch 2 - iter 5/8 - loss 0.24472283 - samples/sec: 8.96 - lr: 0.020000\n",
+      "2021-09-21 20:22:20,052 epoch 2 - iter 6/8 - loss 0.59004548 - samples/sec: 4.26 - lr: 0.020000\n",
+      "2021-09-21 20:22:20,152 epoch 2 - iter 7/8 - loss 0.52937262 - samples/sec: 10.09 - lr: 0.020000\n",
+      "2021-09-21 20:22:20,266 epoch 2 - iter 8/8 - loss 0.47414482 - samples/sec: 8.80 - lr: 0.020000\n",
+      "2021-09-21 20:22:20,267 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:20,268 EPOCH 2 done: loss 0.4741 - lr 0.0200000\n",
+      "2021-09-21 20:22:20,436 DEV : loss 0.6923801898956299 - score 0.0\n",
+      "2021-09-21 20:22:20,437 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:22:20,439 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:20,589 epoch 3 - iter 1/8 - loss 0.00355190 - samples/sec: 11.94 - lr: 0.020000\n",
+      "2021-09-21 20:22:20,808 epoch 3 - iter 2/8 - loss 0.09395611 - samples/sec: 4.56 - lr: 0.020000\n",
+      "2021-09-21 20:22:20,923 epoch 3 - iter 3/8 - loss 0.06494585 - samples/sec: 8.77 - lr: 0.020000\n",
+      "2021-09-21 20:22:21,047 epoch 3 - iter 4/8 - loss 0.09995119 - samples/sec: 8.10 - lr: 0.020000\n",
+      "2021-09-21 20:22:21,147 epoch 3 - iter 5/8 - loss 0.10789725 - samples/sec: 10.03 - lr: 0.020000\n",
+      "2021-09-21 20:22:21,286 epoch 3 - iter 6/8 - loss 0.12187007 - samples/sec: 7.26 - lr: 0.020000\n",
+      "2021-09-21 20:22:21,402 epoch 3 - iter 7/8 - loss 0.10532199 - samples/sec: 8.63 - lr: 0.020000\n",
+      "2021-09-21 20:22:21,484 epoch 3 - iter 8/8 - loss 0.21778010 - samples/sec: 12.35 - lr: 0.020000\n",
+      "2021-09-21 20:22:21,485 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:21,485 EPOCH 3 done: loss 0.2178 - lr 0.0200000\n",
+      "2021-09-21 20:22:21,655 DEV : loss 0.7204721570014954 - score 0.0\n",
+      "2021-09-21 20:22:21,657 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:22:21,660 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:21,805 epoch 4 - iter 1/8 - loss 1.07426512 - samples/sec: 8.77 - lr: 0.020000\n",
+      "2021-09-21 20:22:21,885 epoch 4 - iter 2/8 - loss 0.53999139 - samples/sec: 12.51 - lr: 0.020000\n",
+      "2021-09-21 20:22:21,969 epoch 4 - iter 3/8 - loss 0.40017498 - samples/sec: 12.11 - lr: 0.020000\n",
+      "2021-09-21 20:22:22,124 epoch 4 - iter 4/8 - loss 0.30146548 - samples/sec: 6.45 - lr: 0.020000\n",
+      "2021-09-21 20:22:22,225 epoch 4 - iter 5/8 - loss 0.33098930 - samples/sec: 9.95 - lr: 0.020000\n",
+      "2021-09-21 20:22:22,297 epoch 4 - iter 6/8 - loss 0.27698458 - samples/sec: 14.17 - lr: 0.020000\n",
+      "2021-09-21 20:22:22,390 epoch 4 - iter 7/8 - loss 0.24334566 - samples/sec: 10.78 - lr: 0.020000\n",
+      "2021-09-21 20:22:22,459 epoch 4 - iter 8/8 - loss 0.24834765 - samples/sec: 14.66 - lr: 0.020000\n",
+      "2021-09-21 20:22:22,460 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:22,461 EPOCH 4 done: loss 0.2483 - lr 0.0200000\n",
+      "2021-09-21 20:22:22,651 DEV : loss 0.6597853899002075 - score 0.0\n",
+      "2021-09-21 20:22:22,652 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:22:22,730 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:22,842 epoch 5 - iter 1/8 - loss 0.07201500 - samples/sec: 13.17 - lr: 0.020000\n",
+      "2021-09-21 20:22:22,913 epoch 5 - iter 2/8 - loss 0.16978800 - samples/sec: 14.26 - lr: 0.020000\n",
+      "2021-09-21 20:22:22,969 epoch 5 - iter 3/8 - loss 0.11361046 - samples/sec: 18.15 - lr: 0.020000\n",
+      "2021-09-21 20:22:23,070 epoch 5 - iter 4/8 - loss 0.08994923 - samples/sec: 9.96 - lr: 0.020000\n",
+      "2021-09-21 20:22:23,133 epoch 5 - iter 5/8 - loss 0.07478671 - samples/sec: 16.02 - lr: 0.020000\n",
+      "2021-09-21 20:22:23,190 epoch 5 - iter 6/8 - loss 0.06278475 - samples/sec: 17.66 - lr: 0.020000\n",
+      "2021-09-21 20:22:23,251 epoch 5 - iter 7/8 - loss 0.05514676 - samples/sec: 16.52 - lr: 0.020000\n",
+      "2021-09-21 20:22:23,327 epoch 5 - iter 8/8 - loss 0.05808550 - samples/sec: 13.26 - lr: 0.020000\n",
+      "2021-09-21 20:22:23,329 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:23,329 EPOCH 5 done: loss 0.0581 - lr 0.0200000\n",
+      "2021-09-21 20:22:23,514 DEV : loss 0.6882521510124207 - score 0.0\n",
+      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 20:22:23,515 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:22:23,593 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:23,711 epoch 6 - iter 1/8 - loss 0.00122818 - samples/sec: 9.88 - lr: 0.010000\n",
+      "2021-09-21 20:22:23,767 epoch 6 - iter 2/8 - loss 0.00520053 - samples/sec: 18.01 - lr: 0.010000\n",
+      "2021-09-21 20:22:23,823 epoch 6 - iter 3/8 - loss 0.00369899 - samples/sec: 18.41 - lr: 0.010000\n",
+      "2021-09-21 20:22:23,884 epoch 6 - iter 4/8 - loss 0.23376264 - samples/sec: 16.51 - lr: 0.010000\n",
+      "2021-09-21 20:22:23,947 epoch 6 - iter 5/8 - loss 0.18743300 - samples/sec: 16.13 - lr: 0.010000\n",
+      "2021-09-21 20:22:24,008 epoch 6 - iter 6/8 - loss 0.15676268 - samples/sec: 16.54 - lr: 0.010000\n",
+      "2021-09-21 20:22:24,066 epoch 6 - iter 7/8 - loss 0.13480362 - samples/sec: 17.59 - lr: 0.010000\n",
+      "2021-09-21 20:22:24,137 epoch 6 - iter 8/8 - loss 0.12115834 - samples/sec: 14.08 - lr: 0.010000\n",
+      "2021-09-21 20:22:24,139 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:24,139 EPOCH 6 done: loss 0.1212 - lr 0.0100000\n",
+      "2021-09-21 20:22:24,346 DEV : loss 0.748106062412262 - score 0.0\n",
+      "2021-09-21 20:22:24,347 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:22:24,444 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:24,526 epoch 7 - iter 1/8 - loss 1.20530307 - samples/sec: 15.46 - lr: 0.010000\n",
+      "2021-09-21 20:22:24,585 epoch 7 - iter 2/8 - loss 0.60434184 - samples/sec: 17.41 - lr: 0.010000\n",
+      "2021-09-21 20:22:24,648 epoch 7 - iter 3/8 - loss 0.40366260 - samples/sec: 16.02 - lr: 0.010000\n",
+      "2021-09-21 20:22:24,707 epoch 7 - iter 4/8 - loss 0.30634279 - samples/sec: 17.04 - lr: 0.010000\n",
+      "2021-09-21 20:22:24,787 epoch 7 - iter 5/8 - loss 0.26051578 - samples/sec: 12.75 - lr: 0.010000\n",
+      "2021-09-21 20:22:24,842 epoch 7 - iter 6/8 - loss 0.21956032 - samples/sec: 18.19 - lr: 0.010000\n",
+      "2021-09-21 20:22:24,898 epoch 7 - iter 7/8 - loss 0.18827404 - samples/sec: 18.27 - lr: 0.010000\n",
+      "2021-09-21 20:22:25,000 epoch 7 - iter 8/8 - loss 0.16502683 - samples/sec: 9.90 - lr: 0.010000\n",
+      "2021-09-21 20:22:25,001 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:25,001 EPOCH 7 done: loss 0.1650 - lr 0.0100000\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 20:22:25,226 DEV : loss 0.672667384147644 - score 0.0\n",
+      "2021-09-21 20:22:25,228 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:22:25,317 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:25,390 epoch 8 - iter 1/8 - loss 0.00316562 - samples/sec: 17.89 - lr: 0.010000\n",
+      "2021-09-21 20:22:25,492 epoch 8 - iter 2/8 - loss 0.00219739 - samples/sec: 9.89 - lr: 0.010000\n",
+      "2021-09-21 20:22:25,567 epoch 8 - iter 3/8 - loss 0.00343202 - samples/sec: 13.52 - lr: 0.010000\n",
+      "2021-09-21 20:22:25,628 epoch 8 - iter 4/8 - loss 0.01379222 - samples/sec: 16.66 - lr: 0.010000\n",
+      "2021-09-21 20:22:25,685 epoch 8 - iter 5/8 - loss 0.01122669 - samples/sec: 17.58 - lr: 0.010000\n",
+      "2021-09-21 20:22:25,741 epoch 8 - iter 6/8 - loss 0.00942330 - samples/sec: 18.40 - lr: 0.010000\n",
+      "2021-09-21 20:22:25,804 epoch 8 - iter 7/8 - loss 0.00842872 - samples/sec: 15.99 - lr: 0.010000\n",
+      "2021-09-21 20:22:25,863 epoch 8 - iter 8/8 - loss 0.00751680 - samples/sec: 16.98 - lr: 0.010000\n",
+      "2021-09-21 20:22:25,865 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:25,866 EPOCH 8 done: loss 0.0075 - lr 0.0100000\n",
+      "2021-09-21 20:22:26,196 DEV : loss 0.7190343141555786 - score 0.0\n",
+      "2021-09-21 20:22:26,197 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:22:26,268 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:26,347 epoch 9 - iter 1/8 - loss 0.02402127 - samples/sec: 16.30 - lr: 0.010000\n",
+      "2021-09-21 20:22:26,419 epoch 9 - iter 2/8 - loss 0.01394526 - samples/sec: 14.07 - lr: 0.010000\n",
+      "2021-09-21 20:22:26,478 epoch 9 - iter 3/8 - loss 0.00966386 - samples/sec: 17.22 - lr: 0.010000\n",
+      "2021-09-21 20:22:26,533 epoch 9 - iter 4/8 - loss 0.00735577 - samples/sec: 18.45 - lr: 0.010000\n",
+      "2021-09-21 20:22:26,634 epoch 9 - iter 5/8 - loss 0.00622999 - samples/sec: 9.99 - lr: 0.010000\n",
+      "2021-09-21 20:22:26,697 epoch 9 - iter 6/8 - loss 0.00560223 - samples/sec: 15.97 - lr: 0.010000\n",
+      "2021-09-21 20:22:26,753 epoch 9 - iter 7/8 - loss 0.00488597 - samples/sec: 18.17 - lr: 0.010000\n",
+      "2021-09-21 20:22:26,814 epoch 9 - iter 8/8 - loss 0.00468701 - samples/sec: 16.49 - lr: 0.010000\n",
+      "2021-09-21 20:22:26,815 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:26,816 EPOCH 9 done: loss 0.0047 - lr 0.0100000\n",
+      "2021-09-21 20:22:27,015 DEV : loss 0.7269400358200073 - score 0.0\n",
+      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 20:22:27,016 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:22:27,106 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:27,183 epoch 10 - iter 1/8 - loss 0.00245819 - samples/sec: 17.01 - lr: 0.005000\n",
+      "2021-09-21 20:22:27,284 epoch 10 - iter 2/8 - loss 0.00151500 - samples/sec: 9.90 - lr: 0.005000\n",
+      "2021-09-21 20:22:27,342 epoch 10 - iter 3/8 - loss 0.00117477 - samples/sec: 17.62 - lr: 0.005000\n",
+      "2021-09-21 20:22:27,405 epoch 10 - iter 4/8 - loss 0.00116445 - samples/sec: 16.03 - lr: 0.005000\n",
+      "2021-09-21 20:22:27,460 epoch 10 - iter 5/8 - loss 0.00101257 - samples/sec: 18.37 - lr: 0.005000\n",
+      "2021-09-21 20:22:27,521 epoch 10 - iter 6/8 - loss 0.05009699 - samples/sec: 16.73 - lr: 0.005000\n",
+      "2021-09-21 20:22:27,595 epoch 10 - iter 7/8 - loss 0.04335921 - samples/sec: 13.61 - lr: 0.005000\n",
+      "2021-09-21 20:22:27,651 epoch 10 - iter 8/8 - loss 0.03817540 - samples/sec: 18.00 - lr: 0.005000\n",
+      "2021-09-21 20:22:27,653 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:27,653 EPOCH 10 done: loss 0.0382 - lr 0.0050000\n",
+      "2021-09-21 20:22:28,631 DEV : loss 0.7349896430969238 - score 0.0\n",
+      "2021-09-21 20:22:28,633 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:22:34,417 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:34,418 Testing using best model ...\n",
+      "2021-09-21 20:22:34,419 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:50:57,354 \t0.0\n",
-      "2021-09-08 11:50:57,355 \n",
+      "2021-09-21 20:22:40,378 \t1.0\n",
+      "2021-09-21 20:22:40,379 \n",
       "Results:\n",
-      "- F-score (micro) 0.0\n",
-      "- F-score (macro) 0.0\n",
-      "- Accuracy 0.0\n",
+      "- F-score (micro) 1.0\n",
+      "- F-score (macro) 0.1111\n",
+      "- Accuracy 1.0\n",
       "\n",
       "By class:\n",
       "                        precision    recall  f1-score   support\n",
@@ -1654,38 +1654,38 @@
       "Family & Relationships     0.0000    0.0000    0.0000         0\n",
       "                Health     0.0000    0.0000    0.0000         0\n",
       " Science & Mathematics     0.0000    0.0000    0.0000         0\n",
-      "  Computers & Internet     0.0000    0.0000    0.0000         0\n",
-      " Education & Reference     0.0000    0.0000    0.0000         0\n",
+      " Entertainment & Music     0.0000    0.0000    0.0000         0\n",
+      "                Sports     0.0000    0.0000    0.0000         0\n",
       "     Society & Culture     0.0000    0.0000    0.0000         0\n",
       "    Business & Finance     0.0000    0.0000    0.0000         0\n",
       " Politics & Government     0.0000    0.0000    0.0000         0\n",
-      " Entertainment & Music     0.0000    0.0000    0.0000         1\n",
+      "  Computers & Internet     1.0000    1.0000    1.0000         1\n",
       "\n",
-      "             micro avg     0.0000    0.0000    0.0000         1\n",
-      "             macro avg     0.0000    0.0000    0.0000         1\n",
-      "          weighted avg     0.0000    0.0000    0.0000         1\n",
-      "           samples avg     0.0000    0.0000    0.0000         1\n",
+      "             micro avg     1.0000    1.0000    1.0000         1\n",
+      "             macro avg     0.1111    0.1111    0.1111         1\n",
+      "          weighted avg     1.0000    1.0000    1.0000         1\n",
+      "           samples avg     1.0000    1.0000    1.0000         1\n",
       "\n",
-      "2021-09-08 11:50:57,355 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:09,565 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
+      "2021-09-21 20:22:40,379 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:02,544 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:51:13,947 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:23:06,703 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 9/9 [00:00<00:00, 11778.08it/s]"
+      "100%|██████████| 9/9 [00:00<00:00, 11346.18it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:51:13,951 [b'Family & Relationships', b'Health', b'Science & Mathematics', b'Entertainment & Music', b'Education & Reference', b'Sports', b'Business & Finance', b'Politics & Government', b'Computers & Internet']\n",
-      "2021-09-08 11:51:13,985 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:13,987 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:23:06,706 [b'Family & Relationships', b'Science & Mathematics', b'Entertainment & Music', b'Computers & Internet', b'Education & Reference', b'Sports', b'Society & Culture', b'Politics & Government', b'Business & Finance']\n",
+      "2021-09-21 20:23:06,716 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:06,718 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -1998,27 +1998,26 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:51:13,988 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:13,988 Corpus: \"Corpus: 8 train + 1 dev + 1 test sentences\"\n",
-      "2021-09-08 11:51:13,989 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:13,989 Parameters:\n",
-      "2021-09-08 11:51:13,989  - learning_rate: \"0.02\"\n",
-      "2021-09-08 11:51:13,990  - mini_batch_size: \"1\"\n",
-      "2021-09-08 11:51:13,990  - patience: \"3\"\n",
-      "2021-09-08 11:51:13,990  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 11:51:13,991  - max_epochs: \"10\"\n",
-      "2021-09-08 11:51:13,991  - shuffle: \"True\"\n",
-      "2021-09-08 11:51:13,991  - train_with_dev: \"False\"\n",
-      "2021-09-08 11:51:13,991  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 11:51:13,993 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:13,993 Model training base path: \"None\"\n",
-      "2021-09-08 11:51:13,994 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:13,994 Device: cuda:1\n",
-      "2021-09-08 11:51:13,994 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:13,995 Embeddings storage mode: cpu\n",
-      "2021-09-08 11:51:14,002 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:14,071 epoch 1 - iter 1/8 - loss 1.59124851 - samples/sec: 21.67 - lr: 0.020000\n",
-      "2021-09-08 11:51:14,135 epoch 1 - iter 2/8 - loss 0.94440643 - samples/sec: 15.75 - lr: 0.020000\n"
+      "2021-09-21 20:23:06,718 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:06,719 Corpus: \"Corpus: 8 train + 1 dev + 1 test sentences\"\n",
+      "2021-09-21 20:23:06,719 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:06,719 Parameters:\n",
+      "2021-09-21 20:23:06,720  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:23:06,720  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:23:06,720  - patience: \"3\"\n",
+      "2021-09-21 20:23:06,721  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:23:06,721  - max_epochs: \"10\"\n",
+      "2021-09-21 20:23:06,721  - shuffle: \"True\"\n",
+      "2021-09-21 20:23:06,722  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:23:06,722  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:23:06,722 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:06,723 Model training base path: \"None\"\n",
+      "2021-09-21 20:23:06,723 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:06,724 Device: cuda:0\n",
+      "2021-09-21 20:23:06,724 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:06,724 Embeddings storage mode: cpu\n",
+      "2021-09-21 20:23:06,731 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:06,821 epoch 1 - iter 1/8 - loss 0.94097489 - samples/sec: 17.16 - lr: 0.020000\n"
      ]
     },
     {
@@ -2032,205 +2031,194 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:51:14,323 epoch 1 - iter 3/8 - loss 0.67256010 - samples/sec: 5.33 - lr: 0.020000\n",
-      "2021-09-08 11:51:14,374 epoch 1 - iter 4/8 - loss 0.93538255 - samples/sec: 19.85 - lr: 0.020000\n",
-      "2021-09-08 11:51:14,428 epoch 1 - iter 5/8 - loss 0.79276511 - samples/sec: 18.72 - lr: 0.020000\n",
-      "2021-09-08 11:51:14,518 epoch 1 - iter 6/8 - loss 0.66317243 - samples/sec: 11.25 - lr: 0.020000\n",
-      "2021-09-08 11:51:14,596 epoch 1 - iter 7/8 - loss 0.58155582 - samples/sec: 12.88 - lr: 0.020000\n",
-      "2021-09-08 11:51:14,646 epoch 1 - iter 8/8 - loss 0.65305793 - samples/sec: 20.24 - lr: 0.020000\n",
-      "2021-09-08 11:51:14,648 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:14,648 EPOCH 1 done: loss 0.6531 - lr 0.0200000\n",
-      "2021-09-08 11:51:14,683 DEV : loss 0.27999722957611084 - score 0.0\n",
-      "2021-09-08 11:51:14,683 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:23:07,000 epoch 1 - iter 2/8 - loss 0.50531927 - samples/sec: 5.59 - lr: 0.020000\n",
+      "2021-09-21 20:23:07,065 epoch 1 - iter 3/8 - loss 1.00439940 - samples/sec: 15.52 - lr: 0.020000\n",
+      "2021-09-21 20:23:07,163 epoch 1 - iter 4/8 - loss 0.93213911 - samples/sec: 10.27 - lr: 0.020000\n",
+      "2021-09-21 20:23:07,288 epoch 1 - iter 5/8 - loss 0.91332295 - samples/sec: 8.02 - lr: 0.020000\n",
+      "2021-09-21 20:23:07,395 epoch 1 - iter 6/8 - loss 0.77927542 - samples/sec: 9.45 - lr: 0.020000\n",
+      "2021-09-21 20:23:07,490 epoch 1 - iter 7/8 - loss 0.80748820 - samples/sec: 10.62 - lr: 0.020000\n",
+      "2021-09-21 20:23:07,579 epoch 1 - iter 8/8 - loss 0.90404995 - samples/sec: 11.22 - lr: 0.020000\n",
+      "2021-09-21 20:23:07,581 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:07,581 EPOCH 1 done: loss 0.9040 - lr 0.0200000\n",
+      "2021-09-21 20:23:07,886 DEV : loss 0.08725479990243912 - score 0.0\n",
+      "2021-09-21 20:23:07,887 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:23:11,890 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:12,022 epoch 2 - iter 1/8 - loss 0.56235486 - samples/sec: 11.87 - lr: 0.020000\n",
+      "2021-09-21 20:23:12,100 epoch 2 - iter 2/8 - loss 0.38032634 - samples/sec: 12.99 - lr: 0.020000\n",
+      "2021-09-21 20:23:12,249 epoch 2 - iter 3/8 - loss 0.47903773 - samples/sec: 6.73 - lr: 0.020000\n",
+      "2021-09-21 20:23:12,338 epoch 2 - iter 4/8 - loss 0.49476552 - samples/sec: 11.39 - lr: 0.020000\n",
+      "2021-09-21 20:23:12,424 epoch 2 - iter 5/8 - loss 0.40083803 - samples/sec: 11.68 - lr: 0.020000\n",
+      "2021-09-21 20:23:12,492 epoch 2 - iter 6/8 - loss 0.35071115 - samples/sec: 14.74 - lr: 0.020000\n",
+      "2021-09-21 20:23:12,653 epoch 2 - iter 7/8 - loss 0.30547605 - samples/sec: 6.23 - lr: 0.020000\n",
+      "2021-09-21 20:23:12,733 epoch 2 - iter 8/8 - loss 0.38352611 - samples/sec: 12.63 - lr: 0.020000\n",
+      "2021-09-21 20:23:12,735 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:12,735 EPOCH 2 done: loss 0.3835 - lr 0.0200000\n",
+      "2021-09-21 20:23:13,058 DEV : loss 0.05872712656855583 - score 0.0\n",
+      "2021-09-21 20:23:13,060 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:51:25,624 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:25,711 epoch 2 - iter 1/8 - loss 0.49088588 - samples/sec: 14.82 - lr: 0.020000\n",
-      "2021-09-08 11:51:25,773 epoch 2 - iter 2/8 - loss 0.25525520 - samples/sec: 16.31 - lr: 0.020000\n",
-      "2021-09-08 11:51:25,820 epoch 2 - iter 3/8 - loss 0.46916612 - samples/sec: 21.28 - lr: 0.020000\n",
-      "2021-09-08 11:51:25,916 epoch 2 - iter 4/8 - loss 0.35236684 - samples/sec: 10.46 - lr: 0.020000\n",
-      "2021-09-08 11:51:25,963 epoch 2 - iter 5/8 - loss 0.28507042 - samples/sec: 21.34 - lr: 0.020000\n",
-      "2021-09-08 11:51:26,011 epoch 2 - iter 6/8 - loss 0.27668065 - samples/sec: 21.08 - lr: 0.020000\n",
-      "2021-09-08 11:51:26,083 epoch 2 - iter 7/8 - loss 0.23740224 - samples/sec: 14.06 - lr: 0.020000\n",
-      "2021-09-08 11:51:26,127 epoch 2 - iter 8/8 - loss 0.20909240 - samples/sec: 22.98 - lr: 0.020000\n",
-      "2021-09-08 11:51:26,128 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:26,128 EPOCH 2 done: loss 0.2091 - lr 0.0200000\n",
-      "2021-09-08 11:51:26,835 DEV : loss 0.13633844256401062 - score 0.0\n",
-      "2021-09-08 11:51:26,836 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:23:26,969 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:27,040 epoch 3 - iter 1/8 - loss 0.48298419 - samples/sec: 18.03 - lr: 0.020000\n",
+      "2021-09-21 20:23:27,204 epoch 3 - iter 2/8 - loss 0.40293951 - samples/sec: 6.16 - lr: 0.020000\n",
+      "2021-09-21 20:23:27,297 epoch 3 - iter 3/8 - loss 0.27076705 - samples/sec: 10.80 - lr: 0.020000\n",
+      "2021-09-21 20:23:27,456 epoch 3 - iter 4/8 - loss 0.20457105 - samples/sec: 6.31 - lr: 0.020000\n",
+      "2021-09-21 20:23:27,507 epoch 3 - iter 5/8 - loss 0.16860142 - samples/sec: 19.87 - lr: 0.020000\n",
+      "2021-09-21 20:23:27,590 epoch 3 - iter 6/8 - loss 0.30672146 - samples/sec: 12.03 - lr: 0.020000\n",
+      "2021-09-21 20:23:27,650 epoch 3 - iter 7/8 - loss 0.26367850 - samples/sec: 16.99 - lr: 0.020000\n",
+      "2021-09-21 20:23:27,715 epoch 3 - iter 8/8 - loss 0.31572471 - samples/sec: 15.56 - lr: 0.020000\n",
+      "2021-09-21 20:23:27,716 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:27,716 EPOCH 3 done: loss 0.3157 - lr 0.0200000\n",
+      "2021-09-21 20:23:27,913 DEV : loss 0.32941511273384094 - score 0.0\n",
+      "2021-09-21 20:23:27,914 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:23:27,919 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:27,981 epoch 4 - iter 1/8 - loss 0.00585576 - samples/sec: 21.08 - lr: 0.020000\n",
+      "2021-09-21 20:23:28,039 epoch 4 - iter 2/8 - loss 0.63891635 - samples/sec: 17.39 - lr: 0.020000\n",
+      "2021-09-21 20:23:28,085 epoch 4 - iter 3/8 - loss 0.43437815 - samples/sec: 21.93 - lr: 0.020000\n",
+      "2021-09-21 20:23:28,193 epoch 4 - iter 4/8 - loss 0.32688058 - samples/sec: 9.27 - lr: 0.020000\n",
+      "2021-09-21 20:23:28,243 epoch 4 - iter 5/8 - loss 0.30667000 - samples/sec: 20.36 - lr: 0.020000\n",
+      "2021-09-21 20:23:28,292 epoch 4 - iter 6/8 - loss 0.26289056 - samples/sec: 20.79 - lr: 0.020000\n",
+      "2021-09-21 20:23:28,391 epoch 4 - iter 7/8 - loss 0.52556469 - samples/sec: 10.16 - lr: 0.020000\n",
+      "2021-09-21 20:23:28,449 epoch 4 - iter 8/8 - loss 0.46391684 - samples/sec: 17.39 - lr: 0.020000\n",
+      "2021-09-21 20:23:28,450 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:28,451 EPOCH 4 done: loss 0.4639 - lr 0.0200000\n",
+      "2021-09-21 20:23:28,708 DEV : loss 0.01160782016813755 - score 0.0\n",
+      "2021-09-21 20:23:28,710 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:51:32,659 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:32,758 epoch 3 - iter 1/8 - loss 0.48683688 - samples/sec: 12.08 - lr: 0.020000\n",
-      "2021-09-08 11:51:32,818 epoch 3 - iter 2/8 - loss 0.24992879 - samples/sec: 16.81 - lr: 0.020000\n",
-      "2021-09-08 11:51:32,881 epoch 3 - iter 3/8 - loss 0.16880789 - samples/sec: 15.91 - lr: 0.020000\n",
-      "2021-09-08 11:51:32,975 epoch 3 - iter 4/8 - loss 0.12707284 - samples/sec: 10.78 - lr: 0.020000\n",
-      "2021-09-08 11:51:33,022 epoch 3 - iter 5/8 - loss 0.11879297 - samples/sec: 21.26 - lr: 0.020000\n",
-      "2021-09-08 11:51:33,070 epoch 3 - iter 6/8 - loss 0.10473569 - samples/sec: 21.25 - lr: 0.020000\n",
-      "2021-09-08 11:51:33,117 epoch 3 - iter 7/8 - loss 0.12323107 - samples/sec: 21.21 - lr: 0.020000\n",
-      "2021-09-08 11:51:33,165 epoch 3 - iter 8/8 - loss 0.14062957 - samples/sec: 21.01 - lr: 0.020000\n",
-      "2021-09-08 11:51:33,166 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:33,166 EPOCH 3 done: loss 0.1406 - lr 0.0200000\n",
-      "2021-09-08 11:51:35,850 DEV : loss 0.3861728608608246 - score 0.0\n",
-      "2021-09-08 11:51:35,851 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:51:35,971 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:36,065 epoch 4 - iter 1/8 - loss 0.00096426 - samples/sec: 12.61 - lr: 0.020000\n",
-      "2021-09-08 11:51:36,125 epoch 4 - iter 2/8 - loss 0.00180920 - samples/sec: 16.82 - lr: 0.020000\n",
-      "2021-09-08 11:51:36,213 epoch 4 - iter 3/8 - loss 0.00165714 - samples/sec: 11.46 - lr: 0.020000\n",
-      "2021-09-08 11:51:36,257 epoch 4 - iter 4/8 - loss 0.00225124 - samples/sec: 22.57 - lr: 0.020000\n",
-      "2021-09-08 11:51:36,302 epoch 4 - iter 5/8 - loss 0.00366138 - samples/sec: 22.53 - lr: 0.020000\n",
-      "2021-09-08 11:51:36,350 epoch 4 - iter 6/8 - loss 0.00819965 - samples/sec: 21.13 - lr: 0.020000\n",
-      "2021-09-08 11:51:36,407 epoch 4 - iter 7/8 - loss 0.00712738 - samples/sec: 17.59 - lr: 0.020000\n",
-      "2021-09-08 11:51:36,457 epoch 4 - iter 8/8 - loss 0.06047541 - samples/sec: 20.24 - lr: 0.020000\n",
-      "2021-09-08 11:51:36,458 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:36,458 EPOCH 4 done: loss 0.0605 - lr 0.0200000\n",
-      "2021-09-08 11:51:36,703 DEV : loss 0.07808978110551834 - score 0.0\n",
-      "2021-09-08 11:51:36,704 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:23:35,910 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:35,984 epoch 5 - iter 1/8 - loss 0.00408825 - samples/sec: 17.26 - lr: 0.020000\n",
+      "2021-09-21 20:23:36,043 epoch 5 - iter 2/8 - loss 0.91722558 - samples/sec: 16.99 - lr: 0.020000\n",
+      "2021-09-21 20:23:36,141 epoch 5 - iter 3/8 - loss 0.74901849 - samples/sec: 10.35 - lr: 0.020000\n",
+      "2021-09-21 20:23:36,188 epoch 5 - iter 4/8 - loss 0.57072516 - samples/sec: 21.64 - lr: 0.020000\n",
+      "2021-09-21 20:23:36,241 epoch 5 - iter 5/8 - loss 0.46862852 - samples/sec: 19.23 - lr: 0.020000\n",
+      "2021-09-21 20:23:36,350 epoch 5 - iter 6/8 - loss 0.39206871 - samples/sec: 9.19 - lr: 0.020000\n",
+      "2021-09-21 20:23:36,397 epoch 5 - iter 7/8 - loss 0.33630019 - samples/sec: 21.83 - lr: 0.020000\n",
+      "2021-09-21 20:23:36,444 epoch 5 - iter 8/8 - loss 0.29463081 - samples/sec: 21.55 - lr: 0.020000\n",
+      "2021-09-21 20:23:36,445 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:36,446 EPOCH 5 done: loss 0.2946 - lr 0.0200000\n",
+      "2021-09-21 20:23:36,718 DEV : loss 0.13766378164291382 - score 0.0\n",
+      "2021-09-21 20:23:36,720 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:23:36,811 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:36,878 epoch 6 - iter 1/8 - loss 1.10047019 - samples/sec: 19.17 - lr: 0.020000\n",
+      "2021-09-21 20:23:36,926 epoch 6 - iter 2/8 - loss 0.55876802 - samples/sec: 21.30 - lr: 0.020000\n",
+      "2021-09-21 20:23:36,974 epoch 6 - iter 3/8 - loss 0.37388145 - samples/sec: 21.22 - lr: 0.020000\n",
+      "2021-09-21 20:23:37,032 epoch 6 - iter 4/8 - loss 0.28143446 - samples/sec: 17.65 - lr: 0.020000\n",
+      "2021-09-21 20:23:37,129 epoch 6 - iter 5/8 - loss 0.29435440 - samples/sec: 10.34 - lr: 0.020000\n",
+      "2021-09-21 20:23:37,190 epoch 6 - iter 6/8 - loss 0.26261879 - samples/sec: 16.49 - lr: 0.020000\n",
+      "2021-09-21 20:23:37,297 epoch 6 - iter 7/8 - loss 0.22557138 - samples/sec: 9.42 - lr: 0.020000\n",
+      "2021-09-21 20:23:37,344 epoch 6 - iter 8/8 - loss 0.19785898 - samples/sec: 21.82 - lr: 0.020000\n",
+      "2021-09-21 20:23:37,345 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:37,346 EPOCH 6 done: loss 0.1979 - lr 0.0200000\n",
+      "2021-09-21 20:23:43,500 DEV : loss 0.010183817707002163 - score 0.0\n",
+      "2021-09-21 20:23:43,501 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:51:43,655 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:43,734 epoch 5 - iter 1/8 - loss 0.00091056 - samples/sec: 15.82 - lr: 0.020000\n",
-      "2021-09-08 11:51:43,830 epoch 5 - iter 2/8 - loss 0.00068919 - samples/sec: 10.44 - lr: 0.020000\n",
-      "2021-09-08 11:51:43,880 epoch 5 - iter 3/8 - loss 0.00082860 - samples/sec: 20.42 - lr: 0.020000\n",
-      "2021-09-08 11:51:43,924 epoch 5 - iter 4/8 - loss 0.00255890 - samples/sec: 22.80 - lr: 0.020000\n",
-      "2021-09-08 11:51:43,968 epoch 5 - iter 5/8 - loss 0.00321170 - samples/sec: 22.74 - lr: 0.020000\n",
-      "2021-09-08 11:51:44,039 epoch 5 - iter 6/8 - loss 0.00300217 - samples/sec: 14.16 - lr: 0.020000\n",
-      "2021-09-08 11:51:44,084 epoch 5 - iter 7/8 - loss 0.00318329 - samples/sec: 22.90 - lr: 0.020000\n",
-      "2021-09-08 11:51:44,137 epoch 5 - iter 8/8 - loss 0.00299396 - samples/sec: 18.83 - lr: 0.020000\n",
-      "2021-09-08 11:51:44,138 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:44,138 EPOCH 5 done: loss 0.0030 - lr 0.0200000\n",
-      "2021-09-08 11:51:44,174 DEV : loss 0.11385876685380936 - score 0.0\n",
-      "2021-09-08 11:51:44,175 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:51:44,176 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:44,246 epoch 6 - iter 1/8 - loss 0.00110579 - samples/sec: 18.10 - lr: 0.020000\n",
-      "2021-09-08 11:51:44,332 epoch 6 - iter 2/8 - loss 0.00089262 - samples/sec: 11.73 - lr: 0.020000\n",
-      "2021-09-08 11:51:44,376 epoch 6 - iter 3/8 - loss 0.00117120 - samples/sec: 22.87 - lr: 0.020000\n",
-      "2021-09-08 11:51:44,420 epoch 6 - iter 4/8 - loss 0.00152689 - samples/sec: 22.54 - lr: 0.020000\n",
-      "2021-09-08 11:51:44,474 epoch 6 - iter 5/8 - loss 0.00167517 - samples/sec: 18.97 - lr: 0.020000\n",
-      "2021-09-08 11:51:44,520 epoch 6 - iter 6/8 - loss 0.00158305 - samples/sec: 21.57 - lr: 0.020000\n",
-      "2021-09-08 11:51:44,591 epoch 6 - iter 7/8 - loss 0.00142570 - samples/sec: 14.29 - lr: 0.020000\n",
-      "2021-09-08 11:51:44,635 epoch 6 - iter 8/8 - loss 0.00134485 - samples/sec: 22.91 - lr: 0.020000\n",
-      "2021-09-08 11:51:44,636 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:44,636 EPOCH 6 done: loss 0.0013 - lr 0.0200000\n",
-      "2021-09-08 11:51:44,670 DEV : loss 0.12673360109329224 - score 0.0\n",
-      "2021-09-08 11:51:44,670 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:51:44,672 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:44,730 epoch 7 - iter 1/8 - loss 0.00159366 - samples/sec: 22.93 - lr: 0.020000\n",
-      "2021-09-08 11:51:44,799 epoch 7 - iter 2/8 - loss 0.00115566 - samples/sec: 14.53 - lr: 0.020000\n",
-      "2021-09-08 11:51:44,844 epoch 7 - iter 3/8 - loss 0.00124837 - samples/sec: 22.63 - lr: 0.020000\n",
-      "2021-09-08 11:51:44,916 epoch 7 - iter 4/8 - loss 0.00130152 - samples/sec: 19.72 - lr: 0.020000\n",
-      "2021-09-08 11:51:44,962 epoch 7 - iter 5/8 - loss 0.00129859 - samples/sec: 21.90 - lr: 0.020000\n",
-      "2021-09-08 11:51:45,019 epoch 7 - iter 6/8 - loss 0.00114122 - samples/sec: 17.73 - lr: 0.020000\n",
-      "2021-09-08 11:51:45,107 epoch 7 - iter 7/8 - loss 0.00108300 - samples/sec: 11.54 - lr: 0.020000\n",
-      "2021-09-08 11:51:45,165 epoch 7 - iter 8/8 - loss 0.00109130 - samples/sec: 17.53 - lr: 0.020000\n",
-      "2021-09-08 11:51:45,166 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:45,166 EPOCH 7 done: loss 0.0011 - lr 0.0200000\n",
-      "2021-09-08 11:51:45,200 DEV : loss 0.13629479706287384 - score 0.0\n",
-      "2021-09-08 11:51:45,201 BAD EPOCHS (no improvement): 3\n"
+      "2021-09-21 20:23:48,732 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:48,896 epoch 7 - iter 1/8 - loss 0.00093249 - samples/sec: 9.80 - lr: 0.020000\n",
+      "2021-09-21 20:23:48,982 epoch 7 - iter 2/8 - loss 0.04782082 - samples/sec: 11.72 - lr: 0.020000\n",
+      "2021-09-21 20:23:49,050 epoch 7 - iter 3/8 - loss 0.03261825 - samples/sec: 14.91 - lr: 0.020000\n",
+      "2021-09-21 20:23:49,137 epoch 7 - iter 4/8 - loss 0.02517299 - samples/sec: 11.58 - lr: 0.020000\n",
+      "2021-09-21 20:23:49,216 epoch 7 - iter 5/8 - loss 0.23845888 - samples/sec: 12.81 - lr: 0.020000\n",
+      "2021-09-21 20:23:49,382 epoch 7 - iter 6/8 - loss 0.21297509 - samples/sec: 6.05 - lr: 0.020000\n",
+      "2021-09-21 20:23:49,436 epoch 7 - iter 7/8 - loss 0.18282493 - samples/sec: 18.62 - lr: 0.020000\n",
+      "2021-09-21 20:23:49,621 epoch 7 - iter 8/8 - loss 0.16012892 - samples/sec: 5.41 - lr: 0.020000\n",
+      "2021-09-21 20:23:49,622 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:49,623 EPOCH 7 done: loss 0.1601 - lr 0.0200000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:51:45,204 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:45,278 epoch 8 - iter 1/8 - loss 0.00135026 - samples/sec: 17.75 - lr: 0.020000\n",
-      "2021-09-08 11:51:45,328 epoch 8 - iter 2/8 - loss 0.00140059 - samples/sec: 20.39 - lr: 0.020000\n",
-      "2021-09-08 11:51:45,377 epoch 8 - iter 3/8 - loss 0.00158478 - samples/sec: 20.97 - lr: 0.020000\n",
-      "2021-09-08 11:51:45,466 epoch 8 - iter 4/8 - loss 0.00134359 - samples/sec: 11.34 - lr: 0.020000\n",
-      "2021-09-08 11:51:45,535 epoch 8 - iter 5/8 - loss 0.00126103 - samples/sec: 14.46 - lr: 0.020000\n",
-      "2021-09-08 11:51:45,591 epoch 8 - iter 6/8 - loss 0.00111207 - samples/sec: 18.06 - lr: 0.020000\n",
-      "2021-09-08 11:51:45,636 epoch 8 - iter 7/8 - loss 0.00115182 - samples/sec: 22.57 - lr: 0.020000\n",
-      "2021-09-08 11:51:45,681 epoch 8 - iter 8/8 - loss 0.00161682 - samples/sec: 22.51 - lr: 0.020000\n",
-      "2021-09-08 11:51:45,682 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:45,682 EPOCH 8 done: loss 0.0016 - lr 0.0200000\n",
-      "2021-09-08 11:51:45,809 DEV : loss 0.15130679309368134 - score 0.0\n",
-      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 11:51:45,810 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 11:51:45,882 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:45,954 epoch 9 - iter 1/8 - loss 0.00042636 - samples/sec: 17.55 - lr: 0.010000\n",
-      "2021-09-08 11:51:45,999 epoch 9 - iter 2/8 - loss 0.00332410 - samples/sec: 22.71 - lr: 0.010000\n",
-      "2021-09-08 11:51:46,083 epoch 9 - iter 3/8 - loss 0.00235485 - samples/sec: 11.90 - lr: 0.010000\n",
-      "2021-09-08 11:51:46,130 epoch 9 - iter 4/8 - loss 0.00208262 - samples/sec: 21.80 - lr: 0.010000\n",
-      "2021-09-08 11:51:46,199 epoch 9 - iter 5/8 - loss 0.00179402 - samples/sec: 14.54 - lr: 0.010000\n",
-      "2021-09-08 11:51:46,246 epoch 9 - iter 6/8 - loss 0.00162472 - samples/sec: 21.45 - lr: 0.010000\n",
-      "2021-09-08 11:51:46,290 epoch 9 - iter 7/8 - loss 0.00148480 - samples/sec: 22.86 - lr: 0.010000\n",
-      "2021-09-08 11:51:46,344 epoch 9 - iter 8/8 - loss 0.00141178 - samples/sec: 18.75 - lr: 0.010000\n",
-      "2021-09-08 11:51:46,345 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:46,345 EPOCH 9 done: loss 0.0014 - lr 0.0100000\n",
-      "2021-09-08 11:51:46,472 DEV : loss 0.16631406545639038 - score 0.0\n",
-      "2021-09-08 11:51:46,473 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:51:46,550 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:46,620 epoch 10 - iter 1/8 - loss 0.00033134 - samples/sec: 17.99 - lr: 0.010000\n",
-      "2021-09-08 11:51:46,665 epoch 10 - iter 2/8 - loss 0.00050038 - samples/sec: 22.78 - lr: 0.010000\n",
-      "2021-09-08 11:51:46,711 epoch 10 - iter 3/8 - loss 0.00063785 - samples/sec: 21.60 - lr: 0.010000\n",
-      "2021-09-08 11:51:46,759 epoch 10 - iter 4/8 - loss 0.05458321 - samples/sec: 21.28 - lr: 0.010000\n",
-      "2021-09-08 11:51:46,845 epoch 10 - iter 5/8 - loss 0.04383080 - samples/sec: 11.67 - lr: 0.010000\n",
-      "2021-09-08 11:51:46,898 epoch 10 - iter 6/8 - loss 0.03668066 - samples/sec: 18.91 - lr: 0.010000\n",
-      "2021-09-08 11:51:46,967 epoch 10 - iter 7/8 - loss 0.03155726 - samples/sec: 14.58 - lr: 0.010000\n",
-      "2021-09-08 11:51:47,011 epoch 10 - iter 8/8 - loss 0.02779736 - samples/sec: 22.91 - lr: 0.010000\n",
-      "2021-09-08 11:51:47,012 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:47,013 EPOCH 10 done: loss 0.0278 - lr 0.0100000\n",
-      "2021-09-08 11:51:47,162 DEV : loss 0.15283611416816711 - score 0.0\n",
-      "2021-09-08 11:51:47,163 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:51:52,569 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:52,570 Testing using best model ...\n",
-      "2021-09-08 11:51:52,571 loading file None/best-model.pt\n",
+      "2021-09-21 20:23:49,992 DEV : loss 0.08414498716592789 - score 0.0\n",
+      "2021-09-21 20:23:49,994 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:23:50,073 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:50,146 epoch 8 - iter 1/8 - loss 0.00099645 - samples/sec: 17.25 - lr: 0.020000\n",
+      "2021-09-21 20:23:50,192 epoch 8 - iter 2/8 - loss 0.00142635 - samples/sec: 21.84 - lr: 0.020000\n",
+      "2021-09-21 20:23:50,241 epoch 8 - iter 3/8 - loss 0.00201633 - samples/sec: 20.71 - lr: 0.020000\n",
+      "2021-09-21 20:23:50,288 epoch 8 - iter 4/8 - loss 0.00184438 - samples/sec: 21.52 - lr: 0.020000\n",
+      "2021-09-21 20:23:50,345 epoch 8 - iter 5/8 - loss 0.00257220 - samples/sec: 17.92 - lr: 0.020000\n",
+      "2021-09-21 20:23:50,438 epoch 8 - iter 6/8 - loss 0.00283652 - samples/sec: 10.76 - lr: 0.020000\n",
+      "2021-09-21 20:23:50,487 epoch 8 - iter 7/8 - loss 0.17185561 - samples/sec: 20.70 - lr: 0.020000\n",
+      "2021-09-21 20:23:50,715 epoch 8 - iter 8/8 - loss 0.15046382 - samples/sec: 4.40 - lr: 0.020000\n",
+      "2021-09-21 20:23:50,716 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:50,717 EPOCH 8 done: loss 0.1505 - lr 0.0200000\n",
+      "2021-09-21 20:23:51,003 DEV : loss 0.02663728967308998 - score 0.0\n",
+      "2021-09-21 20:23:51,004 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:23:51,095 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:51,158 epoch 9 - iter 1/8 - loss 0.00188327 - samples/sec: 20.82 - lr: 0.020000\n",
+      "2021-09-21 20:23:51,266 epoch 9 - iter 2/8 - loss 0.00130618 - samples/sec: 9.26 - lr: 0.020000\n",
+      "2021-09-21 20:23:51,316 epoch 9 - iter 3/8 - loss 0.02423314 - samples/sec: 20.48 - lr: 0.020000\n",
+      "2021-09-21 20:23:51,375 epoch 9 - iter 4/8 - loss 0.33796006 - samples/sec: 17.28 - lr: 0.020000\n",
+      "2021-09-21 20:23:51,423 epoch 9 - iter 5/8 - loss 0.27295539 - samples/sec: 20.87 - lr: 0.020000\n",
+      "2021-09-21 20:23:51,469 epoch 9 - iter 6/8 - loss 0.22801497 - samples/sec: 22.12 - lr: 0.020000\n",
+      "2021-09-21 20:23:51,527 epoch 9 - iter 7/8 - loss 0.19597070 - samples/sec: 17.35 - lr: 0.020000\n",
+      "2021-09-21 20:23:51,627 epoch 9 - iter 8/8 - loss 0.23587276 - samples/sec: 10.09 - lr: 0.020000\n",
+      "2021-09-21 20:23:51,628 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:51,629 EPOCH 9 done: loss 0.2359 - lr 0.0200000\n",
+      "2021-09-21 20:23:51,915 DEV : loss 0.002366668777540326 - score 0.0\n",
+      "2021-09-21 20:23:51,917 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:24:00,246 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:00,328 epoch 10 - iter 1/8 - loss 0.00337684 - samples/sec: 15.23 - lr: 0.020000\n",
+      "2021-09-21 20:24:00,379 epoch 10 - iter 2/8 - loss 0.00541640 - samples/sec: 19.92 - lr: 0.020000\n",
+      "2021-09-21 20:24:00,426 epoch 10 - iter 3/8 - loss 0.01293071 - samples/sec: 21.70 - lr: 0.020000\n",
+      "2021-09-21 20:24:00,477 epoch 10 - iter 4/8 - loss 0.01099549 - samples/sec: 20.17 - lr: 0.020000\n",
+      "2021-09-21 20:24:00,575 epoch 10 - iter 5/8 - loss 0.02136087 - samples/sec: 10.27 - lr: 0.020000\n",
+      "2021-09-21 20:24:00,685 epoch 10 - iter 6/8 - loss 0.01796945 - samples/sec: 9.17 - lr: 0.020000\n",
+      "2021-09-21 20:24:00,733 epoch 10 - iter 7/8 - loss 0.01575452 - samples/sec: 21.28 - lr: 0.020000\n",
+      "2021-09-21 20:24:00,788 epoch 10 - iter 8/8 - loss 0.01449848 - samples/sec: 18.39 - lr: 0.020000\n",
+      "2021-09-21 20:24:00,789 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:00,790 EPOCH 10 done: loss 0.0145 - lr 0.0200000\n",
+      "2021-09-21 20:24:01,446 DEV : loss 0.06194175034761429 - score 0.0\n",
+      "2021-09-21 20:24:01,448 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:24:07,173 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:07,174 Testing using best model ...\n",
+      "2021-09-21 20:24:07,222 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:51:57,217 \t1.0\n",
-      "2021-09-08 11:51:57,218 \n",
+      "2021-09-21 20:24:12,397 \t0.0\n",
+      "2021-09-21 20:24:12,397 \n",
       "Results:\n",
-      "- F-score (micro) 1.0\n",
-      "- F-score (macro) 0.1111\n",
-      "- Accuracy 1.0\n",
+      "- F-score (micro) 0.0\n",
+      "- F-score (macro) 0.0\n",
+      "- Accuracy 0.0\n",
       "\n",
       "By class:\n",
       "                        precision    recall  f1-score   support\n",
       "\n",
       "Family & Relationships     0.0000    0.0000    0.0000         0\n",
-      "                Health     0.0000    0.0000    0.0000         0\n",
       " Science & Mathematics     0.0000    0.0000    0.0000         0\n",
       " Entertainment & Music     0.0000    0.0000    0.0000         0\n",
+      "  Computers & Internet     0.0000    0.0000    0.0000         0\n",
       " Education & Reference     0.0000    0.0000    0.0000         0\n",
       "                Sports     0.0000    0.0000    0.0000         0\n",
-      "    Business & Finance     0.0000    0.0000    0.0000         0\n",
+      "     Society & Culture     0.0000    0.0000    0.0000         0\n",
       " Politics & Government     0.0000    0.0000    0.0000         0\n",
-      "  Computers & Internet     1.0000    1.0000    1.0000         1\n",
+      "    Business & Finance     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "             micro avg     1.0000    1.0000    1.0000         1\n",
-      "             macro avg     0.1111    0.1111    0.1111         1\n",
-      "          weighted avg     1.0000    1.0000    1.0000         1\n",
-      "           samples avg     1.0000    1.0000    1.0000         1\n",
+      "             micro avg     0.0000    0.0000    0.0000         1\n",
+      "             macro avg     0.0000    0.0000    0.0000         1\n",
+      "          weighted avg     0.0000    0.0000    0.0000         1\n",
+      "           samples avg     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "2021-09-08 11:51:57,218 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:09,656 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
+      "2021-09-21 20:24:12,398 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:33,952 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:52:14,078 Computing label dictionary. Progress:\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "100%|██████████| 9/9 [00:00<00:00, 10609.54it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 11:52:14,080 [b'Family & Relationships', b'Health', b'Science & Mathematics', b'Computers & Internet', b'Education & Reference', b'Sports', b'Society & Culture', b'Politics & Government', b'Entertainment & Music']\n"
+      "2021-09-21 20:24:38,135 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\n"
+      "100%|██████████| 9/9 [00:00<00:00, 14979.66it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:52:14,312 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:14,314 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:24:38,138 [b'Family & Relationships', b'Science & Mathematics', b'Entertainment & Music', b'Computers & Internet', b'Education & Reference', b'Sports', b'Society & Culture', b'Business & Finance', b'Health']\n",
+      "2021-09-21 20:24:38,146 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:38,148 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -2543,194 +2531,212 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:52:14,315 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:14,315 Corpus: \"Corpus: 8 train + 1 dev + 1 test sentences\"\n",
-      "2021-09-08 11:52:14,315 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:14,315 Parameters:\n",
-      "2021-09-08 11:52:14,316  - learning_rate: \"0.02\"\n",
-      "2021-09-08 11:52:14,316  - mini_batch_size: \"1\"\n",
-      "2021-09-08 11:52:14,316  - patience: \"3\"\n",
-      "2021-09-08 11:52:14,316  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 11:52:14,317  - max_epochs: \"10\"\n",
-      "2021-09-08 11:52:14,317  - shuffle: \"True\"\n",
-      "2021-09-08 11:52:14,317  - train_with_dev: \"False\"\n",
-      "2021-09-08 11:52:14,318  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 11:52:14,318 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:14,318 Model training base path: \"None\"\n",
-      "2021-09-08 11:52:14,318 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:14,319 Device: cuda:1\n",
-      "2021-09-08 11:52:14,319 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:14,319 Embeddings storage mode: cpu\n",
-      "2021-09-08 11:52:14,560 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:14,619 epoch 1 - iter 1/8 - loss 1.60441685 - samples/sec: 23.18 - lr: 0.020000\n",
-      "2021-09-08 11:52:14,780 epoch 1 - iter 2/8 - loss 0.99007601 - samples/sec: 6.20 - lr: 0.020000\n",
-      "2021-09-08 11:52:14,829 epoch 1 - iter 3/8 - loss 1.17610073 - samples/sec: 20.65 - lr: 0.020000\n",
-      "2021-09-08 11:52:14,882 epoch 1 - iter 4/8 - loss 1.21487391 - samples/sec: 19.39 - lr: 0.020000\n",
-      "2021-09-08 11:52:14,931 epoch 1 - iter 5/8 - loss 1.18884323 - samples/sec: 20.39 - lr: 0.020000\n",
-      "2021-09-08 11:52:14,980 epoch 1 - iter 6/8 - loss 1.02444860 - samples/sec: 20.84 - lr: 0.020000\n",
-      "2021-09-08 11:52:15,123 epoch 1 - iter 7/8 - loss 0.94558992 - samples/sec: 7.00 - lr: 0.020000\n",
-      "2021-09-08 11:52:15,174 epoch 1 - iter 8/8 - loss 0.87188377 - samples/sec: 19.67 - lr: 0.020000\n",
-      "2021-09-08 11:52:15,175 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:15,176 EPOCH 1 done: loss 0.8719 - lr 0.0200000\n",
-      "2021-09-08 11:52:15,492 DEV : loss 0.3764647841453552 - score 0.0\n",
-      "2021-09-08 11:52:15,493 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:24:38,149 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:38,149 Corpus: \"Corpus: 8 train + 1 dev + 1 test sentences\"\n",
+      "2021-09-21 20:24:38,150 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:38,150 Parameters:\n",
+      "2021-09-21 20:24:38,150  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:24:38,151  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:24:38,151  - patience: \"3\"\n",
+      "2021-09-21 20:24:38,151  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:24:38,152  - max_epochs: \"10\"\n",
+      "2021-09-21 20:24:38,152  - shuffle: \"True\"\n",
+      "2021-09-21 20:24:38,152  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:24:38,153  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:24:38,153 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:38,153 Model training base path: \"None\"\n",
+      "2021-09-21 20:24:38,154 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:38,154 Device: cuda:0\n",
+      "2021-09-21 20:24:38,154 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:38,155 Embeddings storage mode: cpu\n",
+      "2021-09-21 20:24:38,161 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:38,271 epoch 1 - iter 1/8 - loss 0.97970676 - samples/sec: 16.50 - lr: 0.020000\n",
+      "2021-09-21 20:24:38,331 epoch 1 - iter 2/8 - loss 0.49658229 - samples/sec: 16.59 - lr: 0.020000\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 20:24:38,403 epoch 1 - iter 3/8 - loss 0.78843389 - samples/sec: 14.09 - lr: 0.020000\n",
+      "2021-09-21 20:24:38,472 epoch 1 - iter 4/8 - loss 0.63783506 - samples/sec: 14.49 - lr: 0.020000\n",
+      "2021-09-21 20:24:38,551 epoch 1 - iter 5/8 - loss 0.83774086 - samples/sec: 12.85 - lr: 0.020000\n",
+      "2021-09-21 20:24:38,624 epoch 1 - iter 6/8 - loss 0.70015139 - samples/sec: 13.83 - lr: 0.020000\n",
+      "2021-09-21 20:24:38,693 epoch 1 - iter 7/8 - loss 0.80741374 - samples/sec: 14.59 - lr: 0.020000\n",
+      "2021-09-21 20:24:38,934 epoch 1 - iter 8/8 - loss 0.97661673 - samples/sec: 4.16 - lr: 0.020000\n",
+      "2021-09-21 20:24:38,935 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:38,935 EPOCH 1 done: loss 0.9766 - lr 0.0200000\n",
+      "2021-09-21 20:24:39,001 DEV : loss 0.20633478462696075 - score 0.0\n",
+      "2021-09-21 20:24:39,002 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:52:20,462 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:20,635 epoch 2 - iter 1/8 - loss 2.00723362 - samples/sec: 6.33 - lr: 0.020000\n",
-      "2021-09-08 11:52:20,780 epoch 2 - iter 2/8 - loss 1.08144006 - samples/sec: 6.95 - lr: 0.020000\n",
-      "2021-09-08 11:52:20,827 epoch 2 - iter 3/8 - loss 0.74913164 - samples/sec: 21.28 - lr: 0.020000\n",
-      "2021-09-08 11:52:20,872 epoch 2 - iter 4/8 - loss 0.56428118 - samples/sec: 22.82 - lr: 0.020000\n",
-      "2021-09-08 11:52:20,922 epoch 2 - iter 5/8 - loss 0.49489491 - samples/sec: 19.98 - lr: 0.020000\n",
-      "2021-09-08 11:52:20,973 epoch 2 - iter 6/8 - loss 0.45724639 - samples/sec: 19.92 - lr: 0.020000\n",
-      "2021-09-08 11:52:21,020 epoch 2 - iter 7/8 - loss 0.46982247 - samples/sec: 21.49 - lr: 0.020000\n",
-      "2021-09-08 11:52:21,068 epoch 2 - iter 8/8 - loss 0.45285388 - samples/sec: 21.26 - lr: 0.020000\n",
-      "2021-09-08 11:52:21,068 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:21,069 EPOCH 2 done: loss 0.4529 - lr 0.0200000\n",
-      "2021-09-08 11:52:24,312 DEV : loss 0.6235830783843994 - score 0.0\n",
-      "2021-09-08 11:52:24,313 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:52:24,323 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:24,384 epoch 3 - iter 1/8 - loss 0.01047130 - samples/sec: 22.31 - lr: 0.020000\n",
-      "2021-09-08 11:52:24,428 epoch 3 - iter 2/8 - loss 0.01318990 - samples/sec: 22.92 - lr: 0.020000\n",
-      "2021-09-08 11:52:24,588 epoch 3 - iter 3/8 - loss 0.30027031 - samples/sec: 6.28 - lr: 0.020000\n",
-      "2021-09-08 11:52:24,638 epoch 3 - iter 4/8 - loss 0.23228967 - samples/sec: 20.02 - lr: 0.020000\n",
-      "2021-09-08 11:52:24,779 epoch 3 - iter 5/8 - loss 0.18652362 - samples/sec: 7.10 - lr: 0.020000\n",
-      "2021-09-08 11:52:24,827 epoch 3 - iter 6/8 - loss 0.16852895 - samples/sec: 21.27 - lr: 0.020000\n",
-      "2021-09-08 11:52:24,874 epoch 3 - iter 7/8 - loss 0.14500661 - samples/sec: 21.21 - lr: 0.020000\n",
-      "2021-09-08 11:52:24,918 epoch 3 - iter 8/8 - loss 0.12832217 - samples/sec: 23.12 - lr: 0.020000\n",
-      "2021-09-08 11:52:24,919 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:24,919 EPOCH 3 done: loss 0.1283 - lr 0.0200000\n",
-      "2021-09-08 11:52:24,950 DEV : loss 1.055966854095459 - score 0.0\n",
-      "2021-09-08 11:52:24,951 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:52:24,968 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:25,121 epoch 4 - iter 1/8 - loss 0.00615876 - samples/sec: 7.16 - lr: 0.020000\n",
-      "2021-09-08 11:52:25,171 epoch 4 - iter 2/8 - loss 0.00908268 - samples/sec: 20.18 - lr: 0.020000\n",
-      "2021-09-08 11:52:25,315 epoch 4 - iter 3/8 - loss 0.36868608 - samples/sec: 7.00 - lr: 0.020000\n",
-      "2021-09-08 11:52:25,363 epoch 4 - iter 4/8 - loss 0.27918855 - samples/sec: 21.20 - lr: 0.020000\n",
-      "2021-09-08 11:52:25,407 epoch 4 - iter 5/8 - loss 0.22362516 - samples/sec: 22.99 - lr: 0.020000\n",
-      "2021-09-08 11:52:25,454 epoch 4 - iter 6/8 - loss 0.19816268 - samples/sec: 21.21 - lr: 0.020000\n",
-      "2021-09-08 11:52:25,498 epoch 4 - iter 7/8 - loss 0.17006903 - samples/sec: 22.84 - lr: 0.020000\n",
-      "2021-09-08 11:52:25,543 epoch 4 - iter 8/8 - loss 0.14997453 - samples/sec: 22.93 - lr: 0.020000\n",
-      "2021-09-08 11:52:25,544 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:25,544 EPOCH 4 done: loss 0.1500 - lr 0.0200000\n",
-      "2021-09-08 11:52:25,616 DEV : loss 0.7629687190055847 - score 0.0\n",
-      "2021-09-08 11:52:25,616 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:52:25,620 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:25,681 epoch 5 - iter 1/8 - loss 0.01601009 - samples/sec: 21.29 - lr: 0.020000\n",
-      "2021-09-08 11:52:25,828 epoch 5 - iter 2/8 - loss 0.07052343 - samples/sec: 6.81 - lr: 0.020000\n",
-      "2021-09-08 11:52:25,876 epoch 5 - iter 3/8 - loss 0.04774775 - samples/sec: 21.21 - lr: 0.020000\n",
-      "2021-09-08 11:52:26,021 epoch 5 - iter 4/8 - loss 0.03666043 - samples/sec: 6.95 - lr: 0.020000\n",
-      "2021-09-08 11:52:26,070 epoch 5 - iter 5/8 - loss 0.02970617 - samples/sec: 20.52 - lr: 0.020000\n",
-      "2021-09-08 11:52:26,118 epoch 5 - iter 6/8 - loss 0.02483136 - samples/sec: 21.37 - lr: 0.020000\n",
-      "2021-09-08 11:52:26,165 epoch 5 - iter 7/8 - loss 0.02157747 - samples/sec: 21.21 - lr: 0.020000\n",
-      "2021-09-08 11:52:26,216 epoch 5 - iter 8/8 - loss 0.01907264 - samples/sec: 20.02 - lr: 0.020000\n",
-      "2021-09-08 11:52:26,217 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:26,218 EPOCH 5 done: loss 0.0191 - lr 0.0200000\n",
-      "2021-09-08 11:52:26,355 DEV : loss 0.9064276218414307 - score 0.0\n",
-      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 11:52:26,356 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 11:52:26,427 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:26,493 epoch 6 - iter 1/8 - loss 0.05407056 - samples/sec: 19.23 - lr: 0.010000\n",
-      "2021-09-08 11:52:26,637 epoch 6 - iter 2/8 - loss 0.84703325 - samples/sec: 7.00 - lr: 0.010000\n",
-      "2021-09-08 11:52:26,685 epoch 6 - iter 3/8 - loss 0.56606583 - samples/sec: 20.77 - lr: 0.010000\n",
-      "2021-09-08 11:52:26,730 epoch 6 - iter 4/8 - loss 0.42482909 - samples/sec: 22.55 - lr: 0.010000\n",
-      "2021-09-08 11:52:26,776 epoch 6 - iter 5/8 - loss 0.34062760 - samples/sec: 21.95 - lr: 0.010000\n",
-      "2021-09-08 11:52:26,821 epoch 6 - iter 6/8 - loss 0.28395378 - samples/sec: 22.68 - lr: 0.010000\n",
-      "2021-09-08 11:52:26,964 epoch 6 - iter 7/8 - loss 0.24368018 - samples/sec: 7.02 - lr: 0.010000\n"
+      "2021-09-21 20:24:42,992 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:43,118 epoch 2 - iter 1/8 - loss 0.69049197 - samples/sec: 14.42 - lr: 0.020000\n",
+      "2021-09-21 20:24:43,175 epoch 2 - iter 2/8 - loss 0.35709945 - samples/sec: 17.55 - lr: 0.020000\n",
+      "2021-09-21 20:24:43,235 epoch 2 - iter 3/8 - loss 0.25564018 - samples/sec: 16.95 - lr: 0.020000\n",
+      "2021-09-21 20:24:43,304 epoch 2 - iter 4/8 - loss 0.24722560 - samples/sec: 14.52 - lr: 0.020000\n",
+      "2021-09-21 20:24:43,379 epoch 2 - iter 5/8 - loss 0.21302402 - samples/sec: 13.62 - lr: 0.020000\n",
+      "2021-09-21 20:24:43,454 epoch 2 - iter 6/8 - loss 0.28497802 - samples/sec: 13.30 - lr: 0.020000\n",
+      "2021-09-21 20:24:43,536 epoch 2 - iter 7/8 - loss 0.25135505 - samples/sec: 12.42 - lr: 0.020000\n",
+      "2021-09-21 20:24:43,749 epoch 2 - iter 8/8 - loss 0.23152900 - samples/sec: 4.69 - lr: 0.020000\n",
+      "2021-09-21 20:24:43,751 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:43,751 EPOCH 2 done: loss 0.2315 - lr 0.0200000\n",
+      "2021-09-21 20:24:43,813 DEV : loss 0.12476576864719391 - score 0.0\n",
+      "2021-09-21 20:24:43,814 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:24:48,073 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:48,149 epoch 3 - iter 1/8 - loss 0.00876945 - samples/sec: 17.60 - lr: 0.020000\n",
+      "2021-09-21 20:24:48,211 epoch 3 - iter 2/8 - loss 0.13694654 - samples/sec: 16.34 - lr: 0.020000\n",
+      "2021-09-21 20:24:48,266 epoch 3 - iter 3/8 - loss 0.09392192 - samples/sec: 18.27 - lr: 0.020000\n",
+      "2021-09-21 20:24:48,328 epoch 3 - iter 4/8 - loss 0.14756966 - samples/sec: 16.44 - lr: 0.020000\n",
+      "2021-09-21 20:24:48,383 epoch 3 - iter 5/8 - loss 0.12179833 - samples/sec: 18.33 - lr: 0.020000\n",
+      "2021-09-21 20:24:48,439 epoch 3 - iter 6/8 - loss 0.10273538 - samples/sec: 17.97 - lr: 0.020000\n",
+      "2021-09-21 20:24:48,591 epoch 3 - iter 7/8 - loss 0.23629206 - samples/sec: 6.59 - lr: 0.020000\n",
+      "2021-09-21 20:24:48,649 epoch 3 - iter 8/8 - loss 0.20727805 - samples/sec: 17.66 - lr: 0.020000\n",
+      "2021-09-21 20:24:48,650 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:48,650 EPOCH 3 done: loss 0.2073 - lr 0.0200000\n",
+      "2021-09-21 20:24:48,791 DEV : loss 0.0989973172545433 - score 0.0\n",
+      "2021-09-21 20:24:48,792 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:24:55,627 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:55,774 epoch 4 - iter 1/8 - loss 0.00100018 - samples/sec: 9.36 - lr: 0.020000\n",
+      "2021-09-21 20:24:55,898 epoch 4 - iter 2/8 - loss 0.22565062 - samples/sec: 8.14 - lr: 0.020000\n",
+      "2021-09-21 20:24:55,991 epoch 4 - iter 3/8 - loss 0.15105711 - samples/sec: 10.84 - lr: 0.020000\n",
+      "2021-09-21 20:24:56,280 epoch 4 - iter 4/8 - loss 0.11667200 - samples/sec: 3.46 - lr: 0.020000\n",
+      "2021-09-21 20:24:56,344 epoch 4 - iter 5/8 - loss 0.10265306 - samples/sec: 15.92 - lr: 0.020000\n",
+      "2021-09-21 20:24:56,404 epoch 4 - iter 6/8 - loss 0.08626540 - samples/sec: 16.90 - lr: 0.020000\n",
+      "2021-09-21 20:24:56,474 epoch 4 - iter 7/8 - loss 0.07421398 - samples/sec: 14.40 - lr: 0.020000\n",
+      "2021-09-21 20:24:56,546 epoch 4 - iter 8/8 - loss 0.07651518 - samples/sec: 14.00 - lr: 0.020000\n",
+      "2021-09-21 20:24:56,547 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:56,547 EPOCH 4 done: loss 0.0765 - lr 0.0200000\n",
+      "2021-09-21 20:24:56,593 DEV : loss 0.0016179956728592515 - score 0.0\n",
+      "2021-09-21 20:24:56,594 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:25:03,161 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:03,237 epoch 5 - iter 1/8 - loss 0.00471560 - samples/sec: 17.14 - lr: 0.020000\n",
+      "2021-09-21 20:25:03,293 epoch 5 - iter 2/8 - loss 0.00288836 - samples/sec: 18.38 - lr: 0.020000\n",
+      "2021-09-21 20:25:03,348 epoch 5 - iter 3/8 - loss 0.00686488 - samples/sec: 18.36 - lr: 0.020000\n",
+      "2021-09-21 20:25:03,408 epoch 5 - iter 4/8 - loss 0.02393489 - samples/sec: 16.79 - lr: 0.020000\n",
+      "2021-09-21 20:25:03,552 epoch 5 - iter 5/8 - loss 0.01959277 - samples/sec: 6.95 - lr: 0.020000\n",
+      "2021-09-21 20:25:03,610 epoch 5 - iter 6/8 - loss 0.01932330 - samples/sec: 17.51 - lr: 0.020000\n",
+      "2021-09-21 20:25:03,668 epoch 5 - iter 7/8 - loss 0.02103410 - samples/sec: 17.50 - lr: 0.020000\n",
+      "2021-09-21 20:25:03,724 epoch 5 - iter 8/8 - loss 0.01847138 - samples/sec: 18.19 - lr: 0.020000\n",
+      "2021-09-21 20:25:03,725 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:03,726 EPOCH 5 done: loss 0.0185 - lr 0.0200000\n",
+      "2021-09-21 20:25:03,904 DEV : loss 0.0011032891925424337 - score 0.0\n",
+      "2021-09-21 20:25:03,905 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:25:17,813 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:17,921 epoch 6 - iter 1/8 - loss 0.02302460 - samples/sec: 13.75 - lr: 0.020000\n",
+      "2021-09-21 20:25:17,984 epoch 6 - iter 2/8 - loss 0.01186535 - samples/sec: 16.30 - lr: 0.020000\n",
+      "2021-09-21 20:25:18,077 epoch 6 - iter 3/8 - loss 0.00872397 - samples/sec: 10.78 - lr: 0.020000\n",
+      "2021-09-21 20:25:18,146 epoch 6 - iter 4/8 - loss 0.00694961 - samples/sec: 14.75 - lr: 0.020000\n",
+      "2021-09-21 20:25:18,219 epoch 6 - iter 5/8 - loss 0.00568805 - samples/sec: 13.77 - lr: 0.020000\n",
+      "2021-09-21 20:25:18,287 epoch 6 - iter 6/8 - loss 0.00480496 - samples/sec: 14.85 - lr: 0.020000\n",
+      "2021-09-21 20:25:18,341 epoch 6 - iter 7/8 - loss 0.00543027 - samples/sec: 18.50 - lr: 0.020000\n",
+      "2021-09-21 20:25:18,591 epoch 6 - iter 8/8 - loss 0.00505415 - samples/sec: 4.02 - lr: 0.020000\n",
+      "2021-09-21 20:25:18,592 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:18,593 EPOCH 6 done: loss 0.0051 - lr 0.0200000\n",
+      "2021-09-21 20:25:18,693 DEV : loss 0.00042923266300931573 - score 0.0\n",
+      "2021-09-21 20:25:18,694 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:25:24,076 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:24,156 epoch 7 - iter 1/8 - loss 0.00098631 - samples/sec: 16.64 - lr: 0.020000\n",
+      "2021-09-21 20:25:24,221 epoch 7 - iter 2/8 - loss 0.00068552 - samples/sec: 15.58 - lr: 0.020000\n",
+      "2021-09-21 20:25:24,392 epoch 7 - iter 3/8 - loss 0.00109470 - samples/sec: 5.87 - lr: 0.020000\n",
+      "2021-09-21 20:25:24,464 epoch 7 - iter 4/8 - loss 0.43693355 - samples/sec: 14.05 - lr: 0.020000\n",
+      "2021-09-21 20:25:24,522 epoch 7 - iter 5/8 - loss 0.35115172 - samples/sec: 17.59 - lr: 0.020000\n",
+      "2021-09-21 20:25:24,583 epoch 7 - iter 6/8 - loss 0.29273844 - samples/sec: 16.37 - lr: 0.020000\n",
+      "2021-09-21 20:25:24,646 epoch 7 - iter 7/8 - loss 0.25118477 - samples/sec: 16.05 - lr: 0.020000\n",
+      "2021-09-21 20:25:24,709 epoch 7 - iter 8/8 - loss 0.21984861 - samples/sec: 15.93 - lr: 0.020000\n",
+      "2021-09-21 20:25:24,711 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:24,711 EPOCH 7 done: loss 0.2198 - lr 0.0200000\n",
+      "2021-09-21 20:25:24,849 DEV : loss 0.0012852274812757969 - score 0.0\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:52:27,012 epoch 6 - iter 8/8 - loss 0.22172171 - samples/sec: 21.07 - lr: 0.010000\n",
-      "2021-09-08 11:52:27,013 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:27,013 EPOCH 6 done: loss 0.2217 - lr 0.0100000\n",
-      "2021-09-08 11:52:27,143 DEV : loss 0.5234455466270447 - score 0.0\n",
-      "2021-09-08 11:52:27,143 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:52:27,222 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:27,284 epoch 7 - iter 1/8 - loss 0.00637871 - samples/sec: 21.28 - lr: 0.010000\n",
-      "2021-09-08 11:52:27,328 epoch 7 - iter 2/8 - loss 0.00505804 - samples/sec: 22.90 - lr: 0.010000\n",
-      "2021-09-08 11:52:27,469 epoch 7 - iter 3/8 - loss 0.00711077 - samples/sec: 7.12 - lr: 0.010000\n",
-      "2021-09-08 11:52:27,614 epoch 7 - iter 4/8 - loss 0.07912122 - samples/sec: 6.92 - lr: 0.010000\n",
-      "2021-09-08 11:52:27,662 epoch 7 - iter 5/8 - loss 0.06350508 - samples/sec: 21.15 - lr: 0.010000\n",
-      "2021-09-08 11:52:27,707 epoch 7 - iter 6/8 - loss 0.05403989 - samples/sec: 22.29 - lr: 0.010000\n",
-      "2021-09-08 11:52:27,752 epoch 7 - iter 7/8 - loss 0.04643754 - samples/sec: 22.56 - lr: 0.010000\n",
-      "2021-09-08 11:52:27,796 epoch 7 - iter 8/8 - loss 0.04082303 - samples/sec: 22.74 - lr: 0.010000\n",
-      "2021-09-08 11:52:27,797 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:27,798 EPOCH 7 done: loss 0.0408 - lr 0.0100000\n",
-      "2021-09-08 11:52:27,929 DEV : loss 0.49857601523399353 - score 0.0\n",
-      "2021-09-08 11:52:27,930 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:52:28,016 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:28,077 epoch 8 - iter 1/8 - loss 0.00354888 - samples/sec: 21.16 - lr: 0.010000\n",
-      "2021-09-08 11:52:28,122 epoch 8 - iter 2/8 - loss 0.00431435 - samples/sec: 22.57 - lr: 0.010000\n",
-      "2021-09-08 11:52:28,167 epoch 8 - iter 3/8 - loss 0.00315680 - samples/sec: 22.61 - lr: 0.010000\n",
-      "2021-09-08 11:52:28,211 epoch 8 - iter 4/8 - loss 0.00251091 - samples/sec: 22.89 - lr: 0.010000\n",
-      "2021-09-08 11:52:28,255 epoch 8 - iter 5/8 - loss 0.00236591 - samples/sec: 23.02 - lr: 0.010000\n",
-      "2021-09-08 11:52:28,306 epoch 8 - iter 6/8 - loss 0.16623503 - samples/sec: 19.83 - lr: 0.010000\n",
-      "2021-09-08 11:52:28,446 epoch 8 - iter 7/8 - loss 0.14271164 - samples/sec: 7.16 - lr: 0.010000\n",
-      "2021-09-08 11:52:28,587 epoch 8 - iter 8/8 - loss 0.12511299 - samples/sec: 7.10 - lr: 0.010000\n",
-      "2021-09-08 11:52:28,588 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:28,589 EPOCH 8 done: loss 0.1251 - lr 0.0100000\n",
-      "2021-09-08 11:52:28,724 DEV : loss 0.4013904631137848 - score 0.0\n",
-      "2021-09-08 11:52:28,724 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:52:28,810 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:28,869 epoch 9 - iter 1/8 - loss 0.00075404 - samples/sec: 22.53 - lr: 0.010000\n",
-      "2021-09-08 11:52:29,009 epoch 9 - iter 2/8 - loss 0.00139304 - samples/sec: 7.17 - lr: 0.010000\n",
-      "2021-09-08 11:52:29,053 epoch 9 - iter 3/8 - loss 0.00135541 - samples/sec: 22.96 - lr: 0.010000\n",
-      "2021-09-08 11:52:29,097 epoch 9 - iter 4/8 - loss 0.00133578 - samples/sec: 22.74 - lr: 0.010000\n",
-      "2021-09-08 11:52:29,144 epoch 9 - iter 5/8 - loss 0.00175051 - samples/sec: 21.40 - lr: 0.010000\n",
-      "2021-09-08 11:52:29,287 epoch 9 - iter 6/8 - loss 0.07598041 - samples/sec: 7.04 - lr: 0.010000\n",
-      "2021-09-08 11:52:29,334 epoch 9 - iter 7/8 - loss 0.06535844 - samples/sec: 21.33 - lr: 0.010000\n",
-      "2021-09-08 11:52:29,378 epoch 9 - iter 8/8 - loss 0.05738535 - samples/sec: 22.80 - lr: 0.010000\n",
-      "2021-09-08 11:52:29,379 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:29,380 EPOCH 9 done: loss 0.0574 - lr 0.0100000\n",
-      "2021-09-08 11:52:29,778 DEV : loss 0.391232967376709 - score 0.0\n",
-      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 11:52:29,779 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 11:52:29,830 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:29,888 epoch 10 - iter 1/8 - loss 0.00099037 - samples/sec: 22.79 - lr: 0.005000\n",
-      "2021-09-08 11:52:29,932 epoch 10 - iter 2/8 - loss 0.00083726 - samples/sec: 22.88 - lr: 0.005000\n",
-      "2021-09-08 11:52:29,976 epoch 10 - iter 3/8 - loss 0.00083772 - samples/sec: 22.89 - lr: 0.005000\n",
-      "2021-09-08 11:52:30,119 epoch 10 - iter 4/8 - loss 0.00754078 - samples/sec: 7.02 - lr: 0.005000\n",
-      "2021-09-08 11:52:30,163 epoch 10 - iter 5/8 - loss 0.00634910 - samples/sec: 22.79 - lr: 0.005000\n",
-      "2021-09-08 11:52:30,306 epoch 10 - iter 6/8 - loss 0.00552372 - samples/sec: 7.05 - lr: 0.005000\n",
-      "2021-09-08 11:52:30,353 epoch 10 - iter 7/8 - loss 0.00529939 - samples/sec: 21.16 - lr: 0.005000\n",
-      "2021-09-08 11:52:30,401 epoch 10 - iter 8/8 - loss 0.00478523 - samples/sec: 21.29 - lr: 0.005000\n",
-      "2021-09-08 11:52:30,402 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:30,402 EPOCH 10 done: loss 0.0048 - lr 0.0050000\n",
-      "2021-09-08 11:52:30,433 DEV : loss 0.40112847089767456 - score 0.0\n",
-      "2021-09-08 11:52:30,433 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:52:42,416 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:42,417 Testing using best model ...\n",
-      "2021-09-08 11:52:42,419 loading file None/best-model.pt\n",
+      "2021-09-21 20:25:24,850 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:25:25,677 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:25,761 epoch 8 - iter 1/8 - loss 0.00042378 - samples/sec: 16.19 - lr: 0.020000\n",
+      "2021-09-21 20:25:25,938 epoch 8 - iter 2/8 - loss 0.00083185 - samples/sec: 5.69 - lr: 0.020000\n",
+      "2021-09-21 20:25:25,995 epoch 8 - iter 3/8 - loss 0.00277061 - samples/sec: 17.55 - lr: 0.020000\n",
+      "2021-09-21 20:25:26,057 epoch 8 - iter 4/8 - loss 0.00612685 - samples/sec: 16.40 - lr: 0.020000\n",
+      "2021-09-21 20:25:26,112 epoch 8 - iter 5/8 - loss 0.00500346 - samples/sec: 18.10 - lr: 0.020000\n",
+      "2021-09-21 20:25:26,167 epoch 8 - iter 6/8 - loss 0.00440488 - samples/sec: 18.40 - lr: 0.020000\n",
+      "2021-09-21 20:25:26,229 epoch 8 - iter 7/8 - loss 0.00404356 - samples/sec: 16.41 - lr: 0.020000\n",
+      "2021-09-21 20:25:26,294 epoch 8 - iter 8/8 - loss 0.00366411 - samples/sec: 15.32 - lr: 0.020000\n",
+      "2021-09-21 20:25:26,296 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:26,296 EPOCH 8 done: loss 0.0037 - lr 0.0200000\n",
+      "2021-09-21 20:25:30,647 DEV : loss 0.00043851861846633255 - score 0.0\n",
+      "2021-09-21 20:25:30,650 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:25:30,676 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:30,781 epoch 9 - iter 1/8 - loss 0.00137216 - samples/sec: 13.70 - lr: 0.020000\n",
+      "2021-09-21 20:25:30,856 epoch 9 - iter 2/8 - loss 0.00101091 - samples/sec: 13.50 - lr: 0.020000\n",
+      "2021-09-21 20:25:30,931 epoch 9 - iter 3/8 - loss 0.00098531 - samples/sec: 13.33 - lr: 0.020000\n",
+      "2021-09-21 20:25:31,018 epoch 9 - iter 4/8 - loss 0.14876147 - samples/sec: 11.65 - lr: 0.020000\n",
+      "2021-09-21 20:25:31,097 epoch 9 - iter 5/8 - loss 0.11914728 - samples/sec: 12.72 - lr: 0.020000\n",
+      "2021-09-21 20:25:31,163 epoch 9 - iter 6/8 - loss 0.10204564 - samples/sec: 15.36 - lr: 0.020000\n",
+      "2021-09-21 20:25:31,214 epoch 9 - iter 7/8 - loss 0.08754959 - samples/sec: 19.69 - lr: 0.020000\n",
+      "2021-09-21 20:25:31,433 epoch 9 - iter 8/8 - loss 0.07674157 - samples/sec: 4.58 - lr: 0.020000\n",
+      "2021-09-21 20:25:31,435 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:31,435 EPOCH 9 done: loss 0.0767 - lr 0.0200000\n",
+      "2021-09-21 20:25:31,511 DEV : loss 0.000181188530405052 - score 0.0\n",
+      "2021-09-21 20:25:31,513 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:25:35,704 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:35,808 epoch 10 - iter 1/8 - loss 0.01114247 - samples/sec: 13.74 - lr: 0.020000\n",
+      "2021-09-21 20:25:36,001 epoch 10 - iter 2/8 - loss 0.00606422 - samples/sec: 5.19 - lr: 0.020000\n",
+      "2021-09-21 20:25:36,054 epoch 10 - iter 3/8 - loss 0.00416748 - samples/sec: 19.32 - lr: 0.020000\n",
+      "2021-09-21 20:25:36,121 epoch 10 - iter 4/8 - loss 0.00454320 - samples/sec: 14.97 - lr: 0.020000\n",
+      "2021-09-21 20:25:36,180 epoch 10 - iter 5/8 - loss 0.00368552 - samples/sec: 17.10 - lr: 0.020000\n",
+      "2021-09-21 20:25:36,235 epoch 10 - iter 6/8 - loss 0.00318918 - samples/sec: 18.41 - lr: 0.020000\n",
+      "2021-09-21 20:25:36,313 epoch 10 - iter 7/8 - loss 0.00537916 - samples/sec: 12.95 - lr: 0.020000\n",
+      "2021-09-21 20:25:36,361 epoch 10 - iter 8/8 - loss 0.00488342 - samples/sec: 20.97 - lr: 0.020000\n",
+      "2021-09-21 20:25:36,362 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:36,363 EPOCH 10 done: loss 0.0049 - lr 0.0200000\n",
+      "2021-09-21 20:25:36,411 DEV : loss 0.00013760807632934302 - score 0.0\n",
+      "2021-09-21 20:25:36,413 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:25:48,000 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:48,002 Testing using best model ...\n",
+      "2021-09-21 20:25:48,025 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:52:47,595 \t1.0\n",
-      "2021-09-08 11:52:47,596 \n",
+      "2021-09-21 20:25:57,463 \t0.0\n",
+      "2021-09-21 20:25:57,464 \n",
       "Results:\n",
-      "- F-score (micro) 1.0\n",
-      "- F-score (macro) 0.1111\n",
-      "- Accuracy 1.0\n",
+      "- F-score (micro) 0.0\n",
+      "- F-score (macro) 0.0\n",
+      "- Accuracy 0.0\n",
       "\n",
       "By class:\n",
       "                        precision    recall  f1-score   support\n",
       "\n",
       "Family & Relationships     0.0000    0.0000    0.0000         0\n",
-      "                Health     0.0000    0.0000    0.0000         0\n",
       " Science & Mathematics     0.0000    0.0000    0.0000         0\n",
+      " Entertainment & Music     0.0000    0.0000    0.0000         0\n",
       "  Computers & Internet     0.0000    0.0000    0.0000         0\n",
       " Education & Reference     0.0000    0.0000    0.0000         0\n",
       "                Sports     0.0000    0.0000    0.0000         0\n",
       "     Society & Culture     0.0000    0.0000    0.0000         0\n",
-      " Politics & Government     0.0000    0.0000    0.0000         0\n",
-      " Entertainment & Music     1.0000    1.0000    1.0000         1\n",
+      "    Business & Finance     0.0000    0.0000    0.0000         0\n",
+      "                Health     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "             micro avg     1.0000    1.0000    1.0000         1\n",
-      "             macro avg     0.1111    0.1111    0.1111         1\n",
-      "          weighted avg     1.0000    1.0000    1.0000         1\n",
-      "           samples avg     1.0000    1.0000    1.0000         1\n",
+      "             micro avg     0.0000    0.0000    0.0000         1\n",
+      "             macro avg     0.0000    0.0000    0.0000         1\n",
+      "          weighted avg     0.0000    0.0000    0.0000         1\n",
+      "           samples avg     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "2021-09-08 11:52:47,597 ----------------------------------------------------------------------------------------------------\n",
-      "Accuracy Durchschnitt: 0.5374149659863945\n"
+      "2021-09-21 20:25:57,464 ----------------------------------------------------------------------------------------------------\n",
+      "Accuracy Durchschnitt: 0.5149659863945578\n"
      ]
     }
    ],
@@ -2808,11 +2814,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "id": "0c4025f0",
+   "execution_count": 5,
+   "id": "4bafdccb",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.5544217687074829, 0.47959183673469385, 0.5306122448979592, 0.5136054421768708, 0.4965986394557823]\n",
+      "0.026046516624594073\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(avg_acc_list)\n",
+    "print(statistics.pstdev(avg_acc_list))"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -2824,7 +2842,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 6,
    "id": "ef4da272",
    "metadata": {},
    "outputs": [
@@ -2832,25 +2850,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:53:00,105 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
+      "2021-09-21 20:26:16,897 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:53:04,075 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:26:21,049 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 9/9 [00:00<00:00, 17806.01it/s]"
+      "100%|██████████| 9/9 [00:00<00:00, 14048.66it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:53:04,077 [b'This text is about Family & Relationships', b'This text is about Science & Mathematics', b'This text is about Entertainment & Music', b'This text is about Computers & Internet', b'This text is about Education & Reference', b'This text is about Society & Culture', b'This text is about Business & Finance', b'This text is about Politics & Government', b'This text is about Sports']\n",
-      "2021-09-08 11:53:04,090 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:04,091 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:26:21,052 [b'This text is about Health', b'This text is about Science & Mathematics', b'This text is about Entertainment & Music', b'This text is about Computers & Internet', b'This text is about Education & Reference', b'This text is about Sports', b'This text is about Society & Culture', b'This text is about Politics & Government', b'This text is about Business & Finance']\n",
+      "2021-09-21 20:26:21,203 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:21,205 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -3163,28 +3181,24 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:53:04,092 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:04,093 Corpus: \"Corpus: 8 train + 1 dev + 1 test sentences\"\n",
-      "2021-09-08 11:53:04,093 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:04,093 Parameters:\n",
-      "2021-09-08 11:53:04,094  - learning_rate: \"0.02\"\n",
-      "2021-09-08 11:53:04,094  - mini_batch_size: \"1\"\n",
-      "2021-09-08 11:53:04,095  - patience: \"3\"\n",
-      "2021-09-08 11:53:04,095  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 11:53:04,095  - max_epochs: \"10\"\n",
-      "2021-09-08 11:53:04,096  - shuffle: \"True\"\n",
-      "2021-09-08 11:53:04,097  - train_with_dev: \"False\"\n",
-      "2021-09-08 11:53:04,097  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 11:53:04,098 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:04,098 Model training base path: \"None\"\n",
-      "2021-09-08 11:53:04,098 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:04,099 Device: cuda:1\n",
-      "2021-09-08 11:53:04,099 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:04,100 Embeddings storage mode: cpu\n",
-      "2021-09-08 11:53:04,105 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:04,173 epoch 1 - iter 1/8 - loss 0.53290629 - samples/sec: 19.03 - lr: 0.020000\n",
-      "2021-09-08 11:53:04,220 epoch 1 - iter 2/8 - loss 1.26376045 - samples/sec: 21.21 - lr: 0.020000\n",
-      "2021-09-08 11:53:04,268 epoch 1 - iter 3/8 - loss 0.97841304 - samples/sec: 21.30 - lr: 0.020000\n"
+      "2021-09-21 20:26:21,206 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:21,206 Corpus: \"Corpus: 8 train + 1 dev + 1 test sentences\"\n",
+      "2021-09-21 20:26:21,207 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:21,207 Parameters:\n",
+      "2021-09-21 20:26:21,207  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:26:21,208  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:26:21,208  - patience: \"3\"\n",
+      "2021-09-21 20:26:21,208  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:26:21,209  - max_epochs: \"10\"\n",
+      "2021-09-21 20:26:21,209  - shuffle: \"True\"\n",
+      "2021-09-21 20:26:21,209  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:26:21,210  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:26:21,210 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:21,210 Model training base path: \"None\"\n",
+      "2021-09-21 20:26:21,211 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:21,211 Device: cuda:0\n",
+      "2021-09-21 20:26:21,211 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:21,212 Embeddings storage mode: cpu\n"
      ]
     },
     {
@@ -3198,191 +3212,195 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:53:04,313 epoch 1 - iter 4/8 - loss 0.74219897 - samples/sec: 22.40 - lr: 0.020000\n",
-      "2021-09-08 11:53:04,369 epoch 1 - iter 5/8 - loss 0.60563103 - samples/sec: 17.88 - lr: 0.020000\n",
-      "2021-09-08 11:53:04,439 epoch 1 - iter 6/8 - loss 0.69742722 - samples/sec: 14.41 - lr: 0.020000\n",
-      "2021-09-08 11:53:04,487 epoch 1 - iter 7/8 - loss 0.74291887 - samples/sec: 21.23 - lr: 0.020000\n",
-      "2021-09-08 11:53:04,537 epoch 1 - iter 8/8 - loss 0.67569124 - samples/sec: 20.11 - lr: 0.020000\n",
-      "2021-09-08 11:53:04,538 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:04,539 EPOCH 1 done: loss 0.6757 - lr 0.0200000\n",
-      "2021-09-08 11:53:04,581 DEV : loss 0.1526939868927002 - score 0.0\n",
-      "2021-09-08 11:53:04,582 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:26:21,439 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:21,498 epoch 1 - iter 1/8 - loss 0.01371999 - samples/sec: 23.53 - lr: 0.020000\n",
+      "2021-09-21 20:26:21,578 epoch 1 - iter 2/8 - loss 0.30671892 - samples/sec: 12.61 - lr: 0.020000\n",
+      "2021-09-21 20:26:21,634 epoch 1 - iter 3/8 - loss 0.23285307 - samples/sec: 18.02 - lr: 0.020000\n",
+      "2021-09-21 20:26:21,685 epoch 1 - iter 4/8 - loss 0.20510458 - samples/sec: 20.09 - lr: 0.020000\n",
+      "2021-09-21 20:26:21,736 epoch 1 - iter 5/8 - loss 0.21662991 - samples/sec: 19.87 - lr: 0.020000\n",
+      "2021-09-21 20:26:21,798 epoch 1 - iter 6/8 - loss 0.18488241 - samples/sec: 16.39 - lr: 0.020000\n",
+      "2021-09-21 20:26:21,861 epoch 1 - iter 7/8 - loss 0.19488806 - samples/sec: 16.05 - lr: 0.020000\n",
+      "2021-09-21 20:26:21,911 epoch 1 - iter 8/8 - loss 0.44460914 - samples/sec: 20.49 - lr: 0.020000\n",
+      "2021-09-21 20:26:21,912 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:21,913 EPOCH 1 done: loss 0.4446 - lr 0.0200000\n",
+      "2021-09-21 20:26:22,070 DEV : loss 0.5967884659767151 - score 0.0\n",
+      "2021-09-21 20:26:22,071 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:53:08,857 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:08,956 epoch 2 - iter 1/8 - loss 0.90472752 - samples/sec: 12.43 - lr: 0.020000\n",
-      "2021-09-08 11:53:09,004 epoch 2 - iter 2/8 - loss 0.62382843 - samples/sec: 21.03 - lr: 0.020000\n",
-      "2021-09-08 11:53:09,053 epoch 2 - iter 3/8 - loss 0.48350169 - samples/sec: 20.70 - lr: 0.020000\n",
-      "2021-09-08 11:53:09,114 epoch 2 - iter 4/8 - loss 0.63045443 - samples/sec: 16.32 - lr: 0.020000\n",
-      "2021-09-08 11:53:09,162 epoch 2 - iter 5/8 - loss 0.61329598 - samples/sec: 21.17 - lr: 0.020000\n",
-      "2021-09-08 11:53:09,219 epoch 2 - iter 6/8 - loss 0.62341536 - samples/sec: 17.76 - lr: 0.020000\n",
-      "2021-09-08 11:53:09,264 epoch 2 - iter 7/8 - loss 0.54009010 - samples/sec: 22.41 - lr: 0.020000\n",
-      "2021-09-08 11:53:09,312 epoch 2 - iter 8/8 - loss 0.54598160 - samples/sec: 20.92 - lr: 0.020000\n",
-      "2021-09-08 11:53:09,313 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:09,314 EPOCH 2 done: loss 0.5460 - lr 0.0200000\n",
-      "2021-09-08 11:53:09,456 DEV : loss 0.0814899429678917 - score 0.0\n",
-      "2021-09-08 11:53:09,457 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:26:34,082 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:34,173 epoch 2 - iter 1/8 - loss 0.13562271 - samples/sec: 13.39 - lr: 0.020000\n",
+      "2021-09-21 20:26:34,223 epoch 2 - iter 2/8 - loss 0.06886765 - samples/sec: 20.56 - lr: 0.020000\n",
+      "2021-09-21 20:26:34,285 epoch 2 - iter 3/8 - loss 0.04643679 - samples/sec: 16.39 - lr: 0.020000\n",
+      "2021-09-21 20:26:34,356 epoch 2 - iter 4/8 - loss 0.62413699 - samples/sec: 14.23 - lr: 0.020000\n",
+      "2021-09-21 20:26:34,405 epoch 2 - iter 5/8 - loss 0.54454786 - samples/sec: 20.53 - lr: 0.020000\n",
+      "2021-09-21 20:26:34,455 epoch 2 - iter 6/8 - loss 0.55781183 - samples/sec: 20.28 - lr: 0.020000\n",
+      "2021-09-21 20:26:34,505 epoch 2 - iter 7/8 - loss 0.48302190 - samples/sec: 20.41 - lr: 0.020000\n",
+      "2021-09-21 20:26:34,557 epoch 2 - iter 8/8 - loss 0.42568184 - samples/sec: 19.38 - lr: 0.020000\n",
+      "2021-09-21 20:26:34,558 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:34,558 EPOCH 2 done: loss 0.4257 - lr 0.0200000\n",
+      "2021-09-21 20:26:34,694 DEV : loss 0.14844802021980286 - score 0.0\n",
+      "2021-09-21 20:26:34,695 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:53:19,258 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:19,355 epoch 3 - iter 1/8 - loss 0.31724170 - samples/sec: 12.59 - lr: 0.020000\n",
-      "2021-09-08 11:53:19,420 epoch 3 - iter 2/8 - loss 0.22848650 - samples/sec: 15.68 - lr: 0.020000\n",
-      "2021-09-08 11:53:19,465 epoch 3 - iter 3/8 - loss 0.15526587 - samples/sec: 22.65 - lr: 0.020000\n",
-      "2021-09-08 11:53:19,527 epoch 3 - iter 4/8 - loss 0.26589862 - samples/sec: 16.17 - lr: 0.020000\n",
-      "2021-09-08 11:53:19,575 epoch 3 - iter 5/8 - loss 0.24217746 - samples/sec: 21.02 - lr: 0.020000\n",
-      "2021-09-08 11:53:19,621 epoch 3 - iter 6/8 - loss 0.20513959 - samples/sec: 22.25 - lr: 0.020000\n",
-      "2021-09-08 11:53:19,665 epoch 3 - iter 7/8 - loss 0.17801766 - samples/sec: 22.68 - lr: 0.020000\n",
-      "2021-09-08 11:53:19,714 epoch 3 - iter 8/8 - loss 0.22652820 - samples/sec: 20.99 - lr: 0.020000\n",
-      "2021-09-08 11:53:19,715 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:19,715 EPOCH 3 done: loss 0.2265 - lr 0.0200000\n",
-      "2021-09-08 11:53:19,772 DEV : loss 0.3554025888442993 - score 0.0\n",
-      "2021-09-08 11:53:19,772 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:53:19,777 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:19,837 epoch 4 - iter 1/8 - loss 0.00481658 - samples/sec: 22.08 - lr: 0.020000\n",
-      "2021-09-08 11:53:19,885 epoch 4 - iter 2/8 - loss 0.09951717 - samples/sec: 21.10 - lr: 0.020000\n",
-      "2021-09-08 11:53:19,930 epoch 4 - iter 3/8 - loss 0.07158386 - samples/sec: 22.65 - lr: 0.020000\n",
-      "2021-09-08 11:53:19,988 epoch 4 - iter 4/8 - loss 0.10821434 - samples/sec: 17.43 - lr: 0.020000\n",
-      "2021-09-08 11:53:20,060 epoch 4 - iter 5/8 - loss 0.09155016 - samples/sec: 14.04 - lr: 0.020000\n",
-      "2021-09-08 11:53:20,121 epoch 4 - iter 6/8 - loss 0.12036817 - samples/sec: 16.61 - lr: 0.020000\n",
-      "2021-09-08 11:53:20,171 epoch 4 - iter 7/8 - loss 0.10983578 - samples/sec: 20.42 - lr: 0.020000\n",
-      "2021-09-08 11:53:20,220 epoch 4 - iter 8/8 - loss 0.09649059 - samples/sec: 20.86 - lr: 0.020000\n",
-      "2021-09-08 11:53:20,222 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:20,222 EPOCH 4 done: loss 0.0965 - lr 0.0200000\n",
-      "2021-09-08 11:53:20,269 DEV : loss 0.11595837026834488 - score 0.0\n",
-      "2021-09-08 11:53:20,270 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:53:20,274 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:20,339 epoch 5 - iter 1/8 - loss 0.00136930 - samples/sec: 20.21 - lr: 0.020000\n",
-      "2021-09-08 11:53:20,413 epoch 5 - iter 2/8 - loss 0.01960840 - samples/sec: 13.68 - lr: 0.020000\n",
-      "2021-09-08 11:53:20,471 epoch 5 - iter 3/8 - loss 0.25044355 - samples/sec: 17.47 - lr: 0.020000\n",
-      "2021-09-08 11:53:20,526 epoch 5 - iter 4/8 - loss 0.19827692 - samples/sec: 18.60 - lr: 0.020000\n",
-      "2021-09-08 11:53:20,571 epoch 5 - iter 5/8 - loss 0.15891539 - samples/sec: 22.19 - lr: 0.020000\n",
-      "2021-09-08 11:53:20,616 epoch 5 - iter 6/8 - loss 0.13339852 - samples/sec: 22.57 - lr: 0.020000\n",
-      "2021-09-08 11:53:20,665 epoch 5 - iter 7/8 - loss 0.15644932 - samples/sec: 20.90 - lr: 0.020000\n",
-      "2021-09-08 11:53:20,713 epoch 5 - iter 8/8 - loss 0.14275949 - samples/sec: 21.07 - lr: 0.020000\n",
-      "2021-09-08 11:53:20,714 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:20,714 EPOCH 5 done: loss 0.1428 - lr 0.0200000\n",
-      "2021-09-08 11:53:20,863 DEV : loss 0.11657313257455826 - score 0.0\n",
-      "2021-09-08 11:53:20,863 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:53:20,940 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:21,008 epoch 6 - iter 1/8 - loss 0.01186418 - samples/sec: 18.88 - lr: 0.020000\n",
-      "2021-09-08 11:53:21,053 epoch 6 - iter 2/8 - loss 0.00695882 - samples/sec: 22.42 - lr: 0.020000\n",
-      "2021-09-08 11:53:21,098 epoch 6 - iter 3/8 - loss 0.00491031 - samples/sec: 22.63 - lr: 0.020000\n",
-      "2021-09-08 11:53:21,146 epoch 6 - iter 4/8 - loss 0.01244658 - samples/sec: 21.01 - lr: 0.020000\n",
-      "2021-09-08 11:53:21,205 epoch 6 - iter 5/8 - loss 0.01687003 - samples/sec: 17.22 - lr: 0.020000\n",
-      "2021-09-08 11:53:21,250 epoch 6 - iter 6/8 - loss 0.01433895 - samples/sec: 22.27 - lr: 0.020000\n",
-      "2021-09-08 11:53:21,295 epoch 6 - iter 7/8 - loss 0.01246724 - samples/sec: 22.73 - lr: 0.020000\n",
-      "2021-09-08 11:53:21,365 epoch 6 - iter 8/8 - loss 0.01388036 - samples/sec: 14.43 - lr: 0.020000\n",
-      "2021-09-08 11:53:21,366 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:21,366 EPOCH 6 done: loss 0.0139 - lr 0.0200000\n",
-      "2021-09-08 11:53:21,523 DEV : loss 0.23856967687606812 - score 0.0\n",
+      "2021-09-21 20:26:58,455 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:58,567 epoch 3 - iter 1/8 - loss 0.01918515 - samples/sec: 16.04 - lr: 0.020000\n",
+      "2021-09-21 20:26:58,631 epoch 3 - iter 2/8 - loss 0.01029657 - samples/sec: 15.57 - lr: 0.020000\n",
+      "2021-09-21 20:26:58,704 epoch 3 - iter 3/8 - loss 0.00810350 - samples/sec: 13.96 - lr: 0.020000\n",
+      "2021-09-21 20:26:58,816 epoch 3 - iter 4/8 - loss 0.00755834 - samples/sec: 8.98 - lr: 0.020000\n",
+      "2021-09-21 20:26:58,878 epoch 3 - iter 5/8 - loss 0.00631206 - samples/sec: 16.31 - lr: 0.020000\n",
+      "2021-09-21 20:26:58,949 epoch 3 - iter 6/8 - loss 0.05797066 - samples/sec: 14.08 - lr: 0.020000\n",
+      "2021-09-21 20:26:59,004 epoch 3 - iter 7/8 - loss 0.05063861 - samples/sec: 18.45 - lr: 0.020000\n",
+      "2021-09-21 20:26:59,102 epoch 3 - iter 8/8 - loss 0.05111490 - samples/sec: 10.33 - lr: 0.020000\n",
+      "2021-09-21 20:26:59,103 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:59,103 EPOCH 3 done: loss 0.0511 - lr 0.0200000\n",
+      "2021-09-21 20:26:59,230 DEV : loss 0.5520024299621582 - score 0.0\n",
+      "2021-09-21 20:26:59,231 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:26:59,322 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:59,445 epoch 4 - iter 1/8 - loss 0.00174732 - samples/sec: 11.58 - lr: 0.020000\n",
+      "2021-09-21 20:26:59,518 epoch 4 - iter 2/8 - loss 0.00196001 - samples/sec: 13.77 - lr: 0.020000\n",
+      "2021-09-21 20:26:59,579 epoch 4 - iter 3/8 - loss 0.04074765 - samples/sec: 16.68 - lr: 0.020000\n",
+      "2021-09-21 20:26:59,678 epoch 4 - iter 4/8 - loss 0.03129923 - samples/sec: 10.17 - lr: 0.020000\n",
+      "2021-09-21 20:26:59,743 epoch 4 - iter 5/8 - loss 0.14194472 - samples/sec: 15.40 - lr: 0.020000\n",
+      "2021-09-21 20:26:59,794 epoch 4 - iter 6/8 - loss 0.11858408 - samples/sec: 19.94 - lr: 0.020000\n",
+      "2021-09-21 20:26:59,852 epoch 4 - iter 7/8 - loss 0.10343536 - samples/sec: 17.65 - lr: 0.020000\n",
+      "2021-09-21 20:26:59,915 epoch 4 - iter 8/8 - loss 0.19720054 - samples/sec: 15.83 - lr: 0.020000\n",
+      "2021-09-21 20:26:59,917 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:59,917 EPOCH 4 done: loss 0.1972 - lr 0.0200000\n",
+      "2021-09-21 20:27:00,104 DEV : loss 0.19467534124851227 - score 0.0\n",
+      "2021-09-21 20:27:00,105 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:27:00,202 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:00,324 epoch 5 - iter 1/8 - loss 0.00616600 - samples/sec: 11.22 - lr: 0.020000\n",
+      "2021-09-21 20:27:00,389 epoch 5 - iter 2/8 - loss 0.00577840 - samples/sec: 15.52 - lr: 0.020000\n",
+      "2021-09-21 20:27:00,451 epoch 5 - iter 3/8 - loss 0.00508343 - samples/sec: 16.25 - lr: 0.020000\n",
+      "2021-09-21 20:27:00,527 epoch 5 - iter 4/8 - loss 0.00445398 - samples/sec: 13.26 - lr: 0.020000\n",
+      "2021-09-21 20:27:00,581 epoch 5 - iter 5/8 - loss 0.00365273 - samples/sec: 18.57 - lr: 0.020000\n",
+      "2021-09-21 20:27:00,629 epoch 5 - iter 6/8 - loss 0.00339396 - samples/sec: 21.59 - lr: 0.020000\n",
+      "2021-09-21 20:27:00,744 epoch 5 - iter 7/8 - loss 0.00302261 - samples/sec: 8.74 - lr: 0.020000\n",
+      "2021-09-21 20:27:00,820 epoch 5 - iter 8/8 - loss 0.00272781 - samples/sec: 13.18 - lr: 0.020000\n",
+      "2021-09-21 20:27:00,821 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:00,822 EPOCH 5 done: loss 0.0027 - lr 0.0200000\n",
+      "2021-09-21 20:27:00,954 DEV : loss 0.24207547307014465 - score 0.0\n",
+      "2021-09-21 20:27:00,955 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:27:01,051 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:01,138 epoch 6 - iter 1/8 - loss 0.00191607 - samples/sec: 18.27 - lr: 0.020000\n",
+      "2021-09-21 20:27:01,196 epoch 6 - iter 2/8 - loss 0.00284145 - samples/sec: 17.65 - lr: 0.020000\n",
+      "2021-09-21 20:27:01,311 epoch 6 - iter 3/8 - loss 0.00229554 - samples/sec: 8.73 - lr: 0.020000\n",
+      "2021-09-21 20:27:01,381 epoch 6 - iter 4/8 - loss 0.00225559 - samples/sec: 14.38 - lr: 0.020000\n",
+      "2021-09-21 20:27:01,443 epoch 6 - iter 5/8 - loss 0.00247816 - samples/sec: 16.52 - lr: 0.020000\n",
+      "2021-09-21 20:27:01,493 epoch 6 - iter 6/8 - loss 0.00248691 - samples/sec: 19.93 - lr: 0.020000\n",
+      "2021-09-21 20:27:01,554 epoch 6 - iter 7/8 - loss 0.00221466 - samples/sec: 16.79 - lr: 0.020000\n",
+      "2021-09-21 20:27:01,638 epoch 6 - iter 8/8 - loss 0.00206584 - samples/sec: 12.04 - lr: 0.020000\n",
+      "2021-09-21 20:27:01,639 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:01,639 EPOCH 6 done: loss 0.0021 - lr 0.0200000\n",
+      "2021-09-21 20:27:02,424 DEV : loss 0.25329655408859253 - score 0.0\n",
       "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 11:53:21,524 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 11:53:21,608 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:21,670 epoch 7 - iter 1/8 - loss 0.00101493 - samples/sec: 22.39 - lr: 0.010000\n",
-      "2021-09-08 11:53:21,714 epoch 7 - iter 2/8 - loss 0.00232085 - samples/sec: 22.58 - lr: 0.010000\n",
-      "2021-09-08 11:53:21,768 epoch 7 - iter 3/8 - loss 0.00217501 - samples/sec: 18.87 - lr: 0.010000\n",
-      "2021-09-08 11:53:21,813 epoch 7 - iter 4/8 - loss 0.00179601 - samples/sec: 22.65 - lr: 0.010000\n",
-      "2021-09-08 11:53:21,857 epoch 7 - iter 5/8 - loss 0.00174160 - samples/sec: 22.71 - lr: 0.010000\n",
-      "2021-09-08 11:53:21,910 epoch 7 - iter 6/8 - loss 0.00175064 - samples/sec: 19.10 - lr: 0.010000\n",
-      "2021-09-08 11:53:21,978 epoch 7 - iter 7/8 - loss 0.00198589 - samples/sec: 14.78 - lr: 0.010000\n",
-      "2021-09-08 11:53:22,024 epoch 7 - iter 8/8 - loss 0.00185873 - samples/sec: 21.95 - lr: 0.010000\n",
-      "2021-09-08 11:53:22,025 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:22,026 EPOCH 7 done: loss 0.0019 - lr 0.0100000\n",
-      "2021-09-08 11:53:22,176 DEV : loss 0.24151809513568878 - score 0.0\n",
-      "2021-09-08 11:53:22,177 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:53:22,255 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 20:27:02,425 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:27:02,428 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:02,494 epoch 7 - iter 1/8 - loss 0.00067251 - samples/sec: 19.46 - lr: 0.010000\n",
+      "2021-09-21 20:27:02,558 epoch 7 - iter 2/8 - loss 0.24958254 - samples/sec: 15.96 - lr: 0.010000\n",
+      "2021-09-21 20:27:02,604 epoch 7 - iter 3/8 - loss 0.16671991 - samples/sec: 21.95 - lr: 0.010000\n",
+      "2021-09-21 20:27:02,653 epoch 7 - iter 4/8 - loss 0.12552900 - samples/sec: 20.78 - lr: 0.010000\n",
+      "2021-09-21 20:27:02,699 epoch 7 - iter 5/8 - loss 0.10126752 - samples/sec: 21.76 - lr: 0.010000\n",
+      "2021-09-21 20:27:02,746 epoch 7 - iter 6/8 - loss 0.08539981 - samples/sec: 21.46 - lr: 0.010000\n",
+      "2021-09-21 20:27:02,813 epoch 7 - iter 7/8 - loss 0.07340534 - samples/sec: 15.02 - lr: 0.010000\n",
+      "2021-09-21 20:27:02,871 epoch 7 - iter 8/8 - loss 0.06428716 - samples/sec: 17.49 - lr: 0.010000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:53:22,315 epoch 8 - iter 1/8 - loss 0.00162036 - samples/sec: 22.54 - lr: 0.010000\n",
-      "2021-09-08 11:53:22,368 epoch 8 - iter 2/8 - loss 0.00337636 - samples/sec: 19.00 - lr: 0.010000\n",
-      "2021-09-08 11:53:22,421 epoch 8 - iter 3/8 - loss 0.00282778 - samples/sec: 18.84 - lr: 0.010000\n",
-      "2021-09-08 11:53:22,467 epoch 8 - iter 4/8 - loss 0.00250275 - samples/sec: 22.25 - lr: 0.010000\n",
-      "2021-09-08 11:53:22,512 epoch 8 - iter 5/8 - loss 0.00209382 - samples/sec: 22.62 - lr: 0.010000\n",
-      "2021-09-08 11:53:22,556 epoch 8 - iter 6/8 - loss 0.00192076 - samples/sec: 22.52 - lr: 0.010000\n",
-      "2021-09-08 11:53:22,602 epoch 8 - iter 7/8 - loss 0.00172455 - samples/sec: 22.24 - lr: 0.010000\n",
-      "2021-09-08 11:53:22,669 epoch 8 - iter 8/8 - loss 0.00169093 - samples/sec: 15.16 - lr: 0.010000\n",
-      "2021-09-08 11:53:22,670 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:22,670 EPOCH 8 done: loss 0.0017 - lr 0.0100000\n",
-      "2021-09-08 11:53:22,824 DEV : loss 0.24583357572555542 - score 0.0\n",
-      "2021-09-08 11:53:22,824 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:53:22,896 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:22,965 epoch 9 - iter 1/8 - loss 0.00448346 - samples/sec: 18.76 - lr: 0.010000\n",
-      "2021-09-08 11:53:23,013 epoch 9 - iter 2/8 - loss 0.01584443 - samples/sec: 21.02 - lr: 0.010000\n",
-      "2021-09-08 11:53:23,062 epoch 9 - iter 3/8 - loss 0.22270244 - samples/sec: 20.75 - lr: 0.010000\n",
-      "2021-09-08 11:53:23,130 epoch 9 - iter 4/8 - loss 0.16719529 - samples/sec: 14.87 - lr: 0.010000\n",
-      "2021-09-08 11:53:23,176 epoch 9 - iter 5/8 - loss 0.13394435 - samples/sec: 21.94 - lr: 0.010000\n",
-      "2021-09-08 11:53:23,221 epoch 9 - iter 6/8 - loss 0.11177220 - samples/sec: 22.53 - lr: 0.010000\n",
-      "2021-09-08 11:53:23,266 epoch 9 - iter 7/8 - loss 0.09585996 - samples/sec: 22.54 - lr: 0.010000\n",
-      "2021-09-08 11:53:23,319 epoch 9 - iter 8/8 - loss 0.08427656 - samples/sec: 18.92 - lr: 0.010000\n",
-      "2021-09-08 11:53:23,320 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:23,320 EPOCH 9 done: loss 0.0843 - lr 0.0100000\n",
-      "2021-09-08 11:53:24,044 DEV : loss 0.1932026594877243 - score 0.0\n",
-      "2021-09-08 11:53:24,045 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:53:24,047 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:24,117 epoch 10 - iter 1/8 - loss 0.00084207 - samples/sec: 18.89 - lr: 0.010000\n",
-      "2021-09-08 11:53:24,162 epoch 10 - iter 2/8 - loss 0.00095679 - samples/sec: 22.17 - lr: 0.010000\n",
-      "2021-09-08 11:53:24,207 epoch 10 - iter 3/8 - loss 0.00117403 - samples/sec: 22.71 - lr: 0.010000\n",
-      "2021-09-08 11:53:24,252 epoch 10 - iter 4/8 - loss 0.00116452 - samples/sec: 22.47 - lr: 0.010000\n",
-      "2021-09-08 11:53:24,318 epoch 10 - iter 5/8 - loss 0.00117763 - samples/sec: 15.18 - lr: 0.010000\n",
-      "2021-09-08 11:53:24,363 epoch 10 - iter 6/8 - loss 0.00109546 - samples/sec: 22.50 - lr: 0.010000\n",
-      "2021-09-08 11:53:24,408 epoch 10 - iter 7/8 - loss 0.00100148 - samples/sec: 22.58 - lr: 0.010000\n",
-      "2021-09-08 11:53:24,463 epoch 10 - iter 8/8 - loss 0.00110061 - samples/sec: 18.25 - lr: 0.010000\n",
-      "2021-09-08 11:53:24,464 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:24,465 EPOCH 10 done: loss 0.0011 - lr 0.0100000\n",
-      "2021-09-08 11:53:24,707 DEV : loss 0.20087338984012604 - score 0.0\n",
+      "2021-09-21 20:27:02,872 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:02,873 EPOCH 7 done: loss 0.0643 - lr 0.0100000\n",
+      "2021-09-21 20:27:02,902 DEV : loss 0.20966902375221252 - score 0.0\n",
+      "2021-09-21 20:27:02,903 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:27:02,917 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:02,986 epoch 8 - iter 1/8 - loss 0.00082120 - samples/sec: 18.55 - lr: 0.010000\n",
+      "2021-09-21 20:27:03,032 epoch 8 - iter 2/8 - loss 0.00074822 - samples/sec: 21.82 - lr: 0.010000\n",
+      "2021-09-21 20:27:03,079 epoch 8 - iter 3/8 - loss 0.00084712 - samples/sec: 21.48 - lr: 0.010000\n",
+      "2021-09-21 20:27:03,131 epoch 8 - iter 4/8 - loss 0.00122543 - samples/sec: 19.75 - lr: 0.010000\n",
+      "2021-09-21 20:27:03,190 epoch 8 - iter 5/8 - loss 0.00150892 - samples/sec: 17.00 - lr: 0.010000\n",
+      "2021-09-21 20:27:03,236 epoch 8 - iter 6/8 - loss 0.00176543 - samples/sec: 22.03 - lr: 0.010000\n",
+      "2021-09-21 20:27:03,283 epoch 8 - iter 7/8 - loss 0.00178126 - samples/sec: 21.68 - lr: 0.010000\n",
+      "2021-09-21 20:27:03,350 epoch 8 - iter 8/8 - loss 0.00173344 - samples/sec: 15.05 - lr: 0.010000\n",
+      "2021-09-21 20:27:03,351 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:03,352 EPOCH 8 done: loss 0.0017 - lr 0.0100000\n",
+      "2021-09-21 20:27:03,598 DEV : loss 0.21986863017082214 - score 0.0\n",
+      "2021-09-21 20:27:03,600 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:27:03,973 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:04,037 epoch 9 - iter 1/8 - loss 0.00102899 - samples/sec: 21.21 - lr: 0.010000\n",
+      "2021-09-21 20:27:04,084 epoch 9 - iter 2/8 - loss 0.00124913 - samples/sec: 21.80 - lr: 0.010000\n",
+      "2021-09-21 20:27:04,134 epoch 9 - iter 3/8 - loss 0.00172712 - samples/sec: 20.60 - lr: 0.010000\n",
+      "2021-09-21 20:27:04,180 epoch 9 - iter 4/8 - loss 0.00177862 - samples/sec: 21.66 - lr: 0.010000\n",
+      "2021-09-21 20:27:04,228 epoch 9 - iter 5/8 - loss 0.00153768 - samples/sec: 21.49 - lr: 0.010000\n",
+      "2021-09-21 20:27:04,283 epoch 9 - iter 6/8 - loss 0.00136360 - samples/sec: 18.47 - lr: 0.010000\n",
+      "2021-09-21 20:27:04,353 epoch 9 - iter 7/8 - loss 0.00132401 - samples/sec: 14.25 - lr: 0.010000\n",
+      "2021-09-21 20:27:04,417 epoch 9 - iter 8/8 - loss 0.01010083 - samples/sec: 15.76 - lr: 0.010000\n",
+      "2021-09-21 20:27:04,419 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:04,419 EPOCH 9 done: loss 0.0101 - lr 0.0100000\n",
+      "2021-09-21 20:27:04,732 DEV : loss 0.20940575003623962 - score 0.0\n",
+      "2021-09-21 20:27:04,733 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:27:04,865 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:04,939 epoch 10 - iter 1/8 - loss 0.00287570 - samples/sec: 16.93 - lr: 0.010000\n",
+      "2021-09-21 20:27:04,986 epoch 10 - iter 2/8 - loss 0.00219665 - samples/sec: 21.72 - lr: 0.010000\n",
+      "2021-09-21 20:27:05,035 epoch 10 - iter 3/8 - loss 0.00185890 - samples/sec: 20.82 - lr: 0.010000\n",
+      "2021-09-21 20:27:05,102 epoch 10 - iter 4/8 - loss 0.00174091 - samples/sec: 14.96 - lr: 0.010000\n",
+      "2021-09-21 20:27:05,149 epoch 10 - iter 5/8 - loss 0.00162080 - samples/sec: 21.91 - lr: 0.010000\n",
+      "2021-09-21 20:27:05,198 epoch 10 - iter 6/8 - loss 0.00148419 - samples/sec: 20.54 - lr: 0.010000\n",
+      "2021-09-21 20:27:05,246 epoch 10 - iter 7/8 - loss 0.00131485 - samples/sec: 21.41 - lr: 0.010000\n",
+      "2021-09-21 20:27:05,300 epoch 10 - iter 8/8 - loss 0.00121914 - samples/sec: 18.57 - lr: 0.010000\n",
+      "2021-09-21 20:27:05,302 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:05,302 EPOCH 10 done: loss 0.0012 - lr 0.0100000\n",
+      "2021-09-21 20:27:05,431 DEV : loss 0.22340673208236694 - score 0.0\n",
       "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 11:53:24,708 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 11:53:29,218 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:29,219 Testing using best model ...\n",
-      "2021-09-08 11:53:29,248 loading file None/best-model.pt\n",
+      "2021-09-21 20:27:05,432 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:27:16,061 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:16,062 Testing using best model ...\n",
+      "2021-09-21 20:27:16,063 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:53:36,005 \t1.0\n",
-      "2021-09-08 11:53:36,005 \n",
+      "2021-09-21 20:27:21,090 \t0.0\n",
+      "2021-09-21 20:27:21,090 \n",
       "Results:\n",
-      "- F-score (micro) 1.0\n",
-      "- F-score (macro) 0.1111\n",
-      "- Accuracy 1.0\n",
+      "- F-score (micro) 0.0\n",
+      "- F-score (macro) 0.0\n",
+      "- Accuracy 0.0\n",
       "\n",
       "By class:\n",
-      "                                           precision    recall  f1-score   support\n",
+      "                                          precision    recall  f1-score   support\n",
       "\n",
-      "This text is about Family & Relationships     0.0000    0.0000    0.0000         0\n",
-      " This text is about Science & Mathematics     0.0000    0.0000    0.0000         0\n",
-      " This text is about Entertainment & Music     0.0000    0.0000    0.0000         0\n",
-      "  This text is about Computers & Internet     0.0000    0.0000    0.0000         0\n",
-      " This text is about Education & Reference     0.0000    0.0000    0.0000         0\n",
-      "     This text is about Society & Culture     0.0000    0.0000    0.0000         0\n",
-      "    This text is about Business & Finance     0.0000    0.0000    0.0000         0\n",
-      " This text is about Politics & Government     0.0000    0.0000    0.0000         0\n",
-      "                This text is about Sports     1.0000    1.0000    1.0000         1\n",
+      "               This text is about Health     0.0000    0.0000    0.0000         0\n",
+      "This text is about Science & Mathematics     0.0000    0.0000    0.0000         0\n",
+      "This text is about Entertainment & Music     0.0000    0.0000    0.0000         0\n",
+      " This text is about Computers & Internet     0.0000    0.0000    0.0000         0\n",
+      "This text is about Education & Reference     0.0000    0.0000    0.0000         0\n",
+      "               This text is about Sports     0.0000    0.0000    0.0000         0\n",
+      "    This text is about Society & Culture     0.0000    0.0000    0.0000         0\n",
+      "This text is about Politics & Government     0.0000    0.0000    0.0000         0\n",
+      "   This text is about Business & Finance     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "                                micro avg     1.0000    1.0000    1.0000         1\n",
-      "                                macro avg     0.1111    0.1111    0.1111         1\n",
-      "                             weighted avg     1.0000    1.0000    1.0000         1\n",
-      "                              samples avg     1.0000    1.0000    1.0000         1\n",
+      "                               micro avg     0.0000    0.0000    0.0000         1\n",
+      "                               macro avg     0.0000    0.0000    0.0000         1\n",
+      "                            weighted avg     0.0000    0.0000    0.0000         1\n",
+      "                             samples avg     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "2021-09-08 11:53:36,006 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:50,414 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
+      "2021-09-21 20:27:21,091 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:46,229 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:53:54,441 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:27:50,317 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 9/9 [00:00<00:00, 8703.88it/s]"
+      "100%|██████████| 9/9 [00:00<00:00, 9832.96it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:53:54,444 [b'This text is about Family & Relationships', b'This text is about Health', b'This text is about Science & Mathematics', b'This text is about Entertainment & Music', b'This text is about Computers & Internet', b'This text is about Education & Reference', b'This text is about Sports', b'This text is about Politics & Government', b'This text is about Business & Finance']\n",
-      "2021-09-08 11:53:54,455 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:54,458 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:27:50,320 [b'This text is about Family & Relationships', b'This text is about Health', b'This text is about Science & Mathematics', b'This text is about Computers & Internet', b'This text is about Education & Reference', b'This text is about Sports', b'This text is about Society & Culture', b'This text is about Business & Finance', b'This text is about Politics & Government']\n",
+      "2021-09-21 20:27:50,453 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:50,454 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -3695,27 +3713,24 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:53:54,458 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:54,458 Corpus: \"Corpus: 8 train + 1 dev + 1 test sentences\"\n",
-      "2021-09-08 11:53:54,459 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:54,459 Parameters:\n",
-      "2021-09-08 11:53:54,459  - learning_rate: \"0.02\"\n",
-      "2021-09-08 11:53:54,460  - mini_batch_size: \"1\"\n",
-      "2021-09-08 11:53:54,460  - patience: \"3\"\n",
-      "2021-09-08 11:53:54,460  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 11:53:54,460  - max_epochs: \"10\"\n",
-      "2021-09-08 11:53:54,461  - shuffle: \"True\"\n",
-      "2021-09-08 11:53:54,461  - train_with_dev: \"False\"\n",
-      "2021-09-08 11:53:54,461  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 11:53:54,462 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:54,462 Model training base path: \"None\"\n",
-      "2021-09-08 11:53:54,462 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:54,462 Device: cuda:1\n",
-      "2021-09-08 11:53:54,463 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:54,463 Embeddings storage mode: cpu\n",
-      "2021-09-08 11:53:54,469 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:54,541 epoch 1 - iter 1/8 - loss 0.73652983 - samples/sec: 17.74 - lr: 0.020000\n",
-      "2021-09-08 11:53:54,587 epoch 1 - iter 2/8 - loss 0.37961239 - samples/sec: 22.23 - lr: 0.020000\n"
+      "2021-09-21 20:27:50,455 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:50,455 Corpus: \"Corpus: 8 train + 1 dev + 1 test sentences\"\n",
+      "2021-09-21 20:27:50,456 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:50,456 Parameters:\n",
+      "2021-09-21 20:27:50,456  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:27:50,457  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:27:50,457  - patience: \"3\"\n",
+      "2021-09-21 20:27:50,457  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:27:50,458  - max_epochs: \"10\"\n",
+      "2021-09-21 20:27:50,458  - shuffle: \"True\"\n",
+      "2021-09-21 20:27:50,459  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:27:50,459  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:27:50,459 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:50,460 Model training base path: \"None\"\n",
+      "2021-09-21 20:27:50,460 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:50,460 Device: cuda:0\n",
+      "2021-09-21 20:27:50,461 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:50,461 Embeddings storage mode: cpu\n"
      ]
     },
     {
@@ -3729,153 +3744,158 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:53:54,642 epoch 1 - iter 3/8 - loss 0.48801141 - samples/sec: 18.29 - lr: 0.020000\n",
-      "2021-09-08 11:53:54,698 epoch 1 - iter 4/8 - loss 0.45029790 - samples/sec: 17.95 - lr: 0.020000\n",
-      "2021-09-08 11:53:54,779 epoch 1 - iter 5/8 - loss 0.36311484 - samples/sec: 12.50 - lr: 0.020000\n",
-      "2021-09-08 11:53:54,929 epoch 1 - iter 6/8 - loss 0.59646736 - samples/sec: 6.67 - lr: 0.020000\n",
-      "2021-09-08 11:53:54,977 epoch 1 - iter 7/8 - loss 0.51140116 - samples/sec: 21.29 - lr: 0.020000\n",
-      "2021-09-08 11:53:55,025 epoch 1 - iter 8/8 - loss 0.75186481 - samples/sec: 20.73 - lr: 0.020000\n",
-      "2021-09-08 11:53:55,026 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:55,027 EPOCH 1 done: loss 0.7519 - lr 0.0200000\n",
-      "2021-09-08 11:53:55,056 DEV : loss 0.20159713923931122 - score 0.0\n",
-      "2021-09-08 11:53:55,057 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:27:50,688 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:50,822 epoch 1 - iter 1/8 - loss 0.27540514 - samples/sec: 9.59 - lr: 0.020000\n",
+      "2021-09-21 20:27:50,902 epoch 1 - iter 2/8 - loss 0.32294109 - samples/sec: 12.52 - lr: 0.020000\n",
+      "2021-09-21 20:27:50,981 epoch 1 - iter 3/8 - loss 0.23946394 - samples/sec: 12.81 - lr: 0.020000\n",
+      "2021-09-21 20:27:51,063 epoch 1 - iter 4/8 - loss 0.18213935 - samples/sec: 12.26 - lr: 0.020000\n",
+      "2021-09-21 20:27:51,133 epoch 1 - iter 5/8 - loss 0.24600544 - samples/sec: 14.28 - lr: 0.020000\n",
+      "2021-09-21 20:27:51,200 epoch 1 - iter 6/8 - loss 0.59489652 - samples/sec: 15.06 - lr: 0.020000\n",
+      "2021-09-21 20:27:51,277 epoch 1 - iter 7/8 - loss 0.66840458 - samples/sec: 13.07 - lr: 0.020000\n",
+      "2021-09-21 20:27:51,360 epoch 1 - iter 8/8 - loss 0.60837290 - samples/sec: 12.13 - lr: 0.020000\n",
+      "2021-09-21 20:27:51,362 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:51,362 EPOCH 1 done: loss 0.6084 - lr 0.0200000\n",
+      "2021-09-21 20:27:51,490 DEV : loss 0.2013835310935974 - score 0.0\n",
+      "2021-09-21 20:27:51,491 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:54:04,947 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:05,018 epoch 2 - iter 1/8 - loss 0.07619698 - samples/sec: 18.21 - lr: 0.020000\n",
-      "2021-09-08 11:54:05,070 epoch 2 - iter 2/8 - loss 0.05824931 - samples/sec: 19.50 - lr: 0.020000\n",
-      "2021-09-08 11:54:05,122 epoch 2 - iter 3/8 - loss 0.24372050 - samples/sec: 19.36 - lr: 0.020000\n",
-      "2021-09-08 11:54:05,206 epoch 2 - iter 4/8 - loss 0.23714343 - samples/sec: 11.97 - lr: 0.020000\n",
-      "2021-09-08 11:54:05,265 epoch 2 - iter 5/8 - loss 0.26254841 - samples/sec: 17.10 - lr: 0.020000\n",
-      "2021-09-08 11:54:05,309 epoch 2 - iter 6/8 - loss 0.22097039 - samples/sec: 22.72 - lr: 0.020000\n",
-      "2021-09-08 11:54:05,358 epoch 2 - iter 7/8 - loss 0.18988123 - samples/sec: 20.85 - lr: 0.020000\n",
-      "2021-09-08 11:54:05,508 epoch 2 - iter 8/8 - loss 0.28885786 - samples/sec: 6.68 - lr: 0.020000\n",
-      "2021-09-08 11:54:05,509 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:05,509 EPOCH 2 done: loss 0.2889 - lr 0.0200000\n",
-      "2021-09-08 11:54:06,688 DEV : loss 0.12355372309684753 - score 0.0\n",
-      "2021-09-08 11:54:06,689 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:27:55,533 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:55,611 epoch 2 - iter 1/8 - loss 0.00553914 - samples/sec: 17.32 - lr: 0.020000\n",
+      "2021-09-21 20:27:55,673 epoch 2 - iter 2/8 - loss 0.05526244 - samples/sec: 16.39 - lr: 0.020000\n",
+      "2021-09-21 20:27:55,756 epoch 2 - iter 3/8 - loss 0.07871971 - samples/sec: 12.11 - lr: 0.020000\n",
+      "2021-09-21 20:27:55,816 epoch 2 - iter 4/8 - loss 0.22358004 - samples/sec: 16.82 - lr: 0.020000\n",
+      "2021-09-21 20:27:55,876 epoch 2 - iter 5/8 - loss 0.19217080 - samples/sec: 16.90 - lr: 0.020000\n",
+      "2021-09-21 20:27:55,937 epoch 2 - iter 6/8 - loss 0.18452371 - samples/sec: 16.63 - lr: 0.020000\n",
+      "2021-09-21 20:27:56,002 epoch 2 - iter 7/8 - loss 0.28265844 - samples/sec: 15.52 - lr: 0.020000\n",
+      "2021-09-21 20:27:56,066 epoch 2 - iter 8/8 - loss 0.29320874 - samples/sec: 15.75 - lr: 0.020000\n",
+      "2021-09-21 20:27:56,067 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:56,067 EPOCH 2 done: loss 0.2932 - lr 0.0200000\n",
+      "2021-09-21 20:27:56,099 DEV : loss 0.1822226643562317 - score 0.0\n",
+      "2021-09-21 20:27:56,100 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:54:15,271 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:15,332 epoch 3 - iter 1/8 - loss 0.01286129 - samples/sec: 22.15 - lr: 0.020000\n",
-      "2021-09-08 11:54:15,384 epoch 3 - iter 2/8 - loss 0.00758005 - samples/sec: 19.61 - lr: 0.020000\n",
-      "2021-09-08 11:54:15,435 epoch 3 - iter 3/8 - loss 0.01162531 - samples/sec: 19.85 - lr: 0.020000\n",
-      "2021-09-08 11:54:15,584 epoch 3 - iter 4/8 - loss 0.14407504 - samples/sec: 6.71 - lr: 0.020000\n",
-      "2021-09-08 11:54:15,644 epoch 3 - iter 5/8 - loss 0.16893402 - samples/sec: 16.96 - lr: 0.020000\n",
-      "2021-09-08 11:54:15,719 epoch 3 - iter 6/8 - loss 0.22062743 - samples/sec: 13.34 - lr: 0.020000\n",
-      "2021-09-08 11:54:15,767 epoch 3 - iter 7/8 - loss 0.21624472 - samples/sec: 21.01 - lr: 0.020000\n",
-      "2021-09-08 11:54:15,815 epoch 3 - iter 8/8 - loss 0.18977907 - samples/sec: 21.07 - lr: 0.020000\n",
-      "2021-09-08 11:54:15,816 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:15,816 EPOCH 3 done: loss 0.1898 - lr 0.0200000\n",
-      "2021-09-08 11:54:16,739 DEV : loss 0.2277507781982422 - score 0.0\n",
-      "2021-09-08 11:54:16,739 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:54:16,743 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:16,814 epoch 4 - iter 1/8 - loss 0.04913993 - samples/sec: 17.92 - lr: 0.020000\n",
-      "2021-09-08 11:54:16,860 epoch 4 - iter 2/8 - loss 0.02556894 - samples/sec: 21.66 - lr: 0.020000\n",
-      "2021-09-08 11:54:16,905 epoch 4 - iter 3/8 - loss 0.02019280 - samples/sec: 22.63 - lr: 0.020000\n",
-      "2021-09-08 11:54:16,976 epoch 4 - iter 4/8 - loss 0.01627058 - samples/sec: 14.08 - lr: 0.020000\n",
-      "2021-09-08 11:54:17,127 epoch 4 - iter 5/8 - loss 0.05156556 - samples/sec: 6.65 - lr: 0.020000\n",
-      "2021-09-08 11:54:17,175 epoch 4 - iter 6/8 - loss 0.04371190 - samples/sec: 20.93 - lr: 0.020000\n",
-      "2021-09-08 11:54:17,223 epoch 4 - iter 7/8 - loss 0.03848655 - samples/sec: 21.21 - lr: 0.020000\n",
-      "2021-09-08 11:54:17,268 epoch 4 - iter 8/8 - loss 0.03600738 - samples/sec: 22.73 - lr: 0.020000\n",
-      "2021-09-08 11:54:17,269 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:17,269 EPOCH 4 done: loss 0.0360 - lr 0.0200000\n",
-      "2021-09-08 11:54:17,303 DEV : loss 0.340305894613266 - score 0.0\n",
-      "2021-09-08 11:54:17,304 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:54:17,306 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:17,366 epoch 5 - iter 1/8 - loss 0.00616088 - samples/sec: 21.94 - lr: 0.020000\n",
-      "2021-09-08 11:54:17,413 epoch 5 - iter 2/8 - loss 0.00341415 - samples/sec: 21.59 - lr: 0.020000\n",
-      "2021-09-08 11:54:17,460 epoch 5 - iter 3/8 - loss 0.00248610 - samples/sec: 21.44 - lr: 0.020000\n",
-      "2021-09-08 11:54:17,505 epoch 5 - iter 4/8 - loss 0.00228284 - samples/sec: 22.64 - lr: 0.020000\n",
-      "2021-09-08 11:54:17,559 epoch 5 - iter 5/8 - loss 0.00230839 - samples/sec: 18.37 - lr: 0.020000\n",
-      "2021-09-08 11:54:17,607 epoch 5 - iter 6/8 - loss 0.00200351 - samples/sec: 21.07 - lr: 0.020000\n",
-      "2021-09-08 11:54:17,681 epoch 5 - iter 7/8 - loss 0.00196558 - samples/sec: 13.69 - lr: 0.020000\n",
-      "2021-09-08 11:54:17,831 epoch 5 - iter 8/8 - loss 0.03449522 - samples/sec: 6.70 - lr: 0.020000\n",
-      "2021-09-08 11:54:17,832 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:17,832 EPOCH 5 done: loss 0.0345 - lr 0.0200000\n",
-      "2021-09-08 11:54:17,860 DEV : loss 0.29060453176498413 - score 0.0\n",
-      "2021-09-08 11:54:17,861 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:54:17,863 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:18,026 epoch 6 - iter 1/8 - loss 0.05794650 - samples/sec: 6.70 - lr: 0.020000\n",
-      "2021-09-08 11:54:18,074 epoch 6 - iter 2/8 - loss 0.02933378 - samples/sec: 21.17 - lr: 0.020000\n",
-      "2021-09-08 11:54:18,133 epoch 6 - iter 3/8 - loss 0.09908808 - samples/sec: 17.12 - lr: 0.020000\n",
-      "2021-09-08 11:54:18,178 epoch 6 - iter 4/8 - loss 0.07467348 - samples/sec: 22.68 - lr: 0.020000\n",
-      "2021-09-08 11:54:18,251 epoch 6 - iter 5/8 - loss 0.05989810 - samples/sec: 13.79 - lr: 0.020000\n",
-      "2021-09-08 11:54:18,295 epoch 6 - iter 6/8 - loss 0.05068004 - samples/sec: 22.63 - lr: 0.020000\n",
-      "2021-09-08 11:54:18,343 epoch 6 - iter 7/8 - loss 0.04382115 - samples/sec: 21.22 - lr: 0.020000\n",
-      "2021-09-08 11:54:18,390 epoch 6 - iter 8/8 - loss 0.03840634 - samples/sec: 21.53 - lr: 0.020000\n",
-      "2021-09-08 11:54:18,391 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:18,391 EPOCH 6 done: loss 0.0384 - lr 0.0200000\n",
-      "2021-09-08 11:54:18,419 DEV : loss 0.19803807139396667 - score 0.0\n",
-      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 11:54:18,419 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 11:54:18,423 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:18,486 epoch 7 - iter 1/8 - loss 0.25067106 - samples/sec: 20.48 - lr: 0.010000\n",
-      "2021-09-08 11:54:18,533 epoch 7 - iter 2/8 - loss 0.12553002 - samples/sec: 21.69 - lr: 0.010000\n",
-      "2021-09-08 11:54:18,604 epoch 7 - iter 3/8 - loss 0.08462120 - samples/sec: 14.10 - lr: 0.010000\n",
-      "2021-09-08 11:54:18,660 epoch 7 - iter 4/8 - loss 0.06398083 - samples/sec: 18.15 - lr: 0.010000\n",
-      "2021-09-08 11:54:18,707 epoch 7 - iter 5/8 - loss 0.05141229 - samples/sec: 21.39 - lr: 0.010000\n",
-      "2021-09-08 11:54:18,856 epoch 7 - iter 6/8 - loss 0.04534933 - samples/sec: 6.71 - lr: 0.010000\n",
-      "2021-09-08 11:54:18,907 epoch 7 - iter 7/8 - loss 0.03901807 - samples/sec: 20.22 - lr: 0.010000\n",
-      "2021-09-08 11:54:18,957 epoch 7 - iter 8/8 - loss 0.03425255 - samples/sec: 20.06 - lr: 0.010000\n",
-      "2021-09-08 11:54:18,959 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:18,959 EPOCH 7 done: loss 0.0343 - lr 0.0100000\n",
-      "2021-09-08 11:54:18,990 DEV : loss 0.24187861382961273 - score 0.0\n"
+      "2021-09-21 20:28:12,933 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:13,069 epoch 3 - iter 1/8 - loss 0.00286487 - samples/sec: 10.48 - lr: 0.020000\n",
+      "2021-09-21 20:28:13,206 epoch 3 - iter 2/8 - loss 0.00609838 - samples/sec: 7.35 - lr: 0.020000\n",
+      "2021-09-21 20:28:13,284 epoch 3 - iter 3/8 - loss 0.00599165 - samples/sec: 12.95 - lr: 0.020000\n",
+      "2021-09-21 20:28:13,422 epoch 3 - iter 4/8 - loss 0.10170136 - samples/sec: 7.25 - lr: 0.020000\n",
+      "2021-09-21 20:28:13,484 epoch 3 - iter 5/8 - loss 0.38019377 - samples/sec: 16.27 - lr: 0.020000\n",
+      "2021-09-21 20:28:13,541 epoch 3 - iter 6/8 - loss 0.31785651 - samples/sec: 17.96 - lr: 0.020000\n",
+      "2021-09-21 20:28:13,610 epoch 3 - iter 7/8 - loss 0.33811050 - samples/sec: 14.64 - lr: 0.020000\n",
+      "2021-09-21 20:28:13,682 epoch 3 - iter 8/8 - loss 0.29715453 - samples/sec: 13.91 - lr: 0.020000\n",
+      "2021-09-21 20:28:13,683 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:13,684 EPOCH 3 done: loss 0.2972 - lr 0.0200000\n",
+      "2021-09-21 20:28:13,719 DEV : loss 0.17530550062656403 - score 0.0\n",
+      "2021-09-21 20:28:13,720 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:28:29,859 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:29,935 epoch 4 - iter 1/8 - loss 0.11734170 - samples/sec: 18.25 - lr: 0.020000\n",
+      "2021-09-21 20:28:29,993 epoch 4 - iter 2/8 - loss 0.06208565 - samples/sec: 17.41 - lr: 0.020000\n",
+      "2021-09-21 20:28:30,063 epoch 4 - iter 3/8 - loss 0.08102919 - samples/sec: 14.39 - lr: 0.020000\n",
+      "2021-09-21 20:28:30,136 epoch 4 - iter 4/8 - loss 0.06413564 - samples/sec: 13.90 - lr: 0.020000\n",
+      "2021-09-21 20:28:30,187 epoch 4 - iter 5/8 - loss 0.05182233 - samples/sec: 19.88 - lr: 0.020000\n",
+      "2021-09-21 20:28:30,297 epoch 4 - iter 6/8 - loss 0.04970137 - samples/sec: 9.13 - lr: 0.020000\n",
+      "2021-09-21 20:28:30,367 epoch 4 - iter 7/8 - loss 0.04274094 - samples/sec: 14.45 - lr: 0.020000\n",
+      "2021-09-21 20:28:30,423 epoch 4 - iter 8/8 - loss 0.03825531 - samples/sec: 18.39 - lr: 0.020000\n",
+      "2021-09-21 20:28:30,425 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:30,425 EPOCH 4 done: loss 0.0383 - lr 0.0200000\n",
+      "2021-09-21 20:28:30,462 DEV : loss 0.07514998316764832 - score 0.0\n",
+      "2021-09-21 20:28:30,463 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:28:41,929 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:42,061 epoch 5 - iter 1/8 - loss 0.00206607 - samples/sec: 12.51 - lr: 0.020000\n",
+      "2021-09-21 20:28:42,144 epoch 5 - iter 2/8 - loss 0.00155350 - samples/sec: 12.14 - lr: 0.020000\n",
+      "2021-09-21 20:28:42,264 epoch 5 - iter 3/8 - loss 0.08424119 - samples/sec: 8.36 - lr: 0.020000\n",
+      "2021-09-21 20:28:42,325 epoch 5 - iter 4/8 - loss 0.06492018 - samples/sec: 16.61 - lr: 0.020000\n",
+      "2021-09-21 20:28:42,374 epoch 5 - iter 5/8 - loss 0.05281278 - samples/sec: 20.64 - lr: 0.020000\n",
+      "2021-09-21 20:28:42,429 epoch 5 - iter 6/8 - loss 0.04486338 - samples/sec: 18.38 - lr: 0.020000\n",
+      "2021-09-21 20:28:42,478 epoch 5 - iter 7/8 - loss 0.03868846 - samples/sec: 20.68 - lr: 0.020000\n",
+      "2021-09-21 20:28:42,527 epoch 5 - iter 8/8 - loss 0.03398653 - samples/sec: 20.56 - lr: 0.020000\n",
+      "2021-09-21 20:28:42,528 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:42,529 EPOCH 5 done: loss 0.0340 - lr 0.0200000\n",
+      "2021-09-21 20:28:43,455 DEV : loss 0.10119759291410446 - score 0.0\n",
+      "2021-09-21 20:28:43,456 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:28:43,618 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:43,706 epoch 6 - iter 1/8 - loss 0.00282327 - samples/sec: 13.96 - lr: 0.020000\n",
+      "2021-09-21 20:28:43,755 epoch 6 - iter 2/8 - loss 0.00245959 - samples/sec: 20.50 - lr: 0.020000\n",
+      "2021-09-21 20:28:43,806 epoch 6 - iter 3/8 - loss 0.00217403 - samples/sec: 19.65 - lr: 0.020000\n",
+      "2021-09-21 20:28:43,856 epoch 6 - iter 4/8 - loss 0.00182137 - samples/sec: 20.43 - lr: 0.020000\n",
+      "2021-09-21 20:28:43,905 epoch 6 - iter 5/8 - loss 0.00171559 - samples/sec: 20.72 - lr: 0.020000\n",
+      "2021-09-21 20:28:43,958 epoch 6 - iter 6/8 - loss 0.00172534 - samples/sec: 18.94 - lr: 0.020000\n",
+      "2021-09-21 20:28:44,008 epoch 6 - iter 7/8 - loss 0.00168528 - samples/sec: 20.42 - lr: 0.020000\n",
+      "2021-09-21 20:28:44,058 epoch 6 - iter 8/8 - loss 0.00337634 - samples/sec: 20.15 - lr: 0.020000\n",
+      "2021-09-21 20:28:44,059 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:44,059 EPOCH 6 done: loss 0.0034 - lr 0.0200000\n",
+      "2021-09-21 20:28:44,232 DEV : loss 0.03581494837999344 - score 0.0\n",
+      "2021-09-21 20:28:44,233 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:28:51,065 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:51,163 epoch 7 - iter 1/8 - loss 0.00108267 - samples/sec: 15.78 - lr: 0.020000\n",
+      "2021-09-21 20:28:51,290 epoch 7 - iter 2/8 - loss 0.00221060 - samples/sec: 7.93 - lr: 0.020000\n",
+      "2021-09-21 20:28:51,354 epoch 7 - iter 3/8 - loss 0.00188532 - samples/sec: 15.66 - lr: 0.020000\n",
+      "2021-09-21 20:28:51,427 epoch 7 - iter 4/8 - loss 0.00172318 - samples/sec: 13.74 - lr: 0.020000\n",
+      "2021-09-21 20:28:51,492 epoch 7 - iter 5/8 - loss 0.00160271 - samples/sec: 15.65 - lr: 0.020000\n",
+      "2021-09-21 20:28:51,541 epoch 7 - iter 6/8 - loss 0.00142073 - samples/sec: 20.46 - lr: 0.020000\n",
+      "2021-09-21 20:28:51,591 epoch 7 - iter 7/8 - loss 0.00150063 - samples/sec: 20.47 - lr: 0.020000\n",
+      "2021-09-21 20:28:51,639 epoch 7 - iter 8/8 - loss 0.00195260 - samples/sec: 20.79 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:54:18,991 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:54:18,993 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:19,082 epoch 8 - iter 1/8 - loss 0.00069358 - samples/sec: 13.68 - lr: 0.010000\n",
-      "2021-09-08 11:54:19,133 epoch 8 - iter 2/8 - loss 0.00053055 - samples/sec: 20.14 - lr: 0.010000\n",
-      "2021-09-08 11:54:19,180 epoch 8 - iter 3/8 - loss 0.00202815 - samples/sec: 21.59 - lr: 0.010000\n",
-      "2021-09-08 11:54:19,329 epoch 8 - iter 4/8 - loss 0.00346372 - samples/sec: 6.71 - lr: 0.010000\n",
-      "2021-09-08 11:54:19,375 epoch 8 - iter 5/8 - loss 0.00296308 - samples/sec: 22.12 - lr: 0.010000\n",
-      "2021-09-08 11:54:19,422 epoch 8 - iter 6/8 - loss 0.00258039 - samples/sec: 21.32 - lr: 0.010000\n",
-      "2021-09-08 11:54:19,471 epoch 8 - iter 7/8 - loss 0.00230947 - samples/sec: 20.96 - lr: 0.010000\n",
-      "2021-09-08 11:54:19,526 epoch 8 - iter 8/8 - loss 0.00240549 - samples/sec: 18.21 - lr: 0.010000\n",
-      "2021-09-08 11:54:19,527 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:19,527 EPOCH 8 done: loss 0.0024 - lr 0.0100000\n",
-      "2021-09-08 11:54:19,650 DEV : loss 0.23785002529621124 - score 0.0\n",
-      "2021-09-08 11:54:19,650 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:54:19,721 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:19,807 epoch 9 - iter 1/8 - loss 0.00053568 - samples/sec: 14.04 - lr: 0.010000\n",
-      "2021-09-08 11:54:19,855 epoch 9 - iter 2/8 - loss 0.00071314 - samples/sec: 21.18 - lr: 0.010000\n",
-      "2021-09-08 11:54:19,911 epoch 9 - iter 3/8 - loss 0.00095882 - samples/sec: 18.12 - lr: 0.010000\n",
-      "2021-09-08 11:54:20,170 epoch 9 - iter 4/8 - loss 0.00153015 - samples/sec: 3.87 - lr: 0.010000\n",
-      "2021-09-08 11:54:20,215 epoch 9 - iter 5/8 - loss 0.00149521 - samples/sec: 22.29 - lr: 0.010000\n",
-      "2021-09-08 11:54:20,261 epoch 9 - iter 6/8 - loss 0.00230863 - samples/sec: 22.27 - lr: 0.010000\n",
-      "2021-09-08 11:54:20,308 epoch 9 - iter 7/8 - loss 0.00214430 - samples/sec: 21.43 - lr: 0.010000\n",
-      "2021-09-08 11:54:20,356 epoch 9 - iter 8/8 - loss 0.00194091 - samples/sec: 20.90 - lr: 0.010000\n",
-      "2021-09-08 11:54:20,357 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:20,358 EPOCH 9 done: loss 0.0019 - lr 0.0100000\n",
-      "2021-09-08 11:54:20,504 DEV : loss 0.2647674083709717 - score 0.0\n",
-      "2021-09-08 11:54:20,505 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:54:20,584 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:20,653 epoch 10 - iter 1/8 - loss 0.00098335 - samples/sec: 18.20 - lr: 0.010000\n",
-      "2021-09-08 11:54:20,698 epoch 10 - iter 2/8 - loss 0.00107858 - samples/sec: 22.56 - lr: 0.010000\n",
-      "2021-09-08 11:54:20,770 epoch 10 - iter 3/8 - loss 0.00105098 - samples/sec: 14.07 - lr: 0.010000\n",
-      "2021-09-08 11:54:20,817 epoch 10 - iter 4/8 - loss 0.00094425 - samples/sec: 21.35 - lr: 0.010000\n",
-      "2021-09-08 11:54:20,862 epoch 10 - iter 5/8 - loss 0.00130246 - samples/sec: 22.28 - lr: 0.010000\n",
-      "2021-09-08 11:54:20,910 epoch 10 - iter 6/8 - loss 0.00142991 - samples/sec: 21.01 - lr: 0.010000\n",
-      "2021-09-08 11:54:20,958 epoch 10 - iter 7/8 - loss 0.00133097 - samples/sec: 21.23 - lr: 0.010000\n",
-      "2021-09-08 11:54:21,106 epoch 10 - iter 8/8 - loss 0.00136569 - samples/sec: 6.80 - lr: 0.010000\n",
-      "2021-09-08 11:54:21,107 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:21,107 EPOCH 10 done: loss 0.0014 - lr 0.0100000\n",
-      "2021-09-08 11:54:21,230 DEV : loss 0.2685827910900116 - score 0.0\n",
-      "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 11:54:21,231 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 11:54:27,816 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:27,817 Testing using best model ...\n",
-      "2021-09-08 11:54:27,818 loading file None/best-model.pt\n",
+      "2021-09-21 20:28:51,640 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:51,641 EPOCH 7 done: loss 0.0020 - lr 0.0200000\n",
+      "2021-09-21 20:28:57,738 DEV : loss 0.010258250869810581 - score 0.0\n",
+      "2021-09-21 20:28:57,739 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:29:04,899 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:05,021 epoch 8 - iter 1/8 - loss 0.00088163 - samples/sec: 12.74 - lr: 0.020000\n",
+      "2021-09-21 20:29:05,090 epoch 8 - iter 2/8 - loss 0.00196150 - samples/sec: 14.65 - lr: 0.020000\n",
+      "2021-09-21 20:29:05,151 epoch 8 - iter 3/8 - loss 0.00174510 - samples/sec: 16.57 - lr: 0.020000\n",
+      "2021-09-21 20:29:05,215 epoch 8 - iter 4/8 - loss 0.00145364 - samples/sec: 15.80 - lr: 0.020000\n",
+      "2021-09-21 20:29:05,289 epoch 8 - iter 5/8 - loss 0.00133123 - samples/sec: 13.55 - lr: 0.020000\n",
+      "2021-09-21 20:29:05,415 epoch 8 - iter 6/8 - loss 0.00127688 - samples/sec: 8.02 - lr: 0.020000\n",
+      "2021-09-21 20:29:05,480 epoch 8 - iter 7/8 - loss 0.00141301 - samples/sec: 15.54 - lr: 0.020000\n",
+      "2021-09-21 20:29:05,556 epoch 8 - iter 8/8 - loss 0.09333071 - samples/sec: 13.13 - lr: 0.020000\n",
+      "2021-09-21 20:29:05,557 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:05,558 EPOCH 8 done: loss 0.0933 - lr 0.0200000\n",
+      "2021-09-21 20:29:05,663 DEV : loss 0.012320796959102154 - score 0.0\n",
+      "2021-09-21 20:29:05,664 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:29:05,671 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:05,793 epoch 9 - iter 1/8 - loss 0.00340948 - samples/sec: 12.10 - lr: 0.020000\n",
+      "2021-09-21 20:29:05,861 epoch 9 - iter 2/8 - loss 0.00232620 - samples/sec: 14.76 - lr: 0.020000\n",
+      "2021-09-21 20:29:05,922 epoch 9 - iter 3/8 - loss 0.00220584 - samples/sec: 16.58 - lr: 0.020000\n",
+      "2021-09-21 20:29:06,040 epoch 9 - iter 4/8 - loss 0.00226900 - samples/sec: 8.56 - lr: 0.020000\n",
+      "2021-09-21 20:29:06,120 epoch 9 - iter 5/8 - loss 0.00191414 - samples/sec: 12.59 - lr: 0.020000\n",
+      "2021-09-21 20:29:06,204 epoch 9 - iter 6/8 - loss 0.00181749 - samples/sec: 11.90 - lr: 0.020000\n",
+      "2021-09-21 20:29:06,275 epoch 9 - iter 7/8 - loss 0.00194494 - samples/sec: 14.26 - lr: 0.020000\n",
+      "2021-09-21 20:29:06,342 epoch 9 - iter 8/8 - loss 0.00177599 - samples/sec: 14.99 - lr: 0.020000\n",
+      "2021-09-21 20:29:06,343 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:06,344 EPOCH 9 done: loss 0.0018 - lr 0.0200000\n",
+      "2021-09-21 20:29:06,468 DEV : loss 0.012115341611206532 - score 0.0\n",
+      "2021-09-21 20:29:06,469 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:29:06,472 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:06,630 epoch 10 - iter 1/8 - loss 0.00051312 - samples/sec: 9.31 - lr: 0.020000\n",
+      "2021-09-21 20:29:06,709 epoch 10 - iter 2/8 - loss 0.00116773 - samples/sec: 12.75 - lr: 0.020000\n",
+      "2021-09-21 20:29:06,777 epoch 10 - iter 3/8 - loss 0.00096532 - samples/sec: 14.70 - lr: 0.020000\n",
+      "2021-09-21 20:29:06,840 epoch 10 - iter 4/8 - loss 0.00085744 - samples/sec: 16.00 - lr: 0.020000\n",
+      "2021-09-21 20:29:06,908 epoch 10 - iter 5/8 - loss 0.00078103 - samples/sec: 14.91 - lr: 0.020000\n",
+      "2021-09-21 20:29:06,977 epoch 10 - iter 6/8 - loss 0.00099884 - samples/sec: 14.64 - lr: 0.020000\n",
+      "2021-09-21 20:29:07,043 epoch 10 - iter 7/8 - loss 0.00128800 - samples/sec: 15.34 - lr: 0.020000\n",
+      "2021-09-21 20:29:07,134 epoch 10 - iter 8/8 - loss 0.00161018 - samples/sec: 11.04 - lr: 0.020000\n",
+      "2021-09-21 20:29:07,135 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:07,135 EPOCH 10 done: loss 0.0016 - lr 0.0200000\n",
+      "2021-09-21 20:29:07,180 DEV : loss 0.01625813916325569 - score 0.0\n",
+      "2021-09-21 20:29:07,182 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:29:14,950 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:14,951 Testing using best model ...\n",
+      "2021-09-21 20:29:14,953 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:54:33,429 \t1.0\n",
-      "2021-09-08 11:54:33,429 \n",
+      "2021-09-21 20:29:20,166 \t0.0\n",
+      "2021-09-21 20:29:20,167 \n",
       "Results:\n",
-      "- F-score (micro) 1.0\n",
-      "- F-score (macro) 0.1111\n",
-      "- Accuracy 1.0\n",
+      "- F-score (micro) 0.0\n",
+      "- F-score (macro) 0.0\n",
+      "- Accuracy 0.0\n",
       "\n",
       "By class:\n",
       "                                           precision    recall  f1-score   support\n",
@@ -3883,38 +3903,38 @@
       "This text is about Family & Relationships     0.0000    0.0000    0.0000         0\n",
       "                This text is about Health     0.0000    0.0000    0.0000         0\n",
       " This text is about Science & Mathematics     0.0000    0.0000    0.0000         0\n",
-      " This text is about Entertainment & Music     0.0000    0.0000    0.0000         0\n",
       "  This text is about Computers & Internet     0.0000    0.0000    0.0000         0\n",
       " This text is about Education & Reference     0.0000    0.0000    0.0000         0\n",
       "                This text is about Sports     0.0000    0.0000    0.0000         0\n",
-      " This text is about Politics & Government     0.0000    0.0000    0.0000         0\n",
-      "    This text is about Business & Finance     1.0000    1.0000    1.0000         1\n",
+      "     This text is about Society & Culture     0.0000    0.0000    0.0000         0\n",
+      "    This text is about Business & Finance     0.0000    0.0000    0.0000         0\n",
+      " This text is about Politics & Government     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "                                micro avg     1.0000    1.0000    1.0000         1\n",
-      "                                macro avg     0.1111    0.1111    0.1111         1\n",
-      "                             weighted avg     1.0000    1.0000    1.0000         1\n",
-      "                              samples avg     1.0000    1.0000    1.0000         1\n",
+      "                                micro avg     0.0000    0.0000    0.0000         1\n",
+      "                                macro avg     0.0000    0.0000    0.0000         1\n",
+      "                             weighted avg     0.0000    0.0000    0.0000         1\n",
+      "                              samples avg     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "2021-09-08 11:54:33,429 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:47,663 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
+      "2021-09-21 20:29:20,167 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:44,299 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:54:51,603 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:29:48,841 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 9/9 [00:00<00:00, 16614.76it/s]"
+      "100%|██████████| 9/9 [00:00<00:00, 14474.21it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:54:51,605 [b'This text is about Family & Relationships', b'This text is about Health', b'This text is about Science & Mathematics', b'This text is about Entertainment & Music', b'This text is about Computers & Internet', b'This text is about Education & Reference', b'This text is about Society & Culture', b'This text is about Business & Finance', b'This text is about Sports']\n",
-      "2021-09-08 11:54:51,615 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:51,617 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:29:48,844 [b'This text is about Family & Relationships', b'This text is about Health', b'This text is about Science & Mathematics', b'This text is about Computers & Internet', b'This text is about Education & Reference', b'This text is about Society & Culture', b'This text is about Business & Finance', b'This text is about Politics & Government', b'This text is about Entertainment & Music']\n",
+      "2021-09-21 20:29:48,853 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:48,855 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -4227,26 +4247,26 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:54:51,617 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:51,618 Corpus: \"Corpus: 8 train + 1 dev + 1 test sentences\"\n",
-      "2021-09-08 11:54:51,618 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:51,618 Parameters:\n",
-      "2021-09-08 11:54:51,619  - learning_rate: \"0.02\"\n",
-      "2021-09-08 11:54:51,619  - mini_batch_size: \"1\"\n",
-      "2021-09-08 11:54:51,619  - patience: \"3\"\n",
-      "2021-09-08 11:54:51,619  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 11:54:51,620  - max_epochs: \"10\"\n",
-      "2021-09-08 11:54:51,620  - shuffle: \"True\"\n",
-      "2021-09-08 11:54:51,620  - train_with_dev: \"False\"\n",
-      "2021-09-08 11:54:51,621  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 11:54:51,621 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:51,621 Model training base path: \"None\"\n",
-      "2021-09-08 11:54:51,622 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:51,622 Device: cuda:1\n",
-      "2021-09-08 11:54:51,622 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:51,622 Embeddings storage mode: cpu\n",
-      "2021-09-08 11:54:51,629 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:51,690 epoch 1 - iter 1/8 - loss 1.75929356 - samples/sec: 22.64 - lr: 0.020000\n"
+      "2021-09-21 20:29:48,855 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:48,856 Corpus: \"Corpus: 8 train + 1 dev + 1 test sentences\"\n",
+      "2021-09-21 20:29:48,856 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:48,856 Parameters:\n",
+      "2021-09-21 20:29:48,856  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:29:48,857  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:29:48,857  - patience: \"3\"\n",
+      "2021-09-21 20:29:48,857  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:29:48,858  - max_epochs: \"10\"\n",
+      "2021-09-21 20:29:48,858  - shuffle: \"True\"\n",
+      "2021-09-21 20:29:48,858  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:29:48,859  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:29:48,859 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:48,859 Model training base path: \"None\"\n",
+      "2021-09-21 20:29:48,859 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:48,860 Device: cuda:0\n",
+      "2021-09-21 20:29:48,860 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:48,860 Embeddings storage mode: cpu\n",
+      "2021-09-21 20:29:48,867 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:48,986 epoch 1 - iter 1/8 - loss 0.72101003 - samples/sec: 16.39 - lr: 0.020000\n"
      ]
     },
     {
@@ -4260,151 +4280,152 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:54:51,822 epoch 1 - iter 2/8 - loss 0.90366338 - samples/sec: 7.62 - lr: 0.020000\n",
-      "2021-09-08 11:54:51,871 epoch 1 - iter 3/8 - loss 0.78463289 - samples/sec: 20.48 - lr: 0.020000\n",
-      "2021-09-08 11:54:51,919 epoch 1 - iter 4/8 - loss 0.60662301 - samples/sec: 20.99 - lr: 0.020000\n",
-      "2021-09-08 11:54:51,967 epoch 1 - iter 5/8 - loss 0.48740693 - samples/sec: 21.00 - lr: 0.020000\n",
-      "2021-09-08 11:54:52,016 epoch 1 - iter 6/8 - loss 0.61695759 - samples/sec: 20.85 - lr: 0.020000\n",
-      "2021-09-08 11:54:52,069 epoch 1 - iter 7/8 - loss 0.73519631 - samples/sec: 18.95 - lr: 0.020000\n",
-      "2021-09-08 11:54:52,115 epoch 1 - iter 8/8 - loss 0.64384363 - samples/sec: 22.04 - lr: 0.020000\n",
-      "2021-09-08 11:54:52,116 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:52,116 EPOCH 1 done: loss 0.6438 - lr 0.0200000\n",
-      "2021-09-08 11:54:52,272 DEV : loss 3.5392324924468994 - score 0.0\n",
-      "2021-09-08 11:54:52,272 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:29:49,048 epoch 1 - iter 2/8 - loss 0.38605526 - samples/sec: 16.07 - lr: 0.020000\n",
+      "2021-09-21 20:29:49,104 epoch 1 - iter 3/8 - loss 0.52126886 - samples/sec: 18.09 - lr: 0.020000\n",
+      "2021-09-21 20:29:49,181 epoch 1 - iter 4/8 - loss 0.39226746 - samples/sec: 13.07 - lr: 0.020000\n",
+      "2021-09-21 20:29:49,244 epoch 1 - iter 5/8 - loss 0.50426992 - samples/sec: 16.16 - lr: 0.020000\n",
+      "2021-09-21 20:29:49,316 epoch 1 - iter 6/8 - loss 0.84270603 - samples/sec: 13.88 - lr: 0.020000\n",
+      "2021-09-21 20:29:49,395 epoch 1 - iter 7/8 - loss 0.74606643 - samples/sec: 12.75 - lr: 0.020000\n",
+      "2021-09-21 20:29:49,579 epoch 1 - iter 8/8 - loss 0.69877466 - samples/sec: 5.46 - lr: 0.020000\n",
+      "2021-09-21 20:29:49,580 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:49,580 EPOCH 1 done: loss 0.6988 - lr 0.0200000\n",
+      "2021-09-21 20:29:49,625 DEV : loss 0.022620242089033127 - score 0.0\n",
+      "2021-09-21 20:29:49,626 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:29:53,203 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:53,307 epoch 2 - iter 1/8 - loss 0.01050166 - samples/sec: 14.86 - lr: 0.020000\n",
+      "2021-09-21 20:29:53,365 epoch 2 - iter 2/8 - loss 0.00810849 - samples/sec: 17.40 - lr: 0.020000\n",
+      "2021-09-21 20:29:53,431 epoch 2 - iter 3/8 - loss 0.29706104 - samples/sec: 15.29 - lr: 0.020000\n",
+      "2021-09-21 20:29:53,495 epoch 2 - iter 4/8 - loss 0.23521874 - samples/sec: 15.97 - lr: 0.020000\n",
+      "2021-09-21 20:29:53,553 epoch 2 - iter 5/8 - loss 0.27309652 - samples/sec: 17.24 - lr: 0.020000\n",
+      "2021-09-21 20:29:53,610 epoch 2 - iter 6/8 - loss 0.31592078 - samples/sec: 17.57 - lr: 0.020000\n",
+      "2021-09-21 20:29:53,754 epoch 2 - iter 7/8 - loss 0.32174814 - samples/sec: 7.00 - lr: 0.020000\n",
+      "2021-09-21 20:29:53,814 epoch 2 - iter 8/8 - loss 0.28355403 - samples/sec: 16.73 - lr: 0.020000\n",
+      "2021-09-21 20:29:53,815 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:53,815 EPOCH 2 done: loss 0.2836 - lr 0.0200000\n",
+      "2021-09-21 20:29:53,853 DEV : loss 0.0063673388212919235 - score 0.0\n",
+      "2021-09-21 20:29:53,854 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:54:56,320 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:56,391 epoch 2 - iter 1/8 - loss 0.30295345 - samples/sec: 18.66 - lr: 0.020000\n",
-      "2021-09-08 11:54:56,439 epoch 2 - iter 2/8 - loss 0.17509301 - samples/sec: 21.18 - lr: 0.020000\n",
-      "2021-09-08 11:54:56,491 epoch 2 - iter 3/8 - loss 0.11906102 - samples/sec: 19.46 - lr: 0.020000\n",
-      "2021-09-08 11:54:56,538 epoch 2 - iter 4/8 - loss 0.30506519 - samples/sec: 21.12 - lr: 0.020000\n",
-      "2021-09-08 11:54:56,586 epoch 2 - iter 5/8 - loss 0.35621578 - samples/sec: 21.02 - lr: 0.020000\n",
-      "2021-09-08 11:54:56,703 epoch 2 - iter 6/8 - loss 0.30112924 - samples/sec: 8.63 - lr: 0.020000\n",
-      "2021-09-08 11:54:56,755 epoch 2 - iter 7/8 - loss 0.28351623 - samples/sec: 19.31 - lr: 0.020000\n",
-      "2021-09-08 11:54:56,804 epoch 2 - iter 8/8 - loss 0.25014619 - samples/sec: 20.54 - lr: 0.020000\n",
-      "2021-09-08 11:54:56,806 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:56,806 EPOCH 2 done: loss 0.2501 - lr 0.0200000\n",
-      "2021-09-08 11:54:57,076 DEV : loss 2.4875829219818115 - score 0.0\n",
-      "2021-09-08 11:54:57,078 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:29:59,670 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:59,772 epoch 3 - iter 1/8 - loss 0.04990518 - samples/sec: 14.96 - lr: 0.020000\n",
+      "2021-09-21 20:29:59,905 epoch 3 - iter 2/8 - loss 0.03868974 - samples/sec: 7.57 - lr: 0.020000\n",
+      "2021-09-21 20:29:59,971 epoch 3 - iter 3/8 - loss 0.07201873 - samples/sec: 15.32 - lr: 0.020000\n",
+      "2021-09-21 20:30:00,029 epoch 3 - iter 4/8 - loss 0.46552872 - samples/sec: 17.44 - lr: 0.020000\n",
+      "2021-09-21 20:30:00,089 epoch 3 - iter 5/8 - loss 0.37339748 - samples/sec: 16.69 - lr: 0.020000\n",
+      "2021-09-21 20:30:00,150 epoch 3 - iter 6/8 - loss 0.43845429 - samples/sec: 16.58 - lr: 0.020000\n",
+      "2021-09-21 20:30:00,211 epoch 3 - iter 7/8 - loss 0.37684783 - samples/sec: 16.42 - lr: 0.020000\n",
+      "2021-09-21 20:30:00,265 epoch 3 - iter 8/8 - loss 0.33033017 - samples/sec: 18.68 - lr: 0.020000\n",
+      "2021-09-21 20:30:00,266 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:00,266 EPOCH 3 done: loss 0.3303 - lr 0.0200000\n",
+      "2021-09-21 20:30:04,068 DEV : loss 0.006981702521443367 - score 0.0\n",
+      "2021-09-21 20:30:04,070 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:30:04,101 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:04,339 epoch 4 - iter 1/8 - loss 0.01082703 - samples/sec: 4.63 - lr: 0.020000\n",
+      "2021-09-21 20:30:04,401 epoch 4 - iter 2/8 - loss 0.13592612 - samples/sec: 16.40 - lr: 0.020000\n",
+      "2021-09-21 20:30:04,451 epoch 4 - iter 3/8 - loss 0.09163579 - samples/sec: 20.33 - lr: 0.020000\n",
+      "2021-09-21 20:30:04,504 epoch 4 - iter 4/8 - loss 0.08037774 - samples/sec: 19.04 - lr: 0.020000\n",
+      "2021-09-21 20:30:04,556 epoch 4 - iter 5/8 - loss 0.06540690 - samples/sec: 19.31 - lr: 0.020000\n",
+      "2021-09-21 20:30:04,608 epoch 4 - iter 6/8 - loss 0.39977133 - samples/sec: 19.39 - lr: 0.020000\n",
+      "2021-09-21 20:30:04,660 epoch 4 - iter 7/8 - loss 0.34483728 - samples/sec: 19.45 - lr: 0.020000\n",
+      "2021-09-21 20:30:04,720 epoch 4 - iter 8/8 - loss 0.50069634 - samples/sec: 16.79 - lr: 0.020000\n",
+      "2021-09-21 20:30:04,721 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:04,721 EPOCH 4 done: loss 0.5007 - lr 0.0200000\n",
+      "2021-09-21 20:30:04,753 DEV : loss 0.010255285538733006 - score 0.0\n",
+      "2021-09-21 20:30:04,753 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:30:04,756 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:04,822 epoch 5 - iter 1/8 - loss 0.00582366 - samples/sec: 19.39 - lr: 0.020000\n",
+      "2021-09-21 20:30:04,946 epoch 5 - iter 2/8 - loss 0.04989549 - samples/sec: 8.09 - lr: 0.020000\n",
+      "2021-09-21 20:30:05,000 epoch 5 - iter 3/8 - loss 0.17944867 - samples/sec: 18.68 - lr: 0.020000\n",
+      "2021-09-21 20:30:05,060 epoch 5 - iter 4/8 - loss 0.13675328 - samples/sec: 16.73 - lr: 0.020000\n",
+      "2021-09-21 20:30:05,117 epoch 5 - iter 5/8 - loss 0.10978297 - samples/sec: 17.53 - lr: 0.020000\n",
+      "2021-09-21 20:30:05,196 epoch 5 - iter 6/8 - loss 0.09740876 - samples/sec: 12.84 - lr: 0.020000\n",
+      "2021-09-21 20:30:05,261 epoch 5 - iter 7/8 - loss 0.08372462 - samples/sec: 15.56 - lr: 0.020000\n",
+      "2021-09-21 20:30:05,330 epoch 5 - iter 8/8 - loss 0.07353510 - samples/sec: 14.54 - lr: 0.020000\n",
+      "2021-09-21 20:30:05,331 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:05,331 EPOCH 5 done: loss 0.0735 - lr 0.0200000\n",
+      "2021-09-21 20:30:05,468 DEV : loss 0.0029847919940948486 - score 0.0\n",
+      "2021-09-21 20:30:05,469 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:55:06,277 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:06,339 epoch 3 - iter 1/8 - loss 0.02396949 - samples/sec: 21.95 - lr: 0.020000\n",
-      "2021-09-08 11:55:06,388 epoch 3 - iter 2/8 - loss 0.97635398 - samples/sec: 20.80 - lr: 0.020000\n",
-      "2021-09-08 11:55:06,438 epoch 3 - iter 3/8 - loss 0.65173684 - samples/sec: 19.97 - lr: 0.020000\n",
-      "2021-09-08 11:55:06,489 epoch 3 - iter 4/8 - loss 0.75131880 - samples/sec: 19.76 - lr: 0.020000\n",
-      "2021-09-08 11:55:06,614 epoch 3 - iter 5/8 - loss 0.60782293 - samples/sec: 8.07 - lr: 0.020000\n",
-      "2021-09-08 11:55:06,658 epoch 3 - iter 6/8 - loss 0.50760090 - samples/sec: 22.74 - lr: 0.020000\n",
-      "2021-09-08 11:55:06,710 epoch 3 - iter 7/8 - loss 0.44054724 - samples/sec: 19.55 - lr: 0.020000\n",
-      "2021-09-08 11:55:06,762 epoch 3 - iter 8/8 - loss 0.47511016 - samples/sec: 19.36 - lr: 0.020000\n",
-      "2021-09-08 11:55:06,763 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:06,763 EPOCH 3 done: loss 0.4751 - lr 0.0200000\n",
-      "2021-09-08 11:55:06,917 DEV : loss 3.269986152648926 - score 0.0\n",
-      "2021-09-08 11:55:06,918 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:55:06,921 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:06,989 epoch 4 - iter 1/8 - loss 0.69458872 - samples/sec: 18.78 - lr: 0.020000\n",
-      "2021-09-08 11:55:07,039 epoch 4 - iter 2/8 - loss 0.35181126 - samples/sec: 20.37 - lr: 0.020000\n",
-      "2021-09-08 11:55:07,158 epoch 4 - iter 3/8 - loss 0.23533655 - samples/sec: 8.48 - lr: 0.020000\n",
-      "2021-09-08 11:55:07,206 epoch 4 - iter 4/8 - loss 0.17691913 - samples/sec: 21.01 - lr: 0.020000\n",
-      "2021-09-08 11:55:07,258 epoch 4 - iter 5/8 - loss 0.14280866 - samples/sec: 19.74 - lr: 0.020000\n",
-      "2021-09-08 11:55:07,311 epoch 4 - iter 6/8 - loss 0.20028055 - samples/sec: 19.10 - lr: 0.020000\n",
-      "2021-09-08 11:55:07,366 epoch 4 - iter 7/8 - loss 0.21512642 - samples/sec: 18.44 - lr: 0.020000\n",
-      "2021-09-08 11:55:07,414 epoch 4 - iter 8/8 - loss 0.18921052 - samples/sec: 20.76 - lr: 0.020000\n",
-      "2021-09-08 11:55:07,416 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:07,416 EPOCH 4 done: loss 0.1892 - lr 0.0200000\n",
-      "2021-09-08 11:55:07,669 DEV : loss 1.996071696281433 - score 0.0\n",
-      "2021-09-08 11:55:07,670 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:30:13,419 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:13,488 epoch 6 - iter 1/8 - loss 0.00075888 - samples/sec: 19.20 - lr: 0.020000\n",
+      "2021-09-21 20:30:13,538 epoch 6 - iter 2/8 - loss 0.00104593 - samples/sec: 20.21 - lr: 0.020000\n",
+      "2021-09-21 20:30:13,591 epoch 6 - iter 3/8 - loss 0.01598583 - samples/sec: 19.32 - lr: 0.020000\n",
+      "2021-09-21 20:30:13,701 epoch 6 - iter 4/8 - loss 0.01902473 - samples/sec: 9.16 - lr: 0.020000\n",
+      "2021-09-21 20:30:13,756 epoch 6 - iter 5/8 - loss 0.01567181 - samples/sec: 18.31 - lr: 0.020000\n",
+      "2021-09-21 20:30:13,809 epoch 6 - iter 6/8 - loss 0.04375665 - samples/sec: 18.94 - lr: 0.020000\n",
+      "2021-09-21 20:30:13,858 epoch 6 - iter 7/8 - loss 0.03898217 - samples/sec: 20.59 - lr: 0.020000\n",
+      "2021-09-21 20:30:13,908 epoch 6 - iter 8/8 - loss 0.03422711 - samples/sec: 20.28 - lr: 0.020000\n",
+      "2021-09-21 20:30:13,909 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:13,909 EPOCH 6 done: loss 0.0342 - lr 0.0200000\n",
+      "2021-09-21 20:30:16,022 DEV : loss 0.0021173087880015373 - score 0.0\n",
+      "2021-09-21 20:30:16,023 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:55:16,274 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:16,335 epoch 5 - iter 1/8 - loss 0.00223211 - samples/sec: 22.08 - lr: 0.020000\n",
-      "2021-09-08 11:55:16,392 epoch 5 - iter 2/8 - loss 0.05595297 - samples/sec: 17.91 - lr: 0.020000\n",
-      "2021-09-08 11:55:16,446 epoch 5 - iter 3/8 - loss 0.07301334 - samples/sec: 18.39 - lr: 0.020000\n",
-      "2021-09-08 11:55:16,578 epoch 5 - iter 4/8 - loss 0.08028064 - samples/sec: 7.65 - lr: 0.020000\n",
-      "2021-09-08 11:55:16,627 epoch 5 - iter 5/8 - loss 0.06466018 - samples/sec: 20.42 - lr: 0.020000\n",
-      "2021-09-08 11:55:16,675 epoch 5 - iter 6/8 - loss 0.10973024 - samples/sec: 21.22 - lr: 0.020000\n",
-      "2021-09-08 11:55:16,720 epoch 5 - iter 7/8 - loss 0.09517161 - samples/sec: 22.44 - lr: 0.020000\n",
-      "2021-09-08 11:55:16,764 epoch 5 - iter 8/8 - loss 0.08434120 - samples/sec: 22.79 - lr: 0.020000\n",
-      "2021-09-08 11:55:16,765 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:16,765 EPOCH 5 done: loss 0.0843 - lr 0.0200000\n",
-      "2021-09-08 11:55:16,920 DEV : loss 3.123861074447632 - score 0.0\n",
-      "2021-09-08 11:55:16,921 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:55:16,923 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:17,052 epoch 6 - iter 1/8 - loss 0.00179303 - samples/sec: 8.73 - lr: 0.020000\n",
-      "2021-09-08 11:55:17,099 epoch 6 - iter 2/8 - loss 0.18726665 - samples/sec: 21.17 - lr: 0.020000\n",
-      "2021-09-08 11:55:17,143 epoch 6 - iter 3/8 - loss 0.12501602 - samples/sec: 22.97 - lr: 0.020000\n",
-      "2021-09-08 11:55:17,187 epoch 6 - iter 4/8 - loss 0.09443525 - samples/sec: 22.93 - lr: 0.020000\n",
-      "2021-09-08 11:55:17,234 epoch 6 - iter 5/8 - loss 0.07575998 - samples/sec: 21.80 - lr: 0.020000\n",
-      "2021-09-08 11:55:17,283 epoch 6 - iter 6/8 - loss 0.06331174 - samples/sec: 20.60 - lr: 0.020000\n",
-      "2021-09-08 11:55:17,331 epoch 6 - iter 7/8 - loss 0.07105215 - samples/sec: 20.96 - lr: 0.020000\n",
-      "2021-09-08 11:55:17,382 epoch 6 - iter 8/8 - loss 0.11508020 - samples/sec: 19.62 - lr: 0.020000\n",
-      "2021-09-08 11:55:17,383 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:17,384 EPOCH 6 done: loss 0.1151 - lr 0.0200000\n",
-      "2021-09-08 11:55:17,537 DEV : loss 2.8112120628356934 - score 0.0\n",
-      "2021-09-08 11:55:17,538 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:55:17,540 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:17,607 epoch 7 - iter 1/8 - loss 0.03196156 - samples/sec: 18.91 - lr: 0.020000\n",
-      "2021-09-08 11:55:17,656 epoch 7 - iter 2/8 - loss 0.01614846 - samples/sec: 21.09 - lr: 0.020000\n",
-      "2021-09-08 11:55:17,704 epoch 7 - iter 3/8 - loss 0.01099111 - samples/sec: 21.10 - lr: 0.020000\n",
-      "2021-09-08 11:55:17,753 epoch 7 - iter 4/8 - loss 0.00838178 - samples/sec: 20.67 - lr: 0.020000\n",
-      "2021-09-08 11:55:17,869 epoch 7 - iter 5/8 - loss 0.00708344 - samples/sec: 8.65 - lr: 0.020000\n",
-      "2021-09-08 11:55:17,922 epoch 7 - iter 6/8 - loss 0.00600178 - samples/sec: 19.18 - lr: 0.020000\n",
-      "2021-09-08 11:55:17,971 epoch 7 - iter 7/8 - loss 0.00786574 - samples/sec: 20.94 - lr: 0.020000\n",
-      "2021-09-08 11:55:18,025 epoch 7 - iter 8/8 - loss 0.01087246 - samples/sec: 18.69 - lr: 0.020000\n",
-      "2021-09-08 11:55:18,026 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:18,026 EPOCH 7 done: loss 0.0109 - lr 0.0200000\n",
-      "2021-09-08 11:55:18,280 DEV : loss 1.985710620880127 - score 0.0\n"
+      "2021-09-21 20:30:23,285 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:23,362 epoch 7 - iter 1/8 - loss 0.00428819 - samples/sec: 16.53 - lr: 0.020000\n",
+      "2021-09-21 20:30:23,415 epoch 7 - iter 2/8 - loss 0.00284092 - samples/sec: 18.86 - lr: 0.020000\n",
+      "2021-09-21 20:30:23,466 epoch 7 - iter 3/8 - loss 0.21821885 - samples/sec: 19.72 - lr: 0.020000\n",
+      "2021-09-21 20:30:23,585 epoch 7 - iter 4/8 - loss 0.16391637 - samples/sec: 8.49 - lr: 0.020000\n",
+      "2021-09-21 20:30:23,635 epoch 7 - iter 5/8 - loss 0.13205793 - samples/sec: 20.06 - lr: 0.020000\n",
+      "2021-09-21 20:30:23,684 epoch 7 - iter 6/8 - loss 0.11012672 - samples/sec: 20.64 - lr: 0.020000\n",
+      "2021-09-21 20:30:23,733 epoch 7 - iter 7/8 - loss 0.09538547 - samples/sec: 20.58 - lr: 0.020000\n",
+      "2021-09-21 20:30:23,781 epoch 7 - iter 8/8 - loss 0.08352420 - samples/sec: 21.02 - lr: 0.020000\n",
+      "2021-09-21 20:30:23,782 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:23,782 EPOCH 7 done: loss 0.0835 - lr 0.0200000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:55:18,281 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:30:23,972 DEV : loss 0.0026113828644156456 - score 0.0\n",
+      "2021-09-21 20:30:23,972 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:30:24,081 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:24,207 epoch 8 - iter 1/8 - loss 0.00076187 - samples/sec: 9.01 - lr: 0.020000\n",
+      "2021-09-21 20:30:24,256 epoch 8 - iter 2/8 - loss 0.00062283 - samples/sec: 20.66 - lr: 0.020000\n",
+      "2021-09-21 20:30:24,305 epoch 8 - iter 3/8 - loss 0.00066420 - samples/sec: 20.69 - lr: 0.020000\n",
+      "2021-09-21 20:30:24,359 epoch 8 - iter 4/8 - loss 0.00075769 - samples/sec: 18.59 - lr: 0.020000\n",
+      "2021-09-21 20:30:24,407 epoch 8 - iter 5/8 - loss 0.00077443 - samples/sec: 21.13 - lr: 0.020000\n",
+      "2021-09-21 20:30:24,456 epoch 8 - iter 6/8 - loss 0.00078383 - samples/sec: 20.46 - lr: 0.020000\n",
+      "2021-09-21 20:30:24,505 epoch 8 - iter 7/8 - loss 0.00089143 - samples/sec: 20.48 - lr: 0.020000\n",
+      "2021-09-21 20:30:24,556 epoch 8 - iter 8/8 - loss 0.00516759 - samples/sec: 19.70 - lr: 0.020000\n",
+      "2021-09-21 20:30:24,557 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:24,558 EPOCH 8 done: loss 0.0052 - lr 0.0200000\n",
+      "2021-09-21 20:30:24,707 DEV : loss 0.0013444427167996764 - score 0.0\n",
+      "2021-09-21 20:30:24,708 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:55:24,224 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:24,285 epoch 8 - iter 1/8 - loss 0.00052424 - samples/sec: 22.24 - lr: 0.020000\n",
-      "2021-09-08 11:55:24,332 epoch 8 - iter 2/8 - loss 0.00060914 - samples/sec: 21.32 - lr: 0.020000\n",
-      "2021-09-08 11:55:24,382 epoch 8 - iter 3/8 - loss 0.00062069 - samples/sec: 20.53 - lr: 0.020000\n",
-      "2021-09-08 11:55:24,426 epoch 8 - iter 4/8 - loss 0.00059383 - samples/sec: 22.87 - lr: 0.020000\n",
-      "2021-09-08 11:55:24,478 epoch 8 - iter 5/8 - loss 0.01102361 - samples/sec: 19.23 - lr: 0.020000\n",
-      "2021-09-08 11:55:24,526 epoch 8 - iter 6/8 - loss 0.00930053 - samples/sec: 21.13 - lr: 0.020000\n",
-      "2021-09-08 11:55:24,572 epoch 8 - iter 7/8 - loss 0.00822681 - samples/sec: 21.88 - lr: 0.020000\n",
-      "2021-09-08 11:55:24,686 epoch 8 - iter 8/8 - loss 0.00739407 - samples/sec: 8.84 - lr: 0.020000\n",
-      "2021-09-08 11:55:24,687 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:24,687 EPOCH 8 done: loss 0.0074 - lr 0.0200000\n",
-      "2021-09-08 11:55:25,871 DEV : loss 1.301349401473999 - score 0.0\n",
-      "2021-09-08 11:55:25,872 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:30:29,236 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:29,471 epoch 9 - iter 1/8 - loss 0.00063283 - samples/sec: 5.03 - lr: 0.020000\n",
+      "2021-09-21 20:30:29,540 epoch 9 - iter 2/8 - loss 0.00045775 - samples/sec: 14.62 - lr: 0.020000\n",
+      "2021-09-21 20:30:29,594 epoch 9 - iter 3/8 - loss 0.00048265 - samples/sec: 18.50 - lr: 0.020000\n",
+      "2021-09-21 20:30:29,662 epoch 9 - iter 4/8 - loss 0.00916167 - samples/sec: 14.88 - lr: 0.020000\n",
+      "2021-09-21 20:30:29,738 epoch 9 - iter 5/8 - loss 0.00743020 - samples/sec: 13.23 - lr: 0.020000\n",
+      "2021-09-21 20:30:29,803 epoch 9 - iter 6/8 - loss 0.00628448 - samples/sec: 15.36 - lr: 0.020000\n",
+      "2021-09-21 20:30:29,889 epoch 9 - iter 7/8 - loss 0.00554850 - samples/sec: 11.76 - lr: 0.020000\n",
+      "2021-09-21 20:30:29,956 epoch 9 - iter 8/8 - loss 0.00521222 - samples/sec: 14.91 - lr: 0.020000\n",
+      "2021-09-21 20:30:29,957 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:29,958 EPOCH 9 done: loss 0.0052 - lr 0.0200000\n",
+      "2021-09-21 20:30:30,085 DEV : loss 0.0006397567340172827 - score 0.0\n",
+      "2021-09-21 20:30:30,087 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:55:31,354 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:31,416 epoch 9 - iter 1/8 - loss 0.00038501 - samples/sec: 22.21 - lr: 0.020000\n",
-      "2021-09-08 11:55:31,460 epoch 9 - iter 2/8 - loss 0.00049213 - samples/sec: 22.74 - lr: 0.020000\n",
-      "2021-09-08 11:55:31,505 epoch 9 - iter 3/8 - loss 0.00065568 - samples/sec: 22.68 - lr: 0.020000\n",
-      "2021-09-08 11:55:31,560 epoch 9 - iter 4/8 - loss 0.49061142 - samples/sec: 18.22 - lr: 0.020000\n",
-      "2021-09-08 11:55:31,612 epoch 9 - iter 5/8 - loss 0.39256696 - samples/sec: 19.37 - lr: 0.020000\n",
-      "2021-09-08 11:55:31,661 epoch 9 - iter 6/8 - loss 0.32723506 - samples/sec: 20.51 - lr: 0.020000\n",
-      "2021-09-08 11:55:31,706 epoch 9 - iter 7/8 - loss 0.28058101 - samples/sec: 22.49 - lr: 0.020000\n",
-      "2021-09-08 11:55:31,820 epoch 9 - iter 8/8 - loss 0.24602363 - samples/sec: 8.82 - lr: 0.020000\n",
-      "2021-09-08 11:55:31,821 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:31,822 EPOCH 9 done: loss 0.2460 - lr 0.0200000\n",
-      "2021-09-08 11:55:32,462 DEV : loss 1.8463096618652344 - score 0.0\n",
-      "2021-09-08 11:55:32,463 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:55:32,466 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:32,527 epoch 10 - iter 1/8 - loss 0.00033666 - samples/sec: 21.81 - lr: 0.020000\n",
-      "2021-09-08 11:55:32,575 epoch 10 - iter 2/8 - loss 0.00054141 - samples/sec: 20.93 - lr: 0.020000\n",
-      "2021-09-08 11:55:32,619 epoch 10 - iter 3/8 - loss 0.00179380 - samples/sec: 23.02 - lr: 0.020000\n",
-      "2021-09-08 11:55:32,668 epoch 10 - iter 4/8 - loss 0.00152861 - samples/sec: 20.62 - lr: 0.020000\n",
-      "2021-09-08 11:55:32,712 epoch 10 - iter 5/8 - loss 0.00343682 - samples/sec: 22.90 - lr: 0.020000\n",
-      "2021-09-08 11:55:32,825 epoch 10 - iter 6/8 - loss 0.00310475 - samples/sec: 8.85 - lr: 0.020000\n",
-      "2021-09-08 11:55:32,870 epoch 10 - iter 7/8 - loss 0.00277153 - samples/sec: 22.43 - lr: 0.020000\n",
-      "2021-09-08 11:55:32,914 epoch 10 - iter 8/8 - loss 0.00256098 - samples/sec: 22.80 - lr: 0.020000\n",
-      "2021-09-08 11:55:32,915 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:32,916 EPOCH 10 done: loss 0.0026 - lr 0.0200000\n",
-      "2021-09-08 11:55:33,071 DEV : loss 1.8827195167541504 - score 0.0\n",
-      "2021-09-08 11:55:33,072 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:55:40,138 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:40,139 Testing using best model ...\n",
-      "2021-09-08 11:55:40,140 loading file None/best-model.pt\n",
+      "2021-09-21 20:30:35,956 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:36,108 epoch 10 - iter 1/8 - loss 0.00063217 - samples/sec: 10.00 - lr: 0.020000\n",
+      "2021-09-21 20:30:36,180 epoch 10 - iter 2/8 - loss 0.00082188 - samples/sec: 14.04 - lr: 0.020000\n",
+      "2021-09-21 20:30:36,252 epoch 10 - iter 3/8 - loss 0.00090087 - samples/sec: 14.02 - lr: 0.020000\n",
+      "2021-09-21 20:30:36,320 epoch 10 - iter 4/8 - loss 0.00922505 - samples/sec: 14.68 - lr: 0.020000\n",
+      "2021-09-21 20:30:36,476 epoch 10 - iter 5/8 - loss 0.00747522 - samples/sec: 6.43 - lr: 0.020000\n",
+      "2021-09-21 20:30:36,529 epoch 10 - iter 6/8 - loss 0.00626635 - samples/sec: 19.07 - lr: 0.020000\n",
+      "2021-09-21 20:30:36,591 epoch 10 - iter 7/8 - loss 0.00552347 - samples/sec: 16.17 - lr: 0.020000\n",
+      "2021-09-21 20:30:36,679 epoch 10 - iter 8/8 - loss 0.00488269 - samples/sec: 11.41 - lr: 0.020000\n",
+      "2021-09-21 20:30:36,680 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:36,681 EPOCH 10 done: loss 0.0049 - lr 0.0200000\n",
+      "2021-09-21 20:30:36,825 DEV : loss 0.0008518965332768857 - score 0.0\n",
+      "2021-09-21 20:30:36,826 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:30:46,590 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:46,591 Testing using best model ...\n",
+      "2021-09-21 20:30:46,592 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:55:45,756 \t1.0\n",
-      "2021-09-08 11:55:45,757 \n",
+      "2021-09-21 20:30:51,787 \t1.0\n",
+      "2021-09-21 20:30:51,787 \n",
       "Results:\n",
       "- F-score (micro) 1.0\n",
       "- F-score (macro) 0.1111\n",
@@ -4416,38 +4437,38 @@
       "This text is about Family & Relationships     0.0000    0.0000    0.0000         0\n",
       "                This text is about Health     0.0000    0.0000    0.0000         0\n",
       " This text is about Science & Mathematics     0.0000    0.0000    0.0000         0\n",
-      " This text is about Entertainment & Music     0.0000    0.0000    0.0000         0\n",
       "  This text is about Computers & Internet     0.0000    0.0000    0.0000         0\n",
       " This text is about Education & Reference     0.0000    0.0000    0.0000         0\n",
       "     This text is about Society & Culture     0.0000    0.0000    0.0000         0\n",
       "    This text is about Business & Finance     0.0000    0.0000    0.0000         0\n",
-      "                This text is about Sports     1.0000    1.0000    1.0000         1\n",
+      " This text is about Politics & Government     0.0000    0.0000    0.0000         0\n",
+      " This text is about Entertainment & Music     1.0000    1.0000    1.0000         1\n",
       "\n",
       "                                micro avg     1.0000    1.0000    1.0000         1\n",
       "                                macro avg     0.1111    0.1111    0.1111         1\n",
       "                             weighted avg     1.0000    1.0000    1.0000         1\n",
       "                              samples avg     1.0000    1.0000    1.0000         1\n",
       "\n",
-      "2021-09-08 11:55:45,757 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:00,148 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
+      "2021-09-21 20:30:51,787 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:13,943 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:56:04,453 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:31:18,175 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 9/9 [00:00<00:00, 11785.43it/s]"
+      "100%|██████████| 9/9 [00:00<00:00, 12994.40it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:56:04,456 [b'This text is about Family & Relationships', b'This text is about Science & Mathematics', b'This text is about Computers & Internet', b'This text is about Education & Reference', b'This text is about Sports', b'This text is about Society & Culture', b'This text is about Business & Finance', b'This text is about Politics & Government', b'This text is about Entertainment & Music']\n",
-      "2021-09-08 11:56:04,589 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:04,591 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:31:18,178 [b'This text is about Family & Relationships', b'This text is about Health', b'This text is about Science & Mathematics', b'This text is about Computers & Internet', b'This text is about Education & Reference', b'This text is about Sports', b'This text is about Society & Culture', b'This text is about Politics & Government', b'This text is about Entertainment & Music']\n",
+      "2021-09-21 20:31:18,307 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:18,309 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -4760,24 +4781,24 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:56:04,592 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:04,592 Corpus: \"Corpus: 8 train + 1 dev + 1 test sentences\"\n",
-      "2021-09-08 11:56:04,592 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:04,592 Parameters:\n",
-      "2021-09-08 11:56:04,593  - learning_rate: \"0.02\"\n",
-      "2021-09-08 11:56:04,593  - mini_batch_size: \"1\"\n",
-      "2021-09-08 11:56:04,593  - patience: \"3\"\n",
-      "2021-09-08 11:56:04,594  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 11:56:04,594  - max_epochs: \"10\"\n",
-      "2021-09-08 11:56:04,594  - shuffle: \"True\"\n",
-      "2021-09-08 11:56:04,595  - train_with_dev: \"False\"\n",
-      "2021-09-08 11:56:04,595  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 11:56:04,595 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:04,595 Model training base path: \"None\"\n",
-      "2021-09-08 11:56:04,596 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:04,596 Device: cuda:1\n",
-      "2021-09-08 11:56:04,596 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:04,597 Embeddings storage mode: cpu\n"
+      "2021-09-21 20:31:18,310 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:18,310 Corpus: \"Corpus: 8 train + 1 dev + 1 test sentences\"\n",
+      "2021-09-21 20:31:18,310 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:18,310 Parameters:\n",
+      "2021-09-21 20:31:18,311  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:31:18,311  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:31:18,311  - patience: \"3\"\n",
+      "2021-09-21 20:31:18,312  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:31:18,312  - max_epochs: \"10\"\n",
+      "2021-09-21 20:31:18,312  - shuffle: \"True\"\n",
+      "2021-09-21 20:31:18,312  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:31:18,313  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:31:18,313 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:18,313 Model training base path: \"None\"\n",
+      "2021-09-21 20:31:18,314 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:18,314 Device: cuda:0\n",
+      "2021-09-21 20:31:18,314 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:18,314 Embeddings storage mode: cpu\n"
      ]
     },
     {
@@ -4791,195 +4812,209 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:56:04,785 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:04,905 epoch 1 - iter 1/8 - loss 0.64405578 - samples/sec: 9.54 - lr: 0.020000\n",
-      "2021-09-08 11:56:04,975 epoch 1 - iter 2/8 - loss 0.33548992 - samples/sec: 14.41 - lr: 0.020000\n",
-      "2021-09-08 11:56:05,027 epoch 1 - iter 3/8 - loss 0.66982261 - samples/sec: 19.53 - lr: 0.020000\n",
-      "2021-09-08 11:56:05,075 epoch 1 - iter 4/8 - loss 0.97767373 - samples/sec: 21.19 - lr: 0.020000\n",
-      "2021-09-08 11:56:05,121 epoch 1 - iter 5/8 - loss 0.78694420 - samples/sec: 21.79 - lr: 0.020000\n",
-      "2021-09-08 11:56:05,169 epoch 1 - iter 6/8 - loss 0.91906311 - samples/sec: 21.20 - lr: 0.020000\n",
-      "2021-09-08 11:56:05,221 epoch 1 - iter 7/8 - loss 0.88397372 - samples/sec: 19.14 - lr: 0.020000\n",
-      "2021-09-08 11:56:05,360 epoch 1 - iter 8/8 - loss 1.05412258 - samples/sec: 7.23 - lr: 0.020000\n",
-      "2021-09-08 11:56:05,361 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:05,361 EPOCH 1 done: loss 1.0541 - lr 0.0200000\n",
-      "2021-09-08 11:56:05,543 DEV : loss 0.17257744073867798 - score 0.0\n",
-      "2021-09-08 11:56:05,544 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:31:18,529 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:18,600 epoch 1 - iter 1/8 - loss 0.40420809 - samples/sec: 19.11 - lr: 0.020000\n",
+      "2021-09-21 20:31:18,664 epoch 1 - iter 2/8 - loss 0.24545557 - samples/sec: 15.74 - lr: 0.020000\n",
+      "2021-09-21 20:31:18,724 epoch 1 - iter 3/8 - loss 0.17439630 - samples/sec: 16.65 - lr: 0.020000\n",
+      "2021-09-21 20:31:18,781 epoch 1 - iter 4/8 - loss 0.13473616 - samples/sec: 17.93 - lr: 0.020000\n",
+      "2021-09-21 20:31:18,838 epoch 1 - iter 5/8 - loss 0.43354729 - samples/sec: 17.45 - lr: 0.020000\n",
+      "2021-09-21 20:31:18,898 epoch 1 - iter 6/8 - loss 0.39316634 - samples/sec: 16.96 - lr: 0.020000\n",
+      "2021-09-21 20:31:18,957 epoch 1 - iter 7/8 - loss 0.49249419 - samples/sec: 17.00 - lr: 0.020000\n",
+      "2021-09-21 20:31:19,015 epoch 1 - iter 8/8 - loss 0.44041072 - samples/sec: 17.34 - lr: 0.020000\n",
+      "2021-09-21 20:31:19,016 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:19,016 EPOCH 1 done: loss 0.4404 - lr 0.0200000\n",
+      "2021-09-21 20:31:19,311 DEV : loss 0.3199937343597412 - score 0.0\n",
+      "2021-09-21 20:31:19,311 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:31:23,494 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:23,567 epoch 2 - iter 1/8 - loss 0.00392136 - samples/sec: 18.41 - lr: 0.020000\n",
+      "2021-09-21 20:31:23,626 epoch 2 - iter 2/8 - loss 0.00292271 - samples/sec: 17.15 - lr: 0.020000\n",
+      "2021-09-21 20:31:23,701 epoch 2 - iter 3/8 - loss 0.01292837 - samples/sec: 13.43 - lr: 0.020000\n",
+      "2021-09-21 20:31:23,763 epoch 2 - iter 4/8 - loss 0.34073776 - samples/sec: 16.32 - lr: 0.020000\n",
+      "2021-09-21 20:31:23,841 epoch 2 - iter 5/8 - loss 0.32236480 - samples/sec: 12.91 - lr: 0.020000\n",
+      "2021-09-21 20:31:23,904 epoch 2 - iter 6/8 - loss 0.26914656 - samples/sec: 15.86 - lr: 0.020000\n",
+      "2021-09-21 20:31:23,977 epoch 2 - iter 7/8 - loss 0.23123500 - samples/sec: 13.91 - lr: 0.020000\n",
+      "2021-09-21 20:31:24,055 epoch 2 - iter 8/8 - loss 0.20278187 - samples/sec: 12.80 - lr: 0.020000\n",
+      "2021-09-21 20:31:24,056 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:24,057 EPOCH 2 done: loss 0.2028 - lr 0.0200000\n",
+      "2021-09-21 20:31:24,091 DEV : loss 0.15791422128677368 - score 0.0\n",
+      "2021-09-21 20:31:24,092 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:56:10,516 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:10,689 epoch 2 - iter 1/8 - loss 0.32568860 - samples/sec: 6.37 - lr: 0.020000\n",
-      "2021-09-08 11:56:10,797 epoch 2 - iter 2/8 - loss 0.47038910 - samples/sec: 9.37 - lr: 0.020000\n",
-      "2021-09-08 11:56:10,845 epoch 2 - iter 3/8 - loss 0.67101783 - samples/sec: 20.90 - lr: 0.020000\n",
-      "2021-09-08 11:56:10,897 epoch 2 - iter 4/8 - loss 0.59458901 - samples/sec: 19.57 - lr: 0.020000\n",
-      "2021-09-08 11:56:10,944 epoch 2 - iter 5/8 - loss 0.47684642 - samples/sec: 21.36 - lr: 0.020000\n",
-      "2021-09-08 11:56:10,991 epoch 2 - iter 6/8 - loss 0.63245645 - samples/sec: 21.23 - lr: 0.020000\n",
-      "2021-09-08 11:56:11,042 epoch 2 - iter 7/8 - loss 0.59020754 - samples/sec: 20.10 - lr: 0.020000\n",
-      "2021-09-08 11:56:11,111 epoch 2 - iter 8/8 - loss 0.54922572 - samples/sec: 14.57 - lr: 0.020000\n",
-      "2021-09-08 11:56:11,112 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:11,112 EPOCH 2 done: loss 0.5492 - lr 0.0200000\n",
-      "2021-09-08 11:56:11,288 DEV : loss 0.2622780203819275 - score 0.0\n",
-      "2021-09-08 11:56:11,289 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:56:11,361 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:11,478 epoch 3 - iter 1/8 - loss 0.26568994 - samples/sec: 9.81 - lr: 0.020000\n",
-      "2021-09-08 11:56:11,529 epoch 3 - iter 2/8 - loss 0.43821032 - samples/sec: 20.09 - lr: 0.020000\n",
-      "2021-09-08 11:56:11,576 epoch 3 - iter 3/8 - loss 0.54251708 - samples/sec: 21.34 - lr: 0.020000\n",
-      "2021-09-08 11:56:11,621 epoch 3 - iter 4/8 - loss 0.40981514 - samples/sec: 22.39 - lr: 0.020000\n",
-      "2021-09-08 11:56:11,760 epoch 3 - iter 5/8 - loss 0.34640826 - samples/sec: 7.22 - lr: 0.020000\n",
-      "2021-09-08 11:56:11,811 epoch 3 - iter 6/8 - loss 0.30761786 - samples/sec: 19.88 - lr: 0.020000\n",
-      "2021-09-08 11:56:11,857 epoch 3 - iter 7/8 - loss 0.44424205 - samples/sec: 21.56 - lr: 0.020000\n",
-      "2021-09-08 11:56:11,924 epoch 3 - iter 8/8 - loss 0.38912142 - samples/sec: 15.17 - lr: 0.020000\n",
-      "2021-09-08 11:56:11,925 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:11,925 EPOCH 3 done: loss 0.3891 - lr 0.0200000\n",
-      "2021-09-08 11:56:12,321 DEV : loss 0.1465112715959549 - score 0.0\n",
-      "2021-09-08 11:56:12,322 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:31:33,121 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:33,242 epoch 3 - iter 1/8 - loss 1.00806320 - samples/sec: 13.20 - lr: 0.020000\n",
+      "2021-09-21 20:31:33,348 epoch 3 - iter 2/8 - loss 0.52813059 - samples/sec: 9.43 - lr: 0.020000\n",
+      "2021-09-21 20:31:33,450 epoch 3 - iter 3/8 - loss 0.35806475 - samples/sec: 9.93 - lr: 0.020000\n",
+      "2021-09-21 20:31:33,536 epoch 3 - iter 4/8 - loss 0.63773302 - samples/sec: 11.60 - lr: 0.020000\n",
+      "2021-09-21 20:31:33,633 epoch 3 - iter 5/8 - loss 0.51056147 - samples/sec: 10.38 - lr: 0.020000\n",
+      "2021-09-21 20:31:33,721 epoch 3 - iter 6/8 - loss 0.42628068 - samples/sec: 11.41 - lr: 0.020000\n",
+      "2021-09-21 20:31:33,806 epoch 3 - iter 7/8 - loss 0.36562253 - samples/sec: 11.85 - lr: 0.020000\n",
+      "2021-09-21 20:31:33,932 epoch 3 - iter 8/8 - loss 0.43659113 - samples/sec: 7.93 - lr: 0.020000\n",
+      "2021-09-21 20:31:33,933 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:33,934 EPOCH 3 done: loss 0.4366 - lr 0.0200000\n",
+      "2021-09-21 20:31:34,035 DEV : loss 0.002168857492506504 - score 0.0\n",
+      "2021-09-21 20:31:34,040 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:56:16,312 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:16,379 epoch 4 - iter 1/8 - loss 0.68882591 - samples/sec: 19.72 - lr: 0.020000\n",
-      "2021-09-08 11:56:16,428 epoch 4 - iter 2/8 - loss 0.35019453 - samples/sec: 20.46 - lr: 0.020000\n",
-      "2021-09-08 11:56:16,480 epoch 4 - iter 3/8 - loss 0.51757261 - samples/sec: 19.65 - lr: 0.020000\n",
-      "2021-09-08 11:56:16,557 epoch 4 - iter 4/8 - loss 0.39091097 - samples/sec: 13.21 - lr: 0.020000\n",
-      "2021-09-08 11:56:16,699 epoch 4 - iter 5/8 - loss 0.32528253 - samples/sec: 7.05 - lr: 0.020000\n",
-      "2021-09-08 11:56:16,805 epoch 4 - iter 6/8 - loss 0.35742894 - samples/sec: 9.54 - lr: 0.020000\n",
-      "2021-09-08 11:56:16,857 epoch 4 - iter 7/8 - loss 0.31469377 - samples/sec: 19.28 - lr: 0.020000\n",
-      "2021-09-08 11:56:16,910 epoch 4 - iter 8/8 - loss 0.28400035 - samples/sec: 19.08 - lr: 0.020000\n",
-      "2021-09-08 11:56:16,912 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:16,912 EPOCH 4 done: loss 0.2840 - lr 0.0200000\n",
-      "2021-09-08 11:56:17,095 DEV : loss 0.4475221037864685 - score 0.0\n",
-      "2021-09-08 11:56:17,096 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:56:17,174 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:17,236 epoch 5 - iter 1/8 - loss 1.35481155 - samples/sec: 21.06 - lr: 0.020000\n",
-      "2021-09-08 11:56:17,284 epoch 5 - iter 2/8 - loss 0.71589060 - samples/sec: 20.93 - lr: 0.020000\n",
-      "2021-09-08 11:56:17,351 epoch 5 - iter 3/8 - loss 0.47883219 - samples/sec: 15.08 - lr: 0.020000\n",
-      "2021-09-08 11:56:17,452 epoch 5 - iter 4/8 - loss 0.71564765 - samples/sec: 9.94 - lr: 0.020000\n",
-      "2021-09-08 11:56:17,500 epoch 5 - iter 5/8 - loss 0.57539320 - samples/sec: 21.10 - lr: 0.020000\n",
-      "2021-09-08 11:56:17,548 epoch 5 - iter 6/8 - loss 0.48363920 - samples/sec: 21.29 - lr: 0.020000\n",
-      "2021-09-08 11:56:17,683 epoch 5 - iter 7/8 - loss 0.41570200 - samples/sec: 7.43 - lr: 0.020000\n",
-      "2021-09-08 11:56:17,731 epoch 5 - iter 8/8 - loss 0.38173670 - samples/sec: 20.83 - lr: 0.020000\n",
-      "2021-09-08 11:56:17,732 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:17,732 EPOCH 5 done: loss 0.3817 - lr 0.0200000\n",
-      "2021-09-08 11:56:17,910 DEV : loss 0.27891045808792114 - score 0.0\n",
-      "2021-09-08 11:56:17,911 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:56:17,983 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:18,098 epoch 6 - iter 1/8 - loss 0.50438958 - samples/sec: 9.90 - lr: 0.020000\n",
-      "2021-09-08 11:56:18,146 epoch 6 - iter 2/8 - loss 0.30294941 - samples/sec: 21.38 - lr: 0.020000\n",
-      "2021-09-08 11:56:18,282 epoch 6 - iter 3/8 - loss 0.20312670 - samples/sec: 7.34 - lr: 0.020000\n",
-      "2021-09-08 11:56:18,352 epoch 6 - iter 4/8 - loss 0.36860452 - samples/sec: 14.43 - lr: 0.020000\n",
-      "2021-09-08 11:56:18,398 epoch 6 - iter 5/8 - loss 0.29499964 - samples/sec: 21.99 - lr: 0.020000\n",
-      "2021-09-08 11:56:18,445 epoch 6 - iter 6/8 - loss 0.24738449 - samples/sec: 21.37 - lr: 0.020000\n",
-      "2021-09-08 11:56:18,492 epoch 6 - iter 7/8 - loss 0.21764549 - samples/sec: 21.27 - lr: 0.020000\n",
-      "2021-09-08 11:56:18,542 epoch 6 - iter 8/8 - loss 0.23033247 - samples/sec: 20.22 - lr: 0.020000\n",
-      "2021-09-08 11:56:18,543 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:18,544 EPOCH 6 done: loss 0.2303 - lr 0.0200000\n",
-      "2021-09-08 11:56:18,726 DEV : loss 0.13986240327358246 - score 0.0\n",
-      "2021-09-08 11:56:18,727 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:31:39,704 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:39,894 epoch 4 - iter 1/8 - loss 0.33345053 - samples/sec: 9.65 - lr: 0.020000\n",
+      "2021-09-21 20:31:39,989 epoch 4 - iter 2/8 - loss 0.17205190 - samples/sec: 10.67 - lr: 0.020000\n",
+      "2021-09-21 20:31:40,052 epoch 4 - iter 3/8 - loss 0.11547208 - samples/sec: 15.89 - lr: 0.020000\n",
+      "2021-09-21 20:31:40,134 epoch 4 - iter 4/8 - loss 0.25335937 - samples/sec: 12.28 - lr: 0.020000\n",
+      "2021-09-21 20:31:40,217 epoch 4 - iter 5/8 - loss 0.20504437 - samples/sec: 12.15 - lr: 0.020000\n",
+      "2021-09-21 20:31:40,298 epoch 4 - iter 6/8 - loss 0.17113800 - samples/sec: 12.39 - lr: 0.020000\n",
+      "2021-09-21 20:31:40,389 epoch 4 - iter 7/8 - loss 0.14698011 - samples/sec: 11.05 - lr: 0.020000\n",
+      "2021-09-21 20:31:40,524 epoch 4 - iter 8/8 - loss 0.13191130 - samples/sec: 7.45 - lr: 0.020000\n",
+      "2021-09-21 20:31:40,525 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:40,525 EPOCH 4 done: loss 0.1319 - lr 0.0200000\n",
+      "2021-09-21 20:31:40,586 DEV : loss 0.034129153937101364 - score 0.0\n",
+      "2021-09-21 20:31:40,589 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:31:40,591 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:40,704 epoch 5 - iter 1/8 - loss 0.00154135 - samples/sec: 13.45 - lr: 0.020000\n",
+      "2021-09-21 20:31:40,794 epoch 5 - iter 2/8 - loss 0.02442778 - samples/sec: 11.14 - lr: 0.020000\n",
+      "2021-09-21 20:31:40,863 epoch 5 - iter 3/8 - loss 0.02474483 - samples/sec: 14.73 - lr: 0.020000\n",
+      "2021-09-21 20:31:40,992 epoch 5 - iter 4/8 - loss 0.01873368 - samples/sec: 7.75 - lr: 0.020000\n",
+      "2021-09-21 20:31:41,093 epoch 5 - iter 5/8 - loss 0.01569876 - samples/sec: 9.96 - lr: 0.020000\n",
+      "2021-09-21 20:31:41,179 epoch 5 - iter 6/8 - loss 0.01325923 - samples/sec: 11.75 - lr: 0.020000\n",
+      "2021-09-21 20:31:41,269 epoch 5 - iter 7/8 - loss 0.05774913 - samples/sec: 11.05 - lr: 0.020000\n",
+      "2021-09-21 20:31:41,343 epoch 5 - iter 8/8 - loss 0.07635235 - samples/sec: 13.60 - lr: 0.020000\n",
+      "2021-09-21 20:31:41,344 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:41,345 EPOCH 5 done: loss 0.0764 - lr 0.0200000\n",
+      "2021-09-21 20:31:41,395 DEV : loss 0.0007516813348047435 - score 0.0\n",
+      "2021-09-21 20:31:41,398 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:56:26,777 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:26,845 epoch 7 - iter 1/8 - loss 0.02964369 - samples/sec: 19.13 - lr: 0.020000\n",
-      "2021-09-08 11:56:26,896 epoch 7 - iter 2/8 - loss 0.02146134 - samples/sec: 20.03 - lr: 0.020000\n",
-      "2021-09-08 11:56:26,943 epoch 7 - iter 3/8 - loss 0.03479047 - samples/sec: 21.54 - lr: 0.020000\n",
-      "2021-09-08 11:56:27,087 epoch 7 - iter 4/8 - loss 0.02664915 - samples/sec: 6.93 - lr: 0.020000\n",
-      "2021-09-08 11:56:27,192 epoch 7 - iter 5/8 - loss 0.04813235 - samples/sec: 9.58 - lr: 0.020000\n",
-      "2021-09-08 11:56:27,260 epoch 7 - iter 6/8 - loss 0.04022276 - samples/sec: 14.92 - lr: 0.020000\n",
-      "2021-09-08 11:56:27,307 epoch 7 - iter 7/8 - loss 0.03453289 - samples/sec: 21.45 - lr: 0.020000\n",
-      "2021-09-08 11:56:27,357 epoch 7 - iter 8/8 - loss 0.19385525 - samples/sec: 20.27 - lr: 0.020000\n"
+      "2021-09-21 20:31:45,432 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:45,590 epoch 6 - iter 1/8 - loss 0.00259694 - samples/sec: 11.25 - lr: 0.020000\n",
+      "2021-09-21 20:31:45,724 epoch 6 - iter 2/8 - loss 0.00408820 - samples/sec: 7.49 - lr: 0.020000\n",
+      "2021-09-21 20:31:45,829 epoch 6 - iter 3/8 - loss 0.00447096 - samples/sec: 9.53 - lr: 0.020000\n",
+      "2021-09-21 20:31:45,915 epoch 6 - iter 4/8 - loss 0.00358459 - samples/sec: 11.73 - lr: 0.020000\n",
+      "2021-09-21 20:31:46,008 epoch 6 - iter 5/8 - loss 0.00355289 - samples/sec: 10.82 - lr: 0.020000\n",
+      "2021-09-21 20:31:46,077 epoch 6 - iter 6/8 - loss 0.01763656 - samples/sec: 14.49 - lr: 0.020000\n",
+      "2021-09-21 20:31:46,173 epoch 6 - iter 7/8 - loss 0.01542949 - samples/sec: 10.45 - lr: 0.020000\n",
+      "2021-09-21 20:31:46,250 epoch 6 - iter 8/8 - loss 0.01366180 - samples/sec: 13.11 - lr: 0.020000\n",
+      "2021-09-21 20:31:46,251 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:46,252 EPOCH 6 done: loss 0.0137 - lr 0.0200000\n",
+      "2021-09-21 20:31:46,303 DEV : loss 0.03476882353425026 - score 0.0\n",
+      "2021-09-21 20:31:46,304 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:31:46,306 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:46,429 epoch 7 - iter 1/8 - loss 0.00041078 - samples/sec: 14.96 - lr: 0.020000\n",
+      "2021-09-21 20:31:46,509 epoch 7 - iter 2/8 - loss 0.00036776 - samples/sec: 12.54 - lr: 0.020000\n",
+      "2021-09-21 20:31:46,582 epoch 7 - iter 3/8 - loss 0.00065266 - samples/sec: 13.75 - lr: 0.020000\n",
+      "2021-09-21 20:31:46,664 epoch 7 - iter 4/8 - loss 0.00164369 - samples/sec: 12.30 - lr: 0.020000\n",
+      "2021-09-21 20:31:46,752 epoch 7 - iter 5/8 - loss 0.00145994 - samples/sec: 11.41 - lr: 0.020000\n",
+      "2021-09-21 20:31:46,816 epoch 7 - iter 6/8 - loss 0.00132554 - samples/sec: 15.74 - lr: 0.020000\n",
+      "2021-09-21 20:31:46,885 epoch 7 - iter 7/8 - loss 0.00130288 - samples/sec: 14.53 - lr: 0.020000\n",
+      "2021-09-21 20:31:46,961 epoch 7 - iter 8/8 - loss 0.00121571 - samples/sec: 13.36 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:56:27,358 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:27,359 EPOCH 7 done: loss 0.1939 - lr 0.0200000\n",
-      "2021-09-08 11:56:27,438 DEV : loss 0.4843616485595703 - score 0.0\n",
-      "2021-09-08 11:56:27,439 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:56:27,440 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:27,594 epoch 8 - iter 1/8 - loss 0.00073592 - samples/sec: 7.23 - lr: 0.020000\n",
-      "2021-09-08 11:56:27,662 epoch 8 - iter 2/8 - loss 0.00074382 - samples/sec: 14.94 - lr: 0.020000\n",
-      "2021-09-08 11:56:27,709 epoch 8 - iter 3/8 - loss 0.00392381 - samples/sec: 21.37 - lr: 0.020000\n",
-      "2021-09-08 11:56:27,757 epoch 8 - iter 4/8 - loss 0.00326961 - samples/sec: 21.13 - lr: 0.020000\n",
-      "2021-09-08 11:56:27,802 epoch 8 - iter 5/8 - loss 0.00274756 - samples/sec: 22.48 - lr: 0.020000\n",
-      "2021-09-08 11:56:27,849 epoch 8 - iter 6/8 - loss 0.01722021 - samples/sec: 21.35 - lr: 0.020000\n",
-      "2021-09-08 11:56:27,897 epoch 8 - iter 7/8 - loss 0.09317775 - samples/sec: 21.31 - lr: 0.020000\n",
-      "2021-09-08 11:56:28,000 epoch 8 - iter 8/8 - loss 0.15788146 - samples/sec: 9.68 - lr: 0.020000\n",
-      "2021-09-08 11:56:28,001 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:28,002 EPOCH 8 done: loss 0.1579 - lr 0.0200000\n",
-      "2021-09-08 11:56:28,173 DEV : loss 0.10447869449853897 - score 0.0\n",
-      "2021-09-08 11:56:28,174 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:31:46,962 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:46,962 EPOCH 7 done: loss 0.0012 - lr 0.0200000\n",
+      "2021-09-21 20:31:47,002 DEV : loss 0.03988579660654068 - score 0.0\n",
+      "2021-09-21 20:31:47,004 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:31:47,006 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:47,115 epoch 8 - iter 1/8 - loss 0.00049090 - samples/sec: 14.17 - lr: 0.020000\n",
+      "2021-09-21 20:31:47,179 epoch 8 - iter 2/8 - loss 0.04004188 - samples/sec: 15.64 - lr: 0.020000\n",
+      "2021-09-21 20:31:47,236 epoch 8 - iter 3/8 - loss 0.02686392 - samples/sec: 17.60 - lr: 0.020000\n",
+      "2021-09-21 20:31:47,308 epoch 8 - iter 4/8 - loss 0.02051885 - samples/sec: 14.07 - lr: 0.020000\n",
+      "2021-09-21 20:31:47,400 epoch 8 - iter 5/8 - loss 0.01659800 - samples/sec: 10.94 - lr: 0.020000\n",
+      "2021-09-21 20:31:47,482 epoch 8 - iter 6/8 - loss 0.03681022 - samples/sec: 12.25 - lr: 0.020000\n",
+      "2021-09-21 20:31:47,548 epoch 8 - iter 7/8 - loss 0.03395387 - samples/sec: 15.30 - lr: 0.020000\n",
+      "2021-09-21 20:31:47,641 epoch 8 - iter 8/8 - loss 0.02975913 - samples/sec: 10.70 - lr: 0.020000\n",
+      "2021-09-21 20:31:47,642 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:47,643 EPOCH 8 done: loss 0.0298 - lr 0.0200000\n",
+      "2021-09-21 20:31:47,771 DEV : loss 0.0005690588150173426 - score 0.0\n",
+      "2021-09-21 20:31:47,774 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:56:34,127 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:34,191 epoch 9 - iter 1/8 - loss 0.00682127 - samples/sec: 21.45 - lr: 0.020000\n",
-      "2021-09-08 11:56:34,345 epoch 9 - iter 2/8 - loss 0.09719094 - samples/sec: 6.51 - lr: 0.020000\n",
-      "2021-09-08 11:56:34,389 epoch 9 - iter 3/8 - loss 0.07167658 - samples/sec: 23.00 - lr: 0.020000\n",
-      "2021-09-08 11:56:34,436 epoch 9 - iter 4/8 - loss 0.05387349 - samples/sec: 21.52 - lr: 0.020000\n",
-      "2021-09-08 11:56:34,504 epoch 9 - iter 5/8 - loss 0.04334075 - samples/sec: 14.80 - lr: 0.020000\n",
-      "2021-09-08 11:56:34,605 epoch 9 - iter 6/8 - loss 0.04741748 - samples/sec: 9.92 - lr: 0.020000\n",
-      "2021-09-08 11:56:34,652 epoch 9 - iter 7/8 - loss 0.04091580 - samples/sec: 21.35 - lr: 0.020000\n",
-      "2021-09-08 11:56:34,700 epoch 9 - iter 8/8 - loss 0.03587657 - samples/sec: 21.38 - lr: 0.020000\n",
-      "2021-09-08 11:56:34,701 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:34,701 EPOCH 9 done: loss 0.0359 - lr 0.0200000\n",
-      "2021-09-08 11:56:34,779 DEV : loss 0.2798992991447449 - score 0.0\n",
-      "2021-09-08 11:56:34,780 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:56:34,782 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:34,842 epoch 10 - iter 1/8 - loss 0.00356519 - samples/sec: 21.56 - lr: 0.020000\n",
-      "2021-09-08 11:56:34,887 epoch 10 - iter 2/8 - loss 0.00195393 - samples/sec: 22.66 - lr: 0.020000\n",
-      "2021-09-08 11:56:34,931 epoch 10 - iter 3/8 - loss 0.00400248 - samples/sec: 23.10 - lr: 0.020000\n",
-      "2021-09-08 11:56:34,978 epoch 10 - iter 4/8 - loss 0.00315668 - samples/sec: 21.23 - lr: 0.020000\n",
-      "2021-09-08 11:56:35,077 epoch 10 - iter 5/8 - loss 0.00284827 - samples/sec: 10.15 - lr: 0.020000\n",
-      "2021-09-08 11:56:35,124 epoch 10 - iter 6/8 - loss 0.19811116 - samples/sec: 21.55 - lr: 0.020000\n",
-      "2021-09-08 11:56:35,190 epoch 10 - iter 7/8 - loss 0.17004065 - samples/sec: 15.20 - lr: 0.020000\n",
-      "2021-09-08 11:56:35,326 epoch 10 - iter 8/8 - loss 0.14897422 - samples/sec: 7.39 - lr: 0.020000\n",
-      "2021-09-08 11:56:35,327 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:35,327 EPOCH 10 done: loss 0.1490 - lr 0.0200000\n",
-      "2021-09-08 11:56:35,404 DEV : loss 0.2626461684703827 - score 0.0\n",
-      "2021-09-08 11:56:35,405 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:56:39,664 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:39,665 Testing using best model ...\n",
-      "2021-09-08 11:56:39,666 loading file None/best-model.pt\n",
+      "2021-09-21 20:31:57,884 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:57,983 epoch 9 - iter 1/8 - loss 0.00218834 - samples/sec: 12.96 - lr: 0.020000\n",
+      "2021-09-21 20:31:58,062 epoch 9 - iter 2/8 - loss 0.00133911 - samples/sec: 12.86 - lr: 0.020000\n",
+      "2021-09-21 20:31:58,131 epoch 9 - iter 3/8 - loss 0.00121949 - samples/sec: 14.61 - lr: 0.020000\n",
+      "2021-09-21 20:31:58,232 epoch 9 - iter 4/8 - loss 0.00123830 - samples/sec: 9.87 - lr: 0.020000\n",
+      "2021-09-21 20:31:58,300 epoch 9 - iter 5/8 - loss 0.00108432 - samples/sec: 14.96 - lr: 0.020000\n",
+      "2021-09-21 20:31:58,364 epoch 9 - iter 6/8 - loss 0.03148676 - samples/sec: 15.70 - lr: 0.020000\n",
+      "2021-09-21 20:31:58,428 epoch 9 - iter 7/8 - loss 0.02703300 - samples/sec: 15.72 - lr: 0.020000\n",
+      "2021-09-21 20:31:58,513 epoch 9 - iter 8/8 - loss 0.02378949 - samples/sec: 11.82 - lr: 0.020000\n",
+      "2021-09-21 20:31:58,514 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:58,514 EPOCH 9 done: loss 0.0238 - lr 0.0200000\n",
+      "2021-09-21 20:31:58,557 DEV : loss 0.02225431054830551 - score 0.0\n",
+      "2021-09-21 20:31:58,558 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:31:58,561 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:58,665 epoch 10 - iter 1/8 - loss 0.00046589 - samples/sec: 14.99 - lr: 0.020000\n",
+      "2021-09-21 20:31:58,753 epoch 10 - iter 2/8 - loss 0.00054973 - samples/sec: 11.42 - lr: 0.020000\n",
+      "2021-09-21 20:31:58,817 epoch 10 - iter 3/8 - loss 0.00054426 - samples/sec: 15.71 - lr: 0.020000\n",
+      "2021-09-21 20:31:58,885 epoch 10 - iter 4/8 - loss 0.02520328 - samples/sec: 14.71 - lr: 0.020000\n",
+      "2021-09-21 20:31:58,940 epoch 10 - iter 5/8 - loss 0.02036642 - samples/sec: 18.34 - lr: 0.020000\n",
+      "2021-09-21 20:31:59,021 epoch 10 - iter 6/8 - loss 0.01711090 - samples/sec: 12.49 - lr: 0.020000\n",
+      "2021-09-21 20:31:59,077 epoch 10 - iter 7/8 - loss 0.01472267 - samples/sec: 17.74 - lr: 0.020000\n",
+      "2021-09-21 20:31:59,131 epoch 10 - iter 8/8 - loss 0.01295063 - samples/sec: 18.96 - lr: 0.020000\n",
+      "2021-09-21 20:31:59,132 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:59,132 EPOCH 10 done: loss 0.0130 - lr 0.0200000\n",
+      "2021-09-21 20:31:59,165 DEV : loss 0.0013343091122806072 - score 0.0\n",
+      "2021-09-21 20:31:59,165 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:32:03,323 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:03,324 Testing using best model ...\n",
+      "2021-09-21 20:32:03,325 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:56:45,076 \t1.0\n",
-      "2021-09-08 11:56:45,076 \n",
+      "2021-09-21 20:32:08,226 \t0.0\n",
+      "2021-09-21 20:32:08,227 \n",
       "Results:\n",
-      "- F-score (micro) 1.0\n",
-      "- F-score (macro) 0.1111\n",
-      "- Accuracy 1.0\n",
+      "- F-score (micro) 0.0\n",
+      "- F-score (macro) 0.0\n",
+      "- Accuracy 0.0\n",
       "\n",
       "By class:\n",
       "                                           precision    recall  f1-score   support\n",
       "\n",
       "This text is about Family & Relationships     0.0000    0.0000    0.0000         0\n",
+      "                This text is about Health     0.0000    0.0000    0.0000         0\n",
       " This text is about Science & Mathematics     0.0000    0.0000    0.0000         0\n",
       "  This text is about Computers & Internet     0.0000    0.0000    0.0000         0\n",
       " This text is about Education & Reference     0.0000    0.0000    0.0000         0\n",
       "                This text is about Sports     0.0000    0.0000    0.0000         0\n",
       "     This text is about Society & Culture     0.0000    0.0000    0.0000         0\n",
-      "    This text is about Business & Finance     0.0000    0.0000    0.0000         0\n",
       " This text is about Politics & Government     0.0000    0.0000    0.0000         0\n",
-      " This text is about Entertainment & Music     1.0000    1.0000    1.0000         1\n",
+      " This text is about Entertainment & Music     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "                                micro avg     1.0000    1.0000    1.0000         1\n",
-      "                                macro avg     0.1111    0.1111    0.1111         1\n",
-      "                             weighted avg     1.0000    1.0000    1.0000         1\n",
-      "                              samples avg     1.0000    1.0000    1.0000         1\n",
+      "                                micro avg     0.0000    0.0000    0.0000         1\n",
+      "                                macro avg     0.0000    0.0000    0.0000         1\n",
+      "                             weighted avg     0.0000    0.0000    0.0000         1\n",
+      "                              samples avg     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "2021-09-08 11:56:45,077 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:59,392 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
+      "2021-09-21 20:32:08,227 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:28,035 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:57:03,731 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:32:32,368 Computing label dictionary. Progress:\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 9/9 [00:00<00:00, 9612.61it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 20:32:32,371 [b'This text is about Health', b'This text is about Science & Mathematics', b'This text is about Computers & Internet', b'This text is about Education & Reference', b'This text is about Sports', b'This text is about Society & Culture', b'This text is about Business & Finance', b'This text is about Politics & Government', b'This text is about Entertainment & Music']\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 9/9 [00:00<00:00, 17427.86it/s]"
+      "\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:57:03,734 [b'This text is about Health', b'This text is about Science & Mathematics', b'This text is about Entertainment & Music', b'This text is about Computers & Internet', b'This text is about Education & Reference', b'This text is about Sports', b'This text is about Business & Finance', b'This text is about Politics & Government', b'This text is about Society & Culture']\n",
-      "2021-09-08 11:57:03,916 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:03,918 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:32:37,822 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:37,824 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -5292,183 +5327,171 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:57:03,919 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:03,919 Corpus: \"Corpus: 8 train + 1 dev + 1 test sentences\"\n",
-      "2021-09-08 11:57:03,920 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:03,920 Parameters:\n",
-      "2021-09-08 11:57:03,920  - learning_rate: \"0.02\"\n",
-      "2021-09-08 11:57:03,920  - mini_batch_size: \"1\"\n",
-      "2021-09-08 11:57:03,921  - patience: \"3\"\n",
-      "2021-09-08 11:57:03,921  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 11:57:03,921  - max_epochs: \"10\"\n",
-      "2021-09-08 11:57:03,921  - shuffle: \"True\"\n",
-      "2021-09-08 11:57:03,922  - train_with_dev: \"False\"\n",
-      "2021-09-08 11:57:03,922  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 11:57:03,922 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:03,923 Model training base path: \"None\"\n",
-      "2021-09-08 11:57:03,923 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:03,923 Device: cuda:1\n",
-      "2021-09-08 11:57:03,923 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:03,924 Embeddings storage mode: cpu\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 11:57:04,139 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:04,220 epoch 1 - iter 1/8 - loss 0.59931928 - samples/sec: 15.38 - lr: 0.020000\n",
-      "2021-09-08 11:57:04,267 epoch 1 - iter 2/8 - loss 0.45666437 - samples/sec: 21.26 - lr: 0.020000\n",
-      "2021-09-08 11:57:04,315 epoch 1 - iter 3/8 - loss 0.32052529 - samples/sec: 21.19 - lr: 0.020000\n",
-      "2021-09-08 11:57:04,370 epoch 1 - iter 4/8 - loss 0.24238680 - samples/sec: 18.32 - lr: 0.020000\n",
-      "2021-09-08 11:57:04,422 epoch 1 - iter 5/8 - loss 0.53997677 - samples/sec: 19.16 - lr: 0.020000\n",
-      "2021-09-08 11:57:04,469 epoch 1 - iter 6/8 - loss 0.88629841 - samples/sec: 21.36 - lr: 0.020000\n",
-      "2021-09-08 11:57:04,517 epoch 1 - iter 7/8 - loss 0.82273638 - samples/sec: 21.21 - lr: 0.020000\n",
-      "2021-09-08 11:57:04,587 epoch 1 - iter 8/8 - loss 0.73787851 - samples/sec: 14.37 - lr: 0.020000\n",
-      "2021-09-08 11:57:04,588 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:04,588 EPOCH 1 done: loss 0.7379 - lr 0.0200000\n",
-      "2021-09-08 11:57:04,674 DEV : loss 0.05433247983455658 - score 0.0\n",
-      "2021-09-08 11:57:04,674 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:32:37,825 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:37,825 Corpus: \"Corpus: 8 train + 1 dev + 1 test sentences\"\n",
+      "2021-09-21 20:32:37,825 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:37,826 Parameters:\n",
+      "2021-09-21 20:32:37,826  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:32:37,826  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:32:37,827  - patience: \"3\"\n",
+      "2021-09-21 20:32:37,827  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:32:37,827  - max_epochs: \"10\"\n",
+      "2021-09-21 20:32:37,827  - shuffle: \"True\"\n",
+      "2021-09-21 20:32:37,828  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:32:37,828  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:32:37,828 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:37,828 Model training base path: \"None\"\n",
+      "2021-09-21 20:32:37,829 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:37,829 Device: cuda:0\n",
+      "2021-09-21 20:32:37,829 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:37,830 Embeddings storage mode: cpu\n",
+      "2021-09-21 20:32:37,838 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:37,933 epoch 1 - iter 1/8 - loss 0.07596800 - samples/sec: 18.07 - lr: 0.020000\n",
+      "2021-09-21 20:32:38,320 epoch 1 - iter 2/8 - loss 0.06905395 - samples/sec: 2.59 - lr: 0.020000\n",
+      "2021-09-21 20:32:38,487 epoch 1 - iter 3/8 - loss 0.34047196 - samples/sec: 6.02 - lr: 0.020000\n",
+      "2021-09-21 20:32:38,615 epoch 1 - iter 4/8 - loss 0.28316664 - samples/sec: 7.82 - lr: 0.020000\n",
+      "2021-09-21 20:32:38,679 epoch 1 - iter 5/8 - loss 0.22677178 - samples/sec: 15.77 - lr: 0.020000\n",
+      "2021-09-21 20:32:38,796 epoch 1 - iter 6/8 - loss 0.49107458 - samples/sec: 8.58 - lr: 0.020000\n",
+      "2021-09-21 20:32:38,874 epoch 1 - iter 7/8 - loss 0.64387566 - samples/sec: 13.02 - lr: 0.020000\n",
+      "2021-09-21 20:32:38,952 epoch 1 - iter 8/8 - loss 0.83699217 - samples/sec: 12.79 - lr: 0.020000\n",
+      "2021-09-21 20:32:38,953 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:38,954 EPOCH 1 done: loss 0.8370 - lr 0.0200000\n",
+      "2021-09-21 20:32:39,017 DEV : loss 0.02055422216653824 - score 0.0\n",
+      "2021-09-21 20:32:39,017 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:57:09,573 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:09,641 epoch 2 - iter 1/8 - loss 0.10616287 - samples/sec: 18.97 - lr: 0.020000\n",
-      "2021-09-08 11:57:09,692 epoch 2 - iter 2/8 - loss 0.06077223 - samples/sec: 19.65 - lr: 0.020000\n",
-      "2021-09-08 11:57:09,764 epoch 2 - iter 3/8 - loss 0.11700795 - samples/sec: 14.09 - lr: 0.020000\n",
-      "2021-09-08 11:57:09,820 epoch 2 - iter 4/8 - loss 0.33113849 - samples/sec: 18.01 - lr: 0.020000\n",
-      "2021-09-08 11:57:09,869 epoch 2 - iter 5/8 - loss 0.66355169 - samples/sec: 20.84 - lr: 0.020000\n",
-      "2021-09-08 11:57:09,937 epoch 2 - iter 6/8 - loss 0.55853410 - samples/sec: 14.69 - lr: 0.020000\n",
-      "2021-09-08 11:57:09,985 epoch 2 - iter 7/8 - loss 0.49152843 - samples/sec: 20.85 - lr: 0.020000\n",
-      "2021-09-08 11:57:10,049 epoch 2 - iter 8/8 - loss 0.43571832 - samples/sec: 15.85 - lr: 0.020000\n",
-      "2021-09-08 11:57:10,050 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:10,050 EPOCH 2 done: loss 0.4357 - lr 0.0200000\n",
-      "2021-09-08 11:57:10,281 DEV : loss 0.010358606465160847 - score 0.0\n",
-      "2021-09-08 11:57:10,282 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:32:43,540 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:43,683 epoch 2 - iter 1/8 - loss 0.12364254 - samples/sec: 9.73 - lr: 0.020000\n",
+      "2021-09-21 20:32:43,754 epoch 2 - iter 2/8 - loss 0.17787548 - samples/sec: 14.23 - lr: 0.020000\n",
+      "2021-09-21 20:32:43,862 epoch 2 - iter 3/8 - loss 0.45720849 - samples/sec: 9.27 - lr: 0.020000\n",
+      "2021-09-21 20:32:43,929 epoch 2 - iter 4/8 - loss 0.58979667 - samples/sec: 14.97 - lr: 0.020000\n",
+      "2021-09-21 20:32:44,140 epoch 2 - iter 5/8 - loss 0.47693609 - samples/sec: 4.77 - lr: 0.020000\n",
+      "2021-09-21 20:32:44,213 epoch 2 - iter 6/8 - loss 0.42835344 - samples/sec: 13.84 - lr: 0.020000\n",
+      "2021-09-21 20:32:44,278 epoch 2 - iter 7/8 - loss 0.36734468 - samples/sec: 15.44 - lr: 0.020000\n",
+      "2021-09-21 20:32:44,369 epoch 2 - iter 8/8 - loss 0.38053879 - samples/sec: 10.99 - lr: 0.020000\n",
+      "2021-09-21 20:32:44,370 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:44,371 EPOCH 2 done: loss 0.3805 - lr 0.0200000\n",
+      "2021-09-21 20:32:44,504 DEV : loss 0.018222033977508545 - score 0.0\n",
+      "2021-09-21 20:32:44,506 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:57:17,283 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:17,349 epoch 3 - iter 1/8 - loss 0.57040435 - samples/sec: 20.27 - lr: 0.020000\n",
-      "2021-09-08 11:57:17,395 epoch 3 - iter 2/8 - loss 0.28920482 - samples/sec: 22.09 - lr: 0.020000\n",
-      "2021-09-08 11:57:17,454 epoch 3 - iter 3/8 - loss 0.19749339 - samples/sec: 17.05 - lr: 0.020000\n",
-      "2021-09-08 11:57:17,509 epoch 3 - iter 4/8 - loss 0.22216174 - samples/sec: 18.27 - lr: 0.020000\n",
-      "2021-09-08 11:57:17,577 epoch 3 - iter 5/8 - loss 0.17957550 - samples/sec: 14.70 - lr: 0.020000\n",
-      "2021-09-08 11:57:17,641 epoch 3 - iter 6/8 - loss 0.15071877 - samples/sec: 15.89 - lr: 0.020000\n",
-      "2021-09-08 11:57:17,685 epoch 3 - iter 7/8 - loss 0.13035295 - samples/sec: 22.95 - lr: 0.020000\n",
-      "2021-09-08 11:57:17,729 epoch 3 - iter 8/8 - loss 0.11534421 - samples/sec: 22.79 - lr: 0.020000\n",
-      "2021-09-08 11:57:17,730 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:17,730 EPOCH 3 done: loss 0.1153 - lr 0.0200000\n",
-      "2021-09-08 11:57:18,362 DEV : loss 0.32641157507896423 - score 0.0\n",
-      "2021-09-08 11:57:18,363 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:57:18,369 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:18,432 epoch 4 - iter 1/8 - loss 0.02706602 - samples/sec: 20.47 - lr: 0.020000\n",
-      "2021-09-08 11:57:18,477 epoch 4 - iter 2/8 - loss 0.01558560 - samples/sec: 22.29 - lr: 0.020000\n",
-      "2021-09-08 11:57:18,541 epoch 4 - iter 3/8 - loss 0.01154117 - samples/sec: 15.87 - lr: 0.020000\n",
-      "2021-09-08 11:57:18,594 epoch 4 - iter 4/8 - loss 0.00907311 - samples/sec: 19.08 - lr: 0.020000\n",
-      "2021-09-08 11:57:18,659 epoch 4 - iter 5/8 - loss 0.00791707 - samples/sec: 15.50 - lr: 0.020000\n",
-      "2021-09-08 11:57:18,705 epoch 4 - iter 6/8 - loss 0.00711322 - samples/sec: 22.11 - lr: 0.020000\n",
-      "2021-09-08 11:57:18,750 epoch 4 - iter 7/8 - loss 0.00690373 - samples/sec: 22.15 - lr: 0.020000\n",
-      "2021-09-08 11:57:18,796 epoch 4 - iter 8/8 - loss 0.00622203 - samples/sec: 22.06 - lr: 0.020000\n",
-      "2021-09-08 11:57:18,797 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:18,798 EPOCH 4 done: loss 0.0062 - lr 0.0200000\n",
-      "2021-09-08 11:57:18,831 DEV : loss 0.168192058801651 - score 0.0\n",
-      "2021-09-08 11:57:18,832 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:57:18,922 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:18,983 epoch 5 - iter 1/8 - loss 0.00219799 - samples/sec: 21.89 - lr: 0.020000\n",
-      "2021-09-08 11:57:19,029 epoch 5 - iter 2/8 - loss 0.00286900 - samples/sec: 21.88 - lr: 0.020000\n",
-      "2021-09-08 11:57:19,083 epoch 5 - iter 3/8 - loss 0.00250039 - samples/sec: 18.92 - lr: 0.020000\n",
-      "2021-09-08 11:57:19,129 epoch 5 - iter 4/8 - loss 0.00243087 - samples/sec: 21.83 - lr: 0.020000\n",
-      "2021-09-08 11:57:19,183 epoch 5 - iter 5/8 - loss 0.01692565 - samples/sec: 18.77 - lr: 0.020000\n",
-      "2021-09-08 11:57:19,248 epoch 5 - iter 6/8 - loss 0.01460315 - samples/sec: 15.66 - lr: 0.020000\n",
-      "2021-09-08 11:57:19,315 epoch 5 - iter 7/8 - loss 0.01296113 - samples/sec: 15.08 - lr: 0.020000\n",
-      "2021-09-08 11:57:19,361 epoch 5 - iter 8/8 - loss 0.01159646 - samples/sec: 21.87 - lr: 0.020000\n",
-      "2021-09-08 11:57:19,362 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:19,363 EPOCH 5 done: loss 0.0116 - lr 0.0200000\n",
-      "2021-09-08 11:57:19,398 DEV : loss 0.024932704865932465 - score 0.0\n",
-      "2021-09-08 11:57:19,399 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:57:19,414 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:19,494 epoch 6 - iter 1/8 - loss 0.00462853 - samples/sec: 15.50 - lr: 0.020000\n",
-      "2021-09-08 11:57:19,541 epoch 6 - iter 2/8 - loss 0.00274878 - samples/sec: 21.88 - lr: 0.020000\n",
-      "2021-09-08 11:57:19,608 epoch 6 - iter 3/8 - loss 0.00266393 - samples/sec: 15.06 - lr: 0.020000\n",
-      "2021-09-08 11:57:19,661 epoch 6 - iter 4/8 - loss 0.00249301 - samples/sec: 18.98 - lr: 0.020000\n",
-      "2021-09-08 11:57:19,711 epoch 6 - iter 5/8 - loss 0.00506742 - samples/sec: 20.30 - lr: 0.020000\n",
-      "2021-09-08 11:57:19,755 epoch 6 - iter 6/8 - loss 0.00458407 - samples/sec: 22.99 - lr: 0.020000\n",
-      "2021-09-08 11:57:19,800 epoch 6 - iter 7/8 - loss 0.00410069 - samples/sec: 22.19 - lr: 0.020000\n",
-      "2021-09-08 11:57:19,844 epoch 6 - iter 8/8 - loss 0.00388617 - samples/sec: 22.93 - lr: 0.020000\n",
-      "2021-09-08 11:57:19,845 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:19,845 EPOCH 6 done: loss 0.0039 - lr 0.0200000\n",
-      "2021-09-08 11:57:19,879 DEV : loss 0.015368546359241009 - score 0.0\n",
-      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 11:57:19,879 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 11:57:19,881 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:19,943 epoch 7 - iter 1/8 - loss 0.00320259 - samples/sec: 21.16 - lr: 0.010000\n",
-      "2021-09-08 11:57:19,995 epoch 7 - iter 2/8 - loss 0.00214063 - samples/sec: 19.50 - lr: 0.010000\n",
-      "2021-09-08 11:57:20,039 epoch 7 - iter 3/8 - loss 0.00190932 - samples/sec: 22.98 - lr: 0.010000\n",
-      "2021-09-08 11:57:20,084 epoch 7 - iter 4/8 - loss 0.00179456 - samples/sec: 22.13 - lr: 0.010000\n",
-      "2021-09-08 11:57:20,149 epoch 7 - iter 5/8 - loss 0.00409586 - samples/sec: 15.61 - lr: 0.010000\n",
-      "2021-09-08 11:57:20,192 epoch 7 - iter 6/8 - loss 0.00359028 - samples/sec: 23.05 - lr: 0.010000\n",
-      "2021-09-08 11:57:20,236 epoch 7 - iter 7/8 - loss 0.00352093 - samples/sec: 22.91 - lr: 0.010000\n",
-      "2021-09-08 11:57:20,300 epoch 7 - iter 8/8 - loss 0.00351683 - samples/sec: 15.85 - lr: 0.010000\n"
+      "2021-09-21 20:32:51,924 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:52,027 epoch 3 - iter 1/8 - loss 0.08954530 - samples/sec: 12.33 - lr: 0.020000\n",
+      "2021-09-21 20:32:52,127 epoch 3 - iter 2/8 - loss 0.06825084 - samples/sec: 10.00 - lr: 0.020000\n",
+      "2021-09-21 20:32:52,329 epoch 3 - iter 3/8 - loss 0.04597782 - samples/sec: 4.97 - lr: 0.020000\n",
+      "2021-09-21 20:32:52,435 epoch 3 - iter 4/8 - loss 0.03668691 - samples/sec: 9.48 - lr: 0.020000\n",
+      "2021-09-21 20:32:52,528 epoch 3 - iter 5/8 - loss 0.10083672 - samples/sec: 10.87 - lr: 0.020000\n",
+      "2021-09-21 20:32:52,595 epoch 3 - iter 6/8 - loss 0.09465647 - samples/sec: 14.84 - lr: 0.020000\n",
+      "2021-09-21 20:32:52,649 epoch 3 - iter 7/8 - loss 0.08173353 - samples/sec: 18.98 - lr: 0.020000\n",
+      "2021-09-21 20:32:52,703 epoch 3 - iter 8/8 - loss 0.07241579 - samples/sec: 18.58 - lr: 0.020000\n",
+      "2021-09-21 20:32:52,704 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:52,704 EPOCH 3 done: loss 0.0724 - lr 0.0200000\n",
+      "2021-09-21 20:32:53,543 DEV : loss 0.010554071515798569 - score 0.0\n",
+      "2021-09-21 20:32:53,544 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:32:57,671 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:57,771 epoch 4 - iter 1/8 - loss 0.00058498 - samples/sec: 16.28 - lr: 0.020000\n",
+      "2021-09-21 20:32:57,851 epoch 4 - iter 2/8 - loss 0.00091369 - samples/sec: 12.65 - lr: 0.020000\n",
+      "2021-09-21 20:32:58,081 epoch 4 - iter 3/8 - loss 0.00420007 - samples/sec: 4.36 - lr: 0.020000\n",
+      "2021-09-21 20:32:58,167 epoch 4 - iter 4/8 - loss 0.06709011 - samples/sec: 11.68 - lr: 0.020000\n",
+      "2021-09-21 20:32:58,259 epoch 4 - iter 5/8 - loss 0.06193344 - samples/sec: 10.90 - lr: 0.020000\n",
+      "2021-09-21 20:32:58,374 epoch 4 - iter 6/8 - loss 0.06212954 - samples/sec: 8.77 - lr: 0.020000\n",
+      "2021-09-21 20:32:58,440 epoch 4 - iter 7/8 - loss 0.05725578 - samples/sec: 15.25 - lr: 0.020000\n",
+      "2021-09-21 20:32:58,512 epoch 4 - iter 8/8 - loss 0.05905879 - samples/sec: 13.95 - lr: 0.020000\n",
+      "2021-09-21 20:32:58,513 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:58,513 EPOCH 4 done: loss 0.0591 - lr 0.0200000\n",
+      "2021-09-21 20:32:58,675 DEV : loss 0.0024658271577209234 - score 0.0\n",
+      "2021-09-21 20:32:58,675 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:33:03,453 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:03,583 epoch 5 - iter 1/8 - loss 0.00550868 - samples/sec: 10.55 - lr: 0.020000\n",
+      "2021-09-21 20:33:03,652 epoch 5 - iter 2/8 - loss 0.00326195 - samples/sec: 14.70 - lr: 0.020000\n",
+      "2021-09-21 20:33:03,742 epoch 5 - iter 3/8 - loss 0.13092151 - samples/sec: 11.15 - lr: 0.020000\n",
+      "2021-09-21 20:33:03,810 epoch 5 - iter 4/8 - loss 0.09846631 - samples/sec: 14.80 - lr: 0.020000\n",
+      "2021-09-21 20:33:03,877 epoch 5 - iter 5/8 - loss 0.07892505 - samples/sec: 15.05 - lr: 0.020000\n",
+      "2021-09-21 20:33:03,942 epoch 5 - iter 6/8 - loss 0.06584310 - samples/sec: 15.41 - lr: 0.020000\n",
+      "2021-09-21 20:33:04,179 epoch 5 - iter 7/8 - loss 0.05648741 - samples/sec: 4.24 - lr: 0.020000\n",
+      "2021-09-21 20:33:04,289 epoch 5 - iter 8/8 - loss 0.04966980 - samples/sec: 9.13 - lr: 0.020000\n",
+      "2021-09-21 20:33:04,290 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:04,290 EPOCH 5 done: loss 0.0497 - lr 0.0200000\n",
+      "2021-09-21 20:33:04,421 DEV : loss 0.011225271970033646 - score 0.0\n",
+      "2021-09-21 20:33:04,422 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:33:04,591 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:04,696 epoch 6 - iter 1/8 - loss 0.00118527 - samples/sec: 13.62 - lr: 0.020000\n",
+      "2021-09-21 20:33:04,788 epoch 6 - iter 2/8 - loss 0.00154599 - samples/sec: 10.90 - lr: 0.020000\n",
+      "2021-09-21 20:33:04,882 epoch 6 - iter 3/8 - loss 0.00727742 - samples/sec: 10.74 - lr: 0.020000\n",
+      "2021-09-21 20:33:04,953 epoch 6 - iter 4/8 - loss 0.00564437 - samples/sec: 14.23 - lr: 0.020000\n",
+      "2021-09-21 20:33:05,012 epoch 6 - iter 5/8 - loss 0.00520245 - samples/sec: 17.03 - lr: 0.020000\n",
+      "2021-09-21 20:33:05,097 epoch 6 - iter 6/8 - loss 0.00447829 - samples/sec: 11.82 - lr: 0.020000\n",
+      "2021-09-21 20:33:05,182 epoch 6 - iter 7/8 - loss 0.00406670 - samples/sec: 11.86 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:57:20,301 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:20,301 EPOCH 7 done: loss 0.0035 - lr 0.0100000\n",
-      "2021-09-08 11:57:20,336 DEV : loss 0.01403886079788208 - score 0.0\n",
-      "2021-09-08 11:57:20,336 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:57:20,338 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:20,396 epoch 8 - iter 1/8 - loss 0.00088215 - samples/sec: 22.92 - lr: 0.010000\n",
-      "2021-09-08 11:57:20,461 epoch 8 - iter 2/8 - loss 0.00127578 - samples/sec: 15.59 - lr: 0.010000\n",
-      "2021-09-08 11:57:20,508 epoch 8 - iter 3/8 - loss 0.00149620 - samples/sec: 21.32 - lr: 0.010000\n",
-      "2021-09-08 11:57:20,571 epoch 8 - iter 4/8 - loss 0.00182259 - samples/sec: 16.00 - lr: 0.010000\n",
-      "2021-09-08 11:57:20,615 epoch 8 - iter 5/8 - loss 0.00176730 - samples/sec: 22.84 - lr: 0.010000\n",
-      "2021-09-08 11:57:20,659 epoch 8 - iter 6/8 - loss 0.00173776 - samples/sec: 22.82 - lr: 0.010000\n",
-      "2021-09-08 11:57:20,703 epoch 8 - iter 7/8 - loss 0.00194302 - samples/sec: 23.07 - lr: 0.010000\n",
-      "2021-09-08 11:57:20,755 epoch 8 - iter 8/8 - loss 0.00225906 - samples/sec: 19.37 - lr: 0.010000\n",
-      "2021-09-08 11:57:20,756 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:20,756 EPOCH 8 done: loss 0.0023 - lr 0.0100000\n",
-      "2021-09-08 11:57:20,790 DEV : loss 0.012086653150618076 - score 0.0\n",
-      "2021-09-08 11:57:20,791 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:57:20,792 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:20,850 epoch 9 - iter 1/8 - loss 0.00119078 - samples/sec: 23.00 - lr: 0.010000\n",
-      "2021-09-08 11:57:20,914 epoch 9 - iter 2/8 - loss 0.00139366 - samples/sec: 15.73 - lr: 0.010000\n",
-      "2021-09-08 11:57:20,958 epoch 9 - iter 3/8 - loss 0.00118607 - samples/sec: 23.05 - lr: 0.010000\n",
-      "2021-09-08 11:57:21,002 epoch 9 - iter 4/8 - loss 0.00111389 - samples/sec: 22.94 - lr: 0.010000\n",
-      "2021-09-08 11:57:21,046 epoch 9 - iter 5/8 - loss 0.00170936 - samples/sec: 22.91 - lr: 0.010000\n",
-      "2021-09-08 11:57:21,098 epoch 9 - iter 6/8 - loss 0.00159609 - samples/sec: 19.55 - lr: 0.010000\n",
-      "2021-09-08 11:57:21,162 epoch 9 - iter 7/8 - loss 0.00149872 - samples/sec: 15.63 - lr: 0.010000\n",
-      "2021-09-08 11:57:21,209 epoch 9 - iter 8/8 - loss 0.00155315 - samples/sec: 21.31 - lr: 0.010000\n",
-      "2021-09-08 11:57:21,210 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:21,211 EPOCH 9 done: loss 0.0016 - lr 0.0100000\n",
-      "2021-09-08 11:57:21,244 DEV : loss 0.011360694654285908 - score 0.0\n",
-      "2021-09-08 11:57:21,245 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:57:21,247 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:21,305 epoch 10 - iter 1/8 - loss 0.00088579 - samples/sec: 22.90 - lr: 0.010000\n",
-      "2021-09-08 11:57:21,352 epoch 10 - iter 2/8 - loss 0.00128282 - samples/sec: 21.31 - lr: 0.010000\n",
-      "2021-09-08 11:57:21,396 epoch 10 - iter 3/8 - loss 0.00131804 - samples/sec: 23.05 - lr: 0.010000\n",
-      "2021-09-08 11:57:21,440 epoch 10 - iter 4/8 - loss 0.00122385 - samples/sec: 22.88 - lr: 0.010000\n",
-      "2021-09-08 11:57:21,505 epoch 10 - iter 5/8 - loss 0.00124396 - samples/sec: 15.63 - lr: 0.010000\n",
-      "2021-09-08 11:57:21,551 epoch 10 - iter 6/8 - loss 0.00120733 - samples/sec: 21.91 - lr: 0.010000\n",
-      "2021-09-08 11:57:21,606 epoch 10 - iter 7/8 - loss 0.00130347 - samples/sec: 18.55 - lr: 0.010000\n",
-      "2021-09-08 11:57:21,668 epoch 10 - iter 8/8 - loss 0.00135701 - samples/sec: 16.06 - lr: 0.010000\n",
-      "2021-09-08 11:57:21,669 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:21,670 EPOCH 10 done: loss 0.0014 - lr 0.0100000\n",
-      "2021-09-08 11:57:21,703 DEV : loss 0.010972476564347744 - score 0.0\n",
-      "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 11:57:21,704 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 11:57:28,456 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:28,457 Testing using best model ...\n",
-      "2021-09-08 11:57:28,458 loading file None/best-model.pt\n",
+      "2021-09-21 20:33:05,388 epoch 6 - iter 8/8 - loss 0.00362738 - samples/sec: 4.86 - lr: 0.020000\n",
+      "2021-09-21 20:33:05,389 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:05,389 EPOCH 6 done: loss 0.0036 - lr 0.0200000\n",
+      "2021-09-21 20:33:05,538 DEV : loss 0.022492550313472748 - score 0.0\n",
+      "2021-09-21 20:33:05,538 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:33:05,631 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:05,729 epoch 7 - iter 1/8 - loss 0.00055283 - samples/sec: 16.12 - lr: 0.020000\n",
+      "2021-09-21 20:33:05,957 epoch 7 - iter 2/8 - loss 0.00048631 - samples/sec: 4.40 - lr: 0.020000\n",
+      "2021-09-21 20:33:06,013 epoch 7 - iter 3/8 - loss 0.00077695 - samples/sec: 17.79 - lr: 0.020000\n",
+      "2021-09-21 20:33:06,108 epoch 7 - iter 4/8 - loss 0.00423277 - samples/sec: 10.62 - lr: 0.020000\n",
+      "2021-09-21 20:33:06,165 epoch 7 - iter 5/8 - loss 0.00356404 - samples/sec: 17.66 - lr: 0.020000\n",
+      "2021-09-21 20:33:06,258 epoch 7 - iter 6/8 - loss 0.00319326 - samples/sec: 10.86 - lr: 0.020000\n",
+      "2021-09-21 20:33:06,355 epoch 7 - iter 7/8 - loss 0.00290514 - samples/sec: 10.30 - lr: 0.020000\n",
+      "2021-09-21 20:33:06,427 epoch 7 - iter 8/8 - loss 0.00260446 - samples/sec: 14.02 - lr: 0.020000\n",
+      "2021-09-21 20:33:06,428 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:06,428 EPOCH 7 done: loss 0.0026 - lr 0.0200000\n",
+      "2021-09-21 20:33:08,595 DEV : loss 0.02358824387192726 - score 0.0\n",
+      "2021-09-21 20:33:08,597 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:33:08,609 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:08,743 epoch 8 - iter 1/8 - loss 0.00147755 - samples/sec: 10.71 - lr: 0.020000\n",
+      "2021-09-21 20:33:08,801 epoch 8 - iter 2/8 - loss 0.00106217 - samples/sec: 17.29 - lr: 0.020000\n",
+      "2021-09-21 20:33:08,862 epoch 8 - iter 3/8 - loss 0.00156701 - samples/sec: 16.68 - lr: 0.020000\n",
+      "2021-09-21 20:33:08,959 epoch 8 - iter 4/8 - loss 0.00252014 - samples/sec: 10.29 - lr: 0.020000\n",
+      "2021-09-21 20:33:09,062 epoch 8 - iter 5/8 - loss 0.00233556 - samples/sec: 9.75 - lr: 0.020000\n",
+      "2021-09-21 20:33:09,121 epoch 8 - iter 6/8 - loss 0.00203858 - samples/sec: 17.24 - lr: 0.020000\n",
+      "2021-09-21 20:33:09,355 epoch 8 - iter 7/8 - loss 0.00178667 - samples/sec: 4.27 - lr: 0.020000\n",
+      "2021-09-21 20:33:09,426 epoch 8 - iter 8/8 - loss 0.00165909 - samples/sec: 14.31 - lr: 0.020000\n",
+      "2021-09-21 20:33:09,427 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:09,427 EPOCH 8 done: loss 0.0017 - lr 0.0200000\n",
+      "2021-09-21 20:33:09,463 DEV : loss 0.030376702547073364 - score 0.0\n",
+      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 20:33:09,464 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:33:09,490 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:09,624 epoch 9 - iter 1/8 - loss 0.00330072 - samples/sec: 10.80 - lr: 0.010000\n",
+      "2021-09-21 20:33:09,725 epoch 9 - iter 2/8 - loss 0.00226966 - samples/sec: 9.95 - lr: 0.010000\n",
+      "2021-09-21 20:33:09,780 epoch 9 - iter 3/8 - loss 0.00215480 - samples/sec: 18.17 - lr: 0.010000\n",
+      "2021-09-21 20:33:09,846 epoch 9 - iter 4/8 - loss 0.00173018 - samples/sec: 15.22 - lr: 0.010000\n",
+      "2021-09-21 20:33:09,968 epoch 9 - iter 5/8 - loss 0.00226717 - samples/sec: 8.23 - lr: 0.010000\n",
+      "2021-09-21 20:33:10,030 epoch 9 - iter 6/8 - loss 0.00196845 - samples/sec: 16.37 - lr: 0.010000\n",
+      "2021-09-21 20:33:10,290 epoch 9 - iter 7/8 - loss 0.00177749 - samples/sec: 3.85 - lr: 0.010000\n",
+      "2021-09-21 20:33:10,355 epoch 9 - iter 8/8 - loss 0.00159978 - samples/sec: 15.51 - lr: 0.010000\n",
+      "2021-09-21 20:33:10,356 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:10,357 EPOCH 9 done: loss 0.0016 - lr 0.0100000\n",
+      "2021-09-21 20:33:10,415 DEV : loss 0.0350160114467144 - score 0.0\n",
+      "2021-09-21 20:33:10,417 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:33:10,419 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:10,510 epoch 10 - iter 1/8 - loss 0.00180122 - samples/sec: 17.18 - lr: 0.010000\n",
+      "2021-09-21 20:33:10,588 epoch 10 - iter 2/8 - loss 0.00124871 - samples/sec: 13.02 - lr: 0.010000\n",
+      "2021-09-21 20:33:10,646 epoch 10 - iter 3/8 - loss 0.00095820 - samples/sec: 17.40 - lr: 0.010000\n",
+      "2021-09-21 20:33:10,865 epoch 10 - iter 4/8 - loss 0.00078757 - samples/sec: 4.56 - lr: 0.010000\n",
+      "2021-09-21 20:33:10,971 epoch 10 - iter 5/8 - loss 0.00097874 - samples/sec: 9.47 - lr: 0.010000\n",
+      "2021-09-21 20:33:11,088 epoch 10 - iter 6/8 - loss 0.00116217 - samples/sec: 8.58 - lr: 0.010000\n",
+      "2021-09-21 20:33:11,179 epoch 10 - iter 7/8 - loss 0.00106499 - samples/sec: 11.00 - lr: 0.010000\n",
+      "2021-09-21 20:33:11,238 epoch 10 - iter 8/8 - loss 0.00098150 - samples/sec: 17.20 - lr: 0.010000\n",
+      "2021-09-21 20:33:11,239 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:11,239 EPOCH 10 done: loss 0.0010 - lr 0.0100000\n",
+      "2021-09-21 20:33:11,284 DEV : loss 0.03909517824649811 - score 0.0\n",
+      "2021-09-21 20:33:11,286 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:33:15,521 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:15,522 Testing using best model ...\n",
+      "2021-09-21 20:33:15,524 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:57:33,568 \t0.0\n",
-      "2021-09-08 11:57:33,568 \n",
+      "2021-09-21 20:33:20,446 \t0.0\n",
+      "2021-09-21 20:33:20,447 \n",
       "Results:\n",
       "- F-score (micro) 0.0\n",
       "- F-score (macro) 0.0\n",
@@ -5479,21 +5502,21 @@
       "\n",
       "               This text is about Health     0.0000    0.0000    0.0000         0\n",
       "This text is about Science & Mathematics     0.0000    0.0000    0.0000         0\n",
-      "This text is about Entertainment & Music     0.0000    0.0000    0.0000         0\n",
       " This text is about Computers & Internet     0.0000    0.0000    0.0000         0\n",
       "This text is about Education & Reference     0.0000    0.0000    0.0000         0\n",
       "               This text is about Sports     0.0000    0.0000    0.0000         0\n",
+      "    This text is about Society & Culture     0.0000    0.0000    0.0000         0\n",
       "   This text is about Business & Finance     0.0000    0.0000    0.0000         0\n",
       "This text is about Politics & Government     0.0000    0.0000    0.0000         0\n",
-      "    This text is about Society & Culture     0.0000    0.0000    0.0000         1\n",
+      "This text is about Entertainment & Music     0.0000    0.0000    0.0000         1\n",
       "\n",
       "                               micro avg     0.0000    0.0000    0.0000         1\n",
       "                               macro avg     0.0000    0.0000    0.0000         1\n",
       "                            weighted avg     0.0000    0.0000    0.0000         1\n",
       "                             samples avg     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "2021-09-08 11:57:33,569 ----------------------------------------------------------------------------------------------------\n",
-      "Accuracy Durchschnitt: 0.5270029673590504\n"
+      "2021-09-21 20:33:20,447 ----------------------------------------------------------------------------------------------------\n",
+      "Accuracy Durchschnitt: 0.5258160237388724\n"
      ]
     }
    ],
@@ -5572,11 +5595,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "id": "86988d74",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.5519287833827893, 0.486646884272997, 0.5192878338278932, 0.5341246290801187, 0.5370919881305638]\n",
+      "0.022158047124741204\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(avg_acc_list)\n",
+    "print(statistics.pstdev(avg_acc_list))"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -5588,7 +5623,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 8,
    "id": "263ee449",
    "metadata": {},
    "outputs": [
@@ -5596,25 +5631,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:27:33,260 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
+      "2021-09-21 20:33:41,870 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 14:27:37,394 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:33:46,449 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 8/8 [00:00<00:00, 19691.57it/s]"
+      "100%|██████████| 8/8 [00:00<00:00, 13262.62it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:27:37,396 [b'a particular branch of scientific knowledge', b'an activity that is diverting and that holds the attention', b'knowledge acquired by learning and instruction', b'an active diversion requiring physical exertion and competition', b'an extended social group having a distinctive cultural and economic organization', b'the commercial activity of providing funds and capital', b'the study of government of states and other political units', b'a machine for performing calculations automatically']\n",
-      "2021-09-08 14:27:37,465 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:27:37,467 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:33:46,452 [b'a social unit living together', b'a particular branch of scientific knowledge', b'an activity that is diverting and that holds the attention', b'a machine for performing calculations automatically', b'knowledge acquired by learning and instruction', b'an active diversion requiring physical exertion and competition', b'the study of government of states and other political units', b'an extended social group having a distinctive cultural and economic organization']\n",
+      "2021-09-21 20:33:46,604 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:46,606 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -5927,27 +5962,24 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:27:37,467 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:27:37,468 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
-      "2021-09-08 14:27:37,468 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:27:37,468 Parameters:\n",
-      "2021-09-08 14:27:37,468  - learning_rate: \"0.02\"\n",
-      "2021-09-08 14:27:37,469  - mini_batch_size: \"1\"\n",
-      "2021-09-08 14:27:37,469  - patience: \"3\"\n",
-      "2021-09-08 14:27:37,469  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 14:27:37,469  - max_epochs: \"10\"\n",
-      "2021-09-08 14:27:37,470  - shuffle: \"True\"\n",
-      "2021-09-08 14:27:37,470  - train_with_dev: \"False\"\n",
-      "2021-09-08 14:27:37,470  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 14:27:37,470 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:27:37,471 Model training base path: \"None\"\n",
-      "2021-09-08 14:27:37,471 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:27:37,471 Device: cuda:1\n",
-      "2021-09-08 14:27:37,472 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:27:37,472 Embeddings storage mode: cpu\n",
-      "2021-09-08 14:27:37,478 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:27:37,537 epoch 1 - iter 1/7 - loss 0.12800013 - samples/sec: 22.71 - lr: 0.020000\n",
-      "2021-09-08 14:27:37,583 epoch 1 - iter 2/7 - loss 0.08619808 - samples/sec: 21.91 - lr: 0.020000\n"
+      "2021-09-21 20:33:46,607 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:46,607 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
+      "2021-09-21 20:33:46,608 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:46,608 Parameters:\n",
+      "2021-09-21 20:33:46,608  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:33:46,608  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:33:46,609  - patience: \"3\"\n",
+      "2021-09-21 20:33:46,609  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:33:46,609  - max_epochs: \"10\"\n",
+      "2021-09-21 20:33:46,610  - shuffle: \"True\"\n",
+      "2021-09-21 20:33:46,610  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:33:46,610  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:33:46,610 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:46,611 Model training base path: \"None\"\n",
+      "2021-09-21 20:33:46,611 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:46,611 Device: cuda:0\n",
+      "2021-09-21 20:33:46,612 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:46,612 Embeddings storage mode: cpu\n"
      ]
     },
     {
@@ -5961,185 +5993,196 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:27:37,631 epoch 1 - iter 3/7 - loss 0.62574175 - samples/sec: 20.85 - lr: 0.020000\n",
-      "2021-09-08 14:27:37,675 epoch 1 - iter 4/7 - loss 0.46960649 - samples/sec: 23.40 - lr: 0.020000\n",
-      "2021-09-08 14:27:37,721 epoch 1 - iter 5/7 - loss 0.49480301 - samples/sec: 21.89 - lr: 0.020000\n",
-      "2021-09-08 14:27:37,767 epoch 1 - iter 6/7 - loss 0.42033804 - samples/sec: 21.97 - lr: 0.020000\n",
-      "2021-09-08 14:27:37,857 epoch 1 - iter 7/7 - loss 0.58350760 - samples/sec: 11.14 - lr: 0.020000\n",
-      "2021-09-08 14:27:37,858 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:27:37,858 EPOCH 1 done: loss 0.5835 - lr 0.0200000\n",
-      "2021-09-08 14:27:37,901 DEV : loss 0.03789253160357475 - score 0.0\n",
-      "2021-09-08 14:27:37,901 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 14:27:53,265 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:27:53,330 epoch 2 - iter 1/7 - loss 0.48228523 - samples/sec: 20.28 - lr: 0.020000\n",
-      "2021-09-08 14:27:53,374 epoch 2 - iter 2/7 - loss 0.24370341 - samples/sec: 23.30 - lr: 0.020000\n",
-      "2021-09-08 14:27:53,417 epoch 2 - iter 3/7 - loss 0.16950365 - samples/sec: 23.25 - lr: 0.020000\n",
-      "2021-09-08 14:27:53,460 epoch 2 - iter 4/7 - loss 0.12902752 - samples/sec: 23.43 - lr: 0.020000\n",
-      "2021-09-08 14:27:53,559 epoch 2 - iter 5/7 - loss 0.22301406 - samples/sec: 10.13 - lr: 0.020000\n",
-      "2021-09-08 14:27:53,610 epoch 2 - iter 6/7 - loss 0.18622006 - samples/sec: 20.04 - lr: 0.020000\n",
-      "2021-09-08 14:27:53,656 epoch 2 - iter 7/7 - loss 0.17549224 - samples/sec: 21.77 - lr: 0.020000\n",
-      "2021-09-08 14:27:53,657 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:27:53,657 EPOCH 2 done: loss 0.1755 - lr 0.0200000\n",
-      "2021-09-08 14:27:53,822 DEV : loss 0.006884618662297726 - score 0.0\n",
-      "2021-09-08 14:27:53,822 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:33:46,789 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:46,893 epoch 1 - iter 1/7 - loss 0.89011574 - samples/sec: 18.23 - lr: 0.020000\n",
+      "2021-09-21 20:33:47,019 epoch 1 - iter 2/7 - loss 0.45574182 - samples/sec: 7.96 - lr: 0.020000\n",
+      "2021-09-21 20:33:47,104 epoch 1 - iter 3/7 - loss 0.32840394 - samples/sec: 11.82 - lr: 0.020000\n",
+      "2021-09-21 20:33:47,180 epoch 1 - iter 4/7 - loss 0.75888764 - samples/sec: 13.14 - lr: 0.020000\n",
+      "2021-09-21 20:33:47,257 epoch 1 - iter 5/7 - loss 0.93046181 - samples/sec: 13.17 - lr: 0.020000\n",
+      "2021-09-21 20:33:47,322 epoch 1 - iter 6/7 - loss 1.14049154 - samples/sec: 15.48 - lr: 0.020000\n",
+      "2021-09-21 20:33:47,494 epoch 1 - iter 7/7 - loss 1.11301002 - samples/sec: 5.83 - lr: 0.020000\n",
+      "2021-09-21 20:33:47,495 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:47,495 EPOCH 1 done: loss 1.1130 - lr 0.0200000\n",
+      "2021-09-21 20:33:47,626 DEV : loss 0.4707741439342499 - score 0.0\n",
+      "2021-09-21 20:33:47,626 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 14:28:01,036 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:28:01,099 epoch 3 - iter 1/7 - loss 0.35299781 - samples/sec: 21.16 - lr: 0.020000\n",
-      "2021-09-08 14:28:01,149 epoch 3 - iter 2/7 - loss 0.18290628 - samples/sec: 20.09 - lr: 0.020000\n",
-      "2021-09-08 14:28:01,193 epoch 3 - iter 3/7 - loss 0.12279958 - samples/sec: 23.37 - lr: 0.020000\n",
-      "2021-09-08 14:28:01,240 epoch 3 - iter 4/7 - loss 0.10587210 - samples/sec: 21.27 - lr: 0.020000\n",
-      "2021-09-08 14:28:01,283 epoch 3 - iter 5/7 - loss 0.08495830 - samples/sec: 23.50 - lr: 0.020000\n",
-      "2021-09-08 14:28:01,371 epoch 3 - iter 6/7 - loss 0.12531516 - samples/sec: 11.39 - lr: 0.020000\n",
-      "2021-09-08 14:28:01,418 epoch 3 - iter 7/7 - loss 0.12758791 - samples/sec: 21.44 - lr: 0.020000\n",
-      "2021-09-08 14:28:01,419 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:28:01,419 EPOCH 3 done: loss 0.1276 - lr 0.0200000\n",
-      "2021-09-08 14:28:01,462 DEV : loss 0.0004879988555330783 - score 0.0\n",
-      "2021-09-08 14:28:01,463 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 14:28:09,859 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:28:09,922 epoch 4 - iter 1/7 - loss 0.04923528 - samples/sec: 21.16 - lr: 0.020000\n",
-      "2021-09-08 14:28:10,021 epoch 4 - iter 2/7 - loss 0.09314684 - samples/sec: 10.13 - lr: 0.020000\n",
-      "2021-09-08 14:28:10,067 epoch 4 - iter 3/7 - loss 0.06918732 - samples/sec: 21.93 - lr: 0.020000\n",
-      "2021-09-08 14:28:10,110 epoch 4 - iter 4/7 - loss 0.05204031 - samples/sec: 23.56 - lr: 0.020000\n",
-      "2021-09-08 14:28:10,157 epoch 4 - iter 5/7 - loss 0.07532480 - samples/sec: 21.78 - lr: 0.020000\n",
-      "2021-09-08 14:28:10,200 epoch 4 - iter 6/7 - loss 0.06284879 - samples/sec: 23.33 - lr: 0.020000\n",
-      "2021-09-08 14:28:10,246 epoch 4 - iter 7/7 - loss 0.05393517 - samples/sec: 21.69 - lr: 0.020000\n",
-      "2021-09-08 14:28:10,247 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:28:10,248 EPOCH 4 done: loss 0.0539 - lr 0.0200000\n",
-      "2021-09-08 14:28:10,390 DEV : loss 0.0005475498037412763 - score 0.0\n",
-      "2021-09-08 14:28:10,391 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 14:28:10,394 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:28:10,451 epoch 5 - iter 1/7 - loss 0.00401402 - samples/sec: 22.86 - lr: 0.020000\n",
-      "2021-09-08 14:28:10,494 epoch 5 - iter 2/7 - loss 0.00213899 - samples/sec: 23.59 - lr: 0.020000\n",
-      "2021-09-08 14:28:10,540 epoch 5 - iter 3/7 - loss 0.01443585 - samples/sec: 22.00 - lr: 0.020000\n",
-      "2021-09-08 14:28:10,587 epoch 5 - iter 4/7 - loss 0.01095757 - samples/sec: 21.64 - lr: 0.020000\n",
-      "2021-09-08 14:28:10,630 epoch 5 - iter 5/7 - loss 0.00938127 - samples/sec: 23.13 - lr: 0.020000\n",
-      "2021-09-08 14:28:10,714 epoch 5 - iter 6/7 - loss 0.00864133 - samples/sec: 11.93 - lr: 0.020000\n",
-      "2021-09-08 14:28:10,757 epoch 5 - iter 7/7 - loss 0.00745277 - samples/sec: 23.53 - lr: 0.020000\n",
-      "2021-09-08 14:28:10,758 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:28:10,759 EPOCH 5 done: loss 0.0075 - lr 0.0200000\n",
-      "2021-09-08 14:28:10,802 DEV : loss 0.0007232613861560822 - score 0.0\n",
-      "2021-09-08 14:28:10,803 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 14:28:10,805 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:28:10,861 epoch 6 - iter 1/7 - loss 0.00031037 - samples/sec: 23.49 - lr: 0.020000\n",
-      "2021-09-08 14:28:10,949 epoch 6 - iter 2/7 - loss 0.04803104 - samples/sec: 11.41 - lr: 0.020000\n",
-      "2021-09-08 14:28:10,992 epoch 6 - iter 3/7 - loss 0.03277357 - samples/sec: 23.60 - lr: 0.020000\n",
-      "2021-09-08 14:28:11,038 epoch 6 - iter 4/7 - loss 0.02466860 - samples/sec: 21.63 - lr: 0.020000\n",
-      "2021-09-08 14:28:11,081 epoch 6 - iter 5/7 - loss 0.01984987 - samples/sec: 23.56 - lr: 0.020000\n",
-      "2021-09-08 14:28:11,124 epoch 6 - iter 6/7 - loss 0.01660508 - samples/sec: 23.48 - lr: 0.020000\n",
-      "2021-09-08 14:28:11,168 epoch 6 - iter 7/7 - loss 0.01511633 - samples/sec: 23.14 - lr: 0.020000\n",
-      "2021-09-08 14:28:11,169 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:28:11,169 EPOCH 6 done: loss 0.0151 - lr 0.0200000\n",
-      "2021-09-08 14:28:11,213 DEV : loss 0.0002841602545231581 - score 0.0\n",
-      "2021-09-08 14:28:11,214 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 14:28:15,740 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:28:15,799 epoch 7 - iter 1/7 - loss 0.00098976 - samples/sec: 22.63 - lr: 0.020000\n",
-      "2021-09-08 14:28:15,845 epoch 7 - iter 2/7 - loss 0.00133714 - samples/sec: 22.27 - lr: 0.020000\n",
-      "2021-09-08 14:28:15,888 epoch 7 - iter 3/7 - loss 0.00123937 - samples/sec: 23.43 - lr: 0.020000\n",
-      "2021-09-08 14:28:15,981 epoch 7 - iter 4/7 - loss 0.00252448 - samples/sec: 10.78 - lr: 0.020000\n",
-      "2021-09-08 14:28:16,025 epoch 7 - iter 5/7 - loss 0.00383582 - samples/sec: 22.74 - lr: 0.020000\n",
-      "2021-09-08 14:28:16,074 epoch 7 - iter 6/7 - loss 0.00328795 - samples/sec: 20.62 - lr: 0.020000\n",
-      "2021-09-08 14:28:16,117 epoch 7 - iter 7/7 - loss 0.00285984 - samples/sec: 23.54 - lr: 0.020000\n",
-      "2021-09-08 14:28:16,118 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:28:16,118 EPOCH 7 done: loss 0.0029 - lr 0.0200000\n",
-      "2021-09-08 14:28:16,170 DEV : loss 0.00021828485478181392 - score 0.0\n",
-      "2021-09-08 14:28:16,171 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 14:28:20,477 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:28:20,537 epoch 8 - iter 1/7 - loss 0.00104225 - samples/sec: 22.74 - lr: 0.020000\n",
-      "2021-09-08 14:28:20,580 epoch 8 - iter 2/7 - loss 0.00330181 - samples/sec: 23.47 - lr: 0.020000\n",
-      "2021-09-08 14:28:20,625 epoch 8 - iter 3/7 - loss 0.00250532 - samples/sec: 22.30 - lr: 0.020000\n",
-      "2021-09-08 14:28:20,668 epoch 8 - iter 4/7 - loss 0.00191553 - samples/sec: 23.57 - lr: 0.020000\n",
-      "2021-09-08 14:28:20,711 epoch 8 - iter 5/7 - loss 0.00160022 - samples/sec: 23.25 - lr: 0.020000\n"
+      "2021-09-21 20:33:53,873 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:54,030 epoch 2 - iter 1/7 - loss 0.35620227 - samples/sec: 7.37 - lr: 0.020000\n",
+      "2021-09-21 20:33:54,128 epoch 2 - iter 2/7 - loss 0.22193146 - samples/sec: 10.34 - lr: 0.020000\n",
+      "2021-09-21 20:33:54,210 epoch 2 - iter 3/7 - loss 0.15335751 - samples/sec: 12.22 - lr: 0.020000\n",
+      "2021-09-21 20:33:54,280 epoch 2 - iter 4/7 - loss 0.23502461 - samples/sec: 14.34 - lr: 0.020000\n",
+      "2021-09-21 20:33:54,351 epoch 2 - iter 5/7 - loss 0.24833565 - samples/sec: 14.33 - lr: 0.020000\n",
+      "2021-09-21 20:33:54,428 epoch 2 - iter 6/7 - loss 0.29006512 - samples/sec: 12.90 - lr: 0.020000\n",
+      "2021-09-21 20:33:54,561 epoch 2 - iter 7/7 - loss 0.24931825 - samples/sec: 7.59 - lr: 0.020000\n",
+      "2021-09-21 20:33:54,562 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:54,562 EPOCH 2 done: loss 0.2493 - lr 0.0200000\n",
+      "2021-09-21 20:34:04,883 DEV : loss 0.49294212460517883 - score 0.0\n",
+      "2021-09-21 20:34:04,885 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:34:04,922 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:05,059 epoch 3 - iter 1/7 - loss 0.00571389 - samples/sec: 9.48 - lr: 0.020000\n",
+      "2021-09-21 20:34:05,144 epoch 3 - iter 2/7 - loss 0.02486855 - samples/sec: 11.82 - lr: 0.020000\n",
+      "2021-09-21 20:34:05,336 epoch 3 - iter 3/7 - loss 0.02079621 - samples/sec: 5.21 - lr: 0.020000\n",
+      "2021-09-21 20:34:05,427 epoch 3 - iter 4/7 - loss 0.13017731 - samples/sec: 11.04 - lr: 0.020000\n",
+      "2021-09-21 20:34:05,516 epoch 3 - iter 5/7 - loss 0.14327415 - samples/sec: 11.37 - lr: 0.020000\n",
+      "2021-09-21 20:34:05,655 epoch 3 - iter 6/7 - loss 0.11969149 - samples/sec: 7.18 - lr: 0.020000\n",
+      "2021-09-21 20:34:05,763 epoch 3 - iter 7/7 - loss 0.10471296 - samples/sec: 9.33 - lr: 0.020000\n",
+      "2021-09-21 20:34:05,764 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:05,764 EPOCH 3 done: loss 0.1047 - lr 0.0200000\n",
+      "2021-09-21 20:34:05,805 DEV : loss 0.6849424839019775 - score 0.0\n",
+      "2021-09-21 20:34:05,809 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:34:05,812 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:05,931 epoch 4 - iter 1/7 - loss 0.00263876 - samples/sec: 12.87 - lr: 0.020000\n",
+      "2021-09-21 20:34:05,991 epoch 4 - iter 2/7 - loss 0.00508911 - samples/sec: 16.74 - lr: 0.020000\n",
+      "2021-09-21 20:34:06,163 epoch 4 - iter 3/7 - loss 0.46623981 - samples/sec: 5.81 - lr: 0.020000\n",
+      "2021-09-21 20:34:06,263 epoch 4 - iter 4/7 - loss 0.40057262 - samples/sec: 10.08 - lr: 0.020000\n",
+      "2021-09-21 20:34:06,363 epoch 4 - iter 5/7 - loss 0.32118197 - samples/sec: 10.11 - lr: 0.020000\n",
+      "2021-09-21 20:34:06,564 epoch 4 - iter 6/7 - loss 0.27059700 - samples/sec: 4.98 - lr: 0.020000\n",
+      "2021-09-21 20:34:06,657 epoch 4 - iter 7/7 - loss 0.23249109 - samples/sec: 10.73 - lr: 0.020000\n",
+      "2021-09-21 20:34:06,658 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:06,659 EPOCH 4 done: loss 0.2325 - lr 0.0200000\n",
+      "2021-09-21 20:34:06,731 DEV : loss 0.6160321831703186 - score 0.0\n",
+      "2021-09-21 20:34:06,735 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:34:06,740 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:06,875 epoch 5 - iter 1/7 - loss 0.00198462 - samples/sec: 11.15 - lr: 0.020000\n",
+      "2021-09-21 20:34:06,943 epoch 5 - iter 2/7 - loss 0.02500391 - samples/sec: 14.75 - lr: 0.020000\n",
+      "2021-09-21 20:34:07,027 epoch 5 - iter 3/7 - loss 0.01701905 - samples/sec: 11.98 - lr: 0.020000\n",
+      "2021-09-21 20:34:07,114 epoch 5 - iter 4/7 - loss 0.18049874 - samples/sec: 11.64 - lr: 0.020000\n",
+      "2021-09-21 20:34:07,276 epoch 5 - iter 5/7 - loss 0.26267805 - samples/sec: 6.19 - lr: 0.020000\n",
+      "2021-09-21 20:34:07,368 epoch 5 - iter 6/7 - loss 0.21967484 - samples/sec: 10.97 - lr: 0.020000\n",
+      "2021-09-21 20:34:07,558 epoch 5 - iter 7/7 - loss 0.18906205 - samples/sec: 5.27 - lr: 0.020000\n",
+      "2021-09-21 20:34:07,559 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:07,559 EPOCH 5 done: loss 0.1891 - lr 0.0200000\n",
+      "2021-09-21 20:34:07,642 DEV : loss 0.6634657382965088 - score 0.0\n",
+      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 20:34:07,644 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:34:07,646 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:07,878 epoch 6 - iter 1/7 - loss 0.05277875 - samples/sec: 5.47 - lr: 0.010000\n",
+      "2021-09-21 20:34:07,980 epoch 6 - iter 2/7 - loss 0.09211269 - samples/sec: 9.83 - lr: 0.010000\n",
+      "2021-09-21 20:34:08,063 epoch 6 - iter 3/7 - loss 0.06491816 - samples/sec: 12.13 - lr: 0.010000\n",
+      "2021-09-21 20:34:08,160 epoch 6 - iter 4/7 - loss 0.05168498 - samples/sec: 10.34 - lr: 0.010000\n",
+      "2021-09-21 20:34:08,336 epoch 6 - iter 5/7 - loss 0.04149566 - samples/sec: 5.70 - lr: 0.010000\n",
+      "2021-09-21 20:34:08,415 epoch 6 - iter 6/7 - loss 0.03500790 - samples/sec: 12.63 - lr: 0.010000\n",
+      "2021-09-21 20:34:08,484 epoch 6 - iter 7/7 - loss 0.03036025 - samples/sec: 14.68 - lr: 0.010000\n",
+      "2021-09-21 20:34:08,485 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:08,485 EPOCH 6 done: loss 0.0304 - lr 0.0100000\n",
+      "2021-09-21 20:34:08,538 DEV : loss 0.6809157729148865 - score 0.0\n",
+      "2021-09-21 20:34:08,539 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:34:08,541 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:08,637 epoch 7 - iter 1/7 - loss 0.16437228 - samples/sec: 13.44 - lr: 0.010000\n",
+      "2021-09-21 20:34:08,716 epoch 7 - iter 2/7 - loss 0.08470439 - samples/sec: 12.66 - lr: 0.010000\n",
+      "2021-09-21 20:34:08,789 epoch 7 - iter 3/7 - loss 0.06085507 - samples/sec: 13.81 - lr: 0.010000\n",
+      "2021-09-21 20:34:08,848 epoch 7 - iter 4/7 - loss 0.04697382 - samples/sec: 17.10 - lr: 0.010000\n",
+      "2021-09-21 20:34:08,994 epoch 7 - iter 5/7 - loss 0.03797533 - samples/sec: 6.88 - lr: 0.010000\n",
+      "2021-09-21 20:34:09,077 epoch 7 - iter 6/7 - loss 0.03315114 - samples/sec: 12.11 - lr: 0.010000\n",
+      "2021-09-21 20:34:09,178 epoch 7 - iter 7/7 - loss 0.02855873 - samples/sec: 9.95 - lr: 0.010000\n",
+      "2021-09-21 20:34:09,179 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:09,179 EPOCH 7 done: loss 0.0286 - lr 0.0100000\n",
+      "2021-09-21 20:34:09,317 DEV : loss 0.6982412934303284 - score 0.0\n",
+      "2021-09-21 20:34:09,317 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:34:09,419 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:09,543 epoch 8 - iter 1/7 - loss 0.29328665 - samples/sec: 10.87 - lr: 0.010000\n",
+      "2021-09-21 20:34:09,670 epoch 8 - iter 2/7 - loss 0.15200529 - samples/sec: 7.92 - lr: 0.010000\n",
+      "2021-09-21 20:34:09,738 epoch 8 - iter 3/7 - loss 0.11069698 - samples/sec: 14.73 - lr: 0.010000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:28:20,758 epoch 8 - iter 6/7 - loss 0.00138195 - samples/sec: 21.43 - lr: 0.020000\n",
-      "2021-09-08 14:28:20,845 epoch 8 - iter 7/7 - loss 0.00134560 - samples/sec: 11.64 - lr: 0.020000\n",
-      "2021-09-08 14:28:20,846 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:28:20,846 EPOCH 8 done: loss 0.0013 - lr 0.0200000\n",
-      "2021-09-08 14:28:20,888 DEV : loss 0.00021119264420121908 - score 0.0\n",
-      "2021-09-08 14:28:20,888 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 14:28:25,275 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:28:25,334 epoch 9 - iter 1/7 - loss 0.00027653 - samples/sec: 22.95 - lr: 0.020000\n",
-      "2021-09-08 14:28:25,434 epoch 9 - iter 2/7 - loss 0.00928353 - samples/sec: 10.04 - lr: 0.020000\n",
-      "2021-09-08 14:28:25,478 epoch 9 - iter 3/7 - loss 0.00627106 - samples/sec: 23.27 - lr: 0.020000\n",
-      "2021-09-08 14:28:25,521 epoch 9 - iter 4/7 - loss 0.00645239 - samples/sec: 23.48 - lr: 0.020000\n",
-      "2021-09-08 14:28:25,564 epoch 9 - iter 5/7 - loss 0.00531812 - samples/sec: 23.43 - lr: 0.020000\n",
-      "2021-09-08 14:28:25,611 epoch 9 - iter 6/7 - loss 0.00448327 - samples/sec: 21.38 - lr: 0.020000\n",
-      "2021-09-08 14:28:25,655 epoch 9 - iter 7/7 - loss 0.00419939 - samples/sec: 23.17 - lr: 0.020000\n",
-      "2021-09-08 14:28:25,655 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:28:25,656 EPOCH 9 done: loss 0.0042 - lr 0.0200000\n",
-      "2021-09-08 14:28:25,706 DEV : loss 0.0001443022774765268 - score 0.0\n",
-      "2021-09-08 14:28:25,707 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 14:28:30,158 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:28:30,225 epoch 10 - iter 1/7 - loss 0.00049836 - samples/sec: 19.49 - lr: 0.020000\n",
-      "2021-09-08 14:28:30,270 epoch 10 - iter 2/7 - loss 0.00332701 - samples/sec: 22.26 - lr: 0.020000\n",
-      "2021-09-08 14:28:30,366 epoch 10 - iter 3/7 - loss 0.00241489 - samples/sec: 10.51 - lr: 0.020000\n",
-      "2021-09-08 14:28:30,409 epoch 10 - iter 4/7 - loss 0.00187126 - samples/sec: 23.47 - lr: 0.020000\n",
-      "2021-09-08 14:28:30,451 epoch 10 - iter 5/7 - loss 0.00158582 - samples/sec: 23.55 - lr: 0.020000\n",
-      "2021-09-08 14:28:30,494 epoch 10 - iter 6/7 - loss 0.00146223 - samples/sec: 23.48 - lr: 0.020000\n",
-      "2021-09-08 14:28:30,537 epoch 10 - iter 7/7 - loss 0.00130816 - samples/sec: 23.47 - lr: 0.020000\n",
-      "2021-09-08 14:28:30,538 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:28:30,539 EPOCH 10 done: loss 0.0013 - lr 0.0200000\n",
-      "2021-09-08 14:28:30,581 DEV : loss 0.00014005627599544823 - score 0.0\n",
-      "2021-09-08 14:28:30,582 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 14:28:39,345 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:28:39,346 Testing using best model ...\n",
-      "2021-09-08 14:28:39,347 loading file None/best-model.pt\n",
+      "2021-09-21 20:34:09,814 epoch 8 - iter 4/7 - loss 0.08351998 - samples/sec: 13.34 - lr: 0.010000\n",
+      "2021-09-21 20:34:09,880 epoch 8 - iter 5/7 - loss 0.06726910 - samples/sec: 15.27 - lr: 0.010000\n",
+      "2021-09-21 20:34:09,936 epoch 8 - iter 6/7 - loss 0.05713570 - samples/sec: 17.99 - lr: 0.010000\n",
+      "2021-09-21 20:34:10,068 epoch 8 - iter 7/7 - loss 0.04917854 - samples/sec: 7.56 - lr: 0.010000\n",
+      "2021-09-21 20:34:10,069 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:10,070 EPOCH 8 done: loss 0.0492 - lr 0.0100000\n",
+      "2021-09-21 20:34:10,222 DEV : loss 0.7116144299507141 - score 0.0\n",
+      "2021-09-21 20:34:10,222 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:34:10,295 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:10,398 epoch 9 - iter 1/7 - loss 0.00289940 - samples/sec: 14.55 - lr: 0.010000\n",
+      "2021-09-21 20:34:10,474 epoch 9 - iter 2/7 - loss 0.00262976 - samples/sec: 13.19 - lr: 0.010000\n",
+      "2021-09-21 20:34:10,535 epoch 9 - iter 3/7 - loss 0.00262826 - samples/sec: 16.81 - lr: 0.010000\n",
+      "2021-09-21 20:34:10,603 epoch 9 - iter 4/7 - loss 0.00493341 - samples/sec: 14.66 - lr: 0.010000\n",
+      "2021-09-21 20:34:10,748 epoch 9 - iter 5/7 - loss 0.00421651 - samples/sec: 6.94 - lr: 0.010000\n",
+      "2021-09-21 20:34:10,902 epoch 9 - iter 6/7 - loss 0.00431328 - samples/sec: 6.49 - lr: 0.010000\n",
+      "2021-09-21 20:34:10,978 epoch 9 - iter 7/7 - loss 0.00378934 - samples/sec: 13.21 - lr: 0.010000\n",
+      "2021-09-21 20:34:10,979 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:10,980 EPOCH 9 done: loss 0.0038 - lr 0.0100000\n",
+      "2021-09-21 20:34:11,134 DEV : loss 0.7323412299156189 - score 0.0\n",
+      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 20:34:11,135 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:34:11,208 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:11,384 epoch 10 - iter 1/7 - loss 0.00085389 - samples/sec: 6.82 - lr: 0.005000\n",
+      "2021-09-21 20:34:11,450 epoch 10 - iter 2/7 - loss 0.00141379 - samples/sec: 15.33 - lr: 0.005000\n",
+      "2021-09-21 20:34:11,515 epoch 10 - iter 3/7 - loss 0.00284601 - samples/sec: 15.42 - lr: 0.005000\n",
+      "2021-09-21 20:34:11,581 epoch 10 - iter 4/7 - loss 0.00226285 - samples/sec: 15.17 - lr: 0.005000\n",
+      "2021-09-21 20:34:11,656 epoch 10 - iter 5/7 - loss 0.00230954 - samples/sec: 13.49 - lr: 0.005000\n",
+      "2021-09-21 20:34:11,820 epoch 10 - iter 6/7 - loss 0.00238247 - samples/sec: 6.12 - lr: 0.005000\n",
+      "2021-09-21 20:34:11,881 epoch 10 - iter 7/7 - loss 0.00239568 - samples/sec: 16.35 - lr: 0.005000\n",
+      "2021-09-21 20:34:11,882 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:11,883 EPOCH 10 done: loss 0.0024 - lr 0.0050000\n",
+      "2021-09-21 20:34:12,027 DEV : loss 0.741042971611023 - score 0.0\n",
+      "2021-09-21 20:34:12,030 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:34:16,629 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:16,629 Testing using best model ...\n",
+      "2021-09-21 20:34:16,653 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 14:28:43,829 \t1.0\n",
-      "2021-09-08 14:28:43,830 \n",
+      "2021-09-21 20:34:23,488 \t0.0\n",
+      "2021-09-21 20:34:23,488 \n",
       "Results:\n",
-      "- F-score (micro) 1.0\n",
-      "- F-score (macro) 0.125\n",
-      "- Accuracy 1.0\n",
+      "- F-score (micro) 0.0\n",
+      "- F-score (macro) 0.0\n",
+      "- Accuracy 0.0\n",
       "\n",
       "By class:\n",
       "                                                                                  precision    recall  f1-score   support\n",
       "\n",
+      "                                                   a social unit living together     0.0000    0.0000    0.0000         0\n",
       "                                     a particular branch of scientific knowledge     0.0000    0.0000    0.0000         0\n",
       "                      an activity that is diverting and that holds the attention     0.0000    0.0000    0.0000         0\n",
+      "                             a machine for performing calculations automatically     0.0000    0.0000    0.0000         0\n",
       "                                  knowledge acquired by learning and instruction     0.0000    0.0000    0.0000         0\n",
       "                 an active diversion requiring physical exertion and competition     0.0000    0.0000    0.0000         0\n",
-      "an extended social group having a distinctive cultural and economic organization     0.0000    0.0000    0.0000         0\n",
-      "                          the commercial activity of providing funds and capital     0.0000    0.0000    0.0000         0\n",
       "                     the study of government of states and other political units     0.0000    0.0000    0.0000         0\n",
-      "                             a machine for performing calculations automatically     1.0000    1.0000    1.0000         1\n",
+      "an extended social group having a distinctive cultural and economic organization     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "                                                                       micro avg     1.0000    1.0000    1.0000         1\n",
-      "                                                                       macro avg     0.1250    0.1250    0.1250         1\n",
-      "                                                                    weighted avg     1.0000    1.0000    1.0000         1\n",
-      "                                                                     samples avg     1.0000    1.0000    1.0000         1\n",
+      "                                                                       micro avg     0.0000    0.0000    0.0000         1\n",
+      "                                                                       macro avg     0.0000    0.0000    0.0000         1\n",
+      "                                                                    weighted avg     0.0000    0.0000    0.0000         1\n",
+      "                                                                     samples avg     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "2021-09-08 14:28:43,830 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:28:57,999 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
+      "2021-09-21 20:34:23,488 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:38,465 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 14:29:01,831 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:34:42,370 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 8/8 [00:00<00:00, 15613.97it/s]"
+      "100%|██████████| 8/8 [00:00<00:00, 15686.97it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:29:01,833 [b'a social unit living together', b'a particular branch of scientific knowledge', b'a machine for performing calculations automatically', b'knowledge acquired by learning and instruction', b'an active diversion requiring physical exertion and competition', b'an extended social group having a distinctive cultural and economic organization', b'the study of government of states and other political units', b'an activity that is diverting and that holds the attention']\n",
-      "2021-09-08 14:29:01,841 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:01,842 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:34:42,372 [b'a social unit living together', b'a particular branch of scientific knowledge', b'an activity that is diverting and that holds the attention', b'knowledge acquired by learning and instruction', b'an active diversion requiring physical exertion and competition', b'the commercial activity of providing funds and capital', b'the study of government of states and other political units', b'a machine for performing calculations automatically']\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 20:34:46,408 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:46,411 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -6452,215 +6495,201 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:29:01,843 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:01,843 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
-      "2021-09-08 14:29:01,844 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:01,844 Parameters:\n",
-      "2021-09-08 14:29:01,844  - learning_rate: \"0.02\"\n",
-      "2021-09-08 14:29:01,844  - mini_batch_size: \"1\"\n",
-      "2021-09-08 14:29:01,845  - patience: \"3\"\n",
-      "2021-09-08 14:29:01,845  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 14:29:01,845  - max_epochs: \"10\"\n",
-      "2021-09-08 14:29:01,845  - shuffle: \"True\"\n",
-      "2021-09-08 14:29:01,846  - train_with_dev: \"False\"\n",
-      "2021-09-08 14:29:01,846  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 14:29:01,846 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:01,847 Model training base path: \"None\"\n",
-      "2021-09-08 14:29:01,847 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:01,847 Device: cuda:1\n",
-      "2021-09-08 14:29:01,847 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:01,848 Embeddings storage mode: cpu\n",
-      "2021-09-08 14:29:01,854 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:01,933 epoch 1 - iter 1/7 - loss 0.40138900 - samples/sec: 15.52 - lr: 0.020000\n",
-      "2021-09-08 14:29:01,979 epoch 1 - iter 2/7 - loss 0.20187925 - samples/sec: 21.54 - lr: 0.020000\n",
-      "2021-09-08 14:29:02,032 epoch 1 - iter 3/7 - loss 0.19758446 - samples/sec: 19.06 - lr: 0.020000\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 14:29:02,093 epoch 1 - iter 4/7 - loss 0.54380401 - samples/sec: 16.60 - lr: 0.020000\n",
-      "2021-09-08 14:29:02,137 epoch 1 - iter 5/7 - loss 0.43762893 - samples/sec: 22.74 - lr: 0.020000\n",
-      "2021-09-08 14:29:02,184 epoch 1 - iter 6/7 - loss 0.42444283 - samples/sec: 21.84 - lr: 0.020000\n",
-      "2021-09-08 14:29:02,235 epoch 1 - iter 7/7 - loss 0.52829811 - samples/sec: 19.49 - lr: 0.020000\n",
-      "2021-09-08 14:29:02,236 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:02,236 EPOCH 1 done: loss 0.5283 - lr 0.0200000\n",
-      "2021-09-08 14:29:02,304 DEV : loss 0.3657345473766327 - score 0.0\n",
-      "2021-09-08 14:29:02,305 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 14:29:07,308 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:07,386 epoch 2 - iter 1/7 - loss 1.00454533 - samples/sec: 15.91 - lr: 0.020000\n",
-      "2021-09-08 14:29:07,432 epoch 2 - iter 2/7 - loss 0.50986428 - samples/sec: 22.26 - lr: 0.020000\n",
-      "2021-09-08 14:29:07,478 epoch 2 - iter 3/7 - loss 0.48883919 - samples/sec: 21.78 - lr: 0.020000\n",
-      "2021-09-08 14:29:07,534 epoch 2 - iter 4/7 - loss 0.40425430 - samples/sec: 18.19 - lr: 0.020000\n",
-      "2021-09-08 14:29:07,585 epoch 2 - iter 5/7 - loss 0.43637654 - samples/sec: 19.50 - lr: 0.020000\n",
-      "2021-09-08 14:29:07,633 epoch 2 - iter 6/7 - loss 0.36997697 - samples/sec: 21.16 - lr: 0.020000\n",
-      "2021-09-08 14:29:07,701 epoch 2 - iter 7/7 - loss 0.34341349 - samples/sec: 14.86 - lr: 0.020000\n",
-      "2021-09-08 14:29:07,701 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:07,702 EPOCH 2 done: loss 0.3434 - lr 0.0200000\n",
-      "2021-09-08 14:29:07,768 DEV : loss 0.1871633529663086 - score 0.0\n",
-      "2021-09-08 14:29:07,768 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:34:46,411 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:46,412 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
+      "2021-09-21 20:34:46,412 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:46,412 Parameters:\n",
+      "2021-09-21 20:34:46,412  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:34:46,413  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:34:46,413  - patience: \"3\"\n",
+      "2021-09-21 20:34:46,413  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:34:46,414  - max_epochs: \"10\"\n",
+      "2021-09-21 20:34:46,414  - shuffle: \"True\"\n",
+      "2021-09-21 20:34:46,414  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:34:46,414  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:34:46,415 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:46,415 Model training base path: \"None\"\n",
+      "2021-09-21 20:34:46,415 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:46,416 Device: cuda:0\n",
+      "2021-09-21 20:34:46,416 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:46,416 Embeddings storage mode: cpu\n",
+      "2021-09-21 20:34:46,470 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:46,570 epoch 1 - iter 1/7 - loss 0.59465718 - samples/sec: 14.76 - lr: 0.020000\n",
+      "2021-09-21 20:34:46,694 epoch 1 - iter 2/7 - loss 0.30281668 - samples/sec: 8.09 - lr: 0.020000\n",
+      "2021-09-21 20:34:46,794 epoch 1 - iter 3/7 - loss 0.21068096 - samples/sec: 10.05 - lr: 0.020000\n",
+      "2021-09-21 20:34:46,867 epoch 1 - iter 4/7 - loss 0.58255467 - samples/sec: 13.65 - lr: 0.020000\n",
+      "2021-09-21 20:34:46,933 epoch 1 - iter 5/7 - loss 0.46637367 - samples/sec: 15.22 - lr: 0.020000\n",
+      "2021-09-21 20:34:47,001 epoch 1 - iter 6/7 - loss 0.56418889 - samples/sec: 14.88 - lr: 0.020000\n",
+      "2021-09-21 20:34:47,065 epoch 1 - iter 7/7 - loss 0.50542211 - samples/sec: 15.70 - lr: 0.020000\n",
+      "2021-09-21 20:34:47,066 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:47,067 EPOCH 1 done: loss 0.5054 - lr 0.0200000\n",
+      "2021-09-21 20:34:47,221 DEV : loss 0.0019532013684511185 - score 0.0\n",
+      "2021-09-21 20:34:47,221 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 14:29:13,602 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:13,664 epoch 3 - iter 1/7 - loss 0.00360004 - samples/sec: 21.92 - lr: 0.020000\n",
-      "2021-09-08 14:29:13,718 epoch 3 - iter 2/7 - loss 0.08167894 - samples/sec: 18.57 - lr: 0.020000\n",
-      "2021-09-08 14:29:13,793 epoch 3 - iter 3/7 - loss 0.11051930 - samples/sec: 13.35 - lr: 0.020000\n",
-      "2021-09-08 14:29:13,849 epoch 3 - iter 4/7 - loss 0.35335550 - samples/sec: 18.17 - lr: 0.020000\n",
-      "2021-09-08 14:29:13,895 epoch 3 - iter 5/7 - loss 0.39980256 - samples/sec: 21.78 - lr: 0.020000\n",
-      "2021-09-08 14:29:13,939 epoch 3 - iter 6/7 - loss 0.33400591 - samples/sec: 22.75 - lr: 0.020000\n",
-      "2021-09-08 14:29:13,987 epoch 3 - iter 7/7 - loss 0.28721175 - samples/sec: 20.96 - lr: 0.020000\n",
-      "2021-09-08 14:29:13,988 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:13,989 EPOCH 3 done: loss 0.2872 - lr 0.0200000\n",
-      "2021-09-08 14:29:14,056 DEV : loss 0.389532208442688 - score 0.0\n",
-      "2021-09-08 14:29:14,057 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 14:29:14,059 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:14,139 epoch 4 - iter 1/7 - loss 0.06215905 - samples/sec: 15.03 - lr: 0.020000\n",
-      "2021-09-08 14:29:14,183 epoch 4 - iter 2/7 - loss 0.03385761 - samples/sec: 22.74 - lr: 0.020000\n",
-      "2021-09-08 14:29:14,239 epoch 4 - iter 3/7 - loss 0.10696658 - samples/sec: 18.07 - lr: 0.020000\n",
-      "2021-09-08 14:29:14,283 epoch 4 - iter 4/7 - loss 0.08285861 - samples/sec: 23.14 - lr: 0.020000\n",
-      "2021-09-08 14:29:14,334 epoch 4 - iter 5/7 - loss 0.12523969 - samples/sec: 19.56 - lr: 0.020000\n",
-      "2021-09-08 14:29:14,380 epoch 4 - iter 6/7 - loss 0.14412044 - samples/sec: 21.82 - lr: 0.020000\n",
-      "2021-09-08 14:29:14,427 epoch 4 - iter 7/7 - loss 0.12424086 - samples/sec: 21.54 - lr: 0.020000\n",
-      "2021-09-08 14:29:14,428 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:14,428 EPOCH 4 done: loss 0.1242 - lr 0.0200000\n",
-      "2021-09-08 14:29:14,496 DEV : loss 0.028536219149827957 - score 0.0\n",
-      "2021-09-08 14:29:14,497 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 14:29:18,726 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:18,789 epoch 5 - iter 1/7 - loss 0.05347587 - samples/sec: 21.25 - lr: 0.020000\n",
-      "2021-09-08 14:29:18,834 epoch 5 - iter 2/7 - loss 0.02771138 - samples/sec: 22.28 - lr: 0.020000\n",
-      "2021-09-08 14:29:18,881 epoch 5 - iter 3/7 - loss 0.01897953 - samples/sec: 21.26 - lr: 0.020000\n",
-      "2021-09-08 14:29:18,944 epoch 5 - iter 4/7 - loss 0.06060350 - samples/sec: 16.15 - lr: 0.020000\n",
-      "2021-09-08 14:29:19,012 epoch 5 - iter 5/7 - loss 0.04971230 - samples/sec: 14.79 - lr: 0.020000\n",
-      "2021-09-08 14:29:19,060 epoch 5 - iter 6/7 - loss 0.04368192 - samples/sec: 20.79 - lr: 0.020000\n",
-      "2021-09-08 14:29:19,112 epoch 5 - iter 7/7 - loss 0.04459234 - samples/sec: 19.51 - lr: 0.020000\n",
-      "2021-09-08 14:29:19,113 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:19,113 EPOCH 5 done: loss 0.0446 - lr 0.0200000\n",
-      "2021-09-08 14:29:19,183 DEV : loss 0.9640219211578369 - score 0.0\n",
-      "2021-09-08 14:29:19,183 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 14:29:19,185 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:19,262 epoch 6 - iter 1/7 - loss 0.00090630 - samples/sec: 15.90 - lr: 0.020000\n",
-      "2021-09-08 14:29:19,305 epoch 6 - iter 2/7 - loss 0.00207203 - samples/sec: 23.55 - lr: 0.020000\n",
-      "2021-09-08 14:29:19,352 epoch 6 - iter 3/7 - loss 0.00174541 - samples/sec: 21.50 - lr: 0.020000\n",
-      "2021-09-08 14:29:19,408 epoch 6 - iter 4/7 - loss 0.19879550 - samples/sec: 17.68 - lr: 0.020000\n",
-      "2021-09-08 14:29:19,456 epoch 6 - iter 5/7 - loss 0.15926016 - samples/sec: 21.13 - lr: 0.020000\n",
-      "2021-09-08 14:29:19,500 epoch 6 - iter 6/7 - loss 0.13290947 - samples/sec: 23.17 - lr: 0.020000\n",
-      "2021-09-08 14:29:19,544 epoch 6 - iter 7/7 - loss 0.11403081 - samples/sec: 22.85 - lr: 0.020000\n",
-      "2021-09-08 14:29:19,545 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:19,545 EPOCH 6 done: loss 0.1140 - lr 0.0200000\n",
-      "2021-09-08 14:29:19,612 DEV : loss 0.46846604347229004 - score 0.0\n",
-      "2021-09-08 14:29:19,613 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 14:29:19,615 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:19,681 epoch 7 - iter 1/7 - loss 0.01207837 - samples/sec: 19.20 - lr: 0.020000\n",
-      "2021-09-08 14:29:19,725 epoch 7 - iter 2/7 - loss 0.00619270 - samples/sec: 22.85 - lr: 0.020000\n",
-      "2021-09-08 14:29:19,768 epoch 7 - iter 3/7 - loss 0.00480401 - samples/sec: 23.69 - lr: 0.020000\n",
-      "2021-09-08 14:29:19,816 epoch 7 - iter 4/7 - loss 0.00409597 - samples/sec: 21.06 - lr: 0.020000\n",
-      "2021-09-08 14:29:19,859 epoch 7 - iter 5/7 - loss 0.00341741 - samples/sec: 23.21 - lr: 0.020000\n",
-      "2021-09-08 14:29:19,921 epoch 7 - iter 6/7 - loss 0.00304073 - samples/sec: 16.18 - lr: 0.020000\n",
-      "2021-09-08 14:29:19,969 epoch 7 - iter 7/7 - loss 0.00271534 - samples/sec: 20.94 - lr: 0.020000\n",
-      "2021-09-08 14:29:19,970 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:19,971 EPOCH 7 done: loss 0.0027 - lr 0.0200000\n",
-      "2021-09-08 14:29:20,037 DEV : loss 0.5424826145172119 - score 0.0\n",
-      "2021-09-08 14:29:20,038 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 14:29:20,040 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:20,109 epoch 8 - iter 1/7 - loss 0.67318273 - samples/sec: 17.88 - lr: 0.020000\n",
-      "2021-09-08 14:29:20,157 epoch 8 - iter 2/7 - loss 0.33894090 - samples/sec: 21.26 - lr: 0.020000\n",
-      "2021-09-08 14:29:20,200 epoch 8 - iter 3/7 - loss 0.22610556 - samples/sec: 23.06 - lr: 0.020000\n",
-      "2021-09-08 14:29:20,244 epoch 8 - iter 4/7 - loss 0.16980080 - samples/sec: 23.15 - lr: 0.020000\n",
-      "2021-09-08 14:29:20,306 epoch 8 - iter 5/7 - loss 0.13642204 - samples/sec: 16.18 - lr: 0.020000\n",
-      "2021-09-08 14:29:20,354 epoch 8 - iter 6/7 - loss 0.11408444 - samples/sec: 21.08 - lr: 0.020000\n",
-      "2021-09-08 14:29:20,397 epoch 8 - iter 7/7 - loss 0.09784194 - samples/sec: 23.59 - lr: 0.020000\n"
+      "2021-09-21 20:34:52,031 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:52,126 epoch 2 - iter 1/7 - loss 1.11346352 - samples/sec: 13.06 - lr: 0.020000\n",
+      "2021-09-21 20:34:52,200 epoch 2 - iter 2/7 - loss 0.55810604 - samples/sec: 13.58 - lr: 0.020000\n",
+      "2021-09-21 20:34:52,311 epoch 2 - iter 3/7 - loss 0.37808764 - samples/sec: 9.05 - lr: 0.020000\n",
+      "2021-09-21 20:34:52,375 epoch 2 - iter 4/7 - loss 0.30100456 - samples/sec: 15.63 - lr: 0.020000\n",
+      "2021-09-21 20:34:52,446 epoch 2 - iter 5/7 - loss 0.43593476 - samples/sec: 14.19 - lr: 0.020000\n",
+      "2021-09-21 20:34:52,522 epoch 2 - iter 6/7 - loss 0.39315388 - samples/sec: 13.23 - lr: 0.020000\n",
+      "2021-09-21 20:34:52,599 epoch 2 - iter 7/7 - loss 0.34003094 - samples/sec: 13.12 - lr: 0.020000\n",
+      "2021-09-21 20:34:52,600 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:52,600 EPOCH 2 done: loss 0.3400 - lr 0.0200000\n",
+      "2021-09-21 20:34:52,812 DEV : loss 0.023849936202168465 - score 0.0\n",
+      "2021-09-21 20:34:52,815 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:34:52,896 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:52,993 epoch 3 - iter 1/7 - loss 0.03183550 - samples/sec: 14.89 - lr: 0.020000\n",
+      "2021-09-21 20:34:53,072 epoch 3 - iter 2/7 - loss 0.01724497 - samples/sec: 12.78 - lr: 0.020000\n",
+      "2021-09-21 20:34:53,158 epoch 3 - iter 3/7 - loss 0.21564114 - samples/sec: 11.70 - lr: 0.020000\n",
+      "2021-09-21 20:34:53,223 epoch 3 - iter 4/7 - loss 0.17966019 - samples/sec: 15.40 - lr: 0.020000\n",
+      "2021-09-21 20:34:53,293 epoch 3 - iter 5/7 - loss 0.14391324 - samples/sec: 14.53 - lr: 0.020000\n",
+      "2021-09-21 20:34:53,374 epoch 3 - iter 6/7 - loss 0.12105532 - samples/sec: 12.40 - lr: 0.020000\n",
+      "2021-09-21 20:34:53,467 epoch 3 - iter 7/7 - loss 0.10394320 - samples/sec: 10.79 - lr: 0.020000\n",
+      "2021-09-21 20:34:53,468 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:53,468 EPOCH 3 done: loss 0.1039 - lr 0.0200000\n",
+      "2021-09-21 20:34:53,898 DEV : loss 0.01727244444191456 - score 0.0\n",
+      "2021-09-21 20:34:53,899 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:34:53,901 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:54,023 epoch 4 - iter 1/7 - loss 0.02790701 - samples/sec: 11.21 - lr: 0.020000\n",
+      "2021-09-21 20:34:54,117 epoch 4 - iter 2/7 - loss 0.01477280 - samples/sec: 10.75 - lr: 0.020000\n",
+      "2021-09-21 20:34:54,196 epoch 4 - iter 3/7 - loss 0.01027889 - samples/sec: 12.67 - lr: 0.020000\n",
+      "2021-09-21 20:34:54,320 epoch 4 - iter 4/7 - loss 0.00839797 - samples/sec: 8.10 - lr: 0.020000\n",
+      "2021-09-21 20:34:54,407 epoch 4 - iter 5/7 - loss 0.06744063 - samples/sec: 11.56 - lr: 0.020000\n",
+      "2021-09-21 20:34:54,487 epoch 4 - iter 6/7 - loss 0.05625989 - samples/sec: 12.55 - lr: 0.020000\n",
+      "2021-09-21 20:34:54,554 epoch 4 - iter 7/7 - loss 0.21379876 - samples/sec: 15.12 - lr: 0.020000\n",
+      "2021-09-21 20:34:54,555 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:54,555 EPOCH 4 done: loss 0.2138 - lr 0.0200000\n",
+      "2021-09-21 20:34:54,689 DEV : loss 0.03472405672073364 - score 0.0\n",
+      "2021-09-21 20:34:54,690 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:34:54,692 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:54,863 epoch 5 - iter 1/7 - loss 0.00529351 - samples/sec: 8.60 - lr: 0.020000\n",
+      "2021-09-21 20:34:54,935 epoch 5 - iter 2/7 - loss 0.00496461 - samples/sec: 13.94 - lr: 0.020000\n",
+      "2021-09-21 20:34:55,029 epoch 5 - iter 3/7 - loss 0.00441299 - samples/sec: 10.73 - lr: 0.020000\n",
+      "2021-09-21 20:34:55,110 epoch 5 - iter 4/7 - loss 0.00476172 - samples/sec: 12.38 - lr: 0.020000\n",
+      "2021-09-21 20:34:55,175 epoch 5 - iter 5/7 - loss 0.00429323 - samples/sec: 15.41 - lr: 0.020000\n",
+      "2021-09-21 20:34:55,260 epoch 5 - iter 6/7 - loss 0.00463505 - samples/sec: 11.81 - lr: 0.020000\n",
+      "2021-09-21 20:34:55,331 epoch 5 - iter 7/7 - loss 0.00404171 - samples/sec: 14.13 - lr: 0.020000\n",
+      "2021-09-21 20:34:55,332 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:55,333 EPOCH 5 done: loss 0.0040 - lr 0.0200000\n",
+      "2021-09-21 20:34:55,476 DEV : loss 0.03395191580057144 - score 0.0\n",
+      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 20:34:55,478 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:34:55,481 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:55,620 epoch 6 - iter 1/7 - loss 0.00096280 - samples/sec: 10.53 - lr: 0.010000\n",
+      "2021-09-21 20:34:55,692 epoch 6 - iter 2/7 - loss 0.00277676 - samples/sec: 14.06 - lr: 0.010000\n",
+      "2021-09-21 20:34:55,767 epoch 6 - iter 3/7 - loss 0.00300778 - samples/sec: 13.37 - lr: 0.010000\n",
+      "2021-09-21 20:34:55,865 epoch 6 - iter 4/7 - loss 0.00297910 - samples/sec: 10.22 - lr: 0.010000\n",
+      "2021-09-21 20:34:55,941 epoch 6 - iter 5/7 - loss 0.23381174 - samples/sec: 13.24 - lr: 0.010000\n",
+      "2021-09-21 20:34:56,020 epoch 6 - iter 6/7 - loss 0.19564321 - samples/sec: 12.83 - lr: 0.010000\n",
+      "2021-09-21 20:34:56,173 epoch 6 - iter 7/7 - loss 0.16780992 - samples/sec: 6.53 - lr: 0.010000\n",
+      "2021-09-21 20:34:56,175 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:56,175 EPOCH 6 done: loss 0.1678 - lr 0.0100000\n",
+      "2021-09-21 20:34:56,317 DEV : loss 0.08503717184066772 - score 0.0\n",
+      "2021-09-21 20:34:56,320 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:34:56,322 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:56,436 epoch 7 - iter 1/7 - loss 0.00323993 - samples/sec: 13.26 - lr: 0.010000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:29:20,398 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:20,398 EPOCH 8 done: loss 0.0978 - lr 0.0200000\n",
-      "2021-09-08 14:29:20,466 DEV : loss 0.4277067184448242 - score 0.0\n",
-      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 14:29:20,467 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 14:29:20,469 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:20,545 epoch 9 - iter 1/7 - loss 0.00267604 - samples/sec: 15.80 - lr: 0.010000\n",
-      "2021-09-08 14:29:20,598 epoch 9 - iter 2/7 - loss 0.00657130 - samples/sec: 19.28 - lr: 0.010000\n",
-      "2021-09-08 14:29:20,640 epoch 9 - iter 3/7 - loss 0.00467471 - samples/sec: 23.72 - lr: 0.010000\n",
-      "2021-09-08 14:29:20,687 epoch 9 - iter 4/7 - loss 0.00392440 - samples/sec: 21.65 - lr: 0.010000\n",
-      "2021-09-08 14:29:20,734 epoch 9 - iter 5/7 - loss 0.00331032 - samples/sec: 21.17 - lr: 0.010000\n",
-      "2021-09-08 14:29:20,778 epoch 9 - iter 6/7 - loss 0.00286435 - samples/sec: 23.18 - lr: 0.010000\n",
-      "2021-09-08 14:29:20,824 epoch 9 - iter 7/7 - loss 0.00255513 - samples/sec: 22.12 - lr: 0.010000\n",
-      "2021-09-08 14:29:20,825 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:20,825 EPOCH 9 done: loss 0.0026 - lr 0.0100000\n",
-      "2021-09-08 14:29:20,891 DEV : loss 0.4624103903770447 - score 0.0\n",
-      "2021-09-08 14:29:20,891 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 14:29:20,893 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:20,959 epoch 10 - iter 1/7 - loss 0.00672920 - samples/sec: 19.09 - lr: 0.010000\n",
-      "2021-09-08 14:29:21,003 epoch 10 - iter 2/7 - loss 0.00358445 - samples/sec: 22.87 - lr: 0.010000\n",
-      "2021-09-08 14:29:21,046 epoch 10 - iter 3/7 - loss 0.00320440 - samples/sec: 23.70 - lr: 0.010000\n",
-      "2021-09-08 14:29:21,089 epoch 10 - iter 4/7 - loss 0.00261822 - samples/sec: 23.25 - lr: 0.010000\n",
-      "2021-09-08 14:29:21,152 epoch 10 - iter 5/7 - loss 0.00230673 - samples/sec: 16.16 - lr: 0.010000\n",
-      "2021-09-08 14:29:21,200 epoch 10 - iter 6/7 - loss 0.00216205 - samples/sec: 20.66 - lr: 0.010000\n",
-      "2021-09-08 14:29:21,248 epoch 10 - iter 7/7 - loss 0.00194281 - samples/sec: 21.06 - lr: 0.010000\n",
-      "2021-09-08 14:29:21,249 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:21,250 EPOCH 10 done: loss 0.0019 - lr 0.0100000\n",
-      "2021-09-08 14:29:21,315 DEV : loss 0.4836142063140869 - score 0.0\n",
-      "2021-09-08 14:29:21,316 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 14:29:26,254 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:26,254 Testing using best model ...\n",
-      "2021-09-08 14:29:26,256 loading file None/best-model.pt\n",
+      "2021-09-21 20:34:56,495 epoch 7 - iter 2/7 - loss 0.00277481 - samples/sec: 16.84 - lr: 0.010000\n",
+      "2021-09-21 20:34:56,601 epoch 7 - iter 3/7 - loss 0.00322597 - samples/sec: 9.53 - lr: 0.010000\n",
+      "2021-09-21 20:34:56,679 epoch 7 - iter 4/7 - loss 0.00694969 - samples/sec: 12.93 - lr: 0.010000\n",
+      "2021-09-21 20:34:56,755 epoch 7 - iter 5/7 - loss 0.00572111 - samples/sec: 13.19 - lr: 0.010000\n",
+      "2021-09-21 20:34:56,850 epoch 7 - iter 6/7 - loss 0.00495721 - samples/sec: 10.56 - lr: 0.010000\n",
+      "2021-09-21 20:34:56,955 epoch 7 - iter 7/7 - loss 0.00440587 - samples/sec: 9.53 - lr: 0.010000\n",
+      "2021-09-21 20:34:56,956 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:56,957 EPOCH 7 done: loss 0.0044 - lr 0.0100000\n",
+      "2021-09-21 20:34:57,143 DEV : loss 0.12074557691812515 - score 0.0\n",
+      "2021-09-21 20:34:57,145 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:34:57,147 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:57,333 epoch 8 - iter 1/7 - loss 0.00088568 - samples/sec: 8.69 - lr: 0.010000\n",
+      "2021-09-21 20:34:57,412 epoch 8 - iter 2/7 - loss 0.00085666 - samples/sec: 12.66 - lr: 0.010000\n",
+      "2021-09-21 20:34:57,481 epoch 8 - iter 3/7 - loss 0.00084891 - samples/sec: 14.64 - lr: 0.010000\n",
+      "2021-09-21 20:34:57,575 epoch 8 - iter 4/7 - loss 0.00284520 - samples/sec: 10.70 - lr: 0.010000\n",
+      "2021-09-21 20:34:57,640 epoch 8 - iter 5/7 - loss 0.00249406 - samples/sec: 15.36 - lr: 0.010000\n",
+      "2021-09-21 20:34:57,715 epoch 8 - iter 6/7 - loss 0.00310325 - samples/sec: 13.41 - lr: 0.010000\n",
+      "2021-09-21 20:34:57,813 epoch 8 - iter 7/7 - loss 0.00300921 - samples/sec: 10.32 - lr: 0.010000\n",
+      "2021-09-21 20:34:57,814 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:57,814 EPOCH 8 done: loss 0.0030 - lr 0.0100000\n",
+      "2021-09-21 20:34:57,954 DEV : loss 0.14459453523159027 - score 0.0\n",
+      "2021-09-21 20:34:57,956 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:34:57,958 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:58,141 epoch 9 - iter 1/7 - loss 0.00127142 - samples/sec: 7.76 - lr: 0.010000\n",
+      "2021-09-21 20:34:58,234 epoch 9 - iter 2/7 - loss 0.00151073 - samples/sec: 10.85 - lr: 0.010000\n",
+      "2021-09-21 20:34:58,303 epoch 9 - iter 3/7 - loss 0.00111392 - samples/sec: 14.56 - lr: 0.010000\n",
+      "2021-09-21 20:34:58,373 epoch 9 - iter 4/7 - loss 0.00117110 - samples/sec: 14.23 - lr: 0.010000\n",
+      "2021-09-21 20:34:58,444 epoch 9 - iter 5/7 - loss 0.00115144 - samples/sec: 14.17 - lr: 0.010000\n",
+      "2021-09-21 20:34:58,522 epoch 9 - iter 6/7 - loss 0.00159079 - samples/sec: 12.89 - lr: 0.010000\n",
+      "2021-09-21 20:34:58,659 epoch 9 - iter 7/7 - loss 0.00150080 - samples/sec: 7.36 - lr: 0.010000\n",
+      "2021-09-21 20:34:58,660 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:58,660 EPOCH 9 done: loss 0.0015 - lr 0.0100000\n",
+      "2021-09-21 20:34:58,799 DEV : loss 0.15576393902301788 - score 0.0\n",
+      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 20:34:58,804 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:34:58,808 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:58,964 epoch 10 - iter 1/7 - loss 0.00259001 - samples/sec: 11.21 - lr: 0.005000\n",
+      "2021-09-21 20:34:59,040 epoch 10 - iter 2/7 - loss 0.00220909 - samples/sec: 13.29 - lr: 0.005000\n",
+      "2021-09-21 20:34:59,128 epoch 10 - iter 3/7 - loss 0.00159900 - samples/sec: 11.40 - lr: 0.005000\n",
+      "2021-09-21 20:34:59,213 epoch 10 - iter 4/7 - loss 0.00134033 - samples/sec: 11.85 - lr: 0.005000\n",
+      "2021-09-21 20:34:59,299 epoch 10 - iter 5/7 - loss 0.00125474 - samples/sec: 11.65 - lr: 0.005000\n",
+      "2021-09-21 20:34:59,371 epoch 10 - iter 6/7 - loss 0.00130302 - samples/sec: 13.89 - lr: 0.005000\n",
+      "2021-09-21 20:34:59,449 epoch 10 - iter 7/7 - loss 0.00152103 - samples/sec: 12.99 - lr: 0.005000\n",
+      "2021-09-21 20:34:59,450 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:59,450 EPOCH 10 done: loss 0.0015 - lr 0.0050000\n",
+      "2021-09-21 20:34:59,608 DEV : loss 0.1562950611114502 - score 0.0\n",
+      "2021-09-21 20:34:59,612 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:35:08,831 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:08,832 Testing using best model ...\n",
+      "2021-09-21 20:35:08,833 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 14:29:30,849 \t1.0\n",
-      "2021-09-08 14:29:30,850 \n",
+      "2021-09-21 20:35:13,891 \t0.0\n",
+      "2021-09-21 20:35:13,892 \n",
       "Results:\n",
-      "- F-score (micro) 1.0\n",
-      "- F-score (macro) 0.125\n",
-      "- Accuracy 1.0\n",
+      "- F-score (micro) 0.0\n",
+      "- F-score (macro) 0.0\n",
+      "- Accuracy 0.0\n",
       "\n",
       "By class:\n",
-      "                                                                                  precision    recall  f1-score   support\n",
+      "                                                                 precision    recall  f1-score   support\n",
       "\n",
-      "                                                   a social unit living together     0.0000    0.0000    0.0000         0\n",
-      "                                     a particular branch of scientific knowledge     0.0000    0.0000    0.0000         0\n",
-      "                             a machine for performing calculations automatically     0.0000    0.0000    0.0000         0\n",
-      "                                  knowledge acquired by learning and instruction     0.0000    0.0000    0.0000         0\n",
-      "                 an active diversion requiring physical exertion and competition     0.0000    0.0000    0.0000         0\n",
-      "an extended social group having a distinctive cultural and economic organization     0.0000    0.0000    0.0000         0\n",
-      "                     the study of government of states and other political units     0.0000    0.0000    0.0000         0\n",
-      "                      an activity that is diverting and that holds the attention     1.0000    1.0000    1.0000         1\n",
+      "                                  a social unit living together     0.0000    0.0000    0.0000         0\n",
+      "                    a particular branch of scientific knowledge     0.0000    0.0000    0.0000         0\n",
+      "     an activity that is diverting and that holds the attention     0.0000    0.0000    0.0000         0\n",
+      "                 knowledge acquired by learning and instruction     0.0000    0.0000    0.0000         0\n",
+      "an active diversion requiring physical exertion and competition     0.0000    0.0000    0.0000         0\n",
+      "         the commercial activity of providing funds and capital     0.0000    0.0000    0.0000         0\n",
+      "    the study of government of states and other political units     0.0000    0.0000    0.0000         0\n",
+      "            a machine for performing calculations automatically     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "                                                                       micro avg     1.0000    1.0000    1.0000         1\n",
-      "                                                                       macro avg     0.1250    0.1250    0.1250         1\n",
-      "                                                                    weighted avg     1.0000    1.0000    1.0000         1\n",
-      "                                                                     samples avg     1.0000    1.0000    1.0000         1\n",
+      "                                                      micro avg     0.0000    0.0000    0.0000         1\n",
+      "                                                      macro avg     0.0000    0.0000    0.0000         1\n",
+      "                                                   weighted avg     0.0000    0.0000    0.0000         1\n",
+      "                                                    samples avg     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "2021-09-08 14:29:30,850 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:45,065 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
+      "2021-09-21 20:35:13,892 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:37,875 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 14:29:48,942 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:35:42,164 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 8/8 [00:00<00:00, 12300.01it/s]"
+      "100%|██████████| 8/8 [00:00<00:00, 8044.70it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:29:48,944 [b'a social unit living together', b'a particular branch of scientific knowledge', b'knowledge acquired by learning and instruction', b'an active diversion requiring physical exertion and competition', b'an extended social group having a distinctive cultural and economic organization', b'the commercial activity of providing funds and capital', b'the study of government of states and other political units', b'an activity that is diverting and that holds the attention']\n",
-      "2021-09-08 14:29:48,952 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:48,954 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:35:42,167 [b'a social unit living together', b'a particular branch of scientific knowledge', b'an activity that is diverting and that holds the attention', b'a machine for performing calculations automatically', b'knowledge acquired by learning and instruction', b'an active diversion requiring physical exertion and competition', b'the commercial activity of providing funds and capital', b'an extended social group having a distinctive cultural and economic organization']\n",
+      "2021-09-21 20:35:42,176 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:42,178 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -6973,27 +7002,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:29:48,954 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:48,955 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
-      "2021-09-08 14:29:48,955 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:48,955 Parameters:\n",
-      "2021-09-08 14:29:48,956  - learning_rate: \"0.02\"\n",
-      "2021-09-08 14:29:48,956  - mini_batch_size: \"1\"\n",
-      "2021-09-08 14:29:48,956  - patience: \"3\"\n",
-      "2021-09-08 14:29:48,956  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 14:29:48,957  - max_epochs: \"10\"\n",
-      "2021-09-08 14:29:48,957  - shuffle: \"True\"\n",
-      "2021-09-08 14:29:48,957  - train_with_dev: \"False\"\n",
-      "2021-09-08 14:29:48,957  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 14:29:48,958 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:48,958 Model training base path: \"None\"\n",
-      "2021-09-08 14:29:48,958 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:48,959 Device: cuda:1\n",
-      "2021-09-08 14:29:48,959 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:48,959 Embeddings storage mode: cpu\n",
-      "2021-09-08 14:29:48,965 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:49,019 epoch 1 - iter 1/7 - loss 1.33217573 - samples/sec: 25.40 - lr: 0.020000\n",
-      "2021-09-08 14:29:49,065 epoch 1 - iter 2/7 - loss 1.04346153 - samples/sec: 21.87 - lr: 0.020000\n"
+      "2021-09-21 20:35:42,178 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:42,179 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
+      "2021-09-21 20:35:42,179 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:42,179 Parameters:\n",
+      "2021-09-21 20:35:42,180  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:35:42,180  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:35:42,180  - patience: \"3\"\n",
+      "2021-09-21 20:35:42,180  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:35:42,181  - max_epochs: \"10\"\n",
+      "2021-09-21 20:35:42,181  - shuffle: \"True\"\n",
+      "2021-09-21 20:35:42,181  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:35:42,181  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:35:42,182 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:42,182 Model training base path: \"None\"\n",
+      "2021-09-21 20:35:42,182 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:42,183 Device: cuda:0\n",
+      "2021-09-21 20:35:42,183 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:42,183 Embeddings storage mode: cpu\n",
+      "2021-09-21 20:35:42,192 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
@@ -7007,139 +7034,144 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:29:49,225 epoch 1 - iter 3/7 - loss 1.12601644 - samples/sec: 6.29 - lr: 0.020000\n",
-      "2021-09-08 14:29:49,285 epoch 1 - iter 4/7 - loss 0.86405764 - samples/sec: 16.67 - lr: 0.020000\n",
-      "2021-09-08 14:29:49,331 epoch 1 - iter 5/7 - loss 0.83132784 - samples/sec: 21.79 - lr: 0.020000\n",
-      "2021-09-08 14:29:49,378 epoch 1 - iter 6/7 - loss 0.78393834 - samples/sec: 21.82 - lr: 0.020000\n",
-      "2021-09-08 14:29:49,424 epoch 1 - iter 7/7 - loss 0.68093563 - samples/sec: 21.91 - lr: 0.020000\n",
-      "2021-09-08 14:29:49,425 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:49,425 EPOCH 1 done: loss 0.6809 - lr 0.0200000\n",
-      "2021-09-08 14:29:49,455 DEV : loss 0.09088127315044403 - score 0.0\n",
-      "2021-09-08 14:29:49,456 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:35:42,404 epoch 1 - iter 1/7 - loss 0.36486828 - samples/sec: 6.77 - lr: 0.020000\n",
+      "2021-09-21 20:35:42,487 epoch 1 - iter 2/7 - loss 0.60881862 - samples/sec: 12.13 - lr: 0.020000\n",
+      "2021-09-21 20:35:42,555 epoch 1 - iter 3/7 - loss 0.59108734 - samples/sec: 14.79 - lr: 0.020000\n",
+      "2021-09-21 20:35:42,750 epoch 1 - iter 4/7 - loss 0.57203318 - samples/sec: 5.13 - lr: 0.020000\n",
+      "2021-09-21 20:35:42,843 epoch 1 - iter 5/7 - loss 0.59602327 - samples/sec: 10.85 - lr: 0.020000\n",
+      "2021-09-21 20:35:42,925 epoch 1 - iter 6/7 - loss 0.49839330 - samples/sec: 12.22 - lr: 0.020000\n",
+      "2021-09-21 20:35:43,000 epoch 1 - iter 7/7 - loss 0.47732071 - samples/sec: 13.49 - lr: 0.020000\n",
+      "2021-09-21 20:35:43,001 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:43,001 EPOCH 1 done: loss 0.4773 - lr 0.0200000\n",
+      "2021-09-21 20:35:43,055 DEV : loss 0.24357819557189941 - score 0.0\n",
+      "2021-09-21 20:35:43,055 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 14:29:53,838 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:53,901 epoch 2 - iter 1/7 - loss 0.10567222 - samples/sec: 21.19 - lr: 0.020000\n",
-      "2021-09-08 14:29:53,964 epoch 2 - iter 2/7 - loss 0.05595080 - samples/sec: 15.96 - lr: 0.020000\n",
-      "2021-09-08 14:29:54,006 epoch 2 - iter 3/7 - loss 0.05166466 - samples/sec: 23.72 - lr: 0.020000\n",
-      "2021-09-08 14:29:54,054 epoch 2 - iter 4/7 - loss 0.10307200 - samples/sec: 21.36 - lr: 0.020000\n",
-      "2021-09-08 14:29:54,100 epoch 2 - iter 5/7 - loss 0.20323566 - samples/sec: 21.65 - lr: 0.020000\n",
-      "2021-09-08 14:29:54,244 epoch 2 - iter 6/7 - loss 0.52896697 - samples/sec: 7.00 - lr: 0.020000\n",
-      "2021-09-08 14:29:54,287 epoch 2 - iter 7/7 - loss 0.45693538 - samples/sec: 23.41 - lr: 0.020000\n",
-      "2021-09-08 14:29:54,288 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:29:54,288 EPOCH 2 done: loss 0.4569 - lr 0.0200000\n",
-      "2021-09-08 14:29:54,316 DEV : loss 0.08326512575149536 - score 0.0\n",
-      "2021-09-08 14:29:54,317 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:35:49,531 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:49,647 epoch 2 - iter 1/7 - loss 0.46660426 - samples/sec: 10.34 - lr: 0.020000\n",
+      "2021-09-21 20:35:49,715 epoch 2 - iter 2/7 - loss 0.30804135 - samples/sec: 14.97 - lr: 0.020000\n",
+      "2021-09-21 20:35:49,784 epoch 2 - iter 3/7 - loss 0.25708152 - samples/sec: 14.58 - lr: 0.020000\n",
+      "2021-09-21 20:35:49,903 epoch 2 - iter 4/7 - loss 0.24637261 - samples/sec: 8.41 - lr: 0.020000\n",
+      "2021-09-21 20:35:49,965 epoch 2 - iter 5/7 - loss 0.19804951 - samples/sec: 16.24 - lr: 0.020000\n",
+      "2021-09-21 20:35:50,036 epoch 2 - iter 6/7 - loss 0.17678630 - samples/sec: 14.28 - lr: 0.020000\n",
+      "2021-09-21 20:35:50,231 epoch 2 - iter 7/7 - loss 0.27873159 - samples/sec: 5.13 - lr: 0.020000\n",
+      "2021-09-21 20:35:50,232 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:50,232 EPOCH 2 done: loss 0.2787 - lr 0.0200000\n",
+      "2021-09-21 20:35:50,297 DEV : loss 0.14742666482925415 - score 0.0\n",
+      "2021-09-21 20:35:50,298 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 14:30:01,403 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:01,465 epoch 3 - iter 1/7 - loss 0.38804114 - samples/sec: 21.25 - lr: 0.020000\n",
-      "2021-09-08 14:30:01,529 epoch 3 - iter 2/7 - loss 0.19807345 - samples/sec: 15.85 - lr: 0.020000\n",
-      "2021-09-08 14:30:01,572 epoch 3 - iter 3/7 - loss 0.13277845 - samples/sec: 23.46 - lr: 0.020000\n",
-      "2021-09-08 14:30:01,619 epoch 3 - iter 4/7 - loss 0.34220520 - samples/sec: 21.29 - lr: 0.020000\n",
-      "2021-09-08 14:30:01,666 epoch 3 - iter 5/7 - loss 0.31826577 - samples/sec: 21.56 - lr: 0.020000\n",
-      "2021-09-08 14:30:01,709 epoch 3 - iter 6/7 - loss 0.26554319 - samples/sec: 23.61 - lr: 0.020000\n",
-      "2021-09-08 14:30:01,853 epoch 3 - iter 7/7 - loss 0.33430845 - samples/sec: 6.97 - lr: 0.020000\n",
-      "2021-09-08 14:30:01,854 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:01,854 EPOCH 3 done: loss 0.3343 - lr 0.0200000\n",
-      "2021-09-08 14:30:01,891 DEV : loss 0.3083532154560089 - score 0.0\n",
-      "2021-09-08 14:30:01,892 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 14:30:01,897 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:01,954 epoch 4 - iter 1/7 - loss 0.02304730 - samples/sec: 23.35 - lr: 0.020000\n",
-      "2021-09-08 14:30:02,010 epoch 4 - iter 2/7 - loss 0.01329350 - samples/sec: 17.89 - lr: 0.020000\n",
-      "2021-09-08 14:30:02,055 epoch 4 - iter 3/7 - loss 0.00970748 - samples/sec: 22.45 - lr: 0.020000\n",
-      "2021-09-08 14:30:02,100 epoch 4 - iter 4/7 - loss 0.00901405 - samples/sec: 22.31 - lr: 0.020000\n",
-      "2021-09-08 14:30:02,144 epoch 4 - iter 5/7 - loss 0.00774302 - samples/sec: 23.07 - lr: 0.020000\n",
-      "2021-09-08 14:30:02,289 epoch 4 - iter 6/7 - loss 0.10924857 - samples/sec: 6.95 - lr: 0.020000\n",
-      "2021-09-08 14:30:02,335 epoch 4 - iter 7/7 - loss 0.09695714 - samples/sec: 22.16 - lr: 0.020000\n",
-      "2021-09-08 14:30:02,336 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:02,336 EPOCH 4 done: loss 0.0970 - lr 0.0200000\n",
-      "2021-09-08 14:30:02,368 DEV : loss 0.43126291036605835 - score 0.0\n",
-      "2021-09-08 14:30:02,369 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 14:30:02,371 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:02,525 epoch 5 - iter 1/7 - loss 0.02285624 - samples/sec: 7.12 - lr: 0.020000\n",
-      "2021-09-08 14:30:02,569 epoch 5 - iter 2/7 - loss 0.01317768 - samples/sec: 23.22 - lr: 0.020000\n",
-      "2021-09-08 14:30:02,626 epoch 5 - iter 3/7 - loss 0.01000344 - samples/sec: 17.90 - lr: 0.020000\n",
-      "2021-09-08 14:30:02,672 epoch 5 - iter 4/7 - loss 0.03483940 - samples/sec: 21.86 - lr: 0.020000\n",
-      "2021-09-08 14:30:02,719 epoch 5 - iter 5/7 - loss 0.32659658 - samples/sec: 21.34 - lr: 0.020000\n",
-      "2021-09-08 14:30:02,762 epoch 5 - iter 6/7 - loss 0.27246727 - samples/sec: 23.65 - lr: 0.020000\n",
-      "2021-09-08 14:30:02,805 epoch 5 - iter 7/7 - loss 0.23577400 - samples/sec: 23.29 - lr: 0.020000\n",
-      "2021-09-08 14:30:02,806 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:02,806 EPOCH 5 done: loss 0.2358 - lr 0.0200000\n",
-      "2021-09-08 14:30:02,839 DEV : loss 0.3701602518558502 - score 0.0\n",
-      "2021-09-08 14:30:02,839 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 14:30:02,844 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:02,901 epoch 6 - iter 1/7 - loss 0.00294488 - samples/sec: 23.06 - lr: 0.020000\n",
-      "2021-09-08 14:30:02,944 epoch 6 - iter 2/7 - loss 0.00221916 - samples/sec: 23.53 - lr: 0.020000\n",
-      "2021-09-08 14:30:02,990 epoch 6 - iter 3/7 - loss 0.15348149 - samples/sec: 21.71 - lr: 0.020000\n",
-      "2021-09-08 14:30:03,045 epoch 6 - iter 4/7 - loss 0.11812232 - samples/sec: 18.28 - lr: 0.020000\n",
-      "2021-09-08 14:30:03,088 epoch 6 - iter 5/7 - loss 0.09549333 - samples/sec: 23.33 - lr: 0.020000\n",
-      "2021-09-08 14:30:03,134 epoch 6 - iter 6/7 - loss 0.13608032 - samples/sec: 22.01 - lr: 0.020000\n",
-      "2021-09-08 14:30:03,277 epoch 6 - iter 7/7 - loss 0.14068589 - samples/sec: 7.01 - lr: 0.020000\n",
-      "2021-09-08 14:30:03,278 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:03,279 EPOCH 6 done: loss 0.1407 - lr 0.0200000\n",
-      "2021-09-08 14:30:03,307 DEV : loss 0.3568868041038513 - score 0.0\n",
-      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 14:30:03,307 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 14:30:03,310 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:03,365 epoch 7 - iter 1/7 - loss 0.00973701 - samples/sec: 23.72 - lr: 0.010000\n",
-      "2021-09-08 14:30:03,421 epoch 7 - iter 2/7 - loss 0.00545453 - samples/sec: 18.11 - lr: 0.010000\n",
-      "2021-09-08 14:30:03,467 epoch 7 - iter 3/7 - loss 0.02953489 - samples/sec: 21.93 - lr: 0.010000\n",
-      "2021-09-08 14:30:03,610 epoch 7 - iter 4/7 - loss 0.03055839 - samples/sec: 7.02 - lr: 0.010000\n",
-      "2021-09-08 14:30:03,653 epoch 7 - iter 5/7 - loss 0.02496973 - samples/sec: 23.29 - lr: 0.010000\n",
-      "2021-09-08 14:30:03,696 epoch 7 - iter 6/7 - loss 0.02095207 - samples/sec: 23.70 - lr: 0.010000\n",
-      "2021-09-08 14:30:03,739 epoch 7 - iter 7/7 - loss 0.01817936 - samples/sec: 23.23 - lr: 0.010000\n",
-      "2021-09-08 14:30:03,740 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:03,740 EPOCH 7 done: loss 0.0182 - lr 0.0100000\n",
-      "2021-09-08 14:30:03,770 DEV : loss 0.21856534481048584 - score 0.0\n",
-      "2021-09-08 14:30:03,771 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 14:30:03,773 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:03,829 epoch 8 - iter 1/7 - loss 0.00213942 - samples/sec: 23.77 - lr: 0.010000\n",
-      "2021-09-08 14:30:03,872 epoch 8 - iter 2/7 - loss 0.00196139 - samples/sec: 23.49 - lr: 0.010000\n",
-      "2021-09-08 14:30:03,915 epoch 8 - iter 3/7 - loss 0.00189436 - samples/sec: 23.46 - lr: 0.010000\n",
-      "2021-09-08 14:30:03,958 epoch 8 - iter 4/7 - loss 0.00179969 - samples/sec: 23.52 - lr: 0.010000\n",
-      "2021-09-08 14:30:04,096 epoch 8 - iter 5/7 - loss 0.00708402 - samples/sec: 7.23 - lr: 0.010000\n",
-      "2021-09-08 14:30:04,140 epoch 8 - iter 6/7 - loss 0.00788277 - samples/sec: 23.25 - lr: 0.010000\n"
+      "2021-09-21 20:35:56,438 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:56,515 epoch 3 - iter 1/7 - loss 0.08346557 - samples/sec: 17.08 - lr: 0.020000\n",
+      "2021-09-21 20:35:56,568 epoch 3 - iter 2/7 - loss 0.05112201 - samples/sec: 18.85 - lr: 0.020000\n",
+      "2021-09-21 20:35:56,714 epoch 3 - iter 3/7 - loss 0.17563728 - samples/sec: 6.90 - lr: 0.020000\n",
+      "2021-09-21 20:35:56,768 epoch 3 - iter 4/7 - loss 0.14239421 - samples/sec: 18.51 - lr: 0.020000\n",
+      "2021-09-21 20:35:56,827 epoch 3 - iter 5/7 - loss 0.12287945 - samples/sec: 17.16 - lr: 0.020000\n",
+      "2021-09-21 20:35:56,902 epoch 3 - iter 6/7 - loss 0.10333387 - samples/sec: 13.40 - lr: 0.020000\n",
+      "2021-09-21 20:35:56,967 epoch 3 - iter 7/7 - loss 0.09105063 - samples/sec: 15.52 - lr: 0.020000\n",
+      "2021-09-21 20:35:56,968 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:56,968 EPOCH 3 done: loss 0.0911 - lr 0.0200000\n",
+      "2021-09-21 20:35:57,287 DEV : loss 0.007059692405164242 - score 0.0\n",
+      "2021-09-21 20:35:57,288 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:36:04,624 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:04,724 epoch 4 - iter 1/7 - loss 0.00513620 - samples/sec: 12.23 - lr: 0.020000\n",
+      "2021-09-21 20:36:04,796 epoch 4 - iter 2/7 - loss 0.00347314 - samples/sec: 14.00 - lr: 0.020000\n",
+      "2021-09-21 20:36:04,874 epoch 4 - iter 3/7 - loss 0.00282392 - samples/sec: 12.94 - lr: 0.020000\n",
+      "2021-09-21 20:36:05,053 epoch 4 - iter 4/7 - loss 0.15978929 - samples/sec: 5.58 - lr: 0.020000\n",
+      "2021-09-21 20:36:05,128 epoch 4 - iter 5/7 - loss 0.12803044 - samples/sec: 13.47 - lr: 0.020000\n",
+      "2021-09-21 20:36:05,200 epoch 4 - iter 6/7 - loss 0.10821105 - samples/sec: 13.97 - lr: 0.020000\n",
+      "2021-09-21 20:36:05,263 epoch 4 - iter 7/7 - loss 0.09288413 - samples/sec: 16.03 - lr: 0.020000\n",
+      "2021-09-21 20:36:05,264 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:05,264 EPOCH 4 done: loss 0.0929 - lr 0.0200000\n",
+      "2021-09-21 20:36:05,325 DEV : loss 0.04912199825048447 - score 0.0\n",
+      "2021-09-21 20:36:05,325 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:36:05,327 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:05,447 epoch 5 - iter 1/7 - loss 0.00735244 - samples/sec: 13.21 - lr: 0.020000\n",
+      "2021-09-21 20:36:05,513 epoch 5 - iter 2/7 - loss 0.00443687 - samples/sec: 15.44 - lr: 0.020000\n",
+      "2021-09-21 20:36:05,582 epoch 5 - iter 3/7 - loss 0.00348654 - samples/sec: 14.41 - lr: 0.020000\n",
+      "2021-09-21 20:36:05,750 epoch 5 - iter 4/7 - loss 0.13189388 - samples/sec: 5.99 - lr: 0.020000\n",
+      "2021-09-21 20:36:05,825 epoch 5 - iter 5/7 - loss 0.10783697 - samples/sec: 13.30 - lr: 0.020000\n",
+      "2021-09-21 20:36:05,895 epoch 5 - iter 6/7 - loss 0.09014301 - samples/sec: 14.51 - lr: 0.020000\n",
+      "2021-09-21 20:36:05,979 epoch 5 - iter 7/7 - loss 0.07843755 - samples/sec: 11.90 - lr: 0.020000\n",
+      "2021-09-21 20:36:05,980 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:05,981 EPOCH 5 done: loss 0.0784 - lr 0.0200000\n",
+      "2021-09-21 20:36:06,034 DEV : loss 0.0020722555927932262 - score 0.0\n",
+      "2021-09-21 20:36:06,035 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:36:10,171 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:10,292 epoch 6 - iter 1/7 - loss 0.00314787 - samples/sec: 13.33 - lr: 0.020000\n",
+      "2021-09-21 20:36:10,360 epoch 6 - iter 2/7 - loss 0.00268971 - samples/sec: 14.84 - lr: 0.020000\n",
+      "2021-09-21 20:36:10,522 epoch 6 - iter 3/7 - loss 0.01105906 - samples/sec: 6.20 - lr: 0.020000\n",
+      "2021-09-21 20:36:10,598 epoch 6 - iter 4/7 - loss 0.00834522 - samples/sec: 13.28 - lr: 0.020000\n",
+      "2021-09-21 20:36:10,674 epoch 6 - iter 5/7 - loss 0.00690415 - samples/sec: 13.20 - lr: 0.020000\n",
+      "2021-09-21 20:36:10,758 epoch 6 - iter 6/7 - loss 0.00612400 - samples/sec: 11.94 - lr: 0.020000\n",
+      "2021-09-21 20:36:10,821 epoch 6 - iter 7/7 - loss 0.00536287 - samples/sec: 16.14 - lr: 0.020000\n",
+      "2021-09-21 20:36:10,822 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:10,822 EPOCH 6 done: loss 0.0054 - lr 0.0200000\n",
+      "2021-09-21 20:36:10,882 DEV : loss 0.0014692895347252488 - score 0.0\n",
+      "2021-09-21 20:36:10,882 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:36:15,051 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:15,166 epoch 7 - iter 1/7 - loss 0.00295978 - samples/sec: 12.08 - lr: 0.020000\n",
+      "2021-09-21 20:36:15,231 epoch 7 - iter 2/7 - loss 0.00193643 - samples/sec: 15.55 - lr: 0.020000\n",
+      "2021-09-21 20:36:15,299 epoch 7 - iter 3/7 - loss 0.00154236 - samples/sec: 14.72 - lr: 0.020000\n",
+      "2021-09-21 20:36:15,369 epoch 7 - iter 4/7 - loss 0.00143357 - samples/sec: 14.39 - lr: 0.020000\n",
+      "2021-09-21 20:36:15,533 epoch 7 - iter 5/7 - loss 0.00141956 - samples/sec: 6.14 - lr: 0.020000\n",
+      "2021-09-21 20:36:15,607 epoch 7 - iter 6/7 - loss 0.00125283 - samples/sec: 13.50 - lr: 0.020000\n",
+      "2021-09-21 20:36:15,694 epoch 7 - iter 7/7 - loss 0.00134999 - samples/sec: 11.55 - lr: 0.020000\n",
+      "2021-09-21 20:36:15,695 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:15,696 EPOCH 7 done: loss 0.0013 - lr 0.0200000\n",
+      "2021-09-21 20:36:15,748 DEV : loss 0.0011885669082403183 - score 0.0\n",
+      "2021-09-21 20:36:15,749 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:36:20,015 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:20,090 epoch 8 - iter 1/7 - loss 0.00057664 - samples/sec: 17.89 - lr: 0.020000\n",
+      "2021-09-21 20:36:20,144 epoch 8 - iter 2/7 - loss 0.00056257 - samples/sec: 18.87 - lr: 0.020000\n",
+      "2021-09-21 20:36:20,280 epoch 8 - iter 3/7 - loss 0.00092365 - samples/sec: 7.36 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:30:04,196 epoch 8 - iter 7/7 - loss 0.00689411 - samples/sec: 18.04 - lr: 0.010000\n",
-      "2021-09-08 14:30:04,197 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:04,197 EPOCH 8 done: loss 0.0069 - lr 0.0100000\n",
-      "2021-09-08 14:30:04,225 DEV : loss 0.07460449635982513 - score 0.0\n",
-      "2021-09-08 14:30:04,225 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:36:20,333 epoch 8 - iter 4/7 - loss 0.00077664 - samples/sec: 19.00 - lr: 0.020000\n",
+      "2021-09-21 20:36:20,398 epoch 8 - iter 5/7 - loss 0.00102764 - samples/sec: 15.46 - lr: 0.020000\n",
+      "2021-09-21 20:36:20,475 epoch 8 - iter 6/7 - loss 0.01708458 - samples/sec: 13.00 - lr: 0.020000\n",
+      "2021-09-21 20:36:20,531 epoch 8 - iter 7/7 - loss 0.01472440 - samples/sec: 18.18 - lr: 0.020000\n",
+      "2021-09-21 20:36:20,532 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:20,532 EPOCH 8 done: loss 0.0147 - lr 0.0200000\n",
+      "2021-09-21 20:36:20,578 DEV : loss 0.0008859297959133983 - score 0.0\n",
+      "2021-09-21 20:36:20,578 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 14:30:08,559 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:08,618 epoch 9 - iter 1/7 - loss 0.00143371 - samples/sec: 22.77 - lr: 0.010000\n",
-      "2021-09-08 14:30:08,681 epoch 9 - iter 2/7 - loss 0.00121380 - samples/sec: 15.98 - lr: 0.010000\n",
-      "2021-09-08 14:30:08,725 epoch 9 - iter 3/7 - loss 0.00129100 - samples/sec: 23.15 - lr: 0.010000\n",
-      "2021-09-08 14:30:08,768 epoch 9 - iter 4/7 - loss 0.00114494 - samples/sec: 23.59 - lr: 0.010000\n",
-      "2021-09-08 14:30:08,811 epoch 9 - iter 5/7 - loss 0.00115084 - samples/sec: 23.46 - lr: 0.010000\n",
-      "2021-09-08 14:30:08,854 epoch 9 - iter 6/7 - loss 0.00202255 - samples/sec: 23.19 - lr: 0.010000\n",
-      "2021-09-08 14:30:08,994 epoch 9 - iter 7/7 - loss 0.00267832 - samples/sec: 7.19 - lr: 0.010000\n",
-      "2021-09-08 14:30:08,995 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:08,995 EPOCH 9 done: loss 0.0027 - lr 0.0100000\n",
-      "2021-09-08 14:30:09,023 DEV : loss 0.08491013199090958 - score 0.0\n",
-      "2021-09-08 14:30:09,024 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 14:30:09,026 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:09,082 epoch 10 - iter 1/7 - loss 0.00110087 - samples/sec: 23.72 - lr: 0.010000\n",
-      "2021-09-08 14:30:09,124 epoch 10 - iter 2/7 - loss 0.00086661 - samples/sec: 23.79 - lr: 0.010000\n",
-      "2021-09-08 14:30:09,167 epoch 10 - iter 3/7 - loss 0.00135225 - samples/sec: 23.48 - lr: 0.010000\n",
-      "2021-09-08 14:30:09,223 epoch 10 - iter 4/7 - loss 0.00118427 - samples/sec: 17.96 - lr: 0.010000\n",
-      "2021-09-08 14:30:09,266 epoch 10 - iter 5/7 - loss 0.00135158 - samples/sec: 23.72 - lr: 0.010000\n",
-      "2021-09-08 14:30:09,405 epoch 10 - iter 6/7 - loss 0.00152067 - samples/sec: 7.23 - lr: 0.010000\n",
-      "2021-09-08 14:30:09,448 epoch 10 - iter 7/7 - loss 0.00174026 - samples/sec: 23.31 - lr: 0.010000\n",
-      "2021-09-08 14:30:09,449 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:09,449 EPOCH 10 done: loss 0.0017 - lr 0.0100000\n",
-      "2021-09-08 14:30:09,477 DEV : loss 0.09042832255363464 - score 0.0\n",
-      "2021-09-08 14:30:09,478 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 14:30:13,092 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:13,093 Testing using best model ...\n",
-      "2021-09-08 14:30:13,094 loading file None/best-model.pt\n",
+      "2021-09-21 20:36:29,714 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:29,788 epoch 9 - iter 1/7 - loss 0.00048497 - samples/sec: 18.21 - lr: 0.020000\n",
+      "2021-09-21 20:36:29,870 epoch 9 - iter 2/7 - loss 0.00096964 - samples/sec: 12.23 - lr: 0.020000\n",
+      "2021-09-21 20:36:29,923 epoch 9 - iter 3/7 - loss 0.00110131 - samples/sec: 19.01 - lr: 0.020000\n",
+      "2021-09-21 20:36:30,052 epoch 9 - iter 4/7 - loss 0.00122853 - samples/sec: 7.82 - lr: 0.020000\n",
+      "2021-09-21 20:36:30,107 epoch 9 - iter 5/7 - loss 0.00104211 - samples/sec: 18.37 - lr: 0.020000\n",
+      "2021-09-21 20:36:30,171 epoch 9 - iter 6/7 - loss 0.00271030 - samples/sec: 15.62 - lr: 0.020000\n",
+      "2021-09-21 20:36:30,224 epoch 9 - iter 7/7 - loss 0.00238299 - samples/sec: 18.98 - lr: 0.020000\n",
+      "2021-09-21 20:36:30,225 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:30,226 EPOCH 9 done: loss 0.0024 - lr 0.0200000\n",
+      "2021-09-21 20:36:31,345 DEV : loss 0.0009741701069287956 - score 0.0\n",
+      "2021-09-21 20:36:31,346 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:36:31,377 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:31,452 epoch 10 - iter 1/7 - loss 0.00054151 - samples/sec: 18.31 - lr: 0.020000\n",
+      "2021-09-21 20:36:31,559 epoch 10 - iter 2/7 - loss 0.00081297 - samples/sec: 9.35 - lr: 0.020000\n",
+      "2021-09-21 20:36:31,650 epoch 10 - iter 3/7 - loss 0.00093785 - samples/sec: 11.09 - lr: 0.020000\n",
+      "2021-09-21 20:36:31,706 epoch 10 - iter 4/7 - loss 0.00080685 - samples/sec: 17.93 - lr: 0.020000\n",
+      "2021-09-21 20:36:31,764 epoch 10 - iter 5/7 - loss 0.00073562 - samples/sec: 17.50 - lr: 0.020000\n",
+      "2021-09-21 20:36:31,969 epoch 10 - iter 6/7 - loss 0.23742235 - samples/sec: 4.87 - lr: 0.020000\n",
+      "2021-09-21 20:36:32,030 epoch 10 - iter 7/7 - loss 0.20360340 - samples/sec: 16.69 - lr: 0.020000\n",
+      "2021-09-21 20:36:32,031 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:32,031 EPOCH 10 done: loss 0.2036 - lr 0.0200000\n",
+      "2021-09-21 20:36:32,100 DEV : loss 0.0019156031776219606 - score 0.0\n",
+      "2021-09-21 20:36:32,101 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:36:39,002 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:39,002 Testing using best model ...\n",
+      "2021-09-21 20:36:39,004 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 14:30:17,835 \t0.0\n",
-      "2021-09-08 14:30:17,836 \n",
+      "2021-09-21 20:36:44,248 \t0.0\n",
+      "2021-09-21 20:36:44,249 \n",
       "Results:\n",
       "- F-score (micro) 0.0\n",
       "- F-score (macro) 0.0\n",
@@ -7150,38 +7182,38 @@
       "\n",
       "                                                   a social unit living together     0.0000    0.0000    0.0000         0\n",
       "                                     a particular branch of scientific knowledge     0.0000    0.0000    0.0000         0\n",
+      "                      an activity that is diverting and that holds the attention     0.0000    0.0000    0.0000         0\n",
+      "                             a machine for performing calculations automatically     0.0000    0.0000    0.0000         0\n",
       "                                  knowledge acquired by learning and instruction     0.0000    0.0000    0.0000         0\n",
       "                 an active diversion requiring physical exertion and competition     0.0000    0.0000    0.0000         0\n",
-      "an extended social group having a distinctive cultural and economic organization     0.0000    0.0000    0.0000         0\n",
       "                          the commercial activity of providing funds and capital     0.0000    0.0000    0.0000         0\n",
-      "                     the study of government of states and other political units     0.0000    0.0000    0.0000         0\n",
-      "                      an activity that is diverting and that holds the attention     0.0000    0.0000    0.0000         1\n",
+      "an extended social group having a distinctive cultural and economic organization     0.0000    0.0000    0.0000         1\n",
       "\n",
       "                                                                       micro avg     0.0000    0.0000    0.0000         1\n",
       "                                                                       macro avg     0.0000    0.0000    0.0000         1\n",
       "                                                                    weighted avg     0.0000    0.0000    0.0000         1\n",
       "                                                                     samples avg     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "2021-09-08 14:30:17,836 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:31,932 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
+      "2021-09-21 20:36:44,249 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:00,379 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 14:30:35,793 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:37:04,586 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 8/8 [00:00<00:00, 13911.46it/s]"
+      "100%|██████████| 8/8 [00:00<00:00, 7315.11it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:30:35,795 [b'a social unit living together', b'an activity that is diverting and that holds the attention', b'a machine for performing calculations automatically', b'knowledge acquired by learning and instruction', b'an active diversion requiring physical exertion and competition', b'an extended social group having a distinctive cultural and economic organization', b'the study of government of states and other political units', b'the commercial activity of providing funds and capital']\n",
-      "2021-09-08 14:30:35,803 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:35,805 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:37:04,589 [b'a social unit living together', b'a particular branch of scientific knowledge', b'a machine for performing calculations automatically', b'knowledge acquired by learning and instruction', b'an active diversion requiring physical exertion and competition', b'the commercial activity of providing funds and capital', b'the study of government of states and other political units', b'an extended social group having a distinctive cultural and economic organization']\n",
+      "2021-09-21 20:37:04,765 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:04,766 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -7494,26 +7526,24 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:30:35,806 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:35,806 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
-      "2021-09-08 14:30:35,806 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:35,807 Parameters:\n",
-      "2021-09-08 14:30:35,807  - learning_rate: \"0.02\"\n",
-      "2021-09-08 14:30:35,807  - mini_batch_size: \"1\"\n",
-      "2021-09-08 14:30:35,808  - patience: \"3\"\n",
-      "2021-09-08 14:30:35,808  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 14:30:35,808  - max_epochs: \"10\"\n",
-      "2021-09-08 14:30:35,808  - shuffle: \"True\"\n",
-      "2021-09-08 14:30:35,809  - train_with_dev: \"False\"\n",
-      "2021-09-08 14:30:35,809  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 14:30:35,809 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:35,809 Model training base path: \"None\"\n",
-      "2021-09-08 14:30:35,810 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:35,810 Device: cuda:1\n",
-      "2021-09-08 14:30:35,810 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:35,811 Embeddings storage mode: cpu\n",
-      "2021-09-08 14:30:35,817 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:35,896 epoch 1 - iter 1/7 - loss 0.32732579 - samples/sec: 15.30 - lr: 0.020000\n"
+      "2021-09-21 20:37:04,767 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:04,767 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
+      "2021-09-21 20:37:04,768 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:04,768 Parameters:\n",
+      "2021-09-21 20:37:04,768  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:37:04,768  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:37:04,769  - patience: \"3\"\n",
+      "2021-09-21 20:37:04,769  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:37:04,769  - max_epochs: \"10\"\n",
+      "2021-09-21 20:37:04,770  - shuffle: \"True\"\n",
+      "2021-09-21 20:37:04,770  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:37:04,770  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:37:04,770 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:04,771 Model training base path: \"None\"\n",
+      "2021-09-21 20:37:04,771 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:04,771 Device: cuda:0\n",
+      "2021-09-21 20:37:04,772 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:04,772 Embeddings storage mode: cpu\n"
      ]
     },
     {
@@ -7527,185 +7557,184 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:30:36,035 epoch 1 - iter 2/7 - loss 0.50681444 - samples/sec: 7.26 - lr: 0.020000\n",
-      "2021-09-08 14:30:36,084 epoch 1 - iter 3/7 - loss 0.34381110 - samples/sec: 20.48 - lr: 0.020000\n",
-      "2021-09-08 14:30:36,130 epoch 1 - iter 4/7 - loss 0.52946200 - samples/sec: 21.72 - lr: 0.020000\n",
-      "2021-09-08 14:30:36,206 epoch 1 - iter 5/7 - loss 0.44166247 - samples/sec: 13.32 - lr: 0.020000\n",
-      "2021-09-08 14:30:36,252 epoch 1 - iter 6/7 - loss 0.69435027 - samples/sec: 21.82 - lr: 0.020000\n",
-      "2021-09-08 14:30:36,302 epoch 1 - iter 7/7 - loss 0.59578543 - samples/sec: 20.06 - lr: 0.020000\n",
-      "2021-09-08 14:30:36,303 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:36,304 EPOCH 1 done: loss 0.5958 - lr 0.0200000\n",
-      "2021-09-08 14:30:36,338 DEV : loss 0.14985528588294983 - score 0.0\n",
-      "2021-09-08 14:30:36,339 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:37:04,945 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:05,011 epoch 1 - iter 1/7 - loss 1.31641674 - samples/sec: 20.77 - lr: 0.020000\n",
+      "2021-09-21 20:37:05,177 epoch 1 - iter 2/7 - loss 1.74822128 - samples/sec: 6.03 - lr: 0.020000\n",
+      "2021-09-21 20:37:05,236 epoch 1 - iter 3/7 - loss 1.18598401 - samples/sec: 17.19 - lr: 0.020000\n",
+      "2021-09-21 20:37:05,297 epoch 1 - iter 4/7 - loss 1.08688475 - samples/sec: 16.64 - lr: 0.020000\n",
+      "2021-09-21 20:37:05,349 epoch 1 - iter 5/7 - loss 0.87019923 - samples/sec: 19.06 - lr: 0.020000\n",
+      "2021-09-21 20:37:05,427 epoch 1 - iter 6/7 - loss 0.77951851 - samples/sec: 12.94 - lr: 0.020000\n",
+      "2021-09-21 20:37:05,528 epoch 1 - iter 7/7 - loss 0.75748928 - samples/sec: 10.00 - lr: 0.020000\n",
+      "2021-09-21 20:37:05,529 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:05,529 EPOCH 1 done: loss 0.7575 - lr 0.0200000\n",
+      "2021-09-21 20:37:05,694 DEV : loss 0.33212798833847046 - score 0.0\n",
+      "2021-09-21 20:37:05,695 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 14:30:40,691 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:40,761 epoch 2 - iter 1/7 - loss 0.31138778 - samples/sec: 18.23 - lr: 0.020000\n",
-      "2021-09-08 14:30:40,846 epoch 2 - iter 2/7 - loss 0.20521949 - samples/sec: 11.91 - lr: 0.020000\n",
-      "2021-09-08 14:30:40,902 epoch 2 - iter 3/7 - loss 0.17592175 - samples/sec: 18.06 - lr: 0.020000\n",
-      "2021-09-08 14:30:40,948 epoch 2 - iter 4/7 - loss 0.16778870 - samples/sec: 21.84 - lr: 0.020000\n",
-      "2021-09-08 14:30:40,994 epoch 2 - iter 5/7 - loss 0.26454120 - samples/sec: 21.68 - lr: 0.020000\n",
-      "2021-09-08 14:30:41,044 epoch 2 - iter 6/7 - loss 0.22935062 - samples/sec: 20.37 - lr: 0.020000\n",
-      "2021-09-08 14:30:41,112 epoch 2 - iter 7/7 - loss 0.24086397 - samples/sec: 14.82 - lr: 0.020000\n",
-      "2021-09-08 14:30:41,113 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:41,113 EPOCH 2 done: loss 0.2409 - lr 0.0200000\n",
-      "2021-09-08 14:30:41,150 DEV : loss 0.32413095235824585 - score 0.0\n",
-      "2021-09-08 14:30:41,150 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 14:30:41,152 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:41,212 epoch 3 - iter 1/7 - loss 0.01209542 - samples/sec: 21.44 - lr: 0.020000\n",
-      "2021-09-08 14:30:41,279 epoch 3 - iter 2/7 - loss 0.07640023 - samples/sec: 15.03 - lr: 0.020000\n",
-      "2021-09-08 14:30:41,325 epoch 3 - iter 3/7 - loss 0.05129005 - samples/sec: 22.03 - lr: 0.020000\n",
-      "2021-09-08 14:30:41,374 epoch 3 - iter 4/7 - loss 0.03873135 - samples/sec: 20.81 - lr: 0.020000\n",
-      "2021-09-08 14:30:41,420 epoch 3 - iter 5/7 - loss 0.06711975 - samples/sec: 21.86 - lr: 0.020000\n",
-      "2021-09-08 14:30:41,466 epoch 3 - iter 6/7 - loss 0.06011466 - samples/sec: 21.72 - lr: 0.020000\n",
-      "2021-09-08 14:30:41,537 epoch 3 - iter 7/7 - loss 0.05342109 - samples/sec: 14.23 - lr: 0.020000\n",
-      "2021-09-08 14:30:41,538 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:41,538 EPOCH 3 done: loss 0.0534 - lr 0.0200000\n",
-      "2021-09-08 14:30:41,574 DEV : loss 0.23662808537483215 - score 0.0\n",
-      "2021-09-08 14:30:41,575 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 14:30:41,578 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:41,637 epoch 4 - iter 1/7 - loss 0.00110235 - samples/sec: 21.91 - lr: 0.020000\n",
-      "2021-09-08 14:30:41,683 epoch 4 - iter 2/7 - loss 0.04804091 - samples/sec: 22.03 - lr: 0.020000\n",
-      "2021-09-08 14:30:41,747 epoch 4 - iter 3/7 - loss 0.03432041 - samples/sec: 15.80 - lr: 0.020000\n",
-      "2021-09-08 14:30:41,818 epoch 4 - iter 4/7 - loss 0.02588143 - samples/sec: 14.09 - lr: 0.020000\n",
-      "2021-09-08 14:30:41,867 epoch 4 - iter 5/7 - loss 0.02126717 - samples/sec: 20.73 - lr: 0.020000\n",
-      "2021-09-08 14:30:41,914 epoch 4 - iter 6/7 - loss 0.01897664 - samples/sec: 21.29 - lr: 0.020000\n",
-      "2021-09-08 14:30:41,957 epoch 4 - iter 7/7 - loss 0.01809304 - samples/sec: 23.68 - lr: 0.020000\n",
-      "2021-09-08 14:30:41,958 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:41,958 EPOCH 4 done: loss 0.0181 - lr 0.0200000\n",
-      "2021-09-08 14:30:41,994 DEV : loss 0.2610321640968323 - score 0.0\n",
-      "2021-09-08 14:30:41,995 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 14:30:41,997 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:42,053 epoch 5 - iter 1/7 - loss 0.04535311 - samples/sec: 23.59 - lr: 0.020000\n",
-      "2021-09-08 14:30:42,120 epoch 5 - iter 2/7 - loss 0.03353794 - samples/sec: 15.11 - lr: 0.020000\n",
-      "2021-09-08 14:30:42,166 epoch 5 - iter 3/7 - loss 0.33099124 - samples/sec: 21.95 - lr: 0.020000\n",
-      "2021-09-08 14:30:42,212 epoch 5 - iter 4/7 - loss 0.24944118 - samples/sec: 21.94 - lr: 0.020000\n",
-      "2021-09-08 14:30:42,259 epoch 5 - iter 5/7 - loss 0.20521236 - samples/sec: 21.40 - lr: 0.020000\n",
-      "2021-09-08 14:30:42,334 epoch 5 - iter 6/7 - loss 0.17580642 - samples/sec: 13.39 - lr: 0.020000\n",
-      "2021-09-08 14:30:42,382 epoch 5 - iter 7/7 - loss 0.15086027 - samples/sec: 20.78 - lr: 0.020000\n",
-      "2021-09-08 14:30:42,383 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:42,383 EPOCH 5 done: loss 0.1509 - lr 0.0200000\n",
-      "2021-09-08 14:30:42,418 DEV : loss 0.0001642862771404907 - score 0.0\n",
-      "2021-09-08 14:30:42,419 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:37:11,751 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:11,823 epoch 2 - iter 1/7 - loss 0.00733372 - samples/sec: 18.46 - lr: 0.020000\n",
+      "2021-09-21 20:37:11,883 epoch 2 - iter 2/7 - loss 0.04047469 - samples/sec: 16.74 - lr: 0.020000\n",
+      "2021-09-21 20:37:11,942 epoch 2 - iter 3/7 - loss 0.05622774 - samples/sec: 17.26 - lr: 0.020000\n",
+      "2021-09-21 20:37:12,026 epoch 2 - iter 4/7 - loss 0.15035632 - samples/sec: 11.99 - lr: 0.020000\n",
+      "2021-09-21 20:37:12,189 epoch 2 - iter 5/7 - loss 0.26166018 - samples/sec: 6.13 - lr: 0.020000\n",
+      "2021-09-21 20:37:12,249 epoch 2 - iter 6/7 - loss 0.23205715 - samples/sec: 16.92 - lr: 0.020000\n",
+      "2021-09-21 20:37:12,351 epoch 2 - iter 7/7 - loss 0.21043390 - samples/sec: 9.79 - lr: 0.020000\n",
+      "2021-09-21 20:37:12,352 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:12,353 EPOCH 2 done: loss 0.2104 - lr 0.0200000\n",
+      "2021-09-21 20:37:12,499 DEV : loss 0.5842270851135254 - score 0.0\n",
+      "2021-09-21 20:37:12,500 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:37:12,502 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:12,673 epoch 3 - iter 1/7 - loss 1.46917379 - samples/sec: 6.47 - lr: 0.020000\n",
+      "2021-09-21 20:37:12,727 epoch 3 - iter 2/7 - loss 0.73576966 - samples/sec: 18.59 - lr: 0.020000\n",
+      "2021-09-21 20:37:12,780 epoch 3 - iter 3/7 - loss 0.49222439 - samples/sec: 18.93 - lr: 0.020000\n",
+      "2021-09-21 20:37:12,834 epoch 3 - iter 4/7 - loss 0.36961814 - samples/sec: 18.85 - lr: 0.020000\n",
+      "2021-09-21 20:37:12,930 epoch 3 - iter 5/7 - loss 0.29668216 - samples/sec: 10.42 - lr: 0.020000\n",
+      "2021-09-21 20:37:13,004 epoch 3 - iter 6/7 - loss 0.24896743 - samples/sec: 13.66 - lr: 0.020000\n",
+      "2021-09-21 20:37:13,063 epoch 3 - iter 7/7 - loss 0.21911751 - samples/sec: 17.04 - lr: 0.020000\n",
+      "2021-09-21 20:37:13,064 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:13,064 EPOCH 3 done: loss 0.2191 - lr 0.0200000\n",
+      "2021-09-21 20:37:13,210 DEV : loss 0.32481688261032104 - score 0.0\n",
+      "2021-09-21 20:37:13,211 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 14:30:46,611 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:46,678 epoch 6 - iter 1/7 - loss 0.00212658 - samples/sec: 19.32 - lr: 0.020000\n",
-      "2021-09-08 14:30:46,721 epoch 6 - iter 2/7 - loss 0.00160766 - samples/sec: 23.49 - lr: 0.020000\n",
-      "2021-09-08 14:30:46,798 epoch 6 - iter 3/7 - loss 0.00117789 - samples/sec: 13.16 - lr: 0.020000\n",
-      "2021-09-08 14:30:46,841 epoch 6 - iter 4/7 - loss 0.00132738 - samples/sec: 23.61 - lr: 0.020000\n",
-      "2021-09-08 14:30:46,887 epoch 6 - iter 5/7 - loss 0.00113951 - samples/sec: 21.76 - lr: 0.020000\n",
-      "2021-09-08 14:30:46,936 epoch 6 - iter 6/7 - loss 0.00102822 - samples/sec: 20.72 - lr: 0.020000\n",
-      "2021-09-08 14:30:46,999 epoch 6 - iter 7/7 - loss 0.00105756 - samples/sec: 15.89 - lr: 0.020000\n",
-      "2021-09-08 14:30:47,000 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:47,000 EPOCH 6 done: loss 0.0011 - lr 0.0200000\n",
-      "2021-09-08 14:30:47,035 DEV : loss 0.00015570463438052684 - score 0.0\n",
-      "2021-09-08 14:30:47,035 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:37:16,871 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:17,124 epoch 4 - iter 1/7 - loss 0.06020345 - samples/sec: 4.91 - lr: 0.020000\n",
+      "2021-09-21 20:37:17,210 epoch 4 - iter 2/7 - loss 0.03066199 - samples/sec: 11.70 - lr: 0.020000\n",
+      "2021-09-21 20:37:17,339 epoch 4 - iter 3/7 - loss 0.14933650 - samples/sec: 7.79 - lr: 0.020000\n",
+      "2021-09-21 20:37:17,406 epoch 4 - iter 4/7 - loss 0.11234032 - samples/sec: 14.86 - lr: 0.020000\n",
+      "2021-09-21 20:37:17,477 epoch 4 - iter 5/7 - loss 0.23949380 - samples/sec: 14.16 - lr: 0.020000\n",
+      "2021-09-21 20:37:17,551 epoch 4 - iter 6/7 - loss 0.20029932 - samples/sec: 13.69 - lr: 0.020000\n",
+      "2021-09-21 20:37:17,621 epoch 4 - iter 7/7 - loss 0.17201893 - samples/sec: 14.33 - lr: 0.020000\n",
+      "2021-09-21 20:37:17,622 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:17,622 EPOCH 4 done: loss 0.1720 - lr 0.0200000\n",
+      "2021-09-21 20:37:17,929 DEV : loss 0.08424772322177887 - score 0.0\n",
+      "2021-09-21 20:37:17,931 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 14:30:51,167 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:51,233 epoch 7 - iter 1/7 - loss 0.00151445 - samples/sec: 19.81 - lr: 0.020000\n",
-      "2021-09-08 14:30:51,313 epoch 7 - iter 2/7 - loss 0.00093438 - samples/sec: 12.51 - lr: 0.020000\n",
-      "2021-09-08 14:30:51,386 epoch 7 - iter 3/7 - loss 0.00077447 - samples/sec: 13.79 - lr: 0.020000\n",
-      "2021-09-08 14:30:51,429 epoch 7 - iter 4/7 - loss 0.00154676 - samples/sec: 23.54 - lr: 0.020000\n",
-      "2021-09-08 14:30:51,478 epoch 7 - iter 5/7 - loss 0.00145054 - samples/sec: 20.53 - lr: 0.020000\n",
-      "2021-09-08 14:30:51,521 epoch 7 - iter 6/7 - loss 0.00138763 - samples/sec: 23.74 - lr: 0.020000\n",
-      "2021-09-08 14:30:51,568 epoch 7 - iter 7/7 - loss 0.00129003 - samples/sec: 21.21 - lr: 0.020000\n",
-      "2021-09-08 14:30:51,569 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:51,570 EPOCH 7 done: loss 0.0013 - lr 0.0200000\n",
-      "2021-09-08 14:30:51,606 DEV : loss 0.0001465269597247243 - score 0.0\n",
-      "2021-09-08 14:30:51,606 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 14:30:55,785 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:55,852 epoch 8 - iter 1/7 - loss 0.00063132 - samples/sec: 19.27 - lr: 0.020000\n",
-      "2021-09-08 14:30:55,895 epoch 8 - iter 2/7 - loss 0.00106019 - samples/sec: 23.49 - lr: 0.020000\n",
-      "2021-09-08 14:30:55,967 epoch 8 - iter 3/7 - loss 0.00089216 - samples/sec: 13.98 - lr: 0.020000\n",
-      "2021-09-08 14:30:56,010 epoch 8 - iter 4/7 - loss 0.00142576 - samples/sec: 23.44 - lr: 0.020000\n",
-      "2021-09-08 14:30:56,057 epoch 8 - iter 5/7 - loss 0.00121613 - samples/sec: 21.67 - lr: 0.020000\n"
+      "2021-09-21 20:37:23,676 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:23,845 epoch 5 - iter 1/7 - loss 0.01884704 - samples/sec: 7.52 - lr: 0.020000\n",
+      "2021-09-21 20:37:23,902 epoch 5 - iter 2/7 - loss 0.01787141 - samples/sec: 17.79 - lr: 0.020000\n",
+      "2021-09-21 20:37:24,092 epoch 5 - iter 3/7 - loss 0.01240529 - samples/sec: 5.29 - lr: 0.020000\n",
+      "2021-09-21 20:37:24,163 epoch 5 - iter 4/7 - loss 0.01060868 - samples/sec: 14.12 - lr: 0.020000\n",
+      "2021-09-21 20:37:24,240 epoch 5 - iter 5/7 - loss 0.00915583 - samples/sec: 12.98 - lr: 0.020000\n",
+      "2021-09-21 20:37:24,305 epoch 5 - iter 6/7 - loss 0.00777610 - samples/sec: 15.60 - lr: 0.020000\n",
+      "2021-09-21 20:37:24,388 epoch 5 - iter 7/7 - loss 0.00797775 - samples/sec: 12.10 - lr: 0.020000\n",
+      "2021-09-21 20:37:24,389 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:24,389 EPOCH 5 done: loss 0.0080 - lr 0.0200000\n",
+      "2021-09-21 20:37:24,666 DEV : loss 0.11333142220973969 - score 0.0\n",
+      "2021-09-21 20:37:24,668 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:37:24,693 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:24,803 epoch 6 - iter 1/7 - loss 0.00164188 - samples/sec: 12.87 - lr: 0.020000\n",
+      "2021-09-21 20:37:24,868 epoch 6 - iter 2/7 - loss 0.00135687 - samples/sec: 15.53 - lr: 0.020000\n",
+      "2021-09-21 20:37:24,994 epoch 6 - iter 3/7 - loss 0.00173173 - samples/sec: 7.97 - lr: 0.020000\n",
+      "2021-09-21 20:37:25,186 epoch 6 - iter 4/7 - loss 0.00207605 - samples/sec: 5.22 - lr: 0.020000\n",
+      "2021-09-21 20:37:25,255 epoch 6 - iter 5/7 - loss 0.00186299 - samples/sec: 14.52 - lr: 0.020000\n",
+      "2021-09-21 20:37:25,325 epoch 6 - iter 6/7 - loss 0.00167450 - samples/sec: 14.36 - lr: 0.020000\n",
+      "2021-09-21 20:37:25,412 epoch 6 - iter 7/7 - loss 0.00198698 - samples/sec: 11.52 - lr: 0.020000\n",
+      "2021-09-21 20:37:25,413 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:25,414 EPOCH 6 done: loss 0.0020 - lr 0.0200000\n",
+      "2021-09-21 20:37:25,597 DEV : loss 0.09820063412189484 - score 0.0\n",
+      "2021-09-21 20:37:25,598 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:37:25,600 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:25,713 epoch 7 - iter 1/7 - loss 0.00091631 - samples/sec: 14.59 - lr: 0.020000\n",
+      "2021-09-21 20:37:25,801 epoch 7 - iter 2/7 - loss 0.00093254 - samples/sec: 11.38 - lr: 0.020000\n",
+      "2021-09-21 20:37:25,926 epoch 7 - iter 3/7 - loss 0.00097240 - samples/sec: 8.04 - lr: 0.020000\n",
+      "2021-09-21 20:37:26,115 epoch 7 - iter 4/7 - loss 0.00100144 - samples/sec: 5.32 - lr: 0.020000\n",
+      "2021-09-21 20:37:26,179 epoch 7 - iter 5/7 - loss 0.08320433 - samples/sec: 15.61 - lr: 0.020000\n",
+      "2021-09-21 20:37:26,240 epoch 7 - iter 6/7 - loss 0.06946308 - samples/sec: 16.73 - lr: 0.020000\n",
+      "2021-09-21 20:37:26,322 epoch 7 - iter 7/7 - loss 0.06025256 - samples/sec: 12.26 - lr: 0.020000\n",
+      "2021-09-21 20:37:26,323 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:26,323 EPOCH 7 done: loss 0.0603 - lr 0.0200000\n",
+      "2021-09-21 20:37:26,509 DEV : loss 0.0894843116402626 - score 0.0\n",
+      "2021-09-21 20:37:26,510 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:37:26,512 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:26,617 epoch 8 - iter 1/7 - loss 0.00278217 - samples/sec: 13.93 - lr: 0.020000\n",
+      "2021-09-21 20:37:26,746 epoch 8 - iter 2/7 - loss 0.00165987 - samples/sec: 7.74 - lr: 0.020000\n",
+      "2021-09-21 20:37:26,817 epoch 8 - iter 3/7 - loss 0.00135928 - samples/sec: 14.15 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:30:56,129 epoch 8 - iter 6/7 - loss 0.00105978 - samples/sec: 13.90 - lr: 0.020000\n",
-      "2021-09-08 14:30:56,178 epoch 8 - iter 7/7 - loss 0.00178768 - samples/sec: 20.84 - lr: 0.020000\n",
-      "2021-09-08 14:30:56,178 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:30:56,179 EPOCH 8 done: loss 0.0018 - lr 0.0200000\n",
-      "2021-09-08 14:30:56,213 DEV : loss 0.0001338330184808001 - score 0.0\n",
-      "2021-09-08 14:30:56,214 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 14:31:00,985 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:31:01,082 epoch 9 - iter 1/7 - loss 0.00013041 - samples/sec: 12.37 - lr: 0.020000\n",
-      "2021-09-08 14:31:01,125 epoch 9 - iter 2/7 - loss 0.00043146 - samples/sec: 23.50 - lr: 0.020000\n",
-      "2021-09-08 14:31:01,176 epoch 9 - iter 3/7 - loss 0.00058509 - samples/sec: 19.71 - lr: 0.020000\n",
-      "2021-09-08 14:31:01,228 epoch 9 - iter 4/7 - loss 0.00076921 - samples/sec: 19.22 - lr: 0.020000\n",
-      "2021-09-08 14:31:01,275 epoch 9 - iter 5/7 - loss 0.00074162 - samples/sec: 21.45 - lr: 0.020000\n",
-      "2021-09-08 14:31:01,318 epoch 9 - iter 6/7 - loss 0.00082434 - samples/sec: 23.71 - lr: 0.020000\n",
-      "2021-09-08 14:31:01,381 epoch 9 - iter 7/7 - loss 0.00094124 - samples/sec: 15.92 - lr: 0.020000\n",
-      "2021-09-08 14:31:01,382 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:31:01,382 EPOCH 9 done: loss 0.0009 - lr 0.0200000\n",
-      "2021-09-08 14:31:01,470 DEV : loss 0.00012913979298900813 - score 0.0\n",
-      "2021-09-08 14:31:01,470 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 14:31:05,702 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:31:05,761 epoch 10 - iter 1/7 - loss 0.00063021 - samples/sec: 22.76 - lr: 0.020000\n",
-      "2021-09-08 14:31:05,833 epoch 10 - iter 2/7 - loss 0.00055391 - samples/sec: 13.93 - lr: 0.020000\n",
-      "2021-09-08 14:31:05,881 epoch 10 - iter 3/7 - loss 0.00066646 - samples/sec: 21.03 - lr: 0.020000\n",
-      "2021-09-08 14:31:05,955 epoch 10 - iter 4/7 - loss 0.00058060 - samples/sec: 13.62 - lr: 0.020000\n",
-      "2021-09-08 14:31:06,003 epoch 10 - iter 5/7 - loss 0.00055029 - samples/sec: 20.76 - lr: 0.020000\n",
-      "2021-09-08 14:31:06,046 epoch 10 - iter 6/7 - loss 0.00070509 - samples/sec: 23.74 - lr: 0.020000\n",
-      "2021-09-08 14:31:06,092 epoch 10 - iter 7/7 - loss 0.00065856 - samples/sec: 21.88 - lr: 0.020000\n",
-      "2021-09-08 14:31:06,093 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:31:06,093 EPOCH 10 done: loss 0.0007 - lr 0.0200000\n",
-      "2021-09-08 14:31:06,128 DEV : loss 0.0001251914945896715 - score 0.0\n",
-      "2021-09-08 14:31:06,129 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 14:31:14,082 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:31:14,083 Testing using best model ...\n",
-      "2021-09-08 14:31:14,084 loading file None/best-model.pt\n",
+      "2021-09-21 20:37:26,880 epoch 8 - iter 4/7 - loss 0.00116441 - samples/sec: 16.00 - lr: 0.020000\n",
+      "2021-09-21 20:37:27,080 epoch 8 - iter 5/7 - loss 0.00260368 - samples/sec: 5.01 - lr: 0.020000\n",
+      "2021-09-21 20:37:27,151 epoch 8 - iter 6/7 - loss 0.00248470 - samples/sec: 14.22 - lr: 0.020000\n",
+      "2021-09-21 20:37:27,237 epoch 8 - iter 7/7 - loss 0.00225740 - samples/sec: 11.72 - lr: 0.020000\n",
+      "2021-09-21 20:37:27,238 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:27,238 EPOCH 8 done: loss 0.0023 - lr 0.0200000\n",
+      "2021-09-21 20:37:27,435 DEV : loss 0.09874428808689117 - score 0.0\n",
+      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 20:37:27,437 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:37:27,439 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:27,600 epoch 9 - iter 1/7 - loss 0.00120366 - samples/sec: 7.83 - lr: 0.010000\n",
+      "2021-09-21 20:37:27,679 epoch 9 - iter 2/7 - loss 0.00101103 - samples/sec: 12.71 - lr: 0.010000\n",
+      "2021-09-21 20:37:27,740 epoch 9 - iter 3/7 - loss 0.00085130 - samples/sec: 16.56 - lr: 0.010000\n",
+      "2021-09-21 20:37:27,809 epoch 9 - iter 4/7 - loss 0.00074400 - samples/sec: 14.63 - lr: 0.010000\n",
+      "2021-09-21 20:37:27,873 epoch 9 - iter 5/7 - loss 0.00120231 - samples/sec: 15.59 - lr: 0.010000\n",
+      "2021-09-21 20:37:27,951 epoch 9 - iter 6/7 - loss 0.00111510 - samples/sec: 12.93 - lr: 0.010000\n",
+      "2021-09-21 20:37:28,151 epoch 9 - iter 7/7 - loss 0.00115734 - samples/sec: 5.02 - lr: 0.010000\n",
+      "2021-09-21 20:37:28,152 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:28,152 EPOCH 9 done: loss 0.0012 - lr 0.0100000\n",
+      "2021-09-21 20:37:28,344 DEV : loss 0.09617053717374802 - score 0.0\n",
+      "2021-09-21 20:37:28,345 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:37:28,347 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:28,469 epoch 10 - iter 1/7 - loss 0.00065877 - samples/sec: 12.67 - lr: 0.010000\n",
+      "2021-09-21 20:37:28,539 epoch 10 - iter 2/7 - loss 0.00080973 - samples/sec: 14.37 - lr: 0.010000\n",
+      "2021-09-21 20:37:28,604 epoch 10 - iter 3/7 - loss 0.00080131 - samples/sec: 15.54 - lr: 0.010000\n",
+      "2021-09-21 20:37:28,794 epoch 10 - iter 4/7 - loss 0.00074649 - samples/sec: 5.28 - lr: 0.010000\n",
+      "2021-09-21 20:37:28,920 epoch 10 - iter 5/7 - loss 0.00193803 - samples/sec: 7.94 - lr: 0.010000\n",
+      "2021-09-21 20:37:29,006 epoch 10 - iter 6/7 - loss 0.00168734 - samples/sec: 11.79 - lr: 0.010000\n",
+      "2021-09-21 20:37:29,082 epoch 10 - iter 7/7 - loss 0.00161154 - samples/sec: 13.18 - lr: 0.010000\n",
+      "2021-09-21 20:37:29,083 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:29,083 EPOCH 10 done: loss 0.0016 - lr 0.0100000\n",
+      "2021-09-21 20:37:29,287 DEV : loss 0.08794418722391129 - score 0.0\n",
+      "2021-09-21 20:37:29,288 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:37:33,416 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:33,416 Testing using best model ...\n",
+      "2021-09-21 20:37:33,418 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 14:31:18,687 \t1.0\n",
-      "2021-09-08 14:31:18,688 \n",
+      "2021-09-21 20:37:39,052 \t0.0\n",
+      "2021-09-21 20:37:39,053 \n",
       "Results:\n",
-      "- F-score (micro) 1.0\n",
-      "- F-score (macro) 0.125\n",
-      "- Accuracy 1.0\n",
+      "- F-score (micro) 0.0\n",
+      "- F-score (macro) 0.0\n",
+      "- Accuracy 0.0\n",
       "\n",
       "By class:\n",
       "                                                                                  precision    recall  f1-score   support\n",
       "\n",
       "                                                   a social unit living together     0.0000    0.0000    0.0000         0\n",
-      "                      an activity that is diverting and that holds the attention     0.0000    0.0000    0.0000         0\n",
+      "                                     a particular branch of scientific knowledge     0.0000    0.0000    0.0000         0\n",
       "                             a machine for performing calculations automatically     0.0000    0.0000    0.0000         0\n",
       "                                  knowledge acquired by learning and instruction     0.0000    0.0000    0.0000         0\n",
       "                 an active diversion requiring physical exertion and competition     0.0000    0.0000    0.0000         0\n",
-      "an extended social group having a distinctive cultural and economic organization     0.0000    0.0000    0.0000         0\n",
+      "                          the commercial activity of providing funds and capital     0.0000    0.0000    0.0000         0\n",
       "                     the study of government of states and other political units     0.0000    0.0000    0.0000         0\n",
-      "                          the commercial activity of providing funds and capital     1.0000    1.0000    1.0000         1\n",
+      "an extended social group having a distinctive cultural and economic organization     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "                                                                       micro avg     1.0000    1.0000    1.0000         1\n",
-      "                                                                       macro avg     0.1250    0.1250    0.1250         1\n",
-      "                                                                    weighted avg     1.0000    1.0000    1.0000         1\n",
-      "                                                                     samples avg     1.0000    1.0000    1.0000         1\n",
+      "                                                                       micro avg     0.0000    0.0000    0.0000         1\n",
+      "                                                                       macro avg     0.0000    0.0000    0.0000         1\n",
+      "                                                                    weighted avg     0.0000    0.0000    0.0000         1\n",
+      "                                                                     samples avg     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "2021-09-08 14:31:18,688 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:31:32,804 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
+      "2021-09-21 20:37:39,053 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:55,464 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_huffpost/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 14:31:36,688 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:37:59,595 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 8/8 [00:00<00:00, 12122.27it/s]"
+      "100%|██████████| 8/8 [00:00<00:00, 9912.68it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:31:36,690 [b'a social unit living together', b'a particular branch of scientific knowledge', b'an activity that is diverting and that holds the attention', b'a machine for performing calculations automatically', b'an active diversion requiring physical exertion and competition', b'an extended social group having a distinctive cultural and economic organization', b'the commercial activity of providing funds and capital', b'the study of government of states and other political units']\n",
-      "2021-09-08 14:31:36,699 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:31:36,701 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:37:59,597 [b'a social unit living together', b'a particular branch of scientific knowledge', b'an activity that is diverting and that holds the attention', b'an active diversion requiring physical exertion and competition', b'an extended social group having a distinctive cultural and economic organization', b'the commercial activity of providing funds and capital', b'the study of government of states and other political units', b'a machine for performing calculations automatically']\n",
+      "2021-09-21 20:37:59,720 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:59,722 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -8018,27 +8047,24 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:31:36,701 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:31:36,701 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
-      "2021-09-08 14:31:36,702 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:31:36,702 Parameters:\n",
-      "2021-09-08 14:31:36,702  - learning_rate: \"0.02\"\n",
-      "2021-09-08 14:31:36,703  - mini_batch_size: \"1\"\n",
-      "2021-09-08 14:31:36,703  - patience: \"3\"\n",
-      "2021-09-08 14:31:36,703  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 14:31:36,703  - max_epochs: \"10\"\n",
-      "2021-09-08 14:31:36,704  - shuffle: \"True\"\n",
-      "2021-09-08 14:31:36,704  - train_with_dev: \"False\"\n",
-      "2021-09-08 14:31:36,704  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 14:31:36,704 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:31:36,705 Model training base path: \"None\"\n",
-      "2021-09-08 14:31:36,705 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:31:36,705 Device: cuda:1\n",
-      "2021-09-08 14:31:36,706 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:31:36,706 Embeddings storage mode: cpu\n",
-      "2021-09-08 14:31:36,712 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:31:36,765 epoch 1 - iter 1/7 - loss 0.67859381 - samples/sec: 25.67 - lr: 0.020000\n",
-      "2021-09-08 14:31:36,846 epoch 1 - iter 2/7 - loss 0.56510790 - samples/sec: 12.53 - lr: 0.020000\n"
+      "2021-09-21 20:37:59,722 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:59,723 Corpus: \"Corpus: 7 train + 1 dev + 1 test sentences\"\n",
+      "2021-09-21 20:37:59,723 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:59,723 Parameters:\n",
+      "2021-09-21 20:37:59,724  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:37:59,724  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:37:59,724  - patience: \"3\"\n",
+      "2021-09-21 20:37:59,724  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:37:59,725  - max_epochs: \"10\"\n",
+      "2021-09-21 20:37:59,725  - shuffle: \"True\"\n",
+      "2021-09-21 20:37:59,725  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:37:59,726  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:37:59,726 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:59,726 Model training base path: \"None\"\n",
+      "2021-09-21 20:37:59,726 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:59,727 Device: cuda:0\n",
+      "2021-09-21 20:37:59,727 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:59,727 Embeddings storage mode: cpu\n"
      ]
     },
     {
@@ -8052,139 +8078,141 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:31:36,940 epoch 1 - iter 3/7 - loss 0.86260876 - samples/sec: 10.70 - lr: 0.020000\n",
-      "2021-09-08 14:31:36,999 epoch 1 - iter 4/7 - loss 0.73716462 - samples/sec: 16.92 - lr: 0.020000\n",
-      "2021-09-08 14:31:37,046 epoch 1 - iter 5/7 - loss 0.65590335 - samples/sec: 21.51 - lr: 0.020000\n",
-      "2021-09-08 14:31:37,142 epoch 1 - iter 6/7 - loss 0.58744385 - samples/sec: 10.42 - lr: 0.020000\n",
-      "2021-09-08 14:31:37,188 epoch 1 - iter 7/7 - loss 0.53138624 - samples/sec: 21.92 - lr: 0.020000\n",
-      "2021-09-08 14:31:37,189 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:31:37,190 EPOCH 1 done: loss 0.5314 - lr 0.0200000\n",
-      "2021-09-08 14:31:37,242 DEV : loss 0.8810762763023376 - score 0.0\n",
-      "2021-09-08 14:31:37,242 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:37:59,902 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:00,084 epoch 1 - iter 1/7 - loss 0.97112280 - samples/sec: 6.08 - lr: 0.020000\n",
+      "2021-09-21 20:38:00,143 epoch 1 - iter 2/7 - loss 0.82106578 - samples/sec: 17.21 - lr: 0.020000\n",
+      "2021-09-21 20:38:00,201 epoch 1 - iter 3/7 - loss 0.59350051 - samples/sec: 17.31 - lr: 0.020000\n",
+      "2021-09-21 20:38:00,260 epoch 1 - iter 4/7 - loss 1.09476141 - samples/sec: 17.00 - lr: 0.020000\n",
+      "2021-09-21 20:38:00,318 epoch 1 - iter 5/7 - loss 0.97895647 - samples/sec: 17.49 - lr: 0.020000\n",
+      "2021-09-21 20:38:00,377 epoch 1 - iter 6/7 - loss 0.84450759 - samples/sec: 17.18 - lr: 0.020000\n",
+      "2021-09-21 20:38:00,436 epoch 1 - iter 7/7 - loss 0.75041954 - samples/sec: 17.07 - lr: 0.020000\n",
+      "2021-09-21 20:38:00,437 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:00,437 EPOCH 1 done: loss 0.7504 - lr 0.0200000\n",
+      "2021-09-21 20:38:00,559 DEV : loss 0.5633096694946289 - score 0.0\n",
+      "2021-09-21 20:38:00,560 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 14:31:41,253 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:31:41,378 epoch 2 - iter 1/7 - loss 0.53995079 - samples/sec: 9.15 - lr: 0.020000\n",
-      "2021-09-08 14:31:41,426 epoch 2 - iter 2/7 - loss 0.37203156 - samples/sec: 21.32 - lr: 0.020000\n",
-      "2021-09-08 14:31:41,492 epoch 2 - iter 3/7 - loss 0.27848339 - samples/sec: 15.11 - lr: 0.020000\n",
-      "2021-09-08 14:31:41,538 epoch 2 - iter 4/7 - loss 0.36773420 - samples/sec: 21.89 - lr: 0.020000\n",
-      "2021-09-08 14:31:41,585 epoch 2 - iter 5/7 - loss 0.34281492 - samples/sec: 21.41 - lr: 0.020000\n",
-      "2021-09-08 14:31:41,677 epoch 2 - iter 6/7 - loss 0.32612187 - samples/sec: 11.01 - lr: 0.020000\n",
-      "2021-09-08 14:31:41,747 epoch 2 - iter 7/7 - loss 0.29291020 - samples/sec: 14.25 - lr: 0.020000\n",
-      "2021-09-08 14:31:41,748 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:31:41,748 EPOCH 2 done: loss 0.2929 - lr 0.0200000\n",
-      "2021-09-08 14:31:41,799 DEV : loss 0.3078979253768921 - score 0.0\n",
-      "2021-09-08 14:31:41,800 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 14:31:45,799 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:31:45,891 epoch 3 - iter 1/7 - loss 0.00266131 - samples/sec: 13.04 - lr: 0.020000\n",
-      "2021-09-08 14:31:45,937 epoch 3 - iter 2/7 - loss 0.27478943 - samples/sec: 21.87 - lr: 0.020000\n",
-      "2021-09-08 14:31:46,001 epoch 3 - iter 3/7 - loss 0.19339962 - samples/sec: 15.81 - lr: 0.020000\n",
-      "2021-09-08 14:31:46,098 epoch 3 - iter 4/7 - loss 0.15256223 - samples/sec: 10.30 - lr: 0.020000\n",
-      "2021-09-08 14:31:46,197 epoch 3 - iter 5/7 - loss 0.43459280 - samples/sec: 10.18 - lr: 0.020000\n",
-      "2021-09-08 14:31:46,244 epoch 3 - iter 6/7 - loss 0.37486994 - samples/sec: 21.10 - lr: 0.020000\n",
-      "2021-09-08 14:31:46,288 epoch 3 - iter 7/7 - loss 0.32180685 - samples/sec: 23.16 - lr: 0.020000\n",
-      "2021-09-08 14:31:46,289 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:31:46,289 EPOCH 3 done: loss 0.3218 - lr 0.0200000\n",
-      "2021-09-08 14:31:46,340 DEV : loss 0.7097411751747131 - score 0.0\n",
-      "2021-09-08 14:31:46,341 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 14:31:46,343 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:31:46,400 epoch 4 - iter 1/7 - loss 0.00263841 - samples/sec: 22.83 - lr: 0.020000\n",
-      "2021-09-08 14:31:46,487 epoch 4 - iter 2/7 - loss 0.00292972 - samples/sec: 11.61 - lr: 0.020000\n",
-      "2021-09-08 14:31:46,555 epoch 4 - iter 3/7 - loss 0.00217046 - samples/sec: 14.82 - lr: 0.020000\n",
-      "2021-09-08 14:31:46,612 epoch 4 - iter 4/7 - loss 0.00234665 - samples/sec: 17.58 - lr: 0.020000\n",
-      "2021-09-08 14:31:46,655 epoch 4 - iter 5/7 - loss 0.00243264 - samples/sec: 23.49 - lr: 0.020000\n",
-      "2021-09-08 14:31:46,751 epoch 4 - iter 6/7 - loss 0.10257019 - samples/sec: 10.41 - lr: 0.020000\n",
-      "2021-09-08 14:31:46,798 epoch 4 - iter 7/7 - loss 0.09900311 - samples/sec: 21.89 - lr: 0.020000\n",
-      "2021-09-08 14:31:46,798 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:31:46,799 EPOCH 4 done: loss 0.0990 - lr 0.0200000\n",
-      "2021-09-08 14:31:46,850 DEV : loss 0.7887205481529236 - score 0.0\n",
-      "2021-09-08 14:31:46,851 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 14:31:46,853 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:31:46,910 epoch 5 - iter 1/7 - loss 0.00216641 - samples/sec: 23.06 - lr: 0.020000\n",
-      "2021-09-08 14:31:46,996 epoch 5 - iter 2/7 - loss 0.00470963 - samples/sec: 11.68 - lr: 0.020000\n",
-      "2021-09-08 14:31:47,053 epoch 5 - iter 3/7 - loss 0.00367017 - samples/sec: 17.54 - lr: 0.020000\n",
-      "2021-09-08 14:31:47,096 epoch 5 - iter 4/7 - loss 0.00835501 - samples/sec: 23.35 - lr: 0.020000\n",
-      "2021-09-08 14:31:47,142 epoch 5 - iter 5/7 - loss 0.08729615 - samples/sec: 22.00 - lr: 0.020000\n",
-      "2021-09-08 14:31:47,209 epoch 5 - iter 6/7 - loss 0.07283634 - samples/sec: 14.91 - lr: 0.020000\n",
-      "2021-09-08 14:31:47,306 epoch 5 - iter 7/7 - loss 0.06866009 - samples/sec: 10.35 - lr: 0.020000\n",
-      "2021-09-08 14:31:47,307 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:31:47,308 EPOCH 5 done: loss 0.0687 - lr 0.0200000\n",
-      "2021-09-08 14:31:47,359 DEV : loss 0.7032329440116882 - score 0.0\n",
-      "2021-09-08 14:31:47,360 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 14:31:47,361 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:31:47,442 epoch 6 - iter 1/7 - loss 0.00105956 - samples/sec: 14.81 - lr: 0.020000\n",
-      "2021-09-08 14:31:47,485 epoch 6 - iter 2/7 - loss 0.00311851 - samples/sec: 23.80 - lr: 0.020000\n",
-      "2021-09-08 14:31:47,578 epoch 6 - iter 3/7 - loss 0.00537816 - samples/sec: 10.72 - lr: 0.020000\n",
-      "2021-09-08 14:31:47,666 epoch 6 - iter 4/7 - loss 0.00424586 - samples/sec: 11.45 - lr: 0.020000\n",
-      "2021-09-08 14:31:47,723 epoch 6 - iter 5/7 - loss 0.00356823 - samples/sec: 17.87 - lr: 0.020000\n",
-      "2021-09-08 14:31:47,766 epoch 6 - iter 6/7 - loss 0.00311816 - samples/sec: 23.13 - lr: 0.020000\n",
-      "2021-09-08 14:31:47,809 epoch 6 - iter 7/7 - loss 0.00295355 - samples/sec: 23.31 - lr: 0.020000\n",
-      "2021-09-08 14:31:47,810 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:31:47,811 EPOCH 6 done: loss 0.0030 - lr 0.0200000\n",
-      "2021-09-08 14:31:47,955 DEV : loss 0.8540226221084595 - score 0.0\n",
-      "Epoch     6: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 14:31:47,956 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 14:31:47,958 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:31:48,039 epoch 7 - iter 1/7 - loss 0.00069050 - samples/sec: 14.92 - lr: 0.010000\n",
-      "2021-09-08 14:31:48,082 epoch 7 - iter 2/7 - loss 0.00103840 - samples/sec: 23.61 - lr: 0.010000\n",
-      "2021-09-08 14:31:48,169 epoch 7 - iter 3/7 - loss 0.00145623 - samples/sec: 11.51 - lr: 0.010000\n",
-      "2021-09-08 14:31:48,263 epoch 7 - iter 4/7 - loss 0.00238920 - samples/sec: 10.69 - lr: 0.010000\n",
-      "2021-09-08 14:31:48,307 epoch 7 - iter 5/7 - loss 0.00206647 - samples/sec: 23.06 - lr: 0.010000\n",
-      "2021-09-08 14:31:48,363 epoch 7 - iter 6/7 - loss 0.00183337 - samples/sec: 17.94 - lr: 0.010000\n",
-      "2021-09-08 14:31:48,406 epoch 7 - iter 7/7 - loss 0.00169166 - samples/sec: 23.29 - lr: 0.010000\n",
-      "2021-09-08 14:31:48,407 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:31:48,407 EPOCH 7 done: loss 0.0017 - lr 0.0100000\n",
-      "2021-09-08 14:31:48,458 DEV : loss 0.8731380105018616 - score 0.0\n",
-      "2021-09-08 14:31:48,459 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 14:31:48,461 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:31:48,531 epoch 8 - iter 1/7 - loss 0.00333112 - samples/sec: 17.77 - lr: 0.010000\n",
-      "2021-09-08 14:31:48,575 epoch 8 - iter 2/7 - loss 0.00193561 - samples/sec: 23.13 - lr: 0.010000\n",
-      "2021-09-08 14:31:48,661 epoch 8 - iter 3/7 - loss 0.00207488 - samples/sec: 11.67 - lr: 0.010000\n",
-      "2021-09-08 14:31:48,704 epoch 8 - iter 4/7 - loss 0.00425159 - samples/sec: 23.52 - lr: 0.010000\n",
-      "2021-09-08 14:31:48,773 epoch 8 - iter 5/7 - loss 0.00345536 - samples/sec: 14.59 - lr: 0.010000\n"
+      "2021-09-21 20:38:04,738 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:04,917 epoch 2 - iter 1/7 - loss 1.28574419 - samples/sec: 6.21 - lr: 0.020000\n",
+      "2021-09-21 20:38:04,977 epoch 2 - iter 2/7 - loss 0.77485292 - samples/sec: 16.97 - lr: 0.020000\n",
+      "2021-09-21 20:38:05,031 epoch 2 - iter 3/7 - loss 0.52823801 - samples/sec: 18.49 - lr: 0.020000\n",
+      "2021-09-21 20:38:05,085 epoch 2 - iter 4/7 - loss 0.40026005 - samples/sec: 18.89 - lr: 0.020000\n",
+      "2021-09-21 20:38:05,144 epoch 2 - iter 5/7 - loss 0.51409333 - samples/sec: 17.02 - lr: 0.020000\n",
+      "2021-09-21 20:38:05,202 epoch 2 - iter 6/7 - loss 0.58682711 - samples/sec: 17.31 - lr: 0.020000\n",
+      "2021-09-21 20:38:05,256 epoch 2 - iter 7/7 - loss 0.50512901 - samples/sec: 18.71 - lr: 0.020000\n",
+      "2021-09-21 20:38:05,257 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:05,257 EPOCH 2 done: loss 0.5051 - lr 0.0200000\n",
+      "2021-09-21 20:38:05,384 DEV : loss 0.6195568442344666 - score 0.0\n",
+      "2021-09-21 20:38:05,384 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:38:05,468 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:05,539 epoch 3 - iter 1/7 - loss 0.02008279 - samples/sec: 18.42 - lr: 0.020000\n",
+      "2021-09-21 20:38:05,699 epoch 3 - iter 2/7 - loss 0.18607222 - samples/sec: 6.26 - lr: 0.020000\n",
+      "2021-09-21 20:38:05,752 epoch 3 - iter 3/7 - loss 0.12452620 - samples/sec: 18.94 - lr: 0.020000\n",
+      "2021-09-21 20:38:05,806 epoch 3 - iter 4/7 - loss 0.09881069 - samples/sec: 18.79 - lr: 0.020000\n",
+      "2021-09-21 20:38:05,860 epoch 3 - iter 5/7 - loss 0.08134020 - samples/sec: 18.51 - lr: 0.020000\n",
+      "2021-09-21 20:38:05,918 epoch 3 - iter 6/7 - loss 0.12780458 - samples/sec: 17.43 - lr: 0.020000\n",
+      "2021-09-21 20:38:05,977 epoch 3 - iter 7/7 - loss 0.12553899 - samples/sec: 17.16 - lr: 0.020000\n",
+      "2021-09-21 20:38:05,978 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:05,978 EPOCH 3 done: loss 0.1255 - lr 0.0200000\n",
+      "2021-09-21 20:38:06,106 DEV : loss 0.6711801886558533 - score 0.0\n",
+      "2021-09-21 20:38:06,107 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:38:06,185 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:06,254 epoch 4 - iter 1/7 - loss 0.00348816 - samples/sec: 18.90 - lr: 0.020000\n",
+      "2021-09-21 20:38:06,316 epoch 4 - iter 2/7 - loss 0.01484173 - samples/sec: 16.43 - lr: 0.020000\n",
+      "2021-09-21 20:38:06,370 epoch 4 - iter 3/7 - loss 0.01045279 - samples/sec: 18.50 - lr: 0.020000\n",
+      "2021-09-21 20:38:06,424 epoch 4 - iter 4/7 - loss 0.00960765 - samples/sec: 18.67 - lr: 0.020000\n",
+      "2021-09-21 20:38:06,582 epoch 4 - iter 5/7 - loss 0.01970779 - samples/sec: 6.35 - lr: 0.020000\n",
+      "2021-09-21 20:38:06,637 epoch 4 - iter 6/7 - loss 0.01683236 - samples/sec: 18.30 - lr: 0.020000\n",
+      "2021-09-21 20:38:06,695 epoch 4 - iter 7/7 - loss 0.04359621 - samples/sec: 17.51 - lr: 0.020000\n",
+      "2021-09-21 20:38:06,696 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:06,696 EPOCH 4 done: loss 0.0436 - lr 0.0200000\n",
+      "2021-09-21 20:38:06,795 DEV : loss 0.8111870884895325 - score 0.0\n",
+      "2021-09-21 20:38:06,796 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:38:07,303 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:07,374 epoch 5 - iter 1/7 - loss 0.00108279 - samples/sec: 18.41 - lr: 0.020000\n",
+      "2021-09-21 20:38:07,427 epoch 5 - iter 2/7 - loss 0.00128594 - samples/sec: 19.07 - lr: 0.020000\n",
+      "2021-09-21 20:38:07,580 epoch 5 - iter 3/7 - loss 0.00190785 - samples/sec: 6.54 - lr: 0.020000\n",
+      "2021-09-21 20:38:07,634 epoch 5 - iter 4/7 - loss 0.00199917 - samples/sec: 18.98 - lr: 0.020000\n",
+      "2021-09-21 20:38:07,688 epoch 5 - iter 5/7 - loss 0.00271633 - samples/sec: 18.72 - lr: 0.020000\n",
+      "2021-09-21 20:38:07,741 epoch 5 - iter 6/7 - loss 0.00344937 - samples/sec: 19.02 - lr: 0.020000\n",
+      "2021-09-21 20:38:07,795 epoch 5 - iter 7/7 - loss 0.00323789 - samples/sec: 18.53 - lr: 0.020000\n",
+      "2021-09-21 20:38:07,796 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:07,796 EPOCH 5 done: loss 0.0032 - lr 0.0200000\n",
+      "2021-09-21 20:38:07,894 DEV : loss 0.7884151935577393 - score 0.0\n",
+      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 20:38:07,895 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:38:07,938 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:08,113 epoch 6 - iter 1/7 - loss 0.00200896 - samples/sec: 6.31 - lr: 0.010000\n",
+      "2021-09-21 20:38:08,168 epoch 6 - iter 2/7 - loss 0.00158394 - samples/sec: 18.48 - lr: 0.010000\n",
+      "2021-09-21 20:38:08,221 epoch 6 - iter 3/7 - loss 0.00132553 - samples/sec: 19.01 - lr: 0.010000\n",
+      "2021-09-21 20:38:08,276 epoch 6 - iter 4/7 - loss 0.00122807 - samples/sec: 18.50 - lr: 0.010000\n",
+      "2021-09-21 20:38:08,329 epoch 6 - iter 5/7 - loss 0.00152302 - samples/sec: 18.79 - lr: 0.010000\n",
+      "2021-09-21 20:38:08,385 epoch 6 - iter 6/7 - loss 0.00377380 - samples/sec: 18.31 - lr: 0.010000\n",
+      "2021-09-21 20:38:08,440 epoch 6 - iter 7/7 - loss 0.00378268 - samples/sec: 18.33 - lr: 0.010000\n",
+      "2021-09-21 20:38:08,441 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:08,441 EPOCH 6 done: loss 0.0038 - lr 0.0100000\n",
+      "2021-09-21 20:38:08,475 DEV : loss 0.7941868305206299 - score 0.0\n",
+      "2021-09-21 20:38:08,476 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:38:08,478 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:08,547 epoch 7 - iter 1/7 - loss 0.00723137 - samples/sec: 18.95 - lr: 0.010000\n",
+      "2021-09-21 20:38:08,600 epoch 7 - iter 2/7 - loss 0.00394586 - samples/sec: 18.97 - lr: 0.010000\n",
+      "2021-09-21 20:38:08,654 epoch 7 - iter 3/7 - loss 0.00341604 - samples/sec: 18.78 - lr: 0.010000\n",
+      "2021-09-21 20:38:08,808 epoch 7 - iter 4/7 - loss 0.00340543 - samples/sec: 6.50 - lr: 0.010000\n",
+      "2021-09-21 20:38:08,862 epoch 7 - iter 5/7 - loss 0.00292231 - samples/sec: 18.97 - lr: 0.010000\n",
+      "2021-09-21 20:38:08,916 epoch 7 - iter 6/7 - loss 0.00257377 - samples/sec: 18.51 - lr: 0.010000\n",
+      "2021-09-21 20:38:08,970 epoch 7 - iter 7/7 - loss 0.00231129 - samples/sec: 18.61 - lr: 0.010000\n",
+      "2021-09-21 20:38:08,971 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:08,971 EPOCH 7 done: loss 0.0023 - lr 0.0100000\n",
+      "2021-09-21 20:38:09,003 DEV : loss 0.7908565402030945 - score 0.0\n",
+      "2021-09-21 20:38:09,004 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:38:09,006 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:09,181 epoch 8 - iter 1/7 - loss 0.02739216 - samples/sec: 6.31 - lr: 0.010000\n",
+      "2021-09-21 20:38:09,234 epoch 8 - iter 2/7 - loss 0.01540209 - samples/sec: 19.07 - lr: 0.010000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:31:48,816 epoch 8 - iter 6/7 - loss 0.00316695 - samples/sec: 23.39 - lr: 0.010000\n",
-      "2021-09-08 14:31:48,909 epoch 8 - iter 7/7 - loss 0.00415029 - samples/sec: 10.76 - lr: 0.010000\n",
-      "2021-09-08 14:31:48,910 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:31:48,910 EPOCH 8 done: loss 0.0042 - lr 0.0100000\n",
-      "2021-09-08 14:31:48,963 DEV : loss 0.7723047733306885 - score 0.0\n",
-      "2021-09-08 14:31:48,964 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 14:31:48,966 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:31:49,046 epoch 9 - iter 1/7 - loss 0.00115174 - samples/sec: 15.04 - lr: 0.010000\n",
-      "2021-09-08 14:31:49,088 epoch 9 - iter 2/7 - loss 0.00149775 - samples/sec: 23.79 - lr: 0.010000\n",
-      "2021-09-08 14:31:49,132 epoch 9 - iter 3/7 - loss 0.00110294 - samples/sec: 22.91 - lr: 0.010000\n",
-      "2021-09-08 14:31:49,226 epoch 9 - iter 4/7 - loss 0.00194389 - samples/sec: 10.72 - lr: 0.010000\n",
-      "2021-09-08 14:31:49,313 epoch 9 - iter 5/7 - loss 0.00193125 - samples/sec: 11.54 - lr: 0.010000\n",
-      "2021-09-08 14:31:49,370 epoch 9 - iter 6/7 - loss 0.00167171 - samples/sec: 17.73 - lr: 0.010000\n",
-      "2021-09-08 14:31:49,413 epoch 9 - iter 7/7 - loss 0.00152305 - samples/sec: 23.22 - lr: 0.010000\n",
-      "2021-09-08 14:31:49,414 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:31:49,415 EPOCH 9 done: loss 0.0015 - lr 0.0100000\n",
-      "2021-09-08 14:31:49,466 DEV : loss 0.7953552007675171 - score 0.0\n",
-      "2021-09-08 14:31:49,466 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 14:31:49,468 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:31:49,568 epoch 10 - iter 1/7 - loss 0.00447600 - samples/sec: 11.60 - lr: 0.010000\n",
-      "2021-09-08 14:31:49,612 epoch 10 - iter 2/7 - loss 0.00275708 - samples/sec: 23.19 - lr: 0.010000\n",
-      "2021-09-08 14:31:49,705 epoch 10 - iter 3/7 - loss 0.00229345 - samples/sec: 10.71 - lr: 0.010000\n",
-      "2021-09-08 14:31:49,762 epoch 10 - iter 4/7 - loss 0.00185246 - samples/sec: 17.67 - lr: 0.010000\n",
-      "2021-09-08 14:31:49,805 epoch 10 - iter 5/7 - loss 0.00162117 - samples/sec: 23.75 - lr: 0.010000\n",
-      "2021-09-08 14:31:49,876 epoch 10 - iter 6/7 - loss 0.00323059 - samples/sec: 14.18 - lr: 0.010000\n",
-      "2021-09-08 14:31:49,919 epoch 10 - iter 7/7 - loss 0.00280065 - samples/sec: 23.15 - lr: 0.010000\n",
-      "2021-09-08 14:31:49,920 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:31:49,920 EPOCH 10 done: loss 0.0028 - lr 0.0100000\n",
-      "2021-09-08 14:31:49,977 DEV : loss 0.7573540210723877 - score 0.0\n",
-      "Epoch    10: reducing learning rate of group 0 to 5.0000e-03.\n",
-      "2021-09-08 14:31:49,977 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 14:31:53,767 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:31:53,767 Testing using best model ...\n",
-      "2021-09-08 14:31:53,769 loading file None/best-model.pt\n",
+      "2021-09-21 20:38:09,288 epoch 8 - iter 3/7 - loss 0.01060931 - samples/sec: 18.52 - lr: 0.010000\n",
+      "2021-09-21 20:38:09,341 epoch 8 - iter 4/7 - loss 0.00840542 - samples/sec: 19.00 - lr: 0.010000\n",
+      "2021-09-21 20:38:09,394 epoch 8 - iter 5/7 - loss 0.00698051 - samples/sec: 18.89 - lr: 0.010000\n",
+      "2021-09-21 20:38:09,449 epoch 8 - iter 6/7 - loss 0.00598535 - samples/sec: 18.59 - lr: 0.010000\n",
+      "2021-09-21 20:38:09,502 epoch 8 - iter 7/7 - loss 0.00527205 - samples/sec: 18.82 - lr: 0.010000\n",
+      "2021-09-21 20:38:09,503 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:09,504 EPOCH 8 done: loss 0.0053 - lr 0.0100000\n",
+      "2021-09-21 20:38:09,538 DEV : loss 0.7623345255851746 - score 0.0\n",
+      "2021-09-21 20:38:09,539 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:38:09,542 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:09,612 epoch 9 - iter 1/7 - loss 0.00494084 - samples/sec: 18.78 - lr: 0.010000\n",
+      "2021-09-21 20:38:09,665 epoch 9 - iter 2/7 - loss 0.00272197 - samples/sec: 19.02 - lr: 0.010000\n",
+      "2021-09-21 20:38:09,818 epoch 9 - iter 3/7 - loss 0.00220307 - samples/sec: 6.54 - lr: 0.010000\n",
+      "2021-09-21 20:38:09,873 epoch 9 - iter 4/7 - loss 0.00188008 - samples/sec: 18.29 - lr: 0.010000\n",
+      "2021-09-21 20:38:09,926 epoch 9 - iter 5/7 - loss 0.00211049 - samples/sec: 19.07 - lr: 0.010000\n",
+      "2021-09-21 20:38:09,979 epoch 9 - iter 6/7 - loss 0.00195763 - samples/sec: 18.98 - lr: 0.010000\n",
+      "2021-09-21 20:38:10,034 epoch 9 - iter 7/7 - loss 0.00178942 - samples/sec: 18.59 - lr: 0.010000\n",
+      "2021-09-21 20:38:10,035 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:10,035 EPOCH 9 done: loss 0.0018 - lr 0.0100000\n",
+      "2021-09-21 20:38:10,066 DEV : loss 0.7754566073417664 - score 0.0\n",
+      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 20:38:10,067 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:38:10,069 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:10,138 epoch 10 - iter 1/7 - loss 0.00059395 - samples/sec: 19.11 - lr: 0.005000\n",
+      "2021-09-21 20:38:10,192 epoch 10 - iter 2/7 - loss 0.00068246 - samples/sec: 18.57 - lr: 0.005000\n",
+      "2021-09-21 20:38:10,246 epoch 10 - iter 3/7 - loss 0.00091109 - samples/sec: 18.67 - lr: 0.005000\n",
+      "2021-09-21 20:38:10,302 epoch 10 - iter 4/7 - loss 0.00084172 - samples/sec: 18.14 - lr: 0.005000\n",
+      "2021-09-21 20:38:10,354 epoch 10 - iter 5/7 - loss 0.00158031 - samples/sec: 19.13 - lr: 0.005000\n",
+      "2021-09-21 20:38:10,407 epoch 10 - iter 6/7 - loss 0.00161815 - samples/sec: 18.96 - lr: 0.005000\n",
+      "2021-09-21 20:38:10,693 epoch 10 - iter 7/7 - loss 0.00169294 - samples/sec: 3.50 - lr: 0.005000\n",
+      "2021-09-21 20:38:10,695 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:10,695 EPOCH 10 done: loss 0.0017 - lr 0.0050000\n",
+      "2021-09-21 20:38:10,730 DEV : loss 0.7802026271820068 - score 0.0\n",
+      "2021-09-21 20:38:10,731 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:38:14,734 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:14,734 Testing using best model ...\n",
+      "2021-09-21 20:38:14,736 loading file None/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 14:31:58,371 \t1.0\n",
-      "2021-09-08 14:31:58,372 \n",
+      "2021-09-21 20:38:19,566 \t1.0\n",
+      "2021-09-21 20:38:19,566 \n",
       "Results:\n",
       "- F-score (micro) 1.0\n",
       "- F-score (macro) 0.125\n",
@@ -8196,19 +8224,19 @@
       "                                                   a social unit living together     0.0000    0.0000    0.0000         0\n",
       "                                     a particular branch of scientific knowledge     0.0000    0.0000    0.0000         0\n",
       "                      an activity that is diverting and that holds the attention     0.0000    0.0000    0.0000         0\n",
-      "                             a machine for performing calculations automatically     0.0000    0.0000    0.0000         0\n",
       "                 an active diversion requiring physical exertion and competition     0.0000    0.0000    0.0000         0\n",
       "an extended social group having a distinctive cultural and economic organization     0.0000    0.0000    0.0000         0\n",
       "                          the commercial activity of providing funds and capital     0.0000    0.0000    0.0000         0\n",
-      "                     the study of government of states and other political units     1.0000    1.0000    1.0000         1\n",
+      "                     the study of government of states and other political units     0.0000    0.0000    0.0000         0\n",
+      "                             a machine for performing calculations automatically     1.0000    1.0000    1.0000         1\n",
       "\n",
       "                                                                       micro avg     1.0000    1.0000    1.0000         1\n",
       "                                                                       macro avg     0.1250    0.1250    0.1250         1\n",
       "                                                                    weighted avg     1.0000    1.0000    1.0000         1\n",
       "                                                                     samples avg     1.0000    1.0000    1.0000         1\n",
       "\n",
-      "2021-09-08 14:31:58,372 ----------------------------------------------------------------------------------------------------\n",
-      "Accuracy Durchschnitt: 0.45571030640668525\n"
+      "2021-09-21 20:38:19,566 ----------------------------------------------------------------------------------------------------\n",
+      "Accuracy Durchschnitt: 0.46128133704735375\n"
      ]
     }
    ],
@@ -8284,11 +8312,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
    "id": "f35dd726",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.4735376044568245, 0.4958217270194986, 0.4623955431754875, 0.44846796657381616, 0.42618384401114207]\n",
+      "0.02341158929186707\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(avg_acc_list)\n",
+    "print(statistics.pstdev(avg_acc_list))"
+   ]
   }
  ],
  "metadata": {
diff --git a/oneshot/topic_yin_oneshot.ipynb b/oneshot/topic_yin_oneshot.ipynb
index e9e476e..a53a43c 100644
--- a/oneshot/topic_yin_oneshot.ipynb
+++ b/oneshot/topic_yin_oneshot.ipynb
@@ -70,25 +70,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:47:40,331 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
+      "2021-09-21 20:20:13,854 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:47:47,171 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:20:22,712 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 15/15 [00:00<00:00, 52516.33it/s]"
+      "100%|██████████| 15/15 [00:00<00:00, 52298.05it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:47:47,174 [b'technology', b'wellness', b'women', b'parents', b'business', b'weddings', b'fashion', b'entertainment', b'divorce', b'religion', b'sports', b'politics', b'comedy', b'science', b'crime']\n",
-      "2021-09-08 11:47:47,178 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:47:47,180 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:20:22,714 [b'technology', b'wellness', b'women', b'parents', b'business', b'weddings', b'fashion', b'entertainment', b'divorce', b'crime', b'religion', b'sports', b'politics', b'travel', b'science']\n",
+      "2021-09-21 20:20:22,716 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:22,718 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -401,28 +401,27 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:47:47,181 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:47:47,181 Corpus: \"Corpus: 13 train + 1 dev + 2 test sentences\"\n",
-      "2021-09-08 11:47:47,181 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:47:47,182 Parameters:\n",
-      "2021-09-08 11:47:47,182  - learning_rate: \"0.02\"\n",
-      "2021-09-08 11:47:47,182  - mini_batch_size: \"1\"\n",
-      "2021-09-08 11:47:47,183  - patience: \"3\"\n",
-      "2021-09-08 11:47:47,183  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 11:47:47,183  - max_epochs: \"10\"\n",
-      "2021-09-08 11:47:47,183  - shuffle: \"True\"\n",
-      "2021-09-08 11:47:47,184  - train_with_dev: \"False\"\n",
-      "2021-09-08 11:47:47,184  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 11:47:47,184 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:47:47,185 Model training base path: \"None1\"\n",
-      "2021-09-08 11:47:47,185 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:47:47,185 Device: cuda:0\n",
-      "2021-09-08 11:47:47,185 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:47:47,186 Embeddings storage mode: cpu\n",
-      "2021-09-08 11:47:47,192 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:47:47,261 epoch 1 - iter 1/13 - loss 0.27806291 - samples/sec: 20.53 - lr: 0.020000\n",
-      "2021-09-08 11:47:47,309 epoch 1 - iter 2/13 - loss 1.00271891 - samples/sec: 21.19 - lr: 0.020000\n",
-      "2021-09-08 11:47:47,355 epoch 1 - iter 3/13 - loss 0.86700822 - samples/sec: 21.75 - lr: 0.020000\n"
+      "2021-09-21 20:20:22,719 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:22,719 Corpus: \"Corpus: 13 train + 1 dev + 2 test sentences\"\n",
+      "2021-09-21 20:20:22,719 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:22,719 Parameters:\n",
+      "2021-09-21 20:20:22,720  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:20:22,720  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:20:22,720  - patience: \"3\"\n",
+      "2021-09-21 20:20:22,721  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:20:22,721  - max_epochs: \"10\"\n",
+      "2021-09-21 20:20:22,721  - shuffle: \"True\"\n",
+      "2021-09-21 20:20:22,721  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:20:22,722  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:20:22,722 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:22,722 Model training base path: \"None1\"\n",
+      "2021-09-21 20:20:22,722 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:22,723 Device: cuda:0\n",
+      "2021-09-21 20:20:22,723 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:22,723 Embeddings storage mode: cpu\n",
+      "2021-09-21 20:20:22,730 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:22,829 epoch 1 - iter 1/13 - loss 0.03193409 - samples/sec: 15.77 - lr: 0.020000\n",
+      "2021-09-21 20:20:22,882 epoch 1 - iter 2/13 - loss 0.23710594 - samples/sec: 19.39 - lr: 0.020000\n"
      ]
     },
     {
@@ -436,210 +435,209 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:47:47,401 epoch 1 - iter 4/13 - loss 0.70382545 - samples/sec: 21.93 - lr: 0.020000\n",
-      "2021-09-08 11:47:47,447 epoch 1 - iter 5/13 - loss 0.70204398 - samples/sec: 22.16 - lr: 0.020000\n",
-      "2021-09-08 11:47:47,493 epoch 1 - iter 6/13 - loss 0.65362529 - samples/sec: 21.59 - lr: 0.020000\n",
-      "2021-09-08 11:47:47,539 epoch 1 - iter 7/13 - loss 0.61349366 - samples/sec: 22.05 - lr: 0.020000\n",
-      "2021-09-08 11:47:47,584 epoch 1 - iter 8/13 - loss 0.55504088 - samples/sec: 22.17 - lr: 0.020000\n",
-      "2021-09-08 11:47:47,631 epoch 1 - iter 9/13 - loss 0.58638572 - samples/sec: 21.88 - lr: 0.020000\n",
-      "2021-09-08 11:47:47,673 epoch 1 - iter 10/13 - loss 0.53445917 - samples/sec: 23.85 - lr: 0.020000\n",
-      "2021-09-08 11:47:47,715 epoch 1 - iter 11/13 - loss 0.48744027 - samples/sec: 23.88 - lr: 0.020000\n",
-      "2021-09-08 11:47:47,760 epoch 1 - iter 12/13 - loss 0.45189638 - samples/sec: 22.62 - lr: 0.020000\n",
-      "2021-09-08 11:47:47,810 epoch 1 - iter 13/13 - loss 0.53100551 - samples/sec: 20.31 - lr: 0.020000\n",
-      "2021-09-08 11:47:47,811 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:47:47,812 EPOCH 1 done: loss 0.5310 - lr 0.0200000\n",
-      "2021-09-08 11:47:47,865 DEV : loss 0.7966743111610413 - score 0.0\n",
-      "2021-09-08 11:47:47,866 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:20:22,946 epoch 1 - iter 3/13 - loss 0.34121009 - samples/sec: 15.66 - lr: 0.020000\n",
+      "2021-09-21 20:20:23,006 epoch 1 - iter 4/13 - loss 0.52194575 - samples/sec: 16.71 - lr: 0.020000\n",
+      "2021-09-21 20:20:23,061 epoch 1 - iter 5/13 - loss 0.43398163 - samples/sec: 18.32 - lr: 0.020000\n",
+      "2021-09-21 20:20:23,107 epoch 1 - iter 6/13 - loss 0.36793503 - samples/sec: 21.87 - lr: 0.020000\n",
+      "2021-09-21 20:20:23,169 epoch 1 - iter 7/13 - loss 0.50584945 - samples/sec: 16.43 - lr: 0.020000\n",
+      "2021-09-21 20:20:23,222 epoch 1 - iter 8/13 - loss 0.44517485 - samples/sec: 18.78 - lr: 0.020000\n",
+      "2021-09-21 20:20:23,275 epoch 1 - iter 9/13 - loss 0.42172878 - samples/sec: 19.19 - lr: 0.020000\n",
+      "2021-09-21 20:20:23,337 epoch 1 - iter 10/13 - loss 0.38878168 - samples/sec: 16.08 - lr: 0.020000\n",
+      "2021-09-21 20:20:23,405 epoch 1 - iter 11/13 - loss 0.35472157 - samples/sec: 14.81 - lr: 0.020000\n",
+      "2021-09-21 20:20:23,457 epoch 1 - iter 12/13 - loss 0.32672727 - samples/sec: 19.38 - lr: 0.020000\n",
+      "2021-09-21 20:20:23,512 epoch 1 - iter 13/13 - loss 0.30187599 - samples/sec: 18.35 - lr: 0.020000\n",
+      "2021-09-21 20:20:23,513 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:23,514 EPOCH 1 done: loss 0.3019 - lr 0.0200000\n",
+      "2021-09-21 20:20:23,636 DEV : loss 0.19361527264118195 - score 0.0\n",
+      "2021-09-21 20:20:23,637 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:47:51,698 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:47:51,761 epoch 2 - iter 1/13 - loss 0.58979815 - samples/sec: 21.43 - lr: 0.020000\n",
-      "2021-09-08 11:47:51,807 epoch 2 - iter 2/13 - loss 0.44404745 - samples/sec: 21.75 - lr: 0.020000\n",
-      "2021-09-08 11:47:51,850 epoch 2 - iter 3/13 - loss 0.30678559 - samples/sec: 23.90 - lr: 0.020000\n",
-      "2021-09-08 11:47:51,895 epoch 2 - iter 4/13 - loss 0.26455348 - samples/sec: 22.11 - lr: 0.020000\n",
-      "2021-09-08 11:47:51,939 epoch 2 - iter 5/13 - loss 0.21799939 - samples/sec: 23.36 - lr: 0.020000\n",
-      "2021-09-08 11:47:51,981 epoch 2 - iter 6/13 - loss 0.18285107 - samples/sec: 23.69 - lr: 0.020000\n",
-      "2021-09-08 11:47:52,027 epoch 2 - iter 7/13 - loss 0.16751211 - samples/sec: 22.18 - lr: 0.020000\n",
-      "2021-09-08 11:47:52,070 epoch 2 - iter 8/13 - loss 0.15074441 - samples/sec: 23.51 - lr: 0.020000\n",
-      "2021-09-08 11:47:52,114 epoch 2 - iter 9/13 - loss 0.13511299 - samples/sec: 22.85 - lr: 0.020000\n",
-      "2021-09-08 11:47:52,156 epoch 2 - iter 10/13 - loss 0.12180961 - samples/sec: 23.84 - lr: 0.020000\n",
-      "2021-09-08 11:47:52,202 epoch 2 - iter 11/13 - loss 0.22156343 - samples/sec: 21.96 - lr: 0.020000\n",
-      "2021-09-08 11:47:52,248 epoch 2 - iter 12/13 - loss 0.29339714 - samples/sec: 21.90 - lr: 0.020000\n",
-      "2021-09-08 11:47:52,291 epoch 2 - iter 13/13 - loss 0.27106559 - samples/sec: 23.54 - lr: 0.020000\n",
-      "2021-09-08 11:47:52,293 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:47:52,293 EPOCH 2 done: loss 0.2711 - lr 0.0200000\n",
-      "2021-09-08 11:47:52,325 DEV : loss 0.16986505687236786 - score 0.0\n",
-      "2021-09-08 11:47:52,326 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:20:36,075 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:36,162 epoch 2 - iter 1/13 - loss 0.01178776 - samples/sec: 19.24 - lr: 0.020000\n",
+      "2021-09-21 20:20:36,207 epoch 2 - iter 2/13 - loss 0.01042833 - samples/sec: 22.07 - lr: 0.020000\n",
+      "2021-09-21 20:20:36,262 epoch 2 - iter 3/13 - loss 0.01446766 - samples/sec: 18.58 - lr: 0.020000\n",
+      "2021-09-21 20:20:36,331 epoch 2 - iter 4/13 - loss 0.05147605 - samples/sec: 14.40 - lr: 0.020000\n",
+      "2021-09-21 20:20:36,382 epoch 2 - iter 5/13 - loss 0.04237603 - samples/sec: 20.04 - lr: 0.020000\n",
+      "2021-09-21 20:20:36,444 epoch 2 - iter 6/13 - loss 0.21377497 - samples/sec: 16.25 - lr: 0.020000\n",
+      "2021-09-21 20:20:36,500 epoch 2 - iter 7/13 - loss 0.18394503 - samples/sec: 17.90 - lr: 0.020000\n",
+      "2021-09-21 20:20:36,566 epoch 2 - iter 8/13 - loss 0.16639044 - samples/sec: 15.20 - lr: 0.020000\n",
+      "2021-09-21 20:20:36,628 epoch 2 - iter 9/13 - loss 0.15932797 - samples/sec: 16.16 - lr: 0.020000\n",
+      "2021-09-21 20:20:36,696 epoch 2 - iter 10/13 - loss 0.21088315 - samples/sec: 14.95 - lr: 0.020000\n",
+      "2021-09-21 20:20:36,748 epoch 2 - iter 11/13 - loss 0.19194591 - samples/sec: 19.31 - lr: 0.020000\n",
+      "2021-09-21 20:20:36,813 epoch 2 - iter 12/13 - loss 0.21258393 - samples/sec: 15.42 - lr: 0.020000\n",
+      "2021-09-21 20:20:36,873 epoch 2 - iter 13/13 - loss 0.20122221 - samples/sec: 16.74 - lr: 0.020000\n",
+      "2021-09-21 20:20:36,874 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:36,875 EPOCH 2 done: loss 0.2012 - lr 0.0200000\n",
+      "2021-09-21 20:20:36,927 DEV : loss 0.7762499451637268 - score 0.0\n",
+      "2021-09-21 20:20:36,930 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:20:36,933 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:37,061 epoch 3 - iter 1/13 - loss 0.00980292 - samples/sec: 14.64 - lr: 0.020000\n",
+      "2021-09-21 20:20:37,131 epoch 3 - iter 2/13 - loss 0.90897070 - samples/sec: 14.50 - lr: 0.020000\n",
+      "2021-09-21 20:20:37,192 epoch 3 - iter 3/13 - loss 0.63978664 - samples/sec: 16.28 - lr: 0.020000\n",
+      "2021-09-21 20:20:37,278 epoch 3 - iter 4/13 - loss 0.48444018 - samples/sec: 11.79 - lr: 0.020000\n",
+      "2021-09-21 20:20:37,364 epoch 3 - iter 5/13 - loss 0.39001216 - samples/sec: 11.59 - lr: 0.020000\n",
+      "2021-09-21 20:20:37,440 epoch 3 - iter 6/13 - loss 0.32601552 - samples/sec: 13.36 - lr: 0.020000\n",
+      "2021-09-21 20:20:37,508 epoch 3 - iter 7/13 - loss 0.28765387 - samples/sec: 14.77 - lr: 0.020000\n",
+      "2021-09-21 20:20:37,571 epoch 3 - iter 8/13 - loss 0.25216187 - samples/sec: 15.90 - lr: 0.020000\n",
+      "2021-09-21 20:20:37,636 epoch 3 - iter 9/13 - loss 0.22472604 - samples/sec: 15.63 - lr: 0.020000\n",
+      "2021-09-21 20:20:37,703 epoch 3 - iter 10/13 - loss 0.20782167 - samples/sec: 14.95 - lr: 0.020000\n",
+      "2021-09-21 20:20:37,765 epoch 3 - iter 11/13 - loss 0.19018165 - samples/sec: 16.16 - lr: 0.020000\n",
+      "2021-09-21 20:20:37,823 epoch 3 - iter 12/13 - loss 0.17443396 - samples/sec: 17.56 - lr: 0.020000\n",
+      "2021-09-21 20:20:37,882 epoch 3 - iter 13/13 - loss 0.16153388 - samples/sec: 17.05 - lr: 0.020000\n",
+      "2021-09-21 20:20:37,883 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:37,884 EPOCH 3 done: loss 0.1615 - lr 0.0200000\n",
+      "2021-09-21 20:20:37,921 DEV : loss 0.4632222354412079 - score 0.0\n",
+      "2021-09-21 20:20:37,922 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:20:37,923 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:38,023 epoch 4 - iter 1/13 - loss 0.04182610 - samples/sec: 16.12 - lr: 0.020000\n",
+      "2021-09-21 20:20:38,079 epoch 4 - iter 2/13 - loss 0.02365312 - samples/sec: 18.06 - lr: 0.020000\n",
+      "2021-09-21 20:20:38,145 epoch 4 - iter 3/13 - loss 0.01595384 - samples/sec: 15.26 - lr: 0.020000\n",
+      "2021-09-21 20:20:38,205 epoch 4 - iter 4/13 - loss 0.01236815 - samples/sec: 17.04 - lr: 0.020000\n",
+      "2021-09-21 20:20:38,259 epoch 4 - iter 5/13 - loss 0.01016479 - samples/sec: 18.80 - lr: 0.020000\n",
+      "2021-09-21 20:20:38,327 epoch 4 - iter 6/13 - loss 0.00976365 - samples/sec: 14.86 - lr: 0.020000\n",
+      "2021-09-21 20:20:38,381 epoch 4 - iter 7/13 - loss 0.00862570 - samples/sec: 18.58 - lr: 0.020000\n",
+      "2021-09-21 20:20:38,437 epoch 4 - iter 8/13 - loss 0.00770741 - samples/sec: 17.92 - lr: 0.020000\n",
+      "2021-09-21 20:20:38,491 epoch 4 - iter 9/13 - loss 0.00830377 - samples/sec: 18.78 - lr: 0.020000\n",
+      "2021-09-21 20:20:38,553 epoch 4 - iter 10/13 - loss 0.00768531 - samples/sec: 16.42 - lr: 0.020000\n",
+      "2021-09-21 20:20:38,620 epoch 4 - iter 11/13 - loss 0.20507241 - samples/sec: 14.86 - lr: 0.020000\n",
+      "2021-09-21 20:20:38,682 epoch 4 - iter 12/13 - loss 0.18847218 - samples/sec: 16.35 - lr: 0.020000\n",
+      "2021-09-21 20:20:38,731 epoch 4 - iter 13/13 - loss 0.22640268 - samples/sec: 20.76 - lr: 0.020000\n",
+      "2021-09-21 20:20:38,732 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:38,732 EPOCH 4 done: loss 0.2264 - lr 0.0200000\n",
+      "2021-09-21 20:20:38,918 DEV : loss 0.07953700423240662 - score 0.0\n",
+      "2021-09-21 20:20:38,919 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:47:59,868 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:47:59,928 epoch 3 - iter 1/13 - loss 0.02117005 - samples/sec: 22.98 - lr: 0.020000\n",
-      "2021-09-08 11:47:59,974 epoch 3 - iter 2/13 - loss 0.02627247 - samples/sec: 22.31 - lr: 0.020000\n",
-      "2021-09-08 11:48:00,020 epoch 3 - iter 3/13 - loss 0.03962818 - samples/sec: 22.02 - lr: 0.020000\n",
-      "2021-09-08 11:48:00,062 epoch 3 - iter 4/13 - loss 0.03104179 - samples/sec: 23.79 - lr: 0.020000\n",
-      "2021-09-08 11:48:00,108 epoch 3 - iter 5/13 - loss 0.04028500 - samples/sec: 22.18 - lr: 0.020000\n",
-      "2021-09-08 11:48:00,154 epoch 3 - iter 6/13 - loss 0.20148276 - samples/sec: 21.72 - lr: 0.020000\n",
-      "2021-09-08 11:48:00,197 epoch 3 - iter 7/13 - loss 0.17350805 - samples/sec: 23.82 - lr: 0.020000\n",
-      "2021-09-08 11:48:00,239 epoch 3 - iter 8/13 - loss 0.15197707 - samples/sec: 23.88 - lr: 0.020000\n",
-      "2021-09-08 11:48:00,285 epoch 3 - iter 9/13 - loss 0.46693821 - samples/sec: 21.81 - lr: 0.020000\n",
-      "2021-09-08 11:48:00,328 epoch 3 - iter 10/13 - loss 0.42060348 - samples/sec: 23.89 - lr: 0.020000\n",
-      "2021-09-08 11:48:00,373 epoch 3 - iter 11/13 - loss 0.48279849 - samples/sec: 22.06 - lr: 0.020000\n",
-      "2021-09-08 11:48:00,419 epoch 3 - iter 12/13 - loss 0.46521719 - samples/sec: 21.84 - lr: 0.020000\n",
-      "2021-09-08 11:48:00,465 epoch 3 - iter 13/13 - loss 0.43809495 - samples/sec: 22.04 - lr: 0.020000\n",
-      "2021-09-08 11:48:00,466 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:48:00,467 EPOCH 3 done: loss 0.4381 - lr 0.0200000\n",
-      "2021-09-08 11:48:02,160 DEV : loss 0.17248359322547913 - score 0.0\n",
-      "2021-09-08 11:48:02,161 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:48:02,164 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:48:02,227 epoch 4 - iter 1/13 - loss 0.45416120 - samples/sec: 21.19 - lr: 0.020000\n",
-      "2021-09-08 11:48:02,277 epoch 4 - iter 2/13 - loss 0.79083706 - samples/sec: 20.65 - lr: 0.020000\n",
-      "2021-09-08 11:48:02,326 epoch 4 - iter 3/13 - loss 0.92228521 - samples/sec: 20.72 - lr: 0.020000\n",
-      "2021-09-08 11:48:02,376 epoch 4 - iter 4/13 - loss 0.72362766 - samples/sec: 20.22 - lr: 0.020000\n",
-      "2021-09-08 11:48:02,422 epoch 4 - iter 5/13 - loss 0.57999946 - samples/sec: 21.81 - lr: 0.020000\n",
-      "2021-09-08 11:48:02,468 epoch 4 - iter 6/13 - loss 0.48851829 - samples/sec: 22.08 - lr: 0.020000\n",
-      "2021-09-08 11:48:02,518 epoch 4 - iter 7/13 - loss 0.46059499 - samples/sec: 20.17 - lr: 0.020000\n",
-      "2021-09-08 11:48:02,564 epoch 4 - iter 8/13 - loss 0.40387918 - samples/sec: 22.09 - lr: 0.020000\n",
-      "2021-09-08 11:48:02,609 epoch 4 - iter 9/13 - loss 0.36006621 - samples/sec: 22.93 - lr: 0.020000\n",
-      "2021-09-08 11:48:02,656 epoch 4 - iter 10/13 - loss 0.32767208 - samples/sec: 21.27 - lr: 0.020000\n",
-      "2021-09-08 11:48:02,704 epoch 4 - iter 11/13 - loss 0.35695739 - samples/sec: 21.41 - lr: 0.020000\n",
-      "2021-09-08 11:48:02,751 epoch 4 - iter 12/13 - loss 0.45300798 - samples/sec: 21.52 - lr: 0.020000\n",
-      "2021-09-08 11:48:02,798 epoch 4 - iter 13/13 - loss 0.41857102 - samples/sec: 21.25 - lr: 0.020000\n",
-      "2021-09-08 11:48:02,800 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:48:02,800 EPOCH 4 done: loss 0.4186 - lr 0.0200000\n",
-      "2021-09-08 11:48:02,831 DEV : loss 0.14312846958637238 - score 0.0\n",
-      "2021-09-08 11:48:02,832 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 11:48:10,387 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:48:10,447 epoch 5 - iter 1/13 - loss 0.00882082 - samples/sec: 23.10 - lr: 0.020000\n",
-      "2021-09-08 11:48:10,494 epoch 5 - iter 2/13 - loss 0.04384953 - samples/sec: 21.80 - lr: 0.020000\n",
-      "2021-09-08 11:48:10,540 epoch 5 - iter 3/13 - loss 0.07861876 - samples/sec: 21.96 - lr: 0.020000\n",
-      "2021-09-08 11:48:10,586 epoch 5 - iter 4/13 - loss 0.15804680 - samples/sec: 21.98 - lr: 0.020000\n",
-      "2021-09-08 11:48:10,628 epoch 5 - iter 5/13 - loss 0.12703460 - samples/sec: 23.66 - lr: 0.020000\n",
-      "2021-09-08 11:48:10,674 epoch 5 - iter 6/13 - loss 0.12236231 - samples/sec: 22.25 - lr: 0.020000\n",
-      "2021-09-08 11:48:10,720 epoch 5 - iter 7/13 - loss 0.17420167 - samples/sec: 21.92 - lr: 0.020000\n",
-      "2021-09-08 11:48:10,768 epoch 5 - iter 8/13 - loss 0.16663224 - samples/sec: 20.98 - lr: 0.020000\n",
-      "2021-09-08 11:48:10,810 epoch 5 - iter 9/13 - loss 0.14910570 - samples/sec: 23.91 - lr: 0.020000\n",
-      "2021-09-08 11:48:10,852 epoch 5 - iter 10/13 - loss 0.13466882 - samples/sec: 23.98 - lr: 0.020000\n",
-      "2021-09-08 11:48:10,896 epoch 5 - iter 11/13 - loss 0.12263841 - samples/sec: 23.34 - lr: 0.020000\n",
-      "2021-09-08 11:48:10,941 epoch 5 - iter 12/13 - loss 0.16543692 - samples/sec: 22.17 - lr: 0.020000\n",
-      "2021-09-08 11:48:10,987 epoch 5 - iter 13/13 - loss 0.16133063 - samples/sec: 22.20 - lr: 0.020000\n",
-      "2021-09-08 11:48:10,988 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:48:10,988 EPOCH 5 done: loss 0.1613 - lr 0.0200000\n",
-      "2021-09-08 11:48:11,026 DEV : loss 0.04426678642630577 - score 0.0\n",
-      "2021-09-08 11:48:11,027 BAD EPOCHS (no improvement): 0\n"
+      "2021-09-21 20:20:54,537 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:54,630 epoch 5 - iter 1/13 - loss 0.00656876 - samples/sec: 16.24 - lr: 0.020000\n",
+      "2021-09-21 20:20:54,686 epoch 5 - iter 2/13 - loss 0.00440878 - samples/sec: 18.04 - lr: 0.020000\n",
+      "2021-09-21 20:20:54,741 epoch 5 - iter 3/13 - loss 0.00640575 - samples/sec: 18.37 - lr: 0.020000\n",
+      "2021-09-21 20:20:54,803 epoch 5 - iter 4/13 - loss 0.35648626 - samples/sec: 16.29 - lr: 0.020000\n",
+      "2021-09-21 20:20:54,856 epoch 5 - iter 5/13 - loss 0.29136720 - samples/sec: 19.26 - lr: 0.020000\n",
+      "2021-09-21 20:20:54,903 epoch 5 - iter 6/13 - loss 0.24304567 - samples/sec: 21.47 - lr: 0.020000\n",
+      "2021-09-21 20:20:54,950 epoch 5 - iter 7/13 - loss 0.20854444 - samples/sec: 21.73 - lr: 0.020000\n",
+      "2021-09-21 20:20:55,001 epoch 5 - iter 8/13 - loss 0.36775982 - samples/sec: 19.93 - lr: 0.020000\n",
+      "2021-09-21 20:20:55,048 epoch 5 - iter 9/13 - loss 0.32737887 - samples/sec: 21.47 - lr: 0.020000\n",
+      "2021-09-21 20:20:55,095 epoch 5 - iter 10/13 - loss 0.29490035 - samples/sec: 21.68 - lr: 0.020000\n",
+      "2021-09-21 20:20:55,144 epoch 5 - iter 11/13 - loss 0.26870870 - samples/sec: 20.74 - lr: 0.020000\n",
+      "2021-09-21 20:20:55,194 epoch 5 - iter 12/13 - loss 0.26152579 - samples/sec: 19.98 - lr: 0.020000\n",
+      "2021-09-21 20:20:55,241 epoch 5 - iter 13/13 - loss 0.24174795 - samples/sec: 21.70 - lr: 0.020000\n",
+      "2021-09-21 20:20:55,242 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:55,243 EPOCH 5 done: loss 0.2417 - lr 0.0200000\n",
+      "2021-09-21 20:20:55,369 DEV : loss 0.14690448343753815 - score 0.0\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "saving best model\n",
-      "2021-09-08 11:48:15,961 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:48:16,024 epoch 6 - iter 1/13 - loss 0.07728550 - samples/sec: 21.51 - lr: 0.020000\n",
-      "2021-09-08 11:48:16,073 epoch 6 - iter 2/13 - loss 0.12602382 - samples/sec: 20.96 - lr: 0.020000\n",
-      "2021-09-08 11:48:16,115 epoch 6 - iter 3/13 - loss 0.08491947 - samples/sec: 23.59 - lr: 0.020000\n",
-      "2021-09-08 11:48:16,158 epoch 6 - iter 4/13 - loss 0.06567793 - samples/sec: 23.87 - lr: 0.020000\n",
-      "2021-09-08 11:48:16,204 epoch 6 - iter 5/13 - loss 0.05771132 - samples/sec: 21.82 - lr: 0.020000\n",
-      "2021-09-08 11:48:16,250 epoch 6 - iter 6/13 - loss 0.06435770 - samples/sec: 22.03 - lr: 0.020000\n",
-      "2021-09-08 11:48:16,292 epoch 6 - iter 7/13 - loss 0.05741264 - samples/sec: 23.81 - lr: 0.020000\n",
-      "2021-09-08 11:48:16,338 epoch 6 - iter 8/13 - loss 0.05321346 - samples/sec: 21.89 - lr: 0.020000\n",
-      "2021-09-08 11:48:16,384 epoch 6 - iter 9/13 - loss 0.05328226 - samples/sec: 22.18 - lr: 0.020000\n",
-      "2021-09-08 11:48:16,426 epoch 6 - iter 10/13 - loss 0.04848599 - samples/sec: 23.97 - lr: 0.020000\n",
-      "2021-09-08 11:48:16,472 epoch 6 - iter 11/13 - loss 0.15732229 - samples/sec: 21.77 - lr: 0.020000\n",
-      "2021-09-08 11:48:16,518 epoch 6 - iter 12/13 - loss 0.32508806 - samples/sec: 22.03 - lr: 0.020000\n",
-      "2021-09-08 11:48:16,564 epoch 6 - iter 13/13 - loss 0.30417924 - samples/sec: 22.25 - lr: 0.020000\n",
-      "2021-09-08 11:48:16,565 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:48:16,565 EPOCH 6 done: loss 0.3042 - lr 0.0200000\n",
-      "2021-09-08 11:48:17,587 DEV : loss 0.4139416813850403 - score 0.0\n",
-      "2021-09-08 11:48:17,588 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:48:17,592 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:48:17,649 epoch 7 - iter 1/13 - loss 0.01390760 - samples/sec: 23.65 - lr: 0.020000\n",
-      "2021-09-08 11:48:17,695 epoch 7 - iter 2/13 - loss 0.31077185 - samples/sec: 22.22 - lr: 0.020000\n",
-      "2021-09-08 11:48:17,741 epoch 7 - iter 3/13 - loss 0.29415223 - samples/sec: 22.05 - lr: 0.020000\n",
-      "2021-09-08 11:48:17,783 epoch 7 - iter 4/13 - loss 0.22107954 - samples/sec: 23.84 - lr: 0.020000\n",
-      "2021-09-08 11:48:17,825 epoch 7 - iter 5/13 - loss 0.17746616 - samples/sec: 23.89 - lr: 0.020000\n",
-      "2021-09-08 11:48:17,871 epoch 7 - iter 6/13 - loss 0.15447320 - samples/sec: 22.02 - lr: 0.020000\n",
-      "2021-09-08 11:48:17,914 epoch 7 - iter 7/13 - loss 0.13296800 - samples/sec: 23.90 - lr: 0.020000\n",
-      "2021-09-08 11:48:17,956 epoch 7 - iter 8/13 - loss 0.11640443 - samples/sec: 23.85 - lr: 0.020000\n",
-      "2021-09-08 11:48:18,000 epoch 7 - iter 9/13 - loss 0.10370237 - samples/sec: 22.89 - lr: 0.020000\n",
-      "2021-09-08 11:48:18,152 epoch 7 - iter 10/13 - loss 0.12159498 - samples/sec: 22.01 - lr: 0.020000\n",
-      "2021-09-08 11:48:18,195 epoch 7 - iter 11/13 - loss 0.11059020 - samples/sec: 23.77 - lr: 0.020000\n",
-      "2021-09-08 11:48:18,238 epoch 7 - iter 12/13 - loss 0.10159348 - samples/sec: 23.49 - lr: 0.020000\n",
-      "2021-09-08 11:48:18,280 epoch 7 - iter 13/13 - loss 0.09391023 - samples/sec: 23.80 - lr: 0.020000\n",
-      "2021-09-08 11:48:18,281 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:48:18,282 EPOCH 7 done: loss 0.0939 - lr 0.0200000\n",
-      "2021-09-08 11:48:18,311 DEV : loss 0.01632840745151043 - score 0.0\n",
-      "2021-09-08 11:48:18,312 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 11:48:25,406 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:48:25,469 epoch 8 - iter 1/13 - loss 0.02593935 - samples/sec: 21.50 - lr: 0.020000\n",
-      "2021-09-08 11:48:25,515 epoch 8 - iter 2/13 - loss 0.89740986 - samples/sec: 21.97 - lr: 0.020000\n",
-      "2021-09-08 11:48:25,560 epoch 8 - iter 3/13 - loss 0.63954820 - samples/sec: 22.23 - lr: 0.020000\n",
-      "2021-09-08 11:48:25,603 epoch 8 - iter 4/13 - loss 0.47984371 - samples/sec: 23.97 - lr: 0.020000\n",
-      "2021-09-08 11:48:25,645 epoch 8 - iter 5/13 - loss 0.38439726 - samples/sec: 23.59 - lr: 0.020000\n",
-      "2021-09-08 11:48:25,688 epoch 8 - iter 6/13 - loss 0.32051035 - samples/sec: 23.87 - lr: 0.020000\n",
-      "2021-09-08 11:48:25,733 epoch 8 - iter 7/13 - loss 0.27663140 - samples/sec: 22.31 - lr: 0.020000\n",
-      "2021-09-08 11:48:25,776 epoch 8 - iter 8/13 - loss 0.24249413 - samples/sec: 23.64 - lr: 0.020000\n",
-      "2021-09-08 11:48:25,818 epoch 8 - iter 9/13 - loss 0.21680147 - samples/sec: 23.86 - lr: 0.020000\n",
-      "2021-09-08 11:48:25,860 epoch 8 - iter 10/13 - loss 0.19516996 - samples/sec: 23.99 - lr: 0.020000\n",
-      "2021-09-08 11:48:25,903 epoch 8 - iter 11/13 - loss 0.17783946 - samples/sec: 23.66 - lr: 0.020000\n",
-      "2021-09-08 11:48:25,945 epoch 8 - iter 12/13 - loss 0.16315508 - samples/sec: 23.86 - lr: 0.020000\n",
-      "2021-09-08 11:48:25,991 epoch 8 - iter 13/13 - loss 0.15986844 - samples/sec: 22.33 - lr: 0.020000\n",
-      "2021-09-08 11:48:25,992 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:48:25,992 EPOCH 8 done: loss 0.1599 - lr 0.0200000\n",
-      "2021-09-08 11:48:29,316 DEV : loss 0.003407934447750449 - score 0.0\n",
-      "2021-09-08 11:48:29,317 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 11:48:39,450 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:48:39,510 epoch 9 - iter 1/13 - loss 0.00066141 - samples/sec: 23.04 - lr: 0.020000\n",
-      "2021-09-08 11:48:39,556 epoch 9 - iter 2/13 - loss 0.33091246 - samples/sec: 22.03 - lr: 0.020000\n",
-      "2021-09-08 11:48:39,601 epoch 9 - iter 3/13 - loss 0.22223426 - samples/sec: 22.44 - lr: 0.020000\n",
-      "2021-09-08 11:48:39,643 epoch 9 - iter 4/13 - loss 0.16715341 - samples/sec: 24.02 - lr: 0.020000\n",
-      "2021-09-08 11:48:39,689 epoch 9 - iter 5/13 - loss 0.56476244 - samples/sec: 21.95 - lr: 0.020000\n",
-      "2021-09-08 11:48:39,731 epoch 9 - iter 6/13 - loss 0.47078202 - samples/sec: 23.84 - lr: 0.020000\n",
-      "2021-09-08 11:48:39,773 epoch 9 - iter 7/13 - loss 0.40359071 - samples/sec: 24.03 - lr: 0.020000\n",
-      "2021-09-08 11:48:39,816 epoch 9 - iter 8/13 - loss 0.35409841 - samples/sec: 23.69 - lr: 0.020000\n",
-      "2021-09-08 11:48:39,862 epoch 9 - iter 9/13 - loss 0.35042962 - samples/sec: 22.14 - lr: 0.020000\n",
-      "2021-09-08 11:48:39,904 epoch 9 - iter 10/13 - loss 0.31557322 - samples/sec: 23.76 - lr: 0.020000\n",
-      "2021-09-08 11:48:39,947 epoch 9 - iter 11/13 - loss 0.28699755 - samples/sec: 23.84 - lr: 0.020000\n",
-      "2021-09-08 11:48:39,992 epoch 9 - iter 12/13 - loss 0.29425375 - samples/sec: 22.38 - lr: 0.020000\n",
-      "2021-09-08 11:48:40,034 epoch 9 - iter 13/13 - loss 0.27183841 - samples/sec: 23.80 - lr: 0.020000\n",
-      "2021-09-08 11:48:40,035 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:48:40,035 EPOCH 9 done: loss 0.2718 - lr 0.0200000\n",
-      "2021-09-08 11:48:42,170 DEV : loss 0.012316936627030373 - score 0.0\n",
-      "2021-09-08 11:48:42,171 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:48:42,189 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:48:42,249 epoch 10 - iter 1/13 - loss 0.19028808 - samples/sec: 22.18 - lr: 0.020000\n",
-      "2021-09-08 11:48:42,292 epoch 10 - iter 2/13 - loss 0.09687664 - samples/sec: 23.78 - lr: 0.020000\n",
-      "2021-09-08 11:48:42,336 epoch 10 - iter 3/13 - loss 0.06527287 - samples/sec: 22.58 - lr: 0.020000\n",
-      "2021-09-08 11:48:42,382 epoch 10 - iter 4/13 - loss 0.05006016 - samples/sec: 22.28 - lr: 0.020000\n",
-      "2021-09-08 11:48:42,429 epoch 10 - iter 5/13 - loss 0.04028415 - samples/sec: 21.57 - lr: 0.020000\n",
-      "2021-09-08 11:48:42,479 epoch 10 - iter 6/13 - loss 0.06518025 - samples/sec: 20.53 - lr: 0.020000\n",
-      "2021-09-08 11:48:42,524 epoch 10 - iter 7/13 - loss 0.05693392 - samples/sec: 22.19 - lr: 0.020000\n",
-      "2021-09-08 11:48:42,571 epoch 10 - iter 8/13 - loss 0.05089282 - samples/sec: 21.82 - lr: 0.020000\n",
-      "2021-09-08 11:48:42,620 epoch 10 - iter 9/13 - loss 0.04809759 - samples/sec: 20.50 - lr: 0.020000\n",
-      "2021-09-08 11:48:42,666 epoch 10 - iter 10/13 - loss 0.04388312 - samples/sec: 22.28 - lr: 0.020000\n",
-      "2021-09-08 11:48:42,710 epoch 10 - iter 11/13 - loss 0.04168427 - samples/sec: 22.80 - lr: 0.020000\n",
-      "2021-09-08 11:48:42,757 epoch 10 - iter 12/13 - loss 0.04887321 - samples/sec: 21.73 - lr: 0.020000\n"
+      "2021-09-21 20:20:55,370 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:20:55,442 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:55,503 epoch 6 - iter 1/13 - loss 0.00179225 - samples/sec: 22.55 - lr: 0.020000\n",
+      "2021-09-21 20:20:55,547 epoch 6 - iter 2/13 - loss 0.00203315 - samples/sec: 22.87 - lr: 0.020000\n",
+      "2021-09-21 20:20:55,591 epoch 6 - iter 3/13 - loss 0.00307200 - samples/sec: 23.19 - lr: 0.020000\n",
+      "2021-09-21 20:20:55,634 epoch 6 - iter 4/13 - loss 0.00367117 - samples/sec: 23.13 - lr: 0.020000\n",
+      "2021-09-21 20:20:55,677 epoch 6 - iter 5/13 - loss 0.00316853 - samples/sec: 23.38 - lr: 0.020000\n",
+      "2021-09-21 20:20:55,720 epoch 6 - iter 6/13 - loss 0.00418695 - samples/sec: 23.59 - lr: 0.020000\n",
+      "2021-09-21 20:20:55,764 epoch 6 - iter 7/13 - loss 0.00375463 - samples/sec: 23.22 - lr: 0.020000\n",
+      "2021-09-21 20:20:55,806 epoch 6 - iter 8/13 - loss 0.00359378 - samples/sec: 23.59 - lr: 0.020000\n",
+      "2021-09-21 20:20:56,215 epoch 6 - iter 9/13 - loss 0.00452071 - samples/sec: 23.33 - lr: 0.020000\n",
+      "2021-09-21 20:20:56,260 epoch 6 - iter 10/13 - loss 0.00418150 - samples/sec: 22.37 - lr: 0.020000\n",
+      "2021-09-21 20:20:56,303 epoch 6 - iter 11/13 - loss 0.00392324 - samples/sec: 23.56 - lr: 0.020000\n",
+      "2021-09-21 20:20:56,346 epoch 6 - iter 12/13 - loss 0.00422712 - samples/sec: 23.41 - lr: 0.020000\n",
+      "2021-09-21 20:20:56,392 epoch 6 - iter 13/13 - loss 0.13634077 - samples/sec: 21.56 - lr: 0.020000\n",
+      "2021-09-21 20:20:56,393 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:56,394 EPOCH 6 done: loss 0.1363 - lr 0.0200000\n",
+      "2021-09-21 20:20:56,534 DEV : loss 0.26886069774627686 - score 0.0\n",
+      "2021-09-21 20:20:56,535 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:20:56,624 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:56,684 epoch 7 - iter 1/13 - loss 0.00294849 - samples/sec: 22.76 - lr: 0.020000\n",
+      "2021-09-21 20:20:56,728 epoch 7 - iter 2/13 - loss 0.00259120 - samples/sec: 22.96 - lr: 0.020000\n",
+      "2021-09-21 20:20:56,771 epoch 7 - iter 3/13 - loss 0.00291763 - samples/sec: 23.18 - lr: 0.020000\n",
+      "2021-09-21 20:20:56,817 epoch 7 - iter 4/13 - loss 0.15645208 - samples/sec: 22.01 - lr: 0.020000\n",
+      "2021-09-21 20:20:56,864 epoch 7 - iter 5/13 - loss 0.14406628 - samples/sec: 21.65 - lr: 0.020000\n",
+      "2021-09-21 20:20:56,907 epoch 7 - iter 6/13 - loss 0.12064854 - samples/sec: 23.43 - lr: 0.020000\n",
+      "2021-09-21 20:20:56,949 epoch 7 - iter 7/13 - loss 0.10369772 - samples/sec: 23.65 - lr: 0.020000\n",
+      "2021-09-21 20:20:56,993 epoch 7 - iter 8/13 - loss 0.09099343 - samples/sec: 23.30 - lr: 0.020000\n",
+      "2021-09-21 20:20:57,035 epoch 7 - iter 9/13 - loss 0.08202254 - samples/sec: 23.73 - lr: 0.020000\n",
+      "2021-09-21 20:20:57,078 epoch 7 - iter 10/13 - loss 0.07413128 - samples/sec: 23.76 - lr: 0.020000\n",
+      "2021-09-21 20:20:57,122 epoch 7 - iter 11/13 - loss 0.06844102 - samples/sec: 22.89 - lr: 0.020000\n",
+      "2021-09-21 20:20:57,165 epoch 7 - iter 12/13 - loss 0.06294437 - samples/sec: 23.42 - lr: 0.020000\n",
+      "2021-09-21 20:20:57,208 epoch 7 - iter 13/13 - loss 0.05822828 - samples/sec: 23.63 - lr: 0.020000\n",
+      "2021-09-21 20:20:57,209 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:57,209 EPOCH 7 done: loss 0.0582 - lr 0.0200000\n",
+      "2021-09-21 20:20:57,353 DEV : loss 0.2806015610694885 - score 0.0\n",
+      "2021-09-21 20:20:57,353 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:20:57,450 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:57,510 epoch 8 - iter 1/13 - loss 0.00213864 - samples/sec: 22.55 - lr: 0.020000\n",
+      "2021-09-21 20:20:57,554 epoch 8 - iter 2/13 - loss 0.00286033 - samples/sec: 23.21 - lr: 0.020000\n",
+      "2021-09-21 20:20:57,597 epoch 8 - iter 3/13 - loss 0.00239652 - samples/sec: 23.30 - lr: 0.020000\n",
+      "2021-09-21 20:20:57,641 epoch 8 - iter 4/13 - loss 0.00216203 - samples/sec: 23.00 - lr: 0.020000\n",
+      "2021-09-21 20:20:57,684 epoch 8 - iter 5/13 - loss 0.00196852 - samples/sec: 23.33 - lr: 0.020000\n",
+      "2021-09-21 20:20:57,730 epoch 8 - iter 6/13 - loss 0.04531502 - samples/sec: 21.99 - lr: 0.020000\n",
+      "2021-09-21 20:20:57,775 epoch 8 - iter 7/13 - loss 0.03898027 - samples/sec: 22.43 - lr: 0.020000\n",
+      "2021-09-21 20:20:57,818 epoch 8 - iter 8/13 - loss 0.03418487 - samples/sec: 23.44 - lr: 0.020000\n",
+      "2021-09-21 20:20:57,864 epoch 8 - iter 9/13 - loss 0.04649104 - samples/sec: 21.87 - lr: 0.020000\n",
+      "2021-09-21 20:20:57,908 epoch 8 - iter 10/13 - loss 0.04195559 - samples/sec: 23.24 - lr: 0.020000\n",
+      "2021-09-21 20:20:57,951 epoch 8 - iter 11/13 - loss 0.03819442 - samples/sec: 23.25 - lr: 0.020000\n",
+      "2021-09-21 20:20:57,997 epoch 8 - iter 12/13 - loss 0.05028698 - samples/sec: 22.02 - lr: 0.020000\n",
+      "2021-09-21 20:20:58,040 epoch 8 - iter 13/13 - loss 0.04666041 - samples/sec: 23.24 - lr: 0.020000\n",
+      "2021-09-21 20:20:58,041 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:58,042 EPOCH 8 done: loss 0.0467 - lr 0.0200000\n",
+      "2021-09-21 20:20:58,189 DEV : loss 0.3861340582370758 - score 0.0\n",
+      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 20:20:58,189 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:20:58,268 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:58,327 epoch 9 - iter 1/13 - loss 0.00048943 - samples/sec: 23.27 - lr: 0.010000\n",
+      "2021-09-21 20:20:58,370 epoch 9 - iter 2/13 - loss 0.00143474 - samples/sec: 23.29 - lr: 0.010000\n",
+      "2021-09-21 20:20:58,416 epoch 9 - iter 3/13 - loss 0.00751297 - samples/sec: 21.89 - lr: 0.010000\n",
+      "2021-09-21 20:20:58,459 epoch 9 - iter 4/13 - loss 0.00575637 - samples/sec: 23.71 - lr: 0.010000\n",
+      "2021-09-21 20:20:58,506 epoch 9 - iter 5/13 - loss 0.35313794 - samples/sec: 21.66 - lr: 0.010000\n",
+      "2021-09-21 20:20:58,552 epoch 9 - iter 6/13 - loss 0.40132160 - samples/sec: 21.84 - lr: 0.010000\n",
+      "2021-09-21 20:20:58,594 epoch 9 - iter 7/13 - loss 0.34427644 - samples/sec: 23.71 - lr: 0.010000\n",
+      "2021-09-21 20:20:58,638 epoch 9 - iter 8/13 - loss 0.30153527 - samples/sec: 23.28 - lr: 0.010000\n",
+      "2021-09-21 20:20:58,685 epoch 9 - iter 9/13 - loss 0.26902695 - samples/sec: 21.41 - lr: 0.010000\n",
+      "2021-09-21 20:20:58,728 epoch 9 - iter 10/13 - loss 0.24221453 - samples/sec: 23.40 - lr: 0.010000\n",
+      "2021-09-21 20:20:58,775 epoch 9 - iter 11/13 - loss 0.31584160 - samples/sec: 21.25 - lr: 0.010000\n",
+      "2021-09-21 20:20:58,820 epoch 9 - iter 12/13 - loss 0.28964182 - samples/sec: 22.80 - lr: 0.010000\n",
+      "2021-09-21 20:20:58,867 epoch 9 - iter 13/13 - loss 0.26905589 - samples/sec: 21.58 - lr: 0.010000\n",
+      "2021-09-21 20:20:58,867 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:58,868 EPOCH 9 done: loss 0.2691 - lr 0.0100000\n",
+      "2021-09-21 20:20:59,022 DEV : loss 0.1795513927936554 - score 0.0\n",
+      "2021-09-21 20:20:59,023 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:20:59,127 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:59,185 epoch 10 - iter 1/13 - loss 0.00981904 - samples/sec: 23.30 - lr: 0.010000\n",
+      "2021-09-21 20:20:59,228 epoch 10 - iter 2/13 - loss 0.00546226 - samples/sec: 23.56 - lr: 0.010000\n",
+      "2021-09-21 20:20:59,272 epoch 10 - iter 3/13 - loss 0.00468121 - samples/sec: 23.05 - lr: 0.010000\n",
+      "2021-09-21 20:20:59,318 epoch 10 - iter 4/13 - loss 0.01827025 - samples/sec: 21.87 - lr: 0.010000\n",
+      "2021-09-21 20:20:59,361 epoch 10 - iter 5/13 - loss 0.01470454 - samples/sec: 23.55 - lr: 0.010000\n",
+      "2021-09-21 20:20:59,404 epoch 10 - iter 6/13 - loss 0.01242459 - samples/sec: 23.22 - lr: 0.010000\n",
+      "2021-09-21 20:20:59,450 epoch 10 - iter 7/13 - loss 0.03180901 - samples/sec: 21.88 - lr: 0.010000\n",
+      "2021-09-21 20:20:59,493 epoch 10 - iter 8/13 - loss 0.02901153 - samples/sec: 23.69 - lr: 0.010000\n",
+      "2021-09-21 20:20:59,538 epoch 10 - iter 9/13 - loss 0.02581214 - samples/sec: 22.58 - lr: 0.010000\n",
+      "2021-09-21 20:20:59,582 epoch 10 - iter 10/13 - loss 0.02362223 - samples/sec: 22.98 - lr: 0.010000\n",
+      "2021-09-21 20:20:59,625 epoch 10 - iter 11/13 - loss 0.02152689 - samples/sec: 23.14 - lr: 0.010000\n",
+      "2021-09-21 20:20:59,670 epoch 10 - iter 12/13 - loss 0.01977988 - samples/sec: 22.43 - lr: 0.010000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:48:42,800 epoch 10 - iter 13/13 - loss 0.04518059 - samples/sec: 23.43 - lr: 0.020000\n",
-      "2021-09-08 11:48:42,801 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:48:42,802 EPOCH 10 done: loss 0.0452 - lr 0.0200000\n",
-      "2021-09-08 11:48:42,876 DEV : loss 0.11537650972604752 - score 0.0\n",
-      "2021-09-08 11:48:42,876 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:48:49,640 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:48:49,641 Testing using best model ...\n",
-      "2021-09-08 11:48:49,643 loading file None1/best-model.pt\n",
+      "2021-09-21 20:20:59,717 epoch 10 - iter 13/13 - loss 0.05064544 - samples/sec: 21.47 - lr: 0.010000\n",
+      "2021-09-21 20:20:59,718 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:20:59,718 EPOCH 10 done: loss 0.0506 - lr 0.0100000\n",
+      "2021-09-21 20:20:59,858 DEV : loss 0.06853050738573074 - score 0.0\n",
+      "2021-09-21 20:20:59,859 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:21:18,440 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:18,441 Testing using best model ...\n",
+      "2021-09-21 20:21:18,473 loading file None1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:48:54,637 \t0.5\n",
-      "2021-09-08 11:48:54,637 \n",
+      "2021-09-21 20:21:29,122 \t0.0\n",
+      "2021-09-21 20:21:29,122 \n",
       "Results:\n",
-      "- F-score (micro) 0.5\n",
-      "- F-score (macro) 0.0667\n",
-      "- Accuracy 0.5\n",
+      "- F-score (micro) 0.0\n",
+      "- F-score (macro) 0.0\n",
+      "- Accuracy 0.0\n",
       "\n",
       "By class:\n",
       "               precision    recall  f1-score   support\n",
@@ -653,38 +651,38 @@
       "      fashion     0.0000    0.0000    0.0000         0\n",
       "entertainment     0.0000    0.0000    0.0000         0\n",
       "      divorce     0.0000    0.0000    0.0000         0\n",
+      "        crime     0.0000    0.0000    0.0000         0\n",
       "     religion     0.0000    0.0000    0.0000         0\n",
       "       sports     0.0000    0.0000    0.0000         0\n",
       "     politics     0.0000    0.0000    0.0000         0\n",
-      "       comedy     0.0000    0.0000    0.0000         0\n",
+      "       travel     0.0000    0.0000    0.0000         1\n",
       "      science     0.0000    0.0000    0.0000         1\n",
-      "        crime     1.0000    1.0000    1.0000         1\n",
       "\n",
-      "    micro avg     0.5000    0.5000    0.5000         2\n",
-      "    macro avg     0.0667    0.0667    0.0667         2\n",
-      " weighted avg     0.5000    0.5000    0.5000         2\n",
-      "  samples avg     0.5000    0.5000    0.5000         2\n",
+      "    micro avg     0.0000    0.0000    0.0000         2\n",
+      "    macro avg     0.0000    0.0000    0.0000         2\n",
+      " weighted avg     0.0000    0.0000    0.0000         2\n",
+      "  samples avg     0.0000    0.0000    0.0000         2\n",
       "\n",
-      "2021-09-08 11:48:54,638 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:01,994 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
+      "2021-09-21 20:21:29,122 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:45,954 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:49:06,123 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:21:50,535 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 15/15 [00:00<00:00, 49152.00it/s]"
+      "100%|██████████| 15/15 [00:00<00:00, 49774.18it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:49:06,125 [b'travel', b'women', b'parents', b'business', b'weddings', b'fashion', b'entertainment', b'divorce', b'crime', b'religion', b'sports', b'politics', b'comedy', b'wellness', b'science']\n",
-      "2021-09-08 11:49:06,134 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:06,136 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:21:50,537 [b'travel', b'technology', b'wellness', b'women', b'parents', b'business', b'weddings', b'entertainment', b'science', b'divorce', b'religion', b'politics', b'comedy', b'crime', b'sports']\n",
+      "2021-09-21 20:21:50,547 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:50,548 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -997,28 +995,26 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:49:06,136 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:06,136 Corpus: \"Corpus: 13 train + 1 dev + 2 test sentences\"\n",
-      "2021-09-08 11:49:06,137 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:06,137 Parameters:\n",
-      "2021-09-08 11:49:06,137  - learning_rate: \"0.02\"\n",
-      "2021-09-08 11:49:06,138  - mini_batch_size: \"1\"\n",
-      "2021-09-08 11:49:06,138  - patience: \"3\"\n",
-      "2021-09-08 11:49:06,138  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 11:49:06,138  - max_epochs: \"10\"\n",
-      "2021-09-08 11:49:06,139  - shuffle: \"True\"\n",
-      "2021-09-08 11:49:06,139  - train_with_dev: \"False\"\n",
-      "2021-09-08 11:49:06,139  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 11:49:06,139 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:06,140 Model training base path: \"None1\"\n",
-      "2021-09-08 11:49:06,140 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:06,140 Device: cuda:0\n",
-      "2021-09-08 11:49:06,141 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:06,141 Embeddings storage mode: cpu\n",
-      "2021-09-08 11:49:06,147 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:06,202 epoch 1 - iter 1/13 - loss 0.31358096 - samples/sec: 25.54 - lr: 0.020000\n",
-      "2021-09-08 11:49:06,249 epoch 1 - iter 2/13 - loss 0.35172635 - samples/sec: 21.58 - lr: 0.020000\n",
-      "2021-09-08 11:49:06,296 epoch 1 - iter 3/13 - loss 0.52235244 - samples/sec: 21.54 - lr: 0.020000\n"
+      "2021-09-21 20:21:50,549 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:50,549 Corpus: \"Corpus: 13 train + 1 dev + 2 test sentences\"\n",
+      "2021-09-21 20:21:50,549 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:50,550 Parameters:\n",
+      "2021-09-21 20:21:50,550  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:21:50,550  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:21:50,550  - patience: \"3\"\n",
+      "2021-09-21 20:21:50,551  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:21:50,551  - max_epochs: \"10\"\n",
+      "2021-09-21 20:21:50,551  - shuffle: \"True\"\n",
+      "2021-09-21 20:21:50,552  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:21:50,552  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:21:50,552 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:50,552 Model training base path: \"None1\"\n",
+      "2021-09-21 20:21:50,553 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:50,553 Device: cuda:0\n",
+      "2021-09-21 20:21:50,553 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:50,554 Embeddings storage mode: cpu\n",
+      "2021-09-21 20:21:50,569 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:50,705 epoch 1 - iter 1/13 - loss 0.71017140 - samples/sec: 19.90 - lr: 0.020000\n"
      ]
     },
     {
@@ -1032,253 +1028,256 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:49:06,342 epoch 1 - iter 4/13 - loss 0.95962785 - samples/sec: 21.98 - lr: 0.020000\n",
-      "2021-09-08 11:49:06,389 epoch 1 - iter 5/13 - loss 0.79807694 - samples/sec: 21.35 - lr: 0.020000\n",
-      "2021-09-08 11:49:06,435 epoch 1 - iter 6/13 - loss 0.71288321 - samples/sec: 22.17 - lr: 0.020000\n",
-      "2021-09-08 11:49:06,480 epoch 1 - iter 7/13 - loss 0.63980561 - samples/sec: 22.22 - lr: 0.020000\n",
-      "2021-09-08 11:49:06,526 epoch 1 - iter 8/13 - loss 0.65214425 - samples/sec: 21.86 - lr: 0.020000\n",
-      "2021-09-08 11:49:06,572 epoch 1 - iter 9/13 - loss 0.63471100 - samples/sec: 22.15 - lr: 0.020000\n",
-      "2021-09-08 11:49:06,617 epoch 1 - iter 10/13 - loss 0.61009933 - samples/sec: 22.26 - lr: 0.020000\n",
-      "2021-09-08 11:49:06,660 epoch 1 - iter 11/13 - loss 0.55649290 - samples/sec: 23.54 - lr: 0.020000\n",
-      "2021-09-08 11:49:06,703 epoch 1 - iter 12/13 - loss 0.51404992 - samples/sec: 23.36 - lr: 0.020000\n",
-      "2021-09-08 11:49:06,749 epoch 1 - iter 13/13 - loss 0.51954307 - samples/sec: 22.28 - lr: 0.020000\n",
-      "2021-09-08 11:49:06,750 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:06,750 EPOCH 1 done: loss 0.5195 - lr 0.0200000\n",
-      "2021-09-08 11:49:06,780 DEV : loss 0.08885572105646133 - score 0.0\n",
-      "2021-09-08 11:49:06,781 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:21:50,792 epoch 1 - iter 2/13 - loss 0.68602940 - samples/sec: 11.58 - lr: 0.020000\n",
+      "2021-09-21 20:21:50,849 epoch 1 - iter 3/13 - loss 0.70802186 - samples/sec: 17.56 - lr: 0.020000\n",
+      "2021-09-21 20:21:50,904 epoch 1 - iter 4/13 - loss 0.72647315 - samples/sec: 18.59 - lr: 0.020000\n",
+      "2021-09-21 20:21:50,963 epoch 1 - iter 5/13 - loss 0.63774300 - samples/sec: 17.07 - lr: 0.020000\n",
+      "2021-09-21 20:21:51,028 epoch 1 - iter 6/13 - loss 0.54300945 - samples/sec: 15.41 - lr: 0.020000\n",
+      "2021-09-21 20:21:51,085 epoch 1 - iter 7/13 - loss 0.55581738 - samples/sec: 17.80 - lr: 0.020000\n",
+      "2021-09-21 20:21:51,172 epoch 1 - iter 8/13 - loss 0.53497204 - samples/sec: 11.51 - lr: 0.020000\n",
+      "2021-09-21 20:21:51,244 epoch 1 - iter 9/13 - loss 0.48013406 - samples/sec: 14.04 - lr: 0.020000\n",
+      "2021-09-21 20:21:51,331 epoch 1 - iter 10/13 - loss 0.46446239 - samples/sec: 11.47 - lr: 0.020000\n",
+      "2021-09-21 20:21:51,393 epoch 1 - iter 11/13 - loss 0.43234366 - samples/sec: 16.40 - lr: 0.020000\n",
+      "2021-09-21 20:21:51,469 epoch 1 - iter 12/13 - loss 0.42343037 - samples/sec: 13.22 - lr: 0.020000\n",
+      "2021-09-21 20:21:51,521 epoch 1 - iter 13/13 - loss 0.42183888 - samples/sec: 19.40 - lr: 0.020000\n",
+      "2021-09-21 20:21:51,522 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:51,522 EPOCH 1 done: loss 0.4218 - lr 0.0200000\n",
+      "2021-09-21 20:21:51,597 DEV : loss 0.36306053400039673 - score 0.0\n",
+      "2021-09-21 20:21:51,598 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:49:11,061 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:11,125 epoch 2 - iter 1/13 - loss 0.71279651 - samples/sec: 21.15 - lr: 0.020000\n",
-      "2021-09-08 11:49:11,182 epoch 2 - iter 2/13 - loss 0.36022731 - samples/sec: 17.73 - lr: 0.020000\n",
-      "2021-09-08 11:49:11,226 epoch 2 - iter 3/13 - loss 0.24366118 - samples/sec: 23.00 - lr: 0.020000\n",
-      "2021-09-08 11:49:11,273 epoch 2 - iter 4/13 - loss 0.32975429 - samples/sec: 21.70 - lr: 0.020000\n",
-      "2021-09-08 11:49:11,319 epoch 2 - iter 5/13 - loss 0.35690094 - samples/sec: 21.88 - lr: 0.020000\n",
-      "2021-09-08 11:49:11,365 epoch 2 - iter 6/13 - loss 0.44262307 - samples/sec: 22.09 - lr: 0.020000\n",
-      "2021-09-08 11:49:11,410 epoch 2 - iter 7/13 - loss 0.50068082 - samples/sec: 22.15 - lr: 0.020000\n",
-      "2021-09-08 11:49:11,458 epoch 2 - iter 8/13 - loss 0.47065497 - samples/sec: 21.46 - lr: 0.020000\n",
-      "2021-09-08 11:49:11,503 epoch 2 - iter 9/13 - loss 0.46917121 - samples/sec: 22.23 - lr: 0.020000\n",
-      "2021-09-08 11:49:11,548 epoch 2 - iter 10/13 - loss 0.43095116 - samples/sec: 22.26 - lr: 0.020000\n",
-      "2021-09-08 11:49:11,594 epoch 2 - iter 11/13 - loss 0.43187582 - samples/sec: 21.90 - lr: 0.020000\n",
-      "2021-09-08 11:49:11,640 epoch 2 - iter 12/13 - loss 0.41616373 - samples/sec: 22.17 - lr: 0.020000\n",
-      "2021-09-08 11:49:11,685 epoch 2 - iter 13/13 - loss 0.39145492 - samples/sec: 22.23 - lr: 0.020000\n",
-      "2021-09-08 11:49:11,687 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:11,687 EPOCH 2 done: loss 0.3915 - lr 0.0200000\n",
-      "2021-09-08 11:49:11,730 DEV : loss 0.2082030177116394 - score 0.0\n",
-      "2021-09-08 11:49:11,730 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:49:11,752 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:11,813 epoch 3 - iter 1/13 - loss 0.32549736 - samples/sec: 22.02 - lr: 0.020000\n",
-      "2021-09-08 11:49:11,858 epoch 3 - iter 2/13 - loss 0.20566667 - samples/sec: 22.15 - lr: 0.020000\n",
-      "2021-09-08 11:49:11,904 epoch 3 - iter 3/13 - loss 0.26948851 - samples/sec: 22.36 - lr: 0.020000\n",
-      "2021-09-08 11:49:11,946 epoch 3 - iter 4/13 - loss 0.20746463 - samples/sec: 23.66 - lr: 0.020000\n",
-      "2021-09-08 11:49:11,989 epoch 3 - iter 5/13 - loss 0.16681095 - samples/sec: 23.67 - lr: 0.020000\n",
-      "2021-09-08 11:49:12,034 epoch 3 - iter 6/13 - loss 0.22893289 - samples/sec: 22.24 - lr: 0.020000\n",
-      "2021-09-08 11:49:12,078 epoch 3 - iter 7/13 - loss 0.19702191 - samples/sec: 23.11 - lr: 0.020000\n",
-      "2021-09-08 11:49:12,127 epoch 3 - iter 8/13 - loss 0.18091899 - samples/sec: 20.54 - lr: 0.020000\n",
-      "2021-09-08 11:49:12,177 epoch 3 - iter 9/13 - loss 0.31034341 - samples/sec: 20.69 - lr: 0.020000\n",
-      "2021-09-08 11:49:12,227 epoch 3 - iter 10/13 - loss 0.29937897 - samples/sec: 20.28 - lr: 0.020000\n",
-      "2021-09-08 11:49:12,271 epoch 3 - iter 11/13 - loss 0.27425517 - samples/sec: 22.93 - lr: 0.020000\n",
-      "2021-09-08 11:49:12,314 epoch 3 - iter 12/13 - loss 0.25304565 - samples/sec: 23.42 - lr: 0.020000\n",
-      "2021-09-08 11:49:12,359 epoch 3 - iter 13/13 - loss 0.23363711 - samples/sec: 22.42 - lr: 0.020000\n",
-      "2021-09-08 11:49:12,361 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:12,361 EPOCH 3 done: loss 0.2336 - lr 0.0200000\n",
-      "2021-09-08 11:49:12,403 DEV : loss 0.23882319033145905 - score 0.0\n",
-      "2021-09-08 11:49:12,404 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:49:12,447 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:12,510 epoch 4 - iter 1/13 - loss 0.00430760 - samples/sec: 21.74 - lr: 0.020000\n",
-      "2021-09-08 11:49:12,560 epoch 4 - iter 2/13 - loss 0.58519261 - samples/sec: 20.37 - lr: 0.020000\n",
-      "2021-09-08 11:49:12,606 epoch 4 - iter 3/13 - loss 0.39178316 - samples/sec: 21.90 - lr: 0.020000\n",
-      "2021-09-08 11:49:12,655 epoch 4 - iter 4/13 - loss 0.31676402 - samples/sec: 20.61 - lr: 0.020000\n",
-      "2021-09-08 11:49:12,702 epoch 4 - iter 5/13 - loss 0.25489673 - samples/sec: 21.88 - lr: 0.020000\n",
-      "2021-09-08 11:49:12,746 epoch 4 - iter 6/13 - loss 0.21724098 - samples/sec: 22.64 - lr: 0.020000\n",
-      "2021-09-08 11:49:12,789 epoch 4 - iter 7/13 - loss 0.18693782 - samples/sec: 23.67 - lr: 0.020000\n",
-      "2021-09-08 11:49:12,832 epoch 4 - iter 8/13 - loss 0.16380935 - samples/sec: 23.46 - lr: 0.020000\n",
-      "2021-09-08 11:49:12,875 epoch 4 - iter 9/13 - loss 0.14634582 - samples/sec: 23.51 - lr: 0.020000\n",
-      "2021-09-08 11:49:12,920 epoch 4 - iter 10/13 - loss 0.14200388 - samples/sec: 22.38 - lr: 0.020000\n",
-      "2021-09-08 11:49:12,966 epoch 4 - iter 11/13 - loss 0.16127586 - samples/sec: 21.97 - lr: 0.020000\n",
-      "2021-09-08 11:49:13,011 epoch 4 - iter 12/13 - loss 0.14791959 - samples/sec: 22.45 - lr: 0.020000\n",
-      "2021-09-08 11:49:13,054 epoch 4 - iter 13/13 - loss 0.13672044 - samples/sec: 23.83 - lr: 0.020000\n",
-      "2021-09-08 11:49:13,055 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:13,055 EPOCH 4 done: loss 0.1367 - lr 0.0200000\n",
-      "2021-09-08 11:49:13,186 DEV : loss 0.19754014909267426 - score 0.0\n",
-      "2021-09-08 11:49:13,186 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:49:13,260 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:13,318 epoch 5 - iter 1/13 - loss 0.00494753 - samples/sec: 23.40 - lr: 0.020000\n",
-      "2021-09-08 11:49:13,361 epoch 5 - iter 2/13 - loss 0.00391409 - samples/sec: 23.41 - lr: 0.020000\n",
-      "2021-09-08 11:49:13,403 epoch 5 - iter 3/13 - loss 0.00424290 - samples/sec: 23.81 - lr: 0.020000\n",
-      "2021-09-08 11:49:13,447 epoch 5 - iter 4/13 - loss 0.00414751 - samples/sec: 22.80 - lr: 0.020000\n",
-      "2021-09-08 11:49:13,493 epoch 5 - iter 5/13 - loss 0.28555573 - samples/sec: 22.08 - lr: 0.020000\n",
-      "2021-09-08 11:49:13,537 epoch 5 - iter 6/13 - loss 0.23828227 - samples/sec: 23.27 - lr: 0.020000\n",
-      "2021-09-08 11:49:13,583 epoch 5 - iter 7/13 - loss 0.27396630 - samples/sec: 21.85 - lr: 0.020000\n",
-      "2021-09-08 11:49:13,626 epoch 5 - iter 8/13 - loss 0.24040930 - samples/sec: 23.27 - lr: 0.020000\n",
-      "2021-09-08 11:49:13,672 epoch 5 - iter 9/13 - loss 0.23461363 - samples/sec: 22.22 - lr: 0.020000\n",
-      "2021-09-08 11:49:13,715 epoch 5 - iter 10/13 - loss 0.21253964 - samples/sec: 23.36 - lr: 0.020000\n",
-      "2021-09-08 11:49:13,760 epoch 5 - iter 11/13 - loss 0.19838843 - samples/sec: 22.16 - lr: 0.020000\n",
-      "2021-09-08 11:49:13,806 epoch 5 - iter 12/13 - loss 0.19780463 - samples/sec: 22.22 - lr: 0.020000\n",
-      "2021-09-08 11:49:13,852 epoch 5 - iter 13/13 - loss 0.18480661 - samples/sec: 21.85 - lr: 0.020000\n",
-      "2021-09-08 11:49:13,853 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:13,853 EPOCH 5 done: loss 0.1848 - lr 0.0200000\n",
-      "2021-09-08 11:49:13,985 DEV : loss 0.05443252995610237 - score 0.0\n",
-      "2021-09-08 11:49:13,985 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:21:55,662 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:55,900 epoch 2 - iter 1/13 - loss 0.23938370 - samples/sec: 13.05 - lr: 0.020000\n",
+      "2021-09-21 20:21:55,950 epoch 2 - iter 2/13 - loss 0.12109827 - samples/sec: 20.49 - lr: 0.020000\n",
+      "2021-09-21 20:21:56,009 epoch 2 - iter 3/13 - loss 0.64487047 - samples/sec: 17.14 - lr: 0.020000\n",
+      "2021-09-21 20:21:56,078 epoch 2 - iter 4/13 - loss 0.75888299 - samples/sec: 14.62 - lr: 0.020000\n",
+      "2021-09-21 20:21:56,156 epoch 2 - iter 5/13 - loss 0.86669085 - samples/sec: 12.85 - lr: 0.020000\n",
+      "2021-09-21 20:21:56,217 epoch 2 - iter 6/13 - loss 0.79172283 - samples/sec: 16.35 - lr: 0.020000\n",
+      "2021-09-21 20:21:56,277 epoch 2 - iter 7/13 - loss 0.68012215 - samples/sec: 17.00 - lr: 0.020000\n",
+      "2021-09-21 20:21:56,360 epoch 2 - iter 8/13 - loss 0.79656339 - samples/sec: 12.01 - lr: 0.020000\n",
+      "2021-09-21 20:21:56,440 epoch 2 - iter 9/13 - loss 0.76895695 - samples/sec: 12.64 - lr: 0.020000\n",
+      "2021-09-21 20:21:56,514 epoch 2 - iter 10/13 - loss 0.73868505 - samples/sec: 13.53 - lr: 0.020000\n",
+      "2021-09-21 20:21:56,578 epoch 2 - iter 11/13 - loss 0.67347361 - samples/sec: 15.94 - lr: 0.020000\n",
+      "2021-09-21 20:21:56,640 epoch 2 - iter 12/13 - loss 0.62189515 - samples/sec: 16.12 - lr: 0.020000\n",
+      "2021-09-21 20:21:56,712 epoch 2 - iter 13/13 - loss 0.61688867 - samples/sec: 14.06 - lr: 0.020000\n",
+      "2021-09-21 20:21:56,713 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:21:56,713 EPOCH 2 done: loss 0.6169 - lr 0.0200000\n",
+      "2021-09-21 20:21:56,814 DEV : loss 0.18148048222064972 - score 0.0\n",
+      "2021-09-21 20:21:56,818 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:49:19,193 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 20:22:00,859 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:00,980 epoch 3 - iter 1/13 - loss 0.18111382 - samples/sec: 17.67 - lr: 0.020000\n",
+      "2021-09-21 20:22:01,046 epoch 3 - iter 2/13 - loss 0.09800752 - samples/sec: 15.20 - lr: 0.020000\n",
+      "2021-09-21 20:22:01,110 epoch 3 - iter 3/13 - loss 0.06580290 - samples/sec: 15.73 - lr: 0.020000\n",
+      "2021-09-21 20:22:01,177 epoch 3 - iter 4/13 - loss 0.08125538 - samples/sec: 15.03 - lr: 0.020000\n",
+      "2021-09-21 20:22:01,242 epoch 3 - iter 5/13 - loss 0.06923180 - samples/sec: 15.39 - lr: 0.020000\n",
+      "2021-09-21 20:22:01,308 epoch 3 - iter 6/13 - loss 0.26446337 - samples/sec: 15.44 - lr: 0.020000\n",
+      "2021-09-21 20:22:01,376 epoch 3 - iter 7/13 - loss 0.28192105 - samples/sec: 14.69 - lr: 0.020000\n",
+      "2021-09-21 20:22:01,436 epoch 3 - iter 8/13 - loss 0.37530094 - samples/sec: 16.80 - lr: 0.020000\n",
+      "2021-09-21 20:22:01,500 epoch 3 - iter 9/13 - loss 0.33482053 - samples/sec: 15.96 - lr: 0.020000\n",
+      "2021-09-21 20:22:01,552 epoch 3 - iter 10/13 - loss 0.30182185 - samples/sec: 19.35 - lr: 0.020000\n",
+      "2021-09-21 20:22:01,613 epoch 3 - iter 11/13 - loss 0.32650178 - samples/sec: 16.40 - lr: 0.020000\n",
+      "2021-09-21 20:22:01,685 epoch 3 - iter 12/13 - loss 0.29969379 - samples/sec: 14.11 - lr: 0.020000\n",
+      "2021-09-21 20:22:01,745 epoch 3 - iter 13/13 - loss 0.29040573 - samples/sec: 16.69 - lr: 0.020000\n",
+      "2021-09-21 20:22:01,746 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:01,747 EPOCH 3 done: loss 0.2904 - lr 0.0200000\n",
+      "2021-09-21 20:22:01,784 DEV : loss 0.18811365962028503 - score 0.0\n",
+      "2021-09-21 20:22:01,784 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:22:01,786 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:01,869 epoch 4 - iter 1/13 - loss 0.21754093 - samples/sec: 14.89 - lr: 0.020000\n",
+      "2021-09-21 20:22:01,940 epoch 4 - iter 2/13 - loss 0.21877499 - samples/sec: 14.31 - lr: 0.020000\n",
+      "2021-09-21 20:22:02,000 epoch 4 - iter 3/13 - loss 0.39094382 - samples/sec: 17.01 - lr: 0.020000\n",
+      "2021-09-21 20:22:02,052 epoch 4 - iter 4/13 - loss 0.47540157 - samples/sec: 19.48 - lr: 0.020000\n",
+      "2021-09-21 20:22:02,102 epoch 4 - iter 5/13 - loss 0.40962482 - samples/sec: 20.48 - lr: 0.020000\n",
+      "2021-09-21 20:22:02,167 epoch 4 - iter 6/13 - loss 0.34221580 - samples/sec: 15.49 - lr: 0.020000\n",
+      "2021-09-21 20:22:02,217 epoch 4 - iter 7/13 - loss 0.29409508 - samples/sec: 20.57 - lr: 0.020000\n",
+      "2021-09-21 20:22:02,283 epoch 4 - iter 8/13 - loss 0.26345984 - samples/sec: 15.17 - lr: 0.020000\n",
+      "2021-09-21 20:22:02,338 epoch 4 - iter 9/13 - loss 0.24300859 - samples/sec: 18.45 - lr: 0.020000\n",
+      "2021-09-21 20:22:02,407 epoch 4 - iter 10/13 - loss 0.22124079 - samples/sec: 14.69 - lr: 0.020000\n",
+      "2021-09-21 20:22:02,463 epoch 4 - iter 11/13 - loss 0.20142137 - samples/sec: 17.87 - lr: 0.020000\n",
+      "2021-09-21 20:22:02,534 epoch 4 - iter 12/13 - loss 0.18745481 - samples/sec: 14.26 - lr: 0.020000\n",
+      "2021-09-21 20:22:02,602 epoch 4 - iter 13/13 - loss 0.17310677 - samples/sec: 14.95 - lr: 0.020000\n",
+      "2021-09-21 20:22:02,603 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:02,604 EPOCH 4 done: loss 0.1731 - lr 0.0200000\n",
+      "2021-09-21 20:22:02,659 DEV : loss 0.3518994450569153 - score 0.0\n",
+      "2021-09-21 20:22:02,660 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:22:02,662 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:02,776 epoch 5 - iter 1/13 - loss 0.28090608 - samples/sec: 14.90 - lr: 0.020000\n",
+      "2021-09-21 20:22:02,836 epoch 5 - iter 2/13 - loss 0.14246797 - samples/sec: 16.72 - lr: 0.020000\n",
+      "2021-09-21 20:22:02,886 epoch 5 - iter 3/13 - loss 0.09695279 - samples/sec: 20.08 - lr: 0.020000\n",
+      "2021-09-21 20:22:02,944 epoch 5 - iter 4/13 - loss 0.07297395 - samples/sec: 17.52 - lr: 0.020000\n",
+      "2021-09-21 20:22:02,998 epoch 5 - iter 5/13 - loss 0.06139635 - samples/sec: 18.82 - lr: 0.020000\n",
+      "2021-09-21 20:22:03,051 epoch 5 - iter 6/13 - loss 0.05136665 - samples/sec: 18.87 - lr: 0.020000\n",
+      "2021-09-21 20:22:03,117 epoch 5 - iter 7/13 - loss 0.04428500 - samples/sec: 15.26 - lr: 0.020000\n",
+      "2021-09-21 20:22:03,171 epoch 5 - iter 8/13 - loss 0.17930665 - samples/sec: 18.68 - lr: 0.020000\n",
+      "2021-09-21 20:22:03,240 epoch 5 - iter 9/13 - loss 0.15949881 - samples/sec: 14.67 - lr: 0.020000\n",
+      "2021-09-21 20:22:03,293 epoch 5 - iter 10/13 - loss 0.16823746 - samples/sec: 18.85 - lr: 0.020000\n",
+      "2021-09-21 20:22:03,341 epoch 5 - iter 11/13 - loss 0.19862070 - samples/sec: 21.27 - lr: 0.020000\n",
+      "2021-09-21 20:22:03,387 epoch 5 - iter 12/13 - loss 0.18830200 - samples/sec: 21.72 - lr: 0.020000\n",
+      "2021-09-21 20:22:03,430 epoch 5 - iter 13/13 - loss 0.17421488 - samples/sec: 23.46 - lr: 0.020000\n",
+      "2021-09-21 20:22:03,431 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:49:19,253 epoch 6 - iter 1/13 - loss 0.00113444 - samples/sec: 23.03 - lr: 0.020000\n",
-      "2021-09-08 11:49:19,299 epoch 6 - iter 2/13 - loss 0.01656300 - samples/sec: 22.08 - lr: 0.020000\n",
-      "2021-09-08 11:49:19,341 epoch 6 - iter 3/13 - loss 0.01485669 - samples/sec: 23.75 - lr: 0.020000\n",
-      "2021-09-08 11:49:19,385 epoch 6 - iter 4/13 - loss 0.01224304 - samples/sec: 23.37 - lr: 0.020000\n",
-      "2021-09-08 11:49:19,428 epoch 6 - iter 5/13 - loss 0.01030254 - samples/sec: 23.33 - lr: 0.020000\n",
-      "2021-09-08 11:49:19,471 epoch 6 - iter 6/13 - loss 0.00870067 - samples/sec: 23.23 - lr: 0.020000\n",
-      "2021-09-08 11:49:19,514 epoch 6 - iter 7/13 - loss 0.01007155 - samples/sec: 23.88 - lr: 0.020000\n",
-      "2021-09-08 11:49:19,557 epoch 6 - iter 8/13 - loss 0.00949375 - samples/sec: 23.38 - lr: 0.020000\n",
-      "2021-09-08 11:49:19,602 epoch 6 - iter 9/13 - loss 0.01392972 - samples/sec: 22.23 - lr: 0.020000\n",
-      "2021-09-08 11:49:19,645 epoch 6 - iter 10/13 - loss 0.01259012 - samples/sec: 23.86 - lr: 0.020000\n",
-      "2021-09-08 11:49:19,688 epoch 6 - iter 11/13 - loss 0.01211103 - samples/sec: 23.20 - lr: 0.020000\n",
-      "2021-09-08 11:49:19,731 epoch 6 - iter 12/13 - loss 0.01120030 - samples/sec: 23.76 - lr: 0.020000\n",
-      "2021-09-08 11:49:19,774 epoch 6 - iter 13/13 - loss 0.01054633 - samples/sec: 23.33 - lr: 0.020000\n",
-      "2021-09-08 11:49:19,775 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:19,775 EPOCH 6 done: loss 0.0105 - lr 0.0200000\n",
-      "2021-09-08 11:49:19,910 DEV : loss 0.09418854862451553 - score 0.0\n",
-      "2021-09-08 11:49:19,911 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:49:19,990 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:20,051 epoch 7 - iter 1/13 - loss 0.22180502 - samples/sec: 21.73 - lr: 0.020000\n",
-      "2021-09-08 11:49:20,093 epoch 7 - iter 2/13 - loss 0.11104150 - samples/sec: 23.89 - lr: 0.020000\n",
-      "2021-09-08 11:49:20,136 epoch 7 - iter 3/13 - loss 0.07437858 - samples/sec: 23.52 - lr: 0.020000\n",
-      "2021-09-08 11:49:20,180 epoch 7 - iter 4/13 - loss 0.05622326 - samples/sec: 23.42 - lr: 0.020000\n",
-      "2021-09-08 11:49:20,222 epoch 7 - iter 5/13 - loss 0.04505382 - samples/sec: 23.88 - lr: 0.020000\n",
-      "2021-09-08 11:49:20,265 epoch 7 - iter 6/13 - loss 0.03758512 - samples/sec: 23.22 - lr: 0.020000\n",
-      "2021-09-08 11:49:20,308 epoch 7 - iter 7/13 - loss 0.03231569 - samples/sec: 23.44 - lr: 0.020000\n",
-      "2021-09-08 11:49:20,351 epoch 7 - iter 8/13 - loss 0.02842811 - samples/sec: 23.57 - lr: 0.020000\n",
-      "2021-09-08 11:49:20,396 epoch 7 - iter 9/13 - loss 0.07816733 - samples/sec: 22.39 - lr: 0.020000\n",
-      "2021-09-08 11:49:20,442 epoch 7 - iter 10/13 - loss 0.08514819 - samples/sec: 22.03 - lr: 0.020000\n",
-      "2021-09-08 11:49:20,485 epoch 7 - iter 11/13 - loss 0.07742051 - samples/sec: 23.42 - lr: 0.020000\n",
-      "2021-09-08 11:49:20,528 epoch 7 - iter 12/13 - loss 0.07110927 - samples/sec: 23.81 - lr: 0.020000\n",
-      "2021-09-08 11:49:20,570 epoch 7 - iter 13/13 - loss 0.06568551 - samples/sec: 23.77 - lr: 0.020000\n",
-      "2021-09-08 11:49:20,571 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:20,571 EPOCH 7 done: loss 0.0657 - lr 0.0200000\n",
-      "2021-09-08 11:49:20,709 DEV : loss 0.020187562331557274 - score 0.0\n",
-      "2021-09-08 11:49:20,710 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:22:03,432 EPOCH 5 done: loss 0.1742 - lr 0.0200000\n",
+      "2021-09-21 20:22:03,462 DEV : loss 0.13181664049625397 - score 0.0\n",
+      "2021-09-21 20:22:03,463 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:49:26,473 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:26,533 epoch 8 - iter 1/13 - loss 0.00038546 - samples/sec: 22.91 - lr: 0.020000\n",
-      "2021-09-08 11:49:26,580 epoch 8 - iter 2/13 - loss 0.03390060 - samples/sec: 21.74 - lr: 0.020000\n",
-      "2021-09-08 11:49:26,626 epoch 8 - iter 3/13 - loss 0.26724431 - samples/sec: 22.02 - lr: 0.020000\n",
-      "2021-09-08 11:49:26,669 epoch 8 - iter 4/13 - loss 0.20146732 - samples/sec: 23.67 - lr: 0.020000\n",
-      "2021-09-08 11:49:26,712 epoch 8 - iter 5/13 - loss 0.16199262 - samples/sec: 23.44 - lr: 0.020000\n",
-      "2021-09-08 11:49:26,754 epoch 8 - iter 6/13 - loss 0.13514902 - samples/sec: 23.85 - lr: 0.020000\n",
-      "2021-09-08 11:49:26,796 epoch 8 - iter 7/13 - loss 0.11602677 - samples/sec: 23.73 - lr: 0.020000\n",
-      "2021-09-08 11:49:26,840 epoch 8 - iter 8/13 - loss 0.10157253 - samples/sec: 23.43 - lr: 0.020000\n",
-      "2021-09-08 11:49:26,885 epoch 8 - iter 9/13 - loss 0.12330898 - samples/sec: 22.08 - lr: 0.020000\n",
-      "2021-09-08 11:49:26,931 epoch 8 - iter 10/13 - loss 0.11682578 - samples/sec: 22.29 - lr: 0.020000\n",
-      "2021-09-08 11:49:26,973 epoch 8 - iter 11/13 - loss 0.10624032 - samples/sec: 23.85 - lr: 0.020000\n",
-      "2021-09-08 11:49:27,017 epoch 8 - iter 12/13 - loss 0.09745491 - samples/sec: 23.25 - lr: 0.020000\n",
-      "2021-09-08 11:49:27,062 epoch 8 - iter 13/13 - loss 0.21635147 - samples/sec: 22.19 - lr: 0.020000\n",
-      "2021-09-08 11:49:27,063 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:27,063 EPOCH 8 done: loss 0.2164 - lr 0.0200000\n",
-      "2021-09-08 11:49:27,236 DEV : loss 0.0013520161155611277 - score 0.0\n",
-      "2021-09-08 11:49:27,237 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:22:07,532 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:07,594 epoch 6 - iter 1/13 - loss 0.00371463 - samples/sec: 22.47 - lr: 0.020000\n",
+      "2021-09-21 20:22:07,638 epoch 6 - iter 2/13 - loss 0.01688814 - samples/sec: 23.22 - lr: 0.020000\n",
+      "2021-09-21 20:22:07,682 epoch 6 - iter 3/13 - loss 0.01173645 - samples/sec: 22.89 - lr: 0.020000\n",
+      "2021-09-21 20:22:07,728 epoch 6 - iter 4/13 - loss 0.00934976 - samples/sec: 22.04 - lr: 0.020000\n",
+      "2021-09-21 20:22:07,771 epoch 6 - iter 5/13 - loss 0.00780162 - samples/sec: 23.31 - lr: 0.020000\n",
+      "2021-09-21 20:22:07,829 epoch 6 - iter 6/13 - loss 0.29307864 - samples/sec: 17.44 - lr: 0.020000\n",
+      "2021-09-21 20:22:07,875 epoch 6 - iter 7/13 - loss 0.25143244 - samples/sec: 22.24 - lr: 0.020000\n",
+      "2021-09-21 20:22:07,924 epoch 6 - iter 8/13 - loss 0.26358444 - samples/sec: 20.93 - lr: 0.020000\n",
+      "2021-09-21 20:22:07,970 epoch 6 - iter 9/13 - loss 0.23478623 - samples/sec: 22.55 - lr: 0.020000\n",
+      "2021-09-21 20:22:08,017 epoch 6 - iter 10/13 - loss 0.27046260 - samples/sec: 21.28 - lr: 0.020000\n",
+      "2021-09-21 20:22:08,065 epoch 6 - iter 11/13 - loss 0.25798070 - samples/sec: 21.42 - lr: 0.020000\n",
+      "2021-09-21 20:22:08,110 epoch 6 - iter 12/13 - loss 0.23754436 - samples/sec: 22.47 - lr: 0.020000\n",
+      "2021-09-21 20:22:08,154 epoch 6 - iter 13/13 - loss 0.21937095 - samples/sec: 22.77 - lr: 0.020000\n",
+      "2021-09-21 20:22:08,155 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:08,156 EPOCH 6 done: loss 0.2194 - lr 0.0200000\n",
+      "2021-09-21 20:22:08,196 DEV : loss 0.5179975628852844 - score 0.0\n",
+      "2021-09-21 20:22:08,198 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:22:08,201 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:08,292 epoch 7 - iter 1/13 - loss 0.02016739 - samples/sec: 16.33 - lr: 0.020000\n",
+      "2021-09-21 20:22:08,346 epoch 7 - iter 2/13 - loss 0.01815085 - samples/sec: 18.73 - lr: 0.020000\n",
+      "2021-09-21 20:22:08,405 epoch 7 - iter 3/13 - loss 0.14810564 - samples/sec: 17.10 - lr: 0.020000\n",
+      "2021-09-21 20:22:08,457 epoch 7 - iter 4/13 - loss 0.11191179 - samples/sec: 19.48 - lr: 0.020000\n",
+      "2021-09-21 20:22:08,510 epoch 7 - iter 5/13 - loss 0.09182934 - samples/sec: 19.09 - lr: 0.020000\n",
+      "2021-09-21 20:22:08,562 epoch 7 - iter 6/13 - loss 0.07665934 - samples/sec: 19.28 - lr: 0.020000\n",
+      "2021-09-21 20:22:08,617 epoch 7 - iter 7/13 - loss 0.06653444 - samples/sec: 18.49 - lr: 0.020000\n",
+      "2021-09-21 20:22:08,685 epoch 7 - iter 8/13 - loss 0.28911602 - samples/sec: 14.85 - lr: 0.020000\n",
+      "2021-09-21 20:22:08,755 epoch 7 - iter 9/13 - loss 0.25899769 - samples/sec: 14.46 - lr: 0.020000\n",
+      "2021-09-21 20:22:08,817 epoch 7 - iter 10/13 - loss 0.27112898 - samples/sec: 16.38 - lr: 0.020000\n",
+      "2021-09-21 20:22:08,879 epoch 7 - iter 11/13 - loss 0.24685091 - samples/sec: 16.27 - lr: 0.020000\n",
+      "2021-09-21 20:22:08,933 epoch 7 - iter 12/13 - loss 0.22641568 - samples/sec: 18.70 - lr: 0.020000\n",
+      "2021-09-21 20:22:08,987 epoch 7 - iter 13/13 - loss 0.20927811 - samples/sec: 18.97 - lr: 0.020000\n",
+      "2021-09-21 20:22:08,988 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:08,988 EPOCH 7 done: loss 0.2093 - lr 0.0200000\n",
+      "2021-09-21 20:22:09,060 DEV : loss 0.017071127891540527 - score 0.0\n",
+      "2021-09-21 20:22:09,061 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:49:31,874 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:31,933 epoch 9 - iter 1/13 - loss 0.00202004 - samples/sec: 23.24 - lr: 0.020000\n",
-      "2021-09-08 11:49:31,978 epoch 9 - iter 2/13 - loss 0.05812808 - samples/sec: 22.16 - lr: 0.020000\n",
-      "2021-09-08 11:49:32,023 epoch 9 - iter 3/13 - loss 0.03960137 - samples/sec: 22.77 - lr: 0.020000\n",
-      "2021-09-08 11:49:32,065 epoch 9 - iter 4/13 - loss 0.03050009 - samples/sec: 24.03 - lr: 0.020000\n",
-      "2021-09-08 11:49:32,107 epoch 9 - iter 5/13 - loss 0.02584165 - samples/sec: 23.93 - lr: 0.020000\n",
-      "2021-09-08 11:49:32,150 epoch 9 - iter 6/13 - loss 0.02193499 - samples/sec: 23.48 - lr: 0.020000\n",
-      "2021-09-08 11:49:32,192 epoch 9 - iter 7/13 - loss 0.01902687 - samples/sec: 23.85 - lr: 0.020000\n",
-      "2021-09-08 11:49:32,235 epoch 9 - iter 8/13 - loss 0.01718796 - samples/sec: 23.98 - lr: 0.020000\n",
-      "2021-09-08 11:49:32,278 epoch 9 - iter 9/13 - loss 0.01532414 - samples/sec: 23.52 - lr: 0.020000\n",
-      "2021-09-08 11:49:32,320 epoch 9 - iter 10/13 - loss 0.01414392 - samples/sec: 23.83 - lr: 0.020000\n",
-      "2021-09-08 11:49:32,362 epoch 9 - iter 11/13 - loss 0.01336223 - samples/sec: 23.92 - lr: 0.020000\n",
-      "2021-09-08 11:49:32,406 epoch 9 - iter 12/13 - loss 0.01270366 - samples/sec: 23.27 - lr: 0.020000\n",
-      "2021-09-08 11:49:32,448 epoch 9 - iter 13/13 - loss 0.01176775 - samples/sec: 23.91 - lr: 0.020000\n",
-      "2021-09-08 11:49:32,449 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:32,449 EPOCH 9 done: loss 0.0118 - lr 0.0200000\n",
-      "2021-09-08 11:49:32,482 DEV : loss 0.001980697503313422 - score 0.0\n",
-      "2021-09-08 11:49:32,482 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:49:32,484 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:32,541 epoch 10 - iter 1/13 - loss 0.00050374 - samples/sec: 23.69 - lr: 0.020000\n",
-      "2021-09-08 11:49:32,584 epoch 10 - iter 2/13 - loss 0.00126228 - samples/sec: 23.74 - lr: 0.020000\n",
-      "2021-09-08 11:49:32,629 epoch 10 - iter 3/13 - loss 0.06177847 - samples/sec: 22.19 - lr: 0.020000\n",
-      "2021-09-08 11:49:32,672 epoch 10 - iter 4/13 - loss 0.04648462 - samples/sec: 23.60 - lr: 0.020000\n",
-      "2021-09-08 11:49:32,715 epoch 10 - iter 5/13 - loss 0.03739592 - samples/sec: 23.94 - lr: 0.020000\n",
-      "2021-09-08 11:49:32,757 epoch 10 - iter 6/13 - loss 0.03131275 - samples/sec: 23.90 - lr: 0.020000\n",
-      "2021-09-08 11:49:32,803 epoch 10 - iter 7/13 - loss 0.02950424 - samples/sec: 21.86 - lr: 0.020000\n",
-      "2021-09-08 11:49:32,845 epoch 10 - iter 8/13 - loss 0.02688055 - samples/sec: 23.85 - lr: 0.020000\n",
-      "2021-09-08 11:49:32,890 epoch 10 - iter 9/13 - loss 0.02686212 - samples/sec: 22.43 - lr: 0.020000\n",
-      "2021-09-08 11:49:32,933 epoch 10 - iter 10/13 - loss 0.02420621 - samples/sec: 23.92 - lr: 0.020000\n",
-      "2021-09-08 11:49:32,976 epoch 10 - iter 11/13 - loss 0.02361046 - samples/sec: 23.48 - lr: 0.020000\n",
-      "2021-09-08 11:49:33,018 epoch 10 - iter 12/13 - loss 0.02167396 - samples/sec: 24.00 - lr: 0.020000\n",
-      "2021-09-08 11:49:33,060 epoch 10 - iter 13/13 - loss 0.02003365 - samples/sec: 24.01 - lr: 0.020000\n",
-      "2021-09-08 11:49:33,061 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 20:22:18,307 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:18,367 epoch 8 - iter 1/13 - loss 0.00371685 - samples/sec: 22.75 - lr: 0.020000\n",
+      "2021-09-21 20:22:18,414 epoch 8 - iter 2/13 - loss 1.28162033 - samples/sec: 21.89 - lr: 0.020000\n",
+      "2021-09-21 20:22:18,461 epoch 8 - iter 3/13 - loss 1.12598678 - samples/sec: 21.56 - lr: 0.020000\n",
+      "2021-09-21 20:22:18,507 epoch 8 - iter 4/13 - loss 0.85413173 - samples/sec: 21.81 - lr: 0.020000\n",
+      "2021-09-21 20:22:18,550 epoch 8 - iter 5/13 - loss 0.68359897 - samples/sec: 23.57 - lr: 0.020000\n",
+      "2021-09-21 20:22:18,593 epoch 8 - iter 6/13 - loss 0.57248100 - samples/sec: 23.37 - lr: 0.020000\n",
+      "2021-09-21 20:22:18,636 epoch 8 - iter 7/13 - loss 0.49629511 - samples/sec: 23.43 - lr: 0.020000\n",
+      "2021-09-21 20:22:18,682 epoch 8 - iter 8/13 - loss 0.44047537 - samples/sec: 21.85 - lr: 0.020000\n",
+      "2021-09-21 20:22:18,725 epoch 8 - iter 9/13 - loss 0.39164032 - samples/sec: 23.41 - lr: 0.020000\n",
+      "2021-09-21 20:22:18,768 epoch 8 - iter 10/13 - loss 0.35315526 - samples/sec: 23.53 - lr: 0.020000\n",
+      "2021-09-21 20:22:18,811 epoch 8 - iter 11/13 - loss 0.32111471 - samples/sec: 23.69 - lr: 0.020000\n",
+      "2021-09-21 20:22:18,854 epoch 8 - iter 12/13 - loss 0.29461603 - samples/sec: 23.39 - lr: 0.020000\n",
+      "2021-09-21 20:22:18,897 epoch 8 - iter 13/13 - loss 0.27199766 - samples/sec: 23.46 - lr: 0.020000\n",
+      "2021-09-21 20:22:18,898 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:18,898 EPOCH 8 done: loss 0.2720 - lr 0.0200000\n",
+      "2021-09-21 20:22:19,191 DEV : loss 0.09505855292081833 - score 0.0\n",
+      "2021-09-21 20:22:19,192 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:22:19,197 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:19,300 epoch 9 - iter 1/13 - loss 0.13139449 - samples/sec: 13.72 - lr: 0.020000\n",
+      "2021-09-21 20:22:19,381 epoch 9 - iter 2/13 - loss 0.06664108 - samples/sec: 12.42 - lr: 0.020000\n",
+      "2021-09-21 20:22:19,455 epoch 9 - iter 3/13 - loss 0.04594003 - samples/sec: 13.51 - lr: 0.020000\n",
+      "2021-09-21 20:22:19,532 epoch 9 - iter 4/13 - loss 0.03479042 - samples/sec: 13.02 - lr: 0.020000\n",
+      "2021-09-21 20:22:19,610 epoch 9 - iter 5/13 - loss 0.03256637 - samples/sec: 13.04 - lr: 0.020000\n",
+      "2021-09-21 20:22:19,690 epoch 9 - iter 6/13 - loss 0.02729357 - samples/sec: 12.43 - lr: 0.020000\n",
+      "2021-09-21 20:22:19,757 epoch 9 - iter 7/13 - loss 0.02988538 - samples/sec: 15.04 - lr: 0.020000\n",
+      "2021-09-21 20:22:19,827 epoch 9 - iter 8/13 - loss 0.02650209 - samples/sec: 14.42 - lr: 0.020000\n",
+      "2021-09-21 20:22:19,913 epoch 9 - iter 9/13 - loss 0.03253983 - samples/sec: 11.78 - lr: 0.020000\n",
+      "2021-09-21 20:22:19,995 epoch 9 - iter 10/13 - loss 0.02962945 - samples/sec: 12.16 - lr: 0.020000\n",
+      "2021-09-21 20:22:20,060 epoch 9 - iter 11/13 - loss 0.16735071 - samples/sec: 15.48 - lr: 0.020000\n",
+      "2021-09-21 20:22:20,133 epoch 9 - iter 12/13 - loss 0.15362346 - samples/sec: 13.84 - lr: 0.020000\n",
+      "2021-09-21 20:22:20,200 epoch 9 - iter 13/13 - loss 0.14186763 - samples/sec: 15.06 - lr: 0.020000\n",
+      "2021-09-21 20:22:20,201 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:20,201 EPOCH 9 done: loss 0.1419 - lr 0.0200000\n",
+      "2021-09-21 20:22:20,287 DEV : loss 0.28061196208000183 - score 0.0\n",
+      "2021-09-21 20:22:20,289 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:22:20,291 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:20,498 epoch 10 - iter 1/13 - loss 0.03679281 - samples/sec: 14.97 - lr: 0.020000\n",
+      "2021-09-21 20:22:20,570 epoch 10 - iter 2/13 - loss 0.20497206 - samples/sec: 13.97 - lr: 0.020000\n",
+      "2021-09-21 20:22:20,639 epoch 10 - iter 3/13 - loss 0.15572737 - samples/sec: 14.66 - lr: 0.020000\n",
+      "2021-09-21 20:22:20,716 epoch 10 - iter 4/13 - loss 0.11697316 - samples/sec: 12.99 - lr: 0.020000\n",
+      "2021-09-21 20:22:20,781 epoch 10 - iter 5/13 - loss 0.09428220 - samples/sec: 15.44 - lr: 0.020000\n",
+      "2021-09-21 20:22:20,857 epoch 10 - iter 6/13 - loss 0.09742641 - samples/sec: 13.24 - lr: 0.020000\n",
+      "2021-09-21 20:22:20,939 epoch 10 - iter 7/13 - loss 0.08415755 - samples/sec: 12.37 - lr: 0.020000\n",
+      "2021-09-21 20:22:21,028 epoch 10 - iter 8/13 - loss 0.07367942 - samples/sec: 11.29 - lr: 0.020000\n",
+      "2021-09-21 20:22:21,101 epoch 10 - iter 9/13 - loss 0.06557956 - samples/sec: 13.80 - lr: 0.020000\n",
+      "2021-09-21 20:22:21,172 epoch 10 - iter 10/13 - loss 0.05918989 - samples/sec: 14.09 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:49:33,061 EPOCH 10 done: loss 0.0200 - lr 0.0200000\n",
-      "2021-09-08 11:49:33,091 DEV : loss 0.08481361716985703 - score 0.0\n",
-      "2021-09-08 11:49:33,091 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:49:38,733 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:38,733 Testing using best model ...\n",
-      "2021-09-08 11:49:38,756 loading file None1/best-model.pt\n",
+      "2021-09-21 20:22:21,251 epoch 10 - iter 11/13 - loss 0.05408971 - samples/sec: 12.69 - lr: 0.020000\n",
+      "2021-09-21 20:22:21,313 epoch 10 - iter 12/13 - loss 0.04975244 - samples/sec: 16.45 - lr: 0.020000\n",
+      "2021-09-21 20:22:21,394 epoch 10 - iter 13/13 - loss 0.15063193 - samples/sec: 12.32 - lr: 0.020000\n",
+      "2021-09-21 20:22:21,395 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:21,396 EPOCH 10 done: loss 0.1506 - lr 0.0200000\n",
+      "2021-09-21 20:22:21,506 DEV : loss 0.0018496689153835177 - score 0.0\n",
+      "2021-09-21 20:22:21,509 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:22:34,413 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:22:34,414 Testing using best model ...\n",
+      "2021-09-21 20:22:34,415 loading file None1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:49:43,471 \t0.0\n",
-      "2021-09-08 11:49:43,472 \n",
+      "2021-09-21 20:22:40,044 \t0.5\n",
+      "2021-09-21 20:22:40,045 \n",
       "Results:\n",
-      "- F-score (micro) 0.0\n",
-      "- F-score (macro) 0.0\n",
-      "- Accuracy 0.0\n",
+      "- F-score (micro) 0.5\n",
+      "- F-score (macro) 0.0667\n",
+      "- Accuracy 0.5\n",
       "\n",
       "By class:\n",
       "               precision    recall  f1-score   support\n",
       "\n",
       "       travel     0.0000    0.0000    0.0000         0\n",
+      "   technology     0.0000    0.0000    0.0000         0\n",
+      "     wellness     0.0000    0.0000    0.0000         0\n",
       "        women     0.0000    0.0000    0.0000         0\n",
       "      parents     0.0000    0.0000    0.0000         0\n",
       "     business     0.0000    0.0000    0.0000         0\n",
       "     weddings     0.0000    0.0000    0.0000         0\n",
-      "      fashion     0.0000    0.0000    0.0000         0\n",
       "entertainment     0.0000    0.0000    0.0000         0\n",
+      "      science     0.0000    0.0000    0.0000         0\n",
       "      divorce     0.0000    0.0000    0.0000         0\n",
-      "        crime     0.0000    0.0000    0.0000         0\n",
       "     religion     0.0000    0.0000    0.0000         0\n",
-      "       sports     0.0000    0.0000    0.0000         0\n",
       "     politics     0.0000    0.0000    0.0000         0\n",
       "       comedy     0.0000    0.0000    0.0000         0\n",
-      "     wellness     0.0000    0.0000    0.0000         1\n",
-      "      science     0.0000    0.0000    0.0000         1\n",
+      "        crime     0.0000    0.0000    0.0000         1\n",
+      "       sports     1.0000    1.0000    1.0000         1\n",
       "\n",
-      "    micro avg     0.0000    0.0000    0.0000         2\n",
-      "    macro avg     0.0000    0.0000    0.0000         2\n",
-      " weighted avg     0.0000    0.0000    0.0000         2\n",
-      "  samples avg     0.0000    0.0000    0.0000         2\n",
+      "    micro avg     0.5000    0.5000    0.5000         2\n",
+      "    macro avg     0.0667    0.0667    0.0667         2\n",
+      " weighted avg     0.5000    0.5000    0.5000         2\n",
+      "  samples avg     0.5000    0.5000    0.5000         2\n",
       "\n",
-      "2021-09-08 11:49:43,472 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:50,618 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
+      "2021-09-21 20:22:40,045 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:05,763 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:49:54,656 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:23:10,144 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 15/15 [00:00<00:00, 49695.55it/s]"
+      "100%|██████████| 15/15 [00:00<00:00, 50331.65it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:49:54,657 [b'technology', b'women', b'parents', b'business', b'weddings', b'fashion', b'entertainment', b'science', b'divorce', b'crime', b'sports', b'politics', b'comedy', b'travel', b'religion']\n",
-      "2021-09-08 11:49:54,671 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:54,673 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:23:10,146 [b'travel', b'wellness', b'women', b'parents', b'business', b'fashion', b'entertainment', b'science', b'crime', b'religion', b'sports', b'politics', b'comedy', b'weddings', b'divorce']\n",
+      "2021-09-21 20:23:10,285 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:10,287 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -1591,28 +1590,24 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:49:54,673 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:54,674 Corpus: \"Corpus: 13 train + 1 dev + 2 test sentences\"\n",
-      "2021-09-08 11:49:54,674 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:54,674 Parameters:\n",
-      "2021-09-08 11:49:54,675  - learning_rate: \"0.02\"\n",
-      "2021-09-08 11:49:54,675  - mini_batch_size: \"1\"\n",
-      "2021-09-08 11:49:54,675  - patience: \"3\"\n",
-      "2021-09-08 11:49:54,675  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 11:49:54,676  - max_epochs: \"10\"\n",
-      "2021-09-08 11:49:54,676  - shuffle: \"True\"\n",
-      "2021-09-08 11:49:54,676  - train_with_dev: \"False\"\n",
-      "2021-09-08 11:49:54,677  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 11:49:54,677 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:54,677 Model training base path: \"None1\"\n",
-      "2021-09-08 11:49:54,677 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:54,678 Device: cuda:0\n",
-      "2021-09-08 11:49:54,678 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:54,678 Embeddings storage mode: cpu\n",
-      "2021-09-08 11:49:54,684 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:54,738 epoch 1 - iter 1/13 - loss 0.80570954 - samples/sec: 26.07 - lr: 0.020000\n",
-      "2021-09-08 11:49:54,785 epoch 1 - iter 2/13 - loss 0.60164294 - samples/sec: 21.62 - lr: 0.020000\n",
-      "2021-09-08 11:49:54,830 epoch 1 - iter 3/13 - loss 0.67567346 - samples/sec: 22.20 - lr: 0.020000\n"
+      "2021-09-21 20:23:10,287 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:10,288 Corpus: \"Corpus: 13 train + 1 dev + 2 test sentences\"\n",
+      "2021-09-21 20:23:10,288 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:10,288 Parameters:\n",
+      "2021-09-21 20:23:10,289  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:23:10,289  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:23:10,289  - patience: \"3\"\n",
+      "2021-09-21 20:23:10,289  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:23:10,290  - max_epochs: \"10\"\n",
+      "2021-09-21 20:23:10,290  - shuffle: \"True\"\n",
+      "2021-09-21 20:23:10,290  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:23:10,291  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:23:10,291 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:10,291 Model training base path: \"None1\"\n",
+      "2021-09-21 20:23:10,291 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:10,292 Device: cuda:0\n",
+      "2021-09-21 20:23:10,292 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:10,292 Embeddings storage mode: cpu\n"
      ]
     },
     {
@@ -1626,254 +1621,259 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:49:54,876 epoch 1 - iter 4/13 - loss 0.54047012 - samples/sec: 22.13 - lr: 0.020000\n",
-      "2021-09-08 11:49:54,922 epoch 1 - iter 5/13 - loss 0.55123891 - samples/sec: 22.14 - lr: 0.020000\n",
-      "2021-09-08 11:49:54,968 epoch 1 - iter 6/13 - loss 0.53693192 - samples/sec: 21.70 - lr: 0.020000\n",
-      "2021-09-08 11:49:55,014 epoch 1 - iter 7/13 - loss 0.46780909 - samples/sec: 21.87 - lr: 0.020000\n",
-      "2021-09-08 11:49:55,061 epoch 1 - iter 8/13 - loss 0.41964201 - samples/sec: 21.74 - lr: 0.020000\n",
-      "2021-09-08 11:49:55,109 epoch 1 - iter 9/13 - loss 0.42924778 - samples/sec: 21.17 - lr: 0.020000\n",
-      "2021-09-08 11:49:55,154 epoch 1 - iter 10/13 - loss 0.44213765 - samples/sec: 22.04 - lr: 0.020000\n",
-      "2021-09-08 11:49:55,197 epoch 1 - iter 11/13 - loss 0.40222427 - samples/sec: 23.65 - lr: 0.020000\n",
-      "2021-09-08 11:49:55,243 epoch 1 - iter 12/13 - loss 0.52978176 - samples/sec: 21.84 - lr: 0.020000\n",
-      "2021-09-08 11:49:55,289 epoch 1 - iter 13/13 - loss 0.63793246 - samples/sec: 22.12 - lr: 0.020000\n",
-      "2021-09-08 11:49:55,290 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:55,290 EPOCH 1 done: loss 0.6379 - lr 0.0200000\n",
-      "2021-09-08 11:49:55,320 DEV : loss 0.9322115182876587 - score 0.0\n",
-      "2021-09-08 11:49:55,320 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:23:10,482 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:10,563 epoch 1 - iter 1/13 - loss 0.16298138 - samples/sec: 21.07 - lr: 0.020000\n",
+      "2021-09-21 20:23:10,614 epoch 1 - iter 2/13 - loss 0.60586118 - samples/sec: 19.85 - lr: 0.020000\n",
+      "2021-09-21 20:23:10,685 epoch 1 - iter 3/13 - loss 0.52757807 - samples/sec: 14.23 - lr: 0.020000\n",
+      "2021-09-21 20:23:10,749 epoch 1 - iter 4/13 - loss 0.60762193 - samples/sec: 15.77 - lr: 0.020000\n",
+      "2021-09-21 20:23:10,810 epoch 1 - iter 5/13 - loss 0.55104453 - samples/sec: 16.36 - lr: 0.020000\n",
+      "2021-09-21 20:23:10,871 epoch 1 - iter 6/13 - loss 0.50187494 - samples/sec: 16.62 - lr: 0.020000\n",
+      "2021-09-21 20:23:10,925 epoch 1 - iter 7/13 - loss 0.44769412 - samples/sec: 18.53 - lr: 0.020000\n",
+      "2021-09-21 20:23:10,992 epoch 1 - iter 8/13 - loss 0.44601857 - samples/sec: 15.05 - lr: 0.020000\n",
+      "2021-09-21 20:23:11,055 epoch 1 - iter 9/13 - loss 0.44298144 - samples/sec: 16.10 - lr: 0.020000\n",
+      "2021-09-21 20:23:11,119 epoch 1 - iter 10/13 - loss 0.42808224 - samples/sec: 15.66 - lr: 0.020000\n",
+      "2021-09-21 20:23:11,179 epoch 1 - iter 11/13 - loss 0.38964569 - samples/sec: 16.96 - lr: 0.020000\n",
+      "2021-09-21 20:23:11,235 epoch 1 - iter 12/13 - loss 0.35856140 - samples/sec: 18.03 - lr: 0.020000\n",
+      "2021-09-21 20:23:11,288 epoch 1 - iter 13/13 - loss 0.37560107 - samples/sec: 18.92 - lr: 0.020000\n",
+      "2021-09-21 20:23:11,289 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:11,290 EPOCH 1 done: loss 0.3756 - lr 0.0200000\n",
+      "2021-09-21 20:23:11,876 DEV : loss 0.6162222027778625 - score 0.0\n",
+      "2021-09-21 20:23:11,877 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:49:59,391 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:49:59,455 epoch 2 - iter 1/13 - loss 0.58175373 - samples/sec: 21.33 - lr: 0.020000\n",
-      "2021-09-08 11:49:59,502 epoch 2 - iter 2/13 - loss 0.32676219 - samples/sec: 21.75 - lr: 0.020000\n",
-      "2021-09-08 11:49:59,547 epoch 2 - iter 3/13 - loss 0.48607699 - samples/sec: 22.11 - lr: 0.020000\n",
-      "2021-09-08 11:49:59,593 epoch 2 - iter 4/13 - loss 0.58474628 - samples/sec: 22.26 - lr: 0.020000\n",
-      "2021-09-08 11:49:59,635 epoch 2 - iter 5/13 - loss 0.47018372 - samples/sec: 23.85 - lr: 0.020000\n",
-      "2021-09-08 11:49:59,681 epoch 2 - iter 6/13 - loss 0.42717546 - samples/sec: 21.81 - lr: 0.020000\n",
-      "2021-09-08 11:49:59,727 epoch 2 - iter 7/13 - loss 0.45480515 - samples/sec: 22.23 - lr: 0.020000\n",
-      "2021-09-08 11:49:59,772 epoch 2 - iter 8/13 - loss 0.43305397 - samples/sec: 22.24 - lr: 0.020000\n",
-      "2021-09-08 11:49:59,818 epoch 2 - iter 9/13 - loss 0.39482494 - samples/sec: 21.85 - lr: 0.020000\n",
-      "2021-09-08 11:49:59,864 epoch 2 - iter 10/13 - loss 0.36711649 - samples/sec: 22.16 - lr: 0.020000\n",
-      "2021-09-08 11:49:59,909 epoch 2 - iter 11/13 - loss 0.35732942 - samples/sec: 22.27 - lr: 0.020000\n",
-      "2021-09-08 11:49:59,955 epoch 2 - iter 12/13 - loss 0.41100626 - samples/sec: 22.00 - lr: 0.020000\n",
-      "2021-09-08 11:50:00,000 epoch 2 - iter 13/13 - loss 0.47815410 - samples/sec: 22.12 - lr: 0.020000\n",
-      "2021-09-08 11:50:00,001 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:00,002 EPOCH 2 done: loss 0.4782 - lr 0.0200000\n",
-      "2021-09-08 11:50:00,362 DEV : loss 0.1239478588104248 - score 0.0\n",
-      "2021-09-08 11:50:00,363 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:23:16,691 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:16,756 epoch 2 - iter 1/13 - loss 0.49466220 - samples/sec: 21.03 - lr: 0.020000\n",
+      "2021-09-21 20:23:16,800 epoch 2 - iter 2/13 - loss 0.24997237 - samples/sec: 23.13 - lr: 0.020000\n",
+      "2021-09-21 20:23:16,846 epoch 2 - iter 3/13 - loss 0.19929379 - samples/sec: 21.58 - lr: 0.020000\n",
+      "2021-09-21 20:23:16,893 epoch 2 - iter 4/13 - loss 0.33530006 - samples/sec: 21.60 - lr: 0.020000\n",
+      "2021-09-21 20:23:16,942 epoch 2 - iter 5/13 - loss 0.27098764 - samples/sec: 20.97 - lr: 0.020000\n",
+      "2021-09-21 20:23:16,993 epoch 2 - iter 6/13 - loss 0.25495221 - samples/sec: 19.87 - lr: 0.020000\n",
+      "2021-09-21 20:23:17,043 epoch 2 - iter 7/13 - loss 0.57598909 - samples/sec: 20.06 - lr: 0.020000\n",
+      "2021-09-21 20:23:17,094 epoch 2 - iter 8/13 - loss 0.54080229 - samples/sec: 19.88 - lr: 0.020000\n",
+      "2021-09-21 20:23:17,139 epoch 2 - iter 9/13 - loss 0.48197815 - samples/sec: 22.88 - lr: 0.020000\n",
+      "2021-09-21 20:23:17,185 epoch 2 - iter 10/13 - loss 0.45834738 - samples/sec: 21.73 - lr: 0.020000\n",
+      "2021-09-21 20:23:17,229 epoch 2 - iter 11/13 - loss 0.41853213 - samples/sec: 23.05 - lr: 0.020000\n",
+      "2021-09-21 20:23:17,275 epoch 2 - iter 12/13 - loss 0.39661380 - samples/sec: 21.83 - lr: 0.020000\n",
+      "2021-09-21 20:23:17,321 epoch 2 - iter 13/13 - loss 0.40132453 - samples/sec: 21.96 - lr: 0.020000\n",
+      "2021-09-21 20:23:17,322 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:17,323 EPOCH 2 done: loss 0.4013 - lr 0.0200000\n",
+      "2021-09-21 20:23:17,449 DEV : loss 0.5546330213546753 - score 0.0\n",
+      "2021-09-21 20:23:17,450 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:50:10,362 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:10,426 epoch 3 - iter 1/13 - loss 1.12867749 - samples/sec: 21.35 - lr: 0.020000\n",
-      "2021-09-08 11:50:10,471 epoch 3 - iter 2/13 - loss 0.79126912 - samples/sec: 22.13 - lr: 0.020000\n",
-      "2021-09-08 11:50:10,517 epoch 3 - iter 3/13 - loss 0.56119915 - samples/sec: 21.87 - lr: 0.020000\n",
-      "2021-09-08 11:50:10,563 epoch 3 - iter 4/13 - loss 0.43897396 - samples/sec: 22.20 - lr: 0.020000\n",
-      "2021-09-08 11:50:10,605 epoch 3 - iter 5/13 - loss 0.35308380 - samples/sec: 23.81 - lr: 0.020000\n",
-      "2021-09-08 11:50:10,652 epoch 3 - iter 6/13 - loss 0.40240141 - samples/sec: 21.83 - lr: 0.020000\n",
-      "2021-09-08 11:50:10,694 epoch 3 - iter 7/13 - loss 0.34558865 - samples/sec: 23.73 - lr: 0.020000\n",
-      "2021-09-08 11:50:10,739 epoch 3 - iter 8/13 - loss 0.41512672 - samples/sec: 22.29 - lr: 0.020000\n",
-      "2021-09-08 11:50:10,783 epoch 3 - iter 9/13 - loss 0.37005170 - samples/sec: 23.05 - lr: 0.020000\n",
-      "2021-09-08 11:50:10,826 epoch 3 - iter 10/13 - loss 0.33460068 - samples/sec: 23.73 - lr: 0.020000\n",
-      "2021-09-08 11:50:10,871 epoch 3 - iter 11/13 - loss 0.30991612 - samples/sec: 22.21 - lr: 0.020000\n",
-      "2021-09-08 11:50:10,914 epoch 3 - iter 12/13 - loss 0.28480702 - samples/sec: 23.54 - lr: 0.020000\n",
-      "2021-09-08 11:50:10,956 epoch 3 - iter 13/13 - loss 0.26355341 - samples/sec: 23.79 - lr: 0.020000\n",
-      "2021-09-08 11:50:10,957 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:10,958 EPOCH 3 done: loss 0.2636 - lr 0.0200000\n",
-      "2021-09-08 11:50:15,056 DEV : loss 0.15760588645935059 - score 0.0\n",
-      "2021-09-08 11:50:15,057 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:50:15,067 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:15,127 epoch 4 - iter 1/13 - loss 0.00264904 - samples/sec: 22.91 - lr: 0.020000\n",
-      "2021-09-08 11:50:15,173 epoch 4 - iter 2/13 - loss 0.16412598 - samples/sec: 21.81 - lr: 0.020000\n",
-      "2021-09-08 11:50:15,220 epoch 4 - iter 3/13 - loss 0.23508937 - samples/sec: 21.67 - lr: 0.020000\n",
-      "2021-09-08 11:50:15,267 epoch 4 - iter 4/13 - loss 0.19931673 - samples/sec: 21.29 - lr: 0.020000\n",
-      "2021-09-08 11:50:15,310 epoch 4 - iter 5/13 - loss 0.16210414 - samples/sec: 23.55 - lr: 0.020000\n",
-      "2021-09-08 11:50:15,353 epoch 4 - iter 6/13 - loss 0.13556256 - samples/sec: 23.84 - lr: 0.020000\n",
-      "2021-09-08 11:50:15,399 epoch 4 - iter 7/13 - loss 0.23602835 - samples/sec: 21.85 - lr: 0.020000\n",
-      "2021-09-08 11:50:15,442 epoch 4 - iter 8/13 - loss 0.20723984 - samples/sec: 23.70 - lr: 0.020000\n",
-      "2021-09-08 11:50:15,484 epoch 4 - iter 9/13 - loss 0.18765854 - samples/sec: 23.90 - lr: 0.020000\n",
-      "2021-09-08 11:50:15,527 epoch 4 - iter 10/13 - loss 0.16928503 - samples/sec: 23.50 - lr: 0.020000\n",
-      "2021-09-08 11:50:15,573 epoch 4 - iter 11/13 - loss 0.18936450 - samples/sec: 22.05 - lr: 0.020000\n",
-      "2021-09-08 11:50:15,615 epoch 4 - iter 12/13 - loss 0.17401123 - samples/sec: 23.88 - lr: 0.020000\n",
-      "2021-09-08 11:50:15,661 epoch 4 - iter 13/13 - loss 0.18441499 - samples/sec: 21.88 - lr: 0.020000\n",
-      "2021-09-08 11:50:15,662 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:15,663 EPOCH 4 done: loss 0.1844 - lr 0.0200000\n",
-      "2021-09-08 11:50:15,708 DEV : loss 0.592096745967865 - score 0.0\n",
-      "2021-09-08 11:50:15,708 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:50:15,723 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:15,781 epoch 5 - iter 1/13 - loss 0.00447691 - samples/sec: 23.44 - lr: 0.020000\n",
-      "2021-09-08 11:50:15,826 epoch 5 - iter 2/13 - loss 0.84740067 - samples/sec: 22.10 - lr: 0.020000\n",
-      "2021-09-08 11:50:15,869 epoch 5 - iter 3/13 - loss 0.57754701 - samples/sec: 23.82 - lr: 0.020000\n",
-      "2021-09-08 11:50:15,915 epoch 5 - iter 4/13 - loss 0.68507968 - samples/sec: 21.83 - lr: 0.020000\n",
-      "2021-09-08 11:50:15,958 epoch 5 - iter 5/13 - loss 0.55004578 - samples/sec: 23.61 - lr: 0.020000\n",
-      "2021-09-08 11:50:16,003 epoch 5 - iter 6/13 - loss 0.47767187 - samples/sec: 22.28 - lr: 0.020000\n",
-      "2021-09-08 11:50:16,049 epoch 5 - iter 7/13 - loss 0.48895200 - samples/sec: 21.95 - lr: 0.020000\n",
-      "2021-09-08 11:50:16,092 epoch 5 - iter 8/13 - loss 0.42824817 - samples/sec: 23.73 - lr: 0.020000\n",
-      "2021-09-08 11:50:16,138 epoch 5 - iter 9/13 - loss 0.39003705 - samples/sec: 21.84 - lr: 0.020000\n",
-      "2021-09-08 11:50:16,185 epoch 5 - iter 10/13 - loss 0.36450400 - samples/sec: 21.33 - lr: 0.020000\n",
-      "2021-09-08 11:50:16,228 epoch 5 - iter 11/13 - loss 0.33181836 - samples/sec: 23.58 - lr: 0.020000\n",
-      "2021-09-08 11:50:16,271 epoch 5 - iter 12/13 - loss 0.30820368 - samples/sec: 23.80 - lr: 0.020000\n",
-      "2021-09-08 11:50:16,314 epoch 5 - iter 13/13 - loss 0.28471375 - samples/sec: 23.40 - lr: 0.020000\n",
-      "2021-09-08 11:50:16,315 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:16,315 EPOCH 5 done: loss 0.2847 - lr 0.0200000\n",
-      "2021-09-08 11:50:16,365 DEV : loss 0.05121264234185219 - score 0.0\n",
-      "2021-09-08 11:50:16,366 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n"
+      "2021-09-21 20:23:26,989 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:27,073 epoch 3 - iter 1/13 - loss 0.01226671 - samples/sec: 16.79 - lr: 0.020000\n",
+      "2021-09-21 20:23:27,147 epoch 3 - iter 2/13 - loss 0.03534019 - samples/sec: 13.45 - lr: 0.020000\n",
+      "2021-09-21 20:23:27,211 epoch 3 - iter 3/13 - loss 0.05823718 - samples/sec: 15.92 - lr: 0.020000\n",
+      "2021-09-21 20:23:27,284 epoch 3 - iter 4/13 - loss 0.50619953 - samples/sec: 13.77 - lr: 0.020000\n",
+      "2021-09-21 20:23:27,336 epoch 3 - iter 5/13 - loss 0.40623299 - samples/sec: 19.31 - lr: 0.020000\n",
+      "2021-09-21 20:23:27,405 epoch 3 - iter 6/13 - loss 0.34776180 - samples/sec: 14.51 - lr: 0.020000\n",
+      "2021-09-21 20:23:27,461 epoch 3 - iter 7/13 - loss 0.30114977 - samples/sec: 17.92 - lr: 0.020000\n",
+      "2021-09-21 20:23:27,510 epoch 3 - iter 8/13 - loss 0.26469708 - samples/sec: 20.59 - lr: 0.020000\n",
+      "2021-09-21 20:23:27,569 epoch 3 - iter 9/13 - loss 0.23590075 - samples/sec: 17.20 - lr: 0.020000\n",
+      "2021-09-21 20:23:27,621 epoch 3 - iter 10/13 - loss 0.21241728 - samples/sec: 19.27 - lr: 0.020000\n",
+      "2021-09-21 20:23:27,681 epoch 3 - iter 11/13 - loss 0.19337585 - samples/sec: 16.91 - lr: 0.020000\n",
+      "2021-09-21 20:23:27,737 epoch 3 - iter 12/13 - loss 0.17745404 - samples/sec: 18.09 - lr: 0.020000\n",
+      "2021-09-21 20:23:27,790 epoch 3 - iter 13/13 - loss 0.16445027 - samples/sec: 18.75 - lr: 0.020000\n",
+      "2021-09-21 20:23:27,791 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:27,792 EPOCH 3 done: loss 0.1645 - lr 0.0200000\n",
+      "2021-09-21 20:23:27,900 DEV : loss 0.32205790281295776 - score 0.0\n",
+      "2021-09-21 20:23:27,901 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:23:33,706 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:33,772 epoch 4 - iter 1/13 - loss 0.16173021 - samples/sec: 20.94 - lr: 0.020000\n",
+      "2021-09-21 20:23:33,815 epoch 4 - iter 2/13 - loss 0.09327860 - samples/sec: 23.34 - lr: 0.020000\n",
+      "2021-09-21 20:23:33,860 epoch 4 - iter 3/13 - loss 0.06348745 - samples/sec: 22.79 - lr: 0.020000\n",
+      "2021-09-21 20:23:33,903 epoch 4 - iter 4/13 - loss 0.04775261 - samples/sec: 23.40 - lr: 0.020000\n",
+      "2021-09-21 20:23:33,946 epoch 4 - iter 5/13 - loss 0.03970173 - samples/sec: 23.32 - lr: 0.020000\n",
+      "2021-09-21 20:23:33,991 epoch 4 - iter 6/13 - loss 0.03335601 - samples/sec: 22.55 - lr: 0.020000\n",
+      "2021-09-21 20:23:34,034 epoch 4 - iter 7/13 - loss 0.02897702 - samples/sec: 23.22 - lr: 0.020000\n",
+      "2021-09-21 20:23:34,077 epoch 4 - iter 8/13 - loss 0.02547468 - samples/sec: 23.44 - lr: 0.020000\n",
+      "2021-09-21 20:23:34,121 epoch 4 - iter 9/13 - loss 0.02358419 - samples/sec: 23.01 - lr: 0.020000\n",
+      "2021-09-21 20:23:34,165 epoch 4 - iter 10/13 - loss 0.02180914 - samples/sec: 23.29 - lr: 0.020000\n",
+      "2021-09-21 20:23:34,208 epoch 4 - iter 11/13 - loss 0.01991318 - samples/sec: 23.43 - lr: 0.020000\n",
+      "2021-09-21 20:23:34,252 epoch 4 - iter 12/13 - loss 0.01890993 - samples/sec: 23.00 - lr: 0.020000\n",
+      "2021-09-21 20:23:34,298 epoch 4 - iter 13/13 - loss 0.17280049 - samples/sec: 21.66 - lr: 0.020000\n",
+      "2021-09-21 20:23:34,299 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:34,300 EPOCH 4 done: loss 0.1728 - lr 0.0200000\n",
+      "2021-09-21 20:23:34,440 DEV : loss 0.2994096577167511 - score 0.0\n",
+      "2021-09-21 20:23:34,440 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:23:43,504 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:43,608 epoch 5 - iter 1/13 - loss 0.13350482 - samples/sec: 14.83 - lr: 0.020000\n",
+      "2021-09-21 20:23:43,672 epoch 5 - iter 2/13 - loss 0.09197490 - samples/sec: 15.77 - lr: 0.020000\n",
+      "2021-09-21 20:23:43,841 epoch 5 - iter 3/13 - loss 0.50795482 - samples/sec: 5.94 - lr: 0.020000\n",
+      "2021-09-21 20:23:43,898 epoch 5 - iter 4/13 - loss 0.38170476 - samples/sec: 17.79 - lr: 0.020000\n",
+      "2021-09-21 20:23:43,962 epoch 5 - iter 5/13 - loss 0.31264188 - samples/sec: 15.60 - lr: 0.020000\n",
+      "2021-09-21 20:23:44,015 epoch 5 - iter 6/13 - loss 0.26070201 - samples/sec: 19.32 - lr: 0.020000\n",
+      "2021-09-21 20:23:44,081 epoch 5 - iter 7/13 - loss 0.22394680 - samples/sec: 15.11 - lr: 0.020000\n",
+      "2021-09-21 20:23:44,139 epoch 5 - iter 8/13 - loss 0.20432464 - samples/sec: 17.63 - lr: 0.020000\n",
+      "2021-09-21 20:23:44,199 epoch 5 - iter 9/13 - loss 0.18186124 - samples/sec: 16.73 - lr: 0.020000\n",
+      "2021-09-21 20:23:44,255 epoch 5 - iter 10/13 - loss 0.23931535 - samples/sec: 18.19 - lr: 0.020000\n",
+      "2021-09-21 20:23:44,324 epoch 5 - iter 11/13 - loss 0.21766036 - samples/sec: 14.52 - lr: 0.020000\n",
+      "2021-09-21 20:23:44,389 epoch 5 - iter 12/13 - loss 0.20015354 - samples/sec: 15.49 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:50:27,917 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:27,977 epoch 6 - iter 1/13 - loss 0.00119190 - samples/sec: 22.84 - lr: 0.020000\n",
-      "2021-09-08 11:50:28,021 epoch 6 - iter 2/13 - loss 0.00131840 - samples/sec: 23.36 - lr: 0.020000\n",
-      "2021-09-08 11:50:28,063 epoch 6 - iter 3/13 - loss 0.00231418 - samples/sec: 23.67 - lr: 0.020000\n",
-      "2021-09-08 11:50:28,106 epoch 6 - iter 4/13 - loss 0.00655187 - samples/sec: 23.84 - lr: 0.020000\n",
-      "2021-09-08 11:50:28,152 epoch 6 - iter 5/13 - loss 0.07645471 - samples/sec: 21.75 - lr: 0.020000\n",
-      "2021-09-08 11:50:28,198 epoch 6 - iter 6/13 - loss 0.07472196 - samples/sec: 22.05 - lr: 0.020000\n",
-      "2021-09-08 11:50:28,243 epoch 6 - iter 7/13 - loss 0.36785779 - samples/sec: 22.21 - lr: 0.020000\n",
-      "2021-09-08 11:50:28,287 epoch 6 - iter 8/13 - loss 0.32209335 - samples/sec: 23.37 - lr: 0.020000\n",
-      "2021-09-08 11:50:28,329 epoch 6 - iter 9/13 - loss 0.28658694 - samples/sec: 23.67 - lr: 0.020000\n",
-      "2021-09-08 11:50:28,375 epoch 6 - iter 10/13 - loss 0.26663445 - samples/sec: 22.30 - lr: 0.020000\n",
-      "2021-09-08 11:50:28,420 epoch 6 - iter 11/13 - loss 0.33695152 - samples/sec: 22.27 - lr: 0.020000\n",
-      "2021-09-08 11:50:28,463 epoch 6 - iter 12/13 - loss 0.30899279 - samples/sec: 23.26 - lr: 0.020000\n",
-      "2021-09-08 11:50:28,506 epoch 6 - iter 13/13 - loss 0.28635817 - samples/sec: 23.81 - lr: 0.020000\n",
-      "2021-09-08 11:50:28,507 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:28,507 EPOCH 6 done: loss 0.2864 - lr 0.0200000\n",
-      "2021-09-08 11:50:28,549 DEV : loss 0.22478565573692322 - score 0.0\n",
-      "2021-09-08 11:50:28,550 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:50:28,617 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:28,675 epoch 7 - iter 1/13 - loss 0.00088243 - samples/sec: 23.54 - lr: 0.020000\n",
-      "2021-09-08 11:50:28,720 epoch 7 - iter 2/13 - loss 0.74861499 - samples/sec: 22.15 - lr: 0.020000\n",
-      "2021-09-08 11:50:28,763 epoch 7 - iter 3/13 - loss 0.49964033 - samples/sec: 23.72 - lr: 0.020000\n",
-      "2021-09-08 11:50:28,807 epoch 7 - iter 4/13 - loss 0.37622740 - samples/sec: 22.81 - lr: 0.020000\n",
-      "2021-09-08 11:50:28,850 epoch 7 - iter 5/13 - loss 0.30134354 - samples/sec: 23.60 - lr: 0.020000\n",
-      "2021-09-08 11:50:28,895 epoch 7 - iter 6/13 - loss 0.64724001 - samples/sec: 22.22 - lr: 0.020000\n",
-      "2021-09-08 11:50:28,938 epoch 7 - iter 7/13 - loss 0.55519711 - samples/sec: 23.48 - lr: 0.020000\n",
-      "2021-09-08 11:50:28,981 epoch 7 - iter 8/13 - loss 0.48736283 - samples/sec: 23.65 - lr: 0.020000\n",
-      "2021-09-08 11:50:29,024 epoch 7 - iter 9/13 - loss 0.43363301 - samples/sec: 23.76 - lr: 0.020000\n",
-      "2021-09-08 11:50:29,067 epoch 7 - iter 10/13 - loss 0.39051105 - samples/sec: 23.40 - lr: 0.020000\n",
-      "2021-09-08 11:50:29,112 epoch 7 - iter 11/13 - loss 0.39103137 - samples/sec: 22.26 - lr: 0.020000\n",
-      "2021-09-08 11:50:29,155 epoch 7 - iter 12/13 - loss 0.35893975 - samples/sec: 23.77 - lr: 0.020000\n",
-      "2021-09-08 11:50:29,198 epoch 7 - iter 13/13 - loss 0.33168431 - samples/sec: 23.30 - lr: 0.020000\n",
-      "2021-09-08 11:50:29,199 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:29,199 EPOCH 7 done: loss 0.3317 - lr 0.0200000\n",
-      "2021-09-08 11:50:29,233 DEV : loss 0.0762002021074295 - score 0.0\n",
-      "2021-09-08 11:50:29,234 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:50:29,236 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:29,296 epoch 8 - iter 1/13 - loss 0.47269440 - samples/sec: 21.95 - lr: 0.020000\n",
-      "2021-09-08 11:50:29,339 epoch 8 - iter 2/13 - loss 0.23870775 - samples/sec: 23.56 - lr: 0.020000\n",
-      "2021-09-08 11:50:29,382 epoch 8 - iter 3/13 - loss 0.15978490 - samples/sec: 23.19 - lr: 0.020000\n",
-      "2021-09-08 11:50:29,426 epoch 8 - iter 4/13 - loss 0.12050243 - samples/sec: 23.45 - lr: 0.020000\n",
-      "2021-09-08 11:50:29,468 epoch 8 - iter 5/13 - loss 0.09774833 - samples/sec: 23.84 - lr: 0.020000\n",
-      "2021-09-08 11:50:29,510 epoch 8 - iter 6/13 - loss 0.08202819 - samples/sec: 23.77 - lr: 0.020000\n",
-      "2021-09-08 11:50:29,557 epoch 8 - iter 7/13 - loss 0.11011881 - samples/sec: 21.78 - lr: 0.020000\n",
-      "2021-09-08 11:50:29,600 epoch 8 - iter 8/13 - loss 0.09730262 - samples/sec: 23.63 - lr: 0.020000\n",
-      "2021-09-08 11:50:29,642 epoch 8 - iter 9/13 - loss 0.08789502 - samples/sec: 23.80 - lr: 0.020000\n",
-      "2021-09-08 11:50:29,688 epoch 8 - iter 10/13 - loss 0.21914907 - samples/sec: 21.83 - lr: 0.020000\n",
-      "2021-09-08 11:50:29,731 epoch 8 - iter 11/13 - loss 0.19929807 - samples/sec: 23.69 - lr: 0.020000\n",
-      "2021-09-08 11:50:29,776 epoch 8 - iter 12/13 - loss 0.19441215 - samples/sec: 22.16 - lr: 0.020000\n",
-      "2021-09-08 11:50:29,823 epoch 8 - iter 13/13 - loss 0.18558102 - samples/sec: 21.85 - lr: 0.020000\n",
-      "2021-09-08 11:50:29,824 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:29,824 EPOCH 8 done: loss 0.1856 - lr 0.0200000\n",
-      "2021-09-08 11:50:29,853 DEV : loss 0.034291330724954605 - score 0.0\n",
-      "2021-09-08 11:50:29,854 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:23:44,441 epoch 5 - iter 13/13 - loss 0.18767189 - samples/sec: 19.46 - lr: 0.020000\n",
+      "2021-09-21 20:23:44,442 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:44,442 EPOCH 5 done: loss 0.1877 - lr 0.0200000\n",
+      "2021-09-21 20:23:44,600 DEV : loss 0.7877876162528992 - score 0.0\n",
+      "2021-09-21 20:23:44,602 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:23:44,685 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:44,745 epoch 6 - iter 1/13 - loss 0.00048151 - samples/sec: 22.74 - lr: 0.020000\n",
+      "2021-09-21 20:23:44,845 epoch 6 - iter 2/13 - loss 0.36625890 - samples/sec: 14.44 - lr: 0.020000\n",
+      "2021-09-21 20:23:44,901 epoch 6 - iter 3/13 - loss 0.24470302 - samples/sec: 18.32 - lr: 0.020000\n",
+      "2021-09-21 20:23:44,966 epoch 6 - iter 4/13 - loss 0.18396316 - samples/sec: 15.42 - lr: 0.020000\n",
+      "2021-09-21 20:23:45,022 epoch 6 - iter 5/13 - loss 0.15036569 - samples/sec: 18.13 - lr: 0.020000\n",
+      "2021-09-21 20:23:45,082 epoch 6 - iter 6/13 - loss 0.12570904 - samples/sec: 16.69 - lr: 0.020000\n",
+      "2021-09-21 20:23:45,136 epoch 6 - iter 7/13 - loss 0.10893501 - samples/sec: 18.56 - lr: 0.020000\n",
+      "2021-09-21 20:23:45,190 epoch 6 - iter 8/13 - loss 0.09537857 - samples/sec: 18.68 - lr: 0.020000\n",
+      "2021-09-21 20:23:45,250 epoch 6 - iter 9/13 - loss 0.08497476 - samples/sec: 16.96 - lr: 0.020000\n",
+      "2021-09-21 20:23:45,306 epoch 6 - iter 10/13 - loss 0.07971554 - samples/sec: 17.92 - lr: 0.020000\n",
+      "2021-09-21 20:23:45,354 epoch 6 - iter 11/13 - loss 0.28854039 - samples/sec: 21.09 - lr: 0.020000\n",
+      "2021-09-21 20:23:45,399 epoch 6 - iter 12/13 - loss 0.26475924 - samples/sec: 22.38 - lr: 0.020000\n",
+      "2021-09-21 20:23:45,471 epoch 6 - iter 13/13 - loss 0.31774401 - samples/sec: 13.94 - lr: 0.020000\n",
+      "2021-09-21 20:23:45,472 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:45,473 EPOCH 6 done: loss 0.3177 - lr 0.0200000\n",
+      "2021-09-21 20:23:45,622 DEV : loss 0.46694132685661316 - score 0.0\n",
+      "2021-09-21 20:23:45,623 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:23:45,708 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:45,795 epoch 7 - iter 1/13 - loss 0.00100456 - samples/sec: 18.77 - lr: 0.020000\n",
+      "2021-09-21 20:23:45,861 epoch 7 - iter 2/13 - loss 0.00120431 - samples/sec: 15.29 - lr: 0.020000\n",
+      "2021-09-21 20:23:45,909 epoch 7 - iter 3/13 - loss 0.00178617 - samples/sec: 21.36 - lr: 0.020000\n",
+      "2021-09-21 20:23:45,954 epoch 7 - iter 4/13 - loss 0.00164176 - samples/sec: 22.45 - lr: 0.020000\n",
+      "2021-09-21 20:23:46,005 epoch 7 - iter 5/13 - loss 0.00226505 - samples/sec: 19.56 - lr: 0.020000\n",
+      "2021-09-21 20:23:46,071 epoch 7 - iter 6/13 - loss 0.00460537 - samples/sec: 15.37 - lr: 0.020000\n",
+      "2021-09-21 20:23:46,125 epoch 7 - iter 7/13 - loss 0.00619151 - samples/sec: 18.62 - lr: 0.020000\n",
+      "2021-09-21 20:23:46,191 epoch 7 - iter 8/13 - loss 0.00944480 - samples/sec: 15.15 - lr: 0.020000\n",
+      "2021-09-21 20:23:46,251 epoch 7 - iter 9/13 - loss 0.00854510 - samples/sec: 17.01 - lr: 0.020000\n",
+      "2021-09-21 20:23:46,308 epoch 7 - iter 10/13 - loss 0.00790634 - samples/sec: 17.64 - lr: 0.020000\n",
+      "2021-09-21 20:23:46,371 epoch 7 - iter 11/13 - loss 0.00861172 - samples/sec: 16.04 - lr: 0.020000\n",
+      "2021-09-21 20:23:46,427 epoch 7 - iter 12/13 - loss 0.00794202 - samples/sec: 17.78 - lr: 0.020000\n",
+      "2021-09-21 20:23:46,492 epoch 7 - iter 13/13 - loss 0.00751834 - samples/sec: 15.65 - lr: 0.020000\n",
+      "2021-09-21 20:23:46,493 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:46,493 EPOCH 7 done: loss 0.0075 - lr 0.0200000\n",
+      "2021-09-21 20:23:46,708 DEV : loss 0.37178659439086914 - score 0.0\n",
+      "2021-09-21 20:23:46,710 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:23:46,792 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:46,907 epoch 8 - iter 1/13 - loss 0.00650978 - samples/sec: 19.07 - lr: 0.020000\n",
+      "2021-09-21 20:23:46,966 epoch 8 - iter 2/13 - loss 0.00388398 - samples/sec: 17.20 - lr: 0.020000\n",
+      "2021-09-21 20:23:47,028 epoch 8 - iter 3/13 - loss 0.61691421 - samples/sec: 16.31 - lr: 0.020000\n",
+      "2021-09-21 20:23:47,079 epoch 8 - iter 4/13 - loss 0.46346453 - samples/sec: 19.75 - lr: 0.020000\n",
+      "2021-09-21 20:23:47,139 epoch 8 - iter 5/13 - loss 0.37171780 - samples/sec: 16.66 - lr: 0.020000\n",
+      "2021-09-21 20:23:47,199 epoch 8 - iter 6/13 - loss 0.31007050 - samples/sec: 16.98 - lr: 0.020000\n",
+      "2021-09-21 20:23:47,247 epoch 8 - iter 7/13 - loss 0.26586721 - samples/sec: 20.86 - lr: 0.020000\n",
+      "2021-09-21 20:23:47,291 epoch 8 - iter 8/13 - loss 0.23304288 - samples/sec: 23.05 - lr: 0.020000\n",
+      "2021-09-21 20:23:47,334 epoch 8 - iter 9/13 - loss 0.20750720 - samples/sec: 23.15 - lr: 0.020000\n",
+      "2021-09-21 20:23:47,377 epoch 8 - iter 10/13 - loss 0.18683196 - samples/sec: 23.51 - lr: 0.020000\n",
+      "2021-09-21 20:23:47,421 epoch 8 - iter 11/13 - loss 0.16990464 - samples/sec: 23.04 - lr: 0.020000\n",
+      "2021-09-21 20:23:47,464 epoch 8 - iter 12/13 - loss 0.15605550 - samples/sec: 23.52 - lr: 0.020000\n",
+      "2021-09-21 20:23:47,507 epoch 8 - iter 13/13 - loss 0.14415571 - samples/sec: 23.38 - lr: 0.020000\n",
+      "2021-09-21 20:23:47,508 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:47,509 EPOCH 8 done: loss 0.1442 - lr 0.0200000\n",
+      "2021-09-21 20:23:48,718 DEV : loss 0.10495408624410629 - score 0.0\n",
+      "2021-09-21 20:23:48,719 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:23:54,331 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:54,393 epoch 9 - iter 1/13 - loss 0.00376402 - samples/sec: 22.41 - lr: 0.020000\n",
+      "2021-09-21 20:23:54,438 epoch 9 - iter 2/13 - loss 0.00219217 - samples/sec: 22.90 - lr: 0.020000\n",
+      "2021-09-21 20:23:54,481 epoch 9 - iter 3/13 - loss 0.00173057 - samples/sec: 23.23 - lr: 0.020000\n",
+      "2021-09-21 20:23:54,524 epoch 9 - iter 4/13 - loss 0.00140221 - samples/sec: 23.49 - lr: 0.020000\n",
+      "2021-09-21 20:23:54,568 epoch 9 - iter 5/13 - loss 0.00145991 - samples/sec: 22.90 - lr: 0.020000\n",
+      "2021-09-21 20:23:54,612 epoch 9 - iter 6/13 - loss 0.00141724 - samples/sec: 23.31 - lr: 0.020000\n",
+      "2021-09-21 20:23:54,654 epoch 9 - iter 7/13 - loss 0.00127149 - samples/sec: 23.58 - lr: 0.020000\n",
+      "2021-09-21 20:23:54,699 epoch 9 - iter 8/13 - loss 0.00121390 - samples/sec: 22.83 - lr: 0.020000\n",
+      "2021-09-21 20:23:54,743 epoch 9 - iter 9/13 - loss 0.00151886 - samples/sec: 22.77 - lr: 0.020000\n",
+      "2021-09-21 20:23:54,787 epoch 9 - iter 10/13 - loss 0.00153437 - samples/sec: 23.13 - lr: 0.020000\n",
+      "2021-09-21 20:23:54,831 epoch 9 - iter 11/13 - loss 0.00145833 - samples/sec: 22.93 - lr: 0.020000\n",
+      "2021-09-21 20:23:54,875 epoch 9 - iter 12/13 - loss 0.00149895 - samples/sec: 22.89 - lr: 0.020000\n",
+      "2021-09-21 20:23:54,919 epoch 9 - iter 13/13 - loss 0.00192411 - samples/sec: 23.11 - lr: 0.020000\n",
+      "2021-09-21 20:23:54,920 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:23:54,920 EPOCH 9 done: loss 0.0019 - lr 0.0200000\n",
+      "2021-09-21 20:23:55,081 DEV : loss 0.054954126477241516 - score 0.0\n",
+      "2021-09-21 20:23:55,082 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:50:36,041 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:36,101 epoch 9 - iter 1/13 - loss 0.00048055 - samples/sec: 22.89 - lr: 0.020000\n",
-      "2021-09-08 11:50:36,145 epoch 9 - iter 2/13 - loss 0.00079419 - samples/sec: 23.39 - lr: 0.020000\n",
-      "2021-09-08 11:50:36,187 epoch 9 - iter 3/13 - loss 0.00150980 - samples/sec: 23.71 - lr: 0.020000\n",
-      "2021-09-08 11:50:36,230 epoch 9 - iter 4/13 - loss 0.00177342 - samples/sec: 23.83 - lr: 0.020000\n",
-      "2021-09-08 11:50:36,273 epoch 9 - iter 5/13 - loss 0.00776085 - samples/sec: 23.40 - lr: 0.020000\n",
-      "2021-09-08 11:50:36,319 epoch 9 - iter 6/13 - loss 0.12967066 - samples/sec: 22.08 - lr: 0.020000\n",
-      "2021-09-08 11:50:36,364 epoch 9 - iter 7/13 - loss 0.11396026 - samples/sec: 22.23 - lr: 0.020000\n",
-      "2021-09-08 11:50:36,407 epoch 9 - iter 8/13 - loss 0.10026965 - samples/sec: 23.48 - lr: 0.020000\n",
-      "2021-09-08 11:50:36,453 epoch 9 - iter 9/13 - loss 0.12672892 - samples/sec: 22.11 - lr: 0.020000\n",
-      "2021-09-08 11:50:36,498 epoch 9 - iter 10/13 - loss 0.13684501 - samples/sec: 22.17 - lr: 0.020000\n",
-      "2021-09-08 11:50:36,544 epoch 9 - iter 11/13 - loss 0.12684768 - samples/sec: 21.82 - lr: 0.020000\n",
-      "2021-09-08 11:50:36,590 epoch 9 - iter 12/13 - loss 0.14172809 - samples/sec: 22.13 - lr: 0.020000\n",
-      "2021-09-08 11:50:36,632 epoch 9 - iter 13/13 - loss 0.13196786 - samples/sec: 23.81 - lr: 0.020000\n",
-      "2021-09-08 11:50:36,633 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:36,634 EPOCH 9 done: loss 0.1320 - lr 0.0200000\n",
-      "2021-09-08 11:50:36,664 DEV : loss 0.2248648703098297 - score 0.0\n",
-      "2021-09-08 11:50:36,665 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:50:36,666 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:36,727 epoch 10 - iter 1/13 - loss 0.13256691 - samples/sec: 22.00 - lr: 0.020000\n",
-      "2021-09-08 11:50:36,772 epoch 10 - iter 2/13 - loss 0.66378855 - samples/sec: 22.17 - lr: 0.020000\n",
-      "2021-09-08 11:50:36,816 epoch 10 - iter 3/13 - loss 0.44296864 - samples/sec: 23.34 - lr: 0.020000\n",
-      "2021-09-08 11:50:36,858 epoch 10 - iter 4/13 - loss 0.33303944 - samples/sec: 23.70 - lr: 0.020000\n",
-      "2021-09-08 11:50:36,904 epoch 10 - iter 5/13 - loss 0.27611230 - samples/sec: 22.17 - lr: 0.020000\n",
-      "2021-09-08 11:50:36,947 epoch 10 - iter 6/13 - loss 0.23030504 - samples/sec: 23.46 - lr: 0.020000\n",
-      "2021-09-08 11:50:36,990 epoch 10 - iter 7/13 - loss 0.19748287 - samples/sec: 23.71 - lr: 0.020000\n",
-      "2021-09-08 11:50:37,032 epoch 10 - iter 8/13 - loss 0.17282361 - samples/sec: 23.81 - lr: 0.020000\n",
-      "2021-09-08 11:50:37,075 epoch 10 - iter 9/13 - loss 0.15376215 - samples/sec: 23.41 - lr: 0.020000\n",
-      "2021-09-08 11:50:37,120 epoch 10 - iter 10/13 - loss 0.13882180 - samples/sec: 22.34 - lr: 0.020000\n",
-      "2021-09-08 11:50:37,169 epoch 10 - iter 11/13 - loss 0.13730898 - samples/sec: 20.68 - lr: 0.020000\n",
-      "2021-09-08 11:50:37,221 epoch 10 - iter 12/13 - loss 0.18579843 - samples/sec: 19.80 - lr: 0.020000\n"
+      "2021-09-21 20:24:01,459 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:01,604 epoch 10 - iter 1/13 - loss 0.00043209 - samples/sec: 16.26 - lr: 0.020000\n",
+      "2021-09-21 20:24:01,670 epoch 10 - iter 2/13 - loss 0.00048253 - samples/sec: 15.23 - lr: 0.020000\n",
+      "2021-09-21 20:24:01,736 epoch 10 - iter 3/13 - loss 0.00051699 - samples/sec: 15.33 - lr: 0.020000\n",
+      "2021-09-21 20:24:01,786 epoch 10 - iter 4/13 - loss 0.00058875 - samples/sec: 20.42 - lr: 0.020000\n",
+      "2021-09-21 20:24:01,838 epoch 10 - iter 5/13 - loss 0.00167705 - samples/sec: 19.33 - lr: 0.020000\n",
+      "2021-09-21 20:24:01,899 epoch 10 - iter 6/13 - loss 0.01315710 - samples/sec: 16.66 - lr: 0.020000\n",
+      "2021-09-21 20:24:01,945 epoch 10 - iter 7/13 - loss 0.01131681 - samples/sec: 21.87 - lr: 0.020000\n",
+      "2021-09-21 20:24:02,003 epoch 10 - iter 8/13 - loss 0.01012429 - samples/sec: 17.62 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:50:37,268 epoch 10 - iter 13/13 - loss 0.17227019 - samples/sec: 21.37 - lr: 0.020000\n",
-      "2021-09-08 11:50:37,269 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:37,270 EPOCH 10 done: loss 0.1723 - lr 0.0200000\n",
-      "2021-09-08 11:50:37,301 DEV : loss 0.006326697301119566 - score 0.0\n",
-      "2021-09-08 11:50:37,302 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 11:50:51,849 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:50:51,850 Testing using best model ...\n",
-      "2021-09-08 11:50:51,851 loading file None1/best-model.pt\n",
+      "2021-09-21 20:24:02,056 epoch 10 - iter 9/13 - loss 0.00993426 - samples/sec: 18.77 - lr: 0.020000\n",
+      "2021-09-21 20:24:02,119 epoch 10 - iter 10/13 - loss 0.00909004 - samples/sec: 16.19 - lr: 0.020000\n",
+      "2021-09-21 20:24:02,172 epoch 10 - iter 11/13 - loss 0.00836742 - samples/sec: 19.11 - lr: 0.020000\n",
+      "2021-09-21 20:24:02,231 epoch 10 - iter 12/13 - loss 0.01944229 - samples/sec: 17.04 - lr: 0.020000\n",
+      "2021-09-21 20:24:02,285 epoch 10 - iter 13/13 - loss 0.01797283 - samples/sec: 18.77 - lr: 0.020000\n",
+      "2021-09-21 20:24:02,286 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:02,286 EPOCH 10 done: loss 0.0180 - lr 0.0200000\n",
+      "2021-09-21 20:24:02,422 DEV : loss 0.35862505435943604 - score 0.0\n",
+      "2021-09-21 20:24:02,422 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:24:10,233 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:10,234 Testing using best model ...\n",
+      "2021-09-21 20:24:10,236 loading file None1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:50:56,663 \t0.0\n",
-      "2021-09-08 11:50:56,664 \n",
+      "2021-09-21 20:24:15,217 \t0.5\n",
+      "2021-09-21 20:24:15,217 \n",
       "Results:\n",
-      "- F-score (micro) 0.0\n",
-      "- F-score (macro) 0.0\n",
-      "- Accuracy 0.0\n",
+      "- F-score (micro) 0.5\n",
+      "- F-score (macro) 0.0667\n",
+      "- Accuracy 0.5\n",
       "\n",
       "By class:\n",
       "               precision    recall  f1-score   support\n",
       "\n",
-      "   technology     0.0000    0.0000    0.0000         0\n",
+      "       travel     0.0000    0.0000    0.0000         0\n",
+      "     wellness     0.0000    0.0000    0.0000         0\n",
       "        women     0.0000    0.0000    0.0000         0\n",
       "      parents     0.0000    0.0000    0.0000         0\n",
       "     business     0.0000    0.0000    0.0000         0\n",
-      "     weddings     0.0000    0.0000    0.0000         0\n",
       "      fashion     0.0000    0.0000    0.0000         0\n",
       "entertainment     0.0000    0.0000    0.0000         0\n",
       "      science     0.0000    0.0000    0.0000         0\n",
-      "      divorce     0.0000    0.0000    0.0000         0\n",
       "        crime     0.0000    0.0000    0.0000         0\n",
+      "     religion     0.0000    0.0000    0.0000         0\n",
       "       sports     0.0000    0.0000    0.0000         0\n",
       "     politics     0.0000    0.0000    0.0000         0\n",
       "       comedy     0.0000    0.0000    0.0000         0\n",
-      "       travel     0.0000    0.0000    0.0000         1\n",
-      "     religion     0.0000    0.0000    0.0000         1\n",
+      "     weddings     1.0000    1.0000    1.0000         1\n",
+      "      divorce     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "    micro avg     0.0000    0.0000    0.0000         2\n",
-      "    macro avg     0.0000    0.0000    0.0000         2\n",
-      " weighted avg     0.0000    0.0000    0.0000         2\n",
-      "  samples avg     0.0000    0.0000    0.0000         2\n",
+      "    micro avg     0.5000    0.5000    0.5000         2\n",
+      "    macro avg     0.0667    0.0667    0.0667         2\n",
+      " weighted avg     0.5000    0.5000    0.5000         2\n",
+      "  samples avg     0.5000    0.5000    0.5000         2\n",
       "\n",
-      "2021-09-08 11:50:56,664 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:04,099 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
+      "2021-09-21 20:24:15,218 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:39,873 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:51:08,119 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:24:44,610 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 15/15 [00:00<00:00, 47339.77it/s]"
+      "100%|██████████| 15/15 [00:00<00:00, 46568.88it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:51:08,121 [b'travel', b'wellness', b'women', b'parents', b'business', b'weddings', b'fashion', b'entertainment', b'science', b'divorce', b'crime', b'sports', b'politics', b'technology', b'comedy']\n",
-      "2021-09-08 11:51:08,132 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:08,134 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:24:44,612 [b'travel', b'women', b'parents', b'business', b'weddings', b'fashion', b'entertainment', b'science', b'crime', b'religion', b'sports', b'politics', b'comedy', b'technology', b'divorce']\n",
+      "2021-09-21 20:24:44,749 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:44,751 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -2186,28 +2186,24 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:51:08,134 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:08,134 Corpus: \"Corpus: 13 train + 1 dev + 2 test sentences\"\n",
-      "2021-09-08 11:51:08,135 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:08,135 Parameters:\n",
-      "2021-09-08 11:51:08,135  - learning_rate: \"0.02\"\n",
-      "2021-09-08 11:51:08,136  - mini_batch_size: \"1\"\n",
-      "2021-09-08 11:51:08,136  - patience: \"3\"\n",
-      "2021-09-08 11:51:08,136  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 11:51:08,136  - max_epochs: \"10\"\n",
-      "2021-09-08 11:51:08,137  - shuffle: \"True\"\n",
-      "2021-09-08 11:51:08,137  - train_with_dev: \"False\"\n",
-      "2021-09-08 11:51:08,137  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 11:51:08,138 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:08,138 Model training base path: \"None1\"\n",
-      "2021-09-08 11:51:08,138 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:08,138 Device: cuda:0\n",
-      "2021-09-08 11:51:08,139 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:08,139 Embeddings storage mode: cpu\n",
-      "2021-09-08 11:51:08,146 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:08,201 epoch 1 - iter 1/13 - loss 0.24622571 - samples/sec: 25.77 - lr: 0.020000\n",
-      "2021-09-08 11:51:08,247 epoch 1 - iter 2/13 - loss 0.65419710 - samples/sec: 21.82 - lr: 0.020000\n",
-      "2021-09-08 11:51:08,293 epoch 1 - iter 3/13 - loss 0.51564372 - samples/sec: 22.12 - lr: 0.020000\n"
+      "2021-09-21 20:24:44,752 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:44,752 Corpus: \"Corpus: 13 train + 1 dev + 2 test sentences\"\n",
+      "2021-09-21 20:24:44,752 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:44,752 Parameters:\n",
+      "2021-09-21 20:24:44,753  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:24:44,753  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:24:44,753  - patience: \"3\"\n",
+      "2021-09-21 20:24:44,754  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:24:44,754  - max_epochs: \"10\"\n",
+      "2021-09-21 20:24:44,754  - shuffle: \"True\"\n",
+      "2021-09-21 20:24:44,754  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:24:44,755  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:24:44,755 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:44,757 Model training base path: \"None1\"\n",
+      "2021-09-21 20:24:44,757 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:44,757 Device: cuda:0\n",
+      "2021-09-21 20:24:44,758 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:44,758 Embeddings storage mode: cpu\n"
      ]
     },
     {
@@ -2221,205 +2217,207 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:51:08,340 epoch 1 - iter 4/13 - loss 0.45981030 - samples/sec: 21.42 - lr: 0.020000\n",
-      "2021-09-08 11:51:08,386 epoch 1 - iter 5/13 - loss 0.42360194 - samples/sec: 22.11 - lr: 0.020000\n",
-      "2021-09-08 11:51:08,431 epoch 1 - iter 6/13 - loss 0.36593244 - samples/sec: 22.03 - lr: 0.020000\n",
-      "2021-09-08 11:51:08,478 epoch 1 - iter 7/13 - loss 0.41323124 - samples/sec: 21.73 - lr: 0.020000\n",
-      "2021-09-08 11:51:08,524 epoch 1 - iter 8/13 - loss 0.50025149 - samples/sec: 21.93 - lr: 0.020000\n",
-      "2021-09-08 11:51:08,570 epoch 1 - iter 9/13 - loss 0.56337548 - samples/sec: 22.08 - lr: 0.020000\n",
-      "2021-09-08 11:51:08,615 epoch 1 - iter 10/13 - loss 0.65205721 - samples/sec: 22.19 - lr: 0.020000\n",
-      "2021-09-08 11:51:08,662 epoch 1 - iter 11/13 - loss 0.65652743 - samples/sec: 21.57 - lr: 0.020000\n",
-      "2021-09-08 11:51:08,708 epoch 1 - iter 12/13 - loss 0.67731035 - samples/sec: 21.85 - lr: 0.020000\n",
-      "2021-09-08 11:51:08,754 epoch 1 - iter 13/13 - loss 0.63351743 - samples/sec: 22.08 - lr: 0.020000\n",
-      "2021-09-08 11:51:08,755 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:08,755 EPOCH 1 done: loss 0.6335 - lr 0.0200000\n",
-      "2021-09-08 11:51:08,790 DEV : loss 0.026060637086629868 - score 0.0\n",
-      "2021-09-08 11:51:08,790 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:24:44,943 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:45,033 epoch 1 - iter 1/13 - loss 0.41853771 - samples/sec: 22.45 - lr: 0.020000\n",
+      "2021-09-21 20:24:45,092 epoch 1 - iter 2/13 - loss 0.88539289 - samples/sec: 17.25 - lr: 0.020000\n",
+      "2021-09-21 20:24:45,148 epoch 1 - iter 3/13 - loss 0.80263959 - samples/sec: 17.90 - lr: 0.020000\n",
+      "2021-09-21 20:24:45,215 epoch 1 - iter 4/13 - loss 0.74678663 - samples/sec: 15.04 - lr: 0.020000\n",
+      "2021-09-21 20:24:45,275 epoch 1 - iter 5/13 - loss 0.67200556 - samples/sec: 17.03 - lr: 0.020000\n",
+      "2021-09-21 20:24:45,336 epoch 1 - iter 6/13 - loss 0.65814703 - samples/sec: 16.47 - lr: 0.020000\n",
+      "2021-09-21 20:24:45,401 epoch 1 - iter 7/13 - loss 0.67055956 - samples/sec: 15.57 - lr: 0.020000\n",
+      "2021-09-21 20:24:45,468 epoch 1 - iter 8/13 - loss 0.60462100 - samples/sec: 14.97 - lr: 0.020000\n",
+      "2021-09-21 20:24:45,527 epoch 1 - iter 9/13 - loss 0.55280315 - samples/sec: 16.95 - lr: 0.020000\n",
+      "2021-09-21 20:24:45,592 epoch 1 - iter 10/13 - loss 0.51204094 - samples/sec: 15.62 - lr: 0.020000\n",
+      "2021-09-21 20:24:45,656 epoch 1 - iter 11/13 - loss 0.46575385 - samples/sec: 15.72 - lr: 0.020000\n",
+      "2021-09-21 20:24:45,708 epoch 1 - iter 12/13 - loss 0.42834022 - samples/sec: 19.54 - lr: 0.020000\n",
+      "2021-09-21 20:24:45,774 epoch 1 - iter 13/13 - loss 0.43709241 - samples/sec: 15.29 - lr: 0.020000\n",
+      "2021-09-21 20:24:45,775 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:45,775 EPOCH 1 done: loss 0.4371 - lr 0.0200000\n",
+      "2021-09-21 20:24:45,904 DEV : loss 0.3192406892776489 - score 0.0\n",
+      "2021-09-21 20:24:45,905 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:51:13,016 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:13,081 epoch 2 - iter 1/13 - loss 0.08829820 - samples/sec: 21.08 - lr: 0.020000\n",
-      "2021-09-08 11:51:13,128 epoch 2 - iter 2/13 - loss 0.35746971 - samples/sec: 21.59 - lr: 0.020000\n",
-      "2021-09-08 11:51:13,174 epoch 2 - iter 3/13 - loss 0.29991160 - samples/sec: 21.86 - lr: 0.020000\n",
-      "2021-09-08 11:51:13,217 epoch 2 - iter 4/13 - loss 0.22711753 - samples/sec: 23.64 - lr: 0.020000\n",
-      "2021-09-08 11:51:13,264 epoch 2 - iter 5/13 - loss 0.39070241 - samples/sec: 21.47 - lr: 0.020000\n",
-      "2021-09-08 11:51:13,310 epoch 2 - iter 6/13 - loss 0.36514462 - samples/sec: 21.94 - lr: 0.020000\n",
-      "2021-09-08 11:51:13,352 epoch 2 - iter 7/13 - loss 0.31565495 - samples/sec: 23.66 - lr: 0.020000\n",
-      "2021-09-08 11:51:13,399 epoch 2 - iter 8/13 - loss 0.28528588 - samples/sec: 21.45 - lr: 0.020000\n",
-      "2021-09-08 11:51:13,447 epoch 2 - iter 9/13 - loss 0.38574402 - samples/sec: 21.28 - lr: 0.020000\n",
-      "2021-09-08 11:51:13,494 epoch 2 - iter 10/13 - loss 0.39965842 - samples/sec: 21.56 - lr: 0.020000\n",
-      "2021-09-08 11:51:13,541 epoch 2 - iter 11/13 - loss 0.41782412 - samples/sec: 21.55 - lr: 0.020000\n",
-      "2021-09-08 11:51:13,589 epoch 2 - iter 12/13 - loss 0.44257948 - samples/sec: 20.81 - lr: 0.020000\n",
-      "2021-09-08 11:51:13,637 epoch 2 - iter 13/13 - loss 0.51108133 - samples/sec: 21.48 - lr: 0.020000\n",
-      "2021-09-08 11:51:13,638 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:13,638 EPOCH 2 done: loss 0.5111 - lr 0.0200000\n",
-      "2021-09-08 11:51:13,680 DEV : loss 0.034552209079265594 - score 0.0\n",
-      "2021-09-08 11:51:13,681 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:51:13,683 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:13,746 epoch 3 - iter 1/13 - loss 0.29541722 - samples/sec: 21.27 - lr: 0.020000\n",
-      "2021-09-08 11:51:13,792 epoch 3 - iter 2/13 - loss 0.27255896 - samples/sec: 21.92 - lr: 0.020000\n",
-      "2021-09-08 11:51:13,838 epoch 3 - iter 3/13 - loss 0.36412144 - samples/sec: 22.02 - lr: 0.020000\n",
-      "2021-09-08 11:51:13,885 epoch 3 - iter 4/13 - loss 0.32112981 - samples/sec: 21.64 - lr: 0.020000\n",
-      "2021-09-08 11:51:13,931 epoch 3 - iter 5/13 - loss 0.26402150 - samples/sec: 21.90 - lr: 0.020000\n",
-      "2021-09-08 11:51:13,977 epoch 3 - iter 6/13 - loss 0.28406675 - samples/sec: 22.10 - lr: 0.020000\n",
-      "2021-09-08 11:51:14,025 epoch 3 - iter 7/13 - loss 0.50541947 - samples/sec: 20.92 - lr: 0.020000\n",
-      "2021-09-08 11:51:14,071 epoch 3 - iter 8/13 - loss 0.45271409 - samples/sec: 22.07 - lr: 0.020000\n",
-      "2021-09-08 11:51:14,113 epoch 3 - iter 9/13 - loss 0.40756209 - samples/sec: 23.65 - lr: 0.020000\n",
-      "2021-09-08 11:51:14,159 epoch 3 - iter 10/13 - loss 0.55079222 - samples/sec: 22.05 - lr: 0.020000\n",
-      "2021-09-08 11:51:14,206 epoch 3 - iter 11/13 - loss 0.51545376 - samples/sec: 21.67 - lr: 0.020000\n",
-      "2021-09-08 11:51:14,252 epoch 3 - iter 12/13 - loss 0.48047973 - samples/sec: 22.06 - lr: 0.020000\n",
-      "2021-09-08 11:51:14,294 epoch 3 - iter 13/13 - loss 0.44531593 - samples/sec: 23.75 - lr: 0.020000\n",
-      "2021-09-08 11:51:14,295 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:14,296 EPOCH 3 done: loss 0.4453 - lr 0.0200000\n",
-      "2021-09-08 11:51:14,327 DEV : loss 0.02949143573641777 - score 0.0\n",
-      "2021-09-08 11:51:14,328 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:51:14,330 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:14,387 epoch 4 - iter 1/13 - loss 0.00772441 - samples/sec: 23.49 - lr: 0.020000\n",
-      "2021-09-08 11:51:14,432 epoch 4 - iter 2/13 - loss 0.17493374 - samples/sec: 22.39 - lr: 0.020000\n",
-      "2021-09-08 11:51:14,478 epoch 4 - iter 3/13 - loss 0.52157148 - samples/sec: 22.22 - lr: 0.020000\n",
-      "2021-09-08 11:51:14,521 epoch 4 - iter 4/13 - loss 0.39226473 - samples/sec: 23.60 - lr: 0.020000\n",
-      "2021-09-08 11:51:14,563 epoch 4 - iter 5/13 - loss 0.31443654 - samples/sec: 23.95 - lr: 0.020000\n",
-      "2021-09-08 11:51:14,605 epoch 4 - iter 6/13 - loss 0.27436688 - samples/sec: 23.97 - lr: 0.020000\n",
-      "2021-09-08 11:51:14,651 epoch 4 - iter 7/13 - loss 0.27649144 - samples/sec: 21.98 - lr: 0.020000\n",
-      "2021-09-08 11:51:14,696 epoch 4 - iter 8/13 - loss 0.27429097 - samples/sec: 22.29 - lr: 0.020000\n",
-      "2021-09-08 11:51:14,739 epoch 4 - iter 9/13 - loss 0.24777394 - samples/sec: 23.61 - lr: 0.020000\n",
-      "2021-09-08 11:51:14,785 epoch 4 - iter 10/13 - loss 0.23400548 - samples/sec: 21.99 - lr: 0.020000\n",
-      "2021-09-08 11:51:14,827 epoch 4 - iter 11/13 - loss 0.21334039 - samples/sec: 23.83 - lr: 0.020000\n",
-      "2021-09-08 11:51:14,869 epoch 4 - iter 12/13 - loss 0.19581490 - samples/sec: 23.99 - lr: 0.020000\n",
-      "2021-09-08 11:51:14,916 epoch 4 - iter 13/13 - loss 0.19246364 - samples/sec: 21.69 - lr: 0.020000\n",
-      "2021-09-08 11:51:14,917 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:14,917 EPOCH 4 done: loss 0.1925 - lr 0.0200000\n",
-      "2021-09-08 11:51:14,949 DEV : loss 0.29122060537338257 - score 0.0\n",
-      "2021-09-08 11:51:14,950 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:51:14,953 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:15,014 epoch 5 - iter 1/13 - loss 0.03939693 - samples/sec: 22.06 - lr: 0.020000\n",
-      "2021-09-08 11:51:15,064 epoch 5 - iter 2/13 - loss 0.05577043 - samples/sec: 20.46 - lr: 0.020000\n",
-      "2021-09-08 11:51:15,109 epoch 5 - iter 3/13 - loss 0.03750290 - samples/sec: 22.22 - lr: 0.020000\n",
-      "2021-09-08 11:51:15,158 epoch 5 - iter 4/13 - loss 0.18066385 - samples/sec: 20.76 - lr: 0.020000\n",
-      "2021-09-08 11:51:15,206 epoch 5 - iter 5/13 - loss 0.14623194 - samples/sec: 21.25 - lr: 0.020000\n",
-      "2021-09-08 11:51:15,249 epoch 5 - iter 6/13 - loss 0.12272327 - samples/sec: 23.25 - lr: 0.020000\n",
-      "2021-09-08 11:51:15,293 epoch 5 - iter 7/13 - loss 0.10653084 - samples/sec: 23.14 - lr: 0.020000\n",
-      "2021-09-08 11:51:15,339 epoch 5 - iter 8/13 - loss 0.09362875 - samples/sec: 21.97 - lr: 0.020000\n",
-      "2021-09-08 11:51:15,386 epoch 5 - iter 9/13 - loss 0.08365486 - samples/sec: 21.83 - lr: 0.020000\n",
-      "2021-09-08 11:51:15,431 epoch 5 - iter 10/13 - loss 0.07553111 - samples/sec: 22.58 - lr: 0.020000\n",
-      "2021-09-08 11:51:15,475 epoch 5 - iter 11/13 - loss 0.06876824 - samples/sec: 22.81 - lr: 0.020000\n",
-      "2021-09-08 11:51:15,521 epoch 5 - iter 12/13 - loss 0.17416687 - samples/sec: 21.97 - lr: 0.020000\n",
-      "2021-09-08 11:51:15,564 epoch 5 - iter 13/13 - loss 0.16116480 - samples/sec: 23.79 - lr: 0.020000\n",
-      "2021-09-08 11:51:15,565 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:15,565 EPOCH 5 done: loss 0.1612 - lr 0.0200000\n",
-      "2021-09-08 11:51:15,686 DEV : loss 0.04905056953430176 - score 0.0\n",
-      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 11:51:15,687 BAD EPOCHS (no improvement): 4\n"
+      "2021-09-21 20:24:54,305 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:54,369 epoch 2 - iter 1/13 - loss 0.60141152 - samples/sec: 21.00 - lr: 0.020000\n",
+      "2021-09-21 20:24:54,413 epoch 2 - iter 2/13 - loss 0.31684632 - samples/sec: 23.29 - lr: 0.020000\n",
+      "2021-09-21 20:24:54,457 epoch 2 - iter 3/13 - loss 0.23050115 - samples/sec: 22.90 - lr: 0.020000\n",
+      "2021-09-21 20:24:54,500 epoch 2 - iter 4/13 - loss 0.18128956 - samples/sec: 23.36 - lr: 0.020000\n",
+      "2021-09-21 20:24:54,547 epoch 2 - iter 5/13 - loss 0.15703425 - samples/sec: 21.73 - lr: 0.020000\n",
+      "2021-09-21 20:24:54,594 epoch 2 - iter 6/13 - loss 0.16000447 - samples/sec: 21.43 - lr: 0.020000\n",
+      "2021-09-21 20:24:54,637 epoch 2 - iter 7/13 - loss 0.14116572 - samples/sec: 23.29 - lr: 0.020000\n",
+      "2021-09-21 20:24:54,683 epoch 2 - iter 8/13 - loss 0.21513974 - samples/sec: 21.77 - lr: 0.020000\n",
+      "2021-09-21 20:24:54,730 epoch 2 - iter 9/13 - loss 0.20127625 - samples/sec: 21.48 - lr: 0.020000\n",
+      "2021-09-21 20:24:54,774 epoch 2 - iter 10/13 - loss 0.18819172 - samples/sec: 23.32 - lr: 0.020000\n",
+      "2021-09-21 20:24:54,820 epoch 2 - iter 11/13 - loss 0.25749902 - samples/sec: 21.97 - lr: 0.020000\n",
+      "2021-09-21 20:24:54,866 epoch 2 - iter 12/13 - loss 0.23616938 - samples/sec: 21.92 - lr: 0.020000\n",
+      "2021-09-21 20:24:54,910 epoch 2 - iter 13/13 - loss 0.21862862 - samples/sec: 22.93 - lr: 0.020000\n",
+      "2021-09-21 20:24:54,911 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:54,911 EPOCH 2 done: loss 0.2186 - lr 0.0200000\n",
+      "2021-09-21 20:24:55,617 DEV : loss 0.7358100414276123 - score 0.0\n",
+      "2021-09-21 20:24:55,618 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:24:55,621 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:55,701 epoch 3 - iter 1/13 - loss 0.01108044 - samples/sec: 17.58 - lr: 0.020000\n",
+      "2021-09-21 20:24:55,771 epoch 3 - iter 2/13 - loss 0.00638092 - samples/sec: 14.47 - lr: 0.020000\n",
+      "2021-09-21 20:24:55,859 epoch 3 - iter 3/13 - loss 0.05563814 - samples/sec: 11.35 - lr: 0.020000\n",
+      "2021-09-21 20:24:55,928 epoch 3 - iter 4/13 - loss 0.26834229 - samples/sec: 14.75 - lr: 0.020000\n",
+      "2021-09-21 20:24:56,005 epoch 3 - iter 5/13 - loss 0.21538447 - samples/sec: 13.02 - lr: 0.020000\n",
+      "2021-09-21 20:24:56,089 epoch 3 - iter 6/13 - loss 0.17979402 - samples/sec: 12.02 - lr: 0.020000\n",
+      "2021-09-21 20:24:56,174 epoch 3 - iter 7/13 - loss 0.15767758 - samples/sec: 11.81 - lr: 0.020000\n",
+      "2021-09-21 20:24:56,253 epoch 3 - iter 8/13 - loss 0.19672655 - samples/sec: 12.70 - lr: 0.020000\n",
+      "2021-09-21 20:24:56,301 epoch 3 - iter 9/13 - loss 0.17510090 - samples/sec: 21.06 - lr: 0.020000\n",
+      "2021-09-21 20:24:56,355 epoch 3 - iter 10/13 - loss 0.15782602 - samples/sec: 18.73 - lr: 0.020000\n",
+      "2021-09-21 20:24:56,410 epoch 3 - iter 11/13 - loss 0.20938417 - samples/sec: 18.33 - lr: 0.020000\n",
+      "2021-09-21 20:24:56,470 epoch 3 - iter 12/13 - loss 0.19213264 - samples/sec: 17.06 - lr: 0.020000\n",
+      "2021-09-21 20:24:56,529 epoch 3 - iter 13/13 - loss 0.18513035 - samples/sec: 17.14 - lr: 0.020000\n",
+      "2021-09-21 20:24:56,530 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:56,531 EPOCH 3 done: loss 0.1851 - lr 0.0200000\n",
+      "2021-09-21 20:24:56,573 DEV : loss 0.7837637662887573 - score 0.0\n",
+      "2021-09-21 20:24:56,575 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:24:56,577 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:56,641 epoch 4 - iter 1/13 - loss 0.00350945 - samples/sec: 21.12 - lr: 0.020000\n",
+      "2021-09-21 20:24:56,692 epoch 4 - iter 2/13 - loss 0.10250102 - samples/sec: 19.76 - lr: 0.020000\n",
+      "2021-09-21 20:24:56,738 epoch 4 - iter 3/13 - loss 0.06871190 - samples/sec: 22.10 - lr: 0.020000\n",
+      "2021-09-21 20:24:56,784 epoch 4 - iter 4/13 - loss 0.05188922 - samples/sec: 22.30 - lr: 0.020000\n",
+      "2021-09-21 20:24:56,831 epoch 4 - iter 5/13 - loss 0.04194119 - samples/sec: 21.51 - lr: 0.020000\n",
+      "2021-09-21 20:24:56,882 epoch 4 - iter 6/13 - loss 0.09270019 - samples/sec: 19.84 - lr: 0.020000\n",
+      "2021-09-21 20:24:56,928 epoch 4 - iter 7/13 - loss 0.07964746 - samples/sec: 22.09 - lr: 0.020000\n",
+      "2021-09-21 20:24:56,978 epoch 4 - iter 8/13 - loss 0.08750855 - samples/sec: 20.18 - lr: 0.020000\n",
+      "2021-09-21 20:24:57,027 epoch 4 - iter 9/13 - loss 0.22884017 - samples/sec: 20.62 - lr: 0.020000\n",
+      "2021-09-21 20:24:57,072 epoch 4 - iter 10/13 - loss 0.20779677 - samples/sec: 22.35 - lr: 0.020000\n",
+      "2021-09-21 20:24:57,118 epoch 4 - iter 11/13 - loss 0.18936545 - samples/sec: 22.24 - lr: 0.020000\n",
+      "2021-09-21 20:24:57,166 epoch 4 - iter 12/13 - loss 0.17385967 - samples/sec: 21.02 - lr: 0.020000\n",
+      "2021-09-21 20:24:57,215 epoch 4 - iter 13/13 - loss 0.27722790 - samples/sec: 20.68 - lr: 0.020000\n",
+      "2021-09-21 20:24:57,216 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:57,216 EPOCH 4 done: loss 0.2772 - lr 0.0200000\n",
+      "2021-09-21 20:24:57,379 DEV : loss 0.40249204635620117 - score 0.0\n",
+      "2021-09-21 20:24:57,380 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:24:57,475 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:57,539 epoch 5 - iter 1/13 - loss 1.63213873 - samples/sec: 20.79 - lr: 0.020000\n",
+      "2021-09-21 20:24:57,587 epoch 5 - iter 2/13 - loss 0.86645054 - samples/sec: 21.31 - lr: 0.020000\n",
+      "2021-09-21 20:24:57,633 epoch 5 - iter 3/13 - loss 0.58505592 - samples/sec: 21.57 - lr: 0.020000\n",
+      "2021-09-21 20:24:57,678 epoch 5 - iter 4/13 - loss 0.43905401 - samples/sec: 22.68 - lr: 0.020000\n",
+      "2021-09-21 20:24:57,725 epoch 5 - iter 5/13 - loss 0.41002012 - samples/sec: 21.46 - lr: 0.020000\n",
+      "2021-09-21 20:24:57,769 epoch 5 - iter 6/13 - loss 0.34207688 - samples/sec: 23.19 - lr: 0.020000\n",
+      "2021-09-21 20:24:57,814 epoch 5 - iter 7/13 - loss 0.29343168 - samples/sec: 22.43 - lr: 0.020000\n",
+      "2021-09-21 20:24:57,861 epoch 5 - iter 8/13 - loss 0.26387104 - samples/sec: 21.57 - lr: 0.020000\n",
+      "2021-09-21 20:24:57,905 epoch 5 - iter 9/13 - loss 0.23463535 - samples/sec: 22.79 - lr: 0.020000\n",
+      "2021-09-21 20:24:57,949 epoch 5 - iter 10/13 - loss 0.21145360 - samples/sec: 22.72 - lr: 0.020000\n",
+      "2021-09-21 20:24:57,998 epoch 5 - iter 11/13 - loss 0.26154912 - samples/sec: 20.91 - lr: 0.020000\n",
+      "2021-09-21 20:24:58,041 epoch 5 - iter 12/13 - loss 0.23983778 - samples/sec: 23.24 - lr: 0.020000\n",
+      "2021-09-21 20:24:58,088 epoch 5 - iter 13/13 - loss 0.22808239 - samples/sec: 21.80 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:51:15,758 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:15,816 epoch 6 - iter 1/13 - loss 0.00067644 - samples/sec: 23.48 - lr: 0.010000\n",
-      "2021-09-08 11:51:15,858 epoch 6 - iter 2/13 - loss 0.00365182 - samples/sec: 23.97 - lr: 0.010000\n",
-      "2021-09-08 11:51:15,904 epoch 6 - iter 3/13 - loss 0.51434169 - samples/sec: 22.13 - lr: 0.010000\n",
-      "2021-09-08 11:51:15,947 epoch 6 - iter 4/13 - loss 0.38989347 - samples/sec: 23.31 - lr: 0.010000\n",
-      "2021-09-08 11:51:15,993 epoch 6 - iter 5/13 - loss 0.37674383 - samples/sec: 22.09 - lr: 0.010000\n",
-      "2021-09-08 11:51:16,035 epoch 6 - iter 6/13 - loss 0.31442284 - samples/sec: 23.94 - lr: 0.010000\n",
-      "2021-09-08 11:51:16,078 epoch 6 - iter 7/13 - loss 0.26973767 - samples/sec: 23.49 - lr: 0.010000\n",
-      "2021-09-08 11:51:16,124 epoch 6 - iter 8/13 - loss 0.24543761 - samples/sec: 22.09 - lr: 0.010000\n",
-      "2021-09-08 11:51:16,167 epoch 6 - iter 9/13 - loss 0.21874877 - samples/sec: 23.65 - lr: 0.010000\n",
-      "2021-09-08 11:51:16,213 epoch 6 - iter 10/13 - loss 0.35401329 - samples/sec: 21.73 - lr: 0.010000\n",
-      "2021-09-08 11:51:16,260 epoch 6 - iter 11/13 - loss 0.34859183 - samples/sec: 21.52 - lr: 0.010000\n",
-      "2021-09-08 11:51:16,302 epoch 6 - iter 12/13 - loss 0.32046928 - samples/sec: 23.84 - lr: 0.010000\n",
-      "2021-09-08 11:51:16,349 epoch 6 - iter 13/13 - loss 0.29884877 - samples/sec: 21.43 - lr: 0.010000\n",
-      "2021-09-08 11:51:16,350 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:16,351 EPOCH 6 done: loss 0.2988 - lr 0.0100000\n",
-      "2021-09-08 11:51:16,504 DEV : loss 0.005997078493237495 - score 0.0\n",
-      "2021-09-08 11:51:16,505 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 11:51:26,840 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:26,900 epoch 7 - iter 1/13 - loss 0.02398179 - samples/sec: 22.91 - lr: 0.010000\n",
-      "2021-09-08 11:51:26,947 epoch 7 - iter 2/13 - loss 0.01972530 - samples/sec: 21.70 - lr: 0.010000\n",
-      "2021-09-08 11:51:26,996 epoch 7 - iter 3/13 - loss 0.17267672 - samples/sec: 20.48 - lr: 0.010000\n",
-      "2021-09-08 11:51:27,042 epoch 7 - iter 4/13 - loss 0.13074963 - samples/sec: 22.18 - lr: 0.010000\n",
-      "2021-09-08 11:51:27,091 epoch 7 - iter 5/13 - loss 0.20800527 - samples/sec: 20.76 - lr: 0.010000\n",
-      "2021-09-08 11:51:27,138 epoch 7 - iter 6/13 - loss 0.17373707 - samples/sec: 21.40 - lr: 0.010000\n",
-      "2021-09-08 11:51:27,188 epoch 7 - iter 7/13 - loss 0.15980030 - samples/sec: 20.32 - lr: 0.010000\n",
-      "2021-09-08 11:51:27,237 epoch 7 - iter 8/13 - loss 0.14280370 - samples/sec: 20.80 - lr: 0.010000\n",
-      "2021-09-08 11:51:27,284 epoch 7 - iter 9/13 - loss 0.12704866 - samples/sec: 21.42 - lr: 0.010000\n",
-      "2021-09-08 11:51:27,333 epoch 7 - iter 10/13 - loss 0.11839671 - samples/sec: 20.87 - lr: 0.010000\n",
-      "2021-09-08 11:51:27,376 epoch 7 - iter 11/13 - loss 0.10802231 - samples/sec: 23.44 - lr: 0.010000\n",
-      "2021-09-08 11:51:27,420 epoch 7 - iter 12/13 - loss 0.10016861 - samples/sec: 22.78 - lr: 0.010000\n",
-      "2021-09-08 11:51:27,469 epoch 7 - iter 13/13 - loss 0.09494637 - samples/sec: 21.00 - lr: 0.010000\n",
-      "2021-09-08 11:51:27,470 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:27,470 EPOCH 7 done: loss 0.0949 - lr 0.0100000\n",
-      "2021-09-08 11:51:27,610 DEV : loss 0.0006130224792286754 - score 0.0\n",
-      "2021-09-08 11:51:27,611 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 11:51:35,702 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:35,761 epoch 8 - iter 1/13 - loss 0.00063154 - samples/sec: 23.08 - lr: 0.010000\n",
-      "2021-09-08 11:51:35,808 epoch 8 - iter 2/13 - loss 0.05978780 - samples/sec: 21.83 - lr: 0.010000\n",
-      "2021-09-08 11:51:35,850 epoch 8 - iter 3/13 - loss 0.04021792 - samples/sec: 23.86 - lr: 0.010000\n",
-      "2021-09-08 11:51:35,895 epoch 8 - iter 4/13 - loss 0.11336938 - samples/sec: 22.27 - lr: 0.010000\n",
-      "2021-09-08 11:51:35,938 epoch 8 - iter 5/13 - loss 0.09131737 - samples/sec: 23.59 - lr: 0.010000\n",
-      "2021-09-08 11:51:35,980 epoch 8 - iter 6/13 - loss 0.07620438 - samples/sec: 23.80 - lr: 0.010000\n",
-      "2021-09-08 11:51:36,023 epoch 8 - iter 7/13 - loss 0.06549020 - samples/sec: 23.94 - lr: 0.010000\n",
-      "2021-09-08 11:51:36,066 epoch 8 - iter 8/13 - loss 0.05752821 - samples/sec: 23.48 - lr: 0.010000\n",
-      "2021-09-08 11:51:36,108 epoch 8 - iter 9/13 - loss 0.05142769 - samples/sec: 24.02 - lr: 0.010000\n",
-      "2021-09-08 11:51:36,153 epoch 8 - iter 10/13 - loss 0.05216037 - samples/sec: 22.27 - lr: 0.010000\n",
-      "2021-09-08 11:51:36,196 epoch 8 - iter 11/13 - loss 0.04748357 - samples/sec: 23.45 - lr: 0.010000\n",
-      "2021-09-08 11:51:36,241 epoch 8 - iter 12/13 - loss 0.06833025 - samples/sec: 22.19 - lr: 0.010000\n",
-      "2021-09-08 11:51:36,283 epoch 8 - iter 13/13 - loss 0.06341387 - samples/sec: 24.00 - lr: 0.010000\n",
-      "2021-09-08 11:51:36,284 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:36,285 EPOCH 8 done: loss 0.0634 - lr 0.0100000\n",
-      "2021-09-08 11:51:36,511 DEV : loss 0.000397708237869665 - score 0.0\n",
-      "2021-09-08 11:51:36,512 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:24:58,089 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:58,089 EPOCH 5 done: loss 0.2281 - lr 0.0200000\n",
+      "2021-09-21 20:24:58,257 DEV : loss 0.80296391248703 - score 0.0\n",
+      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 20:24:58,258 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:24:58,367 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:58,430 epoch 6 - iter 1/13 - loss 0.23422326 - samples/sec: 21.19 - lr: 0.010000\n",
+      "2021-09-21 20:24:58,474 epoch 6 - iter 2/13 - loss 0.12191033 - samples/sec: 23.18 - lr: 0.010000\n",
+      "2021-09-21 20:24:58,521 epoch 6 - iter 3/13 - loss 0.10473168 - samples/sec: 21.55 - lr: 0.010000\n",
+      "2021-09-21 20:24:58,569 epoch 6 - iter 4/13 - loss 0.08471295 - samples/sec: 21.21 - lr: 0.010000\n",
+      "2021-09-21 20:24:58,612 epoch 6 - iter 5/13 - loss 0.06799175 - samples/sec: 23.18 - lr: 0.010000\n",
+      "2021-09-21 20:24:58,656 epoch 6 - iter 6/13 - loss 0.05676652 - samples/sec: 23.31 - lr: 0.010000\n",
+      "2021-09-21 20:24:58,700 epoch 6 - iter 7/13 - loss 0.04877701 - samples/sec: 22.60 - lr: 0.010000\n",
+      "2021-09-21 20:24:58,744 epoch 6 - iter 8/13 - loss 0.04279705 - samples/sec: 23.19 - lr: 0.010000\n",
+      "2021-09-21 20:24:58,790 epoch 6 - iter 9/13 - loss 0.25024198 - samples/sec: 21.65 - lr: 0.010000\n",
+      "2021-09-21 20:24:58,838 epoch 6 - iter 10/13 - loss 0.28371273 - samples/sec: 21.37 - lr: 0.010000\n",
+      "2021-09-21 20:24:58,881 epoch 6 - iter 11/13 - loss 0.25917431 - samples/sec: 23.27 - lr: 0.010000\n",
+      "2021-09-21 20:24:58,925 epoch 6 - iter 12/13 - loss 0.23782845 - samples/sec: 23.30 - lr: 0.010000\n",
+      "2021-09-21 20:24:58,971 epoch 6 - iter 13/13 - loss 0.22153162 - samples/sec: 21.72 - lr: 0.010000\n",
+      "2021-09-21 20:24:58,972 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:58,972 EPOCH 6 done: loss 0.2215 - lr 0.0100000\n",
+      "2021-09-21 20:24:59,129 DEV : loss 0.6418973803520203 - score 0.0\n",
+      "2021-09-21 20:24:59,130 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:24:59,226 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:59,286 epoch 7 - iter 1/13 - loss 0.00064136 - samples/sec: 22.99 - lr: 0.010000\n",
+      "2021-09-21 20:24:59,329 epoch 7 - iter 2/13 - loss 0.00164108 - samples/sec: 23.29 - lr: 0.010000\n",
+      "2021-09-21 20:24:59,373 epoch 7 - iter 3/13 - loss 0.00125176 - samples/sec: 23.01 - lr: 0.010000\n",
+      "2021-09-21 20:24:59,417 epoch 7 - iter 4/13 - loss 0.00135664 - samples/sec: 23.04 - lr: 0.010000\n",
+      "2021-09-21 20:24:59,463 epoch 7 - iter 5/13 - loss 0.01333282 - samples/sec: 21.77 - lr: 0.010000\n",
+      "2021-09-21 20:24:59,509 epoch 7 - iter 6/13 - loss 0.07296766 - samples/sec: 21.73 - lr: 0.010000\n",
+      "2021-09-21 20:24:59,557 epoch 7 - iter 7/13 - loss 0.08487802 - samples/sec: 21.22 - lr: 0.010000\n",
+      "2021-09-21 20:24:59,600 epoch 7 - iter 8/13 - loss 0.07467456 - samples/sec: 23.47 - lr: 0.010000\n",
+      "2021-09-21 20:24:59,643 epoch 7 - iter 9/13 - loss 0.06652303 - samples/sec: 23.39 - lr: 0.010000\n",
+      "2021-09-21 20:24:59,687 epoch 7 - iter 10/13 - loss 0.05995813 - samples/sec: 22.84 - lr: 0.010000\n",
+      "2021-09-21 20:24:59,731 epoch 7 - iter 11/13 - loss 0.05482538 - samples/sec: 23.20 - lr: 0.010000\n",
+      "2021-09-21 20:24:59,774 epoch 7 - iter 12/13 - loss 0.05159912 - samples/sec: 23.39 - lr: 0.010000\n",
+      "2021-09-21 20:24:59,820 epoch 7 - iter 13/13 - loss 0.04778496 - samples/sec: 21.97 - lr: 0.010000\n",
+      "2021-09-21 20:24:59,821 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:24:59,821 EPOCH 7 done: loss 0.0478 - lr 0.0100000\n",
+      "2021-09-21 20:24:59,990 DEV : loss 0.5722447633743286 - score 0.0\n",
+      "2021-09-21 20:24:59,990 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:25:00,099 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:00,159 epoch 8 - iter 1/13 - loss 0.00161470 - samples/sec: 22.92 - lr: 0.010000\n",
+      "2021-09-21 20:25:00,204 epoch 8 - iter 2/13 - loss 0.00237514 - samples/sec: 22.28 - lr: 0.010000\n",
+      "2021-09-21 20:25:00,248 epoch 8 - iter 3/13 - loss 0.00188903 - samples/sec: 23.13 - lr: 0.010000\n",
+      "2021-09-21 20:25:00,291 epoch 8 - iter 4/13 - loss 0.00338450 - samples/sec: 23.37 - lr: 0.010000\n",
+      "2021-09-21 20:25:00,335 epoch 8 - iter 5/13 - loss 0.00279336 - samples/sec: 22.95 - lr: 0.010000\n",
+      "2021-09-21 20:25:00,379 epoch 8 - iter 6/13 - loss 0.00253575 - samples/sec: 23.18 - lr: 0.010000\n",
+      "2021-09-21 20:25:00,422 epoch 8 - iter 7/13 - loss 0.00221318 - samples/sec: 23.37 - lr: 0.010000\n",
+      "2021-09-21 20:25:00,465 epoch 8 - iter 8/13 - loss 0.00296682 - samples/sec: 23.44 - lr: 0.010000\n",
+      "2021-09-21 20:25:00,509 epoch 8 - iter 9/13 - loss 0.00277568 - samples/sec: 22.84 - lr: 0.010000\n",
+      "2021-09-21 20:25:00,553 epoch 8 - iter 10/13 - loss 0.00263970 - samples/sec: 23.20 - lr: 0.010000\n",
+      "2021-09-21 20:25:00,596 epoch 8 - iter 11/13 - loss 0.00248223 - samples/sec: 23.32 - lr: 0.010000\n",
+      "2021-09-21 20:25:00,643 epoch 8 - iter 12/13 - loss 0.03923632 - samples/sec: 21.42 - lr: 0.010000\n",
+      "2021-09-21 20:25:00,687 epoch 8 - iter 13/13 - loss 0.03630382 - samples/sec: 23.14 - lr: 0.010000\n",
+      "2021-09-21 20:25:00,688 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:00,688 EPOCH 8 done: loss 0.0363 - lr 0.0100000\n",
+      "2021-09-21 20:25:00,847 DEV : loss 0.4527360796928406 - score 0.0\n",
+      "2021-09-21 20:25:00,848 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:25:00,957 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:01,017 epoch 9 - iter 1/13 - loss 0.00255283 - samples/sec: 22.94 - lr: 0.010000\n",
+      "2021-09-21 20:25:01,060 epoch 9 - iter 2/13 - loss 0.00194611 - samples/sec: 23.15 - lr: 0.010000\n",
+      "2021-09-21 20:25:01,103 epoch 9 - iter 3/13 - loss 0.00153672 - samples/sec: 23.51 - lr: 0.010000\n",
+      "2021-09-21 20:25:01,148 epoch 9 - iter 4/13 - loss 0.00132667 - samples/sec: 22.44 - lr: 0.010000\n",
+      "2021-09-21 20:25:01,196 epoch 9 - iter 5/13 - loss 0.01256907 - samples/sec: 21.32 - lr: 0.010000\n",
+      "2021-09-21 20:25:01,239 epoch 9 - iter 6/13 - loss 0.01096094 - samples/sec: 23.30 - lr: 0.010000\n",
+      "2021-09-21 20:25:01,283 epoch 9 - iter 7/13 - loss 0.01095036 - samples/sec: 22.82 - lr: 0.010000\n",
+      "2021-09-21 20:25:01,327 epoch 9 - iter 8/13 - loss 0.00982602 - samples/sec: 23.15 - lr: 0.010000\n",
+      "2021-09-21 20:25:01,373 epoch 9 - iter 9/13 - loss 0.14050432 - samples/sec: 21.70 - lr: 0.010000\n",
+      "2021-09-21 20:25:01,421 epoch 9 - iter 10/13 - loss 0.14095934 - samples/sec: 21.44 - lr: 0.010000\n",
+      "2021-09-21 20:25:01,464 epoch 9 - iter 11/13 - loss 0.12884678 - samples/sec: 23.12 - lr: 0.010000\n",
+      "2021-09-21 20:25:01,508 epoch 9 - iter 12/13 - loss 0.11812479 - samples/sec: 23.34 - lr: 0.010000\n",
+      "2021-09-21 20:25:01,552 epoch 9 - iter 13/13 - loss 0.10906089 - samples/sec: 22.93 - lr: 0.010000\n",
+      "2021-09-21 20:25:01,553 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:01,553 EPOCH 9 done: loss 0.1091 - lr 0.0100000\n",
+      "2021-09-21 20:25:01,718 DEV : loss 0.2768048942089081 - score 0.0\n",
+      "2021-09-21 20:25:01,719 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:51:43,581 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:43,641 epoch 9 - iter 1/13 - loss 0.00317505 - samples/sec: 22.90 - lr: 0.010000\n",
-      "2021-09-08 11:51:43,684 epoch 9 - iter 2/13 - loss 0.00223372 - samples/sec: 23.79 - lr: 0.010000\n",
-      "2021-09-08 11:51:43,727 epoch 9 - iter 3/13 - loss 0.00343627 - samples/sec: 23.41 - lr: 0.010000\n",
-      "2021-09-08 11:51:43,769 epoch 9 - iter 4/13 - loss 0.00293129 - samples/sec: 23.93 - lr: 0.010000\n",
-      "2021-09-08 11:51:43,811 epoch 9 - iter 5/13 - loss 0.00251123 - samples/sec: 23.81 - lr: 0.010000\n",
-      "2021-09-08 11:51:43,854 epoch 9 - iter 6/13 - loss 0.00250407 - samples/sec: 23.43 - lr: 0.010000\n",
-      "2021-09-08 11:51:43,897 epoch 9 - iter 7/13 - loss 0.00221675 - samples/sec: 23.45 - lr: 0.010000\n",
-      "2021-09-08 11:51:43,940 epoch 9 - iter 8/13 - loss 0.00265448 - samples/sec: 23.70 - lr: 0.010000\n",
-      "2021-09-08 11:51:43,984 epoch 9 - iter 9/13 - loss 0.00240762 - samples/sec: 23.23 - lr: 0.010000\n",
-      "2021-09-08 11:51:44,026 epoch 9 - iter 10/13 - loss 0.00248108 - samples/sec: 23.74 - lr: 0.010000\n",
-      "2021-09-08 11:51:44,068 epoch 9 - iter 11/13 - loss 0.00283676 - samples/sec: 23.97 - lr: 0.010000\n",
-      "2021-09-08 11:51:44,112 epoch 9 - iter 12/13 - loss 0.00269525 - samples/sec: 22.97 - lr: 0.010000\n",
-      "2021-09-08 11:51:44,156 epoch 9 - iter 13/13 - loss 0.00278942 - samples/sec: 22.94 - lr: 0.010000\n",
-      "2021-09-08 11:51:44,157 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:44,157 EPOCH 9 done: loss 0.0028 - lr 0.0100000\n",
-      "2021-09-08 11:51:44,187 DEV : loss 0.00044210365740582347 - score 0.0\n",
-      "2021-09-08 11:51:44,187 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:51:44,189 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:44,246 epoch 10 - iter 1/13 - loss 0.00171659 - samples/sec: 23.65 - lr: 0.010000\n",
-      "2021-09-08 11:51:44,289 epoch 10 - iter 2/13 - loss 0.00181829 - samples/sec: 23.75 - lr: 0.010000\n",
-      "2021-09-08 11:51:44,331 epoch 10 - iter 3/13 - loss 0.00142062 - samples/sec: 23.88 - lr: 0.010000\n",
-      "2021-09-08 11:51:44,374 epoch 10 - iter 4/13 - loss 0.00129079 - samples/sec: 23.63 - lr: 0.010000\n",
-      "2021-09-08 11:51:44,416 epoch 10 - iter 5/13 - loss 0.00118305 - samples/sec: 23.99 - lr: 0.010000\n",
-      "2021-09-08 11:51:44,458 epoch 10 - iter 6/13 - loss 0.00205927 - samples/sec: 23.85 - lr: 0.010000\n",
-      "2021-09-08 11:51:44,504 epoch 10 - iter 7/13 - loss 0.16243978 - samples/sec: 21.90 - lr: 0.010000\n",
-      "2021-09-08 11:51:44,546 epoch 10 - iter 8/13 - loss 0.14226363 - samples/sec: 23.95 - lr: 0.010000\n",
-      "2021-09-08 11:51:44,589 epoch 10 - iter 9/13 - loss 0.12677424 - samples/sec: 23.96 - lr: 0.010000\n",
-      "2021-09-08 11:51:44,631 epoch 10 - iter 10/13 - loss 0.11437730 - samples/sec: 23.99 - lr: 0.010000\n",
-      "2021-09-08 11:51:44,674 epoch 10 - iter 11/13 - loss 0.10402520 - samples/sec: 23.42 - lr: 0.010000\n",
-      "2021-09-08 11:51:44,716 epoch 10 - iter 12/13 - loss 0.09540042 - samples/sec: 23.90 - lr: 0.010000\n"
+      "2021-09-21 20:25:08,191 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:08,253 epoch 10 - iter 1/13 - loss 0.00035337 - samples/sec: 22.37 - lr: 0.010000\n",
+      "2021-09-21 20:25:08,298 epoch 10 - iter 2/13 - loss 0.00047199 - samples/sec: 22.85 - lr: 0.010000\n",
+      "2021-09-21 20:25:08,345 epoch 10 - iter 3/13 - loss 0.03868553 - samples/sec: 21.50 - lr: 0.010000\n",
+      "2021-09-21 20:25:08,388 epoch 10 - iter 4/13 - loss 0.02920964 - samples/sec: 23.28 - lr: 0.010000\n",
+      "2021-09-21 20:25:08,432 epoch 10 - iter 5/13 - loss 0.02379853 - samples/sec: 22.90 - lr: 0.010000\n",
+      "2021-09-21 20:25:08,476 epoch 10 - iter 6/13 - loss 0.02065423 - samples/sec: 23.20 - lr: 0.010000\n",
+      "2021-09-21 20:25:08,522 epoch 10 - iter 7/13 - loss 0.01969274 - samples/sec: 21.73 - lr: 0.010000\n",
+      "2021-09-21 20:25:08,566 epoch 10 - iter 8/13 - loss 0.01765686 - samples/sec: 22.98 - lr: 0.010000\n",
+      "2021-09-21 20:25:08,609 epoch 10 - iter 9/13 - loss 0.01580502 - samples/sec: 23.21 - lr: 0.010000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:51:44,758 epoch 10 - iter 13/13 - loss 0.08866512 - samples/sec: 24.01 - lr: 0.010000\n",
-      "2021-09-08 11:51:44,759 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:44,759 EPOCH 10 done: loss 0.0887 - lr 0.0100000\n",
-      "2021-09-08 11:51:44,789 DEV : loss 0.0005942346178926528 - score 0.0\n",
-      "2021-09-08 11:51:44,790 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:51:48,876 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:51:48,877 Testing using best model ...\n",
-      "2021-09-08 11:51:48,902 loading file None1/best-model.pt\n",
+      "2021-09-21 20:25:08,653 epoch 10 - iter 10/13 - loss 0.01437756 - samples/sec: 23.39 - lr: 0.010000\n",
+      "2021-09-21 20:25:08,698 epoch 10 - iter 11/13 - loss 0.01316929 - samples/sec: 22.05 - lr: 0.010000\n",
+      "2021-09-21 20:25:08,742 epoch 10 - iter 12/13 - loss 0.01228913 - samples/sec: 23.17 - lr: 0.010000\n",
+      "2021-09-21 20:25:08,785 epoch 10 - iter 13/13 - loss 0.01153505 - samples/sec: 23.41 - lr: 0.010000\n",
+      "2021-09-21 20:25:08,786 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:08,787 EPOCH 10 done: loss 0.0115 - lr 0.0100000\n",
+      "2021-09-21 20:25:08,924 DEV : loss 0.34200528264045715 - score 0.0\n",
+      "2021-09-21 20:25:08,924 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:25:17,807 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:17,808 Testing using best model ...\n",
+      "2021-09-21 20:25:17,810 loading file None1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:51:56,332 \t0.0\n",
-      "2021-09-08 11:51:56,333 \n",
+      "2021-09-21 20:25:23,163 \t0.0\n",
+      "2021-09-21 20:25:23,163 \n",
       "Results:\n",
       "- F-score (micro) 0.0\n",
       "- F-score (macro) 0.0\n",
@@ -2429,7 +2427,6 @@
       "               precision    recall  f1-score   support\n",
       "\n",
       "       travel     0.0000    0.0000    0.0000         0\n",
-      "     wellness     0.0000    0.0000    0.0000         0\n",
       "        women     0.0000    0.0000    0.0000         0\n",
       "      parents     0.0000    0.0000    0.0000         0\n",
       "     business     0.0000    0.0000    0.0000         0\n",
@@ -2437,38 +2434,39 @@
       "      fashion     0.0000    0.0000    0.0000         0\n",
       "entertainment     0.0000    0.0000    0.0000         0\n",
       "      science     0.0000    0.0000    0.0000         0\n",
-      "      divorce     0.0000    0.0000    0.0000         0\n",
       "        crime     0.0000    0.0000    0.0000         0\n",
+      "     religion     0.0000    0.0000    0.0000         0\n",
       "       sports     0.0000    0.0000    0.0000         0\n",
       "     politics     0.0000    0.0000    0.0000         0\n",
+      "       comedy     0.0000    0.0000    0.0000         0\n",
       "   technology     0.0000    0.0000    0.0000         1\n",
-      "       comedy     0.0000    0.0000    0.0000         1\n",
+      "      divorce     0.0000    0.0000    0.0000         1\n",
       "\n",
       "    micro avg     0.0000    0.0000    0.0000         2\n",
       "    macro avg     0.0000    0.0000    0.0000         2\n",
       " weighted avg     0.0000    0.0000    0.0000         2\n",
       "  samples avg     0.0000    0.0000    0.0000         2\n",
       "\n",
-      "2021-09-08 11:51:56,333 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:03,711 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
+      "2021-09-21 20:25:23,164 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:32,444 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:52:07,606 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:25:37,121 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 15/15 [00:00<00:00, 46226.72it/s]"
+      "100%|██████████| 15/15 [00:00<00:00, 45491.37it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:52:07,608 [b'travel', b'technology', b'wellness', b'women', b'business', b'fashion', b'entertainment', b'science', b'divorce', b'crime', b'sports', b'politics', b'comedy', b'parents', b'religion']\n",
-      "2021-09-08 11:52:07,617 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:07,618 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:25:37,123 [b'travel', b'wellness', b'women', b'parents', b'weddings', b'fashion', b'entertainment', b'science', b'crime', b'religion', b'sports', b'politics', b'comedy', b'technology', b'divorce']\n",
+      "2021-09-21 20:25:37,254 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:37,256 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -2781,28 +2779,24 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:52:07,619 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:07,619 Corpus: \"Corpus: 13 train + 1 dev + 2 test sentences\"\n",
-      "2021-09-08 11:52:07,619 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:07,620 Parameters:\n",
-      "2021-09-08 11:52:07,620  - learning_rate: \"0.02\"\n",
-      "2021-09-08 11:52:07,620  - mini_batch_size: \"1\"\n",
-      "2021-09-08 11:52:07,621  - patience: \"3\"\n",
-      "2021-09-08 11:52:07,621  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 11:52:07,621  - max_epochs: \"10\"\n",
-      "2021-09-08 11:52:07,621  - shuffle: \"True\"\n",
-      "2021-09-08 11:52:07,622  - train_with_dev: \"False\"\n",
-      "2021-09-08 11:52:07,622  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 11:52:07,622 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:07,623 Model training base path: \"None1\"\n",
-      "2021-09-08 11:52:07,623 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:07,623 Device: cuda:0\n",
-      "2021-09-08 11:52:07,623 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:07,624 Embeddings storage mode: cpu\n",
-      "2021-09-08 11:52:07,630 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:07,684 epoch 1 - iter 1/13 - loss 0.44438195 - samples/sec: 26.00 - lr: 0.020000\n",
-      "2021-09-08 11:52:07,730 epoch 1 - iter 2/13 - loss 1.02117246 - samples/sec: 21.91 - lr: 0.020000\n",
-      "2021-09-08 11:52:07,776 epoch 1 - iter 3/13 - loss 1.15988664 - samples/sec: 22.15 - lr: 0.020000\n"
+      "2021-09-21 20:25:37,257 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:37,257 Corpus: \"Corpus: 13 train + 1 dev + 2 test sentences\"\n",
+      "2021-09-21 20:25:37,257 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:37,257 Parameters:\n",
+      "2021-09-21 20:25:37,258  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:25:37,258  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:25:37,258  - patience: \"3\"\n",
+      "2021-09-21 20:25:37,259  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:25:37,259  - max_epochs: \"10\"\n",
+      "2021-09-21 20:25:37,259  - shuffle: \"True\"\n",
+      "2021-09-21 20:25:37,259  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:25:37,260  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:25:37,260 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:37,260 Model training base path: \"None1\"\n",
+      "2021-09-21 20:25:37,261 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:37,261 Device: cuda:0\n",
+      "2021-09-21 20:25:37,261 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:37,261 Embeddings storage mode: cpu\n"
      ]
     },
     {
@@ -2816,235 +2810,243 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:52:07,822 epoch 1 - iter 4/13 - loss 0.97639178 - samples/sec: 21.65 - lr: 0.020000\n",
-      "2021-09-08 11:52:07,868 epoch 1 - iter 5/13 - loss 1.13580949 - samples/sec: 22.03 - lr: 0.020000\n",
-      "2021-09-08 11:52:07,914 epoch 1 - iter 6/13 - loss 0.96207412 - samples/sec: 22.06 - lr: 0.020000\n",
-      "2021-09-08 11:52:07,957 epoch 1 - iter 7/13 - loss 0.82907485 - samples/sec: 23.52 - lr: 0.020000\n",
-      "2021-09-08 11:52:08,003 epoch 1 - iter 8/13 - loss 0.74896852 - samples/sec: 22.14 - lr: 0.020000\n",
-      "2021-09-08 11:52:08,048 epoch 1 - iter 9/13 - loss 0.69474323 - samples/sec: 22.18 - lr: 0.020000\n",
-      "2021-09-08 11:52:08,095 epoch 1 - iter 10/13 - loss 0.63127726 - samples/sec: 21.65 - lr: 0.020000\n",
-      "2021-09-08 11:52:08,141 epoch 1 - iter 11/13 - loss 0.65369934 - samples/sec: 21.61 - lr: 0.020000\n",
-      "2021-09-08 11:52:08,187 epoch 1 - iter 12/13 - loss 0.60708700 - samples/sec: 22.23 - lr: 0.020000\n",
-      "2021-09-08 11:52:08,233 epoch 1 - iter 13/13 - loss 0.64323529 - samples/sec: 21.87 - lr: 0.020000\n",
-      "2021-09-08 11:52:08,234 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:08,234 EPOCH 1 done: loss 0.6432 - lr 0.0200000\n",
-      "2021-09-08 11:52:08,263 DEV : loss 0.8394702672958374 - score 0.0\n",
-      "2021-09-08 11:52:08,264 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:25:37,447 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:37,521 epoch 1 - iter 1/13 - loss 0.18483923 - samples/sec: 23.19 - lr: 0.020000\n",
+      "2021-09-21 20:25:37,583 epoch 1 - iter 2/13 - loss 0.39004766 - samples/sec: 16.03 - lr: 0.020000\n",
+      "2021-09-21 20:25:37,644 epoch 1 - iter 3/13 - loss 0.37674665 - samples/sec: 16.74 - lr: 0.020000\n",
+      "2021-09-21 20:25:37,692 epoch 1 - iter 4/13 - loss 0.41756679 - samples/sec: 21.05 - lr: 0.020000\n",
+      "2021-09-21 20:25:37,739 epoch 1 - iter 5/13 - loss 0.50100681 - samples/sec: 21.22 - lr: 0.020000\n",
+      "2021-09-21 20:25:37,789 epoch 1 - iter 6/13 - loss 0.56708234 - samples/sec: 20.36 - lr: 0.020000\n",
+      "2021-09-21 20:25:37,838 epoch 1 - iter 7/13 - loss 0.59881696 - samples/sec: 20.65 - lr: 0.020000\n",
+      "2021-09-21 20:25:37,886 epoch 1 - iter 8/13 - loss 0.60046453 - samples/sec: 21.05 - lr: 0.020000\n",
+      "2021-09-21 20:25:37,937 epoch 1 - iter 9/13 - loss 0.59864276 - samples/sec: 19.63 - lr: 0.020000\n",
+      "2021-09-21 20:25:37,996 epoch 1 - iter 10/13 - loss 0.61221194 - samples/sec: 17.07 - lr: 0.020000\n",
+      "2021-09-21 20:25:38,064 epoch 1 - iter 11/13 - loss 0.56000698 - samples/sec: 14.91 - lr: 0.020000\n",
+      "2021-09-21 20:25:38,131 epoch 1 - iter 12/13 - loss 0.54334801 - samples/sec: 15.15 - lr: 0.020000\n",
+      "2021-09-21 20:25:38,195 epoch 1 - iter 13/13 - loss 0.55931368 - samples/sec: 15.74 - lr: 0.020000\n",
+      "2021-09-21 20:25:38,196 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:38,196 EPOCH 1 done: loss 0.5593 - lr 0.0200000\n",
+      "2021-09-21 20:25:38,346 DEV : loss 0.18742308020591736 - score 0.0\n",
+      "2021-09-21 20:25:38,347 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:52:11,898 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:11,959 epoch 2 - iter 1/13 - loss 0.00646696 - samples/sec: 22.95 - lr: 0.020000\n",
-      "2021-09-08 11:52:12,005 epoch 2 - iter 2/13 - loss 0.40831547 - samples/sec: 21.84 - lr: 0.020000\n",
-      "2021-09-08 11:52:12,051 epoch 2 - iter 3/13 - loss 0.55947766 - samples/sec: 22.09 - lr: 0.020000\n",
-      "2021-09-08 11:52:12,093 epoch 2 - iter 4/13 - loss 0.42220788 - samples/sec: 23.84 - lr: 0.020000\n",
-      "2021-09-08 11:52:12,139 epoch 2 - iter 5/13 - loss 0.40416469 - samples/sec: 21.96 - lr: 0.020000\n",
-      "2021-09-08 11:52:12,184 epoch 2 - iter 6/13 - loss 0.35287235 - samples/sec: 22.26 - lr: 0.020000\n",
-      "2021-09-08 11:52:12,227 epoch 2 - iter 7/13 - loss 0.30305859 - samples/sec: 23.85 - lr: 0.020000\n",
-      "2021-09-08 11:52:12,271 epoch 2 - iter 8/13 - loss 0.26738063 - samples/sec: 22.91 - lr: 0.020000\n",
-      "2021-09-08 11:52:12,317 epoch 2 - iter 9/13 - loss 0.41854155 - samples/sec: 22.05 - lr: 0.020000\n",
-      "2021-09-08 11:52:12,362 epoch 2 - iter 10/13 - loss 0.39004682 - samples/sec: 22.26 - lr: 0.020000\n",
-      "2021-09-08 11:52:12,405 epoch 2 - iter 11/13 - loss 0.35533062 - samples/sec: 23.53 - lr: 0.020000\n",
-      "2021-09-08 11:52:12,450 epoch 2 - iter 12/13 - loss 0.41172185 - samples/sec: 22.30 - lr: 0.020000\n",
-      "2021-09-08 11:52:12,496 epoch 2 - iter 13/13 - loss 0.38321844 - samples/sec: 22.11 - lr: 0.020000\n",
-      "2021-09-08 11:52:12,497 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:12,497 EPOCH 2 done: loss 0.3832 - lr 0.0200000\n",
-      "2021-09-08 11:52:12,527 DEV : loss 0.09868704527616501 - score 0.0\n",
-      "2021-09-08 11:52:12,527 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:25:43,724 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:43,803 epoch 2 - iter 1/13 - loss 0.79441148 - samples/sec: 17.30 - lr: 0.020000\n",
+      "2021-09-21 20:25:43,858 epoch 2 - iter 2/13 - loss 0.43225258 - samples/sec: 18.37 - lr: 0.020000\n",
+      "2021-09-21 20:25:43,931 epoch 2 - iter 3/13 - loss 0.44601765 - samples/sec: 13.80 - lr: 0.020000\n",
+      "2021-09-21 20:25:43,987 epoch 2 - iter 4/13 - loss 0.33999186 - samples/sec: 17.79 - lr: 0.020000\n",
+      "2021-09-21 20:25:44,047 epoch 2 - iter 5/13 - loss 0.49764406 - samples/sec: 16.91 - lr: 0.020000\n",
+      "2021-09-21 20:25:44,111 epoch 2 - iter 6/13 - loss 0.51953808 - samples/sec: 15.75 - lr: 0.020000\n",
+      "2021-09-21 20:25:44,177 epoch 2 - iter 7/13 - loss 0.50751505 - samples/sec: 15.36 - lr: 0.020000\n",
+      "2021-09-21 20:25:44,241 epoch 2 - iter 8/13 - loss 0.46174814 - samples/sec: 15.87 - lr: 0.020000\n",
+      "2021-09-21 20:25:44,310 epoch 2 - iter 9/13 - loss 0.41354831 - samples/sec: 14.47 - lr: 0.020000\n",
+      "2021-09-21 20:25:44,369 epoch 2 - iter 10/13 - loss 0.41768241 - samples/sec: 17.18 - lr: 0.020000\n",
+      "2021-09-21 20:25:44,445 epoch 2 - iter 11/13 - loss 0.38930589 - samples/sec: 13.29 - lr: 0.020000\n",
+      "2021-09-21 20:25:44,501 epoch 2 - iter 12/13 - loss 0.39465855 - samples/sec: 18.13 - lr: 0.020000\n",
+      "2021-09-21 20:25:44,563 epoch 2 - iter 13/13 - loss 0.36893307 - samples/sec: 16.30 - lr: 0.020000\n",
+      "2021-09-21 20:25:44,565 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:44,565 EPOCH 2 done: loss 0.3689 - lr 0.0200000\n",
+      "2021-09-21 20:25:44,755 DEV : loss 0.05950547382235527 - score 0.0\n",
+      "2021-09-21 20:25:44,757 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:52:19,173 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:19,233 epoch 3 - iter 1/13 - loss 0.02564982 - samples/sec: 22.85 - lr: 0.020000\n",
-      "2021-09-08 11:52:19,281 epoch 3 - iter 2/13 - loss 0.32494174 - samples/sec: 21.31 - lr: 0.020000\n",
-      "2021-09-08 11:52:19,327 epoch 3 - iter 3/13 - loss 0.37564951 - samples/sec: 22.07 - lr: 0.020000\n",
-      "2021-09-08 11:52:19,372 epoch 3 - iter 4/13 - loss 0.30539979 - samples/sec: 22.20 - lr: 0.020000\n",
-      "2021-09-08 11:52:19,418 epoch 3 - iter 5/13 - loss 0.25200125 - samples/sec: 21.92 - lr: 0.020000\n",
-      "2021-09-08 11:52:19,460 epoch 3 - iter 6/13 - loss 0.21318454 - samples/sec: 23.96 - lr: 0.020000\n",
-      "2021-09-08 11:52:19,503 epoch 3 - iter 7/13 - loss 0.18332573 - samples/sec: 23.70 - lr: 0.020000\n",
-      "2021-09-08 11:52:19,546 epoch 3 - iter 8/13 - loss 0.16061835 - samples/sec: 23.59 - lr: 0.020000\n",
-      "2021-09-08 11:52:19,591 epoch 3 - iter 9/13 - loss 0.17885204 - samples/sec: 22.27 - lr: 0.020000\n",
-      "2021-09-08 11:52:19,637 epoch 3 - iter 10/13 - loss 0.18439207 - samples/sec: 22.05 - lr: 0.020000\n",
-      "2021-09-08 11:52:19,683 epoch 3 - iter 11/13 - loss 0.21591466 - samples/sec: 21.82 - lr: 0.020000\n",
-      "2021-09-08 11:52:19,726 epoch 3 - iter 12/13 - loss 0.19865781 - samples/sec: 23.75 - lr: 0.020000\n",
-      "2021-09-08 11:52:19,768 epoch 3 - iter 13/13 - loss 0.18426357 - samples/sec: 23.98 - lr: 0.020000\n",
-      "2021-09-08 11:52:19,769 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:19,769 EPOCH 3 done: loss 0.1843 - lr 0.0200000\n",
-      "2021-09-08 11:52:20,452 DEV : loss 0.43401774764060974 - score 0.0\n",
-      "2021-09-08 11:52:20,453 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:52:20,459 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:20,520 epoch 4 - iter 1/13 - loss 1.44817448 - samples/sec: 21.95 - lr: 0.020000\n",
-      "2021-09-08 11:52:20,564 epoch 4 - iter 2/13 - loss 0.72727733 - samples/sec: 22.79 - lr: 0.020000\n",
-      "2021-09-08 11:52:20,610 epoch 4 - iter 3/13 - loss 0.74244794 - samples/sec: 22.03 - lr: 0.020000\n",
-      "2021-09-08 11:52:20,656 epoch 4 - iter 4/13 - loss 0.57946194 - samples/sec: 21.89 - lr: 0.020000\n",
-      "2021-09-08 11:52:20,699 epoch 4 - iter 5/13 - loss 0.46742573 - samples/sec: 23.43 - lr: 0.020000\n",
-      "2021-09-08 11:52:20,745 epoch 4 - iter 6/13 - loss 0.44429004 - samples/sec: 22.09 - lr: 0.020000\n",
-      "2021-09-08 11:52:20,787 epoch 4 - iter 7/13 - loss 0.38135026 - samples/sec: 23.91 - lr: 0.020000\n",
-      "2021-09-08 11:52:20,833 epoch 4 - iter 8/13 - loss 0.33675622 - samples/sec: 21.88 - lr: 0.020000\n",
-      "2021-09-08 11:52:20,875 epoch 4 - iter 9/13 - loss 0.30366731 - samples/sec: 23.84 - lr: 0.020000\n",
-      "2021-09-08 11:52:20,918 epoch 4 - iter 10/13 - loss 0.27339217 - samples/sec: 23.79 - lr: 0.020000\n",
-      "2021-09-08 11:52:20,961 epoch 4 - iter 11/13 - loss 0.24892626 - samples/sec: 23.40 - lr: 0.020000\n",
-      "2021-09-08 11:52:21,003 epoch 4 - iter 12/13 - loss 0.22855825 - samples/sec: 23.88 - lr: 0.020000\n",
-      "2021-09-08 11:52:21,049 epoch 4 - iter 13/13 - loss 0.31410535 - samples/sec: 22.21 - lr: 0.020000\n",
-      "2021-09-08 11:52:21,050 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:21,050 EPOCH 4 done: loss 0.3141 - lr 0.0200000\n",
-      "2021-09-08 11:52:24,309 DEV : loss 0.2510029971599579 - score 0.0\n",
-      "2021-09-08 11:52:24,310 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:52:24,324 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:24,386 epoch 5 - iter 1/13 - loss 0.29228863 - samples/sec: 21.49 - lr: 0.020000\n",
-      "2021-09-08 11:52:24,429 epoch 5 - iter 2/13 - loss 0.14703173 - samples/sec: 23.38 - lr: 0.020000\n",
-      "2021-09-08 11:52:24,475 epoch 5 - iter 3/13 - loss 0.11618482 - samples/sec: 22.03 - lr: 0.020000\n",
-      "2021-09-08 11:52:24,521 epoch 5 - iter 4/13 - loss 0.09246627 - samples/sec: 22.19 - lr: 0.020000\n",
-      "2021-09-08 11:52:24,563 epoch 5 - iter 5/13 - loss 0.07411829 - samples/sec: 23.82 - lr: 0.020000\n",
-      "2021-09-08 11:52:24,607 epoch 5 - iter 6/13 - loss 0.06205926 - samples/sec: 23.14 - lr: 0.020000\n",
-      "2021-09-08 11:52:24,649 epoch 5 - iter 7/13 - loss 0.05330021 - samples/sec: 23.79 - lr: 0.020000\n",
-      "2021-09-08 11:52:24,695 epoch 5 - iter 8/13 - loss 0.25946926 - samples/sec: 22.19 - lr: 0.020000\n",
-      "2021-09-08 11:52:24,738 epoch 5 - iter 9/13 - loss 0.23359983 - samples/sec: 23.37 - lr: 0.020000\n",
-      "2021-09-08 11:52:24,781 epoch 5 - iter 10/13 - loss 0.21044241 - samples/sec: 23.70 - lr: 0.020000\n",
-      "2021-09-08 11:52:24,823 epoch 5 - iter 11/13 - loss 0.19156559 - samples/sec: 23.61 - lr: 0.020000\n",
-      "2021-09-08 11:52:24,866 epoch 5 - iter 12/13 - loss 0.17668787 - samples/sec: 23.73 - lr: 0.020000\n",
-      "2021-09-08 11:52:24,908 epoch 5 - iter 13/13 - loss 0.16312293 - samples/sec: 23.89 - lr: 0.020000\n",
-      "2021-09-08 11:52:24,909 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:24,909 EPOCH 5 done: loss 0.1631 - lr 0.0200000\n",
-      "2021-09-08 11:52:24,939 DEV : loss 0.11724186688661575 - score 0.0\n",
-      "2021-09-08 11:52:24,940 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:52:24,942 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 20:25:53,797 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:53,903 epoch 3 - iter 1/13 - loss 0.17644154 - samples/sec: 17.79 - lr: 0.020000\n",
+      "2021-09-21 20:25:53,957 epoch 3 - iter 2/13 - loss 0.29666465 - samples/sec: 18.67 - lr: 0.020000\n",
+      "2021-09-21 20:25:54,015 epoch 3 - iter 3/13 - loss 0.19848745 - samples/sec: 17.41 - lr: 0.020000\n",
+      "2021-09-21 20:25:54,078 epoch 3 - iter 4/13 - loss 0.16732567 - samples/sec: 16.02 - lr: 0.020000\n",
+      "2021-09-21 20:25:54,142 epoch 3 - iter 5/13 - loss 0.43898844 - samples/sec: 15.78 - lr: 0.020000\n",
+      "2021-09-21 20:25:54,198 epoch 3 - iter 6/13 - loss 0.40067981 - samples/sec: 18.03 - lr: 0.020000\n",
+      "2021-09-21 20:25:54,252 epoch 3 - iter 7/13 - loss 0.49876622 - samples/sec: 18.95 - lr: 0.020000\n",
+      "2021-09-21 20:25:54,318 epoch 3 - iter 8/13 - loss 0.44051409 - samples/sec: 15.08 - lr: 0.020000\n",
+      "2021-09-21 20:25:54,377 epoch 3 - iter 9/13 - loss 0.39676428 - samples/sec: 17.06 - lr: 0.020000\n",
+      "2021-09-21 20:25:54,424 epoch 3 - iter 10/13 - loss 0.35717041 - samples/sec: 21.48 - lr: 0.020000\n",
+      "2021-09-21 20:25:54,477 epoch 3 - iter 11/13 - loss 0.48224942 - samples/sec: 19.05 - lr: 0.020000\n",
+      "2021-09-21 20:25:54,537 epoch 3 - iter 12/13 - loss 0.44337100 - samples/sec: 16.88 - lr: 0.020000\n",
+      "2021-09-21 20:25:54,589 epoch 3 - iter 13/13 - loss 0.41423254 - samples/sec: 19.61 - lr: 0.020000\n",
+      "2021-09-21 20:25:54,590 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:54,590 EPOCH 3 done: loss 0.4142 - lr 0.0200000\n",
+      "2021-09-21 20:25:54,645 DEV : loss 0.000715926056727767 - score 0.0\n",
+      "2021-09-21 20:25:54,645 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:25:58,697 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:58,852 epoch 4 - iter 1/13 - loss 0.00242744 - samples/sec: 14.08 - lr: 0.020000\n",
+      "2021-09-21 20:25:58,935 epoch 4 - iter 2/13 - loss 0.00728512 - samples/sec: 12.11 - lr: 0.020000\n",
+      "2021-09-21 20:25:59,005 epoch 4 - iter 3/13 - loss 0.00668278 - samples/sec: 14.42 - lr: 0.020000\n",
+      "2021-09-21 20:25:59,081 epoch 4 - iter 4/13 - loss 0.00634691 - samples/sec: 13.35 - lr: 0.020000\n",
+      "2021-09-21 20:25:59,159 epoch 4 - iter 5/13 - loss 0.02019328 - samples/sec: 12.86 - lr: 0.020000\n",
+      "2021-09-21 20:25:59,231 epoch 4 - iter 6/13 - loss 0.01734832 - samples/sec: 13.91 - lr: 0.020000\n",
+      "2021-09-21 20:25:59,288 epoch 4 - iter 7/13 - loss 0.02674216 - samples/sec: 17.74 - lr: 0.020000\n",
+      "2021-09-21 20:25:59,353 epoch 4 - iter 8/13 - loss 0.09051825 - samples/sec: 15.60 - lr: 0.020000\n",
+      "2021-09-21 20:25:59,428 epoch 4 - iter 9/13 - loss 0.09010784 - samples/sec: 13.31 - lr: 0.020000\n",
+      "2021-09-21 20:25:59,486 epoch 4 - iter 10/13 - loss 0.08130434 - samples/sec: 17.42 - lr: 0.020000\n",
+      "2021-09-21 20:25:59,531 epoch 4 - iter 11/13 - loss 0.07405650 - samples/sec: 22.22 - lr: 0.020000\n",
+      "2021-09-21 20:25:59,583 epoch 4 - iter 12/13 - loss 0.06837837 - samples/sec: 19.37 - lr: 0.020000\n",
+      "2021-09-21 20:25:59,647 epoch 4 - iter 13/13 - loss 0.14216907 - samples/sec: 15.91 - lr: 0.020000\n",
+      "2021-09-21 20:25:59,648 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:59,648 EPOCH 4 done: loss 0.1422 - lr 0.0200000\n",
+      "2021-09-21 20:25:59,754 DEV : loss 0.0011439088266342878 - score 0.0\n",
+      "2021-09-21 20:25:59,756 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:25:59,758 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:25:59,869 epoch 5 - iter 1/13 - loss 0.00674228 - samples/sec: 18.16 - lr: 0.020000\n",
+      "2021-09-21 20:25:59,930 epoch 5 - iter 2/13 - loss 0.00559714 - samples/sec: 16.61 - lr: 0.020000\n",
+      "2021-09-21 20:26:00,010 epoch 5 - iter 3/13 - loss 0.01937173 - samples/sec: 12.56 - lr: 0.020000\n",
+      "2021-09-21 20:26:00,058 epoch 5 - iter 4/13 - loss 0.31299190 - samples/sec: 20.99 - lr: 0.020000\n",
+      "2021-09-21 20:26:00,129 epoch 5 - iter 5/13 - loss 0.25217115 - samples/sec: 14.19 - lr: 0.020000\n",
+      "2021-09-21 20:26:00,202 epoch 5 - iter 6/13 - loss 0.24733482 - samples/sec: 13.72 - lr: 0.020000\n",
+      "2021-09-21 20:26:00,270 epoch 5 - iter 7/13 - loss 0.51807548 - samples/sec: 14.78 - lr: 0.020000\n",
+      "2021-09-21 20:26:00,340 epoch 5 - iter 8/13 - loss 0.45556524 - samples/sec: 14.51 - lr: 0.020000\n",
+      "2021-09-21 20:26:00,403 epoch 5 - iter 9/13 - loss 0.41002790 - samples/sec: 15.85 - lr: 0.020000\n",
+      "2021-09-21 20:26:00,471 epoch 5 - iter 10/13 - loss 0.36919860 - samples/sec: 14.89 - lr: 0.020000\n",
+      "2021-09-21 20:26:00,520 epoch 5 - iter 11/13 - loss 0.33585744 - samples/sec: 20.62 - lr: 0.020000\n",
+      "2021-09-21 20:26:00,572 epoch 5 - iter 12/13 - loss 0.31056475 - samples/sec: 19.18 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:52:24,999 epoch 6 - iter 1/13 - loss 0.00400797 - samples/sec: 23.63 - lr: 0.020000\n",
-      "2021-09-08 11:52:25,043 epoch 6 - iter 2/13 - loss 0.00316246 - samples/sec: 22.68 - lr: 0.020000\n",
-      "2021-09-08 11:52:25,086 epoch 6 - iter 3/13 - loss 0.00239247 - samples/sec: 23.57 - lr: 0.020000\n",
-      "2021-09-08 11:52:25,128 epoch 6 - iter 4/13 - loss 0.00201210 - samples/sec: 23.88 - lr: 0.020000\n",
-      "2021-09-08 11:52:25,172 epoch 6 - iter 5/13 - loss 0.00230533 - samples/sec: 23.34 - lr: 0.020000\n",
-      "2021-09-08 11:52:25,214 epoch 6 - iter 6/13 - loss 0.00219179 - samples/sec: 23.73 - lr: 0.020000\n",
-      "2021-09-08 11:52:25,260 epoch 6 - iter 7/13 - loss 0.27621237 - samples/sec: 22.18 - lr: 0.020000\n",
-      "2021-09-08 11:52:25,306 epoch 6 - iter 8/13 - loss 0.38511128 - samples/sec: 21.85 - lr: 0.020000\n",
-      "2021-09-08 11:52:25,349 epoch 6 - iter 9/13 - loss 0.34341677 - samples/sec: 23.68 - lr: 0.020000\n",
-      "2021-09-08 11:52:25,391 epoch 6 - iter 10/13 - loss 0.30979995 - samples/sec: 23.54 - lr: 0.020000\n",
-      "2021-09-08 11:52:25,437 epoch 6 - iter 11/13 - loss 0.31171214 - samples/sec: 22.06 - lr: 0.020000\n",
-      "2021-09-08 11:52:25,479 epoch 6 - iter 12/13 - loss 0.28668636 - samples/sec: 23.90 - lr: 0.020000\n",
-      "2021-09-08 11:52:25,523 epoch 6 - iter 13/13 - loss 0.26489071 - samples/sec: 23.36 - lr: 0.020000\n",
-      "2021-09-08 11:52:25,524 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:25,524 EPOCH 6 done: loss 0.2649 - lr 0.0200000\n",
-      "2021-09-08 11:52:25,552 DEV : loss 0.029261818155646324 - score 0.0\n",
-      "2021-09-08 11:52:25,553 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:26:00,639 epoch 5 - iter 13/13 - loss 0.39828414 - samples/sec: 15.18 - lr: 0.020000\n",
+      "2021-09-21 20:26:00,640 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:00,640 EPOCH 5 done: loss 0.3983 - lr 0.0200000\n",
+      "2021-09-21 20:26:00,717 DEV : loss 0.01619403250515461 - score 0.0\n",
+      "2021-09-21 20:26:00,721 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:26:00,723 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:00,832 epoch 6 - iter 1/13 - loss 0.00333487 - samples/sec: 20.03 - lr: 0.020000\n",
+      "2021-09-21 20:26:00,912 epoch 6 - iter 2/13 - loss 0.00449581 - samples/sec: 12.46 - lr: 0.020000\n",
+      "2021-09-21 20:26:00,969 epoch 6 - iter 3/13 - loss 0.05905517 - samples/sec: 17.86 - lr: 0.020000\n",
+      "2021-09-21 20:26:01,031 epoch 6 - iter 4/13 - loss 0.04791759 - samples/sec: 16.29 - lr: 0.020000\n",
+      "2021-09-21 20:26:01,094 epoch 6 - iter 5/13 - loss 0.08193243 - samples/sec: 15.96 - lr: 0.020000\n",
+      "2021-09-21 20:26:01,146 epoch 6 - iter 6/13 - loss 0.07803076 - samples/sec: 19.38 - lr: 0.020000\n",
+      "2021-09-21 20:26:01,205 epoch 6 - iter 7/13 - loss 0.06712692 - samples/sec: 17.01 - lr: 0.020000\n",
+      "2021-09-21 20:26:01,277 epoch 6 - iter 8/13 - loss 0.06070723 - samples/sec: 14.04 - lr: 0.020000\n",
+      "2021-09-21 20:26:01,331 epoch 6 - iter 9/13 - loss 0.05455027 - samples/sec: 18.57 - lr: 0.020000\n",
+      "2021-09-21 20:26:01,402 epoch 6 - iter 10/13 - loss 0.04918868 - samples/sec: 14.14 - lr: 0.020000\n",
+      "2021-09-21 20:26:01,463 epoch 6 - iter 11/13 - loss 0.04489005 - samples/sec: 16.66 - lr: 0.020000\n",
+      "2021-09-21 20:26:01,546 epoch 6 - iter 12/13 - loss 0.04147603 - samples/sec: 12.06 - lr: 0.020000\n",
+      "2021-09-21 20:26:01,603 epoch 6 - iter 13/13 - loss 0.03845388 - samples/sec: 17.72 - lr: 0.020000\n",
+      "2021-09-21 20:26:01,604 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:01,605 EPOCH 6 done: loss 0.0385 - lr 0.0200000\n",
+      "2021-09-21 20:26:01,655 DEV : loss 0.0003073386906180531 - score 0.0\n",
+      "2021-09-21 20:26:01,657 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:26:05,807 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:05,995 epoch 7 - iter 1/13 - loss 0.00503570 - samples/sec: 15.31 - lr: 0.020000\n",
+      "2021-09-21 20:26:06,059 epoch 7 - iter 2/13 - loss 0.02092735 - samples/sec: 15.66 - lr: 0.020000\n",
+      "2021-09-21 20:26:06,119 epoch 7 - iter 3/13 - loss 0.01421059 - samples/sec: 16.90 - lr: 0.020000\n",
+      "2021-09-21 20:26:06,176 epoch 7 - iter 4/13 - loss 0.01101545 - samples/sec: 17.64 - lr: 0.020000\n",
+      "2021-09-21 20:26:06,238 epoch 7 - iter 5/13 - loss 0.00918552 - samples/sec: 16.25 - lr: 0.020000\n",
+      "2021-09-21 20:26:06,296 epoch 7 - iter 6/13 - loss 0.00783816 - samples/sec: 17.50 - lr: 0.020000\n",
+      "2021-09-21 20:26:06,353 epoch 7 - iter 7/13 - loss 0.00839258 - samples/sec: 17.62 - lr: 0.020000\n",
+      "2021-09-21 20:26:06,409 epoch 7 - iter 8/13 - loss 0.00746247 - samples/sec: 17.96 - lr: 0.020000\n",
+      "2021-09-21 20:26:06,489 epoch 7 - iter 9/13 - loss 0.01572368 - samples/sec: 12.63 - lr: 0.020000\n",
+      "2021-09-21 20:26:06,545 epoch 7 - iter 10/13 - loss 0.01517168 - samples/sec: 17.88 - lr: 0.020000\n",
+      "2021-09-21 20:26:06,619 epoch 7 - iter 11/13 - loss 0.01450547 - samples/sec: 13.59 - lr: 0.020000\n",
+      "2021-09-21 20:26:06,678 epoch 7 - iter 12/13 - loss 0.01361555 - samples/sec: 17.14 - lr: 0.020000\n",
+      "2021-09-21 20:26:06,735 epoch 7 - iter 13/13 - loss 0.01261719 - samples/sec: 17.50 - lr: 0.020000\n",
+      "2021-09-21 20:26:06,736 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:06,737 EPOCH 7 done: loss 0.0126 - lr 0.0200000\n",
+      "2021-09-21 20:26:06,829 DEV : loss 0.0001322250027442351 - score 0.0\n",
+      "2021-09-21 20:26:06,832 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:26:13,372 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:13,474 epoch 8 - iter 1/13 - loss 0.00698184 - samples/sec: 16.46 - lr: 0.020000\n",
+      "2021-09-21 20:26:13,529 epoch 8 - iter 2/13 - loss 0.00790953 - samples/sec: 18.45 - lr: 0.020000\n",
+      "2021-09-21 20:26:13,595 epoch 8 - iter 3/13 - loss 0.00662468 - samples/sec: 15.34 - lr: 0.020000\n",
+      "2021-09-21 20:26:13,646 epoch 8 - iter 4/13 - loss 0.00525103 - samples/sec: 19.85 - lr: 0.020000\n",
+      "2021-09-21 20:26:13,707 epoch 8 - iter 5/13 - loss 0.00430531 - samples/sec: 16.48 - lr: 0.020000\n",
+      "2021-09-21 20:26:13,773 epoch 8 - iter 6/13 - loss 0.10085040 - samples/sec: 15.22 - lr: 0.020000\n",
+      "2021-09-21 20:26:13,837 epoch 8 - iter 7/13 - loss 0.08653757 - samples/sec: 15.97 - lr: 0.020000\n",
+      "2021-09-21 20:26:13,893 epoch 8 - iter 8/13 - loss 0.07576502 - samples/sec: 17.78 - lr: 0.020000\n",
+      "2021-09-21 20:26:13,954 epoch 8 - iter 9/13 - loss 0.06741544 - samples/sec: 16.58 - lr: 0.020000\n",
+      "2021-09-21 20:26:14,011 epoch 8 - iter 10/13 - loss 0.06072191 - samples/sec: 17.61 - lr: 0.020000\n",
+      "2021-09-21 20:26:14,071 epoch 8 - iter 11/13 - loss 0.05529460 - samples/sec: 17.02 - lr: 0.020000\n",
+      "2021-09-21 20:26:14,127 epoch 8 - iter 12/13 - loss 0.05072141 - samples/sec: 17.77 - lr: 0.020000\n",
+      "2021-09-21 20:26:14,189 epoch 8 - iter 13/13 - loss 0.04699116 - samples/sec: 16.43 - lr: 0.020000\n",
+      "2021-09-21 20:26:14,190 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:14,191 EPOCH 8 done: loss 0.0470 - lr 0.0200000\n",
+      "2021-09-21 20:26:14,249 DEV : loss 9.35346179176122e-05 - score 0.0\n",
+      "2021-09-21 20:26:14,251 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:52:29,780 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:29,843 epoch 7 - iter 1/13 - loss 0.10531610 - samples/sec: 21.46 - lr: 0.020000\n",
-      "2021-09-08 11:52:29,886 epoch 7 - iter 2/13 - loss 0.06029956 - samples/sec: 23.32 - lr: 0.020000\n",
-      "2021-09-08 11:52:29,929 epoch 7 - iter 3/13 - loss 0.04102848 - samples/sec: 23.68 - lr: 0.020000\n",
-      "2021-09-08 11:52:29,975 epoch 7 - iter 4/13 - loss 0.08841148 - samples/sec: 22.18 - lr: 0.020000\n",
-      "2021-09-08 11:52:30,021 epoch 7 - iter 5/13 - loss 0.14842567 - samples/sec: 21.65 - lr: 0.020000\n",
-      "2021-09-08 11:52:30,064 epoch 7 - iter 6/13 - loss 0.12393379 - samples/sec: 23.46 - lr: 0.020000\n",
-      "2021-09-08 11:52:30,107 epoch 7 - iter 7/13 - loss 0.10656477 - samples/sec: 23.58 - lr: 0.020000\n",
-      "2021-09-08 11:52:30,150 epoch 7 - iter 8/13 - loss 0.09425004 - samples/sec: 23.73 - lr: 0.020000\n",
-      "2021-09-08 11:52:30,192 epoch 7 - iter 9/13 - loss 0.08398030 - samples/sec: 23.91 - lr: 0.020000\n",
-      "2021-09-08 11:52:30,234 epoch 7 - iter 10/13 - loss 0.07632809 - samples/sec: 23.83 - lr: 0.020000\n",
-      "2021-09-08 11:52:30,278 epoch 7 - iter 11/13 - loss 0.06951454 - samples/sec: 23.19 - lr: 0.020000\n",
-      "2021-09-08 11:52:30,324 epoch 7 - iter 12/13 - loss 0.08359009 - samples/sec: 21.92 - lr: 0.020000\n",
-      "2021-09-08 11:52:30,367 epoch 7 - iter 13/13 - loss 0.07827050 - samples/sec: 23.54 - lr: 0.020000\n",
-      "2021-09-08 11:52:30,368 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:30,368 EPOCH 7 done: loss 0.0783 - lr 0.0200000\n",
-      "2021-09-08 11:52:30,398 DEV : loss 0.0026208474300801754 - score 0.0\n",
-      "2021-09-08 11:52:30,398 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:26:18,220 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:18,282 epoch 9 - iter 1/13 - loss 0.00054741 - samples/sec: 22.52 - lr: 0.020000\n",
+      "2021-09-21 20:26:18,326 epoch 9 - iter 2/13 - loss 0.00055036 - samples/sec: 22.68 - lr: 0.020000\n",
+      "2021-09-21 20:26:18,370 epoch 9 - iter 3/13 - loss 0.00059137 - samples/sec: 23.16 - lr: 0.020000\n",
+      "2021-09-21 20:26:18,416 epoch 9 - iter 4/13 - loss 0.60540058 - samples/sec: 21.93 - lr: 0.020000\n",
+      "2021-09-21 20:26:18,459 epoch 9 - iter 5/13 - loss 0.48478493 - samples/sec: 23.54 - lr: 0.020000\n",
+      "2021-09-21 20:26:18,503 epoch 9 - iter 6/13 - loss 0.40451046 - samples/sec: 22.91 - lr: 0.020000\n",
+      "2021-09-21 20:26:18,547 epoch 9 - iter 7/13 - loss 0.34701052 - samples/sec: 23.17 - lr: 0.020000\n",
+      "2021-09-21 20:26:18,593 epoch 9 - iter 8/13 - loss 0.31005118 - samples/sec: 21.94 - lr: 0.020000\n",
+      "2021-09-21 20:26:18,649 epoch 9 - iter 9/13 - loss 0.27652651 - samples/sec: 17.85 - lr: 0.020000\n",
+      "2021-09-21 20:26:18,697 epoch 9 - iter 10/13 - loss 0.24900429 - samples/sec: 21.44 - lr: 0.020000\n",
+      "2021-09-21 20:26:18,743 epoch 9 - iter 11/13 - loss 0.22644456 - samples/sec: 21.90 - lr: 0.020000\n",
+      "2021-09-21 20:26:18,788 epoch 9 - iter 12/13 - loss 0.20861088 - samples/sec: 22.12 - lr: 0.020000\n",
+      "2021-09-21 20:26:18,840 epoch 9 - iter 13/13 - loss 0.19453078 - samples/sec: 19.70 - lr: 0.020000\n",
+      "2021-09-21 20:26:18,841 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:18,841 EPOCH 9 done: loss 0.1945 - lr 0.0200000\n",
+      "2021-09-21 20:26:18,873 DEV : loss 7.461381756002083e-05 - score 0.0\n",
+      "2021-09-21 20:26:18,874 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:52:42,417 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:42,480 epoch 8 - iter 1/13 - loss 0.64852089 - samples/sec: 21.42 - lr: 0.020000\n",
-      "2021-09-08 11:52:42,527 epoch 8 - iter 2/13 - loss 0.38882109 - samples/sec: 21.51 - lr: 0.020000\n",
-      "2021-09-08 11:52:42,574 epoch 8 - iter 3/13 - loss 0.25980335 - samples/sec: 21.34 - lr: 0.020000\n",
-      "2021-09-08 11:52:42,632 epoch 8 - iter 4/13 - loss 0.24148044 - samples/sec: 17.46 - lr: 0.020000\n",
-      "2021-09-08 11:52:42,683 epoch 8 - iter 5/13 - loss 0.19411521 - samples/sec: 19.94 - lr: 0.020000\n",
-      "2021-09-08 11:52:42,734 epoch 8 - iter 6/13 - loss 0.46152127 - samples/sec: 19.58 - lr: 0.020000\n",
-      "2021-09-08 11:52:42,781 epoch 8 - iter 7/13 - loss 0.39596875 - samples/sec: 21.75 - lr: 0.020000\n",
-      "2021-09-08 11:52:42,834 epoch 8 - iter 8/13 - loss 0.35050007 - samples/sec: 19.03 - lr: 0.020000\n",
-      "2021-09-08 11:52:42,886 epoch 8 - iter 9/13 - loss 0.31881613 - samples/sec: 19.65 - lr: 0.020000\n",
-      "2021-09-08 11:52:42,929 epoch 8 - iter 10/13 - loss 0.28736790 - samples/sec: 23.28 - lr: 0.020000\n",
-      "2021-09-08 11:52:42,974 epoch 8 - iter 11/13 - loss 0.26139363 - samples/sec: 22.51 - lr: 0.020000\n",
-      "2021-09-08 11:52:43,018 epoch 8 - iter 12/13 - loss 0.24004066 - samples/sec: 22.81 - lr: 0.020000\n",
-      "2021-09-08 11:52:43,061 epoch 8 - iter 13/13 - loss 0.22175641 - samples/sec: 23.60 - lr: 0.020000\n",
-      "2021-09-08 11:52:43,062 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:43,063 EPOCH 8 done: loss 0.2218 - lr 0.0200000\n",
-      "2021-09-08 11:52:43,218 DEV : loss 0.1767703890800476 - score 0.0\n",
-      "2021-09-08 11:52:43,219 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:52:43,262 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:43,332 epoch 9 - iter 1/13 - loss 0.00058060 - samples/sec: 19.77 - lr: 0.020000\n",
-      "2021-09-08 11:52:43,384 epoch 9 - iter 2/13 - loss 0.63272884 - samples/sec: 19.44 - lr: 0.020000\n",
-      "2021-09-08 11:52:43,433 epoch 9 - iter 3/13 - loss 0.42213848 - samples/sec: 20.84 - lr: 0.020000\n",
-      "2021-09-08 11:52:43,483 epoch 9 - iter 4/13 - loss 0.31711504 - samples/sec: 20.11 - lr: 0.020000\n",
-      "2021-09-08 11:52:43,533 epoch 9 - iter 5/13 - loss 0.28712090 - samples/sec: 20.24 - lr: 0.020000\n",
-      "2021-09-08 11:52:43,580 epoch 9 - iter 6/13 - loss 0.24582509 - samples/sec: 21.76 - lr: 0.020000\n",
-      "2021-09-08 11:52:43,623 epoch 9 - iter 7/13 - loss 0.21265218 - samples/sec: 23.24 - lr: 0.020000\n",
-      "2021-09-08 11:52:43,666 epoch 9 - iter 8/13 - loss 0.18619753 - samples/sec: 23.71 - lr: 0.020000\n",
-      "2021-09-08 11:52:43,708 epoch 9 - iter 9/13 - loss 0.16568916 - samples/sec: 23.99 - lr: 0.020000\n",
-      "2021-09-08 11:52:43,751 epoch 9 - iter 10/13 - loss 0.14932974 - samples/sec: 23.58 - lr: 0.020000\n",
-      "2021-09-08 11:52:43,793 epoch 9 - iter 11/13 - loss 0.13587740 - samples/sec: 23.84 - lr: 0.020000\n",
-      "2021-09-08 11:52:43,835 epoch 9 - iter 12/13 - loss 0.12521015 - samples/sec: 24.12 - lr: 0.020000\n",
-      "2021-09-08 11:52:43,881 epoch 9 - iter 13/13 - loss 0.11663347 - samples/sec: 21.99 - lr: 0.020000\n",
-      "2021-09-08 11:52:43,882 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:43,882 EPOCH 9 done: loss 0.1166 - lr 0.0200000\n",
-      "2021-09-08 11:52:43,911 DEV : loss 0.2824660837650299 - score 0.0\n",
-      "2021-09-08 11:52:43,912 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:52:43,914 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:43,971 epoch 10 - iter 1/13 - loss 0.00061182 - samples/sec: 23.87 - lr: 0.020000\n",
-      "2021-09-08 11:52:44,013 epoch 10 - iter 2/13 - loss 0.00110899 - samples/sec: 23.99 - lr: 0.020000\n",
-      "2021-09-08 11:52:44,055 epoch 10 - iter 3/13 - loss 0.00100913 - samples/sec: 23.97 - lr: 0.020000\n",
-      "2021-09-08 11:52:44,098 epoch 10 - iter 4/13 - loss 0.00222769 - samples/sec: 23.66 - lr: 0.020000\n",
-      "2021-09-08 11:52:44,140 epoch 10 - iter 5/13 - loss 0.00205020 - samples/sec: 24.01 - lr: 0.020000\n",
-      "2021-09-08 11:52:44,182 epoch 10 - iter 6/13 - loss 0.00185698 - samples/sec: 23.97 - lr: 0.020000\n",
-      "2021-09-08 11:52:44,225 epoch 10 - iter 7/13 - loss 0.00213419 - samples/sec: 23.53 - lr: 0.020000\n",
-      "2021-09-08 11:52:44,267 epoch 10 - iter 8/13 - loss 0.00193895 - samples/sec: 23.96 - lr: 0.020000\n",
-      "2021-09-08 11:52:44,309 epoch 10 - iter 9/13 - loss 0.00176082 - samples/sec: 24.03 - lr: 0.020000\n",
-      "2021-09-08 11:52:44,351 epoch 10 - iter 10/13 - loss 0.00203930 - samples/sec: 23.69 - lr: 0.020000\n",
-      "2021-09-08 11:52:44,394 epoch 10 - iter 11/13 - loss 0.00193121 - samples/sec: 23.90 - lr: 0.020000\n",
-      "2021-09-08 11:52:44,436 epoch 10 - iter 12/13 - loss 0.00214313 - samples/sec: 23.98 - lr: 0.020000\n",
-      "2021-09-08 11:52:44,478 epoch 10 - iter 13/13 - loss 0.00205223 - samples/sec: 23.77 - lr: 0.020000\n",
-      "2021-09-08 11:52:44,479 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 20:26:26,018 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:26,085 epoch 10 - iter 1/13 - loss 0.22565489 - samples/sec: 20.87 - lr: 0.020000\n",
+      "2021-09-21 20:26:26,129 epoch 10 - iter 2/13 - loss 0.11668821 - samples/sec: 23.14 - lr: 0.020000\n",
+      "2021-09-21 20:26:26,173 epoch 10 - iter 3/13 - loss 0.07923486 - samples/sec: 22.85 - lr: 0.020000\n",
+      "2021-09-21 20:26:26,217 epoch 10 - iter 4/13 - loss 0.05948267 - samples/sec: 22.99 - lr: 0.020000\n",
+      "2021-09-21 20:26:26,261 epoch 10 - iter 5/13 - loss 0.05020819 - samples/sec: 23.43 - lr: 0.020000\n",
+      "2021-09-21 20:26:26,304 epoch 10 - iter 6/13 - loss 0.04234994 - samples/sec: 23.06 - lr: 0.020000\n",
+      "2021-09-21 20:26:26,347 epoch 10 - iter 7/13 - loss 0.03647115 - samples/sec: 23.44 - lr: 0.020000\n",
+      "2021-09-21 20:26:26,390 epoch 10 - iter 8/13 - loss 0.03197494 - samples/sec: 23.51 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:52:44,479 EPOCH 10 done: loss 0.0021 - lr 0.0200000\n",
-      "2021-09-08 11:52:44,508 DEV : loss 0.3192683160305023 - score 0.0\n",
-      "2021-09-08 11:52:44,509 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:52:49,208 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:52:49,208 Testing using best model ...\n",
-      "2021-09-08 11:52:49,210 loading file None1/best-model.pt\n",
+      "2021-09-21 20:26:26,437 epoch 10 - iter 9/13 - loss 0.03014137 - samples/sec: 21.60 - lr: 0.020000\n",
+      "2021-09-21 20:26:26,480 epoch 10 - iter 10/13 - loss 0.02721346 - samples/sec: 23.38 - lr: 0.020000\n",
+      "2021-09-21 20:26:26,523 epoch 10 - iter 11/13 - loss 0.02480634 - samples/sec: 23.54 - lr: 0.020000\n",
+      "2021-09-21 20:26:26,567 epoch 10 - iter 12/13 - loss 0.02294573 - samples/sec: 23.11 - lr: 0.020000\n",
+      "2021-09-21 20:26:26,610 epoch 10 - iter 13/13 - loss 0.02124283 - samples/sec: 23.64 - lr: 0.020000\n",
+      "2021-09-21 20:26:26,611 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:26:26,611 EPOCH 10 done: loss 0.0212 - lr 0.0200000\n",
+      "2021-09-21 20:26:34,101 DEV : loss 5.9181315009482205e-05 - score 0.0\n",
+      "2021-09-21 20:26:34,102 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:27:02,435 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:02,436 Testing using best model ...\n",
+      "2021-09-21 20:27:02,437 loading file None1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:52:55,153 \t1.0\n",
-      "2021-09-08 11:52:55,154 \n",
+      "2021-09-21 20:27:17,203 \t0.5\n",
+      "2021-09-21 20:27:17,204 \n",
       "Results:\n",
-      "- F-score (micro) 1.0\n",
-      "- F-score (macro) 0.1333\n",
-      "- Accuracy 1.0\n",
+      "- F-score (micro) 0.5\n",
+      "- F-score (macro) 0.0667\n",
+      "- Accuracy 0.5\n",
       "\n",
       "By class:\n",
       "               precision    recall  f1-score   support\n",
       "\n",
       "       travel     0.0000    0.0000    0.0000         0\n",
-      "   technology     0.0000    0.0000    0.0000         0\n",
       "     wellness     0.0000    0.0000    0.0000         0\n",
       "        women     0.0000    0.0000    0.0000         0\n",
-      "     business     0.0000    0.0000    0.0000         0\n",
+      "      parents     0.0000    0.0000    0.0000         0\n",
+      "     weddings     0.0000    0.0000    0.0000         0\n",
       "      fashion     0.0000    0.0000    0.0000         0\n",
       "entertainment     0.0000    0.0000    0.0000         0\n",
       "      science     0.0000    0.0000    0.0000         0\n",
-      "      divorce     0.0000    0.0000    0.0000         0\n",
       "        crime     0.0000    0.0000    0.0000         0\n",
+      "     religion     0.0000    0.0000    0.0000         0\n",
       "       sports     0.0000    0.0000    0.0000         0\n",
       "     politics     0.0000    0.0000    0.0000         0\n",
       "       comedy     0.0000    0.0000    0.0000         0\n",
-      "      parents     1.0000    1.0000    1.0000         1\n",
-      "     religion     1.0000    1.0000    1.0000         1\n",
+      "   technology     1.0000    1.0000    1.0000         1\n",
+      "      divorce     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "    micro avg     1.0000    1.0000    1.0000         2\n",
-      "    macro avg     0.1333    0.1333    0.1333         2\n",
-      " weighted avg     1.0000    1.0000    1.0000         2\n",
-      "  samples avg     1.0000    1.0000    1.0000         2\n",
+      "    micro avg     0.5000    0.5000    0.5000         2\n",
+      "    macro avg     0.0667    0.0667    0.0667         2\n",
+      " weighted avg     0.5000    0.5000    0.5000         2\n",
+      "  samples avg     0.5000    0.5000    0.5000         2\n",
       "\n",
-      "2021-09-08 11:52:55,154 ----------------------------------------------------------------------------------------------------\n",
-      "Accuracy Durchschnitt: 0.45\n"
+      "2021-09-21 20:27:17,204 ----------------------------------------------------------------------------------------------------\n",
+      "Accuracy Durchschnitt: 0.43682170542635657\n"
      ]
     }
    ],
@@ -3128,11 +3130,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "id": "0c4025f0",
+   "execution_count": 5,
+   "id": "e0b48924",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.45930232558139533, 0.4166666666666667, 0.4166666666666667, 0.46705426356589147, 0.42441860465116277]\n",
+      "0.021843415199388164\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(avg_acc_list)\n",
+    "print(statistics.pstdev(avg_acc_list))"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -3144,7 +3158,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 6,
    "id": "ef4da272",
    "metadata": {},
    "outputs": [
@@ -3152,25 +3166,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:53:02,607 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
+      "2021-09-21 20:27:43,481 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:53:06,592 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:27:47,637 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 15/15 [00:00<00:00, 50493.23it/s]"
+      "100%|██████████| 15/15 [00:00<00:00, 50371.95it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:53:06,594 [b'this text is about travel', b'this text is about wellness', b'this text is about women', b'this text is about parents', b'this text is about business', b'this text is about weddings', b'this text is about fashion', b'this text is about entertainmen,the attention', b'this text is about divorce', b'this text is about crime', b'this text is about religion', b'this text is about sports', b'this text is about comedy', b'this text is about technology', b'this text is about science']\n",
-      "2021-09-08 11:53:06,727 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:06,729 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:27:47,639 [b'this text is about travel', b'this text is about technology', b'this text is about wellness', b'this text is about women', b'this text is about parents', b'this text is about weddings', b'this text is about fashion', b'this text is about entertainmen,the attention', b'this text is about divorce', b'this text is about crime', b'this text is about religion', b'this text is about sports', b'this text is about politics', b'this text is about business', b'this text is about comedy']\n",
+      "2021-09-21 20:27:47,648 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:47,650 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -3483,24 +3497,27 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:53:06,730 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:06,730 Corpus: \"Corpus: 13 train + 1 dev + 2 test sentences\"\n",
-      "2021-09-08 11:53:06,730 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:06,730 Parameters:\n",
-      "2021-09-08 11:53:06,731  - learning_rate: \"0.02\"\n",
-      "2021-09-08 11:53:06,731  - mini_batch_size: \"1\"\n",
-      "2021-09-08 11:53:06,731  - patience: \"3\"\n",
-      "2021-09-08 11:53:06,732  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 11:53:06,732  - max_epochs: \"10\"\n",
-      "2021-09-08 11:53:06,732  - shuffle: \"True\"\n",
-      "2021-09-08 11:53:06,732  - train_with_dev: \"False\"\n",
-      "2021-09-08 11:53:06,733  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 11:53:06,733 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:06,733 Model training base path: \"None1\"\n",
-      "2021-09-08 11:53:06,733 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:06,734 Device: cuda:0\n",
-      "2021-09-08 11:53:06,734 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:06,734 Embeddings storage mode: cpu\n"
+      "2021-09-21 20:27:47,650 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:47,650 Corpus: \"Corpus: 13 train + 1 dev + 2 test sentences\"\n",
+      "2021-09-21 20:27:47,651 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:47,651 Parameters:\n",
+      "2021-09-21 20:27:47,651  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:27:47,652  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:27:47,652  - patience: \"3\"\n",
+      "2021-09-21 20:27:47,652  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:27:47,652  - max_epochs: \"10\"\n",
+      "2021-09-21 20:27:47,653  - shuffle: \"True\"\n",
+      "2021-09-21 20:27:47,653  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:27:47,653  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:27:47,654 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:47,654 Model training base path: \"None1\"\n",
+      "2021-09-21 20:27:47,654 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:47,654 Device: cuda:0\n",
+      "2021-09-21 20:27:47,655 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:47,655 Embeddings storage mode: cpu\n",
+      "2021-09-21 20:27:47,661 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:47,765 epoch 1 - iter 1/13 - loss 0.94082284 - samples/sec: 17.50 - lr: 0.020000\n",
+      "2021-09-21 20:27:47,820 epoch 1 - iter 2/13 - loss 1.37746936 - samples/sec: 18.33 - lr: 0.020000\n"
      ]
     },
     {
@@ -3514,208 +3531,205 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:53:06,925 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:06,980 epoch 1 - iter 1/13 - loss 0.82973510 - samples/sec: 25.82 - lr: 0.020000\n",
-      "2021-09-08 11:53:07,027 epoch 1 - iter 2/13 - loss 0.54838838 - samples/sec: 21.81 - lr: 0.020000\n",
-      "2021-09-08 11:53:07,074 epoch 1 - iter 3/13 - loss 0.66827090 - samples/sec: 21.33 - lr: 0.020000\n",
-      "2021-09-08 11:53:07,120 epoch 1 - iter 4/13 - loss 0.60808878 - samples/sec: 21.92 - lr: 0.020000\n",
-      "2021-09-08 11:53:07,166 epoch 1 - iter 5/13 - loss 0.67543681 - samples/sec: 22.05 - lr: 0.020000\n",
-      "2021-09-08 11:53:07,212 epoch 1 - iter 6/13 - loss 0.59720886 - samples/sec: 21.67 - lr: 0.020000\n",
-      "2021-09-08 11:53:07,259 epoch 1 - iter 7/13 - loss 0.57391176 - samples/sec: 21.90 - lr: 0.020000\n",
-      "2021-09-08 11:53:07,305 epoch 1 - iter 8/13 - loss 0.56947710 - samples/sec: 21.72 - lr: 0.020000\n",
-      "2021-09-08 11:53:07,351 epoch 1 - iter 9/13 - loss 0.56473079 - samples/sec: 21.99 - lr: 0.020000\n",
-      "2021-09-08 11:53:07,397 epoch 1 - iter 10/13 - loss 0.52925722 - samples/sec: 21.95 - lr: 0.020000\n",
-      "2021-09-08 11:53:07,443 epoch 1 - iter 11/13 - loss 0.49309852 - samples/sec: 21.77 - lr: 0.020000\n",
-      "2021-09-08 11:53:07,489 epoch 1 - iter 12/13 - loss 0.45877236 - samples/sec: 22.00 - lr: 0.020000\n",
-      "2021-09-08 11:53:07,537 epoch 1 - iter 13/13 - loss 0.44319878 - samples/sec: 21.00 - lr: 0.020000\n",
-      "2021-09-08 11:53:07,539 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:07,539 EPOCH 1 done: loss 0.4432 - lr 0.0200000\n",
-      "2021-09-08 11:53:07,672 DEV : loss 1.2405688762664795 - score 0.0\n",
-      "2021-09-08 11:53:07,672 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:27:47,882 epoch 1 - iter 3/13 - loss 1.18098273 - samples/sec: 16.05 - lr: 0.020000\n",
+      "2021-09-21 20:27:47,945 epoch 1 - iter 4/13 - loss 1.04288420 - samples/sec: 16.13 - lr: 0.020000\n",
+      "2021-09-21 20:27:48,006 epoch 1 - iter 5/13 - loss 0.89933960 - samples/sec: 16.33 - lr: 0.020000\n",
+      "2021-09-21 20:27:48,072 epoch 1 - iter 6/13 - loss 0.86779433 - samples/sec: 15.49 - lr: 0.020000\n",
+      "2021-09-21 20:27:48,133 epoch 1 - iter 7/13 - loss 0.82195389 - samples/sec: 16.50 - lr: 0.020000\n",
+      "2021-09-21 20:27:48,199 epoch 1 - iter 8/13 - loss 0.84872721 - samples/sec: 15.24 - lr: 0.020000\n",
+      "2021-09-21 20:27:48,267 epoch 1 - iter 9/13 - loss 0.81051340 - samples/sec: 14.70 - lr: 0.020000\n",
+      "2021-09-21 20:27:48,333 epoch 1 - iter 10/13 - loss 0.77118704 - samples/sec: 15.24 - lr: 0.020000\n",
+      "2021-09-21 20:27:48,398 epoch 1 - iter 11/13 - loss 0.72114719 - samples/sec: 15.63 - lr: 0.020000\n",
+      "2021-09-21 20:27:48,454 epoch 1 - iter 12/13 - loss 0.66443996 - samples/sec: 17.91 - lr: 0.020000\n",
+      "2021-09-21 20:27:48,517 epoch 1 - iter 13/13 - loss 0.62843256 - samples/sec: 15.80 - lr: 0.020000\n",
+      "2021-09-21 20:27:48,518 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:48,519 EPOCH 1 done: loss 0.6284 - lr 0.0200000\n",
+      "2021-09-21 20:27:48,563 DEV : loss 0.30519571900367737 - score 0.0\n",
+      "2021-09-21 20:27:48,564 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:53:19,259 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:19,323 epoch 2 - iter 1/13 - loss 0.14396350 - samples/sec: 21.28 - lr: 0.020000\n",
-      "2021-09-08 11:53:19,366 epoch 2 - iter 2/13 - loss 0.07732342 - samples/sec: 23.64 - lr: 0.020000\n",
-      "2021-09-08 11:53:19,409 epoch 2 - iter 3/13 - loss 0.05509727 - samples/sec: 23.45 - lr: 0.020000\n",
-      "2021-09-08 11:53:19,451 epoch 2 - iter 4/13 - loss 0.04438655 - samples/sec: 23.75 - lr: 0.020000\n",
-      "2021-09-08 11:53:19,497 epoch 2 - iter 5/13 - loss 0.31242538 - samples/sec: 22.18 - lr: 0.020000\n",
-      "2021-09-08 11:53:19,543 epoch 2 - iter 6/13 - loss 0.46986239 - samples/sec: 22.01 - lr: 0.020000\n",
-      "2021-09-08 11:53:19,588 epoch 2 - iter 7/13 - loss 0.47630632 - samples/sec: 22.23 - lr: 0.020000\n",
-      "2021-09-08 11:53:19,634 epoch 2 - iter 8/13 - loss 0.50039499 - samples/sec: 21.97 - lr: 0.020000\n",
-      "2021-09-08 11:53:19,680 epoch 2 - iter 9/13 - loss 0.49525953 - samples/sec: 22.10 - lr: 0.020000\n",
-      "2021-09-08 11:53:19,725 epoch 2 - iter 10/13 - loss 0.49258341 - samples/sec: 22.37 - lr: 0.020000\n",
-      "2021-09-08 11:53:19,771 epoch 2 - iter 11/13 - loss 0.45186257 - samples/sec: 21.92 - lr: 0.020000\n",
-      "2021-09-08 11:53:19,813 epoch 2 - iter 12/13 - loss 0.41969024 - samples/sec: 24.03 - lr: 0.020000\n",
-      "2021-09-08 11:53:19,859 epoch 2 - iter 13/13 - loss 0.39999723 - samples/sec: 22.00 - lr: 0.020000\n",
-      "2021-09-08 11:53:19,860 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:19,860 EPOCH 2 done: loss 0.4000 - lr 0.0200000\n",
-      "2021-09-08 11:53:19,891 DEV : loss 0.5893579721450806 - score 0.0\n",
-      "2021-09-08 11:53:19,892 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:27:52,543 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:52,627 epoch 2 - iter 1/13 - loss 0.62173450 - samples/sec: 17.53 - lr: 0.020000\n",
+      "2021-09-21 20:27:52,698 epoch 2 - iter 2/13 - loss 0.79843459 - samples/sec: 14.33 - lr: 0.020000\n",
+      "2021-09-21 20:27:52,765 epoch 2 - iter 3/13 - loss 0.61050370 - samples/sec: 14.94 - lr: 0.020000\n",
+      "2021-09-21 20:27:52,832 epoch 2 - iter 4/13 - loss 0.50158555 - samples/sec: 15.04 - lr: 0.020000\n",
+      "2021-09-21 20:27:52,882 epoch 2 - iter 5/13 - loss 0.48625969 - samples/sec: 20.10 - lr: 0.020000\n",
+      "2021-09-21 20:27:52,945 epoch 2 - iter 6/13 - loss 0.41617165 - samples/sec: 16.14 - lr: 0.020000\n",
+      "2021-09-21 20:27:53,007 epoch 2 - iter 7/13 - loss 0.44983178 - samples/sec: 16.14 - lr: 0.020000\n",
+      "2021-09-21 20:27:53,059 epoch 2 - iter 8/13 - loss 0.41825009 - samples/sec: 19.53 - lr: 0.020000\n",
+      "2021-09-21 20:27:53,109 epoch 2 - iter 9/13 - loss 0.37678481 - samples/sec: 20.27 - lr: 0.020000\n",
+      "2021-09-21 20:27:53,155 epoch 2 - iter 10/13 - loss 0.33932295 - samples/sec: 22.11 - lr: 0.020000\n",
+      "2021-09-21 20:27:53,215 epoch 2 - iter 11/13 - loss 0.36785571 - samples/sec: 17.02 - lr: 0.020000\n",
+      "2021-09-21 20:27:53,277 epoch 2 - iter 12/13 - loss 0.35314018 - samples/sec: 16.05 - lr: 0.020000\n",
+      "2021-09-21 20:27:53,337 epoch 2 - iter 13/13 - loss 0.43498163 - samples/sec: 16.99 - lr: 0.020000\n",
+      "2021-09-21 20:27:53,338 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:53,338 EPOCH 2 done: loss 0.4350 - lr 0.0200000\n",
+      "2021-09-21 20:27:53,483 DEV : loss 0.5779256820678711 - score 0.0\n",
+      "2021-09-21 20:27:53,484 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:27:53,559 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:53,666 epoch 3 - iter 1/13 - loss 0.32821891 - samples/sec: 15.59 - lr: 0.020000\n",
+      "2021-09-21 20:27:53,725 epoch 3 - iter 2/13 - loss 0.68450676 - samples/sec: 17.01 - lr: 0.020000\n",
+      "2021-09-21 20:27:53,783 epoch 3 - iter 3/13 - loss 0.50109495 - samples/sec: 17.53 - lr: 0.020000\n",
+      "2021-09-21 20:27:53,844 epoch 3 - iter 4/13 - loss 0.42403345 - samples/sec: 16.36 - lr: 0.020000\n",
+      "2021-09-21 20:27:53,897 epoch 3 - iter 5/13 - loss 0.35308428 - samples/sec: 19.20 - lr: 0.020000\n",
+      "2021-09-21 20:27:53,946 epoch 3 - iter 6/13 - loss 0.29489550 - samples/sec: 20.48 - lr: 0.020000\n",
+      "2021-09-21 20:27:54,001 epoch 3 - iter 7/13 - loss 0.36535769 - samples/sec: 18.61 - lr: 0.020000\n",
+      "2021-09-21 20:27:54,069 epoch 3 - iter 8/13 - loss 0.45768157 - samples/sec: 14.67 - lr: 0.020000\n",
+      "2021-09-21 20:27:54,126 epoch 3 - iter 9/13 - loss 0.40709451 - samples/sec: 17.85 - lr: 0.020000\n",
+      "2021-09-21 20:27:54,174 epoch 3 - iter 10/13 - loss 0.37100083 - samples/sec: 20.87 - lr: 0.020000\n",
+      "2021-09-21 20:27:54,221 epoch 3 - iter 11/13 - loss 0.40149470 - samples/sec: 21.74 - lr: 0.020000\n",
+      "2021-09-21 20:27:54,268 epoch 3 - iter 12/13 - loss 0.39646252 - samples/sec: 21.56 - lr: 0.020000\n",
+      "2021-09-21 20:27:54,315 epoch 3 - iter 13/13 - loss 0.36799736 - samples/sec: 21.39 - lr: 0.020000\n",
+      "2021-09-21 20:27:54,316 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:54,316 EPOCH 3 done: loss 0.3680 - lr 0.0200000\n",
+      "2021-09-21 20:27:54,440 DEV : loss 0.5031530857086182 - score 0.0\n",
+      "2021-09-21 20:27:54,441 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:27:54,561 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:54,627 epoch 4 - iter 1/13 - loss 0.16326565 - samples/sec: 20.71 - lr: 0.020000\n",
+      "2021-09-21 20:27:54,674 epoch 4 - iter 2/13 - loss 0.08436935 - samples/sec: 21.52 - lr: 0.020000\n",
+      "2021-09-21 20:27:54,721 epoch 4 - iter 3/13 - loss 0.09978882 - samples/sec: 21.35 - lr: 0.020000\n",
+      "2021-09-21 20:27:54,769 epoch 4 - iter 4/13 - loss 0.10147981 - samples/sec: 21.29 - lr: 0.020000\n",
+      "2021-09-21 20:27:54,816 epoch 4 - iter 5/13 - loss 0.54148587 - samples/sec: 21.49 - lr: 0.020000\n",
+      "2021-09-21 20:27:54,859 epoch 4 - iter 6/13 - loss 0.45191345 - samples/sec: 23.35 - lr: 0.020000\n",
+      "2021-09-21 20:27:54,903 epoch 4 - iter 7/13 - loss 0.38775248 - samples/sec: 22.91 - lr: 0.020000\n",
+      "2021-09-21 20:27:54,950 epoch 4 - iter 8/13 - loss 0.43526837 - samples/sec: 21.49 - lr: 0.020000\n",
+      "2021-09-21 20:27:54,998 epoch 4 - iter 9/13 - loss 0.40811895 - samples/sec: 21.23 - lr: 0.020000\n",
+      "2021-09-21 20:27:55,042 epoch 4 - iter 10/13 - loss 0.36758515 - samples/sec: 22.88 - lr: 0.020000\n",
+      "2021-09-21 20:27:55,089 epoch 4 - iter 11/13 - loss 0.34188234 - samples/sec: 21.35 - lr: 0.020000\n",
+      "2021-09-21 20:27:55,134 epoch 4 - iter 12/13 - loss 0.31397056 - samples/sec: 22.57 - lr: 0.020000\n",
+      "2021-09-21 20:27:55,181 epoch 4 - iter 13/13 - loss 0.44467108 - samples/sec: 21.69 - lr: 0.020000\n",
+      "2021-09-21 20:27:55,182 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:27:55,182 EPOCH 4 done: loss 0.4447 - lr 0.0200000\n",
+      "2021-09-21 20:27:55,518 DEV : loss 0.08761849254369736 - score 0.0\n",
+      "2021-09-21 20:27:55,519 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:53:24,055 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:24,116 epoch 3 - iter 1/13 - loss 0.01432364 - samples/sec: 23.00 - lr: 0.020000\n",
-      "2021-09-08 11:53:24,161 epoch 3 - iter 2/13 - loss 0.26314034 - samples/sec: 22.03 - lr: 0.020000\n",
-      "2021-09-08 11:53:24,208 epoch 3 - iter 3/13 - loss 0.23276868 - samples/sec: 21.89 - lr: 0.020000\n",
-      "2021-09-08 11:53:24,250 epoch 3 - iter 4/13 - loss 0.17542666 - samples/sec: 23.86 - lr: 0.020000\n",
-      "2021-09-08 11:53:24,296 epoch 3 - iter 5/13 - loss 0.17750165 - samples/sec: 21.75 - lr: 0.020000\n",
-      "2021-09-08 11:53:24,339 epoch 3 - iter 6/13 - loss 0.15079077 - samples/sec: 23.73 - lr: 0.020000\n",
-      "2021-09-08 11:53:24,385 epoch 3 - iter 7/13 - loss 0.33071376 - samples/sec: 22.02 - lr: 0.020000\n",
-      "2021-09-08 11:53:24,430 epoch 3 - iter 8/13 - loss 0.47497852 - samples/sec: 22.09 - lr: 0.020000\n",
-      "2021-09-08 11:53:24,473 epoch 3 - iter 9/13 - loss 0.42479614 - samples/sec: 23.69 - lr: 0.020000\n",
-      "2021-09-08 11:53:24,519 epoch 3 - iter 10/13 - loss 0.40124183 - samples/sec: 22.07 - lr: 0.020000\n",
-      "2021-09-08 11:53:24,564 epoch 3 - iter 11/13 - loss 0.37440854 - samples/sec: 22.39 - lr: 0.020000\n",
-      "2021-09-08 11:53:24,610 epoch 3 - iter 12/13 - loss 0.36100192 - samples/sec: 22.02 - lr: 0.020000\n",
-      "2021-09-08 11:53:24,652 epoch 3 - iter 13/13 - loss 0.33389431 - samples/sec: 23.99 - lr: 0.020000\n",
-      "2021-09-08 11:53:24,653 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:24,653 EPOCH 3 done: loss 0.3339 - lr 0.0200000\n",
-      "2021-09-08 11:53:24,767 DEV : loss 0.7678309082984924 - score 0.0\n",
-      "2021-09-08 11:53:24,768 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:53:24,989 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:25,055 epoch 4 - iter 1/13 - loss 0.02429440 - samples/sec: 20.56 - lr: 0.020000\n",
-      "2021-09-08 11:53:25,104 epoch 4 - iter 2/13 - loss 0.02889859 - samples/sec: 20.33 - lr: 0.020000\n",
-      "2021-09-08 11:53:25,155 epoch 4 - iter 3/13 - loss 0.24098813 - samples/sec: 20.26 - lr: 0.020000\n",
-      "2021-09-08 11:53:25,202 epoch 4 - iter 4/13 - loss 0.18980117 - samples/sec: 21.21 - lr: 0.020000\n",
-      "2021-09-08 11:53:25,248 epoch 4 - iter 5/13 - loss 0.15374081 - samples/sec: 22.22 - lr: 0.020000\n",
-      "2021-09-08 11:53:25,295 epoch 4 - iter 6/13 - loss 0.17438987 - samples/sec: 21.62 - lr: 0.020000\n",
-      "2021-09-08 11:53:25,344 epoch 4 - iter 7/13 - loss 0.30060249 - samples/sec: 20.53 - lr: 0.020000\n",
-      "2021-09-08 11:53:25,391 epoch 4 - iter 8/13 - loss 0.26363027 - samples/sec: 21.81 - lr: 0.020000\n",
-      "2021-09-08 11:53:25,437 epoch 4 - iter 9/13 - loss 0.23562877 - samples/sec: 21.98 - lr: 0.020000\n",
-      "2021-09-08 11:53:25,487 epoch 4 - iter 10/13 - loss 0.29505188 - samples/sec: 20.01 - lr: 0.020000\n",
-      "2021-09-08 11:53:25,534 epoch 4 - iter 11/13 - loss 0.26852844 - samples/sec: 21.97 - lr: 0.020000\n",
-      "2021-09-08 11:53:25,583 epoch 4 - iter 12/13 - loss 0.27947955 - samples/sec: 20.57 - lr: 0.020000\n",
-      "2021-09-08 11:53:25,627 epoch 4 - iter 13/13 - loss 0.25856345 - samples/sec: 23.10 - lr: 0.020000\n",
-      "2021-09-08 11:53:25,628 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:25,628 EPOCH 4 done: loss 0.2586 - lr 0.0200000\n",
-      "2021-09-08 11:53:26,102 DEV : loss 1.075669765472412 - score 0.0\n",
-      "2021-09-08 11:53:26,102 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:53:26,435 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:26,494 epoch 5 - iter 1/13 - loss 0.01196682 - samples/sec: 23.26 - lr: 0.020000\n",
-      "2021-09-08 11:53:26,537 epoch 5 - iter 2/13 - loss 0.00700226 - samples/sec: 23.51 - lr: 0.020000\n",
-      "2021-09-08 11:53:26,582 epoch 5 - iter 3/13 - loss 0.16532253 - samples/sec: 22.28 - lr: 0.020000\n",
-      "2021-09-08 11:53:26,628 epoch 5 - iter 4/13 - loss 0.13654658 - samples/sec: 22.06 - lr: 0.020000\n",
-      "2021-09-08 11:53:26,671 epoch 5 - iter 5/13 - loss 0.11089188 - samples/sec: 23.49 - lr: 0.020000\n",
-      "2021-09-08 11:53:26,717 epoch 5 - iter 6/13 - loss 0.17415402 - samples/sec: 22.25 - lr: 0.020000\n",
-      "2021-09-08 11:53:26,763 epoch 5 - iter 7/13 - loss 0.42202040 - samples/sec: 21.93 - lr: 0.020000\n",
-      "2021-09-08 11:53:26,805 epoch 5 - iter 8/13 - loss 0.37442943 - samples/sec: 23.73 - lr: 0.020000\n",
-      "2021-09-08 11:53:26,848 epoch 5 - iter 9/13 - loss 0.33443731 - samples/sec: 23.69 - lr: 0.020000\n",
-      "2021-09-08 11:53:26,890 epoch 5 - iter 10/13 - loss 0.30122280 - samples/sec: 23.83 - lr: 0.020000\n",
-      "2021-09-08 11:53:26,932 epoch 5 - iter 11/13 - loss 0.27434613 - samples/sec: 23.93 - lr: 0.020000\n",
-      "2021-09-08 11:53:26,975 epoch 5 - iter 12/13 - loss 0.25342882 - samples/sec: 23.59 - lr: 0.020000\n"
+      "2021-09-21 20:28:11,572 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:11,634 epoch 5 - iter 1/13 - loss 0.00238283 - samples/sec: 22.42 - lr: 0.020000\n",
+      "2021-09-21 20:28:11,678 epoch 5 - iter 2/13 - loss 0.00488143 - samples/sec: 23.34 - lr: 0.020000\n",
+      "2021-09-21 20:28:11,724 epoch 5 - iter 3/13 - loss 0.19087747 - samples/sec: 21.70 - lr: 0.020000\n",
+      "2021-09-21 20:28:11,771 epoch 5 - iter 4/13 - loss 0.46544098 - samples/sec: 21.52 - lr: 0.020000\n",
+      "2021-09-21 20:28:11,814 epoch 5 - iter 5/13 - loss 0.37496767 - samples/sec: 23.46 - lr: 0.020000\n",
+      "2021-09-21 20:28:11,858 epoch 5 - iter 6/13 - loss 0.31283738 - samples/sec: 23.22 - lr: 0.020000\n",
+      "2021-09-21 20:28:11,904 epoch 5 - iter 7/13 - loss 0.42266760 - samples/sec: 21.84 - lr: 0.020000\n",
+      "2021-09-21 20:28:11,950 epoch 5 - iter 8/13 - loss 0.37533901 - samples/sec: 21.81 - lr: 0.020000\n",
+      "2021-09-21 20:28:11,999 epoch 5 - iter 9/13 - loss 0.40975828 - samples/sec: 20.75 - lr: 0.020000\n",
+      "2021-09-21 20:28:12,041 epoch 5 - iter 10/13 - loss 0.36953890 - samples/sec: 23.52 - lr: 0.020000\n",
+      "2021-09-21 20:28:12,088 epoch 5 - iter 11/13 - loss 0.34153411 - samples/sec: 21.76 - lr: 0.020000\n",
+      "2021-09-21 20:28:12,135 epoch 5 - iter 12/13 - loss 0.36548557 - samples/sec: 21.30 - lr: 0.020000\n",
+      "2021-09-21 20:28:12,178 epoch 5 - iter 13/13 - loss 0.33882460 - samples/sec: 23.44 - lr: 0.020000\n",
+      "2021-09-21 20:28:12,179 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:12,180 EPOCH 5 done: loss 0.3388 - lr 0.0200000\n",
+      "2021-09-21 20:28:12,924 DEV : loss 0.12197506427764893 - score 0.0\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:53:27,017 epoch 5 - iter 13/13 - loss 0.23409333 - samples/sec: 23.96 - lr: 0.020000\n",
-      "2021-09-08 11:53:27,018 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:27,019 EPOCH 5 done: loss 0.2341 - lr 0.0200000\n",
-      "2021-09-08 11:53:27,148 DEV : loss 0.3628924787044525 - score 0.0\n",
-      "2021-09-08 11:53:27,148 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:28:12,924 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:28:12,938 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:13,073 epoch 6 - iter 1/13 - loss 0.00562301 - samples/sec: 12.21 - lr: 0.020000\n",
+      "2021-09-21 20:28:13,160 epoch 6 - iter 2/13 - loss 0.10962219 - samples/sec: 11.67 - lr: 0.020000\n",
+      "2021-09-21 20:28:13,235 epoch 6 - iter 3/13 - loss 0.14438704 - samples/sec: 13.35 - lr: 0.020000\n",
+      "2021-09-21 20:28:13,308 epoch 6 - iter 4/13 - loss 0.11424743 - samples/sec: 13.78 - lr: 0.020000\n",
+      "2021-09-21 20:28:13,386 epoch 6 - iter 5/13 - loss 0.09265468 - samples/sec: 12.86 - lr: 0.020000\n",
+      "2021-09-21 20:28:13,453 epoch 6 - iter 6/13 - loss 0.37892799 - samples/sec: 15.04 - lr: 0.020000\n",
+      "2021-09-21 20:28:13,510 epoch 6 - iter 7/13 - loss 0.32523439 - samples/sec: 17.96 - lr: 0.020000\n",
+      "2021-09-21 20:28:13,569 epoch 6 - iter 8/13 - loss 0.28707376 - samples/sec: 17.12 - lr: 0.020000\n",
+      "2021-09-21 20:28:13,627 epoch 6 - iter 9/13 - loss 0.25535234 - samples/sec: 17.46 - lr: 0.020000\n",
+      "2021-09-21 20:28:13,683 epoch 6 - iter 10/13 - loss 0.23030012 - samples/sec: 17.81 - lr: 0.020000\n",
+      "2021-09-21 20:28:13,736 epoch 6 - iter 11/13 - loss 0.21460266 - samples/sec: 19.14 - lr: 0.020000\n",
+      "2021-09-21 20:28:13,787 epoch 6 - iter 12/13 - loss 0.20530525 - samples/sec: 19.99 - lr: 0.020000\n",
+      "2021-09-21 20:28:13,837 epoch 6 - iter 13/13 - loss 0.25336963 - samples/sec: 19.99 - lr: 0.020000\n",
+      "2021-09-21 20:28:13,839 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:13,839 EPOCH 6 done: loss 0.2534 - lr 0.0200000\n",
+      "2021-09-21 20:28:13,871 DEV : loss 0.012365979142487049 - score 0.0\n",
+      "2021-09-21 20:28:13,872 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:53:32,457 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:32,518 epoch 6 - iter 1/13 - loss 0.00225927 - samples/sec: 23.00 - lr: 0.020000\n",
-      "2021-09-08 11:53:32,564 epoch 6 - iter 2/13 - loss 0.13837654 - samples/sec: 22.11 - lr: 0.020000\n",
-      "2021-09-08 11:53:32,609 epoch 6 - iter 3/13 - loss 0.09994069 - samples/sec: 22.05 - lr: 0.020000\n",
-      "2021-09-08 11:53:32,652 epoch 6 - iter 4/13 - loss 0.07583062 - samples/sec: 23.89 - lr: 0.020000\n",
-      "2021-09-08 11:53:32,694 epoch 6 - iter 5/13 - loss 0.06165815 - samples/sec: 23.65 - lr: 0.020000\n",
-      "2021-09-08 11:53:32,740 epoch 6 - iter 6/13 - loss 0.27790771 - samples/sec: 22.25 - lr: 0.020000\n",
-      "2021-09-08 11:53:32,782 epoch 6 - iter 7/13 - loss 0.23836070 - samples/sec: 24.10 - lr: 0.020000\n",
-      "2021-09-08 11:53:32,824 epoch 6 - iter 8/13 - loss 0.20871940 - samples/sec: 23.68 - lr: 0.020000\n",
-      "2021-09-08 11:53:32,867 epoch 6 - iter 9/13 - loss 0.18583488 - samples/sec: 23.92 - lr: 0.020000\n",
-      "2021-09-08 11:53:32,909 epoch 6 - iter 10/13 - loss 0.16737229 - samples/sec: 23.93 - lr: 0.020000\n",
-      "2021-09-08 11:53:32,955 epoch 6 - iter 11/13 - loss 0.15519913 - samples/sec: 22.04 - lr: 0.020000\n",
-      "2021-09-08 11:53:32,997 epoch 6 - iter 12/13 - loss 0.14275927 - samples/sec: 24.03 - lr: 0.020000\n",
-      "2021-09-08 11:53:33,042 epoch 6 - iter 13/13 - loss 0.25502731 - samples/sec: 22.05 - lr: 0.020000\n",
-      "2021-09-08 11:53:33,043 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:33,044 EPOCH 6 done: loss 0.2550 - lr 0.0200000\n",
-      "2021-09-08 11:53:33,072 DEV : loss 0.7656503319740295 - score 0.0\n",
-      "2021-09-08 11:53:33,073 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:53:33,075 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:33,135 epoch 7 - iter 1/13 - loss 0.09180775 - samples/sec: 22.25 - lr: 0.020000\n",
-      "2021-09-08 11:53:33,178 epoch 7 - iter 2/13 - loss 0.05031205 - samples/sec: 23.76 - lr: 0.020000\n",
-      "2021-09-08 11:53:33,223 epoch 7 - iter 3/13 - loss 0.28682837 - samples/sec: 22.09 - lr: 0.020000\n",
-      "2021-09-08 11:53:33,267 epoch 7 - iter 4/13 - loss 0.21549956 - samples/sec: 23.12 - lr: 0.020000\n",
-      "2021-09-08 11:53:33,309 epoch 7 - iter 5/13 - loss 0.17258129 - samples/sec: 23.76 - lr: 0.020000\n",
-      "2021-09-08 11:53:33,355 epoch 7 - iter 6/13 - loss 0.16257447 - samples/sec: 22.40 - lr: 0.020000\n",
-      "2021-09-08 11:53:33,397 epoch 7 - iter 7/13 - loss 0.13958390 - samples/sec: 23.65 - lr: 0.020000\n",
-      "2021-09-08 11:53:33,439 epoch 7 - iter 8/13 - loss 0.12244124 - samples/sec: 24.11 - lr: 0.020000\n",
-      "2021-09-08 11:53:33,481 epoch 7 - iter 9/13 - loss 0.10919837 - samples/sec: 24.07 - lr: 0.020000\n",
-      "2021-09-08 11:53:33,524 epoch 7 - iter 10/13 - loss 0.09839126 - samples/sec: 23.70 - lr: 0.020000\n",
-      "2021-09-08 11:53:33,566 epoch 7 - iter 11/13 - loss 0.08953458 - samples/sec: 24.04 - lr: 0.020000\n",
-      "2021-09-08 11:53:33,608 epoch 7 - iter 12/13 - loss 0.08221522 - samples/sec: 23.77 - lr: 0.020000\n",
-      "2021-09-08 11:53:33,650 epoch 7 - iter 13/13 - loss 0.07594010 - samples/sec: 23.91 - lr: 0.020000\n",
-      "2021-09-08 11:53:33,651 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:33,652 EPOCH 7 done: loss 0.0759 - lr 0.0200000\n",
-      "2021-09-08 11:53:33,680 DEV : loss 0.5517638921737671 - score 0.0\n",
-      "2021-09-08 11:53:33,681 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:53:33,683 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:33,740 epoch 8 - iter 1/13 - loss 0.00130020 - samples/sec: 23.96 - lr: 0.020000\n",
-      "2021-09-08 11:53:33,783 epoch 8 - iter 2/13 - loss 0.00155088 - samples/sec: 23.83 - lr: 0.020000\n",
-      "2021-09-08 11:53:33,825 epoch 8 - iter 3/13 - loss 0.00238054 - samples/sec: 24.03 - lr: 0.020000\n",
-      "2021-09-08 11:53:33,867 epoch 8 - iter 4/13 - loss 0.00215918 - samples/sec: 23.74 - lr: 0.020000\n",
-      "2021-09-08 11:53:33,910 epoch 8 - iter 5/13 - loss 0.00244196 - samples/sec: 23.83 - lr: 0.020000\n",
-      "2021-09-08 11:53:33,952 epoch 8 - iter 6/13 - loss 0.00218647 - samples/sec: 24.01 - lr: 0.020000\n",
-      "2021-09-08 11:53:33,994 epoch 8 - iter 7/13 - loss 0.00209039 - samples/sec: 23.62 - lr: 0.020000\n",
-      "2021-09-08 11:53:34,036 epoch 8 - iter 8/13 - loss 0.00197896 - samples/sec: 24.01 - lr: 0.020000\n",
-      "2021-09-08 11:53:34,079 epoch 8 - iter 9/13 - loss 0.00183102 - samples/sec: 23.63 - lr: 0.020000\n",
-      "2021-09-08 11:53:34,121 epoch 8 - iter 10/13 - loss 0.00292105 - samples/sec: 23.95 - lr: 0.020000\n",
-      "2021-09-08 11:53:34,167 epoch 8 - iter 11/13 - loss 0.03609390 - samples/sec: 22.29 - lr: 0.020000\n",
-      "2021-09-08 11:53:34,209 epoch 8 - iter 12/13 - loss 0.03319586 - samples/sec: 23.66 - lr: 0.020000\n",
-      "2021-09-08 11:53:34,255 epoch 8 - iter 13/13 - loss 0.03354277 - samples/sec: 22.02 - lr: 0.020000\n",
-      "2021-09-08 11:53:34,257 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:34,257 EPOCH 8 done: loss 0.0335 - lr 0.0200000\n",
-      "2021-09-08 11:53:34,287 DEV : loss 0.7833501100540161 - score 0.0\n",
-      "2021-09-08 11:53:34,288 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:53:34,290 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:34,353 epoch 9 - iter 1/13 - loss 0.02572010 - samples/sec: 22.09 - lr: 0.020000\n",
-      "2021-09-08 11:53:34,396 epoch 9 - iter 2/13 - loss 0.01351846 - samples/sec: 23.60 - lr: 0.020000\n",
-      "2021-09-08 11:53:34,438 epoch 9 - iter 3/13 - loss 0.01193130 - samples/sec: 23.83 - lr: 0.020000\n",
-      "2021-09-08 11:53:34,484 epoch 9 - iter 4/13 - loss 0.15776692 - samples/sec: 22.09 - lr: 0.020000\n",
-      "2021-09-08 11:53:34,526 epoch 9 - iter 5/13 - loss 0.12630060 - samples/sec: 23.78 - lr: 0.020000\n",
-      "2021-09-08 11:53:34,568 epoch 9 - iter 6/13 - loss 0.10532722 - samples/sec: 24.05 - lr: 0.020000\n",
-      "2021-09-08 11:53:34,611 epoch 9 - iter 7/13 - loss 0.09036704 - samples/sec: 23.57 - lr: 0.020000\n",
-      "2021-09-08 11:53:34,653 epoch 9 - iter 8/13 - loss 0.07914343 - samples/sec: 23.95 - lr: 0.020000\n",
-      "2021-09-08 11:53:34,696 epoch 9 - iter 9/13 - loss 0.07042537 - samples/sec: 23.71 - lr: 0.020000\n",
-      "2021-09-08 11:53:34,738 epoch 9 - iter 10/13 - loss 0.06348054 - samples/sec: 23.85 - lr: 0.020000\n",
-      "2021-09-08 11:53:34,780 epoch 9 - iter 11/13 - loss 0.05774103 - samples/sec: 24.02 - lr: 0.020000\n",
-      "2021-09-08 11:53:34,823 epoch 9 - iter 12/13 - loss 0.05298777 - samples/sec: 23.61 - lr: 0.020000\n",
-      "2021-09-08 11:53:34,865 epoch 9 - iter 13/13 - loss 0.04893277 - samples/sec: 23.92 - lr: 0.020000\n",
-      "2021-09-08 11:53:34,866 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:34,867 EPOCH 9 done: loss 0.0489 - lr 0.0200000\n",
-      "2021-09-08 11:53:34,898 DEV : loss 0.8135175704956055 - score 0.0\n",
-      "Epoch     9: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 11:53:34,898 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 11:53:34,901 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:34,958 epoch 10 - iter 1/13 - loss 0.00111732 - samples/sec: 23.83 - lr: 0.010000\n",
-      "2021-09-08 11:53:35,002 epoch 10 - iter 2/13 - loss 0.00079387 - samples/sec: 23.24 - lr: 0.010000\n",
-      "2021-09-08 11:53:35,045 epoch 10 - iter 3/13 - loss 0.00076885 - samples/sec: 23.51 - lr: 0.010000\n",
-      "2021-09-08 11:53:35,088 epoch 10 - iter 4/13 - loss 0.00092457 - samples/sec: 23.36 - lr: 0.010000\n",
-      "2021-09-08 11:53:35,132 epoch 10 - iter 5/13 - loss 0.00091883 - samples/sec: 22.85 - lr: 0.010000\n",
-      "2021-09-08 11:53:35,178 epoch 10 - iter 6/13 - loss 0.03386861 - samples/sec: 22.02 - lr: 0.010000\n",
-      "2021-09-08 11:53:35,220 epoch 10 - iter 7/13 - loss 0.02910580 - samples/sec: 23.85 - lr: 0.010000\n",
-      "2021-09-08 11:53:35,263 epoch 10 - iter 8/13 - loss 0.02556869 - samples/sec: 23.97 - lr: 0.010000\n"
+      "2021-09-21 20:28:29,839 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:29,912 epoch 7 - iter 1/13 - loss 1.00278389 - samples/sec: 18.18 - lr: 0.020000\n",
+      "2021-09-21 20:28:29,970 epoch 7 - iter 2/13 - loss 0.51290173 - samples/sec: 17.37 - lr: 0.020000\n",
+      "2021-09-21 20:28:30,037 epoch 7 - iter 3/13 - loss 0.62036405 - samples/sec: 15.19 - lr: 0.020000\n",
+      "2021-09-21 20:28:30,101 epoch 7 - iter 4/13 - loss 0.49479675 - samples/sec: 15.77 - lr: 0.020000\n",
+      "2021-09-21 20:28:30,161 epoch 7 - iter 5/13 - loss 0.39799196 - samples/sec: 16.83 - lr: 0.020000\n",
+      "2021-09-21 20:28:30,226 epoch 7 - iter 6/13 - loss 0.62258310 - samples/sec: 15.37 - lr: 0.020000\n",
+      "2021-09-21 20:28:30,296 epoch 7 - iter 7/13 - loss 0.53471829 - samples/sec: 14.57 - lr: 0.020000\n",
+      "2021-09-21 20:28:30,355 epoch 7 - iter 8/13 - loss 0.47032781 - samples/sec: 17.12 - lr: 0.020000\n",
+      "2021-09-21 20:28:30,411 epoch 7 - iter 9/13 - loss 0.41889030 - samples/sec: 18.04 - lr: 0.020000\n",
+      "2021-09-21 20:28:30,462 epoch 7 - iter 10/13 - loss 0.37730028 - samples/sec: 20.04 - lr: 0.020000\n",
+      "2021-09-21 20:28:30,508 epoch 7 - iter 11/13 - loss 0.34489572 - samples/sec: 21.93 - lr: 0.020000\n",
+      "2021-09-21 20:28:30,552 epoch 7 - iter 12/13 - loss 0.31653772 - samples/sec: 22.92 - lr: 0.020000\n",
+      "2021-09-21 20:28:30,596 epoch 7 - iter 13/13 - loss 0.29233364 - samples/sec: 22.93 - lr: 0.020000\n",
+      "2021-09-21 20:28:30,597 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:30,598 EPOCH 7 done: loss 0.2923 - lr 0.0200000\n",
+      "2021-09-21 20:28:30,735 DEV : loss 0.311421662569046 - score 0.0\n",
+      "2021-09-21 20:28:30,736 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:28:30,812 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:30,874 epoch 8 - iter 1/13 - loss 0.01661649 - samples/sec: 22.58 - lr: 0.020000\n",
+      "2021-09-21 20:28:30,921 epoch 8 - iter 2/13 - loss 0.12494380 - samples/sec: 21.31 - lr: 0.020000\n",
+      "2021-09-21 20:28:30,970 epoch 8 - iter 3/13 - loss 0.10495697 - samples/sec: 20.77 - lr: 0.020000\n",
+      "2021-09-21 20:28:31,014 epoch 8 - iter 4/13 - loss 0.08047163 - samples/sec: 22.53 - lr: 0.020000\n",
+      "2021-09-21 20:28:31,058 epoch 8 - iter 5/13 - loss 0.06515475 - samples/sec: 23.04 - lr: 0.020000\n",
+      "2021-09-21 20:28:31,102 epoch 8 - iter 6/13 - loss 0.05446298 - samples/sec: 22.92 - lr: 0.020000\n",
+      "2021-09-21 20:28:31,146 epoch 8 - iter 7/13 - loss 0.04780395 - samples/sec: 23.34 - lr: 0.020000\n",
+      "2021-09-21 20:28:31,192 epoch 8 - iter 8/13 - loss 0.04281160 - samples/sec: 21.87 - lr: 0.020000\n",
+      "2021-09-21 20:28:31,239 epoch 8 - iter 9/13 - loss 0.05311645 - samples/sec: 21.64 - lr: 0.020000\n",
+      "2021-09-21 20:28:31,285 epoch 8 - iter 10/13 - loss 0.22365282 - samples/sec: 21.65 - lr: 0.020000\n",
+      "2021-09-21 20:28:31,329 epoch 8 - iter 11/13 - loss 0.20340238 - samples/sec: 23.05 - lr: 0.020000\n",
+      "2021-09-21 20:28:31,372 epoch 8 - iter 12/13 - loss 0.18657006 - samples/sec: 23.37 - lr: 0.020000\n",
+      "2021-09-21 20:28:31,419 epoch 8 - iter 13/13 - loss 0.17457334 - samples/sec: 21.51 - lr: 0.020000\n",
+      "2021-09-21 20:28:31,420 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:31,420 EPOCH 8 done: loss 0.1746 - lr 0.0200000\n",
+      "2021-09-21 20:28:31,591 DEV : loss 0.08590163290500641 - score 0.0\n",
+      "2021-09-21 20:28:31,592 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:28:31,689 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:31,748 epoch 9 - iter 1/13 - loss 0.00861784 - samples/sec: 23.13 - lr: 0.020000\n",
+      "2021-09-21 20:28:31,792 epoch 9 - iter 2/13 - loss 0.00486699 - samples/sec: 23.33 - lr: 0.020000\n",
+      "2021-09-21 20:28:31,835 epoch 9 - iter 3/13 - loss 0.00372343 - samples/sec: 23.04 - lr: 0.020000\n",
+      "2021-09-21 20:28:31,879 epoch 9 - iter 4/13 - loss 0.00306713 - samples/sec: 23.35 - lr: 0.020000\n",
+      "2021-09-21 20:28:31,922 epoch 9 - iter 5/13 - loss 0.00268148 - samples/sec: 23.10 - lr: 0.020000\n",
+      "2021-09-21 20:28:31,966 epoch 9 - iter 6/13 - loss 0.00248498 - samples/sec: 23.25 - lr: 0.020000\n",
+      "2021-09-21 20:28:32,012 epoch 9 - iter 7/13 - loss 0.02459118 - samples/sec: 21.84 - lr: 0.020000\n",
+      "2021-09-21 20:28:32,056 epoch 9 - iter 8/13 - loss 0.02176372 - samples/sec: 23.01 - lr: 0.020000\n",
+      "2021-09-21 20:28:32,099 epoch 9 - iter 9/13 - loss 0.01976109 - samples/sec: 23.29 - lr: 0.020000\n",
+      "2021-09-21 20:28:32,143 epoch 9 - iter 10/13 - loss 0.01897320 - samples/sec: 23.11 - lr: 0.020000\n",
+      "2021-09-21 20:28:32,186 epoch 9 - iter 11/13 - loss 0.01741203 - samples/sec: 23.23 - lr: 0.020000\n",
+      "2021-09-21 20:28:32,233 epoch 9 - iter 12/13 - loss 0.03332207 - samples/sec: 21.79 - lr: 0.020000\n",
+      "2021-09-21 20:28:32,276 epoch 9 - iter 13/13 - loss 0.03084790 - samples/sec: 23.17 - lr: 0.020000\n",
+      "2021-09-21 20:28:32,277 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:32,278 EPOCH 9 done: loss 0.0308 - lr 0.0200000\n",
+      "2021-09-21 20:28:32,423 DEV : loss 0.36013779044151306 - score 0.0\n",
+      "2021-09-21 20:28:32,424 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:28:32,526 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:32,586 epoch 10 - iter 1/13 - loss 0.00139730 - samples/sec: 23.38 - lr: 0.020000\n",
+      "2021-09-21 20:28:32,632 epoch 10 - iter 2/13 - loss 0.00264563 - samples/sec: 21.94 - lr: 0.020000\n",
+      "2021-09-21 20:28:32,675 epoch 10 - iter 3/13 - loss 0.00189965 - samples/sec: 23.14 - lr: 0.020000\n",
+      "2021-09-21 20:28:32,722 epoch 10 - iter 4/13 - loss 0.14326968 - samples/sec: 21.82 - lr: 0.020000\n",
+      "2021-09-21 20:28:32,765 epoch 10 - iter 5/13 - loss 0.11504750 - samples/sec: 23.06 - lr: 0.020000\n",
+      "2021-09-21 20:28:32,812 epoch 10 - iter 6/13 - loss 0.27706120 - samples/sec: 21.74 - lr: 0.020000\n",
+      "2021-09-21 20:28:32,855 epoch 10 - iter 7/13 - loss 0.23797584 - samples/sec: 23.41 - lr: 0.020000\n",
+      "2021-09-21 20:28:32,902 epoch 10 - iter 8/13 - loss 0.24584901 - samples/sec: 21.47 - lr: 0.020000\n",
+      "2021-09-21 20:28:32,945 epoch 10 - iter 9/13 - loss 0.21887967 - samples/sec: 23.39 - lr: 0.020000\n",
+      "2021-09-21 20:28:32,991 epoch 10 - iter 10/13 - loss 0.36211846 - samples/sec: 21.76 - lr: 0.020000\n",
+      "2021-09-21 20:28:33,035 epoch 10 - iter 11/13 - loss 0.32929412 - samples/sec: 22.97 - lr: 0.020000\n",
+      "2021-09-21 20:28:33,079 epoch 10 - iter 12/13 - loss 0.30210559 - samples/sec: 23.36 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:53:35,306 epoch 10 - iter 9/13 - loss 0.02278109 - samples/sec: 23.42 - lr: 0.010000\n",
-      "2021-09-08 11:53:35,348 epoch 10 - iter 10/13 - loss 0.02056123 - samples/sec: 23.70 - lr: 0.010000\n",
-      "2021-09-08 11:53:35,393 epoch 10 - iter 11/13 - loss 0.01876023 - samples/sec: 22.54 - lr: 0.010000\n",
-      "2021-09-08 11:53:35,436 epoch 10 - iter 12/13 - loss 0.01721730 - samples/sec: 23.63 - lr: 0.010000\n",
-      "2021-09-08 11:53:35,479 epoch 10 - iter 13/13 - loss 0.01712338 - samples/sec: 23.49 - lr: 0.010000\n",
-      "2021-09-08 11:53:35,480 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:35,480 EPOCH 10 done: loss 0.0171 - lr 0.0100000\n",
-      "2021-09-08 11:53:35,509 DEV : loss 0.3707803785800934 - score 0.0\n",
-      "2021-09-08 11:53:35,509 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:53:40,157 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:40,158 Testing using best model ...\n",
-      "2021-09-08 11:53:40,159 loading file None1/best-model.pt\n",
+      "2021-09-21 20:28:33,122 epoch 10 - iter 13/13 - loss 0.27899020 - samples/sec: 23.40 - lr: 0.020000\n",
+      "2021-09-21 20:28:33,123 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:33,123 EPOCH 10 done: loss 0.2790 - lr 0.0200000\n",
+      "2021-09-21 20:28:33,280 DEV : loss 0.05019720643758774 - score 0.0\n",
+      "Epoch    10: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 20:28:33,281 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:28:43,447 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:28:43,448 Testing using best model ...\n",
+      "2021-09-21 20:28:43,449 loading file None1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:53:44,870 \t0.0\n",
-      "2021-09-08 11:53:44,871 \n",
+      "2021-09-21 20:28:57,735 \t0.0\n",
+      "2021-09-21 20:28:57,735 \n",
       "Results:\n",
       "- F-score (micro) 0.0\n",
       "- F-score (macro) 0.0\n",
@@ -3725,10 +3739,10 @@
       "                                               precision    recall  f1-score   support\n",
       "\n",
       "                    this text is about travel     0.0000    0.0000    0.0000         0\n",
+      "                this text is about technology     0.0000    0.0000    0.0000         0\n",
       "                  this text is about wellness     0.0000    0.0000    0.0000         0\n",
       "                     this text is about women     0.0000    0.0000    0.0000         0\n",
       "                   this text is about parents     0.0000    0.0000    0.0000         0\n",
-      "                  this text is about business     0.0000    0.0000    0.0000         0\n",
       "                  this text is about weddings     0.0000    0.0000    0.0000         0\n",
       "                   this text is about fashion     0.0000    0.0000    0.0000         0\n",
       "this text is about entertainmen,the attention     0.0000    0.0000    0.0000         0\n",
@@ -3736,35 +3750,48 @@
       "                     this text is about crime     0.0000    0.0000    0.0000         0\n",
       "                  this text is about religion     0.0000    0.0000    0.0000         0\n",
       "                    this text is about sports     0.0000    0.0000    0.0000         0\n",
-      "                    this text is about comedy     0.0000    0.0000    0.0000         0\n",
-      "                this text is about technology     0.0000    0.0000    0.0000         1\n",
-      "                   this text is about science     0.0000    0.0000    0.0000         1\n",
+      "                  this text is about politics     0.0000    0.0000    0.0000         0\n",
+      "                  this text is about business     0.0000    0.0000    0.0000         1\n",
+      "                    this text is about comedy     0.0000    0.0000    0.0000         1\n",
       "\n",
       "                                    micro avg     0.0000    0.0000    0.0000         2\n",
       "                                    macro avg     0.0000    0.0000    0.0000         2\n",
       "                                 weighted avg     0.0000    0.0000    0.0000         2\n",
       "                                  samples avg     0.0000    0.0000    0.0000         2\n",
       "\n",
-      "2021-09-08 11:53:44,871 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:52,858 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
+      "2021-09-21 20:28:57,736 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:08,993 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:53:56,839 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:29:13,431 Computing label dictionary. Progress:\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 15/15 [00:00<00:00, 45164.80it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 20:29:13,433 [b'this text is about travel', b'this text is about technology', b'this text is about wellness', b'this text is about parents', b'this text is about business', b'this text is about weddings', b'this text is about fashion', b'this text is about entertainmen,the attention', b'this text is about science', b'this text is about crime', b'this text is about religion', b'this text is about sports', b'this text is about politics', b'this text is about women', b'this text is about divorce']\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 15/15 [00:00<00:00, 48695.48it/s]"
+      "\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:53:56,841 [b'this text is about travel', b'this text is about technology', b'this text is about wellness', b'this text is about women', b'this text is about parents', b'this text is about business', b'this text is about weddings', b'this text is about fashion', b'this text is about entertainmen,the attention', b'this text is about divorce', b'this text is about religion', b'this text is about sports', b'this text is about comedy', b'this text is about science', b'this text is about crime']\n",
-      "2021-09-08 11:53:56,968 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:56,970 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:29:14,923 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:14,925 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -4077,243 +4104,231 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:53:56,971 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:56,971 Corpus: \"Corpus: 13 train + 1 dev + 2 test sentences\"\n",
-      "2021-09-08 11:53:56,972 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:56,972 Parameters:\n",
-      "2021-09-08 11:53:56,972  - learning_rate: \"0.02\"\n",
-      "2021-09-08 11:53:56,972  - mini_batch_size: \"1\"\n",
-      "2021-09-08 11:53:56,973  - patience: \"3\"\n",
-      "2021-09-08 11:53:56,973  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 11:53:56,973  - max_epochs: \"10\"\n",
-      "2021-09-08 11:53:56,974  - shuffle: \"True\"\n",
-      "2021-09-08 11:53:56,974  - train_with_dev: \"False\"\n",
-      "2021-09-08 11:53:56,974  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 11:53:56,975 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:56,975 Model training base path: \"None1\"\n",
-      "2021-09-08 11:53:56,975 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:56,976 Device: cuda:0\n",
-      "2021-09-08 11:53:56,976 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:56,976 Embeddings storage mode: cpu\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 11:53:57,154 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:57,209 epoch 1 - iter 1/13 - loss 0.21133351 - samples/sec: 25.85 - lr: 0.020000\n",
-      "2021-09-08 11:53:57,255 epoch 1 - iter 2/13 - loss 0.28074306 - samples/sec: 22.07 - lr: 0.020000\n",
-      "2021-09-08 11:53:57,301 epoch 1 - iter 3/13 - loss 0.85722069 - samples/sec: 22.12 - lr: 0.020000\n",
-      "2021-09-08 11:53:57,347 epoch 1 - iter 4/13 - loss 0.79271719 - samples/sec: 21.92 - lr: 0.020000\n",
-      "2021-09-08 11:53:57,392 epoch 1 - iter 5/13 - loss 0.74135333 - samples/sec: 22.23 - lr: 0.020000\n",
-      "2021-09-08 11:53:57,438 epoch 1 - iter 6/13 - loss 0.76413037 - samples/sec: 22.11 - lr: 0.020000\n",
-      "2021-09-08 11:53:57,484 epoch 1 - iter 7/13 - loss 0.75321063 - samples/sec: 21.92 - lr: 0.020000\n",
-      "2021-09-08 11:53:57,530 epoch 1 - iter 8/13 - loss 0.72104788 - samples/sec: 22.06 - lr: 0.020000\n",
-      "2021-09-08 11:53:57,578 epoch 1 - iter 9/13 - loss 0.73458553 - samples/sec: 21.23 - lr: 0.020000\n",
-      "2021-09-08 11:53:57,623 epoch 1 - iter 10/13 - loss 0.72856059 - samples/sec: 22.09 - lr: 0.020000\n",
-      "2021-09-08 11:53:57,672 epoch 1 - iter 11/13 - loss 0.67637069 - samples/sec: 20.93 - lr: 0.020000\n",
-      "2021-09-08 11:53:57,715 epoch 1 - iter 12/13 - loss 0.62238188 - samples/sec: 23.65 - lr: 0.020000\n",
-      "2021-09-08 11:53:57,761 epoch 1 - iter 13/13 - loss 0.63640495 - samples/sec: 21.98 - lr: 0.020000\n",
-      "2021-09-08 11:53:57,762 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:53:57,762 EPOCH 1 done: loss 0.6364 - lr 0.0200000\n",
-      "2021-09-08 11:53:57,887 DEV : loss 0.8695148825645447 - score 0.0\n",
-      "2021-09-08 11:53:57,887 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:29:14,926 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:14,926 Corpus: \"Corpus: 13 train + 1 dev + 2 test sentences\"\n",
+      "2021-09-21 20:29:14,926 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:14,926 Parameters:\n",
+      "2021-09-21 20:29:14,927  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:29:14,927  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:29:14,927  - patience: \"3\"\n",
+      "2021-09-21 20:29:14,928  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:29:14,928  - max_epochs: \"10\"\n",
+      "2021-09-21 20:29:14,928  - shuffle: \"True\"\n",
+      "2021-09-21 20:29:14,928  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:29:14,929  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:29:14,929 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:14,929 Model training base path: \"None1\"\n",
+      "2021-09-21 20:29:14,930 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:14,930 Device: cuda:0\n",
+      "2021-09-21 20:29:14,930 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:14,930 Embeddings storage mode: cpu\n",
+      "2021-09-21 20:29:14,937 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:15,071 epoch 1 - iter 1/13 - loss 0.84672266 - samples/sec: 17.40 - lr: 0.020000\n",
+      "2021-09-21 20:29:15,133 epoch 1 - iter 2/13 - loss 0.83570784 - samples/sec: 16.22 - lr: 0.020000\n",
+      "2021-09-21 20:29:15,200 epoch 1 - iter 3/13 - loss 0.72965149 - samples/sec: 15.17 - lr: 0.020000\n",
+      "2021-09-21 20:29:15,255 epoch 1 - iter 4/13 - loss 0.66390885 - samples/sec: 18.42 - lr: 0.020000\n",
+      "2021-09-21 20:29:15,317 epoch 1 - iter 5/13 - loss 0.57407464 - samples/sec: 16.10 - lr: 0.020000\n",
+      "2021-09-21 20:29:15,385 epoch 1 - iter 6/13 - loss 0.58197125 - samples/sec: 15.07 - lr: 0.020000\n",
+      "2021-09-21 20:29:15,441 epoch 1 - iter 7/13 - loss 0.52190149 - samples/sec: 17.97 - lr: 0.020000\n",
+      "2021-09-21 20:29:15,502 epoch 1 - iter 8/13 - loss 0.53421127 - samples/sec: 16.46 - lr: 0.020000\n",
+      "2021-09-21 20:29:15,572 epoch 1 - iter 9/13 - loss 0.73842111 - samples/sec: 14.47 - lr: 0.020000\n",
+      "2021-09-21 20:29:15,626 epoch 1 - iter 10/13 - loss 0.69578493 - samples/sec: 18.77 - lr: 0.020000\n",
+      "2021-09-21 20:29:15,692 epoch 1 - iter 11/13 - loss 0.63747214 - samples/sec: 15.20 - lr: 0.020000\n",
+      "2021-09-21 20:29:15,756 epoch 1 - iter 12/13 - loss 0.63880585 - samples/sec: 15.81 - lr: 0.020000\n",
+      "2021-09-21 20:29:15,807 epoch 1 - iter 13/13 - loss 0.59185105 - samples/sec: 19.96 - lr: 0.020000\n",
+      "2021-09-21 20:29:15,808 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:15,809 EPOCH 1 done: loss 0.5919 - lr 0.0200000\n",
+      "2021-09-21 20:29:15,857 DEV : loss 0.7082672715187073 - score 0.0\n",
+      "2021-09-21 20:29:15,858 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:29:19,801 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:19,922 epoch 2 - iter 1/13 - loss 0.56378263 - samples/sec: 13.50 - lr: 0.020000\n",
+      "2021-09-21 20:29:19,979 epoch 2 - iter 2/13 - loss 0.29930530 - samples/sec: 17.70 - lr: 0.020000\n",
+      "2021-09-21 20:29:20,035 epoch 2 - iter 3/13 - loss 0.27705150 - samples/sec: 18.10 - lr: 0.020000\n",
+      "2021-09-21 20:29:20,100 epoch 2 - iter 4/13 - loss 0.37433582 - samples/sec: 15.43 - lr: 0.020000\n",
+      "2021-09-21 20:29:20,161 epoch 2 - iter 5/13 - loss 0.68527341 - samples/sec: 16.33 - lr: 0.020000\n",
+      "2021-09-21 20:29:20,229 epoch 2 - iter 6/13 - loss 0.81010854 - samples/sec: 14.79 - lr: 0.020000\n",
+      "2021-09-21 20:29:20,300 epoch 2 - iter 7/13 - loss 0.70850070 - samples/sec: 14.24 - lr: 0.020000\n",
+      "2021-09-21 20:29:20,352 epoch 2 - iter 8/13 - loss 0.76651525 - samples/sec: 19.29 - lr: 0.020000\n",
+      "2021-09-21 20:29:20,416 epoch 2 - iter 9/13 - loss 0.74147956 - samples/sec: 15.85 - lr: 0.020000\n",
+      "2021-09-21 20:29:20,479 epoch 2 - iter 10/13 - loss 0.66868030 - samples/sec: 15.95 - lr: 0.020000\n",
+      "2021-09-21 20:29:20,567 epoch 2 - iter 11/13 - loss 0.60941742 - samples/sec: 11.36 - lr: 0.020000\n",
+      "2021-09-21 20:29:20,638 epoch 2 - iter 12/13 - loss 0.58879375 - samples/sec: 14.22 - lr: 0.020000\n",
+      "2021-09-21 20:29:20,710 epoch 2 - iter 13/13 - loss 0.57580493 - samples/sec: 14.01 - lr: 0.020000\n",
+      "2021-09-21 20:29:20,711 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:20,711 EPOCH 2 done: loss 0.5758 - lr 0.0200000\n",
+      "2021-09-21 20:29:20,790 DEV : loss 0.3514304459095001 - score 0.0\n",
+      "2021-09-21 20:29:20,793 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:54:06,686 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:06,750 epoch 2 - iter 1/13 - loss 0.23098755 - samples/sec: 21.29 - lr: 0.020000\n",
-      "2021-09-08 11:54:06,798 epoch 2 - iter 2/13 - loss 0.30874340 - samples/sec: 20.95 - lr: 0.020000\n",
-      "2021-09-08 11:54:06,845 epoch 2 - iter 3/13 - loss 0.71870374 - samples/sec: 21.68 - lr: 0.020000\n",
-      "2021-09-08 11:54:06,892 epoch 2 - iter 4/13 - loss 0.71931104 - samples/sec: 21.20 - lr: 0.020000\n",
-      "2021-09-08 11:54:06,940 epoch 2 - iter 5/13 - loss 0.66031674 - samples/sec: 21.15 - lr: 0.020000\n",
-      "2021-09-08 11:54:06,990 epoch 2 - iter 6/13 - loss 0.56854943 - samples/sec: 20.22 - lr: 0.020000\n",
-      "2021-09-08 11:54:07,040 epoch 2 - iter 7/13 - loss 0.51168626 - samples/sec: 20.56 - lr: 0.020000\n",
-      "2021-09-08 11:54:07,089 epoch 2 - iter 8/13 - loss 0.45340087 - samples/sec: 20.59 - lr: 0.020000\n",
-      "2021-09-08 11:54:07,139 epoch 2 - iter 9/13 - loss 0.45479180 - samples/sec: 20.17 - lr: 0.020000\n",
-      "2021-09-08 11:54:07,186 epoch 2 - iter 10/13 - loss 0.41345818 - samples/sec: 21.69 - lr: 0.020000\n",
-      "2021-09-08 11:54:07,236 epoch 2 - iter 11/13 - loss 0.44865019 - samples/sec: 20.19 - lr: 0.020000\n",
-      "2021-09-08 11:54:07,286 epoch 2 - iter 12/13 - loss 0.42417934 - samples/sec: 20.33 - lr: 0.020000\n",
-      "2021-09-08 11:54:07,336 epoch 2 - iter 13/13 - loss 0.43921950 - samples/sec: 20.18 - lr: 0.020000\n",
-      "2021-09-08 11:54:07,338 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:07,338 EPOCH 2 done: loss 0.4392 - lr 0.0200000\n",
-      "2021-09-08 11:54:07,498 DEV : loss 0.4690447449684143 - score 0.0\n",
-      "2021-09-08 11:54:07,499 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:29:24,688 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:24,850 epoch 3 - iter 1/13 - loss 0.82224053 - samples/sec: 14.28 - lr: 0.020000\n",
+      "2021-09-21 20:29:24,927 epoch 3 - iter 2/13 - loss 0.74425781 - samples/sec: 13.08 - lr: 0.020000\n",
+      "2021-09-21 20:29:24,991 epoch 3 - iter 3/13 - loss 0.69966958 - samples/sec: 15.89 - lr: 0.020000\n",
+      "2021-09-21 20:29:25,074 epoch 3 - iter 4/13 - loss 0.53255612 - samples/sec: 12.06 - lr: 0.020000\n",
+      "2021-09-21 20:29:25,146 epoch 3 - iter 5/13 - loss 0.43091449 - samples/sec: 14.07 - lr: 0.020000\n",
+      "2021-09-21 20:29:25,218 epoch 3 - iter 6/13 - loss 0.36425094 - samples/sec: 13.84 - lr: 0.020000\n",
+      "2021-09-21 20:29:25,302 epoch 3 - iter 7/13 - loss 0.32561695 - samples/sec: 11.96 - lr: 0.020000\n",
+      "2021-09-21 20:29:25,370 epoch 3 - iter 8/13 - loss 0.28594018 - samples/sec: 15.01 - lr: 0.020000\n",
+      "2021-09-21 20:29:25,431 epoch 3 - iter 9/13 - loss 0.27190860 - samples/sec: 16.44 - lr: 0.020000\n",
+      "2021-09-21 20:29:25,528 epoch 3 - iter 10/13 - loss 0.29723251 - samples/sec: 10.28 - lr: 0.020000\n",
+      "2021-09-21 20:29:25,600 epoch 3 - iter 11/13 - loss 0.27040221 - samples/sec: 14.14 - lr: 0.020000\n",
+      "2021-09-21 20:29:25,664 epoch 3 - iter 12/13 - loss 0.29947062 - samples/sec: 15.65 - lr: 0.020000\n",
+      "2021-09-21 20:29:25,722 epoch 3 - iter 13/13 - loss 0.27851191 - samples/sec: 17.23 - lr: 0.020000\n",
+      "2021-09-21 20:29:25,723 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:25,724 EPOCH 3 done: loss 0.2785 - lr 0.0200000\n",
+      "2021-09-21 20:29:25,801 DEV : loss 0.24703070521354675 - score 0.0\n",
+      "2021-09-21 20:29:25,805 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:54:16,745 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:16,808 epoch 3 - iter 1/13 - loss 0.09107675 - samples/sec: 21.48 - lr: 0.020000\n",
-      "2021-09-08 11:54:16,854 epoch 3 - iter 2/13 - loss 0.12067101 - samples/sec: 22.16 - lr: 0.020000\n",
-      "2021-09-08 11:54:16,896 epoch 3 - iter 3/13 - loss 0.08756885 - samples/sec: 23.91 - lr: 0.020000\n",
-      "2021-09-08 11:54:16,942 epoch 3 - iter 4/13 - loss 0.39497522 - samples/sec: 22.03 - lr: 0.020000\n",
-      "2021-09-08 11:54:16,984 epoch 3 - iter 5/13 - loss 0.32060479 - samples/sec: 23.92 - lr: 0.020000\n",
-      "2021-09-08 11:54:17,030 epoch 3 - iter 6/13 - loss 0.31279484 - samples/sec: 21.96 - lr: 0.020000\n",
-      "2021-09-08 11:54:17,076 epoch 3 - iter 7/13 - loss 0.28702121 - samples/sec: 22.14 - lr: 0.020000\n",
-      "2021-09-08 11:54:17,121 epoch 3 - iter 8/13 - loss 0.27884876 - samples/sec: 22.07 - lr: 0.020000\n",
-      "2021-09-08 11:54:17,167 epoch 3 - iter 9/13 - loss 0.30988087 - samples/sec: 22.26 - lr: 0.020000\n",
-      "2021-09-08 11:54:17,212 epoch 3 - iter 10/13 - loss 0.37677279 - samples/sec: 22.31 - lr: 0.020000\n",
-      "2021-09-08 11:54:17,258 epoch 3 - iter 11/13 - loss 0.37780891 - samples/sec: 21.95 - lr: 0.020000\n",
-      "2021-09-08 11:54:17,304 epoch 3 - iter 12/13 - loss 0.35811937 - samples/sec: 22.06 - lr: 0.020000\n",
-      "2021-09-08 11:54:17,349 epoch 3 - iter 13/13 - loss 0.33857844 - samples/sec: 22.16 - lr: 0.020000\n",
-      "2021-09-08 11:54:17,350 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:17,350 EPOCH 3 done: loss 0.3386 - lr 0.0200000\n",
-      "2021-09-08 11:54:17,379 DEV : loss 1.0289589166641235 - score 0.0\n",
-      "2021-09-08 11:54:17,380 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:54:17,382 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:17,443 epoch 4 - iter 1/13 - loss 0.15623082 - samples/sec: 22.02 - lr: 0.020000\n",
-      "2021-09-08 11:54:17,486 epoch 4 - iter 2/13 - loss 0.07992999 - samples/sec: 23.68 - lr: 0.020000\n",
-      "2021-09-08 11:54:17,528 epoch 4 - iter 3/13 - loss 0.05371712 - samples/sec: 23.95 - lr: 0.020000\n",
-      "2021-09-08 11:54:17,575 epoch 4 - iter 4/13 - loss 0.07842120 - samples/sec: 21.50 - lr: 0.020000\n",
-      "2021-09-08 11:54:17,621 epoch 4 - iter 5/13 - loss 0.10816358 - samples/sec: 21.96 - lr: 0.020000\n",
-      "2021-09-08 11:54:17,666 epoch 4 - iter 6/13 - loss 0.36577961 - samples/sec: 22.20 - lr: 0.020000\n",
-      "2021-09-08 11:54:17,712 epoch 4 - iter 7/13 - loss 0.32447489 - samples/sec: 22.05 - lr: 0.020000\n",
-      "2021-09-08 11:54:17,758 epoch 4 - iter 8/13 - loss 0.51136698 - samples/sec: 22.17 - lr: 0.020000\n",
-      "2021-09-08 11:54:17,800 epoch 4 - iter 9/13 - loss 0.45570960 - samples/sec: 24.04 - lr: 0.020000\n",
-      "2021-09-08 11:54:17,848 epoch 4 - iter 10/13 - loss 0.42124004 - samples/sec: 20.92 - lr: 0.020000\n",
-      "2021-09-08 11:54:17,894 epoch 4 - iter 11/13 - loss 0.43181023 - samples/sec: 22.07 - lr: 0.020000\n",
-      "2021-09-08 11:54:17,940 epoch 4 - iter 12/13 - loss 0.50480651 - samples/sec: 21.95 - lr: 0.020000\n",
-      "2021-09-08 11:54:17,985 epoch 4 - iter 13/13 - loss 0.47241443 - samples/sec: 22.14 - lr: 0.020000\n",
-      "2021-09-08 11:54:17,986 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:17,986 EPOCH 4 done: loss 0.4724 - lr 0.0200000\n",
-      "2021-09-08 11:54:18,015 DEV : loss 1.6071162223815918 - score 0.0\n",
-      "2021-09-08 11:54:18,015 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:54:18,018 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:18,075 epoch 5 - iter 1/13 - loss 0.01177089 - samples/sec: 23.81 - lr: 0.020000\n",
-      "2021-09-08 11:54:18,121 epoch 5 - iter 2/13 - loss 0.01993079 - samples/sec: 22.08 - lr: 0.020000\n",
-      "2021-09-08 11:54:18,166 epoch 5 - iter 3/13 - loss 0.04300393 - samples/sec: 22.36 - lr: 0.020000\n",
-      "2021-09-08 11:54:18,211 epoch 5 - iter 4/13 - loss 0.30780609 - samples/sec: 22.14 - lr: 0.020000\n",
-      "2021-09-08 11:54:18,257 epoch 5 - iter 5/13 - loss 0.59194914 - samples/sec: 21.87 - lr: 0.020000\n",
-      "2021-09-08 11:54:18,299 epoch 5 - iter 6/13 - loss 0.50007644 - samples/sec: 24.04 - lr: 0.020000\n",
-      "2021-09-08 11:54:18,345 epoch 5 - iter 7/13 - loss 0.67889490 - samples/sec: 22.24 - lr: 0.020000\n",
-      "2021-09-08 11:54:18,388 epoch 5 - iter 8/13 - loss 0.59580172 - samples/sec: 23.57 - lr: 0.020000\n",
-      "2021-09-08 11:54:18,430 epoch 5 - iter 9/13 - loss 0.53071431 - samples/sec: 23.93 - lr: 0.020000\n",
-      "2021-09-08 11:54:18,473 epoch 5 - iter 10/13 - loss 0.47837491 - samples/sec: 23.56 - lr: 0.020000\n",
-      "2021-09-08 11:54:18,515 epoch 5 - iter 11/13 - loss 0.43525952 - samples/sec: 23.79 - lr: 0.020000\n",
-      "2021-09-08 11:54:18,557 epoch 5 - iter 12/13 - loss 0.39988979 - samples/sec: 24.04 - lr: 0.020000\n"
+      "2021-09-21 20:29:29,743 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:29,935 epoch 4 - iter 1/13 - loss 0.19187611 - samples/sec: 11.25 - lr: 0.020000\n",
+      "2021-09-21 20:29:30,007 epoch 4 - iter 2/13 - loss 0.09803995 - samples/sec: 14.06 - lr: 0.020000\n",
+      "2021-09-21 20:29:30,064 epoch 4 - iter 3/13 - loss 0.07862472 - samples/sec: 17.60 - lr: 0.020000\n",
+      "2021-09-21 20:29:30,131 epoch 4 - iter 4/13 - loss 0.19319236 - samples/sec: 15.04 - lr: 0.020000\n",
+      "2021-09-21 20:29:30,188 epoch 4 - iter 5/13 - loss 0.25419404 - samples/sec: 17.87 - lr: 0.020000\n",
+      "2021-09-21 20:29:30,258 epoch 4 - iter 6/13 - loss 0.29083965 - samples/sec: 14.32 - lr: 0.020000\n",
+      "2021-09-21 20:29:30,328 epoch 4 - iter 7/13 - loss 0.25023423 - samples/sec: 14.25 - lr: 0.020000\n",
+      "2021-09-21 20:29:30,383 epoch 4 - iter 8/13 - loss 0.44213024 - samples/sec: 18.43 - lr: 0.020000\n",
+      "2021-09-21 20:29:30,464 epoch 4 - iter 9/13 - loss 0.39324711 - samples/sec: 12.51 - lr: 0.020000\n",
+      "2021-09-21 20:29:30,524 epoch 4 - iter 10/13 - loss 0.35402116 - samples/sec: 16.51 - lr: 0.020000\n",
+      "2021-09-21 20:29:30,609 epoch 4 - iter 11/13 - loss 0.32308979 - samples/sec: 11.84 - lr: 0.020000\n",
+      "2021-09-21 20:29:30,675 epoch 4 - iter 12/13 - loss 0.31407800 - samples/sec: 15.32 - lr: 0.020000\n",
+      "2021-09-21 20:29:30,757 epoch 4 - iter 13/13 - loss 0.29044120 - samples/sec: 12.30 - lr: 0.020000\n",
+      "2021-09-21 20:29:30,758 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:30,758 EPOCH 4 done: loss 0.2904 - lr 0.0200000\n",
+      "2021-09-21 20:29:30,869 DEV : loss 0.12204503268003464 - score 0.0\n",
+      "2021-09-21 20:29:30,872 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:54:18,603 epoch 5 - iter 13/13 - loss 0.44090280 - samples/sec: 22.02 - lr: 0.020000\n",
-      "2021-09-08 11:54:18,604 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:18,604 EPOCH 5 done: loss 0.4409 - lr 0.0200000\n",
-      "2021-09-08 11:54:18,635 DEV : loss 0.27723443508148193 - score 0.0\n",
-      "2021-09-08 11:54:18,635 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 11:54:22,659 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:22,720 epoch 6 - iter 1/13 - loss 0.00643006 - samples/sec: 22.91 - lr: 0.020000\n",
-      "2021-09-08 11:54:22,763 epoch 6 - iter 2/13 - loss 0.00742966 - samples/sec: 23.42 - lr: 0.020000\n",
-      "2021-09-08 11:54:22,806 epoch 6 - iter 3/13 - loss 0.01249036 - samples/sec: 23.47 - lr: 0.020000\n",
-      "2021-09-08 11:54:22,849 epoch 6 - iter 4/13 - loss 0.01019177 - samples/sec: 23.66 - lr: 0.020000\n",
-      "2021-09-08 11:54:22,894 epoch 6 - iter 5/13 - loss 0.27674864 - samples/sec: 22.02 - lr: 0.020000\n",
-      "2021-09-08 11:54:22,937 epoch 6 - iter 6/13 - loss 0.23245074 - samples/sec: 23.67 - lr: 0.020000\n",
-      "2021-09-08 11:54:22,980 epoch 6 - iter 7/13 - loss 0.20069116 - samples/sec: 23.74 - lr: 0.020000\n",
-      "2021-09-08 11:54:23,025 epoch 6 - iter 8/13 - loss 0.20308249 - samples/sec: 22.15 - lr: 0.020000\n",
-      "2021-09-08 11:54:23,071 epoch 6 - iter 9/13 - loss 0.18474461 - samples/sec: 22.27 - lr: 0.020000\n",
-      "2021-09-08 11:54:23,113 epoch 6 - iter 10/13 - loss 0.16746702 - samples/sec: 23.54 - lr: 0.020000\n",
-      "2021-09-08 11:54:23,159 epoch 6 - iter 11/13 - loss 0.21716762 - samples/sec: 22.27 - lr: 0.020000\n",
-      "2021-09-08 11:54:23,201 epoch 6 - iter 12/13 - loss 0.19928810 - samples/sec: 23.64 - lr: 0.020000\n",
-      "2021-09-08 11:54:23,248 epoch 6 - iter 13/13 - loss 0.26641345 - samples/sec: 21.89 - lr: 0.020000\n",
-      "2021-09-08 11:54:23,249 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:23,249 EPOCH 6 done: loss 0.2664 - lr 0.0200000\n",
-      "2021-09-08 11:54:23,411 DEV : loss 0.541735053062439 - score 0.0\n",
-      "2021-09-08 11:54:23,412 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:54:23,507 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:23,569 epoch 7 - iter 1/13 - loss 0.91443557 - samples/sec: 21.92 - lr: 0.020000\n",
-      "2021-09-08 11:54:23,612 epoch 7 - iter 2/13 - loss 0.46288704 - samples/sec: 23.45 - lr: 0.020000\n",
-      "2021-09-08 11:54:23,658 epoch 7 - iter 3/13 - loss 0.37358434 - samples/sec: 22.03 - lr: 0.020000\n",
-      "2021-09-08 11:54:23,703 epoch 7 - iter 4/13 - loss 0.29395289 - samples/sec: 22.20 - lr: 0.020000\n",
-      "2021-09-08 11:54:23,750 epoch 7 - iter 5/13 - loss 0.27396752 - samples/sec: 21.77 - lr: 0.020000\n",
-      "2021-09-08 11:54:23,796 epoch 7 - iter 6/13 - loss 0.24915699 - samples/sec: 21.88 - lr: 0.020000\n",
-      "2021-09-08 11:54:23,839 epoch 7 - iter 7/13 - loss 0.21631896 - samples/sec: 23.62 - lr: 0.020000\n",
-      "2021-09-08 11:54:23,881 epoch 7 - iter 8/13 - loss 0.19011319 - samples/sec: 23.83 - lr: 0.020000\n",
-      "2021-09-08 11:54:23,923 epoch 7 - iter 9/13 - loss 0.16939604 - samples/sec: 24.08 - lr: 0.020000\n",
-      "2021-09-08 11:54:23,967 epoch 7 - iter 10/13 - loss 0.15292742 - samples/sec: 22.83 - lr: 0.020000\n",
-      "2021-09-08 11:54:24,011 epoch 7 - iter 11/13 - loss 0.14098512 - samples/sec: 22.82 - lr: 0.020000\n",
-      "2021-09-08 11:54:24,054 epoch 7 - iter 12/13 - loss 0.12956771 - samples/sec: 23.62 - lr: 0.020000\n",
-      "2021-09-08 11:54:24,097 epoch 7 - iter 13/13 - loss 0.12109394 - samples/sec: 23.48 - lr: 0.020000\n",
-      "2021-09-08 11:54:24,098 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:24,098 EPOCH 7 done: loss 0.1211 - lr 0.0200000\n",
-      "2021-09-08 11:54:24,469 DEV : loss 0.5317767262458801 - score 0.0\n",
-      "2021-09-08 11:54:24,470 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:54:24,549 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:24,610 epoch 8 - iter 1/13 - loss 0.14148520 - samples/sec: 22.06 - lr: 0.020000\n",
-      "2021-09-08 11:54:24,653 epoch 8 - iter 2/13 - loss 0.07254499 - samples/sec: 23.63 - lr: 0.020000\n",
-      "2021-09-08 11:54:24,698 epoch 8 - iter 3/13 - loss 0.58849279 - samples/sec: 22.38 - lr: 0.020000\n",
-      "2021-09-08 11:54:24,740 epoch 8 - iter 4/13 - loss 0.44178843 - samples/sec: 23.83 - lr: 0.020000\n",
-      "2021-09-08 11:54:24,783 epoch 8 - iter 5/13 - loss 0.35361617 - samples/sec: 23.71 - lr: 0.020000\n",
-      "2021-09-08 11:54:24,825 epoch 8 - iter 6/13 - loss 0.29532740 - samples/sec: 24.16 - lr: 0.020000\n",
-      "2021-09-08 11:54:24,867 epoch 8 - iter 7/13 - loss 0.25337307 - samples/sec: 23.68 - lr: 0.020000\n",
-      "2021-09-08 11:54:24,909 epoch 8 - iter 8/13 - loss 0.22307766 - samples/sec: 23.99 - lr: 0.020000\n",
-      "2021-09-08 11:54:24,955 epoch 8 - iter 9/13 - loss 0.22781016 - samples/sec: 22.13 - lr: 0.020000\n",
-      "2021-09-08 11:54:24,997 epoch 8 - iter 10/13 - loss 0.20515615 - samples/sec: 23.98 - lr: 0.020000\n",
-      "2021-09-08 11:54:25,042 epoch 8 - iter 11/13 - loss 0.24918505 - samples/sec: 22.35 - lr: 0.020000\n",
-      "2021-09-08 11:54:25,088 epoch 8 - iter 12/13 - loss 0.24626195 - samples/sec: 22.04 - lr: 0.020000\n",
-      "2021-09-08 11:54:25,133 epoch 8 - iter 13/13 - loss 0.33689670 - samples/sec: 22.39 - lr: 0.020000\n",
-      "2021-09-08 11:54:25,134 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:25,134 EPOCH 8 done: loss 0.3369 - lr 0.0200000\n",
-      "2021-09-08 11:54:27,824 DEV : loss 0.6738012433052063 - score 0.0\n",
-      "2021-09-08 11:54:27,824 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:54:27,828 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:27,887 epoch 9 - iter 1/13 - loss 0.04220434 - samples/sec: 22.91 - lr: 0.020000\n",
-      "2021-09-08 11:54:27,931 epoch 9 - iter 2/13 - loss 0.02817556 - samples/sec: 22.76 - lr: 0.020000\n",
-      "2021-09-08 11:54:27,975 epoch 9 - iter 3/13 - loss 0.02522640 - samples/sec: 23.45 - lr: 0.020000\n",
-      "2021-09-08 11:54:28,021 epoch 9 - iter 4/13 - loss 0.02574630 - samples/sec: 21.75 - lr: 0.020000\n",
-      "2021-09-08 11:54:28,068 epoch 9 - iter 5/13 - loss 0.23942660 - samples/sec: 21.46 - lr: 0.020000\n",
-      "2021-09-08 11:54:28,113 epoch 9 - iter 6/13 - loss 0.19961939 - samples/sec: 22.63 - lr: 0.020000\n",
-      "2021-09-08 11:54:28,157 epoch 9 - iter 7/13 - loss 0.17174735 - samples/sec: 22.71 - lr: 0.020000\n",
-      "2021-09-08 11:54:28,201 epoch 9 - iter 8/13 - loss 0.15228780 - samples/sec: 23.02 - lr: 0.020000\n",
-      "2021-09-08 11:54:28,245 epoch 9 - iter 9/13 - loss 0.13556379 - samples/sec: 23.16 - lr: 0.020000\n",
-      "2021-09-08 11:54:28,289 epoch 9 - iter 10/13 - loss 0.12225865 - samples/sec: 23.28 - lr: 0.020000\n",
-      "2021-09-08 11:54:28,333 epoch 9 - iter 11/13 - loss 0.11121303 - samples/sec: 22.76 - lr: 0.020000\n",
-      "2021-09-08 11:54:28,377 epoch 9 - iter 12/13 - loss 0.10198021 - samples/sec: 22.92 - lr: 0.020000\n",
-      "2021-09-08 11:54:28,424 epoch 9 - iter 13/13 - loss 0.11422016 - samples/sec: 21.51 - lr: 0.020000\n",
-      "2021-09-08 11:54:28,426 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:28,426 EPOCH 9 done: loss 0.1142 - lr 0.0200000\n",
-      "2021-09-08 11:54:28,489 DEV : loss 0.14304953813552856 - score 0.0\n",
-      "2021-09-08 11:54:28,490 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 11:54:33,440 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:33,503 epoch 10 - iter 1/13 - loss 0.09075239 - samples/sec: 21.52 - lr: 0.020000\n",
-      "2021-09-08 11:54:33,549 epoch 10 - iter 2/13 - loss 0.81550212 - samples/sec: 22.16 - lr: 0.020000\n",
-      "2021-09-08 11:54:33,591 epoch 10 - iter 3/13 - loss 0.54444988 - samples/sec: 23.66 - lr: 0.020000\n",
-      "2021-09-08 11:54:33,637 epoch 10 - iter 4/13 - loss 0.47733760 - samples/sec: 22.15 - lr: 0.020000\n",
-      "2021-09-08 11:54:33,679 epoch 10 - iter 5/13 - loss 0.38205569 - samples/sec: 24.05 - lr: 0.020000\n",
-      "2021-09-08 11:54:33,722 epoch 10 - iter 6/13 - loss 0.31860478 - samples/sec: 23.62 - lr: 0.020000\n",
-      "2021-09-08 11:54:33,764 epoch 10 - iter 7/13 - loss 0.27321692 - samples/sec: 24.03 - lr: 0.020000\n",
-      "2021-09-08 11:54:33,806 epoch 10 - iter 8/13 - loss 0.23918849 - samples/sec: 23.56 - lr: 0.020000\n"
+      "2021-09-21 20:29:36,361 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:36,515 epoch 5 - iter 1/13 - loss 0.00117128 - samples/sec: 14.87 - lr: 0.020000\n",
+      "2021-09-21 20:29:36,594 epoch 5 - iter 2/13 - loss 0.00121858 - samples/sec: 12.78 - lr: 0.020000\n",
+      "2021-09-21 20:29:36,664 epoch 5 - iter 3/13 - loss 0.28453080 - samples/sec: 14.24 - lr: 0.020000\n",
+      "2021-09-21 20:29:36,765 epoch 5 - iter 4/13 - loss 0.23429858 - samples/sec: 9.98 - lr: 0.020000\n",
+      "2021-09-21 20:29:36,835 epoch 5 - iter 5/13 - loss 0.18769497 - samples/sec: 14.26 - lr: 0.020000\n",
+      "2021-09-21 20:29:36,897 epoch 5 - iter 6/13 - loss 0.15676992 - samples/sec: 16.44 - lr: 0.020000\n",
+      "2021-09-21 20:29:36,964 epoch 5 - iter 7/13 - loss 0.14329982 - samples/sec: 14.98 - lr: 0.020000\n",
+      "2021-09-21 20:29:37,025 epoch 5 - iter 8/13 - loss 0.23885626 - samples/sec: 16.58 - lr: 0.020000\n",
+      "2021-09-21 20:29:37,113 epoch 5 - iter 9/13 - loss 0.21715038 - samples/sec: 11.36 - lr: 0.020000\n",
+      "2021-09-21 20:29:37,192 epoch 5 - iter 10/13 - loss 0.20120940 - samples/sec: 12.75 - lr: 0.020000\n",
+      "2021-09-21 20:29:37,254 epoch 5 - iter 11/13 - loss 0.18814798 - samples/sec: 16.47 - lr: 0.020000\n",
+      "2021-09-21 20:29:37,319 epoch 5 - iter 12/13 - loss 0.17266711 - samples/sec: 15.33 - lr: 0.020000\n",
+      "2021-09-21 20:29:37,381 epoch 5 - iter 13/13 - loss 0.19365083 - samples/sec: 16.20 - lr: 0.020000\n",
+      "2021-09-21 20:29:37,382 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:37,383 EPOCH 5 done: loss 0.1937 - lr 0.0200000\n",
+      "2021-09-21 20:29:37,521 DEV : loss 0.8875477910041809 - score 0.0\n",
+      "2021-09-21 20:29:37,524 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:29:37,526 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:37,687 epoch 6 - iter 1/13 - loss 0.02860545 - samples/sec: 11.68 - lr: 0.020000\n",
+      "2021-09-21 20:29:37,761 epoch 6 - iter 2/13 - loss 0.01661187 - samples/sec: 13.65 - lr: 0.020000\n",
+      "2021-09-21 20:29:37,833 epoch 6 - iter 3/13 - loss 0.01648491 - samples/sec: 13.85 - lr: 0.020000\n",
+      "2021-09-21 20:29:37,891 epoch 6 - iter 4/13 - loss 0.01375282 - samples/sec: 17.51 - lr: 0.020000\n",
+      "2021-09-21 20:29:37,961 epoch 6 - iter 5/13 - loss 0.01137190 - samples/sec: 14.39 - lr: 0.020000\n",
+      "2021-09-21 20:29:38,008 epoch 6 - iter 6/13 - loss 0.00967486 - samples/sec: 21.23 - lr: 0.020000\n",
+      "2021-09-21 20:29:38,077 epoch 6 - iter 7/13 - loss 0.00905140 - samples/sec: 14.53 - lr: 0.020000\n",
+      "2021-09-21 20:29:38,151 epoch 6 - iter 8/13 - loss 0.00947996 - samples/sec: 13.66 - lr: 0.020000\n",
+      "2021-09-21 20:29:38,211 epoch 6 - iter 9/13 - loss 0.00855314 - samples/sec: 16.74 - lr: 0.020000\n",
+      "2021-09-21 20:29:38,290 epoch 6 - iter 10/13 - loss 0.11789505 - samples/sec: 12.77 - lr: 0.020000\n",
+      "2021-09-21 20:29:38,370 epoch 6 - iter 11/13 - loss 0.10722930 - samples/sec: 12.53 - lr: 0.020000\n",
+      "2021-09-21 20:29:38,438 epoch 6 - iter 12/13 - loss 0.09835119 - samples/sec: 14.89 - lr: 0.020000\n",
+      "2021-09-21 20:29:38,503 epoch 6 - iter 13/13 - loss 0.09084029 - samples/sec: 15.51 - lr: 0.020000\n",
+      "2021-09-21 20:29:38,503 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:38,504 EPOCH 6 done: loss 0.0908 - lr 0.0200000\n",
+      "2021-09-21 20:29:38,571 DEV : loss 0.6047799587249756 - score 0.0\n",
+      "2021-09-21 20:29:38,575 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:29:38,577 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:38,767 epoch 7 - iter 1/13 - loss 0.00219397 - samples/sec: 15.66 - lr: 0.020000\n",
+      "2021-09-21 20:29:38,829 epoch 7 - iter 2/13 - loss 0.00441695 - samples/sec: 16.29 - lr: 0.020000\n",
+      "2021-09-21 20:29:38,903 epoch 7 - iter 3/13 - loss 0.00306007 - samples/sec: 13.67 - lr: 0.020000\n",
+      "2021-09-21 20:29:38,972 epoch 7 - iter 4/13 - loss 0.05930018 - samples/sec: 14.41 - lr: 0.020000\n",
+      "2021-09-21 20:29:39,037 epoch 7 - iter 5/13 - loss 0.23311632 - samples/sec: 15.51 - lr: 0.020000\n",
+      "2021-09-21 20:29:39,097 epoch 7 - iter 6/13 - loss 0.19446806 - samples/sec: 16.78 - lr: 0.020000\n",
+      "2021-09-21 20:29:39,166 epoch 7 - iter 7/13 - loss 0.16694741 - samples/sec: 14.56 - lr: 0.020000\n",
+      "2021-09-21 20:29:39,243 epoch 7 - iter 8/13 - loss 0.14632535 - samples/sec: 13.07 - lr: 0.020000\n",
+      "2021-09-21 20:29:39,317 epoch 7 - iter 9/13 - loss 0.13058356 - samples/sec: 13.70 - lr: 0.020000\n",
+      "2021-09-21 20:29:39,370 epoch 7 - iter 10/13 - loss 0.11756819 - samples/sec: 18.96 - lr: 0.020000\n",
+      "2021-09-21 20:29:39,428 epoch 7 - iter 11/13 - loss 0.10693958 - samples/sec: 17.26 - lr: 0.020000\n",
+      "2021-09-21 20:29:39,485 epoch 7 - iter 12/13 - loss 0.09814730 - samples/sec: 17.83 - lr: 0.020000\n",
+      "2021-09-21 20:29:39,554 epoch 7 - iter 13/13 - loss 0.09494751 - samples/sec: 14.52 - lr: 0.020000\n",
+      "2021-09-21 20:29:39,555 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:39,555 EPOCH 7 done: loss 0.0949 - lr 0.0200000\n",
+      "2021-09-21 20:29:39,596 DEV : loss 0.5369085073471069 - score 0.0\n",
+      "2021-09-21 20:29:39,601 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:29:39,603 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:39,799 epoch 8 - iter 1/13 - loss 0.00134355 - samples/sec: 13.57 - lr: 0.020000\n",
+      "2021-09-21 20:29:39,876 epoch 8 - iter 2/13 - loss 0.04381601 - samples/sec: 13.12 - lr: 0.020000\n",
+      "2021-09-21 20:29:39,950 epoch 8 - iter 3/13 - loss 0.05572489 - samples/sec: 13.68 - lr: 0.020000\n",
+      "2021-09-21 20:29:40,008 epoch 8 - iter 4/13 - loss 0.04189451 - samples/sec: 17.36 - lr: 0.020000\n",
+      "2021-09-21 20:29:40,067 epoch 8 - iter 5/13 - loss 0.43193918 - samples/sec: 16.83 - lr: 0.020000\n",
+      "2021-09-21 20:29:40,126 epoch 8 - iter 6/13 - loss 0.36004661 - samples/sec: 17.29 - lr: 0.020000\n",
+      "2021-09-21 20:29:40,190 epoch 8 - iter 7/13 - loss 0.31034267 - samples/sec: 15.57 - lr: 0.020000\n",
+      "2021-09-21 20:29:40,250 epoch 8 - iter 8/13 - loss 0.27177502 - samples/sec: 16.80 - lr: 0.020000\n",
+      "2021-09-21 20:29:40,335 epoch 8 - iter 9/13 - loss 0.24164824 - samples/sec: 11.79 - lr: 0.020000\n",
+      "2021-09-21 20:29:40,396 epoch 8 - iter 10/13 - loss 0.21865787 - samples/sec: 16.72 - lr: 0.020000\n",
+      "2021-09-21 20:29:40,466 epoch 8 - iter 11/13 - loss 0.21133652 - samples/sec: 14.27 - lr: 0.020000\n",
+      "2021-09-21 20:29:40,541 epoch 8 - iter 12/13 - loss 0.19376535 - samples/sec: 13.52 - lr: 0.020000\n",
+      "2021-09-21 20:29:40,624 epoch 8 - iter 13/13 - loss 0.17892033 - samples/sec: 12.03 - lr: 0.020000\n",
+      "2021-09-21 20:29:40,625 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:40,626 EPOCH 8 done: loss 0.1789 - lr 0.0200000\n",
+      "2021-09-21 20:29:40,663 DEV : loss 0.4148842692375183 - score 0.0\n",
+      "Epoch     8: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 20:29:40,666 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:29:40,667 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:40,815 epoch 9 - iter 1/13 - loss 0.00170316 - samples/sec: 12.33 - lr: 0.010000\n",
+      "2021-09-21 20:29:40,893 epoch 9 - iter 2/13 - loss 1.75933499 - samples/sec: 12.89 - lr: 0.010000\n",
+      "2021-09-21 20:29:40,951 epoch 9 - iter 3/13 - loss 1.17362977 - samples/sec: 17.24 - lr: 0.010000\n",
+      "2021-09-21 20:29:41,017 epoch 9 - iter 4/13 - loss 0.88047940 - samples/sec: 15.16 - lr: 0.010000\n",
+      "2021-09-21 20:29:41,089 epoch 9 - iter 5/13 - loss 0.71240131 - samples/sec: 14.13 - lr: 0.010000\n",
+      "2021-09-21 20:29:41,150 epoch 9 - iter 6/13 - loss 0.59377080 - samples/sec: 16.31 - lr: 0.010000\n",
+      "2021-09-21 20:29:41,211 epoch 9 - iter 7/13 - loss 0.50906915 - samples/sec: 16.66 - lr: 0.010000\n",
+      "2021-09-21 20:29:41,282 epoch 9 - iter 8/13 - loss 0.44550297 - samples/sec: 14.07 - lr: 0.010000\n",
+      "2021-09-21 20:29:41,365 epoch 9 - iter 9/13 - loss 0.39614042 - samples/sec: 12.08 - lr: 0.010000\n",
+      "2021-09-21 20:29:41,420 epoch 9 - iter 10/13 - loss 0.35657857 - samples/sec: 18.37 - lr: 0.010000\n",
+      "2021-09-21 20:29:41,482 epoch 9 - iter 11/13 - loss 0.32426144 - samples/sec: 16.25 - lr: 0.010000\n",
+      "2021-09-21 20:29:41,562 epoch 9 - iter 12/13 - loss 0.29777191 - samples/sec: 12.58 - lr: 0.010000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:54:33,849 epoch 10 - iter 9/13 - loss 0.21274323 - samples/sec: 23.78 - lr: 0.020000\n",
-      "2021-09-08 11:54:33,892 epoch 10 - iter 10/13 - loss 0.19172126 - samples/sec: 23.36 - lr: 0.020000\n",
-      "2021-09-08 11:54:33,934 epoch 10 - iter 11/13 - loss 0.17440302 - samples/sec: 24.08 - lr: 0.020000\n",
-      "2021-09-08 11:54:33,979 epoch 10 - iter 12/13 - loss 0.29440876 - samples/sec: 22.37 - lr: 0.020000\n",
-      "2021-09-08 11:54:34,022 epoch 10 - iter 13/13 - loss 0.27185184 - samples/sec: 23.49 - lr: 0.020000\n",
-      "2021-09-08 11:54:34,023 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:34,023 EPOCH 10 done: loss 0.2719 - lr 0.0200000\n",
-      "2021-09-08 11:54:34,053 DEV : loss 0.44742414355278015 - score 0.0\n",
-      "2021-09-08 11:54:34,054 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:54:38,267 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:38,268 Testing using best model ...\n",
-      "2021-09-08 11:54:38,269 loading file None1/best-model.pt\n",
+      "2021-09-21 20:29:41,631 epoch 9 - iter 13/13 - loss 0.27494007 - samples/sec: 14.56 - lr: 0.010000\n",
+      "2021-09-21 20:29:41,632 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:41,633 EPOCH 9 done: loss 0.2749 - lr 0.0100000\n",
+      "2021-09-21 20:29:41,719 DEV : loss 0.529263973236084 - score 0.0\n",
+      "2021-09-21 20:29:41,721 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:29:41,723 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:41,876 epoch 10 - iter 1/13 - loss 0.01333893 - samples/sec: 12.30 - lr: 0.010000\n",
+      "2021-09-21 20:29:41,943 epoch 10 - iter 2/13 - loss 0.04866963 - samples/sec: 15.08 - lr: 0.010000\n",
+      "2021-09-21 20:29:42,015 epoch 10 - iter 3/13 - loss 0.03322769 - samples/sec: 13.93 - lr: 0.010000\n",
+      "2021-09-21 20:29:42,091 epoch 10 - iter 4/13 - loss 0.02500075 - samples/sec: 13.20 - lr: 0.010000\n",
+      "2021-09-21 20:29:42,152 epoch 10 - iter 5/13 - loss 0.02005992 - samples/sec: 16.42 - lr: 0.010000\n",
+      "2021-09-21 20:29:42,207 epoch 10 - iter 6/13 - loss 0.01676995 - samples/sec: 18.47 - lr: 0.010000\n",
+      "2021-09-21 20:29:42,283 epoch 10 - iter 7/13 - loss 0.01444351 - samples/sec: 13.24 - lr: 0.010000\n",
+      "2021-09-21 20:29:42,347 epoch 10 - iter 8/13 - loss 0.02905701 - samples/sec: 15.68 - lr: 0.010000\n",
+      "2021-09-21 20:29:42,422 epoch 10 - iter 9/13 - loss 0.02817029 - samples/sec: 13.38 - lr: 0.010000\n",
+      "2021-09-21 20:29:42,483 epoch 10 - iter 10/13 - loss 0.02542701 - samples/sec: 16.78 - lr: 0.010000\n",
+      "2021-09-21 20:29:42,550 epoch 10 - iter 11/13 - loss 0.10021150 - samples/sec: 14.90 - lr: 0.010000\n",
+      "2021-09-21 20:29:42,650 epoch 10 - iter 12/13 - loss 0.11770042 - samples/sec: 10.10 - lr: 0.010000\n",
+      "2021-09-21 20:29:42,703 epoch 10 - iter 13/13 - loss 0.10916441 - samples/sec: 18.90 - lr: 0.010000\n",
+      "2021-09-21 20:29:42,704 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:42,704 EPOCH 10 done: loss 0.1092 - lr 0.0100000\n",
+      "2021-09-21 20:29:42,765 DEV : loss 0.29615017771720886 - score 0.0\n",
+      "2021-09-21 20:29:42,770 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:29:46,703 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:29:46,703 Testing using best model ...\n",
+      "2021-09-21 20:29:46,705 loading file None1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:54:43,509 \t1.0\n",
-      "2021-09-08 11:54:43,510 \n",
+      "2021-09-21 20:29:51,692 \t0.0\n",
+      "2021-09-21 20:29:51,693 \n",
       "Results:\n",
-      "- F-score (micro) 1.0\n",
-      "- F-score (macro) 0.1333\n",
-      "- Accuracy 1.0\n",
+      "- F-score (micro) 0.0\n",
+      "- F-score (macro) 0.0\n",
+      "- Accuracy 0.0\n",
       "\n",
       "By class:\n",
       "                                               precision    recall  f1-score   support\n",
@@ -4321,57 +4336,44 @@
       "                    this text is about travel     0.0000    0.0000    0.0000         0\n",
       "                this text is about technology     0.0000    0.0000    0.0000         0\n",
       "                  this text is about wellness     0.0000    0.0000    0.0000         0\n",
-      "                     this text is about women     0.0000    0.0000    0.0000         0\n",
       "                   this text is about parents     0.0000    0.0000    0.0000         0\n",
       "                  this text is about business     0.0000    0.0000    0.0000         0\n",
       "                  this text is about weddings     0.0000    0.0000    0.0000         0\n",
       "                   this text is about fashion     0.0000    0.0000    0.0000         0\n",
       "this text is about entertainmen,the attention     0.0000    0.0000    0.0000         0\n",
-      "                   this text is about divorce     0.0000    0.0000    0.0000         0\n",
+      "                   this text is about science     0.0000    0.0000    0.0000         0\n",
+      "                     this text is about crime     0.0000    0.0000    0.0000         0\n",
       "                  this text is about religion     0.0000    0.0000    0.0000         0\n",
       "                    this text is about sports     0.0000    0.0000    0.0000         0\n",
-      "                    this text is about comedy     0.0000    0.0000    0.0000         0\n",
-      "                   this text is about science     1.0000    1.0000    1.0000         1\n",
-      "                     this text is about crime     1.0000    1.0000    1.0000         1\n",
+      "                  this text is about politics     0.0000    0.0000    0.0000         0\n",
+      "                     this text is about women     0.0000    0.0000    0.0000         1\n",
+      "                   this text is about divorce     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "                                    micro avg     1.0000    1.0000    1.0000         2\n",
-      "                                    macro avg     0.1333    0.1333    0.1333         2\n",
-      "                                 weighted avg     1.0000    1.0000    1.0000         2\n",
-      "                                  samples avg     1.0000    1.0000    1.0000         2\n",
+      "                                    micro avg     0.0000    0.0000    0.0000         2\n",
+      "                                    macro avg     0.0000    0.0000    0.0000         2\n",
+      "                                 weighted avg     0.0000    0.0000    0.0000         2\n",
+      "                                  samples avg     0.0000    0.0000    0.0000         2\n",
       "\n",
-      "2021-09-08 11:54:43,510 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:51,512 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
+      "2021-09-21 20:29:51,693 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:00,671 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:54:55,812 Computing label dictionary. Progress:\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "100%|██████████| 15/15 [00:00<00:00, 50819.52it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2021-09-08 11:54:55,814 [b'this text is about travel', b'this text is about technology', b'this text is about wellness', b'this text is about women', b'this text is about parents', b'this text is about weddings', b'this text is about fashion', b'this text is about entertainmen,the attention', b'this text is about science', b'this text is about divorce', b'this text is about religion', b'this text is about sports', b'this text is about politics', b'this text is about business', b'this text is about comedy']\n"
+      "2021-09-21 20:30:04,826 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\n"
+      "100%|██████████| 15/15 [00:00<00:00, 27389.88it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:54:56,295 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:56,297 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:30:04,829 [b'this text is about technology', b'this text is about wellness', b'this text is about women', b'this text is about parents', b'this text is about business', b'this text is about weddings', b'this text is about fashion', b'this text is about science', b'this text is about divorce', b'this text is about religion', b'this text is about sports', b'this text is about politics', b'this text is about comedy', b'this text is about travel', b'this text is about crime']\n",
+      "2021-09-21 20:30:04,839 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:04,841 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -4684,275 +4686,289 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:54:56,298 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:56,298 Corpus: \"Corpus: 13 train + 1 dev + 2 test sentences\"\n",
-      "2021-09-08 11:54:56,298 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:56,299 Parameters:\n",
-      "2021-09-08 11:54:56,299  - learning_rate: \"0.02\"\n",
-      "2021-09-08 11:54:56,299  - mini_batch_size: \"1\"\n",
-      "2021-09-08 11:54:56,299  - patience: \"3\"\n",
-      "2021-09-08 11:54:56,300  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 11:54:56,300  - max_epochs: \"10\"\n",
-      "2021-09-08 11:54:56,300  - shuffle: \"True\"\n",
-      "2021-09-08 11:54:56,300  - train_with_dev: \"False\"\n",
-      "2021-09-08 11:54:56,301  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 11:54:56,301 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:56,301 Model training base path: \"None1\"\n",
-      "2021-09-08 11:54:56,302 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:56,302 Device: cuda:0\n",
-      "2021-09-08 11:54:56,302 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:56,302 Embeddings storage mode: cpu\n",
-      "2021-09-08 11:54:56,314 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:56,368 epoch 1 - iter 1/13 - loss 1.18687713 - samples/sec: 26.12 - lr: 0.020000\n",
-      "2021-09-08 11:54:56,414 epoch 1 - iter 2/13 - loss 0.63737800 - samples/sec: 22.10 - lr: 0.020000\n",
-      "2021-09-08 11:54:56,460 epoch 1 - iter 3/13 - loss 0.70102223 - samples/sec: 22.08 - lr: 0.020000\n",
-      "2021-09-08 11:54:56,505 epoch 1 - iter 4/13 - loss 0.63787348 - samples/sec: 22.06 - lr: 0.020000\n",
-      "2021-09-08 11:54:56,551 epoch 1 - iter 5/13 - loss 0.62811162 - samples/sec: 22.07 - lr: 0.020000\n",
-      "2021-09-08 11:54:56,597 epoch 1 - iter 6/13 - loss 0.95348840 - samples/sec: 22.08 - lr: 0.020000\n",
-      "2021-09-08 11:54:56,642 epoch 1 - iter 7/13 - loss 0.96020109 - samples/sec: 22.03 - lr: 0.020000\n",
-      "2021-09-08 11:54:56,688 epoch 1 - iter 8/13 - loss 0.96485877 - samples/sec: 22.19 - lr: 0.020000\n",
-      "2021-09-08 11:54:56,734 epoch 1 - iter 9/13 - loss 0.89499416 - samples/sec: 22.05 - lr: 0.020000\n",
-      "2021-09-08 11:54:56,779 epoch 1 - iter 10/13 - loss 0.88844609 - samples/sec: 22.18 - lr: 0.020000\n",
-      "2021-09-08 11:54:56,824 epoch 1 - iter 11/13 - loss 0.84201942 - samples/sec: 22.31 - lr: 0.020000\n",
-      "2021-09-08 11:54:56,871 epoch 1 - iter 12/13 - loss 0.80889334 - samples/sec: 21.67 - lr: 0.020000\n",
-      "2021-09-08 11:54:56,918 epoch 1 - iter 13/13 - loss 0.76735026 - samples/sec: 21.73 - lr: 0.020000\n",
-      "2021-09-08 11:54:56,919 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:54:56,919 EPOCH 1 done: loss 0.7674 - lr 0.0200000\n",
-      "2021-09-08 11:54:56,952 DEV : loss 0.1687483787536621 - score 0.0\n",
-      "2021-09-08 11:54:56,953 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:30:04,842 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:04,842 Corpus: \"Corpus: 13 train + 1 dev + 2 test sentences\"\n",
+      "2021-09-21 20:30:04,843 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:04,843 Parameters:\n",
+      "2021-09-21 20:30:04,843  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:30:04,844  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:30:04,845  - patience: \"3\"\n",
+      "2021-09-21 20:30:04,845  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:30:04,846  - max_epochs: \"10\"\n",
+      "2021-09-21 20:30:04,846  - shuffle: \"True\"\n",
+      "2021-09-21 20:30:04,847  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:30:04,847  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:30:04,848 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:04,848 Model training base path: \"None1\"\n",
+      "2021-09-21 20:30:04,849 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:04,849 Device: cuda:0\n",
+      "2021-09-21 20:30:04,850 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:04,850 Embeddings storage mode: cpu\n",
+      "2021-09-21 20:30:04,858 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:04,984 epoch 1 - iter 1/13 - loss 0.91418093 - samples/sec: 20.44 - lr: 0.020000\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2021-09-21 20:30:05,046 epoch 1 - iter 2/13 - loss 1.06619433 - samples/sec: 16.18 - lr: 0.020000\n",
+      "2021-09-21 20:30:05,110 epoch 1 - iter 3/13 - loss 0.83549628 - samples/sec: 15.79 - lr: 0.020000\n",
+      "2021-09-21 20:30:05,173 epoch 1 - iter 4/13 - loss 0.71224752 - samples/sec: 16.19 - lr: 0.020000\n",
+      "2021-09-21 20:30:05,244 epoch 1 - iter 5/13 - loss 0.76609007 - samples/sec: 14.07 - lr: 0.020000\n",
+      "2021-09-21 20:30:05,308 epoch 1 - iter 6/13 - loss 0.71640889 - samples/sec: 15.80 - lr: 0.020000\n",
+      "2021-09-21 20:30:05,370 epoch 1 - iter 7/13 - loss 0.70915903 - samples/sec: 16.27 - lr: 0.020000\n",
+      "2021-09-21 20:30:05,418 epoch 1 - iter 8/13 - loss 0.63718438 - samples/sec: 21.17 - lr: 0.020000\n",
+      "2021-09-21 20:30:05,465 epoch 1 - iter 9/13 - loss 0.61690731 - samples/sec: 21.23 - lr: 0.020000\n",
+      "2021-09-21 20:30:05,510 epoch 1 - iter 10/13 - loss 0.55844989 - samples/sec: 22.39 - lr: 0.020000\n",
+      "2021-09-21 20:30:05,554 epoch 1 - iter 11/13 - loss 0.50888779 - samples/sec: 23.10 - lr: 0.020000\n",
+      "2021-09-21 20:30:05,599 epoch 1 - iter 12/13 - loss 0.46731828 - samples/sec: 22.73 - lr: 0.020000\n",
+      "2021-09-21 20:30:05,649 epoch 1 - iter 13/13 - loss 0.47420386 - samples/sec: 20.06 - lr: 0.020000\n",
+      "2021-09-21 20:30:05,650 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:05,650 EPOCH 1 done: loss 0.4742 - lr 0.0200000\n",
+      "2021-09-21 20:30:05,781 DEV : loss 1.1194108724594116 - score 0.0\n",
+      "2021-09-21 20:30:05,781 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:30:16,019 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:16,124 epoch 2 - iter 1/13 - loss 0.00270033 - samples/sec: 17.30 - lr: 0.020000\n",
+      "2021-09-21 20:30:16,189 epoch 2 - iter 2/13 - loss 0.02458989 - samples/sec: 15.52 - lr: 0.020000\n",
+      "2021-09-21 20:30:16,255 epoch 2 - iter 3/13 - loss 0.03155989 - samples/sec: 15.32 - lr: 0.020000\n",
+      "2021-09-21 20:30:16,310 epoch 2 - iter 4/13 - loss 0.03397688 - samples/sec: 18.36 - lr: 0.020000\n",
+      "2021-09-21 20:30:16,375 epoch 2 - iter 5/13 - loss 0.03089907 - samples/sec: 15.35 - lr: 0.020000\n",
+      "2021-09-21 20:30:16,427 epoch 2 - iter 6/13 - loss 0.12726256 - samples/sec: 19.68 - lr: 0.020000\n",
+      "2021-09-21 20:30:16,484 epoch 2 - iter 7/13 - loss 0.35678577 - samples/sec: 17.44 - lr: 0.020000\n",
+      "2021-09-21 20:30:16,552 epoch 2 - iter 8/13 - loss 0.31899439 - samples/sec: 14.81 - lr: 0.020000\n",
+      "2021-09-21 20:30:16,606 epoch 2 - iter 9/13 - loss 0.28477794 - samples/sec: 18.73 - lr: 0.020000\n",
+      "2021-09-21 20:30:16,665 epoch 2 - iter 10/13 - loss 0.26898447 - samples/sec: 17.23 - lr: 0.020000\n",
+      "2021-09-21 20:30:16,726 epoch 2 - iter 11/13 - loss 0.39722154 - samples/sec: 16.46 - lr: 0.020000\n",
+      "2021-09-21 20:30:16,790 epoch 2 - iter 12/13 - loss 0.36689871 - samples/sec: 15.84 - lr: 0.020000\n",
+      "2021-09-21 20:30:16,846 epoch 2 - iter 13/13 - loss 0.33907437 - samples/sec: 18.03 - lr: 0.020000\n",
+      "2021-09-21 20:30:16,847 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:16,847 EPOCH 2 done: loss 0.3391 - lr 0.0200000\n",
+      "2021-09-21 20:30:16,903 DEV : loss 0.7910333871841431 - score 0.0\n",
+      "2021-09-21 20:30:16,904 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:55:06,269 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:06,333 epoch 2 - iter 1/13 - loss 0.31741449 - samples/sec: 21.39 - lr: 0.020000\n",
-      "2021-09-08 11:55:06,379 epoch 2 - iter 2/13 - loss 0.56984909 - samples/sec: 21.90 - lr: 0.020000\n",
-      "2021-09-08 11:55:06,425 epoch 2 - iter 3/13 - loss 0.58890697 - samples/sec: 21.97 - lr: 0.020000\n",
-      "2021-09-08 11:55:06,470 epoch 2 - iter 4/13 - loss 0.49487920 - samples/sec: 22.14 - lr: 0.020000\n",
-      "2021-09-08 11:55:06,516 epoch 2 - iter 5/13 - loss 0.61397602 - samples/sec: 22.04 - lr: 0.020000\n",
-      "2021-09-08 11:55:06,561 epoch 2 - iter 6/13 - loss 0.54788342 - samples/sec: 22.28 - lr: 0.020000\n",
-      "2021-09-08 11:55:06,607 epoch 2 - iter 7/13 - loss 0.57961335 - samples/sec: 22.20 - lr: 0.020000\n",
-      "2021-09-08 11:55:06,653 epoch 2 - iter 8/13 - loss 0.57385773 - samples/sec: 21.98 - lr: 0.020000\n",
-      "2021-09-08 11:55:06,698 epoch 2 - iter 9/13 - loss 0.51454084 - samples/sec: 22.27 - lr: 0.020000\n",
-      "2021-09-08 11:55:06,744 epoch 2 - iter 10/13 - loss 0.54552024 - samples/sec: 22.00 - lr: 0.020000\n",
-      "2021-09-08 11:55:06,786 epoch 2 - iter 11/13 - loss 0.49935254 - samples/sec: 23.76 - lr: 0.020000\n",
-      "2021-09-08 11:55:06,832 epoch 2 - iter 12/13 - loss 0.49643341 - samples/sec: 22.01 - lr: 0.020000\n",
-      "2021-09-08 11:55:06,878 epoch 2 - iter 13/13 - loss 0.48239749 - samples/sec: 22.14 - lr: 0.020000\n",
-      "2021-09-08 11:55:06,879 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:06,879 EPOCH 2 done: loss 0.4824 - lr 0.0200000\n",
-      "2021-09-08 11:55:06,909 DEV : loss 0.15598516166210175 - score 0.0\n",
-      "2021-09-08 11:55:06,909 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:30:25,325 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:25,389 epoch 3 - iter 1/13 - loss 0.00715737 - samples/sec: 21.86 - lr: 0.020000\n",
+      "2021-09-21 20:30:25,438 epoch 3 - iter 2/13 - loss 0.38937240 - samples/sec: 20.87 - lr: 0.020000\n",
+      "2021-09-21 20:30:25,486 epoch 3 - iter 3/13 - loss 0.86516289 - samples/sec: 20.77 - lr: 0.020000\n",
+      "2021-09-21 20:30:25,534 epoch 3 - iter 4/13 - loss 0.81953080 - samples/sec: 21.18 - lr: 0.020000\n",
+      "2021-09-21 20:30:25,582 epoch 3 - iter 5/13 - loss 0.73490520 - samples/sec: 20.95 - lr: 0.020000\n",
+      "2021-09-21 20:30:25,631 epoch 3 - iter 6/13 - loss 0.62163252 - samples/sec: 20.95 - lr: 0.020000\n",
+      "2021-09-21 20:30:25,675 epoch 3 - iter 7/13 - loss 0.53455369 - samples/sec: 22.51 - lr: 0.020000\n",
+      "2021-09-21 20:30:25,723 epoch 3 - iter 8/13 - loss 0.47654599 - samples/sec: 21.07 - lr: 0.020000\n",
+      "2021-09-21 20:30:25,767 epoch 3 - iter 9/13 - loss 0.42380416 - samples/sec: 23.11 - lr: 0.020000\n",
+      "2021-09-21 20:30:25,814 epoch 3 - iter 10/13 - loss 0.38346311 - samples/sec: 21.43 - lr: 0.020000\n",
+      "2021-09-21 20:30:25,861 epoch 3 - iter 11/13 - loss 0.40440870 - samples/sec: 21.69 - lr: 0.020000\n",
+      "2021-09-21 20:30:25,905 epoch 3 - iter 12/13 - loss 0.37100828 - samples/sec: 22.58 - lr: 0.020000\n",
+      "2021-09-21 20:30:25,953 epoch 3 - iter 13/13 - loss 0.37645045 - samples/sec: 21.11 - lr: 0.020000\n",
+      "2021-09-21 20:30:25,954 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:25,955 EPOCH 3 done: loss 0.3765 - lr 0.0200000\n",
+      "2021-09-21 20:30:27,668 DEV : loss 0.8610376119613647 - score 0.0\n",
+      "2021-09-21 20:30:27,669 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:30:27,739 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:27,837 epoch 4 - iter 1/13 - loss 0.18233864 - samples/sec: 13.36 - lr: 0.020000\n",
+      "2021-09-21 20:30:27,895 epoch 4 - iter 2/13 - loss 1.99483200 - samples/sec: 17.40 - lr: 0.020000\n",
+      "2021-09-21 20:30:27,950 epoch 4 - iter 3/13 - loss 1.33060350 - samples/sec: 18.13 - lr: 0.020000\n",
+      "2021-09-21 20:30:28,005 epoch 4 - iter 4/13 - loss 0.99897181 - samples/sec: 18.37 - lr: 0.020000\n",
+      "2021-09-21 20:30:28,056 epoch 4 - iter 5/13 - loss 0.86670937 - samples/sec: 19.94 - lr: 0.020000\n",
+      "2021-09-21 20:30:28,108 epoch 4 - iter 6/13 - loss 0.72368633 - samples/sec: 19.43 - lr: 0.020000\n",
+      "2021-09-21 20:30:28,161 epoch 4 - iter 7/13 - loss 0.96333847 - samples/sec: 18.97 - lr: 0.020000\n",
+      "2021-09-21 20:30:28,213 epoch 4 - iter 8/13 - loss 0.84768701 - samples/sec: 19.39 - lr: 0.020000\n",
+      "2021-09-21 20:30:28,269 epoch 4 - iter 9/13 - loss 0.77678238 - samples/sec: 17.87 - lr: 0.020000\n",
+      "2021-09-21 20:30:28,318 epoch 4 - iter 10/13 - loss 0.70054426 - samples/sec: 20.82 - lr: 0.020000\n",
+      "2021-09-21 20:30:28,376 epoch 4 - iter 11/13 - loss 0.69064119 - samples/sec: 17.22 - lr: 0.020000\n",
+      "2021-09-21 20:30:28,424 epoch 4 - iter 12/13 - loss 0.63339086 - samples/sec: 21.37 - lr: 0.020000\n",
+      "2021-09-21 20:30:28,484 epoch 4 - iter 13/13 - loss 0.58535339 - samples/sec: 16.83 - lr: 0.020000\n",
+      "2021-09-21 20:30:28,485 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:28,485 EPOCH 4 done: loss 0.5854 - lr 0.0200000\n",
+      "2021-09-21 20:30:29,224 DEV : loss 0.22268347442150116 - score 0.0\n",
+      "2021-09-21 20:30:29,225 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:55:14,218 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:14,282 epoch 3 - iter 1/13 - loss 0.08506449 - samples/sec: 21.34 - lr: 0.020000\n",
-      "2021-09-08 11:55:14,328 epoch 3 - iter 2/13 - loss 0.20088702 - samples/sec: 22.13 - lr: 0.020000\n",
-      "2021-09-08 11:55:14,373 epoch 3 - iter 3/13 - loss 0.41339361 - samples/sec: 21.97 - lr: 0.020000\n",
-      "2021-09-08 11:55:14,419 epoch 3 - iter 4/13 - loss 0.33133956 - samples/sec: 22.11 - lr: 0.020000\n",
-      "2021-09-08 11:55:14,462 epoch 3 - iter 5/13 - loss 0.26631962 - samples/sec: 23.73 - lr: 0.020000\n",
-      "2021-09-08 11:55:14,507 epoch 3 - iter 6/13 - loss 0.29776197 - samples/sec: 22.11 - lr: 0.020000\n",
-      "2021-09-08 11:55:14,553 epoch 3 - iter 7/13 - loss 0.34624846 - samples/sec: 22.13 - lr: 0.020000\n",
-      "2021-09-08 11:55:14,596 epoch 3 - iter 8/13 - loss 0.30642777 - samples/sec: 23.73 - lr: 0.020000\n",
-      "2021-09-08 11:55:14,641 epoch 3 - iter 9/13 - loss 0.30150068 - samples/sec: 22.12 - lr: 0.020000\n",
-      "2021-09-08 11:55:14,687 epoch 3 - iter 10/13 - loss 0.29453236 - samples/sec: 22.22 - lr: 0.020000\n",
-      "2021-09-08 11:55:14,729 epoch 3 - iter 11/13 - loss 0.26811475 - samples/sec: 23.99 - lr: 0.020000\n",
-      "2021-09-08 11:55:14,775 epoch 3 - iter 12/13 - loss 0.27847879 - samples/sec: 21.94 - lr: 0.020000\n",
-      "2021-09-08 11:55:14,820 epoch 3 - iter 13/13 - loss 0.27040050 - samples/sec: 22.17 - lr: 0.020000\n",
-      "2021-09-08 11:55:14,821 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:14,821 EPOCH 3 done: loss 0.2704 - lr 0.0200000\n",
-      "2021-09-08 11:55:16,264 DEV : loss 0.26229187846183777 - score 0.0\n",
-      "2021-09-08 11:55:16,264 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:55:16,270 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:16,327 epoch 4 - iter 1/13 - loss 0.00870038 - samples/sec: 23.75 - lr: 0.020000\n",
-      "2021-09-08 11:55:16,371 epoch 4 - iter 2/13 - loss 0.00860494 - samples/sec: 23.32 - lr: 0.020000\n",
-      "2021-09-08 11:55:16,413 epoch 4 - iter 3/13 - loss 0.01067878 - samples/sec: 23.75 - lr: 0.020000\n",
-      "2021-09-08 11:55:16,459 epoch 4 - iter 4/13 - loss 0.07989724 - samples/sec: 22.05 - lr: 0.020000\n",
-      "2021-09-08 11:55:16,505 epoch 4 - iter 5/13 - loss 0.08275659 - samples/sec: 22.07 - lr: 0.020000\n",
-      "2021-09-08 11:55:16,551 epoch 4 - iter 6/13 - loss 0.07615659 - samples/sec: 22.08 - lr: 0.020000\n",
-      "2021-09-08 11:55:16,593 epoch 4 - iter 7/13 - loss 0.06555589 - samples/sec: 23.53 - lr: 0.020000\n",
-      "2021-09-08 11:55:16,639 epoch 4 - iter 8/13 - loss 0.10126706 - samples/sec: 22.20 - lr: 0.020000\n",
-      "2021-09-08 11:55:16,682 epoch 4 - iter 9/13 - loss 0.09026080 - samples/sec: 23.56 - lr: 0.020000\n",
-      "2021-09-08 11:55:16,727 epoch 4 - iter 10/13 - loss 0.18255852 - samples/sec: 22.25 - lr: 0.020000\n",
-      "2021-09-08 11:55:16,773 epoch 4 - iter 11/13 - loss 0.22818158 - samples/sec: 21.96 - lr: 0.020000\n",
-      "2021-09-08 11:55:16,819 epoch 4 - iter 12/13 - loss 0.24502021 - samples/sec: 22.19 - lr: 0.020000\n",
-      "2021-09-08 11:55:16,861 epoch 4 - iter 13/13 - loss 0.22644965 - samples/sec: 23.82 - lr: 0.020000\n",
-      "2021-09-08 11:55:16,862 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:16,862 EPOCH 4 done: loss 0.2264 - lr 0.0200000\n",
-      "2021-09-08 11:55:16,892 DEV : loss 0.27208107709884644 - score 0.0\n",
-      "2021-09-08 11:55:16,893 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:55:16,895 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 20:30:34,675 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:34,799 epoch 5 - iter 1/13 - loss 0.00518670 - samples/sec: 16.20 - lr: 0.020000\n",
+      "2021-09-21 20:30:34,861 epoch 5 - iter 2/13 - loss 0.43210296 - samples/sec: 16.27 - lr: 0.020000\n",
+      "2021-09-21 20:30:34,917 epoch 5 - iter 3/13 - loss 0.31908222 - samples/sec: 18.25 - lr: 0.020000\n",
+      "2021-09-21 20:30:34,965 epoch 5 - iter 4/13 - loss 0.24015148 - samples/sec: 20.84 - lr: 0.020000\n",
+      "2021-09-21 20:30:35,023 epoch 5 - iter 5/13 - loss 0.19337560 - samples/sec: 17.52 - lr: 0.020000\n",
+      "2021-09-21 20:30:35,068 epoch 5 - iter 6/13 - loss 0.16279788 - samples/sec: 22.59 - lr: 0.020000\n",
+      "2021-09-21 20:30:35,112 epoch 5 - iter 7/13 - loss 0.13962505 - samples/sec: 22.62 - lr: 0.020000\n",
+      "2021-09-21 20:30:35,160 epoch 5 - iter 8/13 - loss 0.13622529 - samples/sec: 21.37 - lr: 0.020000\n",
+      "2021-09-21 20:30:35,213 epoch 5 - iter 9/13 - loss 0.12251566 - samples/sec: 19.00 - lr: 0.020000\n",
+      "2021-09-21 20:30:35,276 epoch 5 - iter 10/13 - loss 0.11292255 - samples/sec: 15.88 - lr: 0.020000\n",
+      "2021-09-21 20:30:35,342 epoch 5 - iter 11/13 - loss 0.10269223 - samples/sec: 15.34 - lr: 0.020000\n",
+      "2021-09-21 20:30:35,402 epoch 5 - iter 12/13 - loss 0.10125176 - samples/sec: 16.94 - lr: 0.020000\n",
+      "2021-09-21 20:30:35,453 epoch 5 - iter 13/13 - loss 0.09424784 - samples/sec: 19.82 - lr: 0.020000\n",
+      "2021-09-21 20:30:35,454 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:55:16,952 epoch 5 - iter 1/13 - loss 0.02515281 - samples/sec: 23.89 - lr: 0.020000\n",
-      "2021-09-08 11:55:16,998 epoch 5 - iter 2/13 - loss 0.57147503 - samples/sec: 21.98 - lr: 0.020000\n",
-      "2021-09-08 11:55:17,043 epoch 5 - iter 3/13 - loss 1.07170320 - samples/sec: 22.28 - lr: 0.020000\n",
-      "2021-09-08 11:55:17,086 epoch 5 - iter 4/13 - loss 0.81883438 - samples/sec: 23.60 - lr: 0.020000\n",
-      "2021-09-08 11:55:17,128 epoch 5 - iter 5/13 - loss 0.65778094 - samples/sec: 23.80 - lr: 0.020000\n",
-      "2021-09-08 11:55:17,174 epoch 5 - iter 6/13 - loss 0.58154368 - samples/sec: 22.03 - lr: 0.020000\n",
-      "2021-09-08 11:55:17,216 epoch 5 - iter 7/13 - loss 0.50249461 - samples/sec: 23.75 - lr: 0.020000\n",
-      "2021-09-08 11:55:17,259 epoch 5 - iter 8/13 - loss 0.44004819 - samples/sec: 23.68 - lr: 0.020000\n",
-      "2021-09-08 11:55:17,305 epoch 5 - iter 9/13 - loss 0.39799536 - samples/sec: 22.08 - lr: 0.020000\n",
-      "2021-09-08 11:55:17,350 epoch 5 - iter 10/13 - loss 0.36719236 - samples/sec: 22.24 - lr: 0.020000\n",
-      "2021-09-08 11:55:17,395 epoch 5 - iter 11/13 - loss 0.33451404 - samples/sec: 22.69 - lr: 0.020000\n",
-      "2021-09-08 11:55:17,440 epoch 5 - iter 12/13 - loss 0.38479073 - samples/sec: 22.17 - lr: 0.020000\n",
-      "2021-09-08 11:55:17,486 epoch 5 - iter 13/13 - loss 0.36012004 - samples/sec: 21.96 - lr: 0.020000\n",
-      "2021-09-08 11:55:17,487 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:17,487 EPOCH 5 done: loss 0.3601 - lr 0.0200000\n",
-      "2021-09-08 11:55:17,516 DEV : loss 0.12338437139987946 - score 0.0\n",
-      "2021-09-08 11:55:17,517 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:30:35,454 EPOCH 5 done: loss 0.0942 - lr 0.0200000\n",
+      "2021-09-21 20:30:35,944 DEV : loss 0.2172025889158249 - score 0.0\n",
+      "2021-09-21 20:30:35,945 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:55:23,377 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:23,441 epoch 6 - iter 1/13 - loss 0.21123393 - samples/sec: 21.40 - lr: 0.020000\n",
-      "2021-09-08 11:55:23,484 epoch 6 - iter 2/13 - loss 0.10599612 - samples/sec: 23.86 - lr: 0.020000\n",
-      "2021-09-08 11:55:23,526 epoch 6 - iter 3/13 - loss 0.07414466 - samples/sec: 23.84 - lr: 0.020000\n",
-      "2021-09-08 11:55:23,569 epoch 6 - iter 4/13 - loss 0.06292635 - samples/sec: 23.61 - lr: 0.020000\n",
-      "2021-09-08 11:55:23,611 epoch 6 - iter 5/13 - loss 0.05156651 - samples/sec: 24.10 - lr: 0.020000\n",
-      "2021-09-08 11:55:23,653 epoch 6 - iter 6/13 - loss 0.04345748 - samples/sec: 23.67 - lr: 0.020000\n",
-      "2021-09-08 11:55:23,699 epoch 6 - iter 7/13 - loss 0.32834810 - samples/sec: 22.35 - lr: 0.020000\n",
-      "2021-09-08 11:55:23,741 epoch 6 - iter 8/13 - loss 0.28748671 - samples/sec: 23.66 - lr: 0.020000\n",
-      "2021-09-08 11:55:23,787 epoch 6 - iter 9/13 - loss 0.29499297 - samples/sec: 22.19 - lr: 0.020000\n",
-      "2021-09-08 11:55:23,832 epoch 6 - iter 10/13 - loss 0.28457149 - samples/sec: 22.24 - lr: 0.020000\n",
-      "2021-09-08 11:55:23,877 epoch 6 - iter 11/13 - loss 0.26415753 - samples/sec: 22.22 - lr: 0.020000\n",
-      "2021-09-08 11:55:23,924 epoch 6 - iter 12/13 - loss 0.24459270 - samples/sec: 21.63 - lr: 0.020000\n",
-      "2021-09-08 11:55:23,966 epoch 6 - iter 13/13 - loss 0.22597143 - samples/sec: 23.83 - lr: 0.020000\n",
-      "2021-09-08 11:55:23,967 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:23,968 EPOCH 6 done: loss 0.2260 - lr 0.0200000\n",
-      "2021-09-08 11:55:24,248 DEV : loss 0.30443963408470154 - score 0.0\n",
-      "2021-09-08 11:55:24,249 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:55:24,264 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:24,325 epoch 7 - iter 1/13 - loss 1.19541490 - samples/sec: 21.93 - lr: 0.020000\n",
-      "2021-09-08 11:55:24,368 epoch 7 - iter 2/13 - loss 0.59979893 - samples/sec: 23.73 - lr: 0.020000\n",
-      "2021-09-08 11:55:24,414 epoch 7 - iter 3/13 - loss 0.40857314 - samples/sec: 22.12 - lr: 0.020000\n",
-      "2021-09-08 11:55:24,456 epoch 7 - iter 4/13 - loss 0.30747379 - samples/sec: 23.78 - lr: 0.020000\n",
-      "2021-09-08 11:55:24,498 epoch 7 - iter 5/13 - loss 0.24722030 - samples/sec: 23.93 - lr: 0.020000\n",
-      "2021-09-08 11:55:24,544 epoch 7 - iter 6/13 - loss 0.31641808 - samples/sec: 22.25 - lr: 0.020000\n",
-      "2021-09-08 11:55:24,586 epoch 7 - iter 7/13 - loss 0.27135708 - samples/sec: 23.63 - lr: 0.020000\n",
-      "2021-09-08 11:55:24,632 epoch 7 - iter 8/13 - loss 0.24146181 - samples/sec: 22.30 - lr: 0.020000\n",
-      "2021-09-08 11:55:24,674 epoch 7 - iter 9/13 - loss 0.21492331 - samples/sec: 23.68 - lr: 0.020000\n",
-      "2021-09-08 11:55:24,716 epoch 7 - iter 10/13 - loss 0.19420019 - samples/sec: 23.93 - lr: 0.020000\n",
-      "2021-09-08 11:55:24,759 epoch 7 - iter 11/13 - loss 0.17660731 - samples/sec: 23.95 - lr: 0.020000\n",
-      "2021-09-08 11:55:24,804 epoch 7 - iter 12/13 - loss 0.17350408 - samples/sec: 22.02 - lr: 0.020000\n",
-      "2021-09-08 11:55:24,850 epoch 7 - iter 13/13 - loss 0.20095184 - samples/sec: 22.31 - lr: 0.020000\n",
-      "2021-09-08 11:55:24,851 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:24,851 EPOCH 7 done: loss 0.2010 - lr 0.0200000\n",
-      "2021-09-08 11:55:25,872 DEV : loss 0.16284450888633728 - score 0.0\n",
-      "2021-09-08 11:55:25,873 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:55:25,877 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:25,935 epoch 8 - iter 1/13 - loss 0.00076831 - samples/sec: 23.77 - lr: 0.020000\n",
-      "2021-09-08 11:55:25,983 epoch 8 - iter 2/13 - loss 0.00063931 - samples/sec: 20.87 - lr: 0.020000\n",
-      "2021-09-08 11:55:26,032 epoch 8 - iter 3/13 - loss 0.01911254 - samples/sec: 20.75 - lr: 0.020000\n",
-      "2021-09-08 11:55:26,079 epoch 8 - iter 4/13 - loss 0.01439967 - samples/sec: 21.69 - lr: 0.020000\n",
-      "2021-09-08 11:55:26,128 epoch 8 - iter 5/13 - loss 0.04892816 - samples/sec: 20.38 - lr: 0.020000\n",
-      "2021-09-08 11:55:26,174 epoch 8 - iter 6/13 - loss 0.04090766 - samples/sec: 21.99 - lr: 0.020000\n",
-      "2021-09-08 11:55:26,223 epoch 8 - iter 7/13 - loss 0.29930975 - samples/sec: 21.02 - lr: 0.020000\n",
-      "2021-09-08 11:55:26,269 epoch 8 - iter 8/13 - loss 0.42834160 - samples/sec: 22.01 - lr: 0.020000\n",
-      "2021-09-08 11:55:26,317 epoch 8 - iter 9/13 - loss 0.38289864 - samples/sec: 20.95 - lr: 0.020000\n",
-      "2021-09-08 11:55:26,364 epoch 8 - iter 10/13 - loss 0.34482635 - samples/sec: 21.58 - lr: 0.020000\n",
-      "2021-09-08 11:55:26,413 epoch 8 - iter 11/13 - loss 0.31625019 - samples/sec: 20.49 - lr: 0.020000\n",
-      "2021-09-08 11:55:26,460 epoch 8 - iter 12/13 - loss 0.29219934 - samples/sec: 21.57 - lr: 0.020000\n",
-      "2021-09-08 11:55:26,507 epoch 8 - iter 13/13 - loss 0.26976064 - samples/sec: 21.95 - lr: 0.020000\n",
-      "2021-09-08 11:55:26,508 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:26,508 EPOCH 8 done: loss 0.2698 - lr 0.0200000\n",
-      "2021-09-08 11:55:26,642 DEV : loss 0.2745881676673889 - score 0.0\n",
-      "2021-09-08 11:55:26,643 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:55:26,720 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:26,779 epoch 9 - iter 1/13 - loss 0.00082529 - samples/sec: 23.20 - lr: 0.020000\n",
-      "2021-09-08 11:55:26,822 epoch 9 - iter 2/13 - loss 0.00693596 - samples/sec: 23.53 - lr: 0.020000\n",
-      "2021-09-08 11:55:26,865 epoch 9 - iter 3/13 - loss 0.00478575 - samples/sec: 23.77 - lr: 0.020000\n",
-      "2021-09-08 11:55:26,907 epoch 9 - iter 4/13 - loss 0.00579901 - samples/sec: 23.95 - lr: 0.020000\n",
-      "2021-09-08 11:55:26,950 epoch 9 - iter 5/13 - loss 0.00627967 - samples/sec: 23.58 - lr: 0.020000\n",
-      "2021-09-08 11:55:26,992 epoch 9 - iter 6/13 - loss 0.00580985 - samples/sec: 23.98 - lr: 0.020000\n",
-      "2021-09-08 11:55:27,035 epoch 9 - iter 7/13 - loss 0.00510148 - samples/sec: 23.73 - lr: 0.020000\n",
-      "2021-09-08 11:55:27,077 epoch 9 - iter 8/13 - loss 0.00462076 - samples/sec: 23.59 - lr: 0.020000\n",
-      "2021-09-08 11:55:27,123 epoch 9 - iter 9/13 - loss 0.00723245 - samples/sec: 22.27 - lr: 0.020000\n",
-      "2021-09-08 11:55:27,165 epoch 9 - iter 10/13 - loss 0.00664537 - samples/sec: 23.75 - lr: 0.020000\n",
-      "2021-09-08 11:55:27,208 epoch 9 - iter 11/13 - loss 0.00717135 - samples/sec: 23.49 - lr: 0.020000\n",
-      "2021-09-08 11:55:27,254 epoch 9 - iter 12/13 - loss 0.00830417 - samples/sec: 22.13 - lr: 0.020000\n",
-      "2021-09-08 11:55:27,299 epoch 9 - iter 13/13 - loss 0.00771792 - samples/sec: 22.33 - lr: 0.020000\n",
-      "2021-09-08 11:55:27,300 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 20:30:42,357 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:42,450 epoch 6 - iter 1/13 - loss 0.00668896 - samples/sec: 18.64 - lr: 0.020000\n",
+      "2021-09-21 20:30:42,508 epoch 6 - iter 2/13 - loss 0.06007740 - samples/sec: 17.22 - lr: 0.020000\n",
+      "2021-09-21 20:30:42,559 epoch 6 - iter 3/13 - loss 0.04038541 - samples/sec: 19.78 - lr: 0.020000\n",
+      "2021-09-21 20:30:42,615 epoch 6 - iter 4/13 - loss 0.03181937 - samples/sec: 18.05 - lr: 0.020000\n",
+      "2021-09-21 20:30:42,681 epoch 6 - iter 5/13 - loss 0.02662026 - samples/sec: 15.31 - lr: 0.020000\n",
+      "2021-09-21 20:30:42,739 epoch 6 - iter 6/13 - loss 0.02250599 - samples/sec: 17.33 - lr: 0.020000\n",
+      "2021-09-21 20:30:42,791 epoch 6 - iter 7/13 - loss 0.01941896 - samples/sec: 19.24 - lr: 0.020000\n",
+      "2021-09-21 20:30:42,844 epoch 6 - iter 8/13 - loss 0.01717634 - samples/sec: 19.04 - lr: 0.020000\n",
+      "2021-09-21 20:30:42,899 epoch 6 - iter 9/13 - loss 0.01531419 - samples/sec: 18.55 - lr: 0.020000\n",
+      "2021-09-21 20:30:42,963 epoch 6 - iter 10/13 - loss 0.25119239 - samples/sec: 15.52 - lr: 0.020000\n",
+      "2021-09-21 20:30:43,022 epoch 6 - iter 11/13 - loss 0.22876684 - samples/sec: 17.33 - lr: 0.020000\n",
+      "2021-09-21 20:30:43,079 epoch 6 - iter 12/13 - loss 0.21672694 - samples/sec: 17.52 - lr: 0.020000\n",
+      "2021-09-21 20:30:43,138 epoch 6 - iter 13/13 - loss 0.20008265 - samples/sec: 17.03 - lr: 0.020000\n",
+      "2021-09-21 20:30:43,139 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:43,140 EPOCH 6 done: loss 0.2001 - lr 0.0200000\n",
+      "2021-09-21 20:30:46,577 DEV : loss 0.3204837739467621 - score 0.0\n",
+      "2021-09-21 20:30:46,580 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:30:46,587 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:46,677 epoch 7 - iter 1/13 - loss 0.00326174 - samples/sec: 20.59 - lr: 0.020000\n",
+      "2021-09-21 20:30:46,723 epoch 7 - iter 2/13 - loss 0.00609539 - samples/sec: 22.28 - lr: 0.020000\n",
+      "2021-09-21 20:30:46,768 epoch 7 - iter 3/13 - loss 0.00481979 - samples/sec: 22.17 - lr: 0.020000\n",
+      "2021-09-21 20:30:46,814 epoch 7 - iter 4/13 - loss 0.00392816 - samples/sec: 22.17 - lr: 0.020000\n",
+      "2021-09-21 20:30:46,867 epoch 7 - iter 5/13 - loss 0.00364667 - samples/sec: 19.21 - lr: 0.020000\n",
+      "2021-09-21 20:30:46,935 epoch 7 - iter 6/13 - loss 0.00315511 - samples/sec: 14.69 - lr: 0.020000\n",
+      "2021-09-21 20:30:46,995 epoch 7 - iter 7/13 - loss 0.00286034 - samples/sec: 17.07 - lr: 0.020000\n",
+      "2021-09-21 20:30:47,053 epoch 7 - iter 8/13 - loss 0.00262559 - samples/sec: 17.31 - lr: 0.020000\n",
+      "2021-09-21 20:30:47,116 epoch 7 - iter 9/13 - loss 0.07690131 - samples/sec: 15.90 - lr: 0.020000\n",
+      "2021-09-21 20:30:47,185 epoch 7 - iter 10/13 - loss 0.07300456 - samples/sec: 14.64 - lr: 0.020000\n",
+      "2021-09-21 20:30:47,241 epoch 7 - iter 11/13 - loss 0.06642095 - samples/sec: 18.23 - lr: 0.020000\n",
+      "2021-09-21 20:30:47,290 epoch 7 - iter 12/13 - loss 0.06113068 - samples/sec: 20.63 - lr: 0.020000\n",
+      "2021-09-21 20:30:47,354 epoch 7 - iter 13/13 - loss 0.05653000 - samples/sec: 15.69 - lr: 0.020000\n",
+      "2021-09-21 20:30:47,355 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:47,356 EPOCH 7 done: loss 0.0565 - lr 0.0200000\n",
+      "2021-09-21 20:30:47,410 DEV : loss 0.22864967584609985 - score 0.0\n",
+      "2021-09-21 20:30:47,412 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:30:47,414 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:47,529 epoch 8 - iter 1/13 - loss 0.29815665 - samples/sec: 14.29 - lr: 0.020000\n",
+      "2021-09-21 20:30:47,588 epoch 8 - iter 2/13 - loss 0.14955937 - samples/sec: 16.92 - lr: 0.020000\n",
+      "2021-09-21 20:30:47,641 epoch 8 - iter 3/13 - loss 0.10015716 - samples/sec: 19.13 - lr: 0.020000\n",
+      "2021-09-21 20:30:47,694 epoch 8 - iter 4/13 - loss 0.07541450 - samples/sec: 19.19 - lr: 0.020000\n",
+      "2021-09-21 20:30:47,742 epoch 8 - iter 5/13 - loss 0.06189052 - samples/sec: 20.94 - lr: 0.020000\n",
+      "2021-09-21 20:30:47,803 epoch 8 - iter 6/13 - loss 0.05171193 - samples/sec: 16.57 - lr: 0.020000\n",
+      "2021-09-21 20:30:47,859 epoch 8 - iter 7/13 - loss 0.04443962 - samples/sec: 17.76 - lr: 0.020000\n",
+      "2021-09-21 20:30:47,925 epoch 8 - iter 8/13 - loss 0.08480692 - samples/sec: 15.39 - lr: 0.020000\n",
+      "2021-09-21 20:30:47,989 epoch 8 - iter 9/13 - loss 0.12442580 - samples/sec: 15.68 - lr: 0.020000\n",
+      "2021-09-21 20:30:48,039 epoch 8 - iter 10/13 - loss 0.11209556 - samples/sec: 20.01 - lr: 0.020000\n",
+      "2021-09-21 20:30:48,102 epoch 8 - iter 11/13 - loss 0.23243941 - samples/sec: 16.20 - lr: 0.020000\n",
+      "2021-09-21 20:30:48,162 epoch 8 - iter 12/13 - loss 0.21327683 - samples/sec: 16.62 - lr: 0.020000\n",
+      "2021-09-21 20:30:48,212 epoch 8 - iter 13/13 - loss 0.19691224 - samples/sec: 20.40 - lr: 0.020000\n",
+      "2021-09-21 20:30:48,213 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:48,213 EPOCH 8 done: loss 0.1969 - lr 0.0200000\n",
+      "2021-09-21 20:30:48,292 DEV : loss 0.34130874276161194 - score 0.0\n",
+      "2021-09-21 20:30:48,294 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:30:48,296 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:48,394 epoch 9 - iter 1/13 - loss 0.00201994 - samples/sec: 17.91 - lr: 0.020000\n",
+      "2021-09-21 20:30:48,445 epoch 9 - iter 2/13 - loss 0.00123016 - samples/sec: 19.70 - lr: 0.020000\n",
+      "2021-09-21 20:30:48,509 epoch 9 - iter 3/13 - loss 0.00095122 - samples/sec: 15.92 - lr: 0.020000\n",
+      "2021-09-21 20:30:48,573 epoch 9 - iter 4/13 - loss 0.00807691 - samples/sec: 15.54 - lr: 0.020000\n",
+      "2021-09-21 20:30:48,634 epoch 9 - iter 5/13 - loss 0.00724143 - samples/sec: 16.53 - lr: 0.020000\n",
+      "2021-09-21 20:30:48,689 epoch 9 - iter 6/13 - loss 0.00607715 - samples/sec: 18.35 - lr: 0.020000\n",
+      "2021-09-21 20:30:48,760 epoch 9 - iter 7/13 - loss 0.00642904 - samples/sec: 14.15 - lr: 0.020000\n",
+      "2021-09-21 20:30:48,816 epoch 9 - iter 8/13 - loss 0.00571263 - samples/sec: 18.06 - lr: 0.020000\n",
+      "2021-09-21 20:30:48,871 epoch 9 - iter 9/13 - loss 0.01707675 - samples/sec: 18.56 - lr: 0.020000\n",
+      "2021-09-21 20:30:48,937 epoch 9 - iter 10/13 - loss 0.02733730 - samples/sec: 15.18 - lr: 0.020000\n",
+      "2021-09-21 20:30:48,998 epoch 9 - iter 11/13 - loss 0.02501668 - samples/sec: 16.45 - lr: 0.020000\n",
+      "2021-09-21 20:30:49,063 epoch 9 - iter 12/13 - loss 0.11390202 - samples/sec: 15.57 - lr: 0.020000\n",
+      "2021-09-21 20:30:49,121 epoch 9 - iter 13/13 - loss 0.10596097 - samples/sec: 17.20 - lr: 0.020000\n",
+      "2021-09-21 20:30:49,122 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:49,123 EPOCH 9 done: loss 0.1060 - lr 0.0200000\n",
+      "2021-09-21 20:30:49,162 DEV : loss 0.13796785473823547 - score 0.0\n",
+      "2021-09-21 20:30:49,165 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:30:53,302 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:53,466 epoch 10 - iter 1/13 - loss 0.00084396 - samples/sec: 14.17 - lr: 0.020000\n",
+      "2021-09-21 20:30:53,521 epoch 10 - iter 2/13 - loss 0.00143692 - samples/sec: 18.44 - lr: 0.020000\n",
+      "2021-09-21 20:30:53,595 epoch 10 - iter 3/13 - loss 0.00120513 - samples/sec: 13.57 - lr: 0.020000\n",
+      "2021-09-21 20:30:53,655 epoch 10 - iter 4/13 - loss 0.00315102 - samples/sec: 16.76 - lr: 0.020000\n",
+      "2021-09-21 20:30:53,710 epoch 10 - iter 5/13 - loss 0.00262610 - samples/sec: 18.08 - lr: 0.020000\n",
+      "2021-09-21 20:30:53,761 epoch 10 - iter 6/13 - loss 0.00246362 - samples/sec: 20.08 - lr: 0.020000\n",
+      "2021-09-21 20:30:53,824 epoch 10 - iter 7/13 - loss 0.00229493 - samples/sec: 15.92 - lr: 0.020000\n",
+      "2021-09-21 20:30:53,886 epoch 10 - iter 8/13 - loss 0.00213377 - samples/sec: 16.21 - lr: 0.020000\n",
+      "2021-09-21 20:30:53,946 epoch 10 - iter 9/13 - loss 0.00226716 - samples/sec: 16.81 - lr: 0.020000\n",
+      "2021-09-21 20:30:54,020 epoch 10 - iter 10/13 - loss 0.00224681 - samples/sec: 13.59 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:55:27,300 EPOCH 9 done: loss 0.0077 - lr 0.0200000\n",
-      "2021-09-08 11:55:27,446 DEV : loss 0.013552281074225903 - score 0.0\n",
-      "2021-09-08 11:55:27,446 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 11:55:32,463 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:32,526 epoch 10 - iter 1/13 - loss 0.64796597 - samples/sec: 21.82 - lr: 0.020000\n",
-      "2021-09-08 11:55:32,569 epoch 10 - iter 2/13 - loss 0.32430434 - samples/sec: 23.56 - lr: 0.020000\n",
-      "2021-09-08 11:55:32,615 epoch 10 - iter 3/13 - loss 0.22809171 - samples/sec: 22.02 - lr: 0.020000\n",
-      "2021-09-08 11:55:32,657 epoch 10 - iter 4/13 - loss 0.17122209 - samples/sec: 23.83 - lr: 0.020000\n",
-      "2021-09-08 11:55:32,700 epoch 10 - iter 5/13 - loss 0.13834410 - samples/sec: 23.65 - lr: 0.020000\n",
-      "2021-09-08 11:55:32,742 epoch 10 - iter 6/13 - loss 0.11534700 - samples/sec: 23.85 - lr: 0.020000\n",
-      "2021-09-08 11:55:32,784 epoch 10 - iter 7/13 - loss 0.09894797 - samples/sec: 24.00 - lr: 0.020000\n",
-      "2021-09-08 11:55:32,827 epoch 10 - iter 8/13 - loss 0.08681759 - samples/sec: 23.72 - lr: 0.020000\n",
-      "2021-09-08 11:55:32,869 epoch 10 - iter 9/13 - loss 0.07727235 - samples/sec: 24.02 - lr: 0.020000\n",
-      "2021-09-08 11:55:32,912 epoch 10 - iter 10/13 - loss 0.06957007 - samples/sec: 23.64 - lr: 0.020000\n",
-      "2021-09-08 11:55:32,954 epoch 10 - iter 11/13 - loss 0.06336176 - samples/sec: 23.57 - lr: 0.020000\n",
-      "2021-09-08 11:55:33,000 epoch 10 - iter 12/13 - loss 0.07476402 - samples/sec: 22.26 - lr: 0.020000\n",
-      "2021-09-08 11:55:33,045 epoch 10 - iter 13/13 - loss 0.11907240 - samples/sec: 22.06 - lr: 0.020000\n",
-      "2021-09-08 11:55:33,046 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:33,047 EPOCH 10 done: loss 0.1191 - lr 0.0200000\n",
-      "2021-09-08 11:55:33,077 DEV : loss 0.4856320023536682 - score 0.0\n",
-      "2021-09-08 11:55:33,078 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:55:40,138 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:40,138 Testing using best model ...\n",
-      "2021-09-08 11:55:40,140 loading file None1/best-model.pt\n",
+      "2021-09-21 20:30:54,099 epoch 10 - iter 11/13 - loss 0.00303740 - samples/sec: 12.69 - lr: 0.020000\n",
+      "2021-09-21 20:30:54,159 epoch 10 - iter 12/13 - loss 0.00281642 - samples/sec: 16.95 - lr: 0.020000\n",
+      "2021-09-21 20:30:54,220 epoch 10 - iter 13/13 - loss 0.00263670 - samples/sec: 16.33 - lr: 0.020000\n",
+      "2021-09-21 20:30:54,221 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:54,222 EPOCH 10 done: loss 0.0026 - lr 0.0200000\n",
+      "2021-09-21 20:30:54,261 DEV : loss 0.1789863109588623 - score 0.0\n",
+      "2021-09-21 20:30:54,263 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:30:58,399 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:30:58,399 Testing using best model ...\n",
+      "2021-09-21 20:30:58,401 loading file None1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:55:45,707 \t0.5\n",
-      "2021-09-08 11:55:45,707 \n",
+      "2021-09-21 20:31:03,783 \t0.5\n",
+      "2021-09-21 20:31:03,783 \n",
       "Results:\n",
       "- F-score (micro) 0.5\n",
       "- F-score (macro) 0.0667\n",
       "- Accuracy 0.5\n",
       "\n",
       "By class:\n",
-      "                                               precision    recall  f1-score   support\n",
+      "                               precision    recall  f1-score   support\n",
       "\n",
-      "                    this text is about travel     0.0000    0.0000    0.0000         0\n",
-      "                this text is about technology     0.0000    0.0000    0.0000         0\n",
-      "                  this text is about wellness     0.0000    0.0000    0.0000         0\n",
-      "                     this text is about women     0.0000    0.0000    0.0000         0\n",
-      "                   this text is about parents     0.0000    0.0000    0.0000         0\n",
-      "                  this text is about weddings     0.0000    0.0000    0.0000         0\n",
-      "                   this text is about fashion     0.0000    0.0000    0.0000         0\n",
-      "this text is about entertainmen,the attention     0.0000    0.0000    0.0000         0\n",
-      "                   this text is about science     0.0000    0.0000    0.0000         0\n",
-      "                   this text is about divorce     0.0000    0.0000    0.0000         0\n",
-      "                  this text is about religion     0.0000    0.0000    0.0000         0\n",
-      "                    this text is about sports     0.0000    0.0000    0.0000         0\n",
-      "                  this text is about politics     0.0000    0.0000    0.0000         0\n",
-      "                  this text is about business     1.0000    1.0000    1.0000         1\n",
-      "                    this text is about comedy     0.0000    0.0000    0.0000         1\n",
+      "this text is about technology     0.0000    0.0000    0.0000         0\n",
+      "  this text is about wellness     0.0000    0.0000    0.0000         0\n",
+      "     this text is about women     0.0000    0.0000    0.0000         0\n",
+      "   this text is about parents     0.0000    0.0000    0.0000         0\n",
+      "  this text is about business     0.0000    0.0000    0.0000         0\n",
+      "  this text is about weddings     0.0000    0.0000    0.0000         0\n",
+      "   this text is about fashion     0.0000    0.0000    0.0000         0\n",
+      "   this text is about science     0.0000    0.0000    0.0000         0\n",
+      "   this text is about divorce     0.0000    0.0000    0.0000         0\n",
+      "  this text is about religion     0.0000    0.0000    0.0000         0\n",
+      "    this text is about sports     0.0000    0.0000    0.0000         0\n",
+      "  this text is about politics     0.0000    0.0000    0.0000         0\n",
+      "    this text is about comedy     0.0000    0.0000    0.0000         0\n",
+      "    this text is about travel     0.0000    0.0000    0.0000         1\n",
+      "     this text is about crime     1.0000    1.0000    1.0000         1\n",
       "\n",
-      "                                    micro avg     0.5000    0.5000    0.5000         2\n",
-      "                                    macro avg     0.0667    0.0667    0.0667         2\n",
-      "                                 weighted avg     0.5000    0.5000    0.5000         2\n",
-      "                                  samples avg     0.5000    0.5000    0.5000         2\n",
+      "                    micro avg     0.5000    0.5000    0.5000         2\n",
+      "                    macro avg     0.0667    0.0667    0.0667         2\n",
+      "                 weighted avg     0.5000    0.5000    0.5000         2\n",
+      "                  samples avg     0.5000    0.5000    0.5000         2\n",
       "\n",
-      "2021-09-08 11:55:45,708 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:53,673 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
+      "2021-09-21 20:31:03,784 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:18,646 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:55:57,695 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:31:23,571 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 15/15 [00:00<00:00, 51233.36it/s]"
+      "100%|██████████| 15/15 [00:00<00:00, 48358.62it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:55:57,697 [b'this text is about travel', b'this text is about wellness', b'this text is about women', b'this text is about parents', b'this text is about business', b'this text is about weddings', b'this text is about entertainmen,the attention', b'this text is about science', b'this text is about crime', b'this text is about religion', b'this text is about sports', b'this text is about politics', b'this text is about comedy', b'this text is about technology', b'this text is about divorce']\n",
-      "2021-09-08 11:55:57,706 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:57,707 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:31:23,573 [b'this text is about technology', b'this text is about wellness', b'this text is about women', b'this text is about parents', b'this text is about weddings', b'this text is about fashion', b'this text is about entertainmen,the attention', b'this text is about divorce', b'this text is about crime', b'this text is about religion', b'this text is about sports', b'this text is about politics', b'this text is about comedy', b'this text is about business', b'this text is about science']\n",
+      "2021-09-21 20:31:23,581 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:23,583 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -5265,28 +5281,27 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:55:57,708 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:57,708 Corpus: \"Corpus: 13 train + 1 dev + 2 test sentences\"\n",
-      "2021-09-08 11:55:57,708 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:57,709 Parameters:\n",
-      "2021-09-08 11:55:57,709  - learning_rate: \"0.02\"\n",
-      "2021-09-08 11:55:57,709  - mini_batch_size: \"1\"\n",
-      "2021-09-08 11:55:57,710  - patience: \"3\"\n",
-      "2021-09-08 11:55:57,710  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 11:55:57,710  - max_epochs: \"10\"\n",
-      "2021-09-08 11:55:57,710  - shuffle: \"True\"\n",
-      "2021-09-08 11:55:57,711  - train_with_dev: \"False\"\n",
-      "2021-09-08 11:55:57,711  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 11:55:57,711 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:57,711 Model training base path: \"None1\"\n",
-      "2021-09-08 11:55:57,712 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:57,712 Device: cuda:0\n",
-      "2021-09-08 11:55:57,712 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:57,713 Embeddings storage mode: cpu\n",
-      "2021-09-08 11:55:57,719 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:57,773 epoch 1 - iter 1/13 - loss 0.09324422 - samples/sec: 26.21 - lr: 0.020000\n",
-      "2021-09-08 11:55:57,819 epoch 1 - iter 2/13 - loss 0.22021253 - samples/sec: 22.20 - lr: 0.020000\n",
-      "2021-09-08 11:55:57,865 epoch 1 - iter 3/13 - loss 0.17612121 - samples/sec: 22.02 - lr: 0.020000\n"
+      "2021-09-21 20:31:23,584 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:23,584 Corpus: \"Corpus: 13 train + 1 dev + 2 test sentences\"\n",
+      "2021-09-21 20:31:23,584 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:23,585 Parameters:\n",
+      "2021-09-21 20:31:23,585  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:31:23,585  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:31:23,585  - patience: \"3\"\n",
+      "2021-09-21 20:31:23,586  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:31:23,586  - max_epochs: \"10\"\n",
+      "2021-09-21 20:31:23,586  - shuffle: \"True\"\n",
+      "2021-09-21 20:31:23,587  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:31:23,587  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:31:23,587 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:23,587 Model training base path: \"None1\"\n",
+      "2021-09-21 20:31:23,588 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:23,588 Device: cuda:0\n",
+      "2021-09-21 20:31:23,588 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:23,589 Embeddings storage mode: cpu\n",
+      "2021-09-21 20:31:23,595 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:23,683 epoch 1 - iter 1/13 - loss 1.11387444 - samples/sec: 20.04 - lr: 0.020000\n",
+      "2021-09-21 20:31:23,736 epoch 1 - iter 2/13 - loss 0.84436077 - samples/sec: 18.93 - lr: 0.020000\n"
      ]
     },
     {
@@ -5300,204 +5315,204 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:55:57,907 epoch 1 - iter 4/13 - loss 0.14309513 - samples/sec: 23.76 - lr: 0.020000\n",
-      "2021-09-08 11:55:57,953 epoch 1 - iter 5/13 - loss 0.16768723 - samples/sec: 22.20 - lr: 0.020000\n",
-      "2021-09-08 11:55:57,999 epoch 1 - iter 6/13 - loss 0.14996435 - samples/sec: 21.85 - lr: 0.020000\n",
-      "2021-09-08 11:55:58,044 epoch 1 - iter 7/13 - loss 0.25313494 - samples/sec: 22.18 - lr: 0.020000\n",
-      "2021-09-08 11:55:58,090 epoch 1 - iter 8/13 - loss 0.36435901 - samples/sec: 22.01 - lr: 0.020000\n",
-      "2021-09-08 11:55:58,133 epoch 1 - iter 9/13 - loss 0.32622425 - samples/sec: 23.91 - lr: 0.020000\n",
-      "2021-09-08 11:55:58,178 epoch 1 - iter 10/13 - loss 0.31090993 - samples/sec: 22.25 - lr: 0.020000\n",
-      "2021-09-08 11:55:58,224 epoch 1 - iter 11/13 - loss 0.32493470 - samples/sec: 21.92 - lr: 0.020000\n",
-      "2021-09-08 11:55:58,269 epoch 1 - iter 12/13 - loss 0.38597342 - samples/sec: 22.19 - lr: 0.020000\n",
-      "2021-09-08 11:55:58,315 epoch 1 - iter 13/13 - loss 0.42489709 - samples/sec: 22.36 - lr: 0.020000\n",
-      "2021-09-08 11:55:58,315 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:55:58,316 EPOCH 1 done: loss 0.4249 - lr 0.0200000\n",
-      "2021-09-08 11:55:58,347 DEV : loss 1.116917371749878 - score 0.0\n",
-      "2021-09-08 11:55:58,348 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 11:56:01,824 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:01,886 epoch 2 - iter 1/13 - loss 0.00918407 - samples/sec: 22.35 - lr: 0.020000\n",
-      "2021-09-08 11:56:01,929 epoch 2 - iter 2/13 - loss 0.00789434 - samples/sec: 23.66 - lr: 0.020000\n",
-      "2021-09-08 11:56:01,974 epoch 2 - iter 3/13 - loss 0.11536567 - samples/sec: 22.32 - lr: 0.020000\n",
-      "2021-09-08 11:56:02,017 epoch 2 - iter 4/13 - loss 0.09077963 - samples/sec: 23.59 - lr: 0.020000\n",
-      "2021-09-08 11:56:02,062 epoch 2 - iter 5/13 - loss 0.29602733 - samples/sec: 22.21 - lr: 0.020000\n",
-      "2021-09-08 11:56:02,108 epoch 2 - iter 6/13 - loss 0.38408710 - samples/sec: 21.96 - lr: 0.020000\n",
-      "2021-09-08 11:56:02,151 epoch 2 - iter 7/13 - loss 0.33324925 - samples/sec: 23.66 - lr: 0.020000\n",
-      "2021-09-08 11:56:02,196 epoch 2 - iter 8/13 - loss 0.30000652 - samples/sec: 22.30 - lr: 0.020000\n",
-      "2021-09-08 11:56:02,239 epoch 2 - iter 9/13 - loss 0.27072847 - samples/sec: 23.85 - lr: 0.020000\n",
-      "2021-09-08 11:56:02,284 epoch 2 - iter 10/13 - loss 0.29848145 - samples/sec: 22.17 - lr: 0.020000\n",
-      "2021-09-08 11:56:02,330 epoch 2 - iter 11/13 - loss 0.43862570 - samples/sec: 22.17 - lr: 0.020000\n",
-      "2021-09-08 11:56:02,375 epoch 2 - iter 12/13 - loss 0.40403705 - samples/sec: 22.20 - lr: 0.020000\n",
-      "2021-09-08 11:56:02,420 epoch 2 - iter 13/13 - loss 0.44091340 - samples/sec: 22.40 - lr: 0.020000\n",
-      "2021-09-08 11:56:02,421 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:02,421 EPOCH 2 done: loss 0.4409 - lr 0.0200000\n",
-      "2021-09-08 11:56:02,454 DEV : loss 0.6146382093429565 - score 0.0\n",
-      "2021-09-08 11:56:02,454 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 11:56:06,666 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:06,726 epoch 3 - iter 1/13 - loss 0.00674897 - samples/sec: 23.06 - lr: 0.020000\n",
-      "2021-09-08 11:56:06,770 epoch 3 - iter 2/13 - loss 0.00974539 - samples/sec: 23.48 - lr: 0.020000\n",
-      "2021-09-08 11:56:06,816 epoch 3 - iter 3/13 - loss 0.33064384 - samples/sec: 21.92 - lr: 0.020000\n",
-      "2021-09-08 11:56:06,861 epoch 3 - iter 4/13 - loss 0.29099028 - samples/sec: 22.11 - lr: 0.020000\n",
-      "2021-09-08 11:56:06,908 epoch 3 - iter 5/13 - loss 0.44558736 - samples/sec: 21.66 - lr: 0.020000\n",
-      "2021-09-08 11:56:06,954 epoch 3 - iter 6/13 - loss 0.40177903 - samples/sec: 22.01 - lr: 0.020000\n",
-      "2021-09-08 11:56:06,996 epoch 3 - iter 7/13 - loss 0.34791109 - samples/sec: 23.89 - lr: 0.020000\n",
-      "2021-09-08 11:56:07,043 epoch 3 - iter 8/13 - loss 0.32511438 - samples/sec: 21.92 - lr: 0.020000\n",
-      "2021-09-08 11:56:07,088 epoch 3 - iter 9/13 - loss 0.29381670 - samples/sec: 22.04 - lr: 0.020000\n",
-      "2021-09-08 11:56:07,131 epoch 3 - iter 10/13 - loss 0.26464123 - samples/sec: 23.95 - lr: 0.020000\n",
-      "2021-09-08 11:56:07,177 epoch 3 - iter 11/13 - loss 0.28041885 - samples/sec: 21.90 - lr: 0.020000\n",
-      "2021-09-08 11:56:07,223 epoch 3 - iter 12/13 - loss 0.28819687 - samples/sec: 22.15 - lr: 0.020000\n",
-      "2021-09-08 11:56:07,268 epoch 3 - iter 13/13 - loss 0.30413120 - samples/sec: 22.24 - lr: 0.020000\n",
-      "2021-09-08 11:56:07,269 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:07,269 EPOCH 3 done: loss 0.3041 - lr 0.0200000\n",
-      "2021-09-08 11:56:07,399 DEV : loss 0.42487892508506775 - score 0.0\n",
-      "2021-09-08 11:56:07,399 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:31:23,800 epoch 1 - iter 3/13 - loss 0.65071535 - samples/sec: 15.80 - lr: 0.020000\n",
+      "2021-09-21 20:31:23,869 epoch 1 - iter 4/13 - loss 0.59391131 - samples/sec: 14.74 - lr: 0.020000\n",
+      "2021-09-21 20:31:23,933 epoch 1 - iter 5/13 - loss 0.52837080 - samples/sec: 15.87 - lr: 0.020000\n",
+      "2021-09-21 20:31:23,996 epoch 1 - iter 6/13 - loss 0.60795682 - samples/sec: 15.93 - lr: 0.020000\n",
+      "2021-09-21 20:31:24,057 epoch 1 - iter 7/13 - loss 0.60207732 - samples/sec: 16.50 - lr: 0.020000\n",
+      "2021-09-21 20:31:24,114 epoch 1 - iter 8/13 - loss 0.57935457 - samples/sec: 17.86 - lr: 0.020000\n",
+      "2021-09-21 20:31:24,163 epoch 1 - iter 9/13 - loss 0.59419162 - samples/sec: 20.75 - lr: 0.020000\n",
+      "2021-09-21 20:31:24,211 epoch 1 - iter 10/13 - loss 0.58883166 - samples/sec: 20.95 - lr: 0.020000\n",
+      "2021-09-21 20:31:24,259 epoch 1 - iter 11/13 - loss 0.64752037 - samples/sec: 21.01 - lr: 0.020000\n",
+      "2021-09-21 20:31:24,307 epoch 1 - iter 12/13 - loss 0.65567226 - samples/sec: 21.10 - lr: 0.020000\n",
+      "2021-09-21 20:31:24,355 epoch 1 - iter 13/13 - loss 0.65036624 - samples/sec: 20.92 - lr: 0.020000\n",
+      "2021-09-21 20:31:24,356 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:24,357 EPOCH 1 done: loss 0.6504 - lr 0.0200000\n",
+      "2021-09-21 20:31:24,485 DEV : loss 0.07034279406070709 - score 0.0\n",
+      "2021-09-21 20:31:24,486 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:56:12,332 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:12,392 epoch 4 - iter 1/13 - loss 0.01338105 - samples/sec: 23.05 - lr: 0.020000\n",
-      "2021-09-08 11:56:12,441 epoch 4 - iter 2/13 - loss 0.03623968 - samples/sec: 20.73 - lr: 0.020000\n",
-      "2021-09-08 11:56:12,490 epoch 4 - iter 3/13 - loss 0.09651043 - samples/sec: 20.72 - lr: 0.020000\n",
-      "2021-09-08 11:56:12,538 epoch 4 - iter 4/13 - loss 0.40751968 - samples/sec: 21.24 - lr: 0.020000\n",
-      "2021-09-08 11:56:12,584 epoch 4 - iter 5/13 - loss 0.34141717 - samples/sec: 21.86 - lr: 0.020000\n",
-      "2021-09-08 11:56:12,629 epoch 4 - iter 6/13 - loss 0.28491733 - samples/sec: 22.83 - lr: 0.020000\n",
-      "2021-09-08 11:56:12,676 epoch 4 - iter 7/13 - loss 0.24478224 - samples/sec: 21.57 - lr: 0.020000\n",
-      "2021-09-08 11:56:12,723 epoch 4 - iter 8/13 - loss 0.21479605 - samples/sec: 21.84 - lr: 0.020000\n",
-      "2021-09-08 11:56:12,770 epoch 4 - iter 9/13 - loss 0.19118209 - samples/sec: 21.65 - lr: 0.020000\n",
-      "2021-09-08 11:56:12,819 epoch 4 - iter 10/13 - loss 0.17929543 - samples/sec: 20.68 - lr: 0.020000\n",
-      "2021-09-08 11:56:12,865 epoch 4 - iter 11/13 - loss 0.16326040 - samples/sec: 22.05 - lr: 0.020000\n",
-      "2021-09-08 11:56:12,918 epoch 4 - iter 12/13 - loss 0.23039575 - samples/sec: 19.32 - lr: 0.020000\n",
-      "2021-09-08 11:56:12,964 epoch 4 - iter 13/13 - loss 0.21506558 - samples/sec: 21.90 - lr: 0.020000\n",
-      "2021-09-08 11:56:12,965 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:12,966 EPOCH 4 done: loss 0.2151 - lr 0.0200000\n",
-      "2021-09-08 11:56:13,099 DEV : loss 0.5480495691299438 - score 0.0\n",
-      "2021-09-08 11:56:13,100 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:56:13,180 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:13,242 epoch 5 - iter 1/13 - loss 0.11088267 - samples/sec: 21.74 - lr: 0.020000\n",
-      "2021-09-08 11:56:13,285 epoch 5 - iter 2/13 - loss 0.05708939 - samples/sec: 23.79 - lr: 0.020000\n",
-      "2021-09-08 11:56:13,331 epoch 5 - iter 3/13 - loss 0.18295956 - samples/sec: 21.93 - lr: 0.020000\n",
-      "2021-09-08 11:56:13,376 epoch 5 - iter 4/13 - loss 0.36550246 - samples/sec: 22.23 - lr: 0.020000\n",
-      "2021-09-08 11:56:13,420 epoch 5 - iter 5/13 - loss 0.29275984 - samples/sec: 23.34 - lr: 0.020000\n",
-      "2021-09-08 11:56:13,462 epoch 5 - iter 6/13 - loss 0.24427525 - samples/sec: 23.85 - lr: 0.020000\n",
-      "2021-09-08 11:56:13,505 epoch 5 - iter 7/13 - loss 0.20977463 - samples/sec: 23.73 - lr: 0.020000\n",
-      "2021-09-08 11:56:13,547 epoch 5 - iter 8/13 - loss 0.18371249 - samples/sec: 23.56 - lr: 0.020000\n",
-      "2021-09-08 11:56:13,593 epoch 5 - iter 9/13 - loss 0.16964674 - samples/sec: 22.22 - lr: 0.020000\n",
-      "2021-09-08 11:56:13,638 epoch 5 - iter 10/13 - loss 0.15738941 - samples/sec: 22.20 - lr: 0.020000\n",
-      "2021-09-08 11:56:13,685 epoch 5 - iter 11/13 - loss 0.14630942 - samples/sec: 21.83 - lr: 0.020000\n",
-      "2021-09-08 11:56:13,727 epoch 5 - iter 12/13 - loss 0.13464912 - samples/sec: 23.78 - lr: 0.020000\n",
-      "2021-09-08 11:56:13,770 epoch 5 - iter 13/13 - loss 0.12659381 - samples/sec: 23.79 - lr: 0.020000\n",
-      "2021-09-08 11:56:13,771 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:13,771 EPOCH 5 done: loss 0.1266 - lr 0.0200000\n",
-      "2021-09-08 11:56:13,900 DEV : loss 0.7139323353767395 - score 0.0\n",
-      "2021-09-08 11:56:13,901 BAD EPOCHS (no improvement): 2\n"
+      "2021-09-21 20:31:33,117 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:33,234 epoch 2 - iter 1/13 - loss 1.19330168 - samples/sec: 14.86 - lr: 0.020000\n",
+      "2021-09-21 20:31:33,313 epoch 2 - iter 2/13 - loss 0.98862922 - samples/sec: 12.65 - lr: 0.020000\n",
+      "2021-09-21 20:31:33,376 epoch 2 - iter 3/13 - loss 0.92303999 - samples/sec: 15.97 - lr: 0.020000\n",
+      "2021-09-21 20:31:33,456 epoch 2 - iter 4/13 - loss 0.91675101 - samples/sec: 12.59 - lr: 0.020000\n",
+      "2021-09-21 20:31:33,517 epoch 2 - iter 5/13 - loss 0.87024964 - samples/sec: 16.52 - lr: 0.020000\n",
+      "2021-09-21 20:31:33,596 epoch 2 - iter 6/13 - loss 0.83332540 - samples/sec: 12.69 - lr: 0.020000\n",
+      "2021-09-21 20:31:33,672 epoch 2 - iter 7/13 - loss 0.81354173 - samples/sec: 13.33 - lr: 0.020000\n",
+      "2021-09-21 20:31:33,746 epoch 2 - iter 8/13 - loss 0.79235943 - samples/sec: 13.65 - lr: 0.020000\n",
+      "2021-09-21 20:31:33,826 epoch 2 - iter 9/13 - loss 0.77781404 - samples/sec: 12.57 - lr: 0.020000\n",
+      "2021-09-21 20:31:33,923 epoch 2 - iter 10/13 - loss 0.75571989 - samples/sec: 10.32 - lr: 0.020000\n",
+      "2021-09-21 20:31:33,974 epoch 2 - iter 11/13 - loss 0.75160393 - samples/sec: 19.56 - lr: 0.020000\n",
+      "2021-09-21 20:31:34,053 epoch 2 - iter 12/13 - loss 0.74588462 - samples/sec: 12.84 - lr: 0.020000\n",
+      "2021-09-21 20:31:34,107 epoch 2 - iter 13/13 - loss 0.74805566 - samples/sec: 18.48 - lr: 0.020000\n",
+      "2021-09-21 20:31:34,108 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:34,109 EPOCH 2 done: loss 0.7481 - lr 0.0200000\n",
+      "2021-09-21 20:31:34,167 DEV : loss 0.5987907648086548 - score 0.0\n",
+      "2021-09-21 20:31:34,168 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:31:34,182 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:34,289 epoch 3 - iter 1/13 - loss 0.63745207 - samples/sec: 17.37 - lr: 0.020000\n",
+      "2021-09-21 20:31:34,347 epoch 3 - iter 2/13 - loss 0.66597259 - samples/sec: 17.32 - lr: 0.020000\n",
+      "2021-09-21 20:31:34,416 epoch 3 - iter 3/13 - loss 0.69210714 - samples/sec: 14.53 - lr: 0.020000\n",
+      "2021-09-21 20:31:34,473 epoch 3 - iter 4/13 - loss 0.69145599 - samples/sec: 17.58 - lr: 0.020000\n",
+      "2021-09-21 20:31:34,531 epoch 3 - iter 5/13 - loss 0.68502308 - samples/sec: 17.51 - lr: 0.020000\n",
+      "2021-09-21 20:31:34,589 epoch 3 - iter 6/13 - loss 0.67958109 - samples/sec: 17.44 - lr: 0.020000\n",
+      "2021-09-21 20:31:34,639 epoch 3 - iter 7/13 - loss 0.67748084 - samples/sec: 20.14 - lr: 0.020000\n",
+      "2021-09-21 20:31:34,708 epoch 3 - iter 8/13 - loss 0.67017423 - samples/sec: 14.49 - lr: 0.020000\n",
+      "2021-09-21 20:31:34,760 epoch 3 - iter 9/13 - loss 0.67605961 - samples/sec: 19.46 - lr: 0.020000\n",
+      "2021-09-21 20:31:34,811 epoch 3 - iter 10/13 - loss 0.67007232 - samples/sec: 19.91 - lr: 0.020000\n",
+      "2021-09-21 20:31:34,865 epoch 3 - iter 11/13 - loss 0.66512316 - samples/sec: 18.72 - lr: 0.020000\n",
+      "2021-09-21 20:31:34,934 epoch 3 - iter 12/13 - loss 0.66361683 - samples/sec: 14.46 - lr: 0.020000\n",
+      "2021-09-21 20:31:34,996 epoch 3 - iter 13/13 - loss 0.66266234 - samples/sec: 16.26 - lr: 0.020000\n",
+      "2021-09-21 20:31:34,998 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:34,998 EPOCH 3 done: loss 0.6627 - lr 0.0200000\n",
+      "2021-09-21 20:31:36,083 DEV : loss 0.3411855697631836 - score 0.0\n",
+      "2021-09-21 20:31:36,085 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:31:36,241 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:36,358 epoch 4 - iter 1/13 - loss 0.59326565 - samples/sec: 21.82 - lr: 0.020000\n",
+      "2021-09-21 20:31:36,416 epoch 4 - iter 2/13 - loss 0.60746488 - samples/sec: 17.46 - lr: 0.020000\n",
+      "2021-09-21 20:31:36,469 epoch 4 - iter 3/13 - loss 0.62090051 - samples/sec: 19.29 - lr: 0.020000\n",
+      "2021-09-21 20:31:36,526 epoch 4 - iter 4/13 - loss 0.61570434 - samples/sec: 17.49 - lr: 0.020000\n",
+      "2021-09-21 20:31:36,580 epoch 4 - iter 5/13 - loss 0.61327713 - samples/sec: 18.89 - lr: 0.020000\n",
+      "2021-09-21 20:31:36,633 epoch 4 - iter 6/13 - loss 0.61319258 - samples/sec: 19.03 - lr: 0.020000\n",
+      "2021-09-21 20:31:36,682 epoch 4 - iter 7/13 - loss 0.62300169 - samples/sec: 20.26 - lr: 0.020000\n",
+      "2021-09-21 20:31:36,731 epoch 4 - iter 8/13 - loss 0.62037215 - samples/sec: 20.61 - lr: 0.020000\n",
+      "2021-09-21 20:31:36,784 epoch 4 - iter 9/13 - loss 0.62605829 - samples/sec: 19.11 - lr: 0.020000\n",
+      "2021-09-21 20:31:36,834 epoch 4 - iter 10/13 - loss 0.64092628 - samples/sec: 20.37 - lr: 0.020000\n",
+      "2021-09-21 20:31:36,895 epoch 4 - iter 11/13 - loss 0.64584886 - samples/sec: 16.59 - lr: 0.020000\n",
+      "2021-09-21 20:31:36,944 epoch 4 - iter 12/13 - loss 0.64667490 - samples/sec: 20.54 - lr: 0.020000\n",
+      "2021-09-21 20:31:36,988 epoch 4 - iter 13/13 - loss 0.65042405 - samples/sec: 22.74 - lr: 0.020000\n",
+      "2021-09-21 20:31:36,989 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:36,990 EPOCH 4 done: loss 0.6504 - lr 0.0200000\n",
+      "2021-09-21 20:31:37,253 DEV : loss 0.5066163539886475 - score 0.0\n",
+      "2021-09-21 20:31:37,255 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:31:39,674 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:39,788 epoch 5 - iter 1/13 - loss 0.62712216 - samples/sec: 14.60 - lr: 0.020000\n",
+      "2021-09-21 20:31:39,858 epoch 5 - iter 2/13 - loss 0.68062279 - samples/sec: 14.54 - lr: 0.020000\n",
+      "2021-09-21 20:31:39,931 epoch 5 - iter 3/13 - loss 0.68091687 - samples/sec: 13.72 - lr: 0.020000\n",
+      "2021-09-21 20:31:40,008 epoch 5 - iter 4/13 - loss 0.70059402 - samples/sec: 12.96 - lr: 0.020000\n",
+      "2021-09-21 20:31:40,084 epoch 5 - iter 5/13 - loss 0.71183045 - samples/sec: 13.32 - lr: 0.020000\n",
+      "2021-09-21 20:31:40,155 epoch 5 - iter 6/13 - loss 0.69488654 - samples/sec: 14.14 - lr: 0.020000\n",
+      "2021-09-21 20:31:40,226 epoch 5 - iter 7/13 - loss 0.68262360 - samples/sec: 14.12 - lr: 0.020000\n",
+      "2021-09-21 20:31:40,297 epoch 5 - iter 8/13 - loss 0.67280246 - samples/sec: 14.36 - lr: 0.020000\n",
+      "2021-09-21 20:31:40,366 epoch 5 - iter 9/13 - loss 0.66848361 - samples/sec: 14.61 - lr: 0.020000\n",
+      "2021-09-21 20:31:40,430 epoch 5 - iter 10/13 - loss 0.67110478 - samples/sec: 15.63 - lr: 0.020000\n",
+      "2021-09-21 20:31:40,525 epoch 5 - iter 11/13 - loss 0.67091862 - samples/sec: 10.64 - lr: 0.020000\n",
+      "2021-09-21 20:31:40,587 epoch 5 - iter 12/13 - loss 0.66122891 - samples/sec: 16.16 - lr: 0.020000\n",
+      "2021-09-21 20:31:40,639 epoch 5 - iter 13/13 - loss 0.65679749 - samples/sec: 19.31 - lr: 0.020000\n",
+      "2021-09-21 20:31:40,640 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:40,641 EPOCH 5 done: loss 0.6568 - lr 0.0200000\n",
+      "2021-09-21 20:31:40,737 DEV : loss 0.2448778599500656 - score 0.0\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:56:13,974 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:14,032 epoch 6 - iter 1/13 - loss 0.00710718 - samples/sec: 23.74 - lr: 0.020000\n",
-      "2021-09-08 11:56:14,075 epoch 6 - iter 2/13 - loss 0.00466093 - samples/sec: 23.60 - lr: 0.020000\n",
-      "2021-09-08 11:56:14,117 epoch 6 - iter 3/13 - loss 0.00585888 - samples/sec: 23.89 - lr: 0.020000\n",
-      "2021-09-08 11:56:14,160 epoch 6 - iter 4/13 - loss 0.00445454 - samples/sec: 23.81 - lr: 0.020000\n",
-      "2021-09-08 11:56:14,205 epoch 6 - iter 5/13 - loss 0.00442264 - samples/sec: 22.49 - lr: 0.020000\n",
-      "2021-09-08 11:56:14,247 epoch 6 - iter 6/13 - loss 0.00378038 - samples/sec: 23.89 - lr: 0.020000\n",
-      "2021-09-08 11:56:14,293 epoch 6 - iter 7/13 - loss 0.00876780 - samples/sec: 21.93 - lr: 0.020000\n",
-      "2021-09-08 11:56:14,336 epoch 6 - iter 8/13 - loss 0.00780788 - samples/sec: 23.81 - lr: 0.020000\n",
-      "2021-09-08 11:56:14,378 epoch 6 - iter 9/13 - loss 0.00703954 - samples/sec: 23.89 - lr: 0.020000\n",
-      "2021-09-08 11:56:14,421 epoch 6 - iter 10/13 - loss 0.00647333 - samples/sec: 23.56 - lr: 0.020000\n",
-      "2021-09-08 11:56:14,463 epoch 6 - iter 11/13 - loss 0.00602996 - samples/sec: 23.96 - lr: 0.020000\n",
-      "2021-09-08 11:56:14,506 epoch 6 - iter 12/13 - loss 0.00558482 - samples/sec: 23.89 - lr: 0.020000\n",
-      "2021-09-08 11:56:14,549 epoch 6 - iter 13/13 - loss 0.00542938 - samples/sec: 23.54 - lr: 0.020000\n",
-      "2021-09-08 11:56:14,550 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:14,550 EPOCH 6 done: loss 0.0054 - lr 0.0200000\n",
-      "2021-09-08 11:56:14,677 DEV : loss 0.5803110599517822 - score 0.0\n",
-      "2021-09-08 11:56:14,678 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:56:14,752 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:14,810 epoch 7 - iter 1/13 - loss 0.00129480 - samples/sec: 23.78 - lr: 0.020000\n",
-      "2021-09-08 11:56:14,852 epoch 7 - iter 2/13 - loss 0.00114372 - samples/sec: 23.84 - lr: 0.020000\n",
-      "2021-09-08 11:56:14,895 epoch 7 - iter 3/13 - loss 0.00108475 - samples/sec: 23.79 - lr: 0.020000\n",
-      "2021-09-08 11:56:14,938 epoch 7 - iter 4/13 - loss 0.00104828 - samples/sec: 23.58 - lr: 0.020000\n",
-      "2021-09-08 11:56:14,983 epoch 7 - iter 5/13 - loss 0.00515570 - samples/sec: 22.35 - lr: 0.020000\n",
-      "2021-09-08 11:56:15,025 epoch 7 - iter 6/13 - loss 0.00501461 - samples/sec: 23.80 - lr: 0.020000\n",
-      "2021-09-08 11:56:15,068 epoch 7 - iter 7/13 - loss 0.00440495 - samples/sec: 23.67 - lr: 0.020000\n",
-      "2021-09-08 11:56:15,110 epoch 7 - iter 8/13 - loss 0.00434449 - samples/sec: 23.85 - lr: 0.020000\n",
-      "2021-09-08 11:56:15,156 epoch 7 - iter 9/13 - loss 0.00604554 - samples/sec: 21.92 - lr: 0.020000\n",
-      "2021-09-08 11:56:15,199 epoch 7 - iter 10/13 - loss 0.00547416 - samples/sec: 23.76 - lr: 0.020000\n",
-      "2021-09-08 11:56:15,242 epoch 7 - iter 11/13 - loss 0.00515666 - samples/sec: 23.62 - lr: 0.020000\n",
-      "2021-09-08 11:56:15,284 epoch 7 - iter 12/13 - loss 0.00477954 - samples/sec: 23.82 - lr: 0.020000\n",
-      "2021-09-08 11:56:15,330 epoch 7 - iter 13/13 - loss 0.01179221 - samples/sec: 22.23 - lr: 0.020000\n",
-      "2021-09-08 11:56:15,331 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:15,332 EPOCH 7 done: loss 0.0118 - lr 0.0200000\n",
-      "2021-09-08 11:56:15,461 DEV : loss 0.0011208829237148166 - score 0.0\n",
-      "2021-09-08 11:56:15,462 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 11:56:20,446 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:20,507 epoch 8 - iter 1/13 - loss 0.00226989 - samples/sec: 22.88 - lr: 0.020000\n",
-      "2021-09-08 11:56:20,553 epoch 8 - iter 2/13 - loss 0.05100733 - samples/sec: 22.04 - lr: 0.020000\n",
-      "2021-09-08 11:56:20,599 epoch 8 - iter 3/13 - loss 0.05014093 - samples/sec: 22.12 - lr: 0.020000\n",
-      "2021-09-08 11:56:20,642 epoch 8 - iter 4/13 - loss 0.03781250 - samples/sec: 23.44 - lr: 0.020000\n",
-      "2021-09-08 11:56:20,684 epoch 8 - iter 5/13 - loss 0.03035348 - samples/sec: 23.90 - lr: 0.020000\n",
-      "2021-09-08 11:56:20,730 epoch 8 - iter 6/13 - loss 0.04470863 - samples/sec: 22.03 - lr: 0.020000\n",
-      "2021-09-08 11:56:20,776 epoch 8 - iter 7/13 - loss 0.04123955 - samples/sec: 21.99 - lr: 0.020000\n",
-      "2021-09-08 11:56:20,819 epoch 8 - iter 8/13 - loss 0.03615576 - samples/sec: 23.80 - lr: 0.020000\n",
-      "2021-09-08 11:56:20,865 epoch 8 - iter 9/13 - loss 0.33802840 - samples/sec: 21.93 - lr: 0.020000\n",
-      "2021-09-08 11:56:20,908 epoch 8 - iter 10/13 - loss 0.30433235 - samples/sec: 23.81 - lr: 0.020000\n",
-      "2021-09-08 11:56:20,953 epoch 8 - iter 11/13 - loss 0.49389590 - samples/sec: 22.25 - lr: 0.020000\n",
-      "2021-09-08 11:56:20,996 epoch 8 - iter 12/13 - loss 0.45310939 - samples/sec: 23.57 - lr: 0.020000\n",
-      "2021-09-08 11:56:21,038 epoch 8 - iter 13/13 - loss 0.41838679 - samples/sec: 23.95 - lr: 0.020000\n",
-      "2021-09-08 11:56:21,040 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:21,040 EPOCH 8 done: loss 0.4184 - lr 0.0200000\n",
-      "2021-09-08 11:56:21,170 DEV : loss 0.27907031774520874 - score 0.0\n",
-      "2021-09-08 11:56:21,171 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:56:21,242 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:21,300 epoch 9 - iter 1/13 - loss 0.00334068 - samples/sec: 23.50 - lr: 0.020000\n",
-      "2021-09-08 11:56:21,345 epoch 9 - iter 2/13 - loss 0.00195272 - samples/sec: 22.58 - lr: 0.020000\n",
-      "2021-09-08 11:56:21,388 epoch 9 - iter 3/13 - loss 0.00222841 - samples/sec: 23.67 - lr: 0.020000\n",
-      "2021-09-08 11:56:21,433 epoch 9 - iter 4/13 - loss 0.02593620 - samples/sec: 22.33 - lr: 0.020000\n",
-      "2021-09-08 11:56:21,476 epoch 9 - iter 5/13 - loss 0.02165930 - samples/sec: 23.43 - lr: 0.020000\n",
-      "2021-09-08 11:56:21,519 epoch 9 - iter 6/13 - loss 0.01819574 - samples/sec: 23.89 - lr: 0.020000\n",
-      "2021-09-08 11:56:21,564 epoch 9 - iter 7/13 - loss 0.14112457 - samples/sec: 22.13 - lr: 0.020000\n",
-      "2021-09-08 11:56:21,607 epoch 9 - iter 8/13 - loss 0.12395800 - samples/sec: 23.48 - lr: 0.020000\n",
-      "2021-09-08 11:56:21,650 epoch 9 - iter 9/13 - loss 0.11066861 - samples/sec: 23.90 - lr: 0.020000\n",
-      "2021-09-08 11:56:21,693 epoch 9 - iter 10/13 - loss 0.09966032 - samples/sec: 23.47 - lr: 0.020000\n",
-      "2021-09-08 11:56:21,735 epoch 9 - iter 11/13 - loss 0.09091744 - samples/sec: 23.80 - lr: 0.020000\n",
-      "2021-09-08 11:56:21,781 epoch 9 - iter 12/13 - loss 0.09040096 - samples/sec: 22.26 - lr: 0.020000\n",
-      "2021-09-08 11:56:21,824 epoch 9 - iter 13/13 - loss 0.08390107 - samples/sec: 23.40 - lr: 0.020000\n",
-      "2021-09-08 11:56:21,825 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:21,825 EPOCH 9 done: loss 0.0839 - lr 0.0200000\n",
-      "2021-09-08 11:56:21,959 DEV : loss 0.07427637279033661 - score 0.0\n",
-      "2021-09-08 11:56:21,959 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:56:22,044 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:22,106 epoch 10 - iter 1/13 - loss 0.40875068 - samples/sec: 21.95 - lr: 0.020000\n",
-      "2021-09-08 11:56:22,148 epoch 10 - iter 2/13 - loss 0.20833440 - samples/sec: 23.98 - lr: 0.020000\n",
-      "2021-09-08 11:56:22,191 epoch 10 - iter 3/13 - loss 0.13932955 - samples/sec: 23.57 - lr: 0.020000\n",
-      "2021-09-08 11:56:22,233 epoch 10 - iter 4/13 - loss 0.10474015 - samples/sec: 23.85 - lr: 0.020000\n",
-      "2021-09-08 11:56:22,279 epoch 10 - iter 5/13 - loss 0.12124009 - samples/sec: 22.01 - lr: 0.020000\n",
-      "2021-09-08 11:56:22,325 epoch 10 - iter 6/13 - loss 0.29236765 - samples/sec: 22.07 - lr: 0.020000\n",
-      "2021-09-08 11:56:22,367 epoch 10 - iter 7/13 - loss 0.25072450 - samples/sec: 24.02 - lr: 0.020000\n",
-      "2021-09-08 11:56:22,410 epoch 10 - iter 8/13 - loss 0.21951353 - samples/sec: 23.71 - lr: 0.020000\n",
-      "2021-09-08 11:56:22,452 epoch 10 - iter 9/13 - loss 0.19522070 - samples/sec: 23.92 - lr: 0.020000\n",
-      "2021-09-08 11:56:22,498 epoch 10 - iter 10/13 - loss 0.17739783 - samples/sec: 22.21 - lr: 0.020000\n",
-      "2021-09-08 11:56:22,541 epoch 10 - iter 11/13 - loss 0.16170984 - samples/sec: 23.67 - lr: 0.020000\n",
-      "2021-09-08 11:56:22,583 epoch 10 - iter 12/13 - loss 0.14829311 - samples/sec: 24.06 - lr: 0.020000\n"
+      "Epoch     5: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 20:31:40,741 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:31:40,742 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:40,897 epoch 6 - iter 1/13 - loss 0.64727527 - samples/sec: 12.93 - lr: 0.010000\n",
+      "2021-09-21 20:31:40,993 epoch 6 - iter 2/13 - loss 0.63126993 - samples/sec: 10.39 - lr: 0.010000\n",
+      "2021-09-21 20:31:41,056 epoch 6 - iter 3/13 - loss 0.64833778 - samples/sec: 16.02 - lr: 0.010000\n",
+      "2021-09-21 20:31:41,129 epoch 6 - iter 4/13 - loss 0.64342609 - samples/sec: 13.88 - lr: 0.010000\n",
+      "2021-09-21 20:31:41,203 epoch 6 - iter 5/13 - loss 0.60419233 - samples/sec: 13.48 - lr: 0.010000\n",
+      "2021-09-21 20:31:41,270 epoch 6 - iter 6/13 - loss 0.58892901 - samples/sec: 14.95 - lr: 0.010000\n",
+      "2021-09-21 20:31:41,335 epoch 6 - iter 7/13 - loss 0.58467723 - samples/sec: 15.66 - lr: 0.010000\n",
+      "2021-09-21 20:31:41,396 epoch 6 - iter 8/13 - loss 0.59059150 - samples/sec: 16.38 - lr: 0.010000\n",
+      "2021-09-21 20:31:41,463 epoch 6 - iter 9/13 - loss 0.59347652 - samples/sec: 15.16 - lr: 0.010000\n",
+      "2021-09-21 20:31:41,517 epoch 6 - iter 10/13 - loss 0.60839187 - samples/sec: 18.73 - lr: 0.010000\n",
+      "2021-09-21 20:31:41,569 epoch 6 - iter 11/13 - loss 0.61884922 - samples/sec: 19.32 - lr: 0.010000\n",
+      "2021-09-21 20:31:41,634 epoch 6 - iter 12/13 - loss 0.62282428 - samples/sec: 15.54 - lr: 0.010000\n",
+      "2021-09-21 20:31:41,686 epoch 6 - iter 13/13 - loss 0.62184482 - samples/sec: 19.26 - lr: 0.010000\n",
+      "2021-09-21 20:31:41,687 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:41,687 EPOCH 6 done: loss 0.6218 - lr 0.0100000\n",
+      "2021-09-21 20:31:41,722 DEV : loss 0.33304980397224426 - score 0.0\n",
+      "2021-09-21 20:31:41,724 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:31:41,734 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:41,868 epoch 7 - iter 1/13 - loss 0.65114516 - samples/sec: 17.91 - lr: 0.010000\n",
+      "2021-09-21 20:31:41,916 epoch 7 - iter 2/13 - loss 0.61716700 - samples/sec: 21.31 - lr: 0.010000\n",
+      "2021-09-21 20:31:41,965 epoch 7 - iter 3/13 - loss 0.61296052 - samples/sec: 20.49 - lr: 0.010000\n",
+      "2021-09-21 20:31:42,034 epoch 7 - iter 4/13 - loss 0.59598540 - samples/sec: 14.48 - lr: 0.010000\n",
+      "2021-09-21 20:31:42,092 epoch 7 - iter 5/13 - loss 0.60879679 - samples/sec: 17.39 - lr: 0.010000\n",
+      "2021-09-21 20:31:42,145 epoch 7 - iter 6/13 - loss 0.61656764 - samples/sec: 19.10 - lr: 0.010000\n",
+      "2021-09-21 20:31:42,207 epoch 7 - iter 7/13 - loss 0.60175013 - samples/sec: 16.20 - lr: 0.010000\n",
+      "2021-09-21 20:31:42,262 epoch 7 - iter 8/13 - loss 0.61456736 - samples/sec: 18.49 - lr: 0.010000\n",
+      "2021-09-21 20:31:42,330 epoch 7 - iter 9/13 - loss 0.61859036 - samples/sec: 14.77 - lr: 0.010000\n",
+      "2021-09-21 20:31:42,396 epoch 7 - iter 10/13 - loss 0.61106780 - samples/sec: 15.23 - lr: 0.010000\n",
+      "2021-09-21 20:31:42,464 epoch 7 - iter 11/13 - loss 0.61538459 - samples/sec: 14.89 - lr: 0.010000\n",
+      "2021-09-21 20:31:42,517 epoch 7 - iter 12/13 - loss 0.61429161 - samples/sec: 18.81 - lr: 0.010000\n",
+      "2021-09-21 20:31:42,575 epoch 7 - iter 13/13 - loss 0.62653223 - samples/sec: 17.40 - lr: 0.010000\n",
+      "2021-09-21 20:31:42,576 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:42,577 EPOCH 7 done: loss 0.6265 - lr 0.0100000\n",
+      "2021-09-21 20:31:42,726 DEV : loss 0.6903124451637268 - score 0.0\n",
+      "2021-09-21 20:31:42,728 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:31:42,809 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:42,896 epoch 8 - iter 1/13 - loss 0.68505907 - samples/sec: 17.49 - lr: 0.010000\n",
+      "2021-09-21 20:31:42,949 epoch 8 - iter 2/13 - loss 0.65235284 - samples/sec: 18.73 - lr: 0.010000\n",
+      "2021-09-21 20:31:43,003 epoch 8 - iter 3/13 - loss 0.63190130 - samples/sec: 18.76 - lr: 0.010000\n",
+      "2021-09-21 20:31:43,063 epoch 8 - iter 4/13 - loss 0.63482301 - samples/sec: 16.77 - lr: 0.010000\n",
+      "2021-09-21 20:31:43,119 epoch 8 - iter 5/13 - loss 0.62980988 - samples/sec: 18.11 - lr: 0.010000\n",
+      "2021-09-21 20:31:43,188 epoch 8 - iter 6/13 - loss 0.66679000 - samples/sec: 14.67 - lr: 0.010000\n",
+      "2021-09-21 20:31:43,245 epoch 8 - iter 7/13 - loss 0.66995166 - samples/sec: 17.66 - lr: 0.010000\n",
+      "2021-09-21 20:31:43,303 epoch 8 - iter 8/13 - loss 0.65662821 - samples/sec: 17.23 - lr: 0.010000\n",
+      "2021-09-21 20:31:43,353 epoch 8 - iter 9/13 - loss 0.65445698 - samples/sec: 20.33 - lr: 0.010000\n",
+      "2021-09-21 20:31:43,406 epoch 8 - iter 10/13 - loss 0.66294028 - samples/sec: 19.04 - lr: 0.010000\n",
+      "2021-09-21 20:31:43,465 epoch 8 - iter 11/13 - loss 0.65804815 - samples/sec: 17.08 - lr: 0.010000\n",
+      "2021-09-21 20:31:43,517 epoch 8 - iter 12/13 - loss 0.66443722 - samples/sec: 19.45 - lr: 0.010000\n",
+      "2021-09-21 20:31:43,573 epoch 8 - iter 13/13 - loss 0.66166439 - samples/sec: 17.80 - lr: 0.010000\n",
+      "2021-09-21 20:31:43,574 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:43,575 EPOCH 8 done: loss 0.6617 - lr 0.0100000\n",
+      "2021-09-21 20:31:43,708 DEV : loss 0.46492835879325867 - score 0.0\n",
+      "2021-09-21 20:31:43,710 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:31:43,793 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:43,877 epoch 9 - iter 1/13 - loss 0.68182039 - samples/sec: 17.72 - lr: 0.010000\n",
+      "2021-09-21 20:31:43,935 epoch 9 - iter 2/13 - loss 0.68648338 - samples/sec: 17.37 - lr: 0.010000\n",
+      "2021-09-21 20:31:43,994 epoch 9 - iter 3/13 - loss 0.67990486 - samples/sec: 17.19 - lr: 0.010000\n",
+      "2021-09-21 20:31:44,056 epoch 9 - iter 4/13 - loss 0.66091982 - samples/sec: 16.20 - lr: 0.010000\n",
+      "2021-09-21 20:31:44,107 epoch 9 - iter 5/13 - loss 0.66927793 - samples/sec: 19.76 - lr: 0.010000\n",
+      "2021-09-21 20:31:44,161 epoch 9 - iter 6/13 - loss 0.65861333 - samples/sec: 18.52 - lr: 0.010000\n",
+      "2021-09-21 20:31:44,213 epoch 9 - iter 7/13 - loss 0.65587212 - samples/sec: 19.48 - lr: 0.010000\n",
+      "2021-09-21 20:31:44,260 epoch 9 - iter 8/13 - loss 0.64901299 - samples/sec: 21.41 - lr: 0.010000\n",
+      "2021-09-21 20:31:44,311 epoch 9 - iter 9/13 - loss 0.64397954 - samples/sec: 20.01 - lr: 0.010000\n",
+      "2021-09-21 20:31:44,373 epoch 9 - iter 10/13 - loss 0.63909990 - samples/sec: 16.21 - lr: 0.010000\n",
+      "2021-09-21 20:31:44,429 epoch 9 - iter 11/13 - loss 0.63824494 - samples/sec: 17.98 - lr: 0.010000\n",
+      "2021-09-21 20:31:44,484 epoch 9 - iter 12/13 - loss 0.63736622 - samples/sec: 18.21 - lr: 0.010000\n",
+      "2021-09-21 20:31:44,534 epoch 9 - iter 13/13 - loss 0.63540076 - samples/sec: 20.17 - lr: 0.010000\n",
+      "2021-09-21 20:31:44,535 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:44,536 EPOCH 9 done: loss 0.6354 - lr 0.0100000\n",
+      "2021-09-21 20:31:44,642 DEV : loss 0.34970152378082275 - score 0.0\n",
+      "Epoch     9: reducing learning rate of group 0 to 5.0000e-03.\n",
+      "2021-09-21 20:31:44,644 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:31:45,404 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:45,531 epoch 10 - iter 1/13 - loss 0.60387141 - samples/sec: 15.26 - lr: 0.005000\n",
+      "2021-09-21 20:31:45,591 epoch 10 - iter 2/13 - loss 0.63365752 - samples/sec: 17.07 - lr: 0.005000\n",
+      "2021-09-21 20:31:45,673 epoch 10 - iter 3/13 - loss 0.64683213 - samples/sec: 12.27 - lr: 0.005000\n",
+      "2021-09-21 20:31:45,750 epoch 10 - iter 4/13 - loss 0.63434735 - samples/sec: 13.00 - lr: 0.005000\n",
+      "2021-09-21 20:31:45,825 epoch 10 - iter 5/13 - loss 0.62170099 - samples/sec: 13.41 - lr: 0.005000\n",
+      "2021-09-21 20:31:45,894 epoch 10 - iter 6/13 - loss 0.62514318 - samples/sec: 14.50 - lr: 0.005000\n",
+      "2021-09-21 20:31:45,963 epoch 10 - iter 7/13 - loss 0.62743447 - samples/sec: 14.52 - lr: 0.005000\n",
+      "2021-09-21 20:31:46,038 epoch 10 - iter 8/13 - loss 0.62300146 - samples/sec: 13.46 - lr: 0.005000\n",
+      "2021-09-21 20:31:46,099 epoch 10 - iter 9/13 - loss 0.61622865 - samples/sec: 16.51 - lr: 0.005000\n",
+      "2021-09-21 20:31:46,168 epoch 10 - iter 10/13 - loss 0.61200029 - samples/sec: 14.53 - lr: 0.005000\n",
+      "2021-09-21 20:31:46,246 epoch 10 - iter 11/13 - loss 0.61377587 - samples/sec: 13.00 - lr: 0.005000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:56:22,625 epoch 10 - iter 13/13 - loss 0.13692604 - samples/sec: 23.84 - lr: 0.020000\n",
-      "2021-09-08 11:56:22,626 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:22,626 EPOCH 10 done: loss 0.1369 - lr 0.0200000\n",
-      "2021-09-08 11:56:26,918 DEV : loss 0.08499796688556671 - score 0.0\n",
-      "2021-09-08 11:56:26,919 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:56:32,644 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:32,645 Testing using best model ...\n",
-      "2021-09-08 11:56:32,693 loading file None1/best-model.pt\n",
+      "2021-09-21 20:31:46,304 epoch 10 - iter 12/13 - loss 0.61861000 - samples/sec: 17.38 - lr: 0.005000\n",
+      "2021-09-21 20:31:46,360 epoch 10 - iter 13/13 - loss 0.61824321 - samples/sec: 17.97 - lr: 0.005000\n",
+      "2021-09-21 20:31:46,361 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:46,361 EPOCH 10 done: loss 0.6182 - lr 0.0050000\n",
+      "2021-09-21 20:31:46,438 DEV : loss 0.4157433807849884 - score 0.0\n",
+      "2021-09-21 20:31:46,439 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:31:51,548 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:31:51,548 Testing using best model ...\n",
+      "2021-09-21 20:31:51,572 loading file None1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:56:38,175 \t0.0\n",
-      "2021-09-08 11:56:38,175 \n",
+      "2021-09-21 20:32:01,567 \t0.0\n",
+      "2021-09-21 20:32:01,568 \n",
       "Results:\n",
       "- F-score (micro) 0.0\n",
       "- F-score (macro) 0.0\n",
@@ -5506,47 +5521,47 @@
       "By class:\n",
       "                                               precision    recall  f1-score   support\n",
       "\n",
-      "                    this text is about travel     0.0000    0.0000    0.0000         0\n",
+      "                this text is about technology     0.0000    0.0000    0.0000         0\n",
       "                  this text is about wellness     0.0000    0.0000    0.0000         0\n",
       "                     this text is about women     0.0000    0.0000    0.0000         0\n",
       "                   this text is about parents     0.0000    0.0000    0.0000         0\n",
-      "                  this text is about business     0.0000    0.0000    0.0000         0\n",
       "                  this text is about weddings     0.0000    0.0000    0.0000         0\n",
+      "                   this text is about fashion     0.0000    0.0000    0.0000         0\n",
       "this text is about entertainmen,the attention     0.0000    0.0000    0.0000         0\n",
-      "                   this text is about science     0.0000    0.0000    0.0000         0\n",
+      "                   this text is about divorce     0.0000    0.0000    0.0000         0\n",
       "                     this text is about crime     0.0000    0.0000    0.0000         0\n",
       "                  this text is about religion     0.0000    0.0000    0.0000         0\n",
       "                    this text is about sports     0.0000    0.0000    0.0000         0\n",
       "                  this text is about politics     0.0000    0.0000    0.0000         0\n",
       "                    this text is about comedy     0.0000    0.0000    0.0000         0\n",
-      "                this text is about technology     0.0000    0.0000    0.0000         1\n",
-      "                   this text is about divorce     0.0000    0.0000    0.0000         1\n",
+      "                  this text is about business     0.0000    0.0000    0.0000         1\n",
+      "                   this text is about science     0.0000    0.0000    0.0000         1\n",
       "\n",
       "                                    micro avg     0.0000    0.0000    0.0000         2\n",
       "                                    macro avg     0.0000    0.0000    0.0000         2\n",
       "                                 weighted avg     0.0000    0.0000    0.0000         2\n",
       "                                  samples avg     0.0000    0.0000    0.0000         2\n",
       "\n",
-      "2021-09-08 11:56:38,175 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:46,263 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
+      "2021-09-21 20:32:01,568 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:20,564 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:56:50,164 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:32:24,942 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 15/15 [00:00<00:00, 50901.75it/s]"
+      "100%|██████████| 15/15 [00:00<00:00, 47375.42it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:56:50,166 [b'this text is about travel', b'this text is about technology', b'this text is about wellness', b'this text is about women', b'this text is about parents', b'this text is about weddings', b'this text is about entertainmen,the attention', b'this text is about science', b'this text is about divorce', b'this text is about religion', b'this text is about sports', b'this text is about politics', b'this text is about comedy', b'this text is about business', b'this text is about fashion']\n",
-      "2021-09-08 11:56:50,174 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:50,176 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:32:24,944 [b'this text is about travel', b'this text is about technology', b'this text is about women', b'this text is about business', b'this text is about weddings', b'this text is about fashion', b'this text is about science', b'this text is about divorce', b'this text is about crime', b'this text is about religion', b'this text is about sports', b'this text is about politics', b'this text is about comedy', b'this text is about wellness', b'this text is about entertainmen,the attention']\n",
+      "2021-09-21 20:32:24,953 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:24,955 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -5859,28 +5874,27 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:56:50,176 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:50,177 Corpus: \"Corpus: 13 train + 1 dev + 2 test sentences\"\n",
-      "2021-09-08 11:56:50,177 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:50,177 Parameters:\n",
-      "2021-09-08 11:56:50,177  - learning_rate: \"0.02\"\n",
-      "2021-09-08 11:56:50,178  - mini_batch_size: \"1\"\n",
-      "2021-09-08 11:56:50,178  - patience: \"3\"\n",
-      "2021-09-08 11:56:50,178  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 11:56:50,178  - max_epochs: \"10\"\n",
-      "2021-09-08 11:56:50,179  - shuffle: \"True\"\n",
-      "2021-09-08 11:56:50,179  - train_with_dev: \"False\"\n",
-      "2021-09-08 11:56:50,179  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 11:56:50,180 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:50,180 Model training base path: \"None1\"\n",
-      "2021-09-08 11:56:50,180 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:50,180 Device: cuda:0\n",
-      "2021-09-08 11:56:50,181 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:50,181 Embeddings storage mode: cpu\n",
-      "2021-09-08 11:56:50,187 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:50,241 epoch 1 - iter 1/13 - loss 0.59331906 - samples/sec: 26.35 - lr: 0.020000\n",
-      "2021-09-08 11:56:50,287 epoch 1 - iter 2/13 - loss 0.54703513 - samples/sec: 22.24 - lr: 0.020000\n",
-      "2021-09-08 11:56:50,332 epoch 1 - iter 3/13 - loss 0.42701777 - samples/sec: 22.05 - lr: 0.020000\n"
+      "2021-09-21 20:32:24,955 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:24,956 Corpus: \"Corpus: 13 train + 1 dev + 2 test sentences\"\n",
+      "2021-09-21 20:32:24,956 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:24,956 Parameters:\n",
+      "2021-09-21 20:32:24,957  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:32:24,957  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:32:24,957  - patience: \"3\"\n",
+      "2021-09-21 20:32:24,957  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:32:24,958  - max_epochs: \"10\"\n",
+      "2021-09-21 20:32:24,958  - shuffle: \"True\"\n",
+      "2021-09-21 20:32:24,958  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:32:24,959  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:32:24,959 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:24,959 Model training base path: \"None1\"\n",
+      "2021-09-21 20:32:24,959 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:24,960 Device: cuda:0\n",
+      "2021-09-21 20:32:24,960 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:24,960 Embeddings storage mode: cpu\n",
+      "2021-09-21 20:32:24,967 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:25,065 epoch 1 - iter 1/13 - loss 0.81576157 - samples/sec: 16.59 - lr: 0.020000\n",
+      "2021-09-21 20:32:25,132 epoch 1 - iter 2/13 - loss 0.85068157 - samples/sec: 15.10 - lr: 0.020000\n"
      ]
     },
     {
@@ -5894,235 +5908,236 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:56:50,378 epoch 1 - iter 4/13 - loss 0.61132142 - samples/sec: 22.05 - lr: 0.020000\n",
-      "2021-09-08 11:56:50,424 epoch 1 - iter 5/13 - loss 0.51117608 - samples/sec: 22.19 - lr: 0.020000\n",
-      "2021-09-08 11:56:50,470 epoch 1 - iter 6/13 - loss 0.59248915 - samples/sec: 22.04 - lr: 0.020000\n",
-      "2021-09-08 11:56:50,515 epoch 1 - iter 7/13 - loss 0.65716984 - samples/sec: 22.38 - lr: 0.020000\n",
-      "2021-09-08 11:56:50,561 epoch 1 - iter 8/13 - loss 0.59627138 - samples/sec: 21.96 - lr: 0.020000\n",
-      "2021-09-08 11:56:50,606 epoch 1 - iter 9/13 - loss 0.57842218 - samples/sec: 22.24 - lr: 0.020000\n",
-      "2021-09-08 11:56:50,648 epoch 1 - iter 10/13 - loss 0.52233663 - samples/sec: 24.07 - lr: 0.020000\n",
-      "2021-09-08 11:56:50,694 epoch 1 - iter 11/13 - loss 0.61771800 - samples/sec: 22.01 - lr: 0.020000\n",
-      "2021-09-08 11:56:50,736 epoch 1 - iter 12/13 - loss 0.56763186 - samples/sec: 24.02 - lr: 0.020000\n",
-      "2021-09-08 11:56:50,782 epoch 1 - iter 13/13 - loss 0.60435112 - samples/sec: 22.24 - lr: 0.020000\n",
-      "2021-09-08 11:56:50,783 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:50,783 EPOCH 1 done: loss 0.6044 - lr 0.0200000\n",
-      "2021-09-08 11:56:50,816 DEV : loss 0.13760945200920105 - score 0.0\n",
-      "2021-09-08 11:56:50,816 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:32:25,199 epoch 1 - iter 3/13 - loss 0.79632501 - samples/sec: 15.04 - lr: 0.020000\n",
+      "2021-09-21 20:32:25,271 epoch 1 - iter 4/13 - loss 0.81711377 - samples/sec: 14.10 - lr: 0.020000\n",
+      "2021-09-21 20:32:25,339 epoch 1 - iter 5/13 - loss 0.70748014 - samples/sec: 14.85 - lr: 0.020000\n",
+      "2021-09-21 20:32:25,411 epoch 1 - iter 6/13 - loss 0.64587712 - samples/sec: 13.95 - lr: 0.020000\n",
+      "2021-09-21 20:32:25,478 epoch 1 - iter 7/13 - loss 0.58281028 - samples/sec: 15.18 - lr: 0.020000\n",
+      "2021-09-21 20:32:25,539 epoch 1 - iter 8/13 - loss 0.53266791 - samples/sec: 16.52 - lr: 0.020000\n",
+      "2021-09-21 20:32:25,611 epoch 1 - iter 9/13 - loss 0.63627882 - samples/sec: 14.01 - lr: 0.020000\n",
+      "2021-09-21 20:32:25,660 epoch 1 - iter 10/13 - loss 0.58849868 - samples/sec: 20.65 - lr: 0.020000\n",
+      "2021-09-21 20:32:25,707 epoch 1 - iter 11/13 - loss 0.70397768 - samples/sec: 21.37 - lr: 0.020000\n",
+      "2021-09-21 20:32:25,754 epoch 1 - iter 12/13 - loss 0.65274926 - samples/sec: 21.47 - lr: 0.020000\n",
+      "2021-09-21 20:32:25,804 epoch 1 - iter 13/13 - loss 0.79315853 - samples/sec: 20.27 - lr: 0.020000\n",
+      "2021-09-21 20:32:25,805 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:25,805 EPOCH 1 done: loss 0.7932 - lr 0.0200000\n",
+      "2021-09-21 20:32:25,950 DEV : loss 0.07181078940629959 - score 0.0\n",
+      "2021-09-21 20:32:25,951 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:56:55,584 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:55,647 epoch 2 - iter 1/13 - loss 0.14824675 - samples/sec: 21.53 - lr: 0.020000\n",
-      "2021-09-08 11:56:55,693 epoch 2 - iter 2/13 - loss 0.08553156 - samples/sec: 22.10 - lr: 0.020000\n",
-      "2021-09-08 11:56:55,738 epoch 2 - iter 3/13 - loss 0.24591954 - samples/sec: 22.23 - lr: 0.020000\n",
-      "2021-09-08 11:56:55,784 epoch 2 - iter 4/13 - loss 0.19805384 - samples/sec: 21.95 - lr: 0.020000\n",
-      "2021-09-08 11:56:55,830 epoch 2 - iter 5/13 - loss 0.38576119 - samples/sec: 22.30 - lr: 0.020000\n",
-      "2021-09-08 11:56:55,875 epoch 2 - iter 6/13 - loss 0.43696524 - samples/sec: 22.15 - lr: 0.020000\n",
-      "2021-09-08 11:56:55,921 epoch 2 - iter 7/13 - loss 0.48835020 - samples/sec: 21.99 - lr: 0.020000\n",
-      "2021-09-08 11:56:55,966 epoch 2 - iter 8/13 - loss 0.46379986 - samples/sec: 22.39 - lr: 0.020000\n",
-      "2021-09-08 11:56:56,009 epoch 2 - iter 9/13 - loss 0.41438910 - samples/sec: 23.61 - lr: 0.020000\n",
-      "2021-09-08 11:56:56,055 epoch 2 - iter 10/13 - loss 0.41663064 - samples/sec: 22.26 - lr: 0.020000\n",
-      "2021-09-08 11:56:56,100 epoch 2 - iter 11/13 - loss 0.44408229 - samples/sec: 22.36 - lr: 0.020000\n",
-      "2021-09-08 11:56:56,146 epoch 2 - iter 12/13 - loss 0.43968098 - samples/sec: 22.04 - lr: 0.020000\n",
-      "2021-09-08 11:56:56,191 epoch 2 - iter 13/13 - loss 0.50985696 - samples/sec: 22.33 - lr: 0.020000\n",
-      "2021-09-08 11:56:56,192 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:56:56,193 EPOCH 2 done: loss 0.5099 - lr 0.0200000\n",
-      "2021-09-08 11:56:56,223 DEV : loss 0.09565086662769318 - score 0.0\n",
-      "2021-09-08 11:56:56,224 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:32:37,846 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:37,997 epoch 2 - iter 1/13 - loss 1.27346802 - samples/sec: 14.18 - lr: 0.020000\n",
+      "2021-09-21 20:32:38,059 epoch 2 - iter 2/13 - loss 1.10727370 - samples/sec: 16.22 - lr: 0.020000\n",
+      "2021-09-21 20:32:38,140 epoch 2 - iter 3/13 - loss 0.76970537 - samples/sec: 12.51 - lr: 0.020000\n",
+      "2021-09-21 20:32:38,220 epoch 2 - iter 4/13 - loss 0.60089219 - samples/sec: 12.49 - lr: 0.020000\n",
+      "2021-09-21 20:32:38,302 epoch 2 - iter 5/13 - loss 0.70080764 - samples/sec: 12.28 - lr: 0.020000\n",
+      "2021-09-21 20:32:38,369 epoch 2 - iter 6/13 - loss 0.59471932 - samples/sec: 14.95 - lr: 0.020000\n",
+      "2021-09-21 20:32:38,459 epoch 2 - iter 7/13 - loss 0.65894009 - samples/sec: 11.18 - lr: 0.020000\n",
+      "2021-09-21 20:32:38,529 epoch 2 - iter 8/13 - loss 0.68884170 - samples/sec: 14.51 - lr: 0.020000\n",
+      "2021-09-21 20:32:38,613 epoch 2 - iter 9/13 - loss 0.63981298 - samples/sec: 11.94 - lr: 0.020000\n",
+      "2021-09-21 20:32:38,675 epoch 2 - iter 10/13 - loss 0.59067400 - samples/sec: 16.19 - lr: 0.020000\n",
+      "2021-09-21 20:32:38,751 epoch 2 - iter 11/13 - loss 0.56239793 - samples/sec: 13.32 - lr: 0.020000\n",
+      "2021-09-21 20:32:38,825 epoch 2 - iter 12/13 - loss 0.52237578 - samples/sec: 13.58 - lr: 0.020000\n",
+      "2021-09-21 20:32:38,899 epoch 2 - iter 13/13 - loss 0.51133331 - samples/sec: 13.61 - lr: 0.020000\n",
+      "2021-09-21 20:32:38,900 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:38,900 EPOCH 2 done: loss 0.5113 - lr 0.0200000\n",
+      "2021-09-21 20:32:38,981 DEV : loss 0.4332210421562195 - score 0.0\n",
+      "2021-09-21 20:32:38,983 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:32:38,985 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:39,101 epoch 3 - iter 1/13 - loss 0.02704588 - samples/sec: 16.81 - lr: 0.020000\n",
+      "2021-09-21 20:32:39,152 epoch 3 - iter 2/13 - loss 0.01646471 - samples/sec: 19.77 - lr: 0.020000\n",
+      "2021-09-21 20:32:39,218 epoch 3 - iter 3/13 - loss 0.11191693 - samples/sec: 15.17 - lr: 0.020000\n",
+      "2021-09-21 20:32:39,286 epoch 3 - iter 4/13 - loss 0.17651519 - samples/sec: 14.98 - lr: 0.020000\n",
+      "2021-09-21 20:32:39,352 epoch 3 - iter 5/13 - loss 0.53685450 - samples/sec: 15.23 - lr: 0.020000\n",
+      "2021-09-21 20:32:39,414 epoch 3 - iter 6/13 - loss 0.45617657 - samples/sec: 16.20 - lr: 0.020000\n",
+      "2021-09-21 20:32:39,488 epoch 3 - iter 7/13 - loss 0.54488045 - samples/sec: 13.62 - lr: 0.020000\n",
+      "2021-09-21 20:32:39,550 epoch 3 - iter 8/13 - loss 0.62368678 - samples/sec: 16.09 - lr: 0.020000\n",
+      "2021-09-21 20:32:39,595 epoch 3 - iter 9/13 - loss 0.55575976 - samples/sec: 22.46 - lr: 0.020000\n",
+      "2021-09-21 20:32:39,647 epoch 3 - iter 10/13 - loss 0.50657874 - samples/sec: 19.56 - lr: 0.020000\n",
+      "2021-09-21 20:32:39,718 epoch 3 - iter 11/13 - loss 0.46577897 - samples/sec: 14.30 - lr: 0.020000\n",
+      "2021-09-21 20:32:39,772 epoch 3 - iter 12/13 - loss 0.42801724 - samples/sec: 18.50 - lr: 0.020000\n",
+      "2021-09-21 20:32:39,833 epoch 3 - iter 13/13 - loss 0.45083839 - samples/sec: 16.67 - lr: 0.020000\n",
+      "2021-09-21 20:32:39,834 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:39,834 EPOCH 3 done: loss 0.4508 - lr 0.0200000\n",
+      "2021-09-21 20:32:39,971 DEV : loss 0.13621416687965393 - score 0.0\n",
+      "2021-09-21 20:32:39,973 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:32:40,045 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:40,139 epoch 4 - iter 1/13 - loss 0.21900368 - samples/sec: 15.67 - lr: 0.020000\n",
+      "2021-09-21 20:32:40,193 epoch 4 - iter 2/13 - loss 0.17666896 - samples/sec: 18.71 - lr: 0.020000\n",
+      "2021-09-21 20:32:40,267 epoch 4 - iter 3/13 - loss 0.22885299 - samples/sec: 13.66 - lr: 0.020000\n",
+      "2021-09-21 20:32:40,326 epoch 4 - iter 4/13 - loss 0.22438481 - samples/sec: 17.25 - lr: 0.020000\n",
+      "2021-09-21 20:32:40,394 epoch 4 - iter 5/13 - loss 0.18045028 - samples/sec: 14.67 - lr: 0.020000\n",
+      "2021-09-21 20:32:40,448 epoch 4 - iter 6/13 - loss 0.35122876 - samples/sec: 18.77 - lr: 0.020000\n",
+      "2021-09-21 20:32:40,501 epoch 4 - iter 7/13 - loss 0.34804498 - samples/sec: 19.02 - lr: 0.020000\n",
+      "2021-09-21 20:32:40,556 epoch 4 - iter 8/13 - loss 0.32417667 - samples/sec: 18.41 - lr: 0.020000\n",
+      "2021-09-21 20:32:40,610 epoch 4 - iter 9/13 - loss 0.28992006 - samples/sec: 18.70 - lr: 0.020000\n",
+      "2021-09-21 20:32:40,666 epoch 4 - iter 10/13 - loss 0.26137452 - samples/sec: 17.90 - lr: 0.020000\n",
+      "2021-09-21 20:32:40,718 epoch 4 - iter 11/13 - loss 0.23952158 - samples/sec: 19.31 - lr: 0.020000\n",
+      "2021-09-21 20:32:40,789 epoch 4 - iter 12/13 - loss 0.22028887 - samples/sec: 14.27 - lr: 0.020000\n",
+      "2021-09-21 20:32:40,854 epoch 4 - iter 13/13 - loss 0.20382882 - samples/sec: 15.38 - lr: 0.020000\n",
+      "2021-09-21 20:32:40,855 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:40,856 EPOCH 4 done: loss 0.2038 - lr 0.0200000\n",
+      "2021-09-21 20:32:41,000 DEV : loss 0.021864712238311768 - score 0.0\n",
+      "2021-09-21 20:32:41,001 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 11:57:00,124 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:00,188 epoch 3 - iter 1/13 - loss 0.29746360 - samples/sec: 21.19 - lr: 0.020000\n",
-      "2021-09-08 11:57:00,235 epoch 3 - iter 2/13 - loss 0.18248264 - samples/sec: 21.72 - lr: 0.020000\n",
-      "2021-09-08 11:57:00,281 epoch 3 - iter 3/13 - loss 0.16203772 - samples/sec: 21.82 - lr: 0.020000\n",
-      "2021-09-08 11:57:00,324 epoch 3 - iter 4/13 - loss 0.12761376 - samples/sec: 23.83 - lr: 0.020000\n",
-      "2021-09-08 11:57:00,369 epoch 3 - iter 5/13 - loss 0.12224693 - samples/sec: 22.15 - lr: 0.020000\n",
-      "2021-09-08 11:57:00,415 epoch 3 - iter 6/13 - loss 0.20046475 - samples/sec: 22.23 - lr: 0.020000\n",
-      "2021-09-08 11:57:00,457 epoch 3 - iter 7/13 - loss 0.17265934 - samples/sec: 23.95 - lr: 0.020000\n",
-      "2021-09-08 11:57:00,503 epoch 3 - iter 8/13 - loss 0.19253308 - samples/sec: 22.06 - lr: 0.020000\n",
-      "2021-09-08 11:57:00,548 epoch 3 - iter 9/13 - loss 0.21897736 - samples/sec: 22.46 - lr: 0.020000\n",
-      "2021-09-08 11:57:00,593 epoch 3 - iter 10/13 - loss 0.32545337 - samples/sec: 22.32 - lr: 0.020000\n",
-      "2021-09-08 11:57:00,639 epoch 3 - iter 11/13 - loss 0.31392462 - samples/sec: 22.14 - lr: 0.020000\n",
-      "2021-09-08 11:57:00,684 epoch 3 - iter 12/13 - loss 0.31188320 - samples/sec: 22.38 - lr: 0.020000\n",
-      "2021-09-08 11:57:00,730 epoch 3 - iter 13/13 - loss 0.28977571 - samples/sec: 22.19 - lr: 0.020000\n",
-      "2021-09-08 11:57:00,731 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:00,731 EPOCH 3 done: loss 0.2898 - lr 0.0200000\n",
-      "2021-09-08 11:57:00,762 DEV : loss 0.04569094628095627 - score 0.0\n",
-      "2021-09-08 11:57:00,763 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 11:57:04,685 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:04,749 epoch 4 - iter 1/13 - loss 0.26448399 - samples/sec: 21.16 - lr: 0.020000\n",
-      "2021-09-08 11:57:04,796 epoch 4 - iter 2/13 - loss 0.14415339 - samples/sec: 21.72 - lr: 0.020000\n",
-      "2021-09-08 11:57:04,848 epoch 4 - iter 3/13 - loss 0.21943916 - samples/sec: 19.53 - lr: 0.020000\n",
-      "2021-09-08 11:57:04,898 epoch 4 - iter 4/13 - loss 0.30250051 - samples/sec: 20.32 - lr: 0.020000\n",
-      "2021-09-08 11:57:04,944 epoch 4 - iter 5/13 - loss 0.24259393 - samples/sec: 22.11 - lr: 0.020000\n",
-      "2021-09-08 11:57:04,995 epoch 4 - iter 6/13 - loss 0.29938212 - samples/sec: 20.09 - lr: 0.020000\n",
-      "2021-09-08 11:57:05,045 epoch 4 - iter 7/13 - loss 0.31875474 - samples/sec: 20.51 - lr: 0.020000\n",
-      "2021-09-08 11:57:05,091 epoch 4 - iter 8/13 - loss 0.28024036 - samples/sec: 21.95 - lr: 0.020000\n",
-      "2021-09-08 11:57:05,140 epoch 4 - iter 9/13 - loss 0.36587130 - samples/sec: 20.83 - lr: 0.020000\n",
-      "2021-09-08 11:57:05,183 epoch 4 - iter 10/13 - loss 0.32979769 - samples/sec: 23.40 - lr: 0.020000\n",
-      "2021-09-08 11:57:05,230 epoch 4 - iter 11/13 - loss 0.34885528 - samples/sec: 21.53 - lr: 0.020000\n",
-      "2021-09-08 11:57:05,278 epoch 4 - iter 12/13 - loss 0.35876761 - samples/sec: 21.34 - lr: 0.020000\n",
-      "2021-09-08 11:57:05,328 epoch 4 - iter 13/13 - loss 0.42841994 - samples/sec: 20.39 - lr: 0.020000\n",
-      "2021-09-08 11:57:05,329 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:05,330 EPOCH 4 done: loss 0.4284 - lr 0.0200000\n",
-      "2021-09-08 11:57:05,488 DEV : loss 0.348312646150589 - score 0.0\n",
-      "2021-09-08 11:57:05,489 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:57:05,573 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:05,634 epoch 5 - iter 1/13 - loss 0.03846515 - samples/sec: 22.97 - lr: 0.020000\n",
-      "2021-09-08 11:57:05,677 epoch 5 - iter 2/13 - loss 0.02237513 - samples/sec: 23.30 - lr: 0.020000\n",
-      "2021-09-08 11:57:05,721 epoch 5 - iter 3/13 - loss 0.01812307 - samples/sec: 23.50 - lr: 0.020000\n",
-      "2021-09-08 11:57:05,767 epoch 5 - iter 4/13 - loss 0.02041614 - samples/sec: 21.67 - lr: 0.020000\n",
-      "2021-09-08 11:57:05,814 epoch 5 - iter 5/13 - loss 0.04151708 - samples/sec: 21.73 - lr: 0.020000\n",
-      "2021-09-08 11:57:05,859 epoch 5 - iter 6/13 - loss 0.10955262 - samples/sec: 22.21 - lr: 0.020000\n",
-      "2021-09-08 11:57:05,903 epoch 5 - iter 7/13 - loss 0.09439559 - samples/sec: 23.08 - lr: 0.020000\n",
-      "2021-09-08 11:57:05,949 epoch 5 - iter 8/13 - loss 0.08577347 - samples/sec: 21.86 - lr: 0.020000\n",
-      "2021-09-08 11:57:05,993 epoch 5 - iter 9/13 - loss 0.07640467 - samples/sec: 23.27 - lr: 0.020000\n",
-      "2021-09-08 11:57:06,040 epoch 5 - iter 10/13 - loss 0.08351007 - samples/sec: 21.57 - lr: 0.020000\n",
-      "2021-09-08 11:57:06,087 epoch 5 - iter 11/13 - loss 0.07783116 - samples/sec: 21.74 - lr: 0.020000\n",
-      "2021-09-08 11:57:06,135 epoch 5 - iter 12/13 - loss 0.08988687 - samples/sec: 20.84 - lr: 0.020000\n",
-      "2021-09-08 11:57:06,181 epoch 5 - iter 13/13 - loss 0.08320227 - samples/sec: 21.97 - lr: 0.020000\n",
-      "2021-09-08 11:57:06,182 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:06,183 EPOCH 5 done: loss 0.0832 - lr 0.0200000\n",
-      "2021-09-08 11:57:06,335 DEV : loss 0.7090694308280945 - score 0.0\n",
-      "2021-09-08 11:57:06,336 BAD EPOCHS (no improvement): 2\n"
+      "2021-09-21 20:32:45,467 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:45,562 epoch 5 - iter 1/13 - loss 0.05771811 - samples/sec: 17.31 - lr: 0.020000\n",
+      "2021-09-21 20:32:45,630 epoch 5 - iter 2/13 - loss 0.51690746 - samples/sec: 14.84 - lr: 0.020000\n",
+      "2021-09-21 20:32:45,687 epoch 5 - iter 3/13 - loss 0.34887590 - samples/sec: 17.66 - lr: 0.020000\n",
+      "2021-09-21 20:32:45,746 epoch 5 - iter 4/13 - loss 0.29813794 - samples/sec: 17.06 - lr: 0.020000\n",
+      "2021-09-21 20:32:45,797 epoch 5 - iter 5/13 - loss 0.23899680 - samples/sec: 19.87 - lr: 0.020000\n",
+      "2021-09-21 20:32:45,844 epoch 5 - iter 6/13 - loss 0.20080949 - samples/sec: 21.62 - lr: 0.020000\n",
+      "2021-09-21 20:32:45,916 epoch 5 - iter 7/13 - loss 0.20363716 - samples/sec: 13.91 - lr: 0.020000\n",
+      "2021-09-21 20:32:45,982 epoch 5 - iter 8/13 - loss 0.24795012 - samples/sec: 15.42 - lr: 0.020000\n",
+      "2021-09-21 20:32:46,038 epoch 5 - iter 9/13 - loss 0.22350057 - samples/sec: 17.99 - lr: 0.020000\n",
+      "2021-09-21 20:32:46,091 epoch 5 - iter 10/13 - loss 0.20136600 - samples/sec: 19.02 - lr: 0.020000\n",
+      "2021-09-21 20:32:46,146 epoch 5 - iter 11/13 - loss 0.30835095 - samples/sec: 18.24 - lr: 0.020000\n",
+      "2021-09-21 20:32:46,217 epoch 5 - iter 12/13 - loss 0.29539075 - samples/sec: 14.24 - lr: 0.020000\n",
+      "2021-09-21 20:32:46,268 epoch 5 - iter 13/13 - loss 0.30397194 - samples/sec: 19.68 - lr: 0.020000\n",
+      "2021-09-21 20:32:46,269 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:46,270 EPOCH 5 done: loss 0.3040 - lr 0.0200000\n",
+      "2021-09-21 20:32:46,412 DEV : loss 0.0020481939427554607 - score 0.0\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:57:06,427 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:06,489 epoch 6 - iter 1/13 - loss 0.26138386 - samples/sec: 21.78 - lr: 0.020000\n",
-      "2021-09-08 11:57:06,532 epoch 6 - iter 2/13 - loss 0.13528219 - samples/sec: 23.58 - lr: 0.020000\n",
-      "2021-09-08 11:57:06,575 epoch 6 - iter 3/13 - loss 0.09378144 - samples/sec: 23.43 - lr: 0.020000\n",
-      "2021-09-08 11:57:06,618 epoch 6 - iter 4/13 - loss 0.07055672 - samples/sec: 23.83 - lr: 0.020000\n",
-      "2021-09-08 11:57:06,661 epoch 6 - iter 5/13 - loss 0.05660012 - samples/sec: 23.50 - lr: 0.020000\n",
-      "2021-09-08 11:57:06,707 epoch 6 - iter 6/13 - loss 0.12812046 - samples/sec: 22.11 - lr: 0.020000\n",
-      "2021-09-08 11:57:06,753 epoch 6 - iter 7/13 - loss 0.15770488 - samples/sec: 21.90 - lr: 0.020000\n",
-      "2021-09-08 11:57:06,797 epoch 6 - iter 8/13 - loss 0.13810074 - samples/sec: 23.03 - lr: 0.020000\n",
-      "2021-09-08 11:57:06,840 epoch 6 - iter 9/13 - loss 0.12293270 - samples/sec: 23.29 - lr: 0.020000\n",
-      "2021-09-08 11:57:06,883 epoch 6 - iter 10/13 - loss 0.11107554 - samples/sec: 23.59 - lr: 0.020000\n",
-      "2021-09-08 11:57:06,927 epoch 6 - iter 11/13 - loss 0.10226393 - samples/sec: 23.20 - lr: 0.020000\n",
-      "2021-09-08 11:57:06,970 epoch 6 - iter 12/13 - loss 0.09423379 - samples/sec: 23.39 - lr: 0.020000\n",
-      "2021-09-08 11:57:07,014 epoch 6 - iter 13/13 - loss 0.08703036 - samples/sec: 23.09 - lr: 0.020000\n",
-      "2021-09-08 11:57:07,015 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:07,016 EPOCH 6 done: loss 0.0870 - lr 0.0200000\n",
-      "2021-09-08 11:57:07,279 DEV : loss 0.4875955879688263 - score 0.0\n",
-      "2021-09-08 11:57:07,280 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:57:07,430 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:07,492 epoch 7 - iter 1/13 - loss 0.33422172 - samples/sec: 21.74 - lr: 0.020000\n",
-      "2021-09-08 11:57:07,537 epoch 7 - iter 2/13 - loss 0.16763708 - samples/sec: 22.48 - lr: 0.020000\n",
-      "2021-09-08 11:57:07,580 epoch 7 - iter 3/13 - loss 0.11202403 - samples/sec: 23.58 - lr: 0.020000\n",
-      "2021-09-08 11:57:07,622 epoch 7 - iter 4/13 - loss 0.08584658 - samples/sec: 23.83 - lr: 0.020000\n",
-      "2021-09-08 11:57:07,669 epoch 7 - iter 5/13 - loss 0.50967987 - samples/sec: 21.90 - lr: 0.020000\n",
-      "2021-09-08 11:57:07,711 epoch 7 - iter 6/13 - loss 0.42506413 - samples/sec: 23.65 - lr: 0.020000\n",
-      "2021-09-08 11:57:07,758 epoch 7 - iter 7/13 - loss 0.37004960 - samples/sec: 21.91 - lr: 0.020000\n",
-      "2021-09-08 11:57:07,800 epoch 7 - iter 8/13 - loss 0.32398592 - samples/sec: 23.75 - lr: 0.020000\n",
-      "2021-09-08 11:57:07,846 epoch 7 - iter 9/13 - loss 0.29629472 - samples/sec: 22.17 - lr: 0.020000\n",
-      "2021-09-08 11:57:07,892 epoch 7 - iter 10/13 - loss 0.32879778 - samples/sec: 21.85 - lr: 0.020000\n",
-      "2021-09-08 11:57:07,935 epoch 7 - iter 11/13 - loss 0.29895564 - samples/sec: 23.67 - lr: 0.020000\n",
-      "2021-09-08 11:57:07,980 epoch 7 - iter 12/13 - loss 0.27865622 - samples/sec: 22.26 - lr: 0.020000\n",
-      "2021-09-08 11:57:08,023 epoch 7 - iter 13/13 - loss 0.25730875 - samples/sec: 23.51 - lr: 0.020000\n",
-      "2021-09-08 11:57:08,024 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:08,024 EPOCH 7 done: loss 0.2573 - lr 0.0200000\n",
-      "2021-09-08 11:57:08,206 DEV : loss 0.5231066346168518 - score 0.0\n",
-      "Epoch     7: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 11:57:08,206 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 11:57:08,312 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:08,370 epoch 8 - iter 1/13 - loss 0.00109404 - samples/sec: 23.68 - lr: 0.010000\n",
-      "2021-09-08 11:57:08,416 epoch 8 - iter 2/13 - loss 0.01036691 - samples/sec: 21.89 - lr: 0.010000\n",
-      "2021-09-08 11:57:08,459 epoch 8 - iter 3/13 - loss 0.01207038 - samples/sec: 23.67 - lr: 0.010000\n",
-      "2021-09-08 11:57:08,502 epoch 8 - iter 4/13 - loss 0.01040225 - samples/sec: 23.58 - lr: 0.010000\n",
-      "2021-09-08 11:57:08,545 epoch 8 - iter 5/13 - loss 0.00843199 - samples/sec: 23.64 - lr: 0.010000\n",
-      "2021-09-08 11:57:08,588 epoch 8 - iter 6/13 - loss 0.00711639 - samples/sec: 23.62 - lr: 0.010000\n",
-      "2021-09-08 11:57:08,630 epoch 8 - iter 7/13 - loss 0.00619863 - samples/sec: 23.72 - lr: 0.010000\n",
-      "2021-09-08 11:57:08,673 epoch 8 - iter 8/13 - loss 0.00557803 - samples/sec: 23.59 - lr: 0.010000\n",
-      "2021-09-08 11:57:08,716 epoch 8 - iter 9/13 - loss 0.00530004 - samples/sec: 23.66 - lr: 0.010000\n",
-      "2021-09-08 11:57:08,762 epoch 8 - iter 10/13 - loss 0.09084193 - samples/sec: 21.88 - lr: 0.010000\n",
-      "2021-09-08 11:57:08,806 epoch 8 - iter 11/13 - loss 0.08279563 - samples/sec: 22.92 - lr: 0.010000\n",
-      "2021-09-08 11:57:08,848 epoch 8 - iter 12/13 - loss 0.07614283 - samples/sec: 24.00 - lr: 0.010000\n",
-      "2021-09-08 11:57:08,890 epoch 8 - iter 13/13 - loss 0.07035052 - samples/sec: 23.91 - lr: 0.010000\n",
-      "2021-09-08 11:57:08,891 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:08,892 EPOCH 8 done: loss 0.0704 - lr 0.0100000\n",
-      "2021-09-08 11:57:09,562 DEV : loss 0.5265138149261475 - score 0.0\n",
-      "2021-09-08 11:57:09,563 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 11:57:09,566 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:09,630 epoch 9 - iter 1/13 - loss 0.00202537 - samples/sec: 20.73 - lr: 0.010000\n",
-      "2021-09-08 11:57:09,680 epoch 9 - iter 2/13 - loss 0.00134064 - samples/sec: 20.23 - lr: 0.010000\n",
-      "2021-09-08 11:57:09,729 epoch 9 - iter 3/13 - loss 0.00100737 - samples/sec: 20.46 - lr: 0.010000\n",
-      "2021-09-08 11:57:09,778 epoch 9 - iter 4/13 - loss 0.00107586 - samples/sec: 20.51 - lr: 0.010000\n",
-      "2021-09-08 11:57:09,821 epoch 9 - iter 5/13 - loss 0.00152158 - samples/sec: 23.80 - lr: 0.010000\n",
-      "2021-09-08 11:57:09,864 epoch 9 - iter 6/13 - loss 0.00139560 - samples/sec: 23.52 - lr: 0.010000\n",
-      "2021-09-08 11:57:09,906 epoch 9 - iter 7/13 - loss 0.00126702 - samples/sec: 23.94 - lr: 0.010000\n",
-      "2021-09-08 11:57:09,949 epoch 9 - iter 8/13 - loss 0.00116385 - samples/sec: 23.70 - lr: 0.010000\n",
-      "2021-09-08 11:57:09,992 epoch 9 - iter 9/13 - loss 0.00109405 - samples/sec: 23.57 - lr: 0.010000\n",
-      "2021-09-08 11:57:10,037 epoch 9 - iter 10/13 - loss 0.00498142 - samples/sec: 22.39 - lr: 0.010000\n",
-      "2021-09-08 11:57:10,080 epoch 9 - iter 11/13 - loss 0.00460106 - samples/sec: 23.68 - lr: 0.010000\n",
-      "2021-09-08 11:57:10,122 epoch 9 - iter 12/13 - loss 0.00432877 - samples/sec: 24.10 - lr: 0.010000\n",
-      "2021-09-08 11:57:10,167 epoch 9 - iter 13/13 - loss 0.00550373 - samples/sec: 22.35 - lr: 0.010000\n",
-      "2021-09-08 11:57:10,168 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:10,169 EPOCH 9 done: loss 0.0055 - lr 0.0100000\n",
-      "2021-09-08 11:57:10,292 DEV : loss 0.5324593782424927 - score 0.0\n",
-      "2021-09-08 11:57:10,293 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 11:57:10,317 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:10,381 epoch 10 - iter 1/13 - loss 0.36189762 - samples/sec: 20.91 - lr: 0.010000\n",
-      "2021-09-08 11:57:10,428 epoch 10 - iter 2/13 - loss 0.18124379 - samples/sec: 21.82 - lr: 0.010000\n",
-      "2021-09-08 11:57:10,477 epoch 10 - iter 3/13 - loss 0.12254189 - samples/sec: 20.92 - lr: 0.010000\n",
-      "2021-09-08 11:57:10,524 epoch 10 - iter 4/13 - loss 0.09316584 - samples/sec: 21.60 - lr: 0.010000\n",
-      "2021-09-08 11:57:10,573 epoch 10 - iter 5/13 - loss 0.11310907 - samples/sec: 20.47 - lr: 0.010000\n",
-      "2021-09-08 11:57:10,621 epoch 10 - iter 6/13 - loss 0.09437814 - samples/sec: 21.32 - lr: 0.010000\n",
-      "2021-09-08 11:57:10,669 epoch 10 - iter 7/13 - loss 0.08098727 - samples/sec: 21.24 - lr: 0.010000\n",
-      "2021-09-08 11:57:10,716 epoch 10 - iter 8/13 - loss 0.07094905 - samples/sec: 21.52 - lr: 0.010000\n",
-      "2021-09-08 11:57:10,767 epoch 10 - iter 9/13 - loss 0.16283846 - samples/sec: 19.83 - lr: 0.010000\n",
-      "2021-09-08 11:57:10,812 epoch 10 - iter 10/13 - loss 0.14663181 - samples/sec: 22.53 - lr: 0.010000\n",
-      "2021-09-08 11:57:10,859 epoch 10 - iter 11/13 - loss 0.16956292 - samples/sec: 21.36 - lr: 0.010000\n",
-      "2021-09-08 11:57:10,905 epoch 10 - iter 12/13 - loss 0.15578949 - samples/sec: 22.10 - lr: 0.010000\n"
+      "2021-09-21 20:32:46,414 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:32:53,556 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:53,648 epoch 6 - iter 1/13 - loss 0.00248338 - samples/sec: 17.42 - lr: 0.020000\n",
+      "2021-09-21 20:32:53,714 epoch 6 - iter 2/13 - loss 0.02361350 - samples/sec: 15.35 - lr: 0.020000\n",
+      "2021-09-21 20:32:53,766 epoch 6 - iter 3/13 - loss 0.01667946 - samples/sec: 19.39 - lr: 0.020000\n",
+      "2021-09-21 20:32:53,813 epoch 6 - iter 4/13 - loss 0.01350304 - samples/sec: 21.52 - lr: 0.020000\n",
+      "2021-09-21 20:32:53,868 epoch 6 - iter 5/13 - loss 0.01118280 - samples/sec: 18.44 - lr: 0.020000\n",
+      "2021-09-21 20:32:53,919 epoch 6 - iter 6/13 - loss 0.01545744 - samples/sec: 19.72 - lr: 0.020000\n",
+      "2021-09-21 20:32:53,976 epoch 6 - iter 7/13 - loss 0.03057770 - samples/sec: 17.58 - lr: 0.020000\n",
+      "2021-09-21 20:32:54,030 epoch 6 - iter 8/13 - loss 0.02765080 - samples/sec: 18.73 - lr: 0.020000\n",
+      "2021-09-21 20:32:54,083 epoch 6 - iter 9/13 - loss 0.09449883 - samples/sec: 19.24 - lr: 0.020000\n",
+      "2021-09-21 20:32:54,137 epoch 6 - iter 10/13 - loss 0.08524266 - samples/sec: 18.37 - lr: 0.020000\n",
+      "2021-09-21 20:32:54,203 epoch 6 - iter 11/13 - loss 0.07757350 - samples/sec: 15.30 - lr: 0.020000\n",
+      "2021-09-21 20:32:54,269 epoch 6 - iter 12/13 - loss 0.07127250 - samples/sec: 15.35 - lr: 0.020000\n",
+      "2021-09-21 20:32:54,320 epoch 6 - iter 13/13 - loss 0.06643187 - samples/sec: 19.77 - lr: 0.020000\n",
+      "2021-09-21 20:32:54,321 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:54,322 EPOCH 6 done: loss 0.0664 - lr 0.0200000\n",
+      "2021-09-21 20:32:54,452 DEV : loss 0.008766410872340202 - score 0.0\n",
+      "2021-09-21 20:32:54,454 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:32:54,531 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:54,639 epoch 7 - iter 1/13 - loss 0.01020034 - samples/sec: 21.99 - lr: 0.020000\n",
+      "2021-09-21 20:32:54,699 epoch 7 - iter 2/13 - loss 0.00735365 - samples/sec: 16.75 - lr: 0.020000\n",
+      "2021-09-21 20:32:54,750 epoch 7 - iter 3/13 - loss 0.00518051 - samples/sec: 19.85 - lr: 0.020000\n",
+      "2021-09-21 20:32:54,807 epoch 7 - iter 4/13 - loss 0.00427536 - samples/sec: 17.80 - lr: 0.020000\n",
+      "2021-09-21 20:32:54,862 epoch 7 - iter 5/13 - loss 0.00502255 - samples/sec: 18.11 - lr: 0.020000\n",
+      "2021-09-21 20:32:54,918 epoch 7 - iter 6/13 - loss 0.00448209 - samples/sec: 18.03 - lr: 0.020000\n",
+      "2021-09-21 20:32:54,981 epoch 7 - iter 7/13 - loss 0.00402845 - samples/sec: 16.02 - lr: 0.020000\n",
+      "2021-09-21 20:32:55,039 epoch 7 - iter 8/13 - loss 0.00360909 - samples/sec: 17.55 - lr: 0.020000\n",
+      "2021-09-21 20:32:55,097 epoch 7 - iter 9/13 - loss 0.01099822 - samples/sec: 17.14 - lr: 0.020000\n",
+      "2021-09-21 20:32:55,147 epoch 7 - iter 10/13 - loss 0.01178049 - samples/sec: 20.47 - lr: 0.020000\n",
+      "2021-09-21 20:32:55,199 epoch 7 - iter 11/13 - loss 0.01077778 - samples/sec: 19.16 - lr: 0.020000\n",
+      "2021-09-21 20:32:55,247 epoch 7 - iter 12/13 - loss 0.00991042 - samples/sec: 21.13 - lr: 0.020000\n",
+      "2021-09-21 20:32:55,305 epoch 7 - iter 13/13 - loss 0.00920526 - samples/sec: 17.51 - lr: 0.020000\n",
+      "2021-09-21 20:32:55,306 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:55,306 EPOCH 7 done: loss 0.0092 - lr 0.0200000\n",
+      "2021-09-21 20:32:55,441 DEV : loss 0.016724377870559692 - score 0.0\n",
+      "2021-09-21 20:32:55,442 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:32:55,517 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:55,658 epoch 8 - iter 1/13 - loss 0.00838130 - samples/sec: 15.74 - lr: 0.020000\n",
+      "2021-09-21 20:32:55,742 epoch 8 - iter 2/13 - loss 0.00484068 - samples/sec: 19.74 - lr: 0.020000\n",
+      "2021-09-21 20:32:55,795 epoch 8 - iter 3/13 - loss 0.00408799 - samples/sec: 18.84 - lr: 0.020000\n",
+      "2021-09-21 20:32:55,847 epoch 8 - iter 4/13 - loss 0.00332340 - samples/sec: 19.74 - lr: 0.020000\n",
+      "2021-09-21 20:32:55,917 epoch 8 - iter 5/13 - loss 0.00419786 - samples/sec: 14.22 - lr: 0.020000\n",
+      "2021-09-21 20:32:55,975 epoch 8 - iter 6/13 - loss 0.00369818 - samples/sec: 17.56 - lr: 0.020000\n",
+      "2021-09-21 20:32:56,033 epoch 8 - iter 7/13 - loss 0.00341068 - samples/sec: 17.47 - lr: 0.020000\n",
+      "2021-09-21 20:32:56,092 epoch 8 - iter 8/13 - loss 0.00315310 - samples/sec: 16.94 - lr: 0.020000\n",
+      "2021-09-21 20:32:56,159 epoch 8 - iter 9/13 - loss 0.05145102 - samples/sec: 14.98 - lr: 0.020000\n",
+      "2021-09-21 20:32:56,215 epoch 8 - iter 10/13 - loss 0.04636430 - samples/sec: 18.00 - lr: 0.020000\n",
+      "2021-09-21 20:32:56,284 epoch 8 - iter 11/13 - loss 0.04819574 - samples/sec: 14.59 - lr: 0.020000\n",
+      "2021-09-21 20:32:56,342 epoch 8 - iter 12/13 - loss 0.04421437 - samples/sec: 17.51 - lr: 0.020000\n",
+      "2021-09-21 20:32:56,396 epoch 8 - iter 13/13 - loss 0.04100250 - samples/sec: 18.58 - lr: 0.020000\n",
+      "2021-09-21 20:32:56,397 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:32:56,398 EPOCH 8 done: loss 0.0410 - lr 0.0200000\n",
+      "2021-09-21 20:32:56,554 DEV : loss 0.00024845602456480265 - score 0.0\n",
+      "2021-09-21 20:32:56,556 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:33:00,766 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:00,860 epoch 9 - iter 1/13 - loss 0.00133532 - samples/sec: 17.51 - lr: 0.020000\n",
+      "2021-09-21 20:33:00,911 epoch 9 - iter 2/13 - loss 0.01839693 - samples/sec: 19.80 - lr: 0.020000\n",
+      "2021-09-21 20:33:00,958 epoch 9 - iter 3/13 - loss 0.01241178 - samples/sec: 21.35 - lr: 0.020000\n",
+      "2021-09-21 20:33:01,015 epoch 9 - iter 4/13 - loss 0.00971160 - samples/sec: 17.82 - lr: 0.020000\n",
+      "2021-09-21 20:33:01,069 epoch 9 - iter 5/13 - loss 0.00813768 - samples/sec: 18.44 - lr: 0.020000\n",
+      "2021-09-21 20:33:01,121 epoch 9 - iter 6/13 - loss 0.00744792 - samples/sec: 19.74 - lr: 0.020000\n",
+      "2021-09-21 20:33:01,176 epoch 9 - iter 7/13 - loss 0.00649283 - samples/sec: 18.34 - lr: 0.020000\n",
+      "2021-09-21 20:33:01,233 epoch 9 - iter 8/13 - loss 0.01963953 - samples/sec: 17.67 - lr: 0.020000\n",
+      "2021-09-21 20:33:01,286 epoch 9 - iter 9/13 - loss 0.01752730 - samples/sec: 18.93 - lr: 0.020000\n",
+      "2021-09-21 20:33:01,354 epoch 9 - iter 10/13 - loss 0.01830592 - samples/sec: 14.88 - lr: 0.020000\n",
+      "2021-09-21 20:33:01,407 epoch 9 - iter 11/13 - loss 0.01679972 - samples/sec: 19.13 - lr: 0.020000\n",
+      "2021-09-21 20:33:01,465 epoch 9 - iter 12/13 - loss 0.01552278 - samples/sec: 17.41 - lr: 0.020000\n",
+      "2021-09-21 20:33:01,512 epoch 9 - iter 13/13 - loss 0.01437019 - samples/sec: 21.44 - lr: 0.020000\n",
+      "2021-09-21 20:33:01,514 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:01,514 EPOCH 9 done: loss 0.0144 - lr 0.0200000\n",
+      "2021-09-21 20:33:01,674 DEV : loss 0.004413609392940998 - score 0.0\n",
+      "2021-09-21 20:33:01,675 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:33:01,753 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:01,844 epoch 10 - iter 1/13 - loss 0.00117497 - samples/sec: 18.02 - lr: 0.020000\n",
+      "2021-09-21 20:33:01,903 epoch 10 - iter 2/13 - loss 0.00076541 - samples/sec: 17.13 - lr: 0.020000\n",
+      "2021-09-21 20:33:01,955 epoch 10 - iter 3/13 - loss 0.00089306 - samples/sec: 19.16 - lr: 0.020000\n",
+      "2021-09-21 20:33:02,001 epoch 10 - iter 4/13 - loss 0.00072816 - samples/sec: 22.39 - lr: 0.020000\n",
+      "2021-09-21 20:33:02,049 epoch 10 - iter 5/13 - loss 0.00124656 - samples/sec: 20.76 - lr: 0.020000\n",
+      "2021-09-21 20:33:02,107 epoch 10 - iter 6/13 - loss 0.00117514 - samples/sec: 17.46 - lr: 0.020000\n",
+      "2021-09-21 20:33:02,153 epoch 10 - iter 7/13 - loss 0.00103281 - samples/sec: 22.17 - lr: 0.020000\n",
+      "2021-09-21 20:33:02,222 epoch 10 - iter 8/13 - loss 0.00097333 - samples/sec: 14.45 - lr: 0.020000\n",
+      "2021-09-21 20:33:02,285 epoch 10 - iter 9/13 - loss 0.19156292 - samples/sec: 16.13 - lr: 0.020000\n",
+      "2021-09-21 20:33:02,334 epoch 10 - iter 10/13 - loss 0.17250354 - samples/sec: 20.35 - lr: 0.020000\n",
+      "2021-09-21 20:33:02,389 epoch 10 - iter 11/13 - loss 0.15703443 - samples/sec: 18.60 - lr: 0.020000\n",
+      "2021-09-21 20:33:02,437 epoch 10 - iter 12/13 - loss 0.14402823 - samples/sec: 20.66 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 11:57:10,952 epoch 10 - iter 13/13 - loss 0.14384305 - samples/sec: 21.49 - lr: 0.010000\n",
-      "2021-09-08 11:57:10,954 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:10,954 EPOCH 10 done: loss 0.1438 - lr 0.0100000\n",
-      "2021-09-08 11:57:11,670 DEV : loss 0.5333767533302307 - score 0.0\n",
-      "2021-09-08 11:57:11,671 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 11:57:18,373 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 11:57:18,373 Testing using best model ...\n",
-      "2021-09-08 11:57:18,375 loading file None1/best-model.pt\n",
+      "2021-09-21 20:33:02,499 epoch 10 - iter 13/13 - loss 0.13297949 - samples/sec: 16.23 - lr: 0.020000\n",
+      "2021-09-21 20:33:02,501 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:02,501 EPOCH 10 done: loss 0.1330 - lr 0.0200000\n",
+      "2021-09-21 20:33:02,618 DEV : loss 0.0008943184511736035 - score 0.0\n",
+      "2021-09-21 20:33:02,620 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:33:08,604 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:08,605 Testing using best model ...\n",
+      "2021-09-21 20:33:08,607 loading file None1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 11:57:23,368 \t0.5\n",
-      "2021-09-08 11:57:23,369 \n",
+      "2021-09-21 20:33:13,964 \t0.0\n",
+      "2021-09-21 20:33:13,965 \n",
       "Results:\n",
-      "- F-score (micro) 0.5\n",
-      "- F-score (macro) 0.0667\n",
-      "- Accuracy 0.5\n",
+      "- F-score (micro) 0.0\n",
+      "- F-score (macro) 0.0\n",
+      "- Accuracy 0.0\n",
       "\n",
       "By class:\n",
       "                                               precision    recall  f1-score   support\n",
       "\n",
       "                    this text is about travel     0.0000    0.0000    0.0000         0\n",
       "                this text is about technology     0.0000    0.0000    0.0000         0\n",
-      "                  this text is about wellness     0.0000    0.0000    0.0000         0\n",
       "                     this text is about women     0.0000    0.0000    0.0000         0\n",
-      "                   this text is about parents     0.0000    0.0000    0.0000         0\n",
+      "                  this text is about business     0.0000    0.0000    0.0000         0\n",
       "                  this text is about weddings     0.0000    0.0000    0.0000         0\n",
-      "this text is about entertainmen,the attention     0.0000    0.0000    0.0000         0\n",
+      "                   this text is about fashion     0.0000    0.0000    0.0000         0\n",
       "                   this text is about science     0.0000    0.0000    0.0000         0\n",
       "                   this text is about divorce     0.0000    0.0000    0.0000         0\n",
+      "                     this text is about crime     0.0000    0.0000    0.0000         0\n",
       "                  this text is about religion     0.0000    0.0000    0.0000         0\n",
       "                    this text is about sports     0.0000    0.0000    0.0000         0\n",
       "                  this text is about politics     0.0000    0.0000    0.0000         0\n",
       "                    this text is about comedy     0.0000    0.0000    0.0000         0\n",
-      "                  this text is about business     0.0000    0.0000    0.0000         1\n",
-      "                   this text is about fashion     1.0000    1.0000    1.0000         1\n",
+      "                  this text is about wellness     0.0000    0.0000    0.0000         1\n",
+      "this text is about entertainmen,the attention     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "                                    micro avg     0.5000    0.5000    0.5000         2\n",
-      "                                    macro avg     0.0667    0.0667    0.0667         2\n",
-      "                                 weighted avg     0.5000    0.5000    0.5000         2\n",
-      "                                  samples avg     0.5000    0.5000    0.5000         2\n",
+      "                                    micro avg     0.0000    0.0000    0.0000         2\n",
+      "                                    macro avg     0.0000    0.0000    0.0000         2\n",
+      "                                 weighted avg     0.0000    0.0000    0.0000         2\n",
+      "                                  samples avg     0.0000    0.0000    0.0000         2\n",
       "\n",
-      "2021-09-08 11:57:23,369 ----------------------------------------------------------------------------------------------------\n",
-      "Accuracy Durchschnitt: 0.41924528301886793\n"
+      "2021-09-21 20:33:13,965 ----------------------------------------------------------------------------------------------------\n",
+      "Accuracy Durchschnitt: 0.3611320754716981\n"
      ]
     }
    ],
@@ -6206,11 +6221,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "id": "86988d74",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.41509433962264153, 0.4528301886792453, 0.439622641509434, 0.07358490566037736, 0.42452830188679247]\n",
+      "0.14434936457443723\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(avg_acc_list)\n",
+    "print(statistics.pstdev(avg_acc_list))"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -6230,25 +6257,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:50:14,843 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
+      "2021-09-21 20:33:27,657 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 14:50:18,744 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:33:32,001 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 15/15 [00:00<00:00, 56987.83it/s]"
+      "100%|██████████| 15/15 [00:00<00:00, 48433.07it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:50:18,746 [b'undertake a journey or trip', b'the practical application of science to commerce or industry', b'a healthy state of wellbeing free from disease', b'an adult female person (as opposed to a man)', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child a relative who plays the role of guardian', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child; a relative who plays the role of guardian', b'the social event at which the ceremony of marriage is performed', b'the latest and most admired style in clothes and cosmetics and behavior', b'an activity that is diverting and that holds the attention', b'the legal dissolution of a marriage', b'a strong belief in a supernatural power or powers that control human destiny', b'an active diversion requiring physical exertion and competition', b'light and humorous drama with a happy ending', b'a particular branch of scientific knowledge', b'an act punishable by law; usually considered an evil act an evil act not necessarily punishable by law']\n",
-      "2021-09-08 14:50:18,755 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:50:18,757 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:33:32,003 [b'undertake a journey or trip', b'a healthy state of wellbeing free from disease', b'an adult female person (as opposed to a man)', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child a relative who plays the role of guardian', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child; a relative who plays the role of guardian', b'the social event at which the ceremony of marriage is performed', b'the latest and most admired style in clothes and cosmetics and behavior', b'an activity that is diverting and that holds the attention', b'a particular branch of scientific knowledge', b'the legal dissolution of a marriage', b'an act punishable by law; usually considered an evil act an evil act not necessarily punishable by law', b'an active diversion requiring physical exertion and competition', b'social relations involving intrigue to gain authority or power', b'the practical application of science to commerce or industry', b'light and humorous drama with a happy ending']\n",
+      "2021-09-21 20:33:32,136 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:32,138 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -6561,26 +6588,24 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:50:18,757 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:50:18,757 Corpus: \"Corpus: 13 train + 1 dev + 2 test sentences\"\n",
-      "2021-09-08 14:50:18,758 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:50:18,758 Parameters:\n",
-      "2021-09-08 14:50:18,758  - learning_rate: \"0.02\"\n",
-      "2021-09-08 14:50:18,759  - mini_batch_size: \"1\"\n",
-      "2021-09-08 14:50:18,759  - patience: \"3\"\n",
-      "2021-09-08 14:50:18,759  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 14:50:18,759  - max_epochs: \"10\"\n",
-      "2021-09-08 14:50:18,760  - shuffle: \"True\"\n",
-      "2021-09-08 14:50:18,760  - train_with_dev: \"False\"\n",
-      "2021-09-08 14:50:18,760  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 14:50:18,760 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:50:18,761 Model training base path: \"None1\"\n",
-      "2021-09-08 14:50:18,761 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:50:18,761 Device: cuda:0\n",
-      "2021-09-08 14:50:18,762 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:50:18,762 Embeddings storage mode: cpu\n",
-      "2021-09-08 14:50:18,769 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:50:18,927 epoch 1 - iter 1/13 - loss 0.41849235 - samples/sec: 26.87 - lr: 0.020000\n"
+      "2021-09-21 20:33:32,139 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:32,139 Corpus: \"Corpus: 13 train + 1 dev + 2 test sentences\"\n",
+      "2021-09-21 20:33:32,139 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:32,140 Parameters:\n",
+      "2021-09-21 20:33:32,140  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:33:32,140  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:33:32,140  - patience: \"3\"\n",
+      "2021-09-21 20:33:32,141  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:33:32,141  - max_epochs: \"10\"\n",
+      "2021-09-21 20:33:32,141  - shuffle: \"True\"\n",
+      "2021-09-21 20:33:32,142  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:33:32,142  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:33:32,142 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:32,142 Model training base path: \"None1\"\n",
+      "2021-09-21 20:33:32,143 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:32,143 Device: cuda:0\n",
+      "2021-09-21 20:33:32,143 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:32,143 Embeddings storage mode: cpu\n"
      ]
     },
     {
@@ -6594,216 +6619,220 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:50:18,972 epoch 1 - iter 2/13 - loss 0.67257129 - samples/sec: 22.38 - lr: 0.020000\n",
-      "2021-09-08 14:50:19,018 epoch 1 - iter 3/13 - loss 0.50185129 - samples/sec: 22.32 - lr: 0.020000\n",
-      "2021-09-08 14:50:19,062 epoch 1 - iter 4/13 - loss 0.59346865 - samples/sec: 22.81 - lr: 0.020000\n",
-      "2021-09-08 14:50:19,107 epoch 1 - iter 5/13 - loss 0.61242221 - samples/sec: 22.64 - lr: 0.020000\n",
-      "2021-09-08 14:50:19,151 epoch 1 - iter 6/13 - loss 0.67801380 - samples/sec: 22.75 - lr: 0.020000\n",
-      "2021-09-08 14:50:19,196 epoch 1 - iter 7/13 - loss 0.68130415 - samples/sec: 22.39 - lr: 0.020000\n",
-      "2021-09-08 14:50:19,243 epoch 1 - iter 8/13 - loss 0.68096563 - samples/sec: 21.84 - lr: 0.020000\n",
-      "2021-09-08 14:50:19,289 epoch 1 - iter 9/13 - loss 0.63638423 - samples/sec: 22.18 - lr: 0.020000\n",
-      "2021-09-08 14:50:19,335 epoch 1 - iter 10/13 - loss 0.58952059 - samples/sec: 21.78 - lr: 0.020000\n",
-      "2021-09-08 14:50:19,380 epoch 1 - iter 11/13 - loss 0.61251943 - samples/sec: 22.38 - lr: 0.020000\n",
-      "2021-09-08 14:50:19,422 epoch 1 - iter 12/13 - loss 0.56301952 - samples/sec: 24.05 - lr: 0.020000\n",
-      "2021-09-08 14:50:19,467 epoch 1 - iter 13/13 - loss 0.56971268 - samples/sec: 22.61 - lr: 0.020000\n",
-      "2021-09-08 14:50:19,468 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:50:19,468 EPOCH 1 done: loss 0.5697 - lr 0.0200000\n",
-      "2021-09-08 14:50:19,504 DEV : loss 0.21852150559425354 - score 0.0\n",
-      "2021-09-08 14:50:19,504 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:33:32,322 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:32,429 epoch 1 - iter 1/13 - loss 0.40616930 - samples/sec: 16.58 - lr: 0.020000\n",
+      "2021-09-21 20:33:32,494 epoch 1 - iter 2/13 - loss 0.47514260 - samples/sec: 15.42 - lr: 0.020000\n",
+      "2021-09-21 20:33:32,554 epoch 1 - iter 3/13 - loss 0.51018574 - samples/sec: 16.69 - lr: 0.020000\n",
+      "2021-09-21 20:33:32,613 epoch 1 - iter 4/13 - loss 0.79716866 - samples/sec: 17.22 - lr: 0.020000\n",
+      "2021-09-21 20:33:32,689 epoch 1 - iter 5/13 - loss 0.79748349 - samples/sec: 13.26 - lr: 0.020000\n",
+      "2021-09-21 20:33:32,746 epoch 1 - iter 6/13 - loss 0.90653747 - samples/sec: 17.53 - lr: 0.020000\n",
+      "2021-09-21 20:33:32,793 epoch 1 - iter 7/13 - loss 0.82286289 - samples/sec: 21.31 - lr: 0.020000\n",
+      "2021-09-21 20:33:32,841 epoch 1 - iter 8/13 - loss 0.75573298 - samples/sec: 21.35 - lr: 0.020000\n",
+      "2021-09-21 20:33:32,909 epoch 1 - iter 9/13 - loss 0.80171485 - samples/sec: 14.65 - lr: 0.020000\n",
+      "2021-09-21 20:33:32,972 epoch 1 - iter 10/13 - loss 0.77089727 - samples/sec: 16.12 - lr: 0.020000\n",
+      "2021-09-21 20:33:33,047 epoch 1 - iter 11/13 - loss 0.73292908 - samples/sec: 13.43 - lr: 0.020000\n",
+      "2021-09-21 20:33:33,095 epoch 1 - iter 12/13 - loss 0.67266730 - samples/sec: 21.03 - lr: 0.020000\n",
+      "2021-09-21 20:33:33,153 epoch 1 - iter 13/13 - loss 0.70040043 - samples/sec: 17.39 - lr: 0.020000\n",
+      "2021-09-21 20:33:33,154 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:33,154 EPOCH 1 done: loss 0.7004 - lr 0.0200000\n",
+      "2021-09-21 20:33:33,314 DEV : loss 0.41617661714553833 - score 0.0\n",
+      "2021-09-21 20:33:33,314 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:33:38,365 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:38,531 epoch 2 - iter 1/13 - loss 0.63395107 - samples/sec: 13.14 - lr: 0.020000\n",
+      "2021-09-21 20:33:38,608 epoch 2 - iter 2/13 - loss 0.65033883 - samples/sec: 13.14 - lr: 0.020000\n",
+      "2021-09-21 20:33:38,685 epoch 2 - iter 3/13 - loss 0.56758586 - samples/sec: 12.98 - lr: 0.020000\n",
+      "2021-09-21 20:33:38,750 epoch 2 - iter 4/13 - loss 0.51318964 - samples/sec: 15.47 - lr: 0.020000\n",
+      "2021-09-21 20:33:38,801 epoch 2 - iter 5/13 - loss 0.52129985 - samples/sec: 19.96 - lr: 0.020000\n",
+      "2021-09-21 20:33:38,871 epoch 2 - iter 6/13 - loss 0.44667272 - samples/sec: 14.36 - lr: 0.020000\n",
+      "2021-09-21 20:33:38,942 epoch 2 - iter 7/13 - loss 0.47166467 - samples/sec: 14.24 - lr: 0.020000\n",
+      "2021-09-21 20:33:39,042 epoch 2 - iter 8/13 - loss 0.51465636 - samples/sec: 10.06 - lr: 0.020000\n",
+      "2021-09-21 20:33:39,112 epoch 2 - iter 9/13 - loss 0.46850678 - samples/sec: 14.36 - lr: 0.020000\n",
+      "2021-09-21 20:33:39,179 epoch 2 - iter 10/13 - loss 0.42210393 - samples/sec: 14.92 - lr: 0.020000\n",
+      "2021-09-21 20:33:39,260 epoch 2 - iter 11/13 - loss 0.39547306 - samples/sec: 12.41 - lr: 0.020000\n",
+      "2021-09-21 20:33:39,328 epoch 2 - iter 12/13 - loss 0.37850299 - samples/sec: 14.89 - lr: 0.020000\n",
+      "2021-09-21 20:33:39,384 epoch 2 - iter 13/13 - loss 0.43373544 - samples/sec: 17.95 - lr: 0.020000\n",
+      "2021-09-21 20:33:39,385 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:39,385 EPOCH 2 done: loss 0.4337 - lr 0.0200000\n",
+      "2021-09-21 20:33:39,450 DEV : loss 0.19125135242938995 - score 0.0\n",
+      "2021-09-21 20:33:39,453 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 14:50:23,712 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:50:23,777 epoch 2 - iter 1/13 - loss 0.01272501 - samples/sec: 23.48 - lr: 0.020000\n",
-      "2021-09-08 14:50:23,822 epoch 2 - iter 2/13 - loss 0.39642356 - samples/sec: 22.42 - lr: 0.020000\n",
-      "2021-09-08 14:50:23,867 epoch 2 - iter 3/13 - loss 0.40071243 - samples/sec: 22.39 - lr: 0.020000\n",
-      "2021-09-08 14:50:23,912 epoch 2 - iter 4/13 - loss 0.53515827 - samples/sec: 22.80 - lr: 0.020000\n",
-      "2021-09-08 14:50:23,956 epoch 2 - iter 5/13 - loss 0.51246575 - samples/sec: 22.81 - lr: 0.020000\n",
-      "2021-09-08 14:50:23,998 epoch 2 - iter 6/13 - loss 0.43022518 - samples/sec: 24.23 - lr: 0.020000\n",
-      "2021-09-08 14:50:24,042 epoch 2 - iter 7/13 - loss 0.43419547 - samples/sec: 22.81 - lr: 0.020000\n",
-      "2021-09-08 14:50:24,087 epoch 2 - iter 8/13 - loss 0.41749671 - samples/sec: 22.60 - lr: 0.020000\n",
-      "2021-09-08 14:50:24,131 epoch 2 - iter 9/13 - loss 0.46986023 - samples/sec: 22.95 - lr: 0.020000\n",
-      "2021-09-08 14:50:24,176 epoch 2 - iter 10/13 - loss 0.42848506 - samples/sec: 22.39 - lr: 0.020000\n",
-      "2021-09-08 14:50:24,221 epoch 2 - iter 11/13 - loss 0.40196279 - samples/sec: 22.55 - lr: 0.020000\n",
-      "2021-09-08 14:50:24,266 epoch 2 - iter 12/13 - loss 0.45671429 - samples/sec: 22.51 - lr: 0.020000\n",
-      "2021-09-08 14:50:24,310 epoch 2 - iter 13/13 - loss 0.49059880 - samples/sec: 22.67 - lr: 0.020000\n",
-      "2021-09-08 14:50:24,311 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:50:24,312 EPOCH 2 done: loss 0.4906 - lr 0.0200000\n",
-      "2021-09-08 14:50:24,370 DEV : loss 0.21457867324352264 - score 0.0\n",
-      "2021-09-08 14:50:24,371 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:33:43,624 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:43,719 epoch 3 - iter 1/13 - loss 0.08043294 - samples/sec: 17.37 - lr: 0.020000\n",
+      "2021-09-21 20:33:43,774 epoch 3 - iter 2/13 - loss 0.28962336 - samples/sec: 18.49 - lr: 0.020000\n",
+      "2021-09-21 20:33:43,828 epoch 3 - iter 3/13 - loss 0.19630100 - samples/sec: 18.55 - lr: 0.020000\n",
+      "2021-09-21 20:33:43,888 epoch 3 - iter 4/13 - loss 0.15758714 - samples/sec: 16.85 - lr: 0.020000\n",
+      "2021-09-21 20:33:43,954 epoch 3 - iter 5/13 - loss 0.12796674 - samples/sec: 15.17 - lr: 0.020000\n",
+      "2021-09-21 20:33:44,010 epoch 3 - iter 6/13 - loss 0.40207692 - samples/sec: 17.89 - lr: 0.020000\n",
+      "2021-09-21 20:33:44,060 epoch 3 - iter 7/13 - loss 0.35731089 - samples/sec: 20.27 - lr: 0.020000\n",
+      "2021-09-21 20:33:44,123 epoch 3 - iter 8/13 - loss 0.40199479 - samples/sec: 15.88 - lr: 0.020000\n",
+      "2021-09-21 20:33:44,175 epoch 3 - iter 9/13 - loss 0.40063334 - samples/sec: 19.43 - lr: 0.020000\n",
+      "2021-09-21 20:33:44,220 epoch 3 - iter 10/13 - loss 0.36129298 - samples/sec: 22.62 - lr: 0.020000\n",
+      "2021-09-21 20:33:44,280 epoch 3 - iter 11/13 - loss 0.32962764 - samples/sec: 16.84 - lr: 0.020000\n",
+      "2021-09-21 20:33:44,343 epoch 3 - iter 12/13 - loss 0.30966403 - samples/sec: 15.83 - lr: 0.020000\n",
+      "2021-09-21 20:33:44,401 epoch 3 - iter 13/13 - loss 0.33768174 - samples/sec: 17.33 - lr: 0.020000\n",
+      "2021-09-21 20:33:44,402 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:44,403 EPOCH 3 done: loss 0.3377 - lr 0.0200000\n",
+      "2021-09-21 20:33:44,453 DEV : loss 0.01671626791357994 - score 0.0\n",
+      "2021-09-21 20:33:44,456 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 14:50:29,079 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:50:29,147 epoch 3 - iter 1/13 - loss 0.15682606 - samples/sec: 21.86 - lr: 0.020000\n",
-      "2021-09-08 14:50:29,192 epoch 3 - iter 2/13 - loss 0.10081358 - samples/sec: 22.36 - lr: 0.020000\n",
-      "2021-09-08 14:50:29,238 epoch 3 - iter 3/13 - loss 0.30137872 - samples/sec: 22.31 - lr: 0.020000\n",
-      "2021-09-08 14:50:29,279 epoch 3 - iter 4/13 - loss 0.24020044 - samples/sec: 24.29 - lr: 0.020000\n",
-      "2021-09-08 14:50:29,325 epoch 3 - iter 5/13 - loss 0.20927374 - samples/sec: 22.31 - lr: 0.020000\n",
-      "2021-09-08 14:50:29,369 epoch 3 - iter 6/13 - loss 0.24494148 - samples/sec: 22.55 - lr: 0.020000\n",
-      "2021-09-08 14:50:29,414 epoch 3 - iter 7/13 - loss 0.47843980 - samples/sec: 22.52 - lr: 0.020000\n",
-      "2021-09-08 14:50:29,460 epoch 3 - iter 8/13 - loss 0.42635635 - samples/sec: 22.35 - lr: 0.020000\n",
-      "2021-09-08 14:50:29,506 epoch 3 - iter 9/13 - loss 0.39204116 - samples/sec: 22.06 - lr: 0.020000\n",
-      "2021-09-08 14:50:29,553 epoch 3 - iter 10/13 - loss 0.36119537 - samples/sec: 21.21 - lr: 0.020000\n",
-      "2021-09-08 14:50:29,600 epoch 3 - iter 11/13 - loss 0.43550582 - samples/sec: 21.87 - lr: 0.020000\n",
-      "2021-09-08 14:50:29,648 epoch 3 - iter 12/13 - loss 0.40709904 - samples/sec: 21.07 - lr: 0.020000\n",
-      "2021-09-08 14:50:29,696 epoch 3 - iter 13/13 - loss 0.38529428 - samples/sec: 21.23 - lr: 0.020000\n",
-      "2021-09-08 14:50:29,698 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:50:29,698 EPOCH 3 done: loss 0.3853 - lr 0.0200000\n",
-      "2021-09-08 14:50:34,906 DEV : loss 0.01354705449193716 - score 0.0\n",
-      "2021-09-08 14:50:34,907 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:33:48,491 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:48,598 epoch 4 - iter 1/13 - loss 0.95170522 - samples/sec: 16.42 - lr: 0.020000\n",
+      "2021-09-21 20:33:48,664 epoch 4 - iter 2/13 - loss 0.50388925 - samples/sec: 15.40 - lr: 0.020000\n",
+      "2021-09-21 20:33:48,728 epoch 4 - iter 3/13 - loss 0.47848227 - samples/sec: 15.69 - lr: 0.020000\n",
+      "2021-09-21 20:33:48,776 epoch 4 - iter 4/13 - loss 0.36664312 - samples/sec: 20.85 - lr: 0.020000\n",
+      "2021-09-21 20:33:48,836 epoch 4 - iter 5/13 - loss 0.38164410 - samples/sec: 17.03 - lr: 0.020000\n",
+      "2021-09-21 20:33:48,904 epoch 4 - iter 6/13 - loss 0.42287910 - samples/sec: 14.69 - lr: 0.020000\n",
+      "2021-09-21 20:33:48,967 epoch 4 - iter 7/13 - loss 0.38541180 - samples/sec: 16.14 - lr: 0.020000\n",
+      "2021-09-21 20:33:49,024 epoch 4 - iter 8/13 - loss 0.33967769 - samples/sec: 17.55 - lr: 0.020000\n",
+      "2021-09-21 20:33:49,077 epoch 4 - iter 9/13 - loss 0.30329719 - samples/sec: 19.00 - lr: 0.020000\n",
+      "2021-09-21 20:33:49,126 epoch 4 - iter 10/13 - loss 0.27377708 - samples/sec: 20.80 - lr: 0.020000\n",
+      "2021-09-21 20:33:49,185 epoch 4 - iter 11/13 - loss 0.32236956 - samples/sec: 17.00 - lr: 0.020000\n",
+      "2021-09-21 20:33:49,241 epoch 4 - iter 12/13 - loss 0.42007073 - samples/sec: 17.96 - lr: 0.020000\n",
+      "2021-09-21 20:33:49,307 epoch 4 - iter 13/13 - loss 0.41279131 - samples/sec: 15.24 - lr: 0.020000\n",
+      "2021-09-21 20:33:49,308 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:33:49,309 EPOCH 4 done: loss 0.4128 - lr 0.0200000\n",
+      "2021-09-21 20:33:49,463 DEV : loss 0.006906591821461916 - score 0.0\n",
+      "2021-09-21 20:33:49,464 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 14:50:39,397 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:50:39,465 epoch 4 - iter 1/13 - loss 0.56539589 - samples/sec: 21.71 - lr: 0.020000\n",
-      "2021-09-08 14:50:39,508 epoch 4 - iter 2/13 - loss 0.28784826 - samples/sec: 23.91 - lr: 0.020000\n",
-      "2021-09-08 14:50:39,552 epoch 4 - iter 3/13 - loss 0.30695052 - samples/sec: 22.54 - lr: 0.020000\n",
-      "2021-09-08 14:50:39,598 epoch 4 - iter 4/13 - loss 0.49268638 - samples/sec: 22.25 - lr: 0.020000\n",
-      "2021-09-08 14:50:39,639 epoch 4 - iter 5/13 - loss 0.39704220 - samples/sec: 24.36 - lr: 0.020000\n",
-      "2021-09-08 14:50:39,681 epoch 4 - iter 6/13 - loss 0.33114032 - samples/sec: 23.99 - lr: 0.020000\n",
-      "2021-09-08 14:50:39,726 epoch 4 - iter 7/13 - loss 0.43133075 - samples/sec: 22.53 - lr: 0.020000\n",
-      "2021-09-08 14:50:39,771 epoch 4 - iter 8/13 - loss 0.39841366 - samples/sec: 22.40 - lr: 0.020000\n",
-      "2021-09-08 14:50:39,816 epoch 4 - iter 9/13 - loss 0.51062360 - samples/sec: 22.41 - lr: 0.020000\n",
-      "2021-09-08 14:50:39,861 epoch 4 - iter 10/13 - loss 0.51833616 - samples/sec: 22.68 - lr: 0.020000\n",
-      "2021-09-08 14:50:39,904 epoch 4 - iter 11/13 - loss 0.47310361 - samples/sec: 23.43 - lr: 0.020000\n",
-      "2021-09-08 14:50:39,948 epoch 4 - iter 12/13 - loss 0.51298283 - samples/sec: 22.59 - lr: 0.020000\n",
-      "2021-09-08 14:50:39,993 epoch 4 - iter 13/13 - loss 0.53701086 - samples/sec: 22.48 - lr: 0.020000\n",
-      "2021-09-08 14:50:39,994 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:50:39,995 EPOCH 4 done: loss 0.5370 - lr 0.0200000\n",
-      "2021-09-08 14:50:40,029 DEV : loss 0.04750287905335426 - score 0.0\n",
-      "2021-09-08 14:50:40,030 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 14:50:40,032 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:50:40,092 epoch 5 - iter 1/13 - loss 0.02974685 - samples/sec: 24.05 - lr: 0.020000\n",
-      "2021-09-08 14:50:40,137 epoch 5 - iter 2/13 - loss 0.22951972 - samples/sec: 22.44 - lr: 0.020000\n",
-      "2021-09-08 14:50:40,181 epoch 5 - iter 3/13 - loss 0.26711662 - samples/sec: 22.61 - lr: 0.020000\n",
-      "2021-09-08 14:50:40,226 epoch 5 - iter 4/13 - loss 0.21535789 - samples/sec: 22.30 - lr: 0.020000\n",
-      "2021-09-08 14:50:40,268 epoch 5 - iter 5/13 - loss 0.17615037 - samples/sec: 24.34 - lr: 0.020000\n",
-      "2021-09-08 14:50:40,313 epoch 5 - iter 6/13 - loss 0.16238514 - samples/sec: 22.44 - lr: 0.020000\n",
-      "2021-09-08 14:50:40,355 epoch 5 - iter 7/13 - loss 0.13940054 - samples/sec: 24.22 - lr: 0.020000\n",
-      "2021-09-08 14:50:40,400 epoch 5 - iter 8/13 - loss 0.14114794 - samples/sec: 22.28 - lr: 0.020000\n",
-      "2021-09-08 14:50:40,445 epoch 5 - iter 9/13 - loss 0.16759805 - samples/sec: 22.50 - lr: 0.020000\n",
-      "2021-09-08 14:50:40,487 epoch 5 - iter 10/13 - loss 0.15312619 - samples/sec: 24.15 - lr: 0.020000\n",
-      "2021-09-08 14:50:40,531 epoch 5 - iter 11/13 - loss 0.17131809 - samples/sec: 22.52 - lr: 0.020000\n",
-      "2021-09-08 14:50:40,573 epoch 5 - iter 12/13 - loss 0.15958180 - samples/sec: 24.08 - lr: 0.020000\n",
-      "2021-09-08 14:50:40,615 epoch 5 - iter 13/13 - loss 0.14789207 - samples/sec: 24.17 - lr: 0.020000\n",
-      "2021-09-08 14:50:40,616 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 20:34:04,878 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:05,004 epoch 5 - iter 1/13 - loss 0.03360986 - samples/sec: 13.26 - lr: 0.020000\n",
+      "2021-09-21 20:34:05,083 epoch 5 - iter 2/13 - loss 0.02564097 - samples/sec: 12.61 - lr: 0.020000\n",
+      "2021-09-21 20:34:05,148 epoch 5 - iter 3/13 - loss 0.01890484 - samples/sec: 15.71 - lr: 0.020000\n",
+      "2021-09-21 20:34:05,213 epoch 5 - iter 4/13 - loss 0.02285769 - samples/sec: 15.35 - lr: 0.020000\n",
+      "2021-09-21 20:34:05,305 epoch 5 - iter 5/13 - loss 0.03497587 - samples/sec: 10.98 - lr: 0.020000\n",
+      "2021-09-21 20:34:05,366 epoch 5 - iter 6/13 - loss 0.17912098 - samples/sec: 16.29 - lr: 0.020000\n",
+      "2021-09-21 20:34:05,443 epoch 5 - iter 7/13 - loss 0.25419051 - samples/sec: 13.09 - lr: 0.020000\n",
+      "2021-09-21 20:34:05,528 epoch 5 - iter 8/13 - loss 0.41241646 - samples/sec: 11.88 - lr: 0.020000\n",
+      "2021-09-21 20:34:05,607 epoch 5 - iter 9/13 - loss 0.36755634 - samples/sec: 12.65 - lr: 0.020000\n",
+      "2021-09-21 20:34:05,680 epoch 5 - iter 10/13 - loss 0.33124002 - samples/sec: 13.84 - lr: 0.020000\n",
+      "2021-09-21 20:34:05,759 epoch 5 - iter 11/13 - loss 0.35333714 - samples/sec: 12.80 - lr: 0.020000\n",
+      "2021-09-21 20:34:05,818 epoch 5 - iter 12/13 - loss 0.32507841 - samples/sec: 16.87 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:50:40,616 EPOCH 5 done: loss 0.1479 - lr 0.0200000\n",
-      "2021-09-08 14:50:40,652 DEV : loss 0.3884381651878357 - score 0.0\n",
-      "2021-09-08 14:50:40,652 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 14:50:40,655 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:50:40,718 epoch 6 - iter 1/13 - loss 0.55776411 - samples/sec: 22.51 - lr: 0.020000\n",
-      "2021-09-08 14:50:40,762 epoch 6 - iter 2/13 - loss 0.61194235 - samples/sec: 22.54 - lr: 0.020000\n",
-      "2021-09-08 14:50:40,804 epoch 6 - iter 3/13 - loss 0.40827564 - samples/sec: 23.99 - lr: 0.020000\n",
-      "2021-09-08 14:50:40,850 epoch 6 - iter 4/13 - loss 0.31559683 - samples/sec: 22.14 - lr: 0.020000\n",
-      "2021-09-08 14:50:40,895 epoch 6 - iter 5/13 - loss 0.26783698 - samples/sec: 22.23 - lr: 0.020000\n",
-      "2021-09-08 14:50:40,941 epoch 6 - iter 6/13 - loss 0.34212798 - samples/sec: 22.25 - lr: 0.020000\n",
-      "2021-09-08 14:50:40,986 epoch 6 - iter 7/13 - loss 0.32414104 - samples/sec: 22.35 - lr: 0.020000\n",
-      "2021-09-08 14:50:41,031 epoch 6 - iter 8/13 - loss 0.41293000 - samples/sec: 22.44 - lr: 0.020000\n",
-      "2021-09-08 14:50:41,076 epoch 6 - iter 9/13 - loss 0.41338110 - samples/sec: 22.27 - lr: 0.020000\n",
-      "2021-09-08 14:50:41,121 epoch 6 - iter 10/13 - loss 0.45609726 - samples/sec: 22.39 - lr: 0.020000\n",
-      "2021-09-08 14:50:41,164 epoch 6 - iter 11/13 - loss 0.41476520 - samples/sec: 23.85 - lr: 0.020000\n",
-      "2021-09-08 14:50:41,206 epoch 6 - iter 12/13 - loss 0.38075996 - samples/sec: 24.01 - lr: 0.020000\n",
-      "2021-09-08 14:50:41,248 epoch 6 - iter 13/13 - loss 0.35159116 - samples/sec: 23.90 - lr: 0.020000\n",
-      "2021-09-08 14:50:41,249 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:50:41,249 EPOCH 6 done: loss 0.3516 - lr 0.0200000\n",
-      "2021-09-08 14:50:41,286 DEV : loss 0.37153923511505127 - score 0.0\n",
-      "2021-09-08 14:50:41,286 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 14:50:41,307 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:50:41,368 epoch 7 - iter 1/13 - loss 0.00653216 - samples/sec: 23.68 - lr: 0.020000\n",
-      "2021-09-08 14:50:41,413 epoch 7 - iter 2/13 - loss 0.66014294 - samples/sec: 22.11 - lr: 0.020000\n",
-      "2021-09-08 14:50:41,455 epoch 7 - iter 3/13 - loss 0.44197274 - samples/sec: 24.02 - lr: 0.020000\n",
-      "2021-09-08 14:50:41,498 epoch 7 - iter 4/13 - loss 0.33241532 - samples/sec: 23.84 - lr: 0.020000\n",
-      "2021-09-08 14:50:41,543 epoch 7 - iter 5/13 - loss 0.34907948 - samples/sec: 22.40 - lr: 0.020000\n",
-      "2021-09-08 14:50:41,585 epoch 7 - iter 6/13 - loss 0.29126792 - samples/sec: 23.80 - lr: 0.020000\n",
-      "2021-09-08 14:50:41,630 epoch 7 - iter 7/13 - loss 0.28520386 - samples/sec: 22.37 - lr: 0.020000\n",
-      "2021-09-08 14:50:41,672 epoch 7 - iter 8/13 - loss 0.25903677 - samples/sec: 24.14 - lr: 0.020000\n",
-      "2021-09-08 14:50:41,715 epoch 7 - iter 9/13 - loss 0.23067754 - samples/sec: 23.74 - lr: 0.020000\n",
-      "2021-09-08 14:50:41,759 epoch 7 - iter 10/13 - loss 0.21315624 - samples/sec: 22.48 - lr: 0.020000\n",
-      "2021-09-08 14:50:41,805 epoch 7 - iter 11/13 - loss 0.21465295 - samples/sec: 22.17 - lr: 0.020000\n",
-      "2021-09-08 14:50:41,850 epoch 7 - iter 12/13 - loss 0.27020814 - samples/sec: 22.41 - lr: 0.020000\n",
-      "2021-09-08 14:50:41,895 epoch 7 - iter 13/13 - loss 0.26478886 - samples/sec: 22.22 - lr: 0.020000\n",
-      "2021-09-08 14:50:41,896 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:50:41,897 EPOCH 7 done: loss 0.2648 - lr 0.0200000\n",
-      "2021-09-08 14:50:42,016 DEV : loss 0.0007817443110980093 - score 0.0\n",
-      "2021-09-08 14:50:42,016 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:34:05,870 epoch 5 - iter 13/13 - loss 0.31163189 - samples/sec: 19.66 - lr: 0.020000\n",
+      "2021-09-21 20:34:05,871 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:05,871 EPOCH 5 done: loss 0.3116 - lr 0.0200000\n",
+      "2021-09-21 20:34:05,946 DEV : loss 0.007367831654846668 - score 0.0\n",
+      "2021-09-21 20:34:05,946 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:34:05,950 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:06,056 epoch 6 - iter 1/13 - loss 0.01570294 - samples/sec: 17.71 - lr: 0.020000\n",
+      "2021-09-21 20:34:06,136 epoch 6 - iter 2/13 - loss 0.19312874 - samples/sec: 12.51 - lr: 0.020000\n",
+      "2021-09-21 20:34:06,228 epoch 6 - iter 3/13 - loss 0.12967076 - samples/sec: 10.97 - lr: 0.020000\n",
+      "2021-09-21 20:34:06,298 epoch 6 - iter 4/13 - loss 0.09778721 - samples/sec: 14.24 - lr: 0.020000\n",
+      "2021-09-21 20:34:06,379 epoch 6 - iter 5/13 - loss 0.08222722 - samples/sec: 12.40 - lr: 0.020000\n",
+      "2021-09-21 20:34:06,462 epoch 6 - iter 6/13 - loss 0.08458346 - samples/sec: 12.24 - lr: 0.020000\n",
+      "2021-09-21 20:34:06,547 epoch 6 - iter 7/13 - loss 0.08556481 - samples/sec: 11.83 - lr: 0.020000\n",
+      "2021-09-21 20:34:06,619 epoch 6 - iter 8/13 - loss 0.11239333 - samples/sec: 13.81 - lr: 0.020000\n",
+      "2021-09-21 20:34:06,691 epoch 6 - iter 9/13 - loss 0.12501224 - samples/sec: 14.07 - lr: 0.020000\n",
+      "2021-09-21 20:34:06,750 epoch 6 - iter 10/13 - loss 0.12005335 - samples/sec: 17.09 - lr: 0.020000\n",
+      "2021-09-21 20:34:06,823 epoch 6 - iter 11/13 - loss 0.24038169 - samples/sec: 13.78 - lr: 0.020000\n",
+      "2021-09-21 20:34:06,907 epoch 6 - iter 12/13 - loss 0.22144403 - samples/sec: 11.87 - lr: 0.020000\n",
+      "2021-09-21 20:34:06,990 epoch 6 - iter 13/13 - loss 0.20624749 - samples/sec: 12.23 - lr: 0.020000\n",
+      "2021-09-21 20:34:06,991 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:06,991 EPOCH 6 done: loss 0.2062 - lr 0.0200000\n",
+      "2021-09-21 20:34:07,106 DEV : loss 0.027820130810141563 - score 0.0\n",
+      "2021-09-21 20:34:07,110 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:34:07,113 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:07,299 epoch 7 - iter 1/13 - loss 0.00209309 - samples/sec: 15.48 - lr: 0.020000\n",
+      "2021-09-21 20:34:07,369 epoch 7 - iter 2/13 - loss 0.00248614 - samples/sec: 14.38 - lr: 0.020000\n",
+      "2021-09-21 20:34:07,458 epoch 7 - iter 3/13 - loss 0.00442498 - samples/sec: 11.22 - lr: 0.020000\n",
+      "2021-09-21 20:34:07,537 epoch 7 - iter 4/13 - loss 0.00556698 - samples/sec: 12.78 - lr: 0.020000\n",
+      "2021-09-21 20:34:07,611 epoch 7 - iter 5/13 - loss 0.11744435 - samples/sec: 13.62 - lr: 0.020000\n",
+      "2021-09-21 20:34:07,673 epoch 7 - iter 6/13 - loss 0.24899553 - samples/sec: 16.22 - lr: 0.020000\n",
+      "2021-09-21 20:34:07,734 epoch 7 - iter 7/13 - loss 0.21373352 - samples/sec: 16.45 - lr: 0.020000\n",
+      "2021-09-21 20:34:07,823 epoch 7 - iter 8/13 - loss 0.18773423 - samples/sec: 11.28 - lr: 0.020000\n",
+      "2021-09-21 20:34:07,889 epoch 7 - iter 9/13 - loss 0.25213574 - samples/sec: 15.27 - lr: 0.020000\n",
+      "2021-09-21 20:34:07,966 epoch 7 - iter 10/13 - loss 0.27661082 - samples/sec: 13.00 - lr: 0.020000\n",
+      "2021-09-21 20:34:08,026 epoch 7 - iter 11/13 - loss 0.25675290 - samples/sec: 16.90 - lr: 0.020000\n",
+      "2021-09-21 20:34:08,093 epoch 7 - iter 12/13 - loss 0.23781945 - samples/sec: 14.94 - lr: 0.020000\n",
+      "2021-09-21 20:34:08,190 epoch 7 - iter 13/13 - loss 0.21957486 - samples/sec: 10.40 - lr: 0.020000\n",
+      "2021-09-21 20:34:08,191 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:08,191 EPOCH 7 done: loss 0.2196 - lr 0.0200000\n",
+      "2021-09-21 20:34:08,337 DEV : loss 0.004303127061575651 - score 0.0\n",
+      "2021-09-21 20:34:08,338 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:34:12,039 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:12,137 epoch 8 - iter 1/13 - loss 0.60193068 - samples/sec: 16.95 - lr: 0.020000\n",
+      "2021-09-21 20:34:12,192 epoch 8 - iter 2/13 - loss 0.30617303 - samples/sec: 18.62 - lr: 0.020000\n",
+      "2021-09-21 20:34:12,255 epoch 8 - iter 3/13 - loss 0.36450973 - samples/sec: 15.96 - lr: 0.020000\n",
+      "2021-09-21 20:34:12,311 epoch 8 - iter 4/13 - loss 0.27514301 - samples/sec: 17.97 - lr: 0.020000\n",
+      "2021-09-21 20:34:12,372 epoch 8 - iter 5/13 - loss 0.22158511 - samples/sec: 16.48 - lr: 0.020000\n",
+      "2021-09-21 20:34:12,427 epoch 8 - iter 6/13 - loss 0.18514938 - samples/sec: 18.58 - lr: 0.020000\n",
+      "2021-09-21 20:34:12,492 epoch 8 - iter 7/13 - loss 0.25720857 - samples/sec: 15.41 - lr: 0.020000\n",
+      "2021-09-21 20:34:12,551 epoch 8 - iter 8/13 - loss 0.22691418 - samples/sec: 17.09 - lr: 0.020000\n",
+      "2021-09-21 20:34:12,621 epoch 8 - iter 9/13 - loss 0.21216935 - samples/sec: 14.33 - lr: 0.020000\n",
+      "2021-09-21 20:34:12,681 epoch 8 - iter 10/13 - loss 0.31122522 - samples/sec: 16.85 - lr: 0.020000\n",
+      "2021-09-21 20:34:12,729 epoch 8 - iter 11/13 - loss 0.28319985 - samples/sec: 20.94 - lr: 0.020000\n",
+      "2021-09-21 20:34:12,792 epoch 8 - iter 12/13 - loss 0.25999958 - samples/sec: 16.01 - lr: 0.020000\n",
+      "2021-09-21 20:34:12,850 epoch 8 - iter 13/13 - loss 0.24020601 - samples/sec: 17.50 - lr: 0.020000\n",
+      "2021-09-21 20:34:12,851 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:12,851 EPOCH 8 done: loss 0.2402 - lr 0.0200000\n",
+      "2021-09-21 20:34:13,011 DEV : loss 0.010792789049446583 - score 0.0\n",
+      "2021-09-21 20:34:13,013 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:34:13,089 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:13,169 epoch 9 - iter 1/13 - loss 0.30276442 - samples/sec: 20.75 - lr: 0.020000\n",
+      "2021-09-21 20:34:13,214 epoch 9 - iter 2/13 - loss 0.15476643 - samples/sec: 22.78 - lr: 0.020000\n",
+      "2021-09-21 20:34:13,258 epoch 9 - iter 3/13 - loss 0.10375920 - samples/sec: 22.92 - lr: 0.020000\n",
+      "2021-09-21 20:34:13,303 epoch 9 - iter 4/13 - loss 0.07850310 - samples/sec: 22.58 - lr: 0.020000\n",
+      "2021-09-21 20:34:13,347 epoch 9 - iter 5/13 - loss 0.06329755 - samples/sec: 22.97 - lr: 0.020000\n",
+      "2021-09-21 20:34:13,395 epoch 9 - iter 6/13 - loss 0.10625833 - samples/sec: 21.03 - lr: 0.020000\n",
+      "2021-09-21 20:34:13,439 epoch 9 - iter 7/13 - loss 0.09118723 - samples/sec: 22.69 - lr: 0.020000\n",
+      "2021-09-21 20:34:13,488 epoch 9 - iter 8/13 - loss 0.09536337 - samples/sec: 20.95 - lr: 0.020000\n",
+      "2021-09-21 20:34:13,532 epoch 9 - iter 9/13 - loss 0.08481347 - samples/sec: 22.70 - lr: 0.020000\n",
+      "2021-09-21 20:34:13,578 epoch 9 - iter 10/13 - loss 0.07638187 - samples/sec: 22.42 - lr: 0.020000\n",
+      "2021-09-21 20:34:13,624 epoch 9 - iter 11/13 - loss 0.06968737 - samples/sec: 22.08 - lr: 0.020000\n",
+      "2021-09-21 20:34:13,672 epoch 9 - iter 12/13 - loss 0.07824078 - samples/sec: 20.78 - lr: 0.020000\n",
+      "2021-09-21 20:34:13,720 epoch 9 - iter 13/13 - loss 0.10711858 - samples/sec: 21.28 - lr: 0.020000\n",
+      "2021-09-21 20:34:13,721 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:13,721 EPOCH 9 done: loss 0.1071 - lr 0.0200000\n",
+      "2021-09-21 20:34:13,880 DEV : loss 0.002103513805195689 - score 0.0\n",
+      "2021-09-21 20:34:13,881 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 14:50:48,244 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:50:48,313 epoch 8 - iter 1/13 - loss 0.38903001 - samples/sec: 21.80 - lr: 0.020000\n",
-      "2021-09-08 14:50:48,356 epoch 8 - iter 2/13 - loss 0.19776285 - samples/sec: 23.53 - lr: 0.020000\n",
-      "2021-09-08 14:50:48,399 epoch 8 - iter 3/13 - loss 0.13893249 - samples/sec: 23.67 - lr: 0.020000\n",
-      "2021-09-08 14:50:48,444 epoch 8 - iter 4/13 - loss 0.13292617 - samples/sec: 22.24 - lr: 0.020000\n",
-      "2021-09-08 14:50:48,489 epoch 8 - iter 5/13 - loss 0.17372032 - samples/sec: 22.20 - lr: 0.020000\n",
-      "2021-09-08 14:50:48,535 epoch 8 - iter 6/13 - loss 0.18696926 - samples/sec: 22.22 - lr: 0.020000\n",
-      "2021-09-08 14:50:48,577 epoch 8 - iter 7/13 - loss 0.16110856 - samples/sec: 23.85 - lr: 0.020000\n",
-      "2021-09-08 14:50:48,619 epoch 8 - iter 8/13 - loss 0.14149928 - samples/sec: 24.00 - lr: 0.020000\n",
-      "2021-09-08 14:50:48,665 epoch 8 - iter 9/13 - loss 0.22592618 - samples/sec: 22.07 - lr: 0.020000\n",
-      "2021-09-08 14:50:48,707 epoch 8 - iter 10/13 - loss 0.20625746 - samples/sec: 24.07 - lr: 0.020000\n",
-      "2021-09-08 14:50:48,752 epoch 8 - iter 11/13 - loss 0.28440542 - samples/sec: 22.29 - lr: 0.020000\n",
-      "2021-09-08 14:50:48,798 epoch 8 - iter 12/13 - loss 0.27370410 - samples/sec: 22.04 - lr: 0.020000\n",
-      "2021-09-08 14:50:48,840 epoch 8 - iter 13/13 - loss 0.25276040 - samples/sec: 23.99 - lr: 0.020000\n",
-      "2021-09-08 14:50:48,841 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:50:48,841 EPOCH 8 done: loss 0.2528 - lr 0.0200000\n",
-      "2021-09-08 14:50:49,963 DEV : loss 0.03685006871819496 - score 0.0\n",
-      "2021-09-08 14:50:49,964 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 14:50:49,999 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:50:50,062 epoch 9 - iter 1/13 - loss 0.01399942 - samples/sec: 24.16 - lr: 0.020000\n",
-      "2021-09-08 14:50:50,104 epoch 9 - iter 2/13 - loss 0.00867366 - samples/sec: 23.98 - lr: 0.020000\n",
-      "2021-09-08 14:50:50,146 epoch 9 - iter 3/13 - loss 0.00597398 - samples/sec: 23.78 - lr: 0.020000\n",
-      "2021-09-08 14:50:50,188 epoch 9 - iter 4/13 - loss 0.00468872 - samples/sec: 23.86 - lr: 0.020000\n",
-      "2021-09-08 14:50:50,234 epoch 9 - iter 5/13 - loss 0.03216480 - samples/sec: 22.26 - lr: 0.020000\n",
-      "2021-09-08 14:50:50,279 epoch 9 - iter 6/13 - loss 0.10164220 - samples/sec: 22.39 - lr: 0.020000\n",
-      "2021-09-08 14:50:50,321 epoch 9 - iter 7/13 - loss 0.08722269 - samples/sec: 23.72 - lr: 0.020000\n",
-      "2021-09-08 14:50:50,363 epoch 9 - iter 8/13 - loss 0.07699338 - samples/sec: 24.02 - lr: 0.020000\n",
-      "2021-09-08 14:50:50,409 epoch 9 - iter 9/13 - loss 0.26785311 - samples/sec: 22.18 - lr: 0.020000\n",
-      "2021-09-08 14:50:50,454 epoch 9 - iter 10/13 - loss 0.42164006 - samples/sec: 22.37 - lr: 0.020000\n",
-      "2021-09-08 14:50:50,496 epoch 9 - iter 11/13 - loss 0.38417367 - samples/sec: 23.74 - lr: 0.020000\n",
-      "2021-09-08 14:50:50,542 epoch 9 - iter 12/13 - loss 0.36403762 - samples/sec: 22.32 - lr: 0.020000\n",
-      "2021-09-08 14:50:50,584 epoch 9 - iter 13/13 - loss 0.33625274 - samples/sec: 23.79 - lr: 0.020000\n",
-      "2021-09-08 14:50:50,585 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:50:50,586 EPOCH 9 done: loss 0.3363 - lr 0.0200000\n",
-      "2021-09-08 14:50:50,621 DEV : loss 0.0018394481157884002 - score 0.0\n",
-      "2021-09-08 14:50:50,622 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 14:50:50,627 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:50:50,693 epoch 10 - iter 1/13 - loss 0.40753707 - samples/sec: 21.54 - lr: 0.020000\n",
-      "2021-09-08 14:50:50,735 epoch 10 - iter 2/13 - loss 0.20627700 - samples/sec: 23.85 - lr: 0.020000\n",
-      "2021-09-08 14:50:50,780 epoch 10 - iter 3/13 - loss 0.26741309 - samples/sec: 22.26 - lr: 0.020000\n",
-      "2021-09-08 14:50:50,822 epoch 10 - iter 4/13 - loss 0.20332575 - samples/sec: 23.97 - lr: 0.020000\n",
-      "2021-09-08 14:50:50,868 epoch 10 - iter 5/13 - loss 0.20211363 - samples/sec: 22.22 - lr: 0.020000\n",
-      "2021-09-08 14:50:50,910 epoch 10 - iter 6/13 - loss 0.16856211 - samples/sec: 23.91 - lr: 0.020000\n",
-      "2021-09-08 14:50:50,956 epoch 10 - iter 7/13 - loss 0.16575886 - samples/sec: 22.19 - lr: 0.020000\n",
-      "2021-09-08 14:50:51,001 epoch 10 - iter 8/13 - loss 0.20029440 - samples/sec: 22.42 - lr: 0.020000\n",
-      "2021-09-08 14:50:51,043 epoch 10 - iter 9/13 - loss 0.17880853 - samples/sec: 23.78 - lr: 0.020000\n",
-      "2021-09-08 14:50:51,085 epoch 10 - iter 10/13 - loss 0.16110501 - samples/sec: 23.83 - lr: 0.020000\n",
-      "2021-09-08 14:50:51,131 epoch 10 - iter 11/13 - loss 0.15177989 - samples/sec: 22.16 - lr: 0.020000\n"
+      "2021-09-21 20:34:19,643 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:19,715 epoch 10 - iter 1/13 - loss 2.13901973 - samples/sec: 20.27 - lr: 0.020000\n",
+      "2021-09-21 20:34:19,760 epoch 10 - iter 2/13 - loss 1.07205289 - samples/sec: 22.42 - lr: 0.020000\n",
+      "2021-09-21 20:34:19,805 epoch 10 - iter 3/13 - loss 0.73392167 - samples/sec: 22.77 - lr: 0.020000\n",
+      "2021-09-21 20:34:19,849 epoch 10 - iter 4/13 - loss 0.55057606 - samples/sec: 23.08 - lr: 0.020000\n",
+      "2021-09-21 20:34:19,896 epoch 10 - iter 5/13 - loss 0.54195670 - samples/sec: 21.25 - lr: 0.020000\n",
+      "2021-09-21 20:34:19,943 epoch 10 - iter 6/13 - loss 0.54850250 - samples/sec: 21.57 - lr: 0.020000\n",
+      "2021-09-21 20:34:19,990 epoch 10 - iter 7/13 - loss 0.47348767 - samples/sec: 21.24 - lr: 0.020000\n",
+      "2021-09-21 20:34:20,034 epoch 10 - iter 8/13 - loss 0.41458443 - samples/sec: 23.26 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:50:51,173 epoch 10 - iter 12/13 - loss 0.13926024 - samples/sec: 23.99 - lr: 0.020000\n",
-      "2021-09-08 14:50:51,215 epoch 10 - iter 13/13 - loss 0.12941200 - samples/sec: 23.93 - lr: 0.020000\n",
-      "2021-09-08 14:50:51,216 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:50:51,216 EPOCH 10 done: loss 0.1294 - lr 0.0200000\n",
-      "2021-09-08 14:50:51,252 DEV : loss 0.0012190331472083926 - score 0.0\n",
-      "2021-09-08 14:50:51,253 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 14:50:55,329 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:50:55,329 Testing using best model ...\n",
-      "2021-09-08 14:50:55,331 loading file None1/best-model.pt\n",
+      "2021-09-21 20:34:20,078 epoch 10 - iter 9/13 - loss 0.36962798 - samples/sec: 22.88 - lr: 0.020000\n",
+      "2021-09-21 20:34:20,122 epoch 10 - iter 10/13 - loss 0.33309179 - samples/sec: 22.98 - lr: 0.020000\n",
+      "2021-09-21 20:34:20,169 epoch 10 - iter 11/13 - loss 0.32011130 - samples/sec: 21.35 - lr: 0.020000\n",
+      "2021-09-21 20:34:20,213 epoch 10 - iter 12/13 - loss 0.29355622 - samples/sec: 22.95 - lr: 0.020000\n",
+      "2021-09-21 20:34:20,257 epoch 10 - iter 13/13 - loss 0.27101262 - samples/sec: 23.01 - lr: 0.020000\n",
+      "2021-09-21 20:34:20,258 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:20,258 EPOCH 10 done: loss 0.2710 - lr 0.0200000\n",
+      "2021-09-21 20:34:20,300 DEV : loss 0.001620881026610732 - score 0.0\n",
+      "2021-09-21 20:34:20,300 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:34:46,146 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:34:46,147 Testing using best model ...\n",
+      "2021-09-21 20:34:46,149 loading file None1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 14:50:59,753 \t0.5\n",
-      "2021-09-08 14:50:59,753 \n",
+      "2021-09-21 20:34:52,017 \t0.0\n",
+      "2021-09-21 20:34:52,018 \n",
       "Results:\n",
-      "- F-score (micro) 0.5\n",
-      "- F-score (macro) 0.0667\n",
-      "- Accuracy 0.5\n",
+      "- F-score (micro) 0.0\n",
+      "- F-score (macro) 0.0\n",
+      "- Accuracy 0.0\n",
       "\n",
       "By class:\n",
       "                                                                                                                                        precision    recall  f1-score   support\n",
       "\n",
       "                                                                                                           undertake a journey or trip     0.0000    0.0000    0.0000         0\n",
-      "                                                                          the practical application of science to commerce or industry     0.0000    0.0000    0.0000         0\n",
       "                                                                                        a healthy state of wellbeing free from disease     0.0000    0.0000    0.0000         0\n",
       "                                                                                          an adult female person (as opposed to a man)     0.0000    0.0000    0.0000         0\n",
       " a father or mother; one who begets or one who gives birth to or nurtures and raises a child a relative who plays the role of guardian     0.0000    0.0000    0.0000         0\n",
@@ -6811,38 +6840,39 @@
       "                                                                       the social event at which the ceremony of marriage is performed     0.0000    0.0000    0.0000         0\n",
       "                                                               the latest and most admired style in clothes and cosmetics and behavior     0.0000    0.0000    0.0000         0\n",
       "                                                                            an activity that is diverting and that holds the attention     0.0000    0.0000    0.0000         0\n",
+      "                                                                                           a particular branch of scientific knowledge     0.0000    0.0000    0.0000         0\n",
       "                                                                                                   the legal dissolution of a marriage     0.0000    0.0000    0.0000         0\n",
-      "                                                          a strong belief in a supernatural power or powers that control human destiny     0.0000    0.0000    0.0000         0\n",
+      "                                an act punishable by law; usually considered an evil act an evil act not necessarily punishable by law     0.0000    0.0000    0.0000         0\n",
       "                                                                       an active diversion requiring physical exertion and competition     0.0000    0.0000    0.0000         0\n",
-      "                                                                                          light and humorous drama with a happy ending     0.0000    0.0000    0.0000         0\n",
-      "                                                                                           a particular branch of scientific knowledge     1.0000    1.0000    1.0000         1\n",
-      "                                an act punishable by law; usually considered an evil act an evil act not necessarily punishable by law     0.0000    0.0000    0.0000         1\n",
+      "                                                                        social relations involving intrigue to gain authority or power     0.0000    0.0000    0.0000         0\n",
+      "                                                                          the practical application of science to commerce or industry     0.0000    0.0000    0.0000         1\n",
+      "                                                                                          light and humorous drama with a happy ending     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "                                                                                                                             micro avg     0.5000    0.5000    0.5000         2\n",
-      "                                                                                                                             macro avg     0.0667    0.0667    0.0667         2\n",
-      "                                                                                                                          weighted avg     0.5000    0.5000    0.5000         2\n",
-      "                                                                                                                           samples avg     0.5000    0.5000    0.5000         2\n",
+      "                                                                                                                             micro avg     0.0000    0.0000    0.0000         2\n",
+      "                                                                                                                             macro avg     0.0000    0.0000    0.0000         2\n",
+      "                                                                                                                          weighted avg     0.0000    0.0000    0.0000         2\n",
+      "                                                                                                                           samples avg     0.0000    0.0000    0.0000         2\n",
       "\n",
-      "2021-09-08 14:50:59,753 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:51:11,296 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
+      "2021-09-21 20:34:52,018 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:18,669 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 14:51:15,114 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:35:22,989 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 15/15 [00:00<00:00, 49971.85it/s]"
+      "100%|██████████| 15/15 [00:00<00:00, 47698.68it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:51:15,115 [b'undertake a journey or trip', b'the practical application of science to commerce or industry', b'a healthy state of wellbeing free from disease', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child a relative who plays the role of guardian', b'the social event at which the ceremony of marriage is performed', b'the latest and most admired style in clothes and cosmetics and behavior', b'an activity that is diverting and that holds the attention', b'the legal dissolution of a marriage', b'an act punishable by law; usually considered an evil act an evil act not necessarily punishable by law', b'a strong belief in a supernatural power or powers that control human destiny', b'an active diversion requiring physical exertion and competition', b'social relations involving intrigue to gain authority or power', b'light and humorous drama with a happy ending', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child; a relative who plays the role of guardian', b'a particular branch of scientific knowledge']\n",
-      "2021-09-08 14:51:15,124 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:51:15,126 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:35:22,991 [b'undertake a journey or trip', b'the practical application of science to commerce or industry', b'a healthy state of wellbeing free from disease', b'an adult female person (as opposed to a man)', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child a relative who plays the role of guardian', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child; a relative who plays the role of guardian', b'an activity that is diverting and that holds the attention', b'a particular branch of scientific knowledge', b'the legal dissolution of a marriage', b'an act punishable by law; usually considered an evil act an evil act not necessarily punishable by law', b'a strong belief in a supernatural power or powers that control human destiny', b'an active diversion requiring physical exertion and competition', b'social relations involving intrigue to gain authority or power', b'the latest and most admired style in clothes and cosmetics and behavior', b'light and humorous drama with a happy ending']\n",
+      "2021-09-21 20:35:23,000 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:23,001 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -7155,28 +7185,26 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:51:15,126 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:51:15,127 Corpus: \"Corpus: 13 train + 1 dev + 2 test sentences\"\n",
-      "2021-09-08 14:51:15,127 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:51:15,127 Parameters:\n",
-      "2021-09-08 14:51:15,127  - learning_rate: \"0.02\"\n",
-      "2021-09-08 14:51:15,128  - mini_batch_size: \"1\"\n",
-      "2021-09-08 14:51:15,128  - patience: \"3\"\n",
-      "2021-09-08 14:51:15,128  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 14:51:15,128  - max_epochs: \"10\"\n",
-      "2021-09-08 14:51:15,129  - shuffle: \"True\"\n",
-      "2021-09-08 14:51:15,129  - train_with_dev: \"False\"\n",
-      "2021-09-08 14:51:15,129  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 14:51:15,130 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:51:15,130 Model training base path: \"None1\"\n",
-      "2021-09-08 14:51:15,130 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:51:15,130 Device: cuda:0\n",
-      "2021-09-08 14:51:15,131 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:51:15,131 Embeddings storage mode: cpu\n",
-      "2021-09-08 14:51:15,137 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:51:15,197 epoch 1 - iter 1/13 - loss 0.68661505 - samples/sec: 26.58 - lr: 0.020000\n",
-      "2021-09-08 14:51:15,242 epoch 1 - iter 2/13 - loss 0.77726534 - samples/sec: 22.34 - lr: 0.020000\n",
-      "2021-09-08 14:51:15,287 epoch 1 - iter 3/13 - loss 0.82651387 - samples/sec: 22.35 - lr: 0.020000\n"
+      "2021-09-21 20:35:23,002 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:23,002 Corpus: \"Corpus: 13 train + 1 dev + 2 test sentences\"\n",
+      "2021-09-21 20:35:23,003 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:23,003 Parameters:\n",
+      "2021-09-21 20:35:23,003  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:35:23,004  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:35:23,004  - patience: \"3\"\n",
+      "2021-09-21 20:35:23,004  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:35:23,004  - max_epochs: \"10\"\n",
+      "2021-09-21 20:35:23,005  - shuffle: \"True\"\n",
+      "2021-09-21 20:35:23,005  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:35:23,005  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:35:23,005 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:23,006 Model training base path: \"None1\"\n",
+      "2021-09-21 20:35:23,006 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:23,006 Device: cuda:0\n",
+      "2021-09-21 20:35:23,007 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:23,007 Embeddings storage mode: cpu\n",
+      "2021-09-21 20:35:23,013 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:23,146 epoch 1 - iter 1/13 - loss 0.74711537 - samples/sec: 20.52 - lr: 0.020000\n"
      ]
     },
     {
@@ -7190,210 +7218,210 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:51:15,332 epoch 1 - iter 4/13 - loss 0.79605581 - samples/sec: 22.46 - lr: 0.020000\n",
-      "2021-09-08 14:51:15,377 epoch 1 - iter 5/13 - loss 0.74942794 - samples/sec: 22.31 - lr: 0.020000\n",
-      "2021-09-08 14:51:15,422 epoch 1 - iter 6/13 - loss 0.77993434 - samples/sec: 22.56 - lr: 0.020000\n",
-      "2021-09-08 14:51:15,467 epoch 1 - iter 7/13 - loss 0.71909027 - samples/sec: 22.39 - lr: 0.020000\n",
-      "2021-09-08 14:51:15,512 epoch 1 - iter 8/13 - loss 0.66715018 - samples/sec: 22.58 - lr: 0.020000\n",
-      "2021-09-08 14:51:15,557 epoch 1 - iter 9/13 - loss 0.63092638 - samples/sec: 22.36 - lr: 0.020000\n",
-      "2021-09-08 14:51:15,601 epoch 1 - iter 10/13 - loss 0.63087007 - samples/sec: 22.59 - lr: 0.020000\n",
-      "2021-09-08 14:51:15,643 epoch 1 - iter 11/13 - loss 0.57530607 - samples/sec: 24.00 - lr: 0.020000\n",
-      "2021-09-08 14:51:15,688 epoch 1 - iter 12/13 - loss 0.73930436 - samples/sec: 22.52 - lr: 0.020000\n",
-      "2021-09-08 14:51:15,733 epoch 1 - iter 13/13 - loss 0.74403605 - samples/sec: 22.36 - lr: 0.020000\n",
-      "2021-09-08 14:51:15,734 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:51:15,735 EPOCH 1 done: loss 0.7440 - lr 0.0200000\n",
-      "2021-09-08 14:51:15,768 DEV : loss 0.38144734501838684 - score 0.0\n",
-      "2021-09-08 14:51:15,769 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:35:23,207 epoch 1 - iter 2/13 - loss 0.84255174 - samples/sec: 16.48 - lr: 0.020000\n",
+      "2021-09-21 20:35:23,258 epoch 1 - iter 3/13 - loss 0.81737634 - samples/sec: 19.64 - lr: 0.020000\n",
+      "2021-09-21 20:35:23,333 epoch 1 - iter 4/13 - loss 0.96294522 - samples/sec: 13.38 - lr: 0.020000\n",
+      "2021-09-21 20:35:23,426 epoch 1 - iter 5/13 - loss 0.89145590 - samples/sec: 10.87 - lr: 0.020000\n",
+      "2021-09-21 20:35:23,522 epoch 1 - iter 6/13 - loss 0.81532804 - samples/sec: 10.41 - lr: 0.020000\n",
+      "2021-09-21 20:35:23,578 epoch 1 - iter 7/13 - loss 0.83003194 - samples/sec: 18.12 - lr: 0.020000\n",
+      "2021-09-21 20:35:23,647 epoch 1 - iter 8/13 - loss 0.77066336 - samples/sec: 14.45 - lr: 0.020000\n",
+      "2021-09-21 20:35:23,752 epoch 1 - iter 9/13 - loss 0.73632216 - samples/sec: 9.60 - lr: 0.020000\n",
+      "2021-09-21 20:35:23,838 epoch 1 - iter 10/13 - loss 0.68471608 - samples/sec: 11.65 - lr: 0.020000\n",
+      "2021-09-21 20:35:23,901 epoch 1 - iter 11/13 - loss 0.68683577 - samples/sec: 16.18 - lr: 0.020000\n",
+      "2021-09-21 20:35:23,957 epoch 1 - iter 12/13 - loss 0.63308225 - samples/sec: 17.77 - lr: 0.020000\n",
+      "2021-09-21 20:35:24,054 epoch 1 - iter 13/13 - loss 0.63145043 - samples/sec: 10.43 - lr: 0.020000\n",
+      "2021-09-21 20:35:24,055 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:24,055 EPOCH 1 done: loss 0.6315 - lr 0.0200000\n",
+      "2021-09-21 20:35:24,185 DEV : loss 0.9946970343589783 - score 0.0\n",
+      "2021-09-21 20:35:24,185 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 14:51:20,096 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:51:20,165 epoch 2 - iter 1/13 - loss 0.64765841 - samples/sec: 21.75 - lr: 0.020000\n",
-      "2021-09-08 14:51:20,210 epoch 2 - iter 2/13 - loss 0.61371413 - samples/sec: 22.31 - lr: 0.020000\n",
-      "2021-09-08 14:51:20,252 epoch 2 - iter 3/13 - loss 0.41709630 - samples/sec: 23.90 - lr: 0.020000\n",
-      "2021-09-08 14:51:20,297 epoch 2 - iter 4/13 - loss 0.35077677 - samples/sec: 22.39 - lr: 0.020000\n",
-      "2021-09-08 14:51:20,342 epoch 2 - iter 5/13 - loss 0.41800383 - samples/sec: 22.38 - lr: 0.020000\n",
-      "2021-09-08 14:51:20,387 epoch 2 - iter 6/13 - loss 0.42442273 - samples/sec: 22.56 - lr: 0.020000\n",
-      "2021-09-08 14:51:20,432 epoch 2 - iter 7/13 - loss 0.40745954 - samples/sec: 22.32 - lr: 0.020000\n",
-      "2021-09-08 14:51:20,477 epoch 2 - iter 8/13 - loss 0.50855851 - samples/sec: 22.46 - lr: 0.020000\n",
-      "2021-09-08 14:51:20,522 epoch 2 - iter 9/13 - loss 0.53993866 - samples/sec: 22.45 - lr: 0.020000\n",
-      "2021-09-08 14:51:20,567 epoch 2 - iter 10/13 - loss 0.56493332 - samples/sec: 22.50 - lr: 0.020000\n",
-      "2021-09-08 14:51:20,612 epoch 2 - iter 11/13 - loss 0.57127010 - samples/sec: 22.29 - lr: 0.020000\n",
-      "2021-09-08 14:51:20,657 epoch 2 - iter 12/13 - loss 0.63621127 - samples/sec: 22.48 - lr: 0.020000\n",
-      "2021-09-08 14:51:20,702 epoch 2 - iter 13/13 - loss 0.62670920 - samples/sec: 22.38 - lr: 0.020000\n",
-      "2021-09-08 14:51:20,703 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:51:20,703 EPOCH 2 done: loss 0.6267 - lr 0.0200000\n",
-      "2021-09-08 14:51:20,738 DEV : loss 0.2849750816822052 - score 0.0\n",
-      "2021-09-08 14:51:20,739 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:35:28,398 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:28,615 epoch 2 - iter 1/13 - loss 0.52277672 - samples/sec: 9.45 - lr: 0.020000\n",
+      "2021-09-21 20:35:28,682 epoch 2 - iter 2/13 - loss 1.08050740 - samples/sec: 15.24 - lr: 0.020000\n",
+      "2021-09-21 20:35:28,762 epoch 2 - iter 3/13 - loss 0.80720270 - samples/sec: 12.46 - lr: 0.020000\n",
+      "2021-09-21 20:35:28,833 epoch 2 - iter 4/13 - loss 0.81436412 - samples/sec: 14.33 - lr: 0.020000\n",
+      "2021-09-21 20:35:28,935 epoch 2 - iter 5/13 - loss 0.79136348 - samples/sec: 9.77 - lr: 0.020000\n",
+      "2021-09-21 20:35:28,995 epoch 2 - iter 6/13 - loss 0.74735510 - samples/sec: 16.79 - lr: 0.020000\n",
+      "2021-09-21 20:35:29,062 epoch 2 - iter 7/13 - loss 0.72889143 - samples/sec: 15.09 - lr: 0.020000\n",
+      "2021-09-21 20:35:29,148 epoch 2 - iter 8/13 - loss 0.71052910 - samples/sec: 11.69 - lr: 0.020000\n",
+      "2021-09-21 20:35:29,219 epoch 2 - iter 9/13 - loss 0.68246070 - samples/sec: 14.26 - lr: 0.020000\n",
+      "2021-09-21 20:35:29,301 epoch 2 - iter 10/13 - loss 0.67601449 - samples/sec: 12.14 - lr: 0.020000\n",
+      "2021-09-21 20:35:29,367 epoch 2 - iter 11/13 - loss 0.61664429 - samples/sec: 15.38 - lr: 0.020000\n",
+      "2021-09-21 20:35:29,427 epoch 2 - iter 12/13 - loss 0.59162271 - samples/sec: 16.67 - lr: 0.020000\n",
+      "2021-09-21 20:35:29,497 epoch 2 - iter 13/13 - loss 0.63530425 - samples/sec: 14.48 - lr: 0.020000\n",
+      "2021-09-21 20:35:29,498 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:29,498 EPOCH 2 done: loss 0.6353 - lr 0.0200000\n",
+      "2021-09-21 20:35:29,579 DEV : loss 0.6593390703201294 - score 0.0\n",
+      "2021-09-21 20:35:29,584 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 14:51:25,127 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:51:25,193 epoch 3 - iter 1/13 - loss 0.03730739 - samples/sec: 23.36 - lr: 0.020000\n",
-      "2021-09-08 14:51:25,238 epoch 3 - iter 2/13 - loss 0.29183552 - samples/sec: 22.18 - lr: 0.020000\n",
-      "2021-09-08 14:51:25,283 epoch 3 - iter 3/13 - loss 0.39211327 - samples/sec: 22.39 - lr: 0.020000\n",
-      "2021-09-08 14:51:25,328 epoch 3 - iter 4/13 - loss 0.42131937 - samples/sec: 22.32 - lr: 0.020000\n",
-      "2021-09-08 14:51:25,373 epoch 3 - iter 5/13 - loss 0.47011012 - samples/sec: 22.42 - lr: 0.020000\n",
-      "2021-09-08 14:51:25,418 epoch 3 - iter 6/13 - loss 0.43109335 - samples/sec: 22.49 - lr: 0.020000\n",
-      "2021-09-08 14:51:25,464 epoch 3 - iter 7/13 - loss 0.41066599 - samples/sec: 22.26 - lr: 0.020000\n",
-      "2021-09-08 14:51:25,505 epoch 3 - iter 8/13 - loss 0.36542325 - samples/sec: 24.33 - lr: 0.020000\n",
-      "2021-09-08 14:51:25,550 epoch 3 - iter 9/13 - loss 0.47520322 - samples/sec: 22.22 - lr: 0.020000\n",
-      "2021-09-08 14:51:25,595 epoch 3 - iter 10/13 - loss 0.47081828 - samples/sec: 22.55 - lr: 0.020000\n",
-      "2021-09-08 14:51:25,640 epoch 3 - iter 11/13 - loss 0.49777248 - samples/sec: 22.38 - lr: 0.020000\n",
-      "2021-09-08 14:51:25,686 epoch 3 - iter 12/13 - loss 0.52748023 - samples/sec: 22.26 - lr: 0.020000\n",
-      "2021-09-08 14:51:25,731 epoch 3 - iter 13/13 - loss 0.55785849 - samples/sec: 22.41 - lr: 0.020000\n",
-      "2021-09-08 14:51:25,731 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:51:25,732 EPOCH 3 done: loss 0.5579 - lr 0.0200000\n",
-      "2021-09-08 14:51:25,767 DEV : loss 0.23584823310375214 - score 0.0\n",
-      "2021-09-08 14:51:25,767 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:35:33,578 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:33,699 epoch 3 - iter 1/13 - loss 0.56840283 - samples/sec: 11.42 - lr: 0.020000\n",
+      "2021-09-21 20:35:33,790 epoch 3 - iter 2/13 - loss 0.58993205 - samples/sec: 10.98 - lr: 0.020000\n",
+      "2021-09-21 20:35:33,875 epoch 3 - iter 3/13 - loss 0.48860418 - samples/sec: 11.83 - lr: 0.020000\n",
+      "2021-09-21 20:35:33,946 epoch 3 - iter 4/13 - loss 0.37012939 - samples/sec: 14.20 - lr: 0.020000\n",
+      "2021-09-21 20:35:34,013 epoch 3 - iter 5/13 - loss 0.57740072 - samples/sec: 14.99 - lr: 0.020000\n",
+      "2021-09-21 20:35:34,070 epoch 3 - iter 6/13 - loss 0.60484683 - samples/sec: 17.92 - lr: 0.020000\n",
+      "2021-09-21 20:35:34,134 epoch 3 - iter 7/13 - loss 0.60706739 - samples/sec: 15.52 - lr: 0.020000\n",
+      "2021-09-21 20:35:34,206 epoch 3 - iter 8/13 - loss 0.59403423 - samples/sec: 14.02 - lr: 0.020000\n",
+      "2021-09-21 20:35:34,259 epoch 3 - iter 9/13 - loss 0.58889108 - samples/sec: 19.21 - lr: 0.020000\n",
+      "2021-09-21 20:35:34,310 epoch 3 - iter 10/13 - loss 0.59586236 - samples/sec: 19.64 - lr: 0.020000\n",
+      "2021-09-21 20:35:34,372 epoch 3 - iter 11/13 - loss 0.58956208 - samples/sec: 16.24 - lr: 0.020000\n",
+      "2021-09-21 20:35:34,440 epoch 3 - iter 12/13 - loss 0.57308094 - samples/sec: 14.74 - lr: 0.020000\n",
+      "2021-09-21 20:35:34,530 epoch 3 - iter 13/13 - loss 0.56871363 - samples/sec: 11.17 - lr: 0.020000\n",
+      "2021-09-21 20:35:34,531 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:34,532 EPOCH 3 done: loss 0.5687 - lr 0.0200000\n",
+      "2021-09-21 20:35:34,641 DEV : loss 0.4673175513744354 - score 0.0\n",
+      "2021-09-21 20:35:34,646 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 14:51:29,970 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:51:30,039 epoch 4 - iter 1/13 - loss 0.40367723 - samples/sec: 21.62 - lr: 0.020000\n",
-      "2021-09-08 14:51:30,085 epoch 4 - iter 2/13 - loss 0.39466113 - samples/sec: 21.98 - lr: 0.020000\n",
-      "2021-09-08 14:51:30,130 epoch 4 - iter 3/13 - loss 0.32733099 - samples/sec: 22.25 - lr: 0.020000\n",
-      "2021-09-08 14:51:30,175 epoch 4 - iter 4/13 - loss 0.27957547 - samples/sec: 22.27 - lr: 0.020000\n",
-      "2021-09-08 14:51:30,221 epoch 4 - iter 5/13 - loss 0.32563851 - samples/sec: 22.24 - lr: 0.020000\n",
-      "2021-09-08 14:51:30,266 epoch 4 - iter 6/13 - loss 0.40058965 - samples/sec: 22.42 - lr: 0.020000\n",
-      "2021-09-08 14:51:30,311 epoch 4 - iter 7/13 - loss 0.35334099 - samples/sec: 22.14 - lr: 0.020000\n",
-      "2021-09-08 14:51:30,356 epoch 4 - iter 8/13 - loss 0.36512873 - samples/sec: 22.44 - lr: 0.020000\n",
-      "2021-09-08 14:51:30,402 epoch 4 - iter 9/13 - loss 0.38342016 - samples/sec: 22.10 - lr: 0.020000\n",
-      "2021-09-08 14:51:30,447 epoch 4 - iter 10/13 - loss 0.53845688 - samples/sec: 22.21 - lr: 0.020000\n",
-      "2021-09-08 14:51:30,493 epoch 4 - iter 11/13 - loss 0.49325939 - samples/sec: 22.07 - lr: 0.020000\n",
-      "2021-09-08 14:51:30,538 epoch 4 - iter 12/13 - loss 0.48912774 - samples/sec: 22.36 - lr: 0.020000\n",
-      "2021-09-08 14:51:30,585 epoch 4 - iter 13/13 - loss 0.46960087 - samples/sec: 21.54 - lr: 0.020000\n",
-      "2021-09-08 14:51:30,586 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:51:30,586 EPOCH 4 done: loss 0.4696 - lr 0.0200000\n",
-      "2021-09-08 14:51:30,620 DEV : loss 0.13555359840393066 - score 0.0\n",
-      "2021-09-08 14:51:30,620 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 14:51:34,768 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:51:34,836 epoch 5 - iter 1/13 - loss 1.59450305 - samples/sec: 21.92 - lr: 0.020000\n",
-      "2021-09-08 14:51:34,881 epoch 5 - iter 2/13 - loss 0.86586213 - samples/sec: 22.08 - lr: 0.020000\n",
-      "2021-09-08 14:51:34,927 epoch 5 - iter 3/13 - loss 0.64625369 - samples/sec: 22.06 - lr: 0.020000\n",
-      "2021-09-08 14:51:34,972 epoch 5 - iter 4/13 - loss 0.50301286 - samples/sec: 22.36 - lr: 0.020000\n",
-      "2021-09-08 14:51:35,018 epoch 5 - iter 5/13 - loss 0.43381286 - samples/sec: 22.30 - lr: 0.020000\n",
-      "2021-09-08 14:51:35,060 epoch 5 - iter 6/13 - loss 0.36196779 - samples/sec: 24.01 - lr: 0.020000\n",
-      "2021-09-08 14:51:35,101 epoch 5 - iter 7/13 - loss 0.31203301 - samples/sec: 24.22 - lr: 0.020000\n",
-      "2021-09-08 14:51:35,147 epoch 5 - iter 8/13 - loss 0.32930447 - samples/sec: 22.25 - lr: 0.020000\n",
-      "2021-09-08 14:51:35,188 epoch 5 - iter 9/13 - loss 0.29460993 - samples/sec: 24.18 - lr: 0.020000\n",
-      "2021-09-08 14:51:35,232 epoch 5 - iter 10/13 - loss 0.26596378 - samples/sec: 23.01 - lr: 0.020000\n",
-      "2021-09-08 14:51:35,274 epoch 5 - iter 11/13 - loss 0.24206674 - samples/sec: 24.17 - lr: 0.020000\n",
-      "2021-09-08 14:51:35,319 epoch 5 - iter 12/13 - loss 0.42743158 - samples/sec: 22.31 - lr: 0.020000\n",
-      "2021-09-08 14:51:35,361 epoch 5 - iter 13/13 - loss 0.39699164 - samples/sec: 24.18 - lr: 0.020000\n",
-      "2021-09-08 14:51:35,362 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:51:35,362 EPOCH 5 done: loss 0.3970 - lr 0.0200000\n",
-      "2021-09-08 14:51:35,398 DEV : loss 0.24257546663284302 - score 0.0\n",
-      "2021-09-08 14:51:35,399 BAD EPOCHS (no improvement): 1\n"
+      "2021-09-21 20:35:38,703 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:38,806 epoch 4 - iter 1/13 - loss 0.56763554 - samples/sec: 15.15 - lr: 0.020000\n",
+      "2021-09-21 20:35:38,866 epoch 4 - iter 2/13 - loss 0.39222419 - samples/sec: 16.95 - lr: 0.020000\n",
+      "2021-09-21 20:35:38,927 epoch 4 - iter 3/13 - loss 0.32713516 - samples/sec: 16.55 - lr: 0.020000\n",
+      "2021-09-21 20:35:39,004 epoch 4 - iter 4/13 - loss 0.29438768 - samples/sec: 12.99 - lr: 0.020000\n",
+      "2021-09-21 20:35:39,058 epoch 4 - iter 5/13 - loss 0.24584210 - samples/sec: 18.84 - lr: 0.020000\n",
+      "2021-09-21 20:35:39,115 epoch 4 - iter 6/13 - loss 0.20633589 - samples/sec: 17.62 - lr: 0.020000\n",
+      "2021-09-21 20:35:39,162 epoch 4 - iter 7/13 - loss 0.18113560 - samples/sec: 21.56 - lr: 0.020000\n",
+      "2021-09-21 20:35:39,236 epoch 4 - iter 8/13 - loss 0.28579240 - samples/sec: 13.56 - lr: 0.020000\n",
+      "2021-09-21 20:35:39,298 epoch 4 - iter 9/13 - loss 0.27619412 - samples/sec: 16.31 - lr: 0.020000\n",
+      "2021-09-21 20:35:39,373 epoch 4 - iter 10/13 - loss 0.26311323 - samples/sec: 13.33 - lr: 0.020000\n",
+      "2021-09-21 20:35:39,432 epoch 4 - iter 11/13 - loss 0.26672905 - samples/sec: 17.22 - lr: 0.020000\n",
+      "2021-09-21 20:35:39,482 epoch 4 - iter 12/13 - loss 0.31457553 - samples/sec: 19.96 - lr: 0.020000\n",
+      "2021-09-21 20:35:39,546 epoch 4 - iter 13/13 - loss 0.33153736 - samples/sec: 15.74 - lr: 0.020000\n",
+      "2021-09-21 20:35:39,547 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:39,548 EPOCH 4 done: loss 0.3315 - lr 0.0200000\n",
+      "2021-09-21 20:35:39,602 DEV : loss 0.5264644026756287 - score 0.0\n",
+      "2021-09-21 20:35:39,603 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:35:39,605 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:39,729 epoch 5 - iter 1/13 - loss 0.22542036 - samples/sec: 14.30 - lr: 0.020000\n",
+      "2021-09-21 20:35:39,792 epoch 5 - iter 2/13 - loss 0.35156138 - samples/sec: 15.83 - lr: 0.020000\n",
+      "2021-09-21 20:35:39,849 epoch 5 - iter 3/13 - loss 0.57283331 - samples/sec: 17.74 - lr: 0.020000\n",
+      "2021-09-21 20:35:39,906 epoch 5 - iter 4/13 - loss 0.50385460 - samples/sec: 17.62 - lr: 0.020000\n",
+      "2021-09-21 20:35:39,969 epoch 5 - iter 5/13 - loss 0.40924743 - samples/sec: 16.09 - lr: 0.020000\n",
+      "2021-09-21 20:35:40,043 epoch 5 - iter 6/13 - loss 0.37443838 - samples/sec: 13.60 - lr: 0.020000\n",
+      "2021-09-21 20:35:40,097 epoch 5 - iter 7/13 - loss 0.32180200 - samples/sec: 18.49 - lr: 0.020000\n",
+      "2021-09-21 20:35:40,153 epoch 5 - iter 8/13 - loss 0.28910128 - samples/sec: 18.14 - lr: 0.020000\n",
+      "2021-09-21 20:35:40,216 epoch 5 - iter 9/13 - loss 0.25807787 - samples/sec: 15.89 - lr: 0.020000\n",
+      "2021-09-21 20:35:40,285 epoch 5 - iter 10/13 - loss 0.38884532 - samples/sec: 14.67 - lr: 0.020000\n",
+      "2021-09-21 20:35:40,357 epoch 5 - iter 11/13 - loss 0.49964093 - samples/sec: 14.00 - lr: 0.020000\n",
+      "2021-09-21 20:35:40,413 epoch 5 - iter 12/13 - loss 0.48861736 - samples/sec: 17.91 - lr: 0.020000\n",
+      "2021-09-21 20:35:40,474 epoch 5 - iter 13/13 - loss 0.45303268 - samples/sec: 16.44 - lr: 0.020000\n",
+      "2021-09-21 20:35:40,475 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:51:35,401 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:51:35,461 epoch 6 - iter 1/13 - loss 0.01634724 - samples/sec: 24.15 - lr: 0.020000\n",
-      "2021-09-08 14:51:35,506 epoch 6 - iter 2/13 - loss 0.51793191 - samples/sec: 22.26 - lr: 0.020000\n",
-      "2021-09-08 14:51:35,548 epoch 6 - iter 3/13 - loss 0.34813260 - samples/sec: 24.15 - lr: 0.020000\n",
-      "2021-09-08 14:51:35,590 epoch 6 - iter 4/13 - loss 0.26149918 - samples/sec: 23.88 - lr: 0.020000\n",
-      "2021-09-08 14:51:35,635 epoch 6 - iter 5/13 - loss 0.26773097 - samples/sec: 22.52 - lr: 0.020000\n",
-      "2021-09-08 14:51:35,677 epoch 6 - iter 6/13 - loss 0.22403416 - samples/sec: 23.97 - lr: 0.020000\n",
-      "2021-09-08 14:51:35,719 epoch 6 - iter 7/13 - loss 0.19320762 - samples/sec: 24.12 - lr: 0.020000\n",
-      "2021-09-08 14:51:35,764 epoch 6 - iter 8/13 - loss 0.19745377 - samples/sec: 22.39 - lr: 0.020000\n",
-      "2021-09-08 14:51:35,806 epoch 6 - iter 9/13 - loss 0.17616955 - samples/sec: 24.04 - lr: 0.020000\n",
-      "2021-09-08 14:51:35,848 epoch 6 - iter 10/13 - loss 0.15901662 - samples/sec: 23.96 - lr: 0.020000\n",
-      "2021-09-08 14:51:35,893 epoch 6 - iter 11/13 - loss 0.24820815 - samples/sec: 22.44 - lr: 0.020000\n",
-      "2021-09-08 14:51:35,939 epoch 6 - iter 12/13 - loss 0.23536142 - samples/sec: 21.69 - lr: 0.020000\n",
-      "2021-09-08 14:51:35,985 epoch 6 - iter 13/13 - loss 0.26590741 - samples/sec: 22.31 - lr: 0.020000\n",
-      "2021-09-08 14:51:35,986 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:51:35,986 EPOCH 6 done: loss 0.2659 - lr 0.0200000\n",
-      "2021-09-08 14:51:36,020 DEV : loss 0.01208802405744791 - score 0.0\n",
-      "2021-09-08 14:51:36,020 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 14:51:40,425 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:51:40,494 epoch 7 - iter 1/13 - loss 0.18901850 - samples/sec: 21.63 - lr: 0.020000\n",
-      "2021-09-08 14:51:40,536 epoch 7 - iter 2/13 - loss 0.09602380 - samples/sec: 23.95 - lr: 0.020000\n",
-      "2021-09-08 14:51:40,581 epoch 7 - iter 3/13 - loss 0.46888666 - samples/sec: 22.19 - lr: 0.020000\n",
-      "2021-09-08 14:51:40,624 epoch 7 - iter 4/13 - loss 0.35221791 - samples/sec: 23.88 - lr: 0.020000\n",
-      "2021-09-08 14:51:40,666 epoch 7 - iter 5/13 - loss 0.28220271 - samples/sec: 24.01 - lr: 0.020000\n",
-      "2021-09-08 14:51:40,711 epoch 7 - iter 6/13 - loss 0.30536427 - samples/sec: 22.44 - lr: 0.020000\n",
-      "2021-09-08 14:51:40,753 epoch 7 - iter 7/13 - loss 0.26224382 - samples/sec: 23.84 - lr: 0.020000\n",
-      "2021-09-08 14:51:40,795 epoch 7 - iter 8/13 - loss 0.23119256 - samples/sec: 24.19 - lr: 0.020000\n",
-      "2021-09-08 14:51:40,837 epoch 7 - iter 9/13 - loss 0.20557327 - samples/sec: 23.87 - lr: 0.020000\n",
-      "2021-09-08 14:51:40,879 epoch 7 - iter 10/13 - loss 0.18513623 - samples/sec: 24.02 - lr: 0.020000\n",
-      "2021-09-08 14:51:40,922 epoch 7 - iter 11/13 - loss 0.17109454 - samples/sec: 23.34 - lr: 0.020000\n",
-      "2021-09-08 14:51:40,964 epoch 7 - iter 12/13 - loss 0.15750057 - samples/sec: 24.00 - lr: 0.020000\n",
-      "2021-09-08 14:51:41,006 epoch 7 - iter 13/13 - loss 0.14564304 - samples/sec: 23.99 - lr: 0.020000\n",
-      "2021-09-08 14:51:41,007 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:51:41,008 EPOCH 7 done: loss 0.1456 - lr 0.0200000\n",
-      "2021-09-08 14:51:41,041 DEV : loss 0.01123783364892006 - score 0.0\n",
-      "2021-09-08 14:51:41,042 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:35:40,476 EPOCH 5 done: loss 0.4530 - lr 0.0200000\n",
+      "2021-09-21 20:35:40,579 DEV : loss 0.9004479646682739 - score 0.0\n",
+      "2021-09-21 20:35:40,580 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:35:40,582 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:40,680 epoch 6 - iter 1/13 - loss 0.34169617 - samples/sec: 12.95 - lr: 0.020000\n",
+      "2021-09-21 20:35:40,742 epoch 6 - iter 2/13 - loss 0.86154641 - samples/sec: 16.12 - lr: 0.020000\n",
+      "2021-09-21 20:35:40,815 epoch 6 - iter 3/13 - loss 0.63522994 - samples/sec: 13.76 - lr: 0.020000\n",
+      "2021-09-21 20:35:40,875 epoch 6 - iter 4/13 - loss 0.54057395 - samples/sec: 16.91 - lr: 0.020000\n",
+      "2021-09-21 20:35:40,947 epoch 6 - iter 5/13 - loss 0.47029561 - samples/sec: 13.97 - lr: 0.020000\n",
+      "2021-09-21 20:35:41,004 epoch 6 - iter 6/13 - loss 0.39257908 - samples/sec: 17.71 - lr: 0.020000\n",
+      "2021-09-21 20:35:41,059 epoch 6 - iter 7/13 - loss 0.46740279 - samples/sec: 18.17 - lr: 0.020000\n",
+      "2021-09-21 20:35:41,120 epoch 6 - iter 8/13 - loss 0.41163393 - samples/sec: 16.47 - lr: 0.020000\n",
+      "2021-09-21 20:35:41,185 epoch 6 - iter 9/13 - loss 0.37372694 - samples/sec: 15.49 - lr: 0.020000\n",
+      "2021-09-21 20:35:41,245 epoch 6 - iter 10/13 - loss 0.34227606 - samples/sec: 17.05 - lr: 0.020000\n",
+      "2021-09-21 20:35:41,301 epoch 6 - iter 11/13 - loss 0.31142720 - samples/sec: 17.87 - lr: 0.020000\n",
+      "2021-09-21 20:35:41,367 epoch 6 - iter 12/13 - loss 0.37071067 - samples/sec: 15.17 - lr: 0.020000\n",
+      "2021-09-21 20:35:41,426 epoch 6 - iter 13/13 - loss 0.35606765 - samples/sec: 17.16 - lr: 0.020000\n",
+      "2021-09-21 20:35:41,427 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:41,427 EPOCH 6 done: loss 0.3561 - lr 0.0200000\n",
+      "2021-09-21 20:35:41,506 DEV : loss 1.1533644199371338 - score 0.0\n",
+      "2021-09-21 20:35:41,508 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:35:41,510 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:41,631 epoch 7 - iter 1/13 - loss 0.09179146 - samples/sec: 14.61 - lr: 0.020000\n",
+      "2021-09-21 20:35:41,688 epoch 7 - iter 2/13 - loss 0.07430304 - samples/sec: 17.57 - lr: 0.020000\n",
+      "2021-09-21 20:35:41,744 epoch 7 - iter 3/13 - loss 0.35595459 - samples/sec: 18.18 - lr: 0.020000\n",
+      "2021-09-21 20:35:41,795 epoch 7 - iter 4/13 - loss 0.28998970 - samples/sec: 19.71 - lr: 0.020000\n",
+      "2021-09-21 20:35:41,856 epoch 7 - iter 5/13 - loss 0.23248726 - samples/sec: 16.46 - lr: 0.020000\n",
+      "2021-09-21 20:35:41,923 epoch 7 - iter 6/13 - loss 0.33278748 - samples/sec: 15.10 - lr: 0.020000\n",
+      "2021-09-21 20:35:41,992 epoch 7 - iter 7/13 - loss 0.29486175 - samples/sec: 14.49 - lr: 0.020000\n",
+      "2021-09-21 20:35:42,055 epoch 7 - iter 8/13 - loss 0.32672637 - samples/sec: 16.07 - lr: 0.020000\n",
+      "2021-09-21 20:35:42,111 epoch 7 - iter 9/13 - loss 0.29722900 - samples/sec: 17.85 - lr: 0.020000\n",
+      "2021-09-21 20:35:42,171 epoch 7 - iter 10/13 - loss 0.26771413 - samples/sec: 16.96 - lr: 0.020000\n",
+      "2021-09-21 20:35:42,231 epoch 7 - iter 11/13 - loss 0.25029558 - samples/sec: 16.67 - lr: 0.020000\n",
+      "2021-09-21 20:35:42,311 epoch 7 - iter 12/13 - loss 0.22958575 - samples/sec: 12.62 - lr: 0.020000\n",
+      "2021-09-21 20:35:42,391 epoch 7 - iter 13/13 - loss 0.22338198 - samples/sec: 12.55 - lr: 0.020000\n",
+      "2021-09-21 20:35:42,392 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:42,393 EPOCH 7 done: loss 0.2234 - lr 0.0200000\n",
+      "2021-09-21 20:35:42,502 DEV : loss 0.23506492376327515 - score 0.0\n",
+      "2021-09-21 20:35:42,504 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 14:51:45,981 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:51:46,046 epoch 8 - iter 1/13 - loss 0.00064147 - samples/sec: 23.48 - lr: 0.020000\n",
-      "2021-09-08 14:51:46,092 epoch 8 - iter 2/13 - loss 0.04772082 - samples/sec: 22.08 - lr: 0.020000\n",
-      "2021-09-08 14:51:46,137 epoch 8 - iter 3/13 - loss 0.68213556 - samples/sec: 22.24 - lr: 0.020000\n",
-      "2021-09-08 14:51:46,179 epoch 8 - iter 4/13 - loss 0.51331070 - samples/sec: 24.15 - lr: 0.020000\n",
-      "2021-09-08 14:51:46,224 epoch 8 - iter 5/13 - loss 0.41748890 - samples/sec: 22.17 - lr: 0.020000\n",
-      "2021-09-08 14:51:46,270 epoch 8 - iter 6/13 - loss 0.35758258 - samples/sec: 22.33 - lr: 0.020000\n",
-      "2021-09-08 14:51:46,312 epoch 8 - iter 7/13 - loss 0.30727407 - samples/sec: 24.03 - lr: 0.020000\n",
-      "2021-09-08 14:51:46,356 epoch 8 - iter 8/13 - loss 0.49892110 - samples/sec: 22.46 - lr: 0.020000\n",
-      "2021-09-08 14:51:46,403 epoch 8 - iter 9/13 - loss 0.59503864 - samples/sec: 21.69 - lr: 0.020000\n",
-      "2021-09-08 14:51:46,445 epoch 8 - iter 10/13 - loss 0.53591887 - samples/sec: 24.22 - lr: 0.020000\n",
-      "2021-09-08 14:51:46,490 epoch 8 - iter 11/13 - loss 0.49017691 - samples/sec: 22.35 - lr: 0.020000\n",
-      "2021-09-08 14:51:46,535 epoch 8 - iter 12/13 - loss 0.46753421 - samples/sec: 22.48 - lr: 0.020000\n",
-      "2021-09-08 14:51:46,580 epoch 8 - iter 13/13 - loss 0.43448445 - samples/sec: 22.41 - lr: 0.020000\n",
-      "2021-09-08 14:51:46,581 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:51:46,581 EPOCH 8 done: loss 0.4345 - lr 0.0200000\n",
-      "2021-09-08 14:51:46,615 DEV : loss 0.01459588948637247 - score 0.0\n",
-      "2021-09-08 14:51:46,616 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 14:51:46,617 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:51:46,681 epoch 9 - iter 1/13 - loss 0.84523875 - samples/sec: 22.41 - lr: 0.020000\n",
-      "2021-09-08 14:51:46,722 epoch 9 - iter 2/13 - loss 0.42535938 - samples/sec: 24.24 - lr: 0.020000\n",
-      "2021-09-08 14:51:46,765 epoch 9 - iter 3/13 - loss 0.28459017 - samples/sec: 23.91 - lr: 0.020000\n",
-      "2021-09-08 14:51:46,806 epoch 9 - iter 4/13 - loss 0.21575772 - samples/sec: 24.16 - lr: 0.020000\n",
-      "2021-09-08 14:51:46,848 epoch 9 - iter 5/13 - loss 0.17314823 - samples/sec: 23.98 - lr: 0.020000\n",
-      "2021-09-08 14:51:46,894 epoch 9 - iter 6/13 - loss 0.40260919 - samples/sec: 22.34 - lr: 0.020000\n",
-      "2021-09-08 14:51:46,939 epoch 9 - iter 7/13 - loss 0.38893687 - samples/sec: 22.30 - lr: 0.020000\n",
-      "2021-09-08 14:51:46,981 epoch 9 - iter 8/13 - loss 0.34053424 - samples/sec: 24.05 - lr: 0.020000\n",
-      "2021-09-08 14:51:47,025 epoch 9 - iter 9/13 - loss 0.53062481 - samples/sec: 22.60 - lr: 0.020000\n",
-      "2021-09-08 14:51:47,068 epoch 9 - iter 10/13 - loss 0.47841767 - samples/sec: 23.85 - lr: 0.020000\n",
-      "2021-09-08 14:51:47,110 epoch 9 - iter 11/13 - loss 0.43501726 - samples/sec: 24.04 - lr: 0.020000\n",
-      "2021-09-08 14:51:47,156 epoch 9 - iter 12/13 - loss 0.42536538 - samples/sec: 21.64 - lr: 0.020000\n",
-      "2021-09-08 14:51:47,201 epoch 9 - iter 13/13 - loss 0.46735125 - samples/sec: 22.52 - lr: 0.020000\n",
-      "2021-09-08 14:51:47,202 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:51:47,202 EPOCH 9 done: loss 0.4674 - lr 0.0200000\n",
-      "2021-09-08 14:51:47,237 DEV : loss 0.046725306659936905 - score 0.0\n",
-      "2021-09-08 14:51:47,237 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 14:51:47,239 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:51:47,302 epoch 10 - iter 1/13 - loss 0.73311019 - samples/sec: 22.43 - lr: 0.020000\n",
-      "2021-09-08 14:51:47,344 epoch 10 - iter 2/13 - loss 0.37215093 - samples/sec: 24.05 - lr: 0.020000\n",
-      "2021-09-08 14:51:47,389 epoch 10 - iter 3/13 - loss 0.52240670 - samples/sec: 22.37 - lr: 0.020000\n",
-      "2021-09-08 14:51:47,434 epoch 10 - iter 4/13 - loss 0.41175219 - samples/sec: 22.38 - lr: 0.020000\n",
-      "2021-09-08 14:51:47,480 epoch 10 - iter 5/13 - loss 0.53806185 - samples/sec: 22.32 - lr: 0.020000\n",
-      "2021-09-08 14:51:47,521 epoch 10 - iter 6/13 - loss 0.45294614 - samples/sec: 24.25 - lr: 0.020000\n",
-      "2021-09-08 14:51:47,563 epoch 10 - iter 7/13 - loss 0.39337228 - samples/sec: 23.99 - lr: 0.020000\n",
-      "2021-09-08 14:51:47,608 epoch 10 - iter 8/13 - loss 0.35667955 - samples/sec: 22.51 - lr: 0.020000\n",
-      "2021-09-08 14:51:47,651 epoch 10 - iter 9/13 - loss 0.31813734 - samples/sec: 23.86 - lr: 0.020000\n",
-      "2021-09-08 14:51:47,696 epoch 10 - iter 10/13 - loss 0.33123457 - samples/sec: 22.37 - lr: 0.020000\n",
-      "2021-09-08 14:51:47,738 epoch 10 - iter 11/13 - loss 0.30164605 - samples/sec: 24.00 - lr: 0.020000\n",
-      "2021-09-08 14:51:47,780 epoch 10 - iter 12/13 - loss 0.27673847 - samples/sec: 24.07 - lr: 0.020000\n"
+      "2021-09-21 20:35:48,767 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:48,838 epoch 8 - iter 1/13 - loss 0.10089874 - samples/sec: 20.86 - lr: 0.020000\n",
+      "2021-09-21 20:35:48,882 epoch 8 - iter 2/13 - loss 0.05110999 - samples/sec: 22.95 - lr: 0.020000\n",
+      "2021-09-21 20:35:48,929 epoch 8 - iter 3/13 - loss 0.04531190 - samples/sec: 21.55 - lr: 0.020000\n",
+      "2021-09-21 20:35:48,972 epoch 8 - iter 4/13 - loss 0.03598307 - samples/sec: 23.41 - lr: 0.020000\n",
+      "2021-09-21 20:35:49,016 epoch 8 - iter 5/13 - loss 0.02916071 - samples/sec: 23.06 - lr: 0.020000\n",
+      "2021-09-21 20:35:49,059 epoch 8 - iter 6/13 - loss 0.02451832 - samples/sec: 23.51 - lr: 0.020000\n",
+      "2021-09-21 20:35:49,105 epoch 8 - iter 7/13 - loss 0.36318570 - samples/sec: 21.60 - lr: 0.020000\n",
+      "2021-09-21 20:35:49,152 epoch 8 - iter 8/13 - loss 0.54292802 - samples/sec: 21.64 - lr: 0.020000\n",
+      "2021-09-21 20:35:49,199 epoch 8 - iter 9/13 - loss 0.60089699 - samples/sec: 21.52 - lr: 0.020000\n",
+      "2021-09-21 20:35:49,242 epoch 8 - iter 10/13 - loss 0.54090984 - samples/sec: 23.51 - lr: 0.020000\n",
+      "2021-09-21 20:35:49,289 epoch 8 - iter 11/13 - loss 0.63071367 - samples/sec: 21.49 - lr: 0.020000\n",
+      "2021-09-21 20:35:49,332 epoch 8 - iter 12/13 - loss 0.58187451 - samples/sec: 23.46 - lr: 0.020000\n",
+      "2021-09-21 20:35:49,379 epoch 8 - iter 13/13 - loss 0.55341767 - samples/sec: 21.58 - lr: 0.020000\n",
+      "2021-09-21 20:35:49,380 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:49,380 EPOCH 8 done: loss 0.5534 - lr 0.0200000\n",
+      "2021-09-21 20:35:49,514 DEV : loss 0.23674564063549042 - score 0.0\n",
+      "2021-09-21 20:35:49,515 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:35:49,517 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:49,587 epoch 9 - iter 1/13 - loss 0.00356773 - samples/sec: 20.02 - lr: 0.020000\n",
+      "2021-09-21 20:35:49,654 epoch 9 - iter 2/13 - loss 0.01575197 - samples/sec: 14.89 - lr: 0.020000\n",
+      "2021-09-21 20:35:49,711 epoch 9 - iter 3/13 - loss 0.01091546 - samples/sec: 17.89 - lr: 0.020000\n",
+      "2021-09-21 20:35:49,768 epoch 9 - iter 4/13 - loss 0.01113824 - samples/sec: 17.50 - lr: 0.020000\n",
+      "2021-09-21 20:35:49,824 epoch 9 - iter 5/13 - loss 0.32742723 - samples/sec: 17.92 - lr: 0.020000\n",
+      "2021-09-21 20:35:49,902 epoch 9 - iter 6/13 - loss 0.28393076 - samples/sec: 13.01 - lr: 0.020000\n",
+      "2021-09-21 20:35:49,953 epoch 9 - iter 7/13 - loss 0.24400317 - samples/sec: 19.51 - lr: 0.020000\n",
+      "2021-09-21 20:35:50,000 epoch 9 - iter 8/13 - loss 0.21644541 - samples/sec: 21.45 - lr: 0.020000\n",
+      "2021-09-21 20:35:50,060 epoch 9 - iter 9/13 - loss 0.29322867 - samples/sec: 17.01 - lr: 0.020000\n",
+      "2021-09-21 20:35:50,131 epoch 9 - iter 10/13 - loss 0.26433805 - samples/sec: 14.01 - lr: 0.020000\n",
+      "2021-09-21 20:35:50,201 epoch 9 - iter 11/13 - loss 0.24042167 - samples/sec: 14.53 - lr: 0.020000\n",
+      "2021-09-21 20:35:50,250 epoch 9 - iter 12/13 - loss 0.35020971 - samples/sec: 20.44 - lr: 0.020000\n",
+      "2021-09-21 20:35:50,305 epoch 9 - iter 13/13 - loss 0.32665665 - samples/sec: 18.15 - lr: 0.020000\n",
+      "2021-09-21 20:35:50,306 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:50,307 EPOCH 9 done: loss 0.3267 - lr 0.0200000\n",
+      "2021-09-21 20:35:50,344 DEV : loss 0.5266174077987671 - score 0.0\n",
+      "2021-09-21 20:35:50,345 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:35:50,347 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:50,410 epoch 10 - iter 1/13 - loss 0.00159677 - samples/sec: 22.69 - lr: 0.020000\n",
+      "2021-09-21 20:35:50,454 epoch 10 - iter 2/13 - loss 0.01310278 - samples/sec: 22.76 - lr: 0.020000\n",
+      "2021-09-21 20:35:50,499 epoch 10 - iter 3/13 - loss 0.00939012 - samples/sec: 22.67 - lr: 0.020000\n",
+      "2021-09-21 20:35:50,547 epoch 10 - iter 4/13 - loss 0.35515068 - samples/sec: 21.15 - lr: 0.020000\n",
+      "2021-09-21 20:35:50,593 epoch 10 - iter 5/13 - loss 0.55023759 - samples/sec: 21.57 - lr: 0.020000\n",
+      "2021-09-21 20:35:50,640 epoch 10 - iter 6/13 - loss 0.49389681 - samples/sec: 21.64 - lr: 0.020000\n",
+      "2021-09-21 20:35:50,684 epoch 10 - iter 7/13 - loss 0.42350956 - samples/sec: 22.77 - lr: 0.020000\n",
+      "2021-09-21 20:35:50,728 epoch 10 - iter 8/13 - loss 0.37243346 - samples/sec: 22.93 - lr: 0.020000\n",
+      "2021-09-21 20:35:50,773 epoch 10 - iter 9/13 - loss 0.33383020 - samples/sec: 22.51 - lr: 0.020000\n",
+      "2021-09-21 20:35:50,818 epoch 10 - iter 10/13 - loss 0.30086101 - samples/sec: 22.71 - lr: 0.020000\n",
+      "2021-09-21 20:35:50,863 epoch 10 - iter 11/13 - loss 0.27366945 - samples/sec: 22.59 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:51:47,825 epoch 10 - iter 13/13 - loss 0.35258112 - samples/sec: 22.32 - lr: 0.020000\n",
-      "2021-09-08 14:51:47,826 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:51:47,826 EPOCH 10 done: loss 0.3526 - lr 0.0200000\n",
-      "2021-09-08 14:51:47,861 DEV : loss 0.16057270765304565 - score 0.0\n",
-      "2021-09-08 14:51:47,862 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 14:51:55,646 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:51:55,647 Testing using best model ...\n",
-      "2021-09-08 14:51:55,648 loading file None1/best-model.pt\n",
+      "2021-09-21 20:35:50,907 epoch 10 - iter 12/13 - loss 0.25134699 - samples/sec: 22.79 - lr: 0.020000\n",
+      "2021-09-21 20:35:50,952 epoch 10 - iter 13/13 - loss 0.23244068 - samples/sec: 22.62 - lr: 0.020000\n",
+      "2021-09-21 20:35:50,953 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:50,953 EPOCH 10 done: loss 0.2324 - lr 0.0200000\n",
+      "2021-09-21 20:35:51,106 DEV : loss 0.6022650003433228 - score 0.0\n",
+      "2021-09-21 20:35:51,107 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:35:57,286 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:35:57,287 Testing using best model ...\n",
+      "2021-09-21 20:35:57,289 loading file None1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 14:52:00,302 \t0.5\n",
-      "2021-09-08 14:52:00,303 \n",
+      "2021-09-21 20:36:04,608 \t0.0\n",
+      "2021-09-21 20:36:04,609 \n",
       "Results:\n",
-      "- F-score (micro) 0.5\n",
-      "- F-score (macro) 0.0667\n",
-      "- Accuracy 0.5\n",
+      "- F-score (micro) 0.0\n",
+      "- F-score (macro) 0.0\n",
+      "- Accuracy 0.0\n",
       "\n",
       "By class:\n",
       "                                                                                                                                        precision    recall  f1-score   support\n",
@@ -7401,44 +7429,44 @@
       "                                                                                                           undertake a journey or trip     0.0000    0.0000    0.0000         0\n",
       "                                                                          the practical application of science to commerce or industry     0.0000    0.0000    0.0000         0\n",
       "                                                                                        a healthy state of wellbeing free from disease     0.0000    0.0000    0.0000         0\n",
+      "                                                                                          an adult female person (as opposed to a man)     0.0000    0.0000    0.0000         0\n",
       " a father or mother; one who begets or one who gives birth to or nurtures and raises a child a relative who plays the role of guardian     0.0000    0.0000    0.0000         0\n",
-      "                                                                       the social event at which the ceremony of marriage is performed     0.0000    0.0000    0.0000         0\n",
-      "                                                               the latest and most admired style in clothes and cosmetics and behavior     0.0000    0.0000    0.0000         0\n",
+      "a father or mother; one who begets or one who gives birth to or nurtures and raises a child; a relative who plays the role of guardian     0.0000    0.0000    0.0000         0\n",
       "                                                                            an activity that is diverting and that holds the attention     0.0000    0.0000    0.0000         0\n",
+      "                                                                                           a particular branch of scientific knowledge     0.0000    0.0000    0.0000         0\n",
       "                                                                                                   the legal dissolution of a marriage     0.0000    0.0000    0.0000         0\n",
       "                                an act punishable by law; usually considered an evil act an evil act not necessarily punishable by law     0.0000    0.0000    0.0000         0\n",
       "                                                          a strong belief in a supernatural power or powers that control human destiny     0.0000    0.0000    0.0000         0\n",
       "                                                                       an active diversion requiring physical exertion and competition     0.0000    0.0000    0.0000         0\n",
       "                                                                        social relations involving intrigue to gain authority or power     0.0000    0.0000    0.0000         0\n",
-      "                                                                                          light and humorous drama with a happy ending     0.0000    0.0000    0.0000         0\n",
-      "a father or mother; one who begets or one who gives birth to or nurtures and raises a child; a relative who plays the role of guardian     0.0000    0.0000    0.0000         1\n",
-      "                                                                                           a particular branch of scientific knowledge     1.0000    1.0000    1.0000         1\n",
+      "                                                               the latest and most admired style in clothes and cosmetics and behavior     0.0000    0.0000    0.0000         1\n",
+      "                                                                                          light and humorous drama with a happy ending     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "                                                                                                                             micro avg     0.5000    0.5000    0.5000         2\n",
-      "                                                                                                                             macro avg     0.0667    0.0667    0.0667         2\n",
-      "                                                                                                                          weighted avg     0.5000    0.5000    0.5000         2\n",
-      "                                                                                                                           samples avg     0.5000    0.5000    0.5000         2\n",
+      "                                                                                                                             micro avg     0.0000    0.0000    0.0000         2\n",
+      "                                                                                                                             macro avg     0.0000    0.0000    0.0000         2\n",
+      "                                                                                                                          weighted avg     0.0000    0.0000    0.0000         2\n",
+      "                                                                                                                           samples avg     0.0000    0.0000    0.0000         2\n",
       "\n",
-      "2021-09-08 14:52:00,303 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:52:11,838 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
+      "2021-09-21 20:36:04,609 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:18,079 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 14:52:15,661 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:36:22,506 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 15/15 [00:00<00:00, 49190.43it/s]"
+      "100%|██████████| 15/15 [00:00<00:00, 50452.73it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:52:15,663 [b'the practical application of science to commerce or industry', b'a healthy state of wellbeing free from disease', b'an adult female person (as opposed to a man)', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child a relative who plays the role of guardian', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child; a relative who plays the role of guardian', b'the latest and most admired style in clothes and cosmetics and behavior', b'an activity that is diverting and that holds the attention', b'a particular branch of scientific knowledge', b'the legal dissolution of a marriage', b'an act punishable by law; usually considered an evil act an evil act not necessarily punishable by law', b'a strong belief in a supernatural power or powers that control human destiny', b'social relations involving intrigue to gain authority or power', b'light and humorous drama with a happy ending', b'the social event at which the ceremony of marriage is performed', b'an active diversion requiring physical exertion and competition']\n",
-      "2021-09-08 14:52:15,673 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:52:15,675 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:36:22,508 [b'undertake a journey or trip', b'the practical application of science to commerce or industry', b'a healthy state of wellbeing free from disease', b'an adult female person (as opposed to a man)', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child a relative who plays the role of guardian', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child; a relative who plays the role of guardian', b'the social event at which the ceremony of marriage is performed', b'an activity that is diverting and that holds the attention', b'a particular branch of scientific knowledge', b'the legal dissolution of a marriage', b'a strong belief in a supernatural power or powers that control human destiny', b'an active diversion requiring physical exertion and competition', b'social relations involving intrigue to gain authority or power', b'the latest and most admired style in clothes and cosmetics and behavior', b'light and humorous drama with a happy ending']\n",
+      "2021-09-21 20:36:22,648 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:22,650 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -7751,28 +7779,24 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:52:15,675 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:52:15,676 Corpus: \"Corpus: 13 train + 1 dev + 2 test sentences\"\n",
-      "2021-09-08 14:52:15,676 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:52:15,676 Parameters:\n",
-      "2021-09-08 14:52:15,677  - learning_rate: \"0.02\"\n",
-      "2021-09-08 14:52:15,677  - mini_batch_size: \"1\"\n",
-      "2021-09-08 14:52:15,677  - patience: \"3\"\n",
-      "2021-09-08 14:52:15,677  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 14:52:15,678  - max_epochs: \"10\"\n",
-      "2021-09-08 14:52:15,678  - shuffle: \"True\"\n",
-      "2021-09-08 14:52:15,678  - train_with_dev: \"False\"\n",
-      "2021-09-08 14:52:15,678  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 14:52:15,679 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:52:15,679 Model training base path: \"None1\"\n",
-      "2021-09-08 14:52:15,679 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:52:15,680 Device: cuda:0\n",
-      "2021-09-08 14:52:15,680 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:52:15,680 Embeddings storage mode: cpu\n",
-      "2021-09-08 14:52:15,688 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:52:15,748 epoch 1 - iter 1/13 - loss 0.79293376 - samples/sec: 26.31 - lr: 0.020000\n",
-      "2021-09-08 14:52:15,793 epoch 1 - iter 2/13 - loss 0.86773112 - samples/sec: 22.48 - lr: 0.020000\n",
-      "2021-09-08 14:52:15,839 epoch 1 - iter 3/13 - loss 0.82350487 - samples/sec: 21.74 - lr: 0.020000\n"
+      "2021-09-21 20:36:22,650 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:22,650 Corpus: \"Corpus: 13 train + 1 dev + 2 test sentences\"\n",
+      "2021-09-21 20:36:22,651 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:22,651 Parameters:\n",
+      "2021-09-21 20:36:22,651  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:36:22,652  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:36:22,652  - patience: \"3\"\n",
+      "2021-09-21 20:36:22,652  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:36:22,653  - max_epochs: \"10\"\n",
+      "2021-09-21 20:36:22,653  - shuffle: \"True\"\n",
+      "2021-09-21 20:36:22,653  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:36:22,653  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:36:22,654 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:22,654 Model training base path: \"None1\"\n",
+      "2021-09-21 20:36:22,654 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:22,654 Device: cuda:0\n",
+      "2021-09-21 20:36:22,655 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:22,655 Embeddings storage mode: cpu\n"
      ]
     },
     {
@@ -7786,253 +7810,259 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:52:15,884 epoch 1 - iter 4/13 - loss 0.88316344 - samples/sec: 22.35 - lr: 0.020000\n",
-      "2021-09-08 14:52:15,929 epoch 1 - iter 5/13 - loss 0.91229764 - samples/sec: 22.32 - lr: 0.020000\n",
-      "2021-09-08 14:52:15,975 epoch 1 - iter 6/13 - loss 0.86541045 - samples/sec: 22.28 - lr: 0.020000\n",
-      "2021-09-08 14:52:16,020 epoch 1 - iter 7/13 - loss 0.85263000 - samples/sec: 22.39 - lr: 0.020000\n",
-      "2021-09-08 14:52:16,065 epoch 1 - iter 8/13 - loss 0.79380516 - samples/sec: 22.19 - lr: 0.020000\n",
-      "2021-09-08 14:52:16,110 epoch 1 - iter 9/13 - loss 0.72135844 - samples/sec: 22.38 - lr: 0.020000\n",
-      "2021-09-08 14:52:16,155 epoch 1 - iter 10/13 - loss 0.68026100 - samples/sec: 22.56 - lr: 0.020000\n",
-      "2021-09-08 14:52:16,201 epoch 1 - iter 11/13 - loss 0.68438710 - samples/sec: 22.13 - lr: 0.020000\n",
-      "2021-09-08 14:52:16,246 epoch 1 - iter 12/13 - loss 0.75903902 - samples/sec: 22.36 - lr: 0.020000\n",
-      "2021-09-08 14:52:16,291 epoch 1 - iter 13/13 - loss 0.78240685 - samples/sec: 22.27 - lr: 0.020000\n",
-      "2021-09-08 14:52:16,292 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:52:16,292 EPOCH 1 done: loss 0.7824 - lr 0.0200000\n",
-      "2021-09-08 14:52:16,326 DEV : loss 0.5413246154785156 - score 0.0\n",
-      "2021-09-08 14:52:16,326 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:36:22,837 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:22,900 epoch 1 - iter 1/13 - loss 0.44200209 - samples/sec: 24.86 - lr: 0.020000\n",
+      "2021-09-21 20:36:22,949 epoch 1 - iter 2/13 - loss 0.48207299 - samples/sec: 20.56 - lr: 0.020000\n",
+      "2021-09-21 20:36:22,999 epoch 1 - iter 3/13 - loss 0.60468406 - samples/sec: 20.32 - lr: 0.020000\n",
+      "2021-09-21 20:36:23,048 epoch 1 - iter 4/13 - loss 0.58671535 - samples/sec: 20.59 - lr: 0.020000\n",
+      "2021-09-21 20:36:23,097 epoch 1 - iter 5/13 - loss 0.58327758 - samples/sec: 20.54 - lr: 0.020000\n",
+      "2021-09-21 20:36:23,146 epoch 1 - iter 6/13 - loss 0.60315830 - samples/sec: 20.69 - lr: 0.020000\n",
+      "2021-09-21 20:36:23,195 epoch 1 - iter 7/13 - loss 0.59579651 - samples/sec: 20.45 - lr: 0.020000\n",
+      "2021-09-21 20:36:23,243 epoch 1 - iter 8/13 - loss 0.70622629 - samples/sec: 20.94 - lr: 0.020000\n",
+      "2021-09-21 20:36:23,292 epoch 1 - iter 9/13 - loss 0.70054920 - samples/sec: 20.59 - lr: 0.020000\n",
+      "2021-09-21 20:36:23,340 epoch 1 - iter 10/13 - loss 0.69457819 - samples/sec: 21.41 - lr: 0.020000\n",
+      "2021-09-21 20:36:23,387 epoch 1 - iter 11/13 - loss 0.67029015 - samples/sec: 21.46 - lr: 0.020000\n",
+      "2021-09-21 20:36:23,433 epoch 1 - iter 12/13 - loss 0.62712163 - samples/sec: 21.54 - lr: 0.020000\n",
+      "2021-09-21 20:36:23,480 epoch 1 - iter 13/13 - loss 0.64383671 - samples/sec: 21.52 - lr: 0.020000\n",
+      "2021-09-21 20:36:23,481 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:23,482 EPOCH 1 done: loss 0.6438 - lr 0.0200000\n",
+      "2021-09-21 20:36:23,606 DEV : loss 0.2178044319152832 - score 0.0\n",
+      "2021-09-21 20:36:23,607 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 14:52:20,689 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:52:20,757 epoch 2 - iter 1/13 - loss 0.40566185 - samples/sec: 21.66 - lr: 0.020000\n",
-      "2021-09-08 14:52:20,803 epoch 2 - iter 2/13 - loss 0.60549550 - samples/sec: 22.24 - lr: 0.020000\n",
-      "2021-09-08 14:52:20,848 epoch 2 - iter 3/13 - loss 0.55347545 - samples/sec: 22.10 - lr: 0.020000\n",
-      "2021-09-08 14:52:20,893 epoch 2 - iter 4/13 - loss 0.52069473 - samples/sec: 22.48 - lr: 0.020000\n",
-      "2021-09-08 14:52:20,939 epoch 2 - iter 5/13 - loss 0.50623056 - samples/sec: 22.13 - lr: 0.020000\n",
-      "2021-09-08 14:52:20,984 epoch 2 - iter 6/13 - loss 0.62211875 - samples/sec: 22.45 - lr: 0.020000\n",
-      "2021-09-08 14:52:21,029 epoch 2 - iter 7/13 - loss 0.65695623 - samples/sec: 22.26 - lr: 0.020000\n",
-      "2021-09-08 14:52:21,074 epoch 2 - iter 8/13 - loss 0.61992294 - samples/sec: 22.30 - lr: 0.020000\n",
-      "2021-09-08 14:52:21,119 epoch 2 - iter 9/13 - loss 0.58908593 - samples/sec: 22.49 - lr: 0.020000\n",
-      "2021-09-08 14:52:21,165 epoch 2 - iter 10/13 - loss 0.58581134 - samples/sec: 22.32 - lr: 0.020000\n",
-      "2021-09-08 14:52:21,209 epoch 2 - iter 11/13 - loss 0.57347162 - samples/sec: 22.53 - lr: 0.020000\n",
-      "2021-09-08 14:52:21,255 epoch 2 - iter 12/13 - loss 0.60971604 - samples/sec: 22.27 - lr: 0.020000\n",
-      "2021-09-08 14:52:21,296 epoch 2 - iter 13/13 - loss 0.56985479 - samples/sec: 24.20 - lr: 0.020000\n",
-      "2021-09-08 14:52:21,297 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:52:21,298 EPOCH 2 done: loss 0.5699 - lr 0.0200000\n",
-      "2021-09-08 14:52:21,339 DEV : loss 0.5751858353614807 - score 0.0\n",
-      "2021-09-08 14:52:21,340 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 14:52:21,342 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:52:21,405 epoch 3 - iter 1/13 - loss 0.73444366 - samples/sec: 22.27 - lr: 0.020000\n",
-      "2021-09-08 14:52:21,447 epoch 3 - iter 2/13 - loss 0.40619574 - samples/sec: 24.11 - lr: 0.020000\n",
-      "2021-09-08 14:52:21,493 epoch 3 - iter 3/13 - loss 0.43067854 - samples/sec: 22.29 - lr: 0.020000\n",
-      "2021-09-08 14:52:21,538 epoch 3 - iter 4/13 - loss 0.48499033 - samples/sec: 22.20 - lr: 0.020000\n",
-      "2021-09-08 14:52:21,583 epoch 3 - iter 5/13 - loss 0.63123579 - samples/sec: 22.35 - lr: 0.020000\n",
-      "2021-09-08 14:52:21,628 epoch 3 - iter 6/13 - loss 0.65011796 - samples/sec: 22.33 - lr: 0.020000\n",
-      "2021-09-08 14:52:21,674 epoch 3 - iter 7/13 - loss 0.62223040 - samples/sec: 22.21 - lr: 0.020000\n",
-      "2021-09-08 14:52:21,719 epoch 3 - iter 8/13 - loss 0.65757082 - samples/sec: 22.40 - lr: 0.020000\n",
-      "2021-09-08 14:52:21,764 epoch 3 - iter 9/13 - loss 0.64928172 - samples/sec: 22.32 - lr: 0.020000\n",
-      "2021-09-08 14:52:21,810 epoch 3 - iter 10/13 - loss 0.63981911 - samples/sec: 22.17 - lr: 0.020000\n",
-      "2021-09-08 14:52:21,855 epoch 3 - iter 11/13 - loss 0.59208780 - samples/sec: 22.28 - lr: 0.020000\n",
-      "2021-09-08 14:52:21,900 epoch 3 - iter 12/13 - loss 0.57779940 - samples/sec: 22.27 - lr: 0.020000\n",
-      "2021-09-08 14:52:21,945 epoch 3 - iter 13/13 - loss 0.53854111 - samples/sec: 22.37 - lr: 0.020000\n",
-      "2021-09-08 14:52:21,946 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:52:21,947 EPOCH 3 done: loss 0.5385 - lr 0.0200000\n",
-      "2021-09-08 14:52:21,981 DEV : loss 0.5317959189414978 - score 0.0\n",
-      "2021-09-08 14:52:21,981 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:36:31,351 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:31,424 epoch 2 - iter 1/13 - loss 0.76613575 - samples/sec: 19.13 - lr: 0.020000\n",
+      "2021-09-21 20:36:31,487 epoch 2 - iter 2/13 - loss 0.61140402 - samples/sec: 15.84 - lr: 0.020000\n",
+      "2021-09-21 20:36:31,558 epoch 2 - iter 3/13 - loss 0.62473729 - samples/sec: 14.20 - lr: 0.020000\n",
+      "2021-09-21 20:36:31,618 epoch 2 - iter 4/13 - loss 0.69379135 - samples/sec: 16.89 - lr: 0.020000\n",
+      "2021-09-21 20:36:31,676 epoch 2 - iter 5/13 - loss 0.68105088 - samples/sec: 17.42 - lr: 0.020000\n",
+      "2021-09-21 20:36:31,736 epoch 2 - iter 6/13 - loss 0.65805250 - samples/sec: 16.69 - lr: 0.020000\n",
+      "2021-09-21 20:36:31,801 epoch 2 - iter 7/13 - loss 0.65025058 - samples/sec: 15.39 - lr: 0.020000\n",
+      "2021-09-21 20:36:31,882 epoch 2 - iter 8/13 - loss 0.66444620 - samples/sec: 12.45 - lr: 0.020000\n",
+      "2021-09-21 20:36:31,952 epoch 2 - iter 9/13 - loss 0.64626311 - samples/sec: 14.42 - lr: 0.020000\n",
+      "2021-09-21 20:36:32,002 epoch 2 - iter 10/13 - loss 0.63519815 - samples/sec: 20.16 - lr: 0.020000\n",
+      "2021-09-21 20:36:32,063 epoch 2 - iter 11/13 - loss 0.62004325 - samples/sec: 16.44 - lr: 0.020000\n",
+      "2021-09-21 20:36:32,120 epoch 2 - iter 12/13 - loss 0.57904595 - samples/sec: 17.68 - lr: 0.020000\n",
+      "2021-09-21 20:36:32,168 epoch 2 - iter 13/13 - loss 0.56558121 - samples/sec: 21.29 - lr: 0.020000\n",
+      "2021-09-21 20:36:32,169 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:32,169 EPOCH 2 done: loss 0.5656 - lr 0.0200000\n",
+      "2021-09-21 20:36:32,206 DEV : loss 0.17074202001094818 - score 0.0\n",
+      "2021-09-21 20:36:32,207 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 14:52:26,524 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:52:26,590 epoch 4 - iter 1/13 - loss 0.09260405 - samples/sec: 23.19 - lr: 0.020000\n",
-      "2021-09-08 14:52:26,636 epoch 4 - iter 2/13 - loss 1.06307835 - samples/sec: 22.07 - lr: 0.020000\n",
-      "2021-09-08 14:52:26,682 epoch 4 - iter 3/13 - loss 0.96311989 - samples/sec: 21.87 - lr: 0.020000\n",
-      "2021-09-08 14:52:26,727 epoch 4 - iter 4/13 - loss 0.80857081 - samples/sec: 22.25 - lr: 0.020000\n",
-      "2021-09-08 14:52:26,772 epoch 4 - iter 5/13 - loss 0.77177513 - samples/sec: 22.40 - lr: 0.020000\n",
-      "2021-09-08 14:52:26,818 epoch 4 - iter 6/13 - loss 0.69048854 - samples/sec: 22.19 - lr: 0.020000\n",
-      "2021-09-08 14:52:26,863 epoch 4 - iter 7/13 - loss 0.65649753 - samples/sec: 22.46 - lr: 0.020000\n",
-      "2021-09-08 14:52:26,908 epoch 4 - iter 8/13 - loss 0.68804211 - samples/sec: 22.16 - lr: 0.020000\n",
-      "2021-09-08 14:52:26,950 epoch 4 - iter 9/13 - loss 0.61540757 - samples/sec: 24.05 - lr: 0.020000\n",
-      "2021-09-08 14:52:26,996 epoch 4 - iter 10/13 - loss 0.56005618 - samples/sec: 22.22 - lr: 0.020000\n",
-      "2021-09-08 14:52:27,041 epoch 4 - iter 11/13 - loss 0.52753255 - samples/sec: 22.34 - lr: 0.020000\n",
-      "2021-09-08 14:52:27,086 epoch 4 - iter 12/13 - loss 0.55763399 - samples/sec: 22.27 - lr: 0.020000\n",
-      "2021-09-08 14:52:27,131 epoch 4 - iter 13/13 - loss 0.54160138 - samples/sec: 22.45 - lr: 0.020000\n",
-      "2021-09-08 14:52:27,132 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:52:27,132 EPOCH 4 done: loss 0.5416 - lr 0.0200000\n",
-      "2021-09-08 14:52:27,167 DEV : loss 0.5944918394088745 - score 0.0\n",
-      "2021-09-08 14:52:27,168 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 14:52:27,170 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:52:27,234 epoch 5 - iter 1/13 - loss 0.41369012 - samples/sec: 22.27 - lr: 0.020000\n",
-      "2021-09-08 14:52:27,279 epoch 5 - iter 2/13 - loss 0.52592988 - samples/sec: 22.36 - lr: 0.020000\n",
-      "2021-09-08 14:52:27,324 epoch 5 - iter 3/13 - loss 0.59920087 - samples/sec: 22.35 - lr: 0.020000\n",
-      "2021-09-08 14:52:27,369 epoch 5 - iter 4/13 - loss 0.50666313 - samples/sec: 22.20 - lr: 0.020000\n",
-      "2021-09-08 14:52:27,411 epoch 5 - iter 5/13 - loss 0.41207434 - samples/sec: 24.09 - lr: 0.020000\n",
-      "2021-09-08 14:52:27,457 epoch 5 - iter 6/13 - loss 0.37913920 - samples/sec: 22.15 - lr: 0.020000\n",
-      "2021-09-08 14:52:27,502 epoch 5 - iter 7/13 - loss 0.46952858 - samples/sec: 22.37 - lr: 0.020000\n",
-      "2021-09-08 14:52:27,547 epoch 5 - iter 8/13 - loss 0.49110268 - samples/sec: 22.31 - lr: 0.020000\n",
-      "2021-09-08 14:52:27,592 epoch 5 - iter 9/13 - loss 0.54505498 - samples/sec: 22.37 - lr: 0.020000\n",
-      "2021-09-08 14:52:27,638 epoch 5 - iter 10/13 - loss 0.52503430 - samples/sec: 22.18 - lr: 0.020000\n",
-      "2021-09-08 14:52:27,683 epoch 5 - iter 11/13 - loss 0.49886076 - samples/sec: 22.27 - lr: 0.020000\n",
-      "2021-09-08 14:52:27,725 epoch 5 - iter 12/13 - loss 0.45998166 - samples/sec: 24.26 - lr: 0.020000\n",
-      "2021-09-08 14:52:27,767 epoch 5 - iter 13/13 - loss 0.42500219 - samples/sec: 23.92 - lr: 0.020000\n",
-      "2021-09-08 14:52:27,768 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:52:27,768 EPOCH 5 done: loss 0.4250 - lr 0.0200000\n",
-      "2021-09-08 14:52:27,802 DEV : loss 0.6080916523933411 - score 0.0\n",
-      "2021-09-08 14:52:27,802 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 14:52:27,807 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 20:36:39,000 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:39,077 epoch 3 - iter 1/13 - loss 0.57922119 - samples/sec: 18.45 - lr: 0.020000\n",
+      "2021-09-21 20:36:39,128 epoch 3 - iter 2/13 - loss 0.54043445 - samples/sec: 19.56 - lr: 0.020000\n",
+      "2021-09-21 20:36:39,180 epoch 3 - iter 3/13 - loss 0.57407302 - samples/sec: 19.79 - lr: 0.020000\n",
+      "2021-09-21 20:36:39,231 epoch 3 - iter 4/13 - loss 0.47912131 - samples/sec: 19.69 - lr: 0.020000\n",
+      "2021-09-21 20:36:39,277 epoch 3 - iter 5/13 - loss 0.39797925 - samples/sec: 21.76 - lr: 0.020000\n",
+      "2021-09-21 20:36:39,326 epoch 3 - iter 6/13 - loss 0.36227066 - samples/sec: 20.67 - lr: 0.020000\n",
+      "2021-09-21 20:36:39,373 epoch 3 - iter 7/13 - loss 0.34382551 - samples/sec: 21.47 - lr: 0.020000\n",
+      "2021-09-21 20:36:39,420 epoch 3 - iter 8/13 - loss 0.36567978 - samples/sec: 21.70 - lr: 0.020000\n",
+      "2021-09-21 20:36:39,465 epoch 3 - iter 9/13 - loss 0.33152140 - samples/sec: 22.58 - lr: 0.020000\n",
+      "2021-09-21 20:36:39,512 epoch 3 - iter 10/13 - loss 0.39593569 - samples/sec: 21.27 - lr: 0.020000\n",
+      "2021-09-21 20:36:39,560 epoch 3 - iter 11/13 - loss 0.38471066 - samples/sec: 21.10 - lr: 0.020000\n",
+      "2021-09-21 20:36:39,608 epoch 3 - iter 12/13 - loss 0.43947791 - samples/sec: 21.08 - lr: 0.020000\n",
+      "2021-09-21 20:36:39,656 epoch 3 - iter 13/13 - loss 0.49349075 - samples/sec: 21.20 - lr: 0.020000\n",
+      "2021-09-21 20:36:39,657 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:39,657 EPOCH 3 done: loss 0.4935 - lr 0.0200000\n",
+      "2021-09-21 20:36:39,780 DEV : loss 0.2487316131591797 - score 0.0\n",
+      "2021-09-21 20:36:39,781 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:36:39,791 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:39,858 epoch 4 - iter 1/13 - loss 0.13102745 - samples/sec: 21.21 - lr: 0.020000\n",
+      "2021-09-21 20:36:39,904 epoch 4 - iter 2/13 - loss 0.17561554 - samples/sec: 21.63 - lr: 0.020000\n",
+      "2021-09-21 20:36:39,951 epoch 4 - iter 3/13 - loss 0.14402984 - samples/sec: 21.57 - lr: 0.020000\n",
+      "2021-09-21 20:36:39,997 epoch 4 - iter 4/13 - loss 0.12938357 - samples/sec: 21.85 - lr: 0.020000\n",
+      "2021-09-21 20:36:40,041 epoch 4 - iter 5/13 - loss 0.10773633 - samples/sec: 23.29 - lr: 0.020000\n",
+      "2021-09-21 20:36:40,087 epoch 4 - iter 6/13 - loss 0.09799020 - samples/sec: 21.72 - lr: 0.020000\n",
+      "2021-09-21 20:36:40,130 epoch 4 - iter 7/13 - loss 0.08450413 - samples/sec: 23.26 - lr: 0.020000\n",
+      "2021-09-21 20:36:40,177 epoch 4 - iter 8/13 - loss 0.23517085 - samples/sec: 21.75 - lr: 0.020000\n",
+      "2021-09-21 20:36:40,226 epoch 4 - iter 9/13 - loss 0.33128033 - samples/sec: 20.56 - lr: 0.020000\n",
+      "2021-09-21 20:36:40,272 epoch 4 - iter 10/13 - loss 0.42789780 - samples/sec: 21.90 - lr: 0.020000\n",
+      "2021-09-21 20:36:40,319 epoch 4 - iter 11/13 - loss 0.45468672 - samples/sec: 21.68 - lr: 0.020000\n",
+      "2021-09-21 20:36:40,362 epoch 4 - iter 12/13 - loss 0.42124104 - samples/sec: 23.41 - lr: 0.020000\n",
+      "2021-09-21 20:36:40,408 epoch 4 - iter 13/13 - loss 0.39412140 - samples/sec: 21.67 - lr: 0.020000\n",
+      "2021-09-21 20:36:40,409 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:40,410 EPOCH 4 done: loss 0.3941 - lr 0.0200000\n",
+      "2021-09-21 20:36:40,446 DEV : loss 0.1759294867515564 - score 0.0\n",
+      "2021-09-21 20:36:40,447 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:36:40,449 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:40,514 epoch 5 - iter 1/13 - loss 0.51818067 - samples/sec: 21.53 - lr: 0.020000\n",
+      "2021-09-21 20:36:40,561 epoch 5 - iter 2/13 - loss 0.29441161 - samples/sec: 21.53 - lr: 0.020000\n",
+      "2021-09-21 20:36:40,608 epoch 5 - iter 3/13 - loss 0.64718851 - samples/sec: 21.44 - lr: 0.020000\n",
+      "2021-09-21 20:36:40,656 epoch 5 - iter 4/13 - loss 0.80535472 - samples/sec: 21.33 - lr: 0.020000\n",
+      "2021-09-21 20:36:40,703 epoch 5 - iter 5/13 - loss 0.70601473 - samples/sec: 21.39 - lr: 0.020000\n",
+      "2021-09-21 20:36:40,747 epoch 5 - iter 6/13 - loss 0.59231830 - samples/sec: 23.16 - lr: 0.020000\n",
+      "2021-09-21 20:36:40,790 epoch 5 - iter 7/13 - loss 0.50801885 - samples/sec: 23.45 - lr: 0.020000\n",
+      "2021-09-21 20:36:40,833 epoch 5 - iter 8/13 - loss 0.44472672 - samples/sec: 23.21 - lr: 0.020000\n",
+      "2021-09-21 20:36:40,880 epoch 5 - iter 9/13 - loss 0.52307495 - samples/sec: 21.72 - lr: 0.020000\n",
+      "2021-09-21 20:36:40,926 epoch 5 - iter 10/13 - loss 0.48142759 - samples/sec: 21.70 - lr: 0.020000\n",
+      "2021-09-21 20:36:40,972 epoch 5 - iter 11/13 - loss 0.48073244 - samples/sec: 21.86 - lr: 0.020000\n",
+      "2021-09-21 20:36:41,021 epoch 5 - iter 12/13 - loss 0.50293929 - samples/sec: 20.55 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:52:27,870 epoch 6 - iter 1/13 - loss 0.01514258 - samples/sec: 23.93 - lr: 0.020000\n",
-      "2021-09-08 14:52:27,915 epoch 6 - iter 2/13 - loss 0.22019733 - samples/sec: 22.20 - lr: 0.020000\n",
-      "2021-09-08 14:52:27,960 epoch 6 - iter 3/13 - loss 0.28842345 - samples/sec: 22.30 - lr: 0.020000\n",
-      "2021-09-08 14:52:28,006 epoch 6 - iter 4/13 - loss 0.33908611 - samples/sec: 22.29 - lr: 0.020000\n",
-      "2021-09-08 14:52:28,051 epoch 6 - iter 5/13 - loss 0.29144706 - samples/sec: 22.37 - lr: 0.020000\n",
-      "2021-09-08 14:52:28,093 epoch 6 - iter 6/13 - loss 0.24996139 - samples/sec: 23.97 - lr: 0.020000\n",
-      "2021-09-08 14:52:28,138 epoch 6 - iter 7/13 - loss 0.27474123 - samples/sec: 22.53 - lr: 0.020000\n",
-      "2021-09-08 14:52:28,183 epoch 6 - iter 8/13 - loss 0.32737704 - samples/sec: 22.38 - lr: 0.020000\n",
-      "2021-09-08 14:52:28,228 epoch 6 - iter 9/13 - loss 0.46008873 - samples/sec: 22.47 - lr: 0.020000\n",
-      "2021-09-08 14:52:28,270 epoch 6 - iter 10/13 - loss 0.41533761 - samples/sec: 23.93 - lr: 0.020000\n",
-      "2021-09-08 14:52:28,315 epoch 6 - iter 11/13 - loss 0.39870869 - samples/sec: 22.51 - lr: 0.020000\n",
-      "2021-09-08 14:52:28,360 epoch 6 - iter 12/13 - loss 0.43005707 - samples/sec: 22.13 - lr: 0.020000\n",
-      "2021-09-08 14:52:28,405 epoch 6 - iter 13/13 - loss 0.41272337 - samples/sec: 22.41 - lr: 0.020000\n",
-      "2021-09-08 14:52:28,406 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:52:28,407 EPOCH 6 done: loss 0.4127 - lr 0.0200000\n",
-      "2021-09-08 14:52:28,440 DEV : loss 0.34091371297836304 - score 0.0\n",
-      "2021-09-08 14:52:28,441 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:36:41,068 epoch 5 - iter 13/13 - loss 0.55307153 - samples/sec: 21.61 - lr: 0.020000\n",
+      "2021-09-21 20:36:41,069 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:41,069 EPOCH 5 done: loss 0.5531 - lr 0.0200000\n",
+      "2021-09-21 20:36:41,105 DEV : loss 0.12288515269756317 - score 0.0\n",
+      "2021-09-21 20:36:41,105 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 14:52:36,871 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:52:36,937 epoch 7 - iter 1/13 - loss 0.00321944 - samples/sec: 23.23 - lr: 0.020000\n",
-      "2021-09-08 14:52:36,983 epoch 7 - iter 2/13 - loss 0.60852692 - samples/sec: 22.15 - lr: 0.020000\n",
-      "2021-09-08 14:52:37,025 epoch 7 - iter 3/13 - loss 0.40665524 - samples/sec: 23.94 - lr: 0.020000\n",
-      "2021-09-08 14:52:37,067 epoch 7 - iter 4/13 - loss 0.31486027 - samples/sec: 24.06 - lr: 0.020000\n",
-      "2021-09-08 14:52:37,112 epoch 7 - iter 5/13 - loss 0.39747434 - samples/sec: 22.31 - lr: 0.020000\n",
-      "2021-09-08 14:52:37,157 epoch 7 - iter 6/13 - loss 0.35235384 - samples/sec: 22.50 - lr: 0.020000\n",
-      "2021-09-08 14:52:37,202 epoch 7 - iter 7/13 - loss 0.36942454 - samples/sec: 22.21 - lr: 0.020000\n",
-      "2021-09-08 14:52:37,244 epoch 7 - iter 8/13 - loss 0.33011700 - samples/sec: 24.04 - lr: 0.020000\n",
-      "2021-09-08 14:52:37,286 epoch 7 - iter 9/13 - loss 0.29444843 - samples/sec: 23.97 - lr: 0.020000\n",
-      "2021-09-08 14:52:37,331 epoch 7 - iter 10/13 - loss 0.26750500 - samples/sec: 22.46 - lr: 0.020000\n",
-      "2021-09-08 14:52:37,374 epoch 7 - iter 11/13 - loss 0.24323389 - samples/sec: 23.90 - lr: 0.020000\n",
-      "2021-09-08 14:52:37,419 epoch 7 - iter 12/13 - loss 0.26928203 - samples/sec: 22.31 - lr: 0.020000\n",
-      "2021-09-08 14:52:37,464 epoch 7 - iter 13/13 - loss 0.26630515 - samples/sec: 22.22 - lr: 0.020000\n",
-      "2021-09-08 14:52:37,465 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:52:37,465 EPOCH 7 done: loss 0.2663 - lr 0.0200000\n",
-      "2021-09-08 14:52:37,513 DEV : loss 0.11833281069993973 - score 0.0\n",
-      "2021-09-08 14:52:37,513 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:36:45,095 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:45,192 epoch 6 - iter 1/13 - loss 0.51836747 - samples/sec: 13.50 - lr: 0.020000\n",
+      "2021-09-21 20:36:45,239 epoch 6 - iter 2/13 - loss 0.27725749 - samples/sec: 21.69 - lr: 0.020000\n",
+      "2021-09-21 20:36:45,296 epoch 6 - iter 3/13 - loss 0.31085785 - samples/sec: 17.81 - lr: 0.020000\n",
+      "2021-09-21 20:36:45,361 epoch 6 - iter 4/13 - loss 0.27976276 - samples/sec: 15.43 - lr: 0.020000\n",
+      "2021-09-21 20:36:45,435 epoch 6 - iter 5/13 - loss 0.43685726 - samples/sec: 13.59 - lr: 0.020000\n",
+      "2021-09-21 20:36:45,497 epoch 6 - iter 6/13 - loss 0.38070451 - samples/sec: 16.14 - lr: 0.020000\n",
+      "2021-09-21 20:36:45,561 epoch 6 - iter 7/13 - loss 0.34721035 - samples/sec: 15.89 - lr: 0.020000\n",
+      "2021-09-21 20:36:45,630 epoch 6 - iter 8/13 - loss 0.31107504 - samples/sec: 14.45 - lr: 0.020000\n",
+      "2021-09-21 20:36:45,688 epoch 6 - iter 9/13 - loss 0.27677244 - samples/sec: 17.37 - lr: 0.020000\n",
+      "2021-09-21 20:36:45,741 epoch 6 - iter 10/13 - loss 0.25626384 - samples/sec: 19.31 - lr: 0.020000\n",
+      "2021-09-21 20:36:45,806 epoch 6 - iter 11/13 - loss 0.23764608 - samples/sec: 15.46 - lr: 0.020000\n",
+      "2021-09-21 20:36:45,865 epoch 6 - iter 12/13 - loss 0.26962751 - samples/sec: 17.00 - lr: 0.020000\n",
+      "2021-09-21 20:36:45,931 epoch 6 - iter 13/13 - loss 0.30483385 - samples/sec: 15.28 - lr: 0.020000\n",
+      "2021-09-21 20:36:45,932 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:45,932 EPOCH 6 done: loss 0.3048 - lr 0.0200000\n",
+      "2021-09-21 20:36:46,019 DEV : loss 0.28980010747909546 - score 0.0\n",
+      "2021-09-21 20:36:46,019 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:36:46,021 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:46,117 epoch 7 - iter 1/13 - loss 1.62239027 - samples/sec: 13.31 - lr: 0.020000\n",
+      "2021-09-21 20:36:46,178 epoch 7 - iter 2/13 - loss 0.82456762 - samples/sec: 16.44 - lr: 0.020000\n",
+      "2021-09-21 20:36:46,235 epoch 7 - iter 3/13 - loss 0.57398949 - samples/sec: 17.60 - lr: 0.020000\n",
+      "2021-09-21 20:36:46,295 epoch 7 - iter 4/13 - loss 0.65532148 - samples/sec: 16.89 - lr: 0.020000\n",
+      "2021-09-21 20:36:46,367 epoch 7 - iter 5/13 - loss 0.52915817 - samples/sec: 13.87 - lr: 0.020000\n",
+      "2021-09-21 20:36:46,442 epoch 7 - iter 6/13 - loss 0.44609605 - samples/sec: 13.52 - lr: 0.020000\n",
+      "2021-09-21 20:36:46,505 epoch 7 - iter 7/13 - loss 0.44772726 - samples/sec: 15.87 - lr: 0.020000\n",
+      "2021-09-21 20:36:46,568 epoch 7 - iter 8/13 - loss 0.39222939 - samples/sec: 16.02 - lr: 0.020000\n",
+      "2021-09-21 20:36:46,624 epoch 7 - iter 9/13 - loss 0.39665736 - samples/sec: 17.91 - lr: 0.020000\n",
+      "2021-09-21 20:36:46,692 epoch 7 - iter 10/13 - loss 0.36129596 - samples/sec: 14.79 - lr: 0.020000\n",
+      "2021-09-21 20:36:46,747 epoch 7 - iter 11/13 - loss 0.33393203 - samples/sec: 18.43 - lr: 0.020000\n",
+      "2021-09-21 20:36:46,821 epoch 7 - iter 12/13 - loss 0.30877615 - samples/sec: 13.71 - lr: 0.020000\n",
+      "2021-09-21 20:36:46,867 epoch 7 - iter 13/13 - loss 0.28534296 - samples/sec: 21.51 - lr: 0.020000\n",
+      "2021-09-21 20:36:46,868 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:46,869 EPOCH 7 done: loss 0.2853 - lr 0.0200000\n",
+      "2021-09-21 20:36:46,908 DEV : loss 0.09709487855434418 - score 0.0\n",
+      "2021-09-21 20:36:46,910 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 14:52:42,176 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:52:42,245 epoch 8 - iter 1/13 - loss 0.17523050 - samples/sec: 21.30 - lr: 0.020000\n",
-      "2021-09-08 14:52:42,292 epoch 8 - iter 2/13 - loss 0.16736787 - samples/sec: 21.78 - lr: 0.020000\n",
-      "2021-09-08 14:52:42,336 epoch 8 - iter 3/13 - loss 0.11919856 - samples/sec: 23.00 - lr: 0.020000\n",
-      "2021-09-08 14:52:42,378 epoch 8 - iter 4/13 - loss 0.10246128 - samples/sec: 23.77 - lr: 0.020000\n",
-      "2021-09-08 14:52:42,424 epoch 8 - iter 5/13 - loss 0.11321215 - samples/sec: 22.05 - lr: 0.020000\n",
-      "2021-09-08 14:52:42,470 epoch 8 - iter 6/13 - loss 0.35718141 - samples/sec: 22.27 - lr: 0.020000\n",
-      "2021-09-08 14:52:42,512 epoch 8 - iter 7/13 - loss 0.30623440 - samples/sec: 23.79 - lr: 0.020000\n",
-      "2021-09-08 14:52:42,555 epoch 8 - iter 8/13 - loss 0.26950034 - samples/sec: 23.85 - lr: 0.020000\n",
-      "2021-09-08 14:52:42,597 epoch 8 - iter 9/13 - loss 0.23986815 - samples/sec: 23.96 - lr: 0.020000\n",
-      "2021-09-08 14:52:42,642 epoch 8 - iter 10/13 - loss 0.26949572 - samples/sec: 22.28 - lr: 0.020000\n",
-      "2021-09-08 14:52:42,687 epoch 8 - iter 11/13 - loss 0.32294538 - samples/sec: 22.34 - lr: 0.020000\n",
-      "2021-09-08 14:52:42,733 epoch 8 - iter 12/13 - loss 0.30813164 - samples/sec: 22.14 - lr: 0.020000\n",
-      "2021-09-08 14:52:42,778 epoch 8 - iter 13/13 - loss 0.36476909 - samples/sec: 22.41 - lr: 0.020000\n",
-      "2021-09-08 14:52:42,779 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:52:42,779 EPOCH 8 done: loss 0.3648 - lr 0.0200000\n",
-      "2021-09-08 14:52:42,814 DEV : loss 0.25547370314598083 - score 0.0\n",
-      "2021-09-08 14:52:42,815 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 14:52:42,819 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:52:42,879 epoch 9 - iter 1/13 - loss 0.09095243 - samples/sec: 23.81 - lr: 0.020000\n",
-      "2021-09-08 14:52:42,922 epoch 9 - iter 2/13 - loss 0.04651975 - samples/sec: 23.99 - lr: 0.020000\n",
-      "2021-09-08 14:52:42,964 epoch 9 - iter 3/13 - loss 0.03276890 - samples/sec: 23.67 - lr: 0.020000\n",
-      "2021-09-08 14:52:43,007 epoch 9 - iter 4/13 - loss 0.02525715 - samples/sec: 23.85 - lr: 0.020000\n",
-      "2021-09-08 14:52:43,053 epoch 9 - iter 5/13 - loss 0.16990423 - samples/sec: 21.77 - lr: 0.020000\n",
-      "2021-09-08 14:52:43,099 epoch 9 - iter 6/13 - loss 0.16742556 - samples/sec: 22.04 - lr: 0.020000\n",
-      "2021-09-08 14:52:43,145 epoch 9 - iter 7/13 - loss 0.15359566 - samples/sec: 22.03 - lr: 0.020000\n",
-      "2021-09-08 14:52:43,187 epoch 9 - iter 8/13 - loss 0.13660701 - samples/sec: 23.73 - lr: 0.020000\n",
-      "2021-09-08 14:52:43,233 epoch 9 - iter 9/13 - loss 0.14609671 - samples/sec: 22.27 - lr: 0.020000\n",
-      "2021-09-08 14:52:43,278 epoch 9 - iter 10/13 - loss 0.20039416 - samples/sec: 22.35 - lr: 0.020000\n",
-      "2021-09-08 14:52:43,324 epoch 9 - iter 11/13 - loss 0.21132771 - samples/sec: 22.17 - lr: 0.020000\n",
-      "2021-09-08 14:52:43,366 epoch 9 - iter 12/13 - loss 0.19383160 - samples/sec: 23.61 - lr: 0.020000\n",
-      "2021-09-08 14:52:43,409 epoch 9 - iter 13/13 - loss 0.17921789 - samples/sec: 23.92 - lr: 0.020000\n",
-      "2021-09-08 14:52:43,410 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:52:43,410 EPOCH 9 done: loss 0.1792 - lr 0.0200000\n",
-      "2021-09-08 14:52:43,444 DEV : loss 0.4475448429584503 - score 0.0\n",
-      "2021-09-08 14:52:43,444 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 14:52:43,446 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:52:43,509 epoch 10 - iter 1/13 - loss 0.00693399 - samples/sec: 23.90 - lr: 0.020000\n",
-      "2021-09-08 14:52:43,551 epoch 10 - iter 2/13 - loss 0.01395763 - samples/sec: 23.88 - lr: 0.020000\n",
-      "2021-09-08 14:52:43,596 epoch 10 - iter 3/13 - loss 0.58556522 - samples/sec: 22.41 - lr: 0.020000\n",
-      "2021-09-08 14:52:43,642 epoch 10 - iter 4/13 - loss 0.44377842 - samples/sec: 22.20 - lr: 0.020000\n",
-      "2021-09-08 14:52:43,684 epoch 10 - iter 5/13 - loss 0.35521348 - samples/sec: 23.93 - lr: 0.020000\n",
-      "2021-09-08 14:52:43,729 epoch 10 - iter 6/13 - loss 0.30066526 - samples/sec: 22.26 - lr: 0.020000\n",
-      "2021-09-08 14:52:43,771 epoch 10 - iter 7/13 - loss 0.26103147 - samples/sec: 24.20 - lr: 0.020000\n",
-      "2021-09-08 14:52:43,813 epoch 10 - iter 8/13 - loss 0.23059256 - samples/sec: 23.90 - lr: 0.020000\n",
-      "2021-09-08 14:52:43,858 epoch 10 - iter 9/13 - loss 0.25460887 - samples/sec: 22.44 - lr: 0.020000\n",
-      "2021-09-08 14:52:43,900 epoch 10 - iter 10/13 - loss 0.23595185 - samples/sec: 24.11 - lr: 0.020000\n",
-      "2021-09-08 14:52:43,942 epoch 10 - iter 11/13 - loss 0.21459977 - samples/sec: 24.03 - lr: 0.020000\n",
-      "2021-09-08 14:52:43,987 epoch 10 - iter 12/13 - loss 0.27073917 - samples/sec: 22.23 - lr: 0.020000\n",
-      "2021-09-08 14:52:44,030 epoch 10 - iter 13/13 - loss 0.24994086 - samples/sec: 23.92 - lr: 0.020000\n",
-      "2021-09-08 14:52:44,031 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 20:36:51,078 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:51,160 epoch 8 - iter 1/13 - loss 0.00641187 - samples/sec: 17.54 - lr: 0.020000\n",
+      "2021-09-21 20:36:51,234 epoch 8 - iter 2/13 - loss 0.34992565 - samples/sec: 13.60 - lr: 0.020000\n",
+      "2021-09-21 20:36:51,308 epoch 8 - iter 3/13 - loss 0.23487626 - samples/sec: 13.64 - lr: 0.020000\n",
+      "2021-09-21 20:36:51,360 epoch 8 - iter 4/13 - loss 0.49859517 - samples/sec: 19.25 - lr: 0.020000\n",
+      "2021-09-21 20:36:51,434 epoch 8 - iter 5/13 - loss 0.50039056 - samples/sec: 13.52 - lr: 0.020000\n",
+      "2021-09-21 20:36:51,493 epoch 8 - iter 6/13 - loss 0.41752872 - samples/sec: 17.19 - lr: 0.020000\n",
+      "2021-09-21 20:36:51,551 epoch 8 - iter 7/13 - loss 0.35883919 - samples/sec: 17.30 - lr: 0.020000\n",
+      "2021-09-21 20:36:51,618 epoch 8 - iter 8/13 - loss 0.36449321 - samples/sec: 14.96 - lr: 0.020000\n",
+      "2021-09-21 20:36:51,678 epoch 8 - iter 9/13 - loss 0.32546541 - samples/sec: 16.80 - lr: 0.020000\n",
+      "2021-09-21 20:36:51,729 epoch 8 - iter 10/13 - loss 0.29327747 - samples/sec: 20.05 - lr: 0.020000\n",
+      "2021-09-21 20:36:51,788 epoch 8 - iter 11/13 - loss 0.26735091 - samples/sec: 16.89 - lr: 0.020000\n",
+      "2021-09-21 20:36:51,864 epoch 8 - iter 12/13 - loss 0.25854000 - samples/sec: 13.23 - lr: 0.020000\n",
+      "2021-09-21 20:36:51,924 epoch 8 - iter 13/13 - loss 0.31745828 - samples/sec: 16.95 - lr: 0.020000\n",
+      "2021-09-21 20:36:51,925 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:51,925 EPOCH 8 done: loss 0.3175 - lr 0.0200000\n",
+      "2021-09-21 20:36:51,969 DEV : loss 0.03611720725893974 - score 0.0\n",
+      "2021-09-21 20:36:51,971 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:36:56,135 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:56,227 epoch 9 - iter 1/13 - loss 0.04384327 - samples/sec: 15.01 - lr: 0.020000\n",
+      "2021-09-21 20:36:56,302 epoch 9 - iter 2/13 - loss 0.97918600 - samples/sec: 13.53 - lr: 0.020000\n",
+      "2021-09-21 20:36:56,364 epoch 9 - iter 3/13 - loss 0.91168741 - samples/sec: 16.27 - lr: 0.020000\n",
+      "2021-09-21 20:36:56,416 epoch 9 - iter 4/13 - loss 0.68485743 - samples/sec: 19.36 - lr: 0.020000\n",
+      "2021-09-21 20:36:56,494 epoch 9 - iter 5/13 - loss 0.67581433 - samples/sec: 12.93 - lr: 0.020000\n",
+      "2021-09-21 20:36:56,547 epoch 9 - iter 6/13 - loss 0.56675586 - samples/sec: 18.76 - lr: 0.020000\n",
+      "2021-09-21 20:36:56,617 epoch 9 - iter 7/13 - loss 0.57626512 - samples/sec: 14.37 - lr: 0.020000\n",
+      "2021-09-21 20:36:56,678 epoch 9 - iter 8/13 - loss 0.50667790 - samples/sec: 16.59 - lr: 0.020000\n",
+      "2021-09-21 20:36:56,739 epoch 9 - iter 9/13 - loss 0.45071232 - samples/sec: 16.54 - lr: 0.020000\n",
+      "2021-09-21 20:36:56,813 epoch 9 - iter 10/13 - loss 0.41257004 - samples/sec: 13.70 - lr: 0.020000\n",
+      "2021-09-21 20:36:56,873 epoch 9 - iter 11/13 - loss 0.37987218 - samples/sec: 16.71 - lr: 0.020000\n",
+      "2021-09-21 20:36:56,932 epoch 9 - iter 12/13 - loss 0.34906379 - samples/sec: 17.15 - lr: 0.020000\n",
+      "2021-09-21 20:36:57,001 epoch 9 - iter 13/13 - loss 0.32229533 - samples/sec: 14.42 - lr: 0.020000\n",
+      "2021-09-21 20:36:57,002 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:57,003 EPOCH 9 done: loss 0.3223 - lr 0.0200000\n",
+      "2021-09-21 20:36:57,054 DEV : loss 0.13412076234817505 - score 0.0\n",
+      "2021-09-21 20:36:57,056 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:36:57,059 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:57,179 epoch 10 - iter 1/13 - loss 0.00099583 - samples/sec: 17.76 - lr: 0.020000\n",
+      "2021-09-21 20:36:57,243 epoch 10 - iter 2/13 - loss 0.00196924 - samples/sec: 15.73 - lr: 0.020000\n",
+      "2021-09-21 20:36:57,289 epoch 10 - iter 3/13 - loss 0.81102079 - samples/sec: 21.71 - lr: 0.020000\n",
+      "2021-09-21 20:36:57,332 epoch 10 - iter 4/13 - loss 0.61142414 - samples/sec: 23.40 - lr: 0.020000\n",
+      "2021-09-21 20:36:57,379 epoch 10 - iter 5/13 - loss 0.49710050 - samples/sec: 21.67 - lr: 0.020000\n",
+      "2021-09-21 20:36:57,432 epoch 10 - iter 6/13 - loss 0.41759927 - samples/sec: 18.93 - lr: 0.020000\n",
+      "2021-09-21 20:36:57,506 epoch 10 - iter 7/13 - loss 0.35905006 - samples/sec: 13.58 - lr: 0.020000\n",
+      "2021-09-21 20:36:57,577 epoch 10 - iter 8/13 - loss 0.36987951 - samples/sec: 14.16 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:52:44,031 EPOCH 10 done: loss 0.2499 - lr 0.0200000\n",
-      "2021-09-08 14:52:44,064 DEV : loss 0.8708053827285767 - score 0.0\n",
-      "2021-09-08 14:52:44,065 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 14:52:48,242 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:52:48,243 Testing using best model ...\n",
-      "2021-09-08 14:52:48,244 loading file None1/best-model.pt\n",
+      "2021-09-21 20:36:57,634 epoch 10 - iter 9/13 - loss 0.32995784 - samples/sec: 17.53 - lr: 0.020000\n",
+      "2021-09-21 20:36:57,689 epoch 10 - iter 10/13 - loss 0.29710065 - samples/sec: 18.35 - lr: 0.020000\n",
+      "2021-09-21 20:36:57,756 epoch 10 - iter 11/13 - loss 0.27569132 - samples/sec: 15.05 - lr: 0.020000\n",
+      "2021-09-21 20:36:57,801 epoch 10 - iter 12/13 - loss 0.25288799 - samples/sec: 22.75 - lr: 0.020000\n",
+      "2021-09-21 20:36:57,858 epoch 10 - iter 13/13 - loss 0.26320306 - samples/sec: 17.42 - lr: 0.020000\n",
+      "2021-09-21 20:36:57,859 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:36:57,860 EPOCH 10 done: loss 0.2632 - lr 0.0200000\n",
+      "2021-09-21 20:36:57,979 DEV : loss 0.026817908510565758 - score 0.0\n",
+      "2021-09-21 20:36:57,980 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:37:07,246 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:07,247 Testing using best model ...\n",
+      "2021-09-21 20:37:07,249 loading file None1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 14:52:52,816 \t0.5\n",
-      "2021-09-08 14:52:52,817 \n",
+      "2021-09-21 20:37:14,310 \t0.0\n",
+      "2021-09-21 20:37:14,310 \n",
       "Results:\n",
-      "- F-score (micro) 0.5\n",
-      "- F-score (macro) 0.0667\n",
-      "- Accuracy 0.5\n",
+      "- F-score (micro) 0.0\n",
+      "- F-score (macro) 0.0\n",
+      "- Accuracy 0.0\n",
       "\n",
       "By class:\n",
       "                                                                                                                                        precision    recall  f1-score   support\n",
       "\n",
+      "                                                                                                           undertake a journey or trip     0.0000    0.0000    0.0000         0\n",
       "                                                                          the practical application of science to commerce or industry     0.0000    0.0000    0.0000         0\n",
       "                                                                                        a healthy state of wellbeing free from disease     0.0000    0.0000    0.0000         0\n",
       "                                                                                          an adult female person (as opposed to a man)     0.0000    0.0000    0.0000         0\n",
       " a father or mother; one who begets or one who gives birth to or nurtures and raises a child a relative who plays the role of guardian     0.0000    0.0000    0.0000         0\n",
       "a father or mother; one who begets or one who gives birth to or nurtures and raises a child; a relative who plays the role of guardian     0.0000    0.0000    0.0000         0\n",
-      "                                                               the latest and most admired style in clothes and cosmetics and behavior     0.0000    0.0000    0.0000         0\n",
+      "                                                                       the social event at which the ceremony of marriage is performed     0.0000    0.0000    0.0000         0\n",
       "                                                                            an activity that is diverting and that holds the attention     0.0000    0.0000    0.0000         0\n",
       "                                                                                           a particular branch of scientific knowledge     0.0000    0.0000    0.0000         0\n",
       "                                                                                                   the legal dissolution of a marriage     0.0000    0.0000    0.0000         0\n",
-      "                                an act punishable by law; usually considered an evil act an evil act not necessarily punishable by law     0.0000    0.0000    0.0000         0\n",
       "                                                          a strong belief in a supernatural power or powers that control human destiny     0.0000    0.0000    0.0000         0\n",
+      "                                                                       an active diversion requiring physical exertion and competition     0.0000    0.0000    0.0000         0\n",
       "                                                                        social relations involving intrigue to gain authority or power     0.0000    0.0000    0.0000         0\n",
-      "                                                                                          light and humorous drama with a happy ending     0.0000    0.0000    0.0000         0\n",
-      "                                                                       the social event at which the ceremony of marriage is performed     0.0000    0.0000    0.0000         1\n",
-      "                                                                       an active diversion requiring physical exertion and competition     1.0000    1.0000    1.0000         1\n",
+      "                                                               the latest and most admired style in clothes and cosmetics and behavior     0.0000    0.0000    0.0000         1\n",
+      "                                                                                          light and humorous drama with a happy ending     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "                                                                                                                             micro avg     0.5000    0.5000    0.5000         2\n",
-      "                                                                                                                             macro avg     0.0667    0.0667    0.0667         2\n",
-      "                                                                                                                          weighted avg     0.5000    0.5000    0.5000         2\n",
-      "                                                                                                                           samples avg     0.5000    0.5000    0.5000         2\n",
+      "                                                                                                                             micro avg     0.0000    0.0000    0.0000         2\n",
+      "                                                                                                                             macro avg     0.0000    0.0000    0.0000         2\n",
+      "                                                                                                                          weighted avg     0.0000    0.0000    0.0000         2\n",
+      "                                                                                                                           samples avg     0.0000    0.0000    0.0000         2\n",
       "\n",
-      "2021-09-08 14:52:52,817 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:04,388 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
+      "2021-09-21 20:37:14,311 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:29,640 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 14:53:08,256 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:37:34,038 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 15/15 [00:00<00:00, 49344.75it/s]"
+      "100%|██████████| 15/15 [00:00<00:00, 44306.03it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:53:08,258 [b'undertake a journey or trip', b'the practical application of science to commerce or industry', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child a relative who plays the role of guardian', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child; a relative who plays the role of guardian', b'the latest and most admired style in clothes and cosmetics and behavior', b'an activity that is diverting and that holds the attention', b'a particular branch of scientific knowledge', b'the legal dissolution of a marriage', b'an act punishable by law; usually considered an evil act an evil act not necessarily punishable by law', b'a strong belief in a supernatural power or powers that control human destiny', b'an active diversion requiring physical exertion and competition', b'social relations involving intrigue to gain authority or power', b'light and humorous drama with a happy ending', b'a healthy state of wellbeing free from disease', b'an adult female person (as opposed to a man)']\n",
-      "2021-09-08 14:53:08,267 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:08,269 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:37:34,040 [b'undertake a journey or trip', b'the practical application of science to commerce or industry', b'a healthy state of wellbeing free from disease', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child a relative who plays the role of guardian', b'the latest and most admired style in clothes and cosmetics and behavior', b'an activity that is diverting and that holds the attention', b'a particular branch of scientific knowledge', b'the legal dissolution of a marriage', b'an act punishable by law; usually considered an evil act an evil act not necessarily punishable by law', b'a strong belief in a supernatural power or powers that control human destiny', b'an active diversion requiring physical exertion and competition', b'social relations involving intrigue to gain authority or power', b'light and humorous drama with a happy ending', b'an adult female person (as opposed to a man)', b'the social event at which the ceremony of marriage is performed']\n",
+      "2021-09-21 20:37:34,144 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:34,146 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -8345,28 +8375,26 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:53:08,269 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:08,270 Corpus: \"Corpus: 13 train + 1 dev + 2 test sentences\"\n",
-      "2021-09-08 14:53:08,270 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:08,270 Parameters:\n",
-      "2021-09-08 14:53:08,271  - learning_rate: \"0.02\"\n",
-      "2021-09-08 14:53:08,271  - mini_batch_size: \"1\"\n",
-      "2021-09-08 14:53:08,271  - patience: \"3\"\n",
-      "2021-09-08 14:53:08,271  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 14:53:08,272  - max_epochs: \"10\"\n",
-      "2021-09-08 14:53:08,272  - shuffle: \"True\"\n",
-      "2021-09-08 14:53:08,272  - train_with_dev: \"False\"\n",
-      "2021-09-08 14:53:08,273  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 14:53:08,273 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:08,273 Model training base path: \"None1\"\n",
-      "2021-09-08 14:53:08,273 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:08,274 Device: cuda:0\n",
-      "2021-09-08 14:53:08,274 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:08,274 Embeddings storage mode: cpu\n",
-      "2021-09-08 14:53:08,280 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:08,340 epoch 1 - iter 1/13 - loss 0.48224959 - samples/sec: 26.49 - lr: 0.020000\n",
-      "2021-09-08 14:53:08,385 epoch 1 - iter 2/13 - loss 0.54167809 - samples/sec: 22.37 - lr: 0.020000\n",
-      "2021-09-08 14:53:08,430 epoch 1 - iter 3/13 - loss 0.49526099 - samples/sec: 22.26 - lr: 0.020000\n"
+      "2021-09-21 20:37:34,147 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:34,147 Corpus: \"Corpus: 13 train + 1 dev + 2 test sentences\"\n",
+      "2021-09-21 20:37:34,147 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:34,148 Parameters:\n",
+      "2021-09-21 20:37:34,148  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:37:34,148  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:37:34,149  - patience: \"3\"\n",
+      "2021-09-21 20:37:34,149  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:37:34,149  - max_epochs: \"10\"\n",
+      "2021-09-21 20:37:34,149  - shuffle: \"True\"\n",
+      "2021-09-21 20:37:34,150  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:37:34,150  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:37:34,150 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:34,150 Model training base path: \"None1\"\n",
+      "2021-09-21 20:37:34,151 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:34,151 Device: cuda:0\n",
+      "2021-09-21 20:37:34,151 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:34,152 Embeddings storage mode: cpu\n",
+      "2021-09-21 20:37:34,170 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:34,232 epoch 1 - iter 1/13 - loss 1.10600078 - samples/sec: 24.73 - lr: 0.020000\n"
      ]
     },
     {
@@ -8380,253 +8408,256 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:53:08,475 epoch 1 - iter 4/13 - loss 0.49695553 - samples/sec: 22.47 - lr: 0.020000\n",
-      "2021-09-08 14:53:08,520 epoch 1 - iter 5/13 - loss 0.66688708 - samples/sec: 22.25 - lr: 0.020000\n",
-      "2021-09-08 14:53:08,567 epoch 1 - iter 6/13 - loss 0.72201287 - samples/sec: 21.51 - lr: 0.020000\n",
-      "2021-09-08 14:53:08,614 epoch 1 - iter 7/13 - loss 0.68963736 - samples/sec: 22.08 - lr: 0.020000\n",
-      "2021-09-08 14:53:08,658 epoch 1 - iter 8/13 - loss 0.63489107 - samples/sec: 22.49 - lr: 0.020000\n",
-      "2021-09-08 14:53:08,704 epoch 1 - iter 9/13 - loss 0.58352545 - samples/sec: 22.32 - lr: 0.020000\n",
-      "2021-09-08 14:53:08,749 epoch 1 - iter 10/13 - loss 0.60332399 - samples/sec: 22.45 - lr: 0.020000\n",
-      "2021-09-08 14:53:08,791 epoch 1 - iter 11/13 - loss 0.55023160 - samples/sec: 23.97 - lr: 0.020000\n",
-      "2021-09-08 14:53:08,835 epoch 1 - iter 12/13 - loss 0.66820453 - samples/sec: 22.52 - lr: 0.020000\n",
-      "2021-09-08 14:53:08,881 epoch 1 - iter 13/13 - loss 0.65966032 - samples/sec: 22.34 - lr: 0.020000\n",
-      "2021-09-08 14:53:08,882 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:08,882 EPOCH 1 done: loss 0.6597 - lr 0.0200000\n",
-      "2021-09-08 14:53:08,920 DEV : loss 0.7917630076408386 - score 0.0\n",
-      "2021-09-08 14:53:08,920 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:37:34,281 epoch 1 - iter 2/13 - loss 0.97323468 - samples/sec: 20.78 - lr: 0.020000\n",
+      "2021-09-21 20:37:34,329 epoch 1 - iter 3/13 - loss 0.91656250 - samples/sec: 21.12 - lr: 0.020000\n",
+      "2021-09-21 20:37:34,377 epoch 1 - iter 4/13 - loss 0.79322096 - samples/sec: 20.98 - lr: 0.020000\n",
+      "2021-09-21 20:37:34,424 epoch 1 - iter 5/13 - loss 0.73323524 - samples/sec: 21.46 - lr: 0.020000\n",
+      "2021-09-21 20:37:34,470 epoch 1 - iter 6/13 - loss 0.89644984 - samples/sec: 21.74 - lr: 0.020000\n",
+      "2021-09-21 20:37:34,517 epoch 1 - iter 7/13 - loss 0.90610270 - samples/sec: 21.57 - lr: 0.020000\n",
+      "2021-09-21 20:37:34,563 epoch 1 - iter 8/13 - loss 0.81503324 - samples/sec: 21.83 - lr: 0.020000\n",
+      "2021-09-21 20:37:34,610 epoch 1 - iter 9/13 - loss 0.86674590 - samples/sec: 21.57 - lr: 0.020000\n",
+      "2021-09-21 20:37:34,660 epoch 1 - iter 10/13 - loss 0.81176376 - samples/sec: 20.22 - lr: 0.020000\n",
+      "2021-09-21 20:37:34,707 epoch 1 - iter 11/13 - loss 0.81151245 - samples/sec: 21.57 - lr: 0.020000\n",
+      "2021-09-21 20:37:34,754 epoch 1 - iter 12/13 - loss 0.83383482 - samples/sec: 21.50 - lr: 0.020000\n",
+      "2021-09-21 20:37:34,800 epoch 1 - iter 13/13 - loss 0.84547552 - samples/sec: 21.78 - lr: 0.020000\n",
+      "2021-09-21 20:37:34,801 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:34,801 EPOCH 1 done: loss 0.8455 - lr 0.0200000\n",
+      "2021-09-21 20:37:34,837 DEV : loss 0.572102427482605 - score 0.0\n",
+      "2021-09-21 20:37:34,838 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 14:53:13,244 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:13,314 epoch 2 - iter 1/13 - loss 1.45715129 - samples/sec: 21.60 - lr: 0.020000\n",
-      "2021-09-08 14:53:13,359 epoch 2 - iter 2/13 - loss 0.96723199 - samples/sec: 22.18 - lr: 0.020000\n",
-      "2021-09-08 14:53:13,404 epoch 2 - iter 3/13 - loss 0.85597529 - samples/sec: 22.24 - lr: 0.020000\n",
-      "2021-09-08 14:53:13,450 epoch 2 - iter 4/13 - loss 0.90657316 - samples/sec: 22.27 - lr: 0.020000\n",
-      "2021-09-08 14:53:13,495 epoch 2 - iter 5/13 - loss 0.89074072 - samples/sec: 22.49 - lr: 0.020000\n",
-      "2021-09-08 14:53:13,540 epoch 2 - iter 6/13 - loss 0.82909470 - samples/sec: 22.19 - lr: 0.020000\n",
-      "2021-09-08 14:53:13,585 epoch 2 - iter 7/13 - loss 0.80974960 - samples/sec: 22.48 - lr: 0.020000\n",
-      "2021-09-08 14:53:13,630 epoch 2 - iter 8/13 - loss 0.75822492 - samples/sec: 22.29 - lr: 0.020000\n",
-      "2021-09-08 14:53:13,677 epoch 2 - iter 9/13 - loss 0.74410361 - samples/sec: 21.63 - lr: 0.020000\n",
-      "2021-09-08 14:53:13,722 epoch 2 - iter 10/13 - loss 0.67746467 - samples/sec: 22.30 - lr: 0.020000\n",
-      "2021-09-08 14:53:13,767 epoch 2 - iter 11/13 - loss 0.66914446 - samples/sec: 22.38 - lr: 0.020000\n",
-      "2021-09-08 14:53:13,812 epoch 2 - iter 12/13 - loss 0.63705340 - samples/sec: 22.38 - lr: 0.020000\n",
-      "2021-09-08 14:53:13,857 epoch 2 - iter 13/13 - loss 0.65348384 - samples/sec: 22.38 - lr: 0.020000\n",
-      "2021-09-08 14:53:13,858 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:13,859 EPOCH 2 done: loss 0.6535 - lr 0.0200000\n",
-      "2021-09-08 14:53:13,894 DEV : loss 0.4364928901195526 - score 0.0\n",
-      "2021-09-08 14:53:13,895 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:37:39,054 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:39,142 epoch 2 - iter 1/13 - loss 0.54297251 - samples/sec: 16.62 - lr: 0.020000\n",
+      "2021-09-21 20:37:39,189 epoch 2 - iter 2/13 - loss 0.39254730 - samples/sec: 21.29 - lr: 0.020000\n",
+      "2021-09-21 20:37:39,236 epoch 2 - iter 3/13 - loss 0.41320434 - samples/sec: 21.51 - lr: 0.020000\n",
+      "2021-09-21 20:37:39,283 epoch 2 - iter 4/13 - loss 0.43122910 - samples/sec: 21.66 - lr: 0.020000\n",
+      "2021-09-21 20:37:39,357 epoch 2 - iter 5/13 - loss 0.46156980 - samples/sec: 13.58 - lr: 0.020000\n",
+      "2021-09-21 20:37:39,408 epoch 2 - iter 6/13 - loss 0.49776790 - samples/sec: 19.66 - lr: 0.020000\n",
+      "2021-09-21 20:37:39,475 epoch 2 - iter 7/13 - loss 0.54759419 - samples/sec: 15.03 - lr: 0.020000\n",
+      "2021-09-21 20:37:39,533 epoch 2 - iter 8/13 - loss 0.50656699 - samples/sec: 17.53 - lr: 0.020000\n",
+      "2021-09-21 20:37:39,594 epoch 2 - iter 9/13 - loss 0.58296246 - samples/sec: 16.33 - lr: 0.020000\n",
+      "2021-09-21 20:37:39,654 epoch 2 - iter 10/13 - loss 0.62605146 - samples/sec: 16.77 - lr: 0.020000\n",
+      "2021-09-21 20:37:39,714 epoch 2 - iter 11/13 - loss 0.65767660 - samples/sec: 16.96 - lr: 0.020000\n",
+      "2021-09-21 20:37:39,764 epoch 2 - iter 12/13 - loss 0.65566746 - samples/sec: 20.13 - lr: 0.020000\n",
+      "2021-09-21 20:37:39,819 epoch 2 - iter 13/13 - loss 0.62629518 - samples/sec: 18.15 - lr: 0.020000\n",
+      "2021-09-21 20:37:39,820 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:39,821 EPOCH 2 done: loss 0.6263 - lr 0.0200000\n",
+      "2021-09-21 20:37:39,923 DEV : loss 0.3156842887401581 - score 0.0\n",
+      "2021-09-21 20:37:39,924 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 14:53:18,948 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:19,014 epoch 3 - iter 1/13 - loss 0.13159271 - samples/sec: 23.21 - lr: 0.020000\n",
-      "2021-09-08 14:53:19,056 epoch 3 - iter 2/13 - loss 0.06847428 - samples/sec: 23.89 - lr: 0.020000\n",
-      "2021-09-08 14:53:19,101 epoch 3 - iter 3/13 - loss 0.17739093 - samples/sec: 22.34 - lr: 0.020000\n",
-      "2021-09-08 14:53:19,146 epoch 3 - iter 4/13 - loss 0.42225295 - samples/sec: 22.33 - lr: 0.020000\n",
-      "2021-09-08 14:53:19,192 epoch 3 - iter 5/13 - loss 0.44433340 - samples/sec: 21.92 - lr: 0.020000\n",
-      "2021-09-08 14:53:19,238 epoch 3 - iter 6/13 - loss 0.45747883 - samples/sec: 22.24 - lr: 0.020000\n",
-      "2021-09-08 14:53:19,283 epoch 3 - iter 7/13 - loss 0.45432957 - samples/sec: 22.14 - lr: 0.020000\n",
-      "2021-09-08 14:53:19,328 epoch 3 - iter 8/13 - loss 0.45414960 - samples/sec: 22.33 - lr: 0.020000\n",
-      "2021-09-08 14:53:19,374 epoch 3 - iter 9/13 - loss 0.45972798 - samples/sec: 22.33 - lr: 0.020000\n",
-      "2021-09-08 14:53:19,418 epoch 3 - iter 10/13 - loss 0.48731843 - samples/sec: 22.51 - lr: 0.020000\n",
-      "2021-09-08 14:53:19,464 epoch 3 - iter 11/13 - loss 0.47238721 - samples/sec: 22.03 - lr: 0.020000\n",
-      "2021-09-08 14:53:19,512 epoch 3 - iter 12/13 - loss 0.52243346 - samples/sec: 21.29 - lr: 0.020000\n",
-      "2021-09-08 14:53:19,557 epoch 3 - iter 13/13 - loss 0.50493333 - samples/sec: 22.24 - lr: 0.020000\n",
-      "2021-09-08 14:53:19,558 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:19,558 EPOCH 3 done: loss 0.5049 - lr 0.0200000\n",
-      "2021-09-08 14:53:19,596 DEV : loss 0.4823736250400543 - score 0.0\n",
-      "2021-09-08 14:53:19,597 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 14:53:19,602 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:19,666 epoch 4 - iter 1/13 - loss 0.25062630 - samples/sec: 22.35 - lr: 0.020000\n",
-      "2021-09-08 14:53:19,711 epoch 4 - iter 2/13 - loss 0.90114515 - samples/sec: 22.37 - lr: 0.020000\n",
-      "2021-09-08 14:53:19,756 epoch 4 - iter 3/13 - loss 0.66236833 - samples/sec: 22.42 - lr: 0.020000\n",
-      "2021-09-08 14:53:19,801 epoch 4 - iter 4/13 - loss 0.53351643 - samples/sec: 22.30 - lr: 0.020000\n",
-      "2021-09-08 14:53:19,845 epoch 4 - iter 5/13 - loss 0.43659626 - samples/sec: 23.16 - lr: 0.020000\n",
-      "2021-09-08 14:53:19,886 epoch 4 - iter 6/13 - loss 0.36821724 - samples/sec: 24.26 - lr: 0.020000\n",
-      "2021-09-08 14:53:19,931 epoch 4 - iter 7/13 - loss 0.34022396 - samples/sec: 22.32 - lr: 0.020000\n",
-      "2021-09-08 14:53:19,976 epoch 4 - iter 8/13 - loss 0.33705435 - samples/sec: 22.55 - lr: 0.020000\n",
-      "2021-09-08 14:53:20,021 epoch 4 - iter 9/13 - loss 0.30826717 - samples/sec: 22.32 - lr: 0.020000\n",
-      "2021-09-08 14:53:20,066 epoch 4 - iter 10/13 - loss 0.32226493 - samples/sec: 22.43 - lr: 0.020000\n",
-      "2021-09-08 14:53:20,108 epoch 4 - iter 11/13 - loss 0.29552771 - samples/sec: 24.14 - lr: 0.020000\n",
-      "2021-09-08 14:53:20,153 epoch 4 - iter 12/13 - loss 0.30649671 - samples/sec: 22.55 - lr: 0.020000\n",
-      "2021-09-08 14:53:20,195 epoch 4 - iter 13/13 - loss 0.28484357 - samples/sec: 24.01 - lr: 0.020000\n",
-      "2021-09-08 14:53:20,196 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:20,196 EPOCH 4 done: loss 0.2848 - lr 0.0200000\n",
-      "2021-09-08 14:53:20,231 DEV : loss 1.1622792482376099 - score 0.0\n",
-      "2021-09-08 14:53:20,232 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 14:53:20,234 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:20,294 epoch 5 - iter 1/13 - loss 0.02546570 - samples/sec: 24.09 - lr: 0.020000\n",
-      "2021-09-08 14:53:20,339 epoch 5 - iter 2/13 - loss 0.56751840 - samples/sec: 22.18 - lr: 0.020000\n",
-      "2021-09-08 14:53:20,384 epoch 5 - iter 3/13 - loss 0.41374775 - samples/sec: 22.33 - lr: 0.020000\n",
-      "2021-09-08 14:53:20,427 epoch 5 - iter 4/13 - loss 0.31705185 - samples/sec: 23.85 - lr: 0.020000\n",
-      "2021-09-08 14:53:20,471 epoch 5 - iter 5/13 - loss 0.26265594 - samples/sec: 22.47 - lr: 0.020000\n",
-      "2021-09-08 14:53:20,518 epoch 5 - iter 6/13 - loss 0.22869317 - samples/sec: 21.56 - lr: 0.020000\n",
-      "2021-09-08 14:53:20,563 epoch 5 - iter 7/13 - loss 0.51857366 - samples/sec: 22.55 - lr: 0.020000\n",
-      "2021-09-08 14:53:20,605 epoch 5 - iter 8/13 - loss 0.45625259 - samples/sec: 24.05 - lr: 0.020000\n",
-      "2021-09-08 14:53:20,647 epoch 5 - iter 9/13 - loss 0.40970060 - samples/sec: 24.22 - lr: 0.020000\n",
-      "2021-09-08 14:53:20,689 epoch 5 - iter 10/13 - loss 0.36880912 - samples/sec: 23.99 - lr: 0.020000\n",
-      "2021-09-08 14:53:20,733 epoch 5 - iter 11/13 - loss 0.47651818 - samples/sec: 22.57 - lr: 0.020000\n",
-      "2021-09-08 14:53:20,775 epoch 5 - iter 12/13 - loss 0.43713532 - samples/sec: 24.08 - lr: 0.020000\n",
-      "2021-09-08 14:53:20,820 epoch 5 - iter 13/13 - loss 0.45735868 - samples/sec: 22.48 - lr: 0.020000\n",
-      "2021-09-08 14:53:20,821 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:20,821 EPOCH 5 done: loss 0.4574 - lr 0.0200000\n",
-      "2021-09-08 14:53:20,856 DEV : loss 0.10734532028436661 - score 0.0\n",
-      "2021-09-08 14:53:20,857 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n"
+      "2021-09-21 20:37:44,102 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:44,192 epoch 3 - iter 1/13 - loss 0.34288034 - samples/sec: 17.57 - lr: 0.020000\n",
+      "2021-09-21 20:37:44,244 epoch 3 - iter 2/13 - loss 0.40069619 - samples/sec: 19.43 - lr: 0.020000\n",
+      "2021-09-21 20:37:44,310 epoch 3 - iter 3/13 - loss 0.55839588 - samples/sec: 15.28 - lr: 0.020000\n",
+      "2021-09-21 20:37:44,370 epoch 3 - iter 4/13 - loss 0.56199235 - samples/sec: 16.76 - lr: 0.020000\n",
+      "2021-09-21 20:37:44,430 epoch 3 - iter 5/13 - loss 0.58364758 - samples/sec: 16.67 - lr: 0.020000\n",
+      "2021-09-21 20:37:44,507 epoch 3 - iter 6/13 - loss 0.58874863 - samples/sec: 13.13 - lr: 0.020000\n",
+      "2021-09-21 20:37:44,557 epoch 3 - iter 7/13 - loss 0.54968974 - samples/sec: 20.28 - lr: 0.020000\n",
+      "2021-09-21 20:37:44,620 epoch 3 - iter 8/13 - loss 0.48900762 - samples/sec: 15.97 - lr: 0.020000\n",
+      "2021-09-21 20:37:44,675 epoch 3 - iter 9/13 - loss 0.45163767 - samples/sec: 18.23 - lr: 0.020000\n",
+      "2021-09-21 20:37:44,738 epoch 3 - iter 10/13 - loss 0.48426334 - samples/sec: 15.98 - lr: 0.020000\n",
+      "2021-09-21 20:37:44,802 epoch 3 - iter 11/13 - loss 0.50551542 - samples/sec: 15.64 - lr: 0.020000\n",
+      "2021-09-21 20:37:44,853 epoch 3 - iter 12/13 - loss 0.47366777 - samples/sec: 19.81 - lr: 0.020000\n",
+      "2021-09-21 20:37:44,900 epoch 3 - iter 13/13 - loss 0.50557400 - samples/sec: 21.53 - lr: 0.020000\n",
+      "2021-09-21 20:37:44,901 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:44,901 EPOCH 3 done: loss 0.5056 - lr 0.0200000\n",
+      "2021-09-21 20:37:44,936 DEV : loss 0.15898916125297546 - score 0.0\n",
+      "2021-09-21 20:37:44,937 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:37:49,861 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:49,952 epoch 4 - iter 1/13 - loss 0.59311461 - samples/sec: 14.86 - lr: 0.020000\n",
+      "2021-09-21 20:37:50,011 epoch 4 - iter 2/13 - loss 0.33008679 - samples/sec: 17.00 - lr: 0.020000\n",
+      "2021-09-21 20:37:50,078 epoch 4 - iter 3/13 - loss 0.31162623 - samples/sec: 15.15 - lr: 0.020000\n",
+      "2021-09-21 20:37:50,145 epoch 4 - iter 4/13 - loss 0.41980351 - samples/sec: 14.97 - lr: 0.020000\n",
+      "2021-09-21 20:37:50,212 epoch 4 - iter 5/13 - loss 0.42552491 - samples/sec: 15.06 - lr: 0.020000\n",
+      "2021-09-21 20:37:50,261 epoch 4 - iter 6/13 - loss 0.37888724 - samples/sec: 20.67 - lr: 0.020000\n",
+      "2021-09-21 20:37:50,325 epoch 4 - iter 7/13 - loss 0.42765537 - samples/sec: 15.70 - lr: 0.020000\n",
+      "2021-09-21 20:37:50,385 epoch 4 - iter 8/13 - loss 0.55714470 - samples/sec: 16.74 - lr: 0.020000\n",
+      "2021-09-21 20:37:50,441 epoch 4 - iter 9/13 - loss 0.50260599 - samples/sec: 17.85 - lr: 0.020000\n",
+      "2021-09-21 20:37:50,516 epoch 4 - iter 10/13 - loss 0.54920090 - samples/sec: 13.37 - lr: 0.020000\n",
+      "2021-09-21 20:37:50,577 epoch 4 - iter 11/13 - loss 0.55128328 - samples/sec: 16.68 - lr: 0.020000\n",
+      "2021-09-21 20:37:50,634 epoch 4 - iter 12/13 - loss 0.56899335 - samples/sec: 17.45 - lr: 0.020000\n",
+      "2021-09-21 20:37:50,695 epoch 4 - iter 13/13 - loss 0.54623591 - samples/sec: 16.68 - lr: 0.020000\n",
+      "2021-09-21 20:37:50,696 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:50,696 EPOCH 4 done: loss 0.5462 - lr 0.0200000\n",
+      "2021-09-21 20:37:50,730 DEV : loss 0.2750459611415863 - score 0.0\n",
+      "2021-09-21 20:37:50,731 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:37:50,733 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:50,917 epoch 5 - iter 1/13 - loss 0.00970834 - samples/sec: 17.27 - lr: 0.020000\n",
+      "2021-09-21 20:37:50,986 epoch 5 - iter 2/13 - loss 0.00879416 - samples/sec: 14.40 - lr: 0.020000\n",
+      "2021-09-21 20:37:51,055 epoch 5 - iter 3/13 - loss 0.05523146 - samples/sec: 14.62 - lr: 0.020000\n",
+      "2021-09-21 20:37:51,114 epoch 5 - iter 4/13 - loss 0.05823042 - samples/sec: 17.11 - lr: 0.020000\n",
+      "2021-09-21 20:37:51,181 epoch 5 - iter 5/13 - loss 0.06671074 - samples/sec: 15.07 - lr: 0.020000\n",
+      "2021-09-21 20:37:51,244 epoch 5 - iter 6/13 - loss 0.09395010 - samples/sec: 15.85 - lr: 0.020000\n",
+      "2021-09-21 20:37:51,298 epoch 5 - iter 7/13 - loss 0.09868613 - samples/sec: 18.83 - lr: 0.020000\n",
+      "2021-09-21 20:37:51,354 epoch 5 - iter 8/13 - loss 0.18647117 - samples/sec: 17.88 - lr: 0.020000\n",
+      "2021-09-21 20:37:51,426 epoch 5 - iter 9/13 - loss 0.17529998 - samples/sec: 13.95 - lr: 0.020000\n",
+      "2021-09-21 20:37:51,483 epoch 5 - iter 10/13 - loss 0.28273185 - samples/sec: 17.82 - lr: 0.020000\n",
+      "2021-09-21 20:37:51,560 epoch 5 - iter 11/13 - loss 0.47446373 - samples/sec: 13.08 - lr: 0.020000\n",
+      "2021-09-21 20:37:51,616 epoch 5 - iter 12/13 - loss 0.48306420 - samples/sec: 18.00 - lr: 0.020000\n",
+      "2021-09-21 20:37:51,682 epoch 5 - iter 13/13 - loss 0.53195595 - samples/sec: 15.12 - lr: 0.020000\n",
+      "2021-09-21 20:37:51,683 ----------------------------------------------------------------------------------------------------\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:53:24,997 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:25,062 epoch 6 - iter 1/13 - loss 0.00758275 - samples/sec: 23.09 - lr: 0.020000\n",
-      "2021-09-08 14:53:25,110 epoch 6 - iter 2/13 - loss 0.03712184 - samples/sec: 21.31 - lr: 0.020000\n",
-      "2021-09-08 14:53:25,152 epoch 6 - iter 3/13 - loss 0.02608931 - samples/sec: 23.88 - lr: 0.020000\n",
-      "2021-09-08 14:53:25,194 epoch 6 - iter 4/13 - loss 0.02112326 - samples/sec: 23.98 - lr: 0.020000\n",
-      "2021-09-08 14:53:25,236 epoch 6 - iter 5/13 - loss 0.01780014 - samples/sec: 23.96 - lr: 0.020000\n",
-      "2021-09-08 14:53:25,278 epoch 6 - iter 6/13 - loss 0.01577206 - samples/sec: 23.96 - lr: 0.020000\n",
-      "2021-09-08 14:53:25,320 epoch 6 - iter 7/13 - loss 0.01535169 - samples/sec: 24.18 - lr: 0.020000\n",
-      "2021-09-08 14:53:25,363 epoch 6 - iter 8/13 - loss 0.01451885 - samples/sec: 23.73 - lr: 0.020000\n",
-      "2021-09-08 14:53:25,405 epoch 6 - iter 9/13 - loss 0.01481345 - samples/sec: 24.09 - lr: 0.020000\n",
-      "2021-09-08 14:53:25,450 epoch 6 - iter 10/13 - loss 0.01644092 - samples/sec: 22.11 - lr: 0.020000\n",
-      "2021-09-08 14:53:25,496 epoch 6 - iter 11/13 - loss 0.04336607 - samples/sec: 22.25 - lr: 0.020000\n",
-      "2021-09-08 14:53:25,541 epoch 6 - iter 12/13 - loss 0.04279492 - samples/sec: 22.13 - lr: 0.020000\n",
-      "2021-09-08 14:53:25,584 epoch 6 - iter 13/13 - loss 0.03984487 - samples/sec: 23.87 - lr: 0.020000\n",
-      "2021-09-08 14:53:25,584 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:25,585 EPOCH 6 done: loss 0.0398 - lr 0.0200000\n",
-      "2021-09-08 14:53:25,623 DEV : loss 1.031790852546692 - score 0.0\n",
-      "2021-09-08 14:53:25,623 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 14:53:25,626 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:25,687 epoch 7 - iter 1/13 - loss 0.00566118 - samples/sec: 23.80 - lr: 0.020000\n",
-      "2021-09-08 14:53:25,732 epoch 7 - iter 2/13 - loss 0.46511623 - samples/sec: 22.30 - lr: 0.020000\n",
-      "2021-09-08 14:53:25,775 epoch 7 - iter 3/13 - loss 0.31223266 - samples/sec: 23.46 - lr: 0.020000\n",
-      "2021-09-08 14:53:25,817 epoch 7 - iter 4/13 - loss 0.23445483 - samples/sec: 24.21 - lr: 0.020000\n",
-      "2021-09-08 14:53:25,862 epoch 7 - iter 5/13 - loss 0.35266844 - samples/sec: 22.28 - lr: 0.020000\n",
-      "2021-09-08 14:53:25,904 epoch 7 - iter 6/13 - loss 0.29712933 - samples/sec: 24.25 - lr: 0.020000\n",
-      "2021-09-08 14:53:25,949 epoch 7 - iter 7/13 - loss 0.26999534 - samples/sec: 22.37 - lr: 0.020000\n",
-      "2021-09-08 14:53:25,993 epoch 7 - iter 8/13 - loss 0.44665260 - samples/sec: 22.49 - lr: 0.020000\n",
-      "2021-09-08 14:53:26,039 epoch 7 - iter 9/13 - loss 0.40116592 - samples/sec: 22.31 - lr: 0.020000\n",
-      "2021-09-08 14:53:26,084 epoch 7 - iter 10/13 - loss 0.54506636 - samples/sec: 22.53 - lr: 0.020000\n",
-      "2021-09-08 14:53:26,126 epoch 7 - iter 11/13 - loss 0.49562626 - samples/sec: 23.91 - lr: 0.020000\n",
-      "2021-09-08 14:53:26,171 epoch 7 - iter 12/13 - loss 0.50573420 - samples/sec: 22.38 - lr: 0.020000\n",
-      "2021-09-08 14:53:26,213 epoch 7 - iter 13/13 - loss 0.46697987 - samples/sec: 24.10 - lr: 0.020000\n",
-      "2021-09-08 14:53:26,214 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:26,214 EPOCH 7 done: loss 0.4670 - lr 0.0200000\n",
-      "2021-09-08 14:53:26,251 DEV : loss 0.24887342751026154 - score 0.0\n",
-      "2021-09-08 14:53:26,251 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 14:53:26,254 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:26,314 epoch 8 - iter 1/13 - loss 0.00124529 - samples/sec: 24.28 - lr: 0.020000\n",
-      "2021-09-08 14:53:26,359 epoch 8 - iter 2/13 - loss 0.05714719 - samples/sec: 22.31 - lr: 0.020000\n",
-      "2021-09-08 14:53:26,401 epoch 8 - iter 3/13 - loss 0.03853854 - samples/sec: 24.12 - lr: 0.020000\n",
-      "2021-09-08 14:53:26,448 epoch 8 - iter 4/13 - loss 0.13570041 - samples/sec: 21.41 - lr: 0.020000\n",
-      "2021-09-08 14:53:26,490 epoch 8 - iter 5/13 - loss 0.10862340 - samples/sec: 24.14 - lr: 0.020000\n",
-      "2021-09-08 14:53:26,535 epoch 8 - iter 6/13 - loss 0.25938872 - samples/sec: 22.42 - lr: 0.020000\n",
-      "2021-09-08 14:53:26,577 epoch 8 - iter 7/13 - loss 0.22592501 - samples/sec: 24.30 - lr: 0.020000\n",
-      "2021-09-08 14:53:26,619 epoch 8 - iter 8/13 - loss 0.20284271 - samples/sec: 24.07 - lr: 0.020000\n",
-      "2021-09-08 14:53:26,663 epoch 8 - iter 9/13 - loss 0.23526308 - samples/sec: 22.52 - lr: 0.020000\n",
-      "2021-09-08 14:53:26,708 epoch 8 - iter 10/13 - loss 0.30729144 - samples/sec: 22.40 - lr: 0.020000\n",
-      "2021-09-08 14:53:26,753 epoch 8 - iter 11/13 - loss 0.28383078 - samples/sec: 22.46 - lr: 0.020000\n",
-      "2021-09-08 14:53:26,795 epoch 8 - iter 12/13 - loss 0.26051016 - samples/sec: 24.03 - lr: 0.020000\n",
-      "2021-09-08 14:53:26,840 epoch 8 - iter 13/13 - loss 0.24512078 - samples/sec: 22.51 - lr: 0.020000\n",
-      "2021-09-08 14:53:26,841 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:26,841 EPOCH 8 done: loss 0.2451 - lr 0.0200000\n",
-      "2021-09-08 14:53:26,877 DEV : loss 0.10928066819906235 - score 0.0\n",
-      "2021-09-08 14:53:26,877 BAD EPOCHS (no improvement): 3\n",
-      "2021-09-08 14:53:26,880 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:26,943 epoch 9 - iter 1/13 - loss 1.14945066 - samples/sec: 22.43 - lr: 0.020000\n",
-      "2021-09-08 14:53:26,988 epoch 9 - iter 2/13 - loss 0.58957362 - samples/sec: 22.34 - lr: 0.020000\n",
-      "2021-09-08 14:53:27,029 epoch 9 - iter 3/13 - loss 0.39402985 - samples/sec: 24.30 - lr: 0.020000\n",
-      "2021-09-08 14:53:27,072 epoch 9 - iter 4/13 - loss 0.29599139 - samples/sec: 23.91 - lr: 0.020000\n",
-      "2021-09-08 14:53:27,114 epoch 9 - iter 5/13 - loss 0.23697158 - samples/sec: 24.08 - lr: 0.020000\n",
-      "2021-09-08 14:53:27,157 epoch 9 - iter 6/13 - loss 0.19814797 - samples/sec: 23.08 - lr: 0.020000\n",
-      "2021-09-08 14:53:27,202 epoch 9 - iter 7/13 - loss 0.18433862 - samples/sec: 22.35 - lr: 0.020000\n",
-      "2021-09-08 14:53:27,248 epoch 9 - iter 8/13 - loss 0.28318358 - samples/sec: 22.23 - lr: 0.020000\n",
-      "2021-09-08 14:53:27,293 epoch 9 - iter 9/13 - loss 0.25683154 - samples/sec: 22.50 - lr: 0.020000\n",
-      "2021-09-08 14:53:27,338 epoch 9 - iter 10/13 - loss 0.23375361 - samples/sec: 22.34 - lr: 0.020000\n",
-      "2021-09-08 14:53:27,380 epoch 9 - iter 11/13 - loss 0.21286504 - samples/sec: 24.10 - lr: 0.020000\n",
-      "2021-09-08 14:53:27,422 epoch 9 - iter 12/13 - loss 0.19536034 - samples/sec: 24.11 - lr: 0.020000\n",
-      "2021-09-08 14:53:27,464 epoch 9 - iter 13/13 - loss 0.18039938 - samples/sec: 24.05 - lr: 0.020000\n",
-      "2021-09-08 14:53:27,464 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:27,465 EPOCH 9 done: loss 0.1804 - lr 0.0200000\n",
-      "2021-09-08 14:53:27,500 DEV : loss 0.2060055285692215 - score 0.0\n",
-      "Epoch     9: reducing learning rate of group 0 to 1.0000e-02.\n",
-      "2021-09-08 14:53:27,501 BAD EPOCHS (no improvement): 4\n",
-      "2021-09-08 14:53:27,503 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:27,563 epoch 10 - iter 1/13 - loss 0.00043575 - samples/sec: 24.18 - lr: 0.010000\n",
-      "2021-09-08 14:53:27,608 epoch 10 - iter 2/13 - loss 0.48832582 - samples/sec: 22.43 - lr: 0.010000\n",
-      "2021-09-08 14:53:27,653 epoch 10 - iter 3/13 - loss 0.43752867 - samples/sec: 22.31 - lr: 0.010000\n",
-      "2021-09-08 14:53:27,695 epoch 10 - iter 4/13 - loss 0.33037514 - samples/sec: 24.10 - lr: 0.010000\n",
-      "2021-09-08 14:53:27,736 epoch 10 - iter 5/13 - loss 0.26486479 - samples/sec: 24.27 - lr: 0.010000\n",
-      "2021-09-08 14:53:27,780 epoch 10 - iter 6/13 - loss 0.22199795 - samples/sec: 23.02 - lr: 0.010000\n",
-      "2021-09-08 14:53:27,825 epoch 10 - iter 7/13 - loss 0.42656353 - samples/sec: 22.35 - lr: 0.010000\n",
-      "2021-09-08 14:53:27,867 epoch 10 - iter 8/13 - loss 0.37369857 - samples/sec: 24.23 - lr: 0.010000\n",
-      "2021-09-08 14:53:27,909 epoch 10 - iter 9/13 - loss 0.33234746 - samples/sec: 24.02 - lr: 0.010000\n",
-      "2021-09-08 14:53:27,951 epoch 10 - iter 10/13 - loss 0.29917635 - samples/sec: 24.04 - lr: 0.010000\n",
-      "2021-09-08 14:53:27,996 epoch 10 - iter 11/13 - loss 0.29229875 - samples/sec: 22.38 - lr: 0.010000\n",
-      "2021-09-08 14:53:28,038 epoch 10 - iter 12/13 - loss 0.26928345 - samples/sec: 24.01 - lr: 0.010000\n"
+      "2021-09-21 20:37:51,684 EPOCH 5 done: loss 0.5320 - lr 0.0200000\n",
+      "2021-09-21 20:37:51,767 DEV : loss 0.18467573821544647 - score 0.0\n",
+      "2021-09-21 20:37:51,769 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:37:51,771 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:51,886 epoch 6 - iter 1/13 - loss 0.33988011 - samples/sec: 13.44 - lr: 0.020000\n",
+      "2021-09-21 20:37:51,945 epoch 6 - iter 2/13 - loss 0.19793716 - samples/sec: 17.01 - lr: 0.020000\n",
+      "2021-09-21 20:37:52,020 epoch 6 - iter 3/13 - loss 0.15694629 - samples/sec: 13.44 - lr: 0.020000\n",
+      "2021-09-21 20:37:52,089 epoch 6 - iter 4/13 - loss 0.24210461 - samples/sec: 14.55 - lr: 0.020000\n",
+      "2021-09-21 20:37:52,136 epoch 6 - iter 5/13 - loss 0.19457868 - samples/sec: 21.57 - lr: 0.020000\n",
+      "2021-09-21 20:37:52,196 epoch 6 - iter 6/13 - loss 0.26994075 - samples/sec: 16.95 - lr: 0.020000\n",
+      "2021-09-21 20:37:52,242 epoch 6 - iter 7/13 - loss 0.25692031 - samples/sec: 21.66 - lr: 0.020000\n",
+      "2021-09-21 20:37:52,288 epoch 6 - iter 8/13 - loss 0.24675937 - samples/sec: 21.90 - lr: 0.020000\n",
+      "2021-09-21 20:37:52,332 epoch 6 - iter 9/13 - loss 0.22554365 - samples/sec: 23.18 - lr: 0.020000\n",
+      "2021-09-21 20:37:52,403 epoch 6 - iter 10/13 - loss 0.20692323 - samples/sec: 14.19 - lr: 0.020000\n",
+      "2021-09-21 20:37:52,477 epoch 6 - iter 11/13 - loss 0.23521699 - samples/sec: 13.53 - lr: 0.020000\n",
+      "2021-09-21 20:37:52,534 epoch 6 - iter 12/13 - loss 0.27081483 - samples/sec: 17.66 - lr: 0.020000\n",
+      "2021-09-21 20:37:52,587 epoch 6 - iter 13/13 - loss 0.25015281 - samples/sec: 19.09 - lr: 0.020000\n",
+      "2021-09-21 20:37:52,588 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:52,588 EPOCH 6 done: loss 0.2502 - lr 0.0200000\n",
+      "2021-09-21 20:37:52,635 DEV : loss 0.22864992916584015 - score 0.0\n",
+      "2021-09-21 20:37:52,637 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:37:52,640 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:52,715 epoch 7 - iter 1/13 - loss 0.00107964 - samples/sec: 18.19 - lr: 0.020000\n",
+      "2021-09-21 20:37:52,764 epoch 7 - iter 2/13 - loss 0.00247154 - samples/sec: 20.99 - lr: 0.020000\n",
+      "2021-09-21 20:37:52,834 epoch 7 - iter 3/13 - loss 0.00937677 - samples/sec: 14.28 - lr: 0.020000\n",
+      "2021-09-21 20:37:52,909 epoch 7 - iter 4/13 - loss 0.08445541 - samples/sec: 13.44 - lr: 0.020000\n",
+      "2021-09-21 20:37:52,968 epoch 7 - iter 5/13 - loss 0.07752812 - samples/sec: 17.00 - lr: 0.020000\n",
+      "2021-09-21 20:37:53,024 epoch 7 - iter 6/13 - loss 0.19871825 - samples/sec: 18.08 - lr: 0.020000\n",
+      "2021-09-21 20:37:53,084 epoch 7 - iter 7/13 - loss 0.17168225 - samples/sec: 16.56 - lr: 0.020000\n",
+      "2021-09-21 20:37:53,146 epoch 7 - iter 8/13 - loss 0.15046926 - samples/sec: 16.40 - lr: 0.020000\n",
+      "2021-09-21 20:37:53,211 epoch 7 - iter 9/13 - loss 0.15416453 - samples/sec: 15.50 - lr: 0.020000\n",
+      "2021-09-21 20:37:53,272 epoch 7 - iter 10/13 - loss 0.13891340 - samples/sec: 16.47 - lr: 0.020000\n",
+      "2021-09-21 20:37:53,348 epoch 7 - iter 11/13 - loss 0.14347192 - samples/sec: 13.27 - lr: 0.020000\n",
+      "2021-09-21 20:37:53,404 epoch 7 - iter 12/13 - loss 0.13245715 - samples/sec: 17.92 - lr: 0.020000\n",
+      "2021-09-21 20:37:53,480 epoch 7 - iter 13/13 - loss 0.12345359 - samples/sec: 13.30 - lr: 0.020000\n",
+      "2021-09-21 20:37:53,480 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:53,481 EPOCH 7 done: loss 0.1235 - lr 0.0200000\n",
+      "2021-09-21 20:37:53,521 DEV : loss 0.14559251070022583 - score 0.0\n",
+      "2021-09-21 20:37:53,522 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:37:57,384 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:57,454 epoch 8 - iter 1/13 - loss 0.22035587 - samples/sec: 20.95 - lr: 0.020000\n",
+      "2021-09-21 20:37:57,498 epoch 8 - iter 2/13 - loss 0.11082085 - samples/sec: 23.12 - lr: 0.020000\n",
+      "2021-09-21 20:37:57,542 epoch 8 - iter 3/13 - loss 0.07428225 - samples/sec: 23.34 - lr: 0.020000\n",
+      "2021-09-21 20:37:57,585 epoch 8 - iter 4/13 - loss 0.05621321 - samples/sec: 23.47 - lr: 0.020000\n",
+      "2021-09-21 20:37:57,628 epoch 8 - iter 5/13 - loss 0.04511090 - samples/sec: 23.28 - lr: 0.020000\n",
+      "2021-09-21 20:37:57,674 epoch 8 - iter 6/13 - loss 0.10652342 - samples/sec: 21.80 - lr: 0.020000\n",
+      "2021-09-21 20:37:57,722 epoch 8 - iter 7/13 - loss 0.11244665 - samples/sec: 21.34 - lr: 0.020000\n",
+      "2021-09-21 20:37:57,764 epoch 8 - iter 8/13 - loss 0.09900678 - samples/sec: 23.57 - lr: 0.020000\n",
+      "2021-09-21 20:37:57,811 epoch 8 - iter 9/13 - loss 0.10027230 - samples/sec: 21.64 - lr: 0.020000\n",
+      "2021-09-21 20:37:57,854 epoch 8 - iter 10/13 - loss 0.09046470 - samples/sec: 23.61 - lr: 0.020000\n",
+      "2021-09-21 20:37:57,900 epoch 8 - iter 11/13 - loss 0.13334217 - samples/sec: 21.67 - lr: 0.020000\n",
+      "2021-09-21 20:37:57,943 epoch 8 - iter 12/13 - loss 0.12226104 - samples/sec: 23.50 - lr: 0.020000\n",
+      "2021-09-21 20:37:57,987 epoch 8 - iter 13/13 - loss 0.11294138 - samples/sec: 23.26 - lr: 0.020000\n",
+      "2021-09-21 20:37:57,988 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:37:57,988 EPOCH 8 done: loss 0.1129 - lr 0.0200000\n",
+      "2021-09-21 20:37:58,025 DEV : loss 0.141498401761055 - score 0.0\n",
+      "2021-09-21 20:37:58,026 BAD EPOCHS (no improvement): 0\n",
+      "saving best model\n",
+      "2021-09-21 20:38:01,973 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:02,039 epoch 9 - iter 1/13 - loss 0.00180872 - samples/sec: 21.75 - lr: 0.020000\n",
+      "2021-09-21 20:38:02,089 epoch 9 - iter 2/13 - loss 0.42534868 - samples/sec: 20.21 - lr: 0.020000\n",
+      "2021-09-21 20:38:02,138 epoch 9 - iter 3/13 - loss 0.34592498 - samples/sec: 20.71 - lr: 0.020000\n",
+      "2021-09-21 20:38:02,184 epoch 9 - iter 4/13 - loss 0.26076186 - samples/sec: 22.14 - lr: 0.020000\n",
+      "2021-09-21 20:38:02,233 epoch 9 - iter 5/13 - loss 0.22161314 - samples/sec: 20.62 - lr: 0.020000\n",
+      "2021-09-21 20:38:02,278 epoch 9 - iter 6/13 - loss 0.18475972 - samples/sec: 22.76 - lr: 0.020000\n",
+      "2021-09-21 20:38:02,323 epoch 9 - iter 7/13 - loss 0.15874953 - samples/sec: 22.42 - lr: 0.020000\n",
+      "2021-09-21 20:38:02,367 epoch 9 - iter 8/13 - loss 0.13898635 - samples/sec: 22.95 - lr: 0.020000\n",
+      "2021-09-21 20:38:02,411 epoch 9 - iter 9/13 - loss 0.12364945 - samples/sec: 22.50 - lr: 0.020000\n",
+      "2021-09-21 20:38:02,455 epoch 9 - iter 10/13 - loss 0.11135061 - samples/sec: 22.96 - lr: 0.020000\n",
+      "2021-09-21 20:38:02,500 epoch 9 - iter 11/13 - loss 0.10128544 - samples/sec: 22.49 - lr: 0.020000\n",
+      "2021-09-21 20:38:02,544 epoch 9 - iter 12/13 - loss 0.09363286 - samples/sec: 23.02 - lr: 0.020000\n",
+      "2021-09-21 20:38:02,591 epoch 9 - iter 13/13 - loss 0.08656931 - samples/sec: 21.36 - lr: 0.020000\n",
+      "2021-09-21 20:38:02,593 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:02,593 EPOCH 9 done: loss 0.0866 - lr 0.0200000\n",
+      "2021-09-21 20:38:02,724 DEV : loss 0.519903302192688 - score 0.0\n",
+      "2021-09-21 20:38:02,725 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:38:02,796 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:02,864 epoch 10 - iter 1/13 - loss 0.92978573 - samples/sec: 20.74 - lr: 0.020000\n",
+      "2021-09-21 20:38:02,910 epoch 10 - iter 2/13 - loss 0.47401925 - samples/sec: 21.52 - lr: 0.020000\n",
+      "2021-09-21 20:38:02,955 epoch 10 - iter 3/13 - loss 0.31691867 - samples/sec: 23.02 - lr: 0.020000\n",
+      "2021-09-21 20:38:02,998 epoch 10 - iter 4/13 - loss 0.23782764 - samples/sec: 23.36 - lr: 0.020000\n",
+      "2021-09-21 20:38:03,041 epoch 10 - iter 5/13 - loss 0.19093954 - samples/sec: 23.09 - lr: 0.020000\n",
+      "2021-09-21 20:38:03,088 epoch 10 - iter 6/13 - loss 0.16420886 - samples/sec: 21.74 - lr: 0.020000\n",
+      "2021-09-21 20:38:03,132 epoch 10 - iter 7/13 - loss 0.14129932 - samples/sec: 22.84 - lr: 0.020000\n",
+      "2021-09-21 20:38:03,176 epoch 10 - iter 8/13 - loss 0.12416329 - samples/sec: 23.27 - lr: 0.020000\n",
+      "2021-09-21 20:38:03,219 epoch 10 - iter 9/13 - loss 0.11041970 - samples/sec: 23.10 - lr: 0.020000\n",
+      "2021-09-21 20:38:03,264 epoch 10 - iter 10/13 - loss 0.09943042 - samples/sec: 22.65 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:53:28,083 epoch 10 - iter 13/13 - loss 0.25068216 - samples/sec: 22.36 - lr: 0.010000\n",
-      "2021-09-08 14:53:28,084 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:28,085 EPOCH 10 done: loss 0.2507 - lr 0.0100000\n",
-      "2021-09-08 14:53:28,121 DEV : loss 0.5990442037582397 - score 0.0\n",
-      "2021-09-08 14:53:28,122 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 14:53:32,153 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:32,154 Testing using best model ...\n",
-      "2021-09-08 14:53:32,155 loading file None1/best-model.pt\n",
+      "2021-09-21 20:38:03,308 epoch 10 - iter 11/13 - loss 0.09044113 - samples/sec: 22.87 - lr: 0.020000\n",
+      "2021-09-21 20:38:03,352 epoch 10 - iter 12/13 - loss 0.08297195 - samples/sec: 22.81 - lr: 0.020000\n",
+      "2021-09-21 20:38:03,397 epoch 10 - iter 13/13 - loss 0.07674245 - samples/sec: 22.49 - lr: 0.020000\n",
+      "2021-09-21 20:38:03,398 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:03,399 EPOCH 10 done: loss 0.0767 - lr 0.0200000\n",
+      "2021-09-21 20:38:03,532 DEV : loss 0.26242807507514954 - score 0.0\n",
+      "2021-09-21 20:38:03,533 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:38:07,328 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:07,329 Testing using best model ...\n",
+      "2021-09-21 20:38:07,330 loading file None1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 14:53:36,940 \t0.0\n",
-      "2021-09-08 14:53:36,941 \n",
+      "2021-09-21 20:38:12,147 \t0.5\n",
+      "2021-09-21 20:38:12,148 \n",
       "Results:\n",
-      "- F-score (micro) 0.0\n",
-      "- F-score (macro) 0.0\n",
-      "- Accuracy 0.0\n",
+      "- F-score (micro) 0.5\n",
+      "- F-score (macro) 0.0667\n",
+      "- Accuracy 0.5\n",
       "\n",
       "By class:\n",
-      "                                                                                                                                        precision    recall  f1-score   support\n",
+      "                                                                                                                                       precision    recall  f1-score   support\n",
       "\n",
-      "                                                                                                           undertake a journey or trip     0.0000    0.0000    0.0000         0\n",
-      "                                                                          the practical application of science to commerce or industry     0.0000    0.0000    0.0000         0\n",
-      " a father or mother; one who begets or one who gives birth to or nurtures and raises a child a relative who plays the role of guardian     0.0000    0.0000    0.0000         0\n",
-      "a father or mother; one who begets or one who gives birth to or nurtures and raises a child; a relative who plays the role of guardian     0.0000    0.0000    0.0000         0\n",
-      "                                                               the latest and most admired style in clothes and cosmetics and behavior     0.0000    0.0000    0.0000         0\n",
-      "                                                                            an activity that is diverting and that holds the attention     0.0000    0.0000    0.0000         0\n",
-      "                                                                                           a particular branch of scientific knowledge     0.0000    0.0000    0.0000         0\n",
-      "                                                                                                   the legal dissolution of a marriage     0.0000    0.0000    0.0000         0\n",
-      "                                an act punishable by law; usually considered an evil act an evil act not necessarily punishable by law     0.0000    0.0000    0.0000         0\n",
-      "                                                          a strong belief in a supernatural power or powers that control human destiny     0.0000    0.0000    0.0000         0\n",
-      "                                                                       an active diversion requiring physical exertion and competition     0.0000    0.0000    0.0000         0\n",
-      "                                                                        social relations involving intrigue to gain authority or power     0.0000    0.0000    0.0000         0\n",
-      "                                                                                          light and humorous drama with a happy ending     0.0000    0.0000    0.0000         0\n",
-      "                                                                                        a healthy state of wellbeing free from disease     0.0000    0.0000    0.0000         1\n",
-      "                                                                                          an adult female person (as opposed to a man)     0.0000    0.0000    0.0000         1\n",
+      "                                                                                                          undertake a journey or trip     0.0000    0.0000    0.0000         0\n",
+      "                                                                         the practical application of science to commerce or industry     0.0000    0.0000    0.0000         0\n",
+      "                                                                                       a healthy state of wellbeing free from disease     0.0000    0.0000    0.0000         0\n",
+      "a father or mother; one who begets or one who gives birth to or nurtures and raises a child a relative who plays the role of guardian     0.0000    0.0000    0.0000         0\n",
+      "                                                              the latest and most admired style in clothes and cosmetics and behavior     0.0000    0.0000    0.0000         0\n",
+      "                                                                           an activity that is diverting and that holds the attention     0.0000    0.0000    0.0000         0\n",
+      "                                                                                          a particular branch of scientific knowledge     0.0000    0.0000    0.0000         0\n",
+      "                                                                                                  the legal dissolution of a marriage     0.0000    0.0000    0.0000         0\n",
+      "                               an act punishable by law; usually considered an evil act an evil act not necessarily punishable by law     0.0000    0.0000    0.0000         0\n",
+      "                                                         a strong belief in a supernatural power or powers that control human destiny     0.0000    0.0000    0.0000         0\n",
+      "                                                                      an active diversion requiring physical exertion and competition     0.0000    0.0000    0.0000         0\n",
+      "                                                                       social relations involving intrigue to gain authority or power     0.0000    0.0000    0.0000         0\n",
+      "                                                                                         light and humorous drama with a happy ending     0.0000    0.0000    0.0000         0\n",
+      "                                                                                         an adult female person (as opposed to a man)     1.0000    1.0000    1.0000         1\n",
+      "                                                                      the social event at which the ceremony of marriage is performed     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "                                                                                                                             micro avg     0.0000    0.0000    0.0000         2\n",
-      "                                                                                                                             macro avg     0.0000    0.0000    0.0000         2\n",
-      "                                                                                                                          weighted avg     0.0000    0.0000    0.0000         2\n",
-      "                                                                                                                           samples avg     0.0000    0.0000    0.0000         2\n",
+      "                                                                                                                            micro avg     0.5000    0.5000    0.5000         2\n",
+      "                                                                                                                            macro avg     0.0667    0.0667    0.0667         2\n",
+      "                                                                                                                         weighted avg     0.5000    0.5000    0.5000         2\n",
+      "                                                                                                                          samples avg     0.5000    0.5000    0.5000         2\n",
       "\n",
-      "2021-09-08 14:53:36,941 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:48,492 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
+      "2021-09-21 20:38:12,148 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:28,304 loading file /vol/fob-vol7/nebenf19/samahakk/masterarbeit_experiment2/topic_yin/model/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 14:53:52,367 Computing label dictionary. Progress:\n"
+      "2021-09-21 20:38:32,490 Computing label dictionary. Progress:\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 15/15 [00:00<00:00, 47771.12it/s]"
+      "100%|██████████| 15/15 [00:00<00:00, 29803.20it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:53:52,369 [b'the practical application of science to commerce or industry', b'a healthy state of wellbeing free from disease', b'an adult female person (as opposed to a man)', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child a relative who plays the role of guardian', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child; a relative who plays the role of guardian', b'the social event at which the ceremony of marriage is performed', b'the latest and most admired style in clothes and cosmetics and behavior', b'an activity that is diverting and that holds the attention', b'a particular branch of scientific knowledge', b'an act punishable by law; usually considered an evil act an evil act not necessarily punishable by law', b'an active diversion requiring physical exertion and competition', b'social relations involving intrigue to gain authority or power', b'light and humorous drama with a happy ending', b'the legal dissolution of a marriage', b'a strong belief in a supernatural power or powers that control human destiny']\n",
-      "2021-09-08 14:53:52,377 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:52,379 Model: \"TARSClassifier(\n",
+      "2021-09-21 20:38:32,492 [b'undertake a journey or trip', b'the practical application of science to commerce or industry', b'a healthy state of wellbeing free from disease', b'an adult female person (as opposed to a man)', b'a father or mother; one who begets or one who gives birth to or nurtures and raises a child; a relative who plays the role of guardian', b'the social event at which the ceremony of marriage is performed', b'the latest and most admired style in clothes and cosmetics and behavior', b'an activity that is diverting and that holds the attention', b'a particular branch of scientific knowledge', b'the legal dissolution of a marriage', b'an act punishable by law; usually considered an evil act an evil act not necessarily punishable by law', b'a strong belief in a supernatural power or powers that control human destiny', b'social relations involving intrigue to gain authority or power', b'an active diversion requiring physical exertion and competition', b'light and humorous drama with a happy ending']\n",
+      "2021-09-21 20:38:32,502 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:32,504 Model: \"TARSClassifier(\n",
       "  (document_embeddings): None\n",
       "  (decoder): None\n",
       "  (loss_function): None\n",
@@ -8939,28 +8970,27 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:53:52,380 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:52,380 Corpus: \"Corpus: 13 train + 1 dev + 2 test sentences\"\n",
-      "2021-09-08 14:53:52,380 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:52,381 Parameters:\n",
-      "2021-09-08 14:53:52,381  - learning_rate: \"0.02\"\n",
-      "2021-09-08 14:53:52,381  - mini_batch_size: \"1\"\n",
-      "2021-09-08 14:53:52,381  - patience: \"3\"\n",
-      "2021-09-08 14:53:52,382  - anneal_factor: \"0.5\"\n",
-      "2021-09-08 14:53:52,382  - max_epochs: \"10\"\n",
-      "2021-09-08 14:53:52,382  - shuffle: \"True\"\n",
-      "2021-09-08 14:53:52,382  - train_with_dev: \"False\"\n",
-      "2021-09-08 14:53:52,383  - batch_growth_annealing: \"False\"\n",
-      "2021-09-08 14:53:52,383 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:52,383 Model training base path: \"None1\"\n",
-      "2021-09-08 14:53:52,384 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:52,384 Device: cuda:0\n",
-      "2021-09-08 14:53:52,384 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:52,384 Embeddings storage mode: cpu\n",
-      "2021-09-08 14:53:52,390 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:52,450 epoch 1 - iter 1/13 - loss 1.72483349 - samples/sec: 26.47 - lr: 0.020000\n",
-      "2021-09-08 14:53:52,495 epoch 1 - iter 2/13 - loss 1.05685650 - samples/sec: 22.28 - lr: 0.020000\n",
-      "2021-09-08 14:53:52,541 epoch 1 - iter 3/13 - loss 0.90574399 - samples/sec: 22.26 - lr: 0.020000\n"
+      "2021-09-21 20:38:32,505 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:32,506 Corpus: \"Corpus: 13 train + 1 dev + 2 test sentences\"\n",
+      "2021-09-21 20:38:32,506 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:32,506 Parameters:\n",
+      "2021-09-21 20:38:32,507  - learning_rate: \"0.02\"\n",
+      "2021-09-21 20:38:32,507  - mini_batch_size: \"1\"\n",
+      "2021-09-21 20:38:32,508  - patience: \"3\"\n",
+      "2021-09-21 20:38:32,508  - anneal_factor: \"0.5\"\n",
+      "2021-09-21 20:38:32,509  - max_epochs: \"10\"\n",
+      "2021-09-21 20:38:32,509  - shuffle: \"True\"\n",
+      "2021-09-21 20:38:32,509  - train_with_dev: \"False\"\n",
+      "2021-09-21 20:38:32,510  - batch_growth_annealing: \"False\"\n",
+      "2021-09-21 20:38:32,510 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:32,511 Model training base path: \"None1\"\n",
+      "2021-09-21 20:38:32,512 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:32,512 Device: cuda:0\n",
+      "2021-09-21 20:38:32,513 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:32,513 Embeddings storage mode: cpu\n",
+      "2021-09-21 20:38:32,520 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:32,608 epoch 1 - iter 1/13 - loss 0.61584568 - samples/sec: 17.03 - lr: 0.020000\n",
+      "2021-09-21 20:38:32,674 epoch 1 - iter 2/13 - loss 0.56347546 - samples/sec: 15.45 - lr: 0.020000\n"
      ]
     },
     {
@@ -8974,235 +9004,236 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:53:52,586 epoch 1 - iter 4/13 - loss 0.81133173 - samples/sec: 22.22 - lr: 0.020000\n",
-      "2021-09-08 14:53:52,631 epoch 1 - iter 5/13 - loss 0.72117101 - samples/sec: 22.60 - lr: 0.020000\n",
-      "2021-09-08 14:53:52,676 epoch 1 - iter 6/13 - loss 0.74832582 - samples/sec: 22.25 - lr: 0.020000\n",
-      "2021-09-08 14:53:52,721 epoch 1 - iter 7/13 - loss 0.68403565 - samples/sec: 22.49 - lr: 0.020000\n",
-      "2021-09-08 14:53:52,766 epoch 1 - iter 8/13 - loss 0.61361031 - samples/sec: 22.36 - lr: 0.020000\n",
-      "2021-09-08 14:53:52,811 epoch 1 - iter 9/13 - loss 0.64098113 - samples/sec: 22.30 - lr: 0.020000\n",
-      "2021-09-08 14:53:52,856 epoch 1 - iter 10/13 - loss 0.63526793 - samples/sec: 22.43 - lr: 0.020000\n",
-      "2021-09-08 14:53:52,898 epoch 1 - iter 11/13 - loss 0.58007094 - samples/sec: 23.91 - lr: 0.020000\n",
-      "2021-09-08 14:53:52,943 epoch 1 - iter 12/13 - loss 0.70058905 - samples/sec: 22.55 - lr: 0.020000\n",
-      "2021-09-08 14:53:52,990 epoch 1 - iter 13/13 - loss 0.67744820 - samples/sec: 21.45 - lr: 0.020000\n",
-      "2021-09-08 14:53:52,991 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:52,991 EPOCH 1 done: loss 0.6774 - lr 0.0200000\n",
-      "2021-09-08 14:53:53,112 DEV : loss 0.3688484728336334 - score 0.0\n",
-      "2021-09-08 14:53:53,113 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:38:32,739 epoch 1 - iter 3/13 - loss 0.82306578 - samples/sec: 15.50 - lr: 0.020000\n",
+      "2021-09-21 20:38:32,787 epoch 1 - iter 4/13 - loss 0.73466551 - samples/sec: 20.87 - lr: 0.020000\n",
+      "2021-09-21 20:38:32,853 epoch 1 - iter 5/13 - loss 0.70314700 - samples/sec: 15.34 - lr: 0.020000\n",
+      "2021-09-21 20:38:32,913 epoch 1 - iter 6/13 - loss 0.68705837 - samples/sec: 16.62 - lr: 0.020000\n",
+      "2021-09-21 20:38:32,974 epoch 1 - iter 7/13 - loss 0.63054462 - samples/sec: 16.61 - lr: 0.020000\n",
+      "2021-09-21 20:38:33,048 epoch 1 - iter 8/13 - loss 0.59283948 - samples/sec: 13.48 - lr: 0.020000\n",
+      "2021-09-21 20:38:33,111 epoch 1 - iter 9/13 - loss 0.59574483 - samples/sec: 16.00 - lr: 0.020000\n",
+      "2021-09-21 20:38:33,164 epoch 1 - iter 10/13 - loss 0.58338779 - samples/sec: 19.22 - lr: 0.020000\n",
+      "2021-09-21 20:38:33,223 epoch 1 - iter 11/13 - loss 0.55841819 - samples/sec: 17.13 - lr: 0.020000\n",
+      "2021-09-21 20:38:33,270 epoch 1 - iter 12/13 - loss 0.65436511 - samples/sec: 21.26 - lr: 0.020000\n",
+      "2021-09-21 20:38:33,345 epoch 1 - iter 13/13 - loss 0.68042840 - samples/sec: 13.44 - lr: 0.020000\n",
+      "2021-09-21 20:38:33,346 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:33,346 EPOCH 1 done: loss 0.6804 - lr 0.0200000\n",
+      "2021-09-21 20:38:33,439 DEV : loss 0.3581869304180145 - score 0.0\n",
+      "2021-09-21 20:38:33,440 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 14:53:57,323 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:57,392 epoch 2 - iter 1/13 - loss 2.33037877 - samples/sec: 21.65 - lr: 0.020000\n",
-      "2021-09-08 14:53:57,437 epoch 2 - iter 2/13 - loss 1.53721175 - samples/sec: 22.38 - lr: 0.020000\n",
-      "2021-09-08 14:53:57,483 epoch 2 - iter 3/13 - loss 1.14972779 - samples/sec: 21.95 - lr: 0.020000\n",
-      "2021-09-08 14:53:57,528 epoch 2 - iter 4/13 - loss 1.06721904 - samples/sec: 22.19 - lr: 0.020000\n",
-      "2021-09-08 14:53:57,574 epoch 2 - iter 5/13 - loss 1.00173879 - samples/sec: 22.23 - lr: 0.020000\n",
-      "2021-09-08 14:53:57,619 epoch 2 - iter 6/13 - loss 0.94381322 - samples/sec: 22.51 - lr: 0.020000\n",
-      "2021-09-08 14:53:57,665 epoch 2 - iter 7/13 - loss 0.91035616 - samples/sec: 21.78 - lr: 0.020000\n",
-      "2021-09-08 14:53:57,710 epoch 2 - iter 8/13 - loss 0.84954966 - samples/sec: 22.56 - lr: 0.020000\n",
-      "2021-09-08 14:53:57,755 epoch 2 - iter 9/13 - loss 0.78844183 - samples/sec: 22.38 - lr: 0.020000\n",
-      "2021-09-08 14:53:57,800 epoch 2 - iter 10/13 - loss 0.73873947 - samples/sec: 22.50 - lr: 0.020000\n",
-      "2021-09-08 14:53:57,845 epoch 2 - iter 11/13 - loss 0.69917374 - samples/sec: 22.31 - lr: 0.020000\n",
-      "2021-09-08 14:53:57,887 epoch 2 - iter 12/13 - loss 0.64231399 - samples/sec: 24.18 - lr: 0.020000\n",
-      "2021-09-08 14:53:57,932 epoch 2 - iter 13/13 - loss 0.60260531 - samples/sec: 22.32 - lr: 0.020000\n",
-      "2021-09-08 14:53:57,933 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:57,933 EPOCH 2 done: loss 0.6026 - lr 0.0200000\n",
-      "2021-09-08 14:53:57,969 DEV : loss 0.5990630984306335 - score 0.0\n",
-      "2021-09-08 14:53:57,970 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 14:53:57,974 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:58,034 epoch 3 - iter 1/13 - loss 0.02113693 - samples/sec: 24.02 - lr: 0.020000\n",
-      "2021-09-08 14:53:58,076 epoch 3 - iter 2/13 - loss 0.01242149 - samples/sec: 24.14 - lr: 0.020000\n",
-      "2021-09-08 14:53:58,121 epoch 3 - iter 3/13 - loss 0.04135480 - samples/sec: 22.35 - lr: 0.020000\n",
-      "2021-09-08 14:53:58,166 epoch 3 - iter 4/13 - loss 0.31031062 - samples/sec: 22.32 - lr: 0.020000\n",
-      "2021-09-08 14:53:58,208 epoch 3 - iter 5/13 - loss 0.25130845 - samples/sec: 24.04 - lr: 0.020000\n",
-      "2021-09-08 14:53:58,253 epoch 3 - iter 6/13 - loss 0.27949363 - samples/sec: 22.37 - lr: 0.020000\n",
-      "2021-09-08 14:53:58,298 epoch 3 - iter 7/13 - loss 0.28782327 - samples/sec: 22.51 - lr: 0.020000\n",
-      "2021-09-08 14:53:58,341 epoch 3 - iter 8/13 - loss 0.25556663 - samples/sec: 23.34 - lr: 0.020000\n",
-      "2021-09-08 14:53:58,386 epoch 3 - iter 9/13 - loss 0.29031761 - samples/sec: 22.57 - lr: 0.020000\n",
-      "2021-09-08 14:53:58,431 epoch 3 - iter 10/13 - loss 0.39950663 - samples/sec: 22.39 - lr: 0.020000\n",
-      "2021-09-08 14:53:58,472 epoch 3 - iter 11/13 - loss 0.36562472 - samples/sec: 24.20 - lr: 0.020000\n",
-      "2021-09-08 14:53:58,517 epoch 3 - iter 12/13 - loss 0.36238855 - samples/sec: 22.54 - lr: 0.020000\n",
-      "2021-09-08 14:53:58,562 epoch 3 - iter 13/13 - loss 0.37383143 - samples/sec: 22.54 - lr: 0.020000\n",
-      "2021-09-08 14:53:58,563 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:58,563 EPOCH 3 done: loss 0.3738 - lr 0.0200000\n",
-      "2021-09-08 14:53:58,601 DEV : loss 0.8552677631378174 - score 0.0\n",
-      "2021-09-08 14:53:58,602 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 14:53:58,604 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:58,667 epoch 4 - iter 1/13 - loss 0.17880113 - samples/sec: 22.61 - lr: 0.020000\n",
-      "2021-09-08 14:53:58,712 epoch 4 - iter 2/13 - loss 0.44469864 - samples/sec: 22.45 - lr: 0.020000\n",
-      "2021-09-08 14:53:58,754 epoch 4 - iter 3/13 - loss 0.29844907 - samples/sec: 24.07 - lr: 0.020000\n",
-      "2021-09-08 14:53:58,799 epoch 4 - iter 4/13 - loss 0.41863092 - samples/sec: 22.43 - lr: 0.020000\n",
-      "2021-09-08 14:53:58,844 epoch 4 - iter 5/13 - loss 0.45020488 - samples/sec: 22.47 - lr: 0.020000\n",
-      "2021-09-08 14:53:58,889 epoch 4 - iter 6/13 - loss 0.43866415 - samples/sec: 22.47 - lr: 0.020000\n",
-      "2021-09-08 14:53:58,930 epoch 4 - iter 7/13 - loss 0.38869654 - samples/sec: 24.25 - lr: 0.020000\n",
-      "2021-09-08 14:53:58,977 epoch 4 - iter 8/13 - loss 0.34824631 - samples/sec: 21.53 - lr: 0.020000\n",
-      "2021-09-08 14:53:59,022 epoch 4 - iter 9/13 - loss 0.31745186 - samples/sec: 22.49 - lr: 0.020000\n",
-      "2021-09-08 14:53:59,064 epoch 4 - iter 10/13 - loss 0.28836406 - samples/sec: 24.08 - lr: 0.020000\n",
-      "2021-09-08 14:53:59,110 epoch 4 - iter 11/13 - loss 0.34204924 - samples/sec: 22.17 - lr: 0.020000\n",
-      "2021-09-08 14:53:59,151 epoch 4 - iter 12/13 - loss 0.31453961 - samples/sec: 24.20 - lr: 0.020000\n",
-      "2021-09-08 14:53:59,197 epoch 4 - iter 13/13 - loss 0.42285998 - samples/sec: 22.34 - lr: 0.020000\n",
-      "2021-09-08 14:53:59,197 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:53:59,198 EPOCH 4 done: loss 0.4229 - lr 0.0200000\n",
-      "2021-09-08 14:53:59,235 DEV : loss 0.28628990054130554 - score 0.0\n",
-      "2021-09-08 14:53:59,235 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:38:37,384 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:37,452 epoch 2 - iter 1/13 - loss 0.37540925 - samples/sec: 21.06 - lr: 0.020000\n",
+      "2021-09-21 20:38:37,499 epoch 2 - iter 2/13 - loss 0.27673322 - samples/sec: 21.63 - lr: 0.020000\n",
+      "2021-09-21 20:38:37,545 epoch 2 - iter 3/13 - loss 0.30276391 - samples/sec: 21.73 - lr: 0.020000\n",
+      "2021-09-21 20:38:37,591 epoch 2 - iter 4/13 - loss 0.27521762 - samples/sec: 21.97 - lr: 0.020000\n",
+      "2021-09-21 20:38:37,638 epoch 2 - iter 5/13 - loss 0.24794233 - samples/sec: 21.54 - lr: 0.020000\n",
+      "2021-09-21 20:38:37,684 epoch 2 - iter 6/13 - loss 0.29429435 - samples/sec: 21.75 - lr: 0.020000\n",
+      "2021-09-21 20:38:37,731 epoch 2 - iter 7/13 - loss 0.34018929 - samples/sec: 21.74 - lr: 0.020000\n",
+      "2021-09-21 20:38:37,777 epoch 2 - iter 8/13 - loss 0.43944468 - samples/sec: 21.81 - lr: 0.020000\n",
+      "2021-09-21 20:38:37,823 epoch 2 - iter 9/13 - loss 0.65423700 - samples/sec: 21.71 - lr: 0.020000\n",
+      "2021-09-21 20:38:37,870 epoch 2 - iter 10/13 - loss 0.68568792 - samples/sec: 21.78 - lr: 0.020000\n",
+      "2021-09-21 20:38:37,916 epoch 2 - iter 11/13 - loss 0.67198670 - samples/sec: 21.82 - lr: 0.020000\n",
+      "2021-09-21 20:38:37,959 epoch 2 - iter 12/13 - loss 0.62497948 - samples/sec: 23.43 - lr: 0.020000\n",
+      "2021-09-21 20:38:38,006 epoch 2 - iter 13/13 - loss 0.62693917 - samples/sec: 21.45 - lr: 0.020000\n",
+      "2021-09-21 20:38:38,007 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:38,007 EPOCH 2 done: loss 0.6269 - lr 0.0200000\n",
+      "2021-09-21 20:38:38,042 DEV : loss 0.48544204235076904 - score 0.0\n",
+      "2021-09-21 20:38:38,043 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:38:38,044 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:38,109 epoch 3 - iter 1/13 - loss 0.34373033 - samples/sec: 21.85 - lr: 0.020000\n",
+      "2021-09-21 20:38:38,156 epoch 3 - iter 2/13 - loss 0.28790852 - samples/sec: 21.62 - lr: 0.020000\n",
+      "2021-09-21 20:38:38,202 epoch 3 - iter 3/13 - loss 0.72387442 - samples/sec: 21.91 - lr: 0.020000\n",
+      "2021-09-21 20:38:38,248 epoch 3 - iter 4/13 - loss 0.63380687 - samples/sec: 21.69 - lr: 0.020000\n",
+      "2021-09-21 20:38:38,294 epoch 3 - iter 5/13 - loss 0.57845364 - samples/sec: 21.86 - lr: 0.020000\n",
+      "2021-09-21 20:38:38,341 epoch 3 - iter 6/13 - loss 0.49176734 - samples/sec: 21.71 - lr: 0.020000\n",
+      "2021-09-21 20:38:38,387 epoch 3 - iter 7/13 - loss 0.46817509 - samples/sec: 21.84 - lr: 0.020000\n",
+      "2021-09-21 20:38:38,433 epoch 3 - iter 8/13 - loss 0.44476363 - samples/sec: 21.77 - lr: 0.020000\n",
+      "2021-09-21 20:38:38,480 epoch 3 - iter 9/13 - loss 0.44174891 - samples/sec: 21.79 - lr: 0.020000\n",
+      "2021-09-21 20:38:38,526 epoch 3 - iter 10/13 - loss 0.53347615 - samples/sec: 21.81 - lr: 0.020000\n",
+      "2021-09-21 20:38:38,572 epoch 3 - iter 11/13 - loss 0.51892505 - samples/sec: 21.77 - lr: 0.020000\n",
+      "2021-09-21 20:38:38,615 epoch 3 - iter 12/13 - loss 0.47896340 - samples/sec: 23.40 - lr: 0.020000\n",
+      "2021-09-21 20:38:38,659 epoch 3 - iter 13/13 - loss 0.44341527 - samples/sec: 23.37 - lr: 0.020000\n",
+      "2021-09-21 20:38:38,660 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:38,660 EPOCH 3 done: loss 0.4434 - lr 0.0200000\n",
+      "2021-09-21 20:38:38,695 DEV : loss 0.4110802412033081 - score 0.0\n",
+      "2021-09-21 20:38:38,696 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:38:38,697 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:38,764 epoch 4 - iter 1/13 - loss 0.09882039 - samples/sec: 21.80 - lr: 0.020000\n",
+      "2021-09-21 20:38:38,810 epoch 4 - iter 2/13 - loss 0.31637636 - samples/sec: 21.69 - lr: 0.020000\n",
+      "2021-09-21 20:38:38,856 epoch 4 - iter 3/13 - loss 0.23000572 - samples/sec: 21.82 - lr: 0.020000\n",
+      "2021-09-21 20:38:38,903 epoch 4 - iter 4/13 - loss 0.17867199 - samples/sec: 21.72 - lr: 0.020000\n",
+      "2021-09-21 20:38:38,949 epoch 4 - iter 5/13 - loss 0.43355852 - samples/sec: 21.77 - lr: 0.020000\n",
+      "2021-09-21 20:38:38,992 epoch 4 - iter 6/13 - loss 0.36472813 - samples/sec: 23.56 - lr: 0.020000\n",
+      "2021-09-21 20:38:39,038 epoch 4 - iter 7/13 - loss 0.36230401 - samples/sec: 21.76 - lr: 0.020000\n",
+      "2021-09-21 20:38:39,084 epoch 4 - iter 8/13 - loss 0.33342883 - samples/sec: 21.88 - lr: 0.020000\n",
+      "2021-09-21 20:38:39,132 epoch 4 - iter 9/13 - loss 0.34175438 - samples/sec: 21.43 - lr: 0.020000\n",
+      "2021-09-21 20:38:39,178 epoch 4 - iter 10/13 - loss 0.37128334 - samples/sec: 21.67 - lr: 0.020000\n",
+      "2021-09-21 20:38:39,225 epoch 4 - iter 11/13 - loss 0.36461844 - samples/sec: 21.66 - lr: 0.020000\n",
+      "2021-09-21 20:38:39,271 epoch 4 - iter 12/13 - loss 0.47279037 - samples/sec: 21.68 - lr: 0.020000\n",
+      "2021-09-21 20:38:39,318 epoch 4 - iter 13/13 - loss 0.46950636 - samples/sec: 21.74 - lr: 0.020000\n",
+      "2021-09-21 20:38:39,319 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:39,319 EPOCH 4 done: loss 0.4695 - lr 0.0200000\n",
+      "2021-09-21 20:38:39,351 DEV : loss 0.05592123046517372 - score 0.0\n",
+      "2021-09-21 20:38:39,352 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 14:54:03,269 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:54:03,338 epoch 5 - iter 1/13 - loss 1.66659272 - samples/sec: 21.68 - lr: 0.020000\n",
-      "2021-09-08 14:54:03,380 epoch 5 - iter 2/13 - loss 0.84286241 - samples/sec: 23.87 - lr: 0.020000\n",
-      "2021-09-08 14:54:03,423 epoch 5 - iter 3/13 - loss 0.56961570 - samples/sec: 23.48 - lr: 0.020000\n",
-      "2021-09-08 14:54:03,465 epoch 5 - iter 4/13 - loss 0.43436121 - samples/sec: 24.08 - lr: 0.020000\n",
-      "2021-09-08 14:54:03,507 epoch 5 - iter 5/13 - loss 0.35004505 - samples/sec: 23.98 - lr: 0.020000\n",
-      "2021-09-08 14:54:03,552 epoch 5 - iter 6/13 - loss 0.59181977 - samples/sec: 22.31 - lr: 0.020000\n",
-      "2021-09-08 14:54:03,597 epoch 5 - iter 7/13 - loss 0.58595350 - samples/sec: 22.36 - lr: 0.020000\n",
-      "2021-09-08 14:54:03,639 epoch 5 - iter 8/13 - loss 0.51600382 - samples/sec: 24.15 - lr: 0.020000\n",
-      "2021-09-08 14:54:03,681 epoch 5 - iter 9/13 - loss 0.45879421 - samples/sec: 23.86 - lr: 0.020000\n",
-      "2021-09-08 14:54:03,726 epoch 5 - iter 10/13 - loss 0.44071676 - samples/sec: 22.42 - lr: 0.020000\n",
-      "2021-09-08 14:54:03,772 epoch 5 - iter 11/13 - loss 0.48389896 - samples/sec: 22.26 - lr: 0.020000\n",
-      "2021-09-08 14:54:03,817 epoch 5 - iter 12/13 - loss 0.50344342 - samples/sec: 22.55 - lr: 0.020000\n",
-      "2021-09-08 14:54:03,862 epoch 5 - iter 13/13 - loss 0.52046915 - samples/sec: 22.36 - lr: 0.020000\n",
-      "2021-09-08 14:54:03,863 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:54:03,863 EPOCH 5 done: loss 0.5205 - lr 0.0200000\n",
-      "2021-09-08 14:54:03,899 DEV : loss 0.36574870347976685 - score 0.0\n",
-      "2021-09-08 14:54:03,899 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 14:54:03,901 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 20:38:47,953 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:48,024 epoch 5 - iter 1/13 - loss 0.48943868 - samples/sec: 21.07 - lr: 0.020000\n",
+      "2021-09-21 20:38:48,068 epoch 5 - iter 2/13 - loss 0.25101462 - samples/sec: 23.23 - lr: 0.020000\n",
+      "2021-09-21 20:38:48,114 epoch 5 - iter 3/13 - loss 0.40814586 - samples/sec: 21.84 - lr: 0.020000\n",
+      "2021-09-21 20:38:48,161 epoch 5 - iter 4/13 - loss 0.49885373 - samples/sec: 21.51 - lr: 0.020000\n",
+      "2021-09-21 20:38:48,207 epoch 5 - iter 5/13 - loss 0.72283732 - samples/sec: 21.84 - lr: 0.020000\n",
+      "2021-09-21 20:38:48,254 epoch 5 - iter 6/13 - loss 0.76418149 - samples/sec: 21.55 - lr: 0.020000\n",
+      "2021-09-21 20:38:48,300 epoch 5 - iter 7/13 - loss 0.73216577 - samples/sec: 21.78 - lr: 0.020000\n",
+      "2021-09-21 20:38:48,347 epoch 5 - iter 8/13 - loss 0.71485511 - samples/sec: 21.70 - lr: 0.020000\n",
+      "2021-09-21 20:38:48,393 epoch 5 - iter 9/13 - loss 0.65260843 - samples/sec: 21.70 - lr: 0.020000\n",
+      "2021-09-21 20:38:48,440 epoch 5 - iter 10/13 - loss 0.62153973 - samples/sec: 21.73 - lr: 0.020000\n",
+      "2021-09-21 20:38:48,486 epoch 5 - iter 11/13 - loss 0.61680674 - samples/sec: 21.74 - lr: 0.020000\n",
+      "2021-09-21 20:38:48,533 epoch 5 - iter 12/13 - loss 0.61257284 - samples/sec: 21.72 - lr: 0.020000\n",
+      "2021-09-21 20:38:48,579 epoch 5 - iter 13/13 - loss 0.57537401 - samples/sec: 21.87 - lr: 0.020000\n",
+      "2021-09-21 20:38:48,580 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:48,580 EPOCH 5 done: loss 0.5754 - lr 0.0200000\n",
+      "2021-09-21 20:38:48,613 DEV : loss 0.21930314600467682 - score 0.0\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:54:03,964 epoch 6 - iter 1/13 - loss 0.81948012 - samples/sec: 22.49 - lr: 0.020000\n",
-      "2021-09-08 14:54:04,010 epoch 6 - iter 2/13 - loss 0.83201092 - samples/sec: 22.37 - lr: 0.020000\n",
-      "2021-09-08 14:54:04,055 epoch 6 - iter 3/13 - loss 0.63555142 - samples/sec: 22.42 - lr: 0.020000\n",
-      "2021-09-08 14:54:04,101 epoch 6 - iter 4/13 - loss 0.53053167 - samples/sec: 21.69 - lr: 0.020000\n",
-      "2021-09-08 14:54:04,146 epoch 6 - iter 5/13 - loss 0.43434188 - samples/sec: 22.46 - lr: 0.020000\n",
-      "2021-09-08 14:54:04,188 epoch 6 - iter 6/13 - loss 0.36920267 - samples/sec: 23.92 - lr: 0.020000\n",
-      "2021-09-08 14:54:04,230 epoch 6 - iter 7/13 - loss 0.31679833 - samples/sec: 24.18 - lr: 0.020000\n",
-      "2021-09-08 14:54:04,275 epoch 6 - iter 8/13 - loss 0.29608111 - samples/sec: 22.28 - lr: 0.020000\n",
-      "2021-09-08 14:54:04,320 epoch 6 - iter 9/13 - loss 0.35692187 - samples/sec: 22.45 - lr: 0.020000\n",
-      "2021-09-08 14:54:04,365 epoch 6 - iter 10/13 - loss 0.35784958 - samples/sec: 22.28 - lr: 0.020000\n",
-      "2021-09-08 14:54:04,410 epoch 6 - iter 11/13 - loss 0.34589149 - samples/sec: 22.46 - lr: 0.020000\n",
-      "2021-09-08 14:54:04,456 epoch 6 - iter 12/13 - loss 0.40521031 - samples/sec: 22.19 - lr: 0.020000\n",
-      "2021-09-08 14:54:04,501 epoch 6 - iter 13/13 - loss 0.38140589 - samples/sec: 22.32 - lr: 0.020000\n",
-      "2021-09-08 14:54:04,502 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:54:04,502 EPOCH 6 done: loss 0.3814 - lr 0.0200000\n",
-      "2021-09-08 14:54:04,537 DEV : loss 0.5359722971916199 - score 0.0\n",
-      "2021-09-08 14:54:04,538 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 14:54:04,540 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:54:04,600 epoch 7 - iter 1/13 - loss 0.00468902 - samples/sec: 24.26 - lr: 0.020000\n",
-      "2021-09-08 14:54:04,645 epoch 7 - iter 2/13 - loss 0.10878818 - samples/sec: 22.32 - lr: 0.020000\n",
-      "2021-09-08 14:54:04,687 epoch 7 - iter 3/13 - loss 0.08884713 - samples/sec: 24.00 - lr: 0.020000\n",
-      "2021-09-08 14:54:04,731 epoch 7 - iter 4/13 - loss 0.07021045 - samples/sec: 23.01 - lr: 0.020000\n",
-      "2021-09-08 14:54:04,776 epoch 7 - iter 5/13 - loss 0.31758262 - samples/sec: 22.52 - lr: 0.020000\n",
-      "2021-09-08 14:54:04,818 epoch 7 - iter 6/13 - loss 0.26633696 - samples/sec: 23.95 - lr: 0.020000\n",
-      "2021-09-08 14:54:04,863 epoch 7 - iter 7/13 - loss 0.35435984 - samples/sec: 22.48 - lr: 0.020000\n",
-      "2021-09-08 14:54:04,908 epoch 7 - iter 8/13 - loss 0.35338771 - samples/sec: 22.32 - lr: 0.020000\n",
-      "2021-09-08 14:54:04,950 epoch 7 - iter 9/13 - loss 0.31665842 - samples/sec: 24.21 - lr: 0.020000\n",
-      "2021-09-08 14:54:04,992 epoch 7 - iter 10/13 - loss 0.28509821 - samples/sec: 24.01 - lr: 0.020000\n",
-      "2021-09-08 14:54:05,033 epoch 7 - iter 11/13 - loss 0.25953747 - samples/sec: 24.13 - lr: 0.020000\n",
-      "2021-09-08 14:54:05,079 epoch 7 - iter 12/13 - loss 0.24481963 - samples/sec: 22.36 - lr: 0.020000\n",
-      "2021-09-08 14:54:05,124 epoch 7 - iter 13/13 - loss 0.26133999 - samples/sec: 22.40 - lr: 0.020000\n",
-      "2021-09-08 14:54:05,125 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:54:05,125 EPOCH 7 done: loss 0.2613 - lr 0.0200000\n",
-      "2021-09-08 14:54:05,160 DEV : loss 0.017763663083314896 - score 0.0\n",
-      "2021-09-08 14:54:05,161 BAD EPOCHS (no improvement): 0\n",
+      "2021-09-21 20:38:48,614 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:38:48,616 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:48,681 epoch 6 - iter 1/13 - loss 0.11322573 - samples/sec: 21.55 - lr: 0.020000\n",
+      "2021-09-21 20:38:48,727 epoch 6 - iter 2/13 - loss 0.48922388 - samples/sec: 21.88 - lr: 0.020000\n",
+      "2021-09-21 20:38:48,774 epoch 6 - iter 3/13 - loss 0.61252639 - samples/sec: 21.71 - lr: 0.020000\n",
+      "2021-09-21 20:38:48,820 epoch 6 - iter 4/13 - loss 0.58445000 - samples/sec: 21.80 - lr: 0.020000\n",
+      "2021-09-21 20:38:48,869 epoch 6 - iter 5/13 - loss 0.74643325 - samples/sec: 20.64 - lr: 0.020000\n",
+      "2021-09-21 20:38:48,915 epoch 6 - iter 6/13 - loss 0.73890618 - samples/sec: 21.65 - lr: 0.020000\n",
+      "2021-09-21 20:38:48,962 epoch 6 - iter 7/13 - loss 0.65545214 - samples/sec: 21.63 - lr: 0.020000\n",
+      "2021-09-21 20:38:49,008 epoch 6 - iter 8/13 - loss 0.59020081 - samples/sec: 21.87 - lr: 0.020000\n",
+      "2021-09-21 20:38:49,055 epoch 6 - iter 9/13 - loss 0.54751311 - samples/sec: 21.68 - lr: 0.020000\n",
+      "2021-09-21 20:38:49,101 epoch 6 - iter 10/13 - loss 0.50950701 - samples/sec: 21.76 - lr: 0.020000\n",
+      "2021-09-21 20:38:49,148 epoch 6 - iter 11/13 - loss 0.46530878 - samples/sec: 21.58 - lr: 0.020000\n",
+      "2021-09-21 20:38:49,194 epoch 6 - iter 12/13 - loss 0.43783793 - samples/sec: 21.80 - lr: 0.020000\n",
+      "2021-09-21 20:38:49,241 epoch 6 - iter 13/13 - loss 0.46611159 - samples/sec: 21.67 - lr: 0.020000\n",
+      "2021-09-21 20:38:49,242 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:49,242 EPOCH 6 done: loss 0.4661 - lr 0.0200000\n",
+      "2021-09-21 20:38:49,276 DEV : loss 0.03087124601006508 - score 0.0\n",
+      "2021-09-21 20:38:49,277 BAD EPOCHS (no improvement): 0\n",
       "saving best model\n",
-      "2021-09-08 14:54:09,245 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:54:09,312 epoch 8 - iter 1/13 - loss 0.00967904 - samples/sec: 23.34 - lr: 0.020000\n",
-      "2021-09-08 14:54:09,354 epoch 8 - iter 2/13 - loss 0.01429864 - samples/sec: 23.84 - lr: 0.020000\n",
-      "2021-09-08 14:54:09,397 epoch 8 - iter 3/13 - loss 0.01024578 - samples/sec: 23.92 - lr: 0.020000\n",
-      "2021-09-08 14:54:09,439 epoch 8 - iter 4/13 - loss 0.00949376 - samples/sec: 24.01 - lr: 0.020000\n",
-      "2021-09-08 14:54:09,481 epoch 8 - iter 5/13 - loss 0.00788988 - samples/sec: 23.92 - lr: 0.020000\n",
-      "2021-09-08 14:54:09,523 epoch 8 - iter 6/13 - loss 0.00786328 - samples/sec: 24.14 - lr: 0.020000\n",
-      "2021-09-08 14:54:09,565 epoch 8 - iter 7/13 - loss 0.00800435 - samples/sec: 23.91 - lr: 0.020000\n",
-      "2021-09-08 14:54:09,610 epoch 8 - iter 8/13 - loss 0.06718752 - samples/sec: 22.39 - lr: 0.020000\n",
-      "2021-09-08 14:54:09,655 epoch 8 - iter 9/13 - loss 0.06529181 - samples/sec: 22.31 - lr: 0.020000\n",
-      "2021-09-08 14:54:09,700 epoch 8 - iter 10/13 - loss 0.14646769 - samples/sec: 22.56 - lr: 0.020000\n",
-      "2021-09-08 14:54:09,742 epoch 8 - iter 11/13 - loss 0.13325104 - samples/sec: 23.89 - lr: 0.020000\n",
-      "2021-09-08 14:54:09,784 epoch 8 - iter 12/13 - loss 0.12335587 - samples/sec: 24.17 - lr: 0.020000\n",
-      "2021-09-08 14:54:09,829 epoch 8 - iter 13/13 - loss 0.16461137 - samples/sec: 22.33 - lr: 0.020000\n",
-      "2021-09-08 14:54:09,830 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:54:09,830 EPOCH 8 done: loss 0.1646 - lr 0.0200000\n",
-      "2021-09-08 14:54:09,867 DEV : loss 0.04788362979888916 - score 0.0\n",
-      "2021-09-08 14:54:09,867 BAD EPOCHS (no improvement): 1\n",
-      "2021-09-08 14:54:09,869 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:54:09,931 epoch 9 - iter 1/13 - loss 0.00041678 - samples/sec: 23.25 - lr: 0.020000\n",
-      "2021-09-08 14:54:09,973 epoch 9 - iter 2/13 - loss 0.00096356 - samples/sec: 24.09 - lr: 0.020000\n",
-      "2021-09-08 14:54:10,018 epoch 9 - iter 3/13 - loss 0.08217688 - samples/sec: 22.42 - lr: 0.020000\n",
-      "2021-09-08 14:54:10,060 epoch 9 - iter 4/13 - loss 0.06213917 - samples/sec: 23.97 - lr: 0.020000\n",
-      "2021-09-08 14:54:10,105 epoch 9 - iter 5/13 - loss 0.07005700 - samples/sec: 22.49 - lr: 0.020000\n",
-      "2021-09-08 14:54:10,147 epoch 9 - iter 6/13 - loss 0.05865332 - samples/sec: 23.99 - lr: 0.020000\n",
-      "2021-09-08 14:54:10,189 epoch 9 - iter 7/13 - loss 0.05044391 - samples/sec: 24.19 - lr: 0.020000\n",
-      "2021-09-08 14:54:10,231 epoch 9 - iter 8/13 - loss 0.04429578 - samples/sec: 23.98 - lr: 0.020000\n",
-      "2021-09-08 14:54:10,275 epoch 9 - iter 9/13 - loss 0.12456932 - samples/sec: 22.54 - lr: 0.020000\n",
-      "2021-09-08 14:54:10,318 epoch 9 - iter 10/13 - loss 0.11253949 - samples/sec: 23.93 - lr: 0.020000\n",
-      "2021-09-08 14:54:10,363 epoch 9 - iter 11/13 - loss 0.10684858 - samples/sec: 22.49 - lr: 0.020000\n",
-      "2021-09-08 14:54:10,405 epoch 9 - iter 12/13 - loss 0.09856394 - samples/sec: 23.77 - lr: 0.020000\n",
-      "2021-09-08 14:54:10,447 epoch 9 - iter 13/13 - loss 0.09132636 - samples/sec: 24.10 - lr: 0.020000\n",
-      "2021-09-08 14:54:10,448 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:54:10,448 EPOCH 9 done: loss 0.0913 - lr 0.0200000\n",
-      "2021-09-08 14:54:10,483 DEV : loss 0.07432319223880768 - score 0.0\n",
-      "2021-09-08 14:54:10,484 BAD EPOCHS (no improvement): 2\n",
-      "2021-09-08 14:54:10,486 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:54:10,549 epoch 10 - iter 1/13 - loss 0.59643739 - samples/sec: 22.24 - lr: 0.020000\n",
-      "2021-09-08 14:54:10,596 epoch 10 - iter 2/13 - loss 0.36636036 - samples/sec: 21.42 - lr: 0.020000\n",
-      "2021-09-08 14:54:10,638 epoch 10 - iter 3/13 - loss 0.24540242 - samples/sec: 23.94 - lr: 0.020000\n",
-      "2021-09-08 14:54:10,681 epoch 10 - iter 4/13 - loss 0.18425446 - samples/sec: 23.93 - lr: 0.020000\n",
-      "2021-09-08 14:54:10,722 epoch 10 - iter 5/13 - loss 0.14777071 - samples/sec: 24.16 - lr: 0.020000\n",
-      "2021-09-08 14:54:10,767 epoch 10 - iter 6/13 - loss 0.31792318 - samples/sec: 22.44 - lr: 0.020000\n",
-      "2021-09-08 14:54:10,809 epoch 10 - iter 7/13 - loss 0.27276698 - samples/sec: 24.07 - lr: 0.020000\n",
-      "2021-09-08 14:54:10,851 epoch 10 - iter 8/13 - loss 0.24009371 - samples/sec: 23.98 - lr: 0.020000\n",
-      "2021-09-08 14:54:10,893 epoch 10 - iter 9/13 - loss 0.21349814 - samples/sec: 24.03 - lr: 0.020000\n",
-      "2021-09-08 14:54:10,938 epoch 10 - iter 10/13 - loss 0.19913018 - samples/sec: 22.47 - lr: 0.020000\n",
-      "2021-09-08 14:54:10,981 epoch 10 - iter 11/13 - loss 0.18159742 - samples/sec: 23.92 - lr: 0.020000\n",
-      "2021-09-08 14:54:11,026 epoch 10 - iter 12/13 - loss 0.19172256 - samples/sec: 22.37 - lr: 0.020000\n",
-      "2021-09-08 14:54:11,071 epoch 10 - iter 13/13 - loss 0.18283682 - samples/sec: 22.32 - lr: 0.020000\n",
-      "2021-09-08 14:54:11,072 ----------------------------------------------------------------------------------------------------\n"
+      "2021-09-21 20:38:53,274 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:53,344 epoch 7 - iter 1/13 - loss 0.08897068 - samples/sec: 20.97 - lr: 0.020000\n",
+      "2021-09-21 20:38:53,388 epoch 7 - iter 2/13 - loss 0.07184869 - samples/sec: 23.23 - lr: 0.020000\n",
+      "2021-09-21 20:38:53,434 epoch 7 - iter 3/13 - loss 0.12857227 - samples/sec: 21.74 - lr: 0.020000\n",
+      "2021-09-21 20:38:53,480 epoch 7 - iter 4/13 - loss 0.15495587 - samples/sec: 21.79 - lr: 0.020000\n",
+      "2021-09-21 20:38:53,525 epoch 7 - iter 5/13 - loss 0.12901724 - samples/sec: 22.68 - lr: 0.020000\n",
+      "2021-09-21 20:38:53,568 epoch 7 - iter 6/13 - loss 0.10898352 - samples/sec: 23.31 - lr: 0.020000\n",
+      "2021-09-21 20:38:53,615 epoch 7 - iter 7/13 - loss 0.17038170 - samples/sec: 21.66 - lr: 0.020000\n",
+      "2021-09-21 20:38:53,661 epoch 7 - iter 8/13 - loss 0.16068218 - samples/sec: 21.62 - lr: 0.020000\n",
+      "2021-09-21 20:38:53,705 epoch 7 - iter 9/13 - loss 0.14312938 - samples/sec: 23.36 - lr: 0.020000\n",
+      "2021-09-21 20:38:53,751 epoch 7 - iter 10/13 - loss 0.18030374 - samples/sec: 21.80 - lr: 0.020000\n",
+      "2021-09-21 20:38:53,794 epoch 7 - iter 11/13 - loss 0.16588567 - samples/sec: 23.31 - lr: 0.020000\n",
+      "2021-09-21 20:38:53,840 epoch 7 - iter 12/13 - loss 0.19459748 - samples/sec: 21.87 - lr: 0.020000\n",
+      "2021-09-21 20:38:53,887 epoch 7 - iter 13/13 - loss 0.20151125 - samples/sec: 21.73 - lr: 0.020000\n",
+      "2021-09-21 20:38:53,888 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:53,888 EPOCH 7 done: loss 0.2015 - lr 0.0200000\n",
+      "2021-09-21 20:38:53,921 DEV : loss 0.0662781223654747 - score 0.0\n",
+      "2021-09-21 20:38:53,922 BAD EPOCHS (no improvement): 1\n",
+      "2021-09-21 20:38:53,924 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:53,988 epoch 8 - iter 1/13 - loss 0.28517798 - samples/sec: 22.01 - lr: 0.020000\n",
+      "2021-09-21 20:38:54,035 epoch 8 - iter 2/13 - loss 0.29946524 - samples/sec: 21.74 - lr: 0.020000\n",
+      "2021-09-21 20:38:54,078 epoch 8 - iter 3/13 - loss 0.20232888 - samples/sec: 23.60 - lr: 0.020000\n",
+      "2021-09-21 20:38:54,124 epoch 8 - iter 4/13 - loss 0.47650820 - samples/sec: 21.54 - lr: 0.020000\n",
+      "2021-09-21 20:38:54,168 epoch 8 - iter 5/13 - loss 0.38232540 - samples/sec: 23.14 - lr: 0.020000\n",
+      "2021-09-21 20:38:54,212 epoch 8 - iter 6/13 - loss 0.32023043 - samples/sec: 23.05 - lr: 0.020000\n",
+      "2021-09-21 20:38:54,258 epoch 8 - iter 7/13 - loss 0.29431343 - samples/sec: 21.81 - lr: 0.020000\n",
+      "2021-09-21 20:38:54,305 epoch 8 - iter 8/13 - loss 0.42259696 - samples/sec: 21.60 - lr: 0.020000\n",
+      "2021-09-21 20:38:54,351 epoch 8 - iter 9/13 - loss 0.52439810 - samples/sec: 21.74 - lr: 0.020000\n",
+      "2021-09-21 20:38:54,399 epoch 8 - iter 10/13 - loss 0.48615401 - samples/sec: 20.99 - lr: 0.020000\n",
+      "2021-09-21 20:38:54,446 epoch 8 - iter 11/13 - loss 0.49399053 - samples/sec: 21.71 - lr: 0.020000\n",
+      "2021-09-21 20:38:54,492 epoch 8 - iter 12/13 - loss 0.48028820 - samples/sec: 21.69 - lr: 0.020000\n",
+      "2021-09-21 20:38:54,535 epoch 8 - iter 13/13 - loss 0.44725771 - samples/sec: 23.48 - lr: 0.020000\n",
+      "2021-09-21 20:38:54,536 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:54,537 EPOCH 8 done: loss 0.4473 - lr 0.0200000\n",
+      "2021-09-21 20:38:54,569 DEV : loss 0.05703819915652275 - score 0.0\n",
+      "2021-09-21 20:38:54,570 BAD EPOCHS (no improvement): 2\n",
+      "2021-09-21 20:38:54,572 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:54,634 epoch 9 - iter 1/13 - loss 0.00713214 - samples/sec: 23.14 - lr: 0.020000\n",
+      "2021-09-21 20:38:54,680 epoch 9 - iter 2/13 - loss 0.04225553 - samples/sec: 21.80 - lr: 0.020000\n",
+      "2021-09-21 20:38:54,726 epoch 9 - iter 3/13 - loss 0.04974761 - samples/sec: 21.74 - lr: 0.020000\n",
+      "2021-09-21 20:38:54,769 epoch 9 - iter 4/13 - loss 0.03819942 - samples/sec: 23.63 - lr: 0.020000\n",
+      "2021-09-21 20:38:54,813 epoch 9 - iter 5/13 - loss 0.03088532 - samples/sec: 23.20 - lr: 0.020000\n",
+      "2021-09-21 20:38:54,859 epoch 9 - iter 6/13 - loss 0.16156626 - samples/sec: 21.82 - lr: 0.020000\n",
+      "2021-09-21 20:38:54,905 epoch 9 - iter 7/13 - loss 0.21582245 - samples/sec: 21.71 - lr: 0.020000\n",
+      "2021-09-21 20:38:54,952 epoch 9 - iter 8/13 - loss 0.20115561 - samples/sec: 21.75 - lr: 0.020000\n",
+      "2021-09-21 20:38:54,995 epoch 9 - iter 9/13 - loss 0.17980727 - samples/sec: 23.41 - lr: 0.020000\n",
+      "2021-09-21 20:38:55,041 epoch 9 - iter 10/13 - loss 0.20027310 - samples/sec: 21.81 - lr: 0.020000\n",
+      "2021-09-21 20:38:55,087 epoch 9 - iter 11/13 - loss 0.18801624 - samples/sec: 21.83 - lr: 0.020000\n",
+      "2021-09-21 20:38:55,134 epoch 9 - iter 12/13 - loss 0.18494560 - samples/sec: 21.62 - lr: 0.020000\n",
+      "2021-09-21 20:38:55,181 epoch 9 - iter 13/13 - loss 0.17363929 - samples/sec: 21.50 - lr: 0.020000\n",
+      "2021-09-21 20:38:55,182 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:55,182 EPOCH 9 done: loss 0.1736 - lr 0.0200000\n",
+      "2021-09-21 20:38:55,219 DEV : loss 0.14111456274986267 - score 0.0\n",
+      "2021-09-21 20:38:55,220 BAD EPOCHS (no improvement): 3\n",
+      "2021-09-21 20:38:55,223 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:55,285 epoch 10 - iter 1/13 - loss 0.00709288 - samples/sec: 23.28 - lr: 0.020000\n",
+      "2021-09-21 20:38:55,328 epoch 10 - iter 2/13 - loss 0.01078408 - samples/sec: 23.43 - lr: 0.020000\n",
+      "2021-09-21 20:38:55,371 epoch 10 - iter 3/13 - loss 0.00819882 - samples/sec: 23.39 - lr: 0.020000\n",
+      "2021-09-21 20:38:55,414 epoch 10 - iter 4/13 - loss 0.00739914 - samples/sec: 23.38 - lr: 0.020000\n",
+      "2021-09-21 20:38:55,457 epoch 10 - iter 5/13 - loss 0.00623681 - samples/sec: 23.38 - lr: 0.020000\n",
+      "2021-09-21 20:38:55,500 epoch 10 - iter 6/13 - loss 0.00537381 - samples/sec: 23.40 - lr: 0.020000\n",
+      "2021-09-21 20:38:55,547 epoch 10 - iter 7/13 - loss 0.28434012 - samples/sec: 21.87 - lr: 0.020000\n",
+      "2021-09-21 20:38:55,590 epoch 10 - iter 8/13 - loss 0.24906646 - samples/sec: 23.14 - lr: 0.020000\n",
+      "2021-09-21 20:38:55,636 epoch 10 - iter 9/13 - loss 0.24200244 - samples/sec: 21.82 - lr: 0.020000\n",
+      "2021-09-21 20:38:55,683 epoch 10 - iter 10/13 - loss 0.22211698 - samples/sec: 21.60 - lr: 0.020000\n",
+      "2021-09-21 20:38:55,729 epoch 10 - iter 11/13 - loss 0.24252364 - samples/sec: 21.90 - lr: 0.020000\n",
+      "2021-09-21 20:38:55,775 epoch 10 - iter 12/13 - loss 0.34189978 - samples/sec: 21.99 - lr: 0.020000\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2021-09-08 14:54:11,072 EPOCH 10 done: loss 0.1828 - lr 0.0200000\n",
-      "2021-09-08 14:54:11,107 DEV : loss 0.01197982206940651 - score 0.0\n",
-      "2021-09-08 14:54:11,108 BAD EPOCHS (no improvement): 0\n",
-      "saving best model\n",
-      "2021-09-08 14:54:21,321 ----------------------------------------------------------------------------------------------------\n",
-      "2021-09-08 14:54:21,322 Testing using best model ...\n",
-      "2021-09-08 14:54:21,323 loading file None1/best-model.pt\n",
+      "2021-09-21 20:38:55,822 epoch 10 - iter 13/13 - loss 0.37078715 - samples/sec: 21.59 - lr: 0.020000\n",
+      "2021-09-21 20:38:55,823 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:38:55,823 EPOCH 10 done: loss 0.3708 - lr 0.0200000\n",
+      "2021-09-21 20:38:55,859 DEV : loss 0.09902603179216385 - score 0.0\n",
+      "Epoch    10: reducing learning rate of group 0 to 1.0000e-02.\n",
+      "2021-09-21 20:38:55,859 BAD EPOCHS (no improvement): 4\n",
+      "2021-09-21 20:39:00,121 ----------------------------------------------------------------------------------------------------\n",
+      "2021-09-21 20:39:00,122 Testing using best model ...\n",
+      "2021-09-21 20:39:00,123 loading file None1/best-model.pt\n",
       "init TARS\n",
-      "2021-09-08 14:54:25,991 \t0.5\n",
-      "2021-09-08 14:54:25,992 \n",
+      "2021-09-21 20:39:04,795 \t0.0\n",
+      "2021-09-21 20:39:04,796 \n",
       "Results:\n",
-      "- F-score (micro) 0.5\n",
-      "- F-score (macro) 0.0667\n",
-      "- Accuracy 0.5\n",
+      "- F-score (micro) 0.0\n",
+      "- F-score (macro) 0.0\n",
+      "- Accuracy 0.0\n",
       "\n",
       "By class:\n",
       "                                                                                                                                        precision    recall  f1-score   support\n",
       "\n",
+      "                                                                                                           undertake a journey or trip     0.0000    0.0000    0.0000         0\n",
       "                                                                          the practical application of science to commerce or industry     0.0000    0.0000    0.0000         0\n",
       "                                                                                        a healthy state of wellbeing free from disease     0.0000    0.0000    0.0000         0\n",
       "                                                                                          an adult female person (as opposed to a man)     0.0000    0.0000    0.0000         0\n",
-      " a father or mother; one who begets or one who gives birth to or nurtures and raises a child a relative who plays the role of guardian     0.0000    0.0000    0.0000         0\n",
       "a father or mother; one who begets or one who gives birth to or nurtures and raises a child; a relative who plays the role of guardian     0.0000    0.0000    0.0000         0\n",
       "                                                                       the social event at which the ceremony of marriage is performed     0.0000    0.0000    0.0000         0\n",
       "                                                               the latest and most admired style in clothes and cosmetics and behavior     0.0000    0.0000    0.0000         0\n",
       "                                                                            an activity that is diverting and that holds the attention     0.0000    0.0000    0.0000         0\n",
       "                                                                                           a particular branch of scientific knowledge     0.0000    0.0000    0.0000         0\n",
+      "                                                                                                   the legal dissolution of a marriage     0.0000    0.0000    0.0000         0\n",
       "                                an act punishable by law; usually considered an evil act an evil act not necessarily punishable by law     0.0000    0.0000    0.0000         0\n",
-      "                                                                       an active diversion requiring physical exertion and competition     0.0000    0.0000    0.0000         0\n",
+      "                                                          a strong belief in a supernatural power or powers that control human destiny     0.0000    0.0000    0.0000         0\n",
       "                                                                        social relations involving intrigue to gain authority or power     0.0000    0.0000    0.0000         0\n",
-      "                                                                                          light and humorous drama with a happy ending     0.0000    0.0000    0.0000         0\n",
-      "                                                                                                   the legal dissolution of a marriage     0.0000    0.0000    0.0000         1\n",
-      "                                                          a strong belief in a supernatural power or powers that control human destiny     1.0000    1.0000    1.0000         1\n",
+      "                                                                       an active diversion requiring physical exertion and competition     0.0000    0.0000    0.0000         1\n",
+      "                                                                                          light and humorous drama with a happy ending     0.0000    0.0000    0.0000         1\n",
       "\n",
-      "                                                                                                                             micro avg     0.5000    0.5000    0.5000         2\n",
-      "                                                                                                                             macro avg     0.0667    0.0667    0.0667         2\n",
-      "                                                                                                                          weighted avg     0.5000    0.5000    0.5000         2\n",
-      "                                                                                                                           samples avg     0.5000    0.5000    0.5000         2\n",
+      "                                                                                                                             micro avg     0.0000    0.0000    0.0000         2\n",
+      "                                                                                                                             macro avg     0.0000    0.0000    0.0000         2\n",
+      "                                                                                                                          weighted avg     0.0000    0.0000    0.0000         2\n",
+      "                                                                                                                           samples avg     0.0000    0.0000    0.0000         2\n",
       "\n",
-      "2021-09-08 14:54:25,992 ----------------------------------------------------------------------------------------------------\n",
-      "Accuracy Durchschnitt: 0.30506108202443283\n"
+      "2021-09-21 20:39:04,796 ----------------------------------------------------------------------------------------------------\n",
+      "Accuracy Durchschnitt: 0.30715532286212915\n"
      ]
     }
    ],
@@ -9286,11 +9317,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
    "id": "a310936c",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.3647469458987784, 0.2757417102966841, 0.2862129144851658, 0.29842931937172773, 0.3106457242582897]\n",
+      "0.031082215512590848\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(avg_acc_list)\n",
+    "print(statistics.pstdev(avg_acc_list))"
+   ]
   }
  ],
  "metadata": {
-- 
GitLab