From 2344f0b0f6492233e12c7a6ef83eb26b89deff69 Mon Sep 17 00:00:00 2001
From: Sebastian Pohl <sebpohl@gmx.net>
Date: Tue, 26 Sep 2023 13:13:30 +0200
Subject: [PATCH] new db connector

---
 db_operations/db_connector.py | 48 +++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/db_operations/db_connector.py b/db_operations/db_connector.py
index 6a1e620..657c698 100644
--- a/db_operations/db_connector.py
+++ b/db_operations/db_connector.py
@@ -1,8 +1,56 @@
 from pymongo import MongoClient
+from sshtunnel import SSHTunnelForwarder
 from bson.objectid import ObjectId
 import base64
 
 
+class NewDBConnector:
+    """Connect to MongoDB on the remote host through an SSH tunnel."""
+
+    def __init__(self, db_name=None):
+        """Open the tunnel and connect; db_name=None selects "git_scraping"."""
+        self.db_name = "git_scraping" if db_name is None else db_name
+        MONGO_HOST = "141.20.38.120"
+
+        with open("github_scraping/db_operations/mongodb_user.txt", "r") as f:
+            f.readline()  # MongoDB username, not used by this connector
+            f.readline()  # MongoDB password, not used by this connector
+            remote_username = f.readline().strip()
+            remote_password = f.readline().strip()
+
+        self.server = SSHTunnelForwarder(
+            MONGO_HOST,
+            ssh_username=remote_username,
+            ssh_password=remote_password,
+            remote_bind_address=('127.0.0.1', 27017)
+        )
+        self.server.start()
+        print("DEBUG: server started")
+
+        # Reach MongoDB through the local end of the SSH tunnel.
+        self.client = MongoClient('127.0.0.1', self.server.local_bind_port)
+        self.db = self.client[self.db_name]
+        # full_commits is one of the two collections for commit histories
+        self.full_commits = self.db["data_v1"]
+        # brief_hunks is the other one, reportedly focused on small changes
+        self.brief_hunks = self.db["data_v3_small"]
+        # final_state is the main collection of complete final workflow versions
+        self.final_state = self.db["data_v5_final_repos"]
+        # abandoned attempt to collect more data on repositories with built DAGs
+        self.final_state_full_repo = self.db["final_state_full_repo"]
+        # workflow_structures holds the DAGs that could be built
+        self.workflow_structures = self.db["workflow_structures_v1"]
+        self.dag_histories = self.db["dag_histories"]
+        self.commit_difference_pairs = self.db["commit_difference_pairs"]
+        self.rules = self.db["rules"]
+        self.full_text_rules = self.db["full_text_rules"]
+
+    def close(self):
+        """Close the MongoDB client, then stop the SSH tunnel."""
+        self.client.close()
+        self.server.stop()
+
+
 class DBConnector:
     def __init__(self, connection_string=None, db_name=None):
         if connection_string is None:
-- 
GitLab