From 2344f0b0f6492233e12c7a6ef83eb26b89deff69 Mon Sep 17 00:00:00 2001 From: Sebastian Pohl <sebpohl@gmx.net> Date: Tue, 26 Sep 2023 13:13:30 +0200 Subject: [PATCH] new db connector --- db_operations/db_connector.py | 48 +++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/db_operations/db_connector.py b/db_operations/db_connector.py index 6a1e620..657c698 100644 --- a/db_operations/db_connector.py +++ b/db_operations/db_connector.py @@ -1,8 +1,56 @@ from pymongo import MongoClient +from sshtunnel import SSHTunnelForwarder from bson.objectid import ObjectId import base64 +class NewDBConnector: + def __init__(self, db_name=None): + if db_name is None: + self.db_name = "git_scraping" + else: + self.db_name = db_name + + MONGO_HOST = "141.20.38.120" + MONGO_DB = "github_scraping" + + with open("github_scraping/db_operations/mongodb_user.txt", "r") as f: + mongodb_username = f.readline().strip() + mongodb_password = f.readline().strip() + remote_username = f.readline().strip() + remote_password = f.readline().strip() + + self.server = SSHTunnelForwarder( + MONGO_HOST, + ssh_username=remote_username, + ssh_password=remote_password, + remote_bind_address=('127.0.0.1', 27017) + ) + + self.server.start() + + print("DEBUG: server started") + + # access new db + self.client = MongoClient('127.0.0.1', self.server.local_bind_port) + + self.db = self.client[self.db_name] + # full commits is on of the two databases for commit histories + self.full_commits = self.db["data_v1"] + # brief hunks is another database for commit histories focusing on small changes as I recall + self.brief_hunks = self.db["data_v3_small"] + # final_state is the main collection for complete final versions of the workflows + self.final_state = self.db["data_v5_final_repos"] + # final state ful repo is an abandoned attempt to collect more data on thise repositories for which we built dags + self.final_state_full_repo = self.db["final_state_full_repo"] + # workflow structures is a collection for the dags we were able to built + self.workflow_structures = self.db["workflow_structures_v1"] + self.dag_histories = self.db["dag_histories"] + self.commit_difference_pairs = self.db["commit_difference_pairs"] + self.rules = self.db["rules"] + self.full_text_rules = self.db["full_text_rules"] + + class DBConnector: def __init__(self, connection_string=None, db_name=None): if connection_string is None: -- GitLab