From d18baa344af6cfaff572e279b593f7df6d67b82a Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Mon, 27 Nov 2023 19:48:00 -0600 Subject: [PATCH 1/9] Start renaming things to be github specific and adding gitlab functions --- augur/api/routes/user.py | 4 +- augur/application/db/models/augur_data.py | 24 +++ .../application/db/models/augur_operations.py | 23 ++- augur/tasks/frontend.py | 25 ++- .../github/util/github_api_key_handler.py | 2 +- augur/tasks/gitlab/gitlab_api_key_handler.py | 178 ++++++++++++++++++ augur/tasks/gitlab/gitlab_random_key_auth.py | 28 +++ augur/tasks/gitlab/gitlab_task_session.py | 44 +++++ 8 files changed, 312 insertions(+), 16 deletions(-) create mode 100644 augur/tasks/gitlab/gitlab_api_key_handler.py create mode 100644 augur/tasks/gitlab/gitlab_random_key_auth.py create mode 100644 augur/tasks/gitlab/gitlab_task_session.py diff --git a/augur/api/routes/user.py b/augur/api/routes/user.py index dfaeb81f7f..62bc44068a 100644 --- a/augur/api/routes/user.py +++ b/augur/api/routes/user.py @@ -227,7 +227,7 @@ def add_user_repo(): repo = request.args.get("repo_url") group_name = request.args.get("group_name") - result = current_user.add_repo(group_name, repo) + result = current_user.add_github_repo(group_name, repo) return jsonify(result[1]) @@ -260,7 +260,7 @@ def add_user_org(): org = request.args.get("org_url") group_name = request.args.get("group_name") - result = current_user.add_org(group_name, org) + result = current_user.add_github_org(group_name, org) return jsonify(result[1]) diff --git a/augur/application/db/models/augur_data.py b/augur/application/db/models/augur_data.py index f5efceb4e7..cbfd8d6afb 100644 --- a/augur/application/db/models/augur_data.py +++ b/augur/application/db/models/augur_data.py @@ -949,6 +949,30 @@ def parse_github_repo_url(url: str) -> tuple: capturing_groups = result.groups() + owner = capturing_groups[0] + repo = capturing_groups[1] + + return owner, repo + + @staticmethod + def parse_gitlab_repo_url(url: str) -> tuple: + """ Gets the owner and repo from a gitlab url. + + Args: + url: Gitlab url + + Returns: + Tuple of owner and repo. Or a tuple of None and None if the url is invalid. + """ + + result = re.search(r"https?:\/\/gitlab\.com\/([A-Za-z0-9 \- _]+)\/([A-Za-z0-9 \- _ \.]+)(.git)?\/?$", url) + + if not result: + return None, None + + capturing_groups = result.groups() + + owner = capturing_groups[0] repo = capturing_groups[1] diff --git a/augur/application/db/models/augur_operations.py b/augur/application/db/models/augur_operations.py index 63b9144b35..00d3fa3ab3 100644 --- a/augur/application/db/models/augur_operations.py +++ b/augur/application/db/models/augur_operations.py @@ -449,17 +449,30 @@ def remove_group(self, group_name): return result - def add_repo(self, group_name, repo_url): + def add_github_repo(self, group_name, repo_url): from augur.tasks.github.util.github_task_session import GithubTaskSession from augur.tasks.github.util.github_api_key_handler import NoValidKeysError try: with GithubTaskSession(logger) as session: - result = UserRepo.add(session, repo_url, self.user_id, group_name) + result = UserRepo.add_github_repo(session, repo_url, self.user_id, group_name) except NoValidKeysError: return False, {"status": "No valid keys"} return result + + def add_gitlab_repo(self, group_name, repo_url): + + from augur.tasks.gitlab.gitlab_task_session import GitLabTaskSession + from augur.tasks.github.util.github_api_key_handler import NoValidKeysError + try: + with GitLabTaskSession(logger) as session: + result = UserRepo.add_gitlab_repo(session, repo_url, self.user_id, group_name) + except NoValidKeysError: + return False, {"status": "No valid keys"} + + return result + def remove_repo(self, group_name, repo_id): @@ -468,7 +481,7 @@ def remove_repo(self, group_name, repo_id): return result - def add_org(self, group_name, org_url): + def add_github_org(self, group_name, org_url): from augur.tasks.github.util.github_task_session import GithubTaskSession from augur.tasks.github.util.github_api_key_handler import NoValidKeysError @@ -771,7 +784,7 @@ def insert(session, repo_id: int, group_id:int = 1) -> bool: return data[0]["group_id"] == group_id and data[0]["repo_id"] == repo_id @staticmethod - def add(session, url: List[str], user_id: int, group_name=None, group_id=None, from_org_list=False, repo_type=None, repo_group_id=None) -> dict: + def add_github_repo(session, url: List[str], user_id: int, group_name=None, group_id=None, from_org_list=False, repo_type=None, repo_group_id=None) -> dict: """Add repo to the user repo table Args: @@ -911,7 +924,7 @@ def add_org_repos(session, url: List[str], user_id: int, group_name: int): failed_repos = [] for repo in repos: - result = UserRepo.add(session, repo, user_id, group_id=group_id, from_org_list=True, repo_type=type, repo_group_id=repo_group_id) + result = UserRepo.add_github_repo(session, repo, user_id, group_id=group_id, from_org_list=True, repo_type=type, repo_group_id=repo_group_id) # keep track of all the repos that failed if not result[0]: diff --git a/augur/tasks/frontend.py b/augur/tasks/frontend.py index b8eb8b203c..2adc806534 100644 --- a/augur/tasks/frontend.py +++ b/augur/tasks/frontend.py @@ -30,15 +30,15 @@ def add_org_repo_list(user_id, group_name, urls): valid_repos = [] for url in urls: - # matches https://github.com/{org}/ or htts://github.com/{org} + # matches https://github.com/{org}/ or http://github.com/{org} if Repo.parse_github_org_url(url): - added = user.add_org(group_name, url)[0] + added = user.add_github_org(group_name, url)[0] if added: valid_orgs.append(url) - # matches https://github.com/{org}/{repo}/ or htts://github.com/{org}/{repo} + # matches https://github.com/{org}/{repo}/ or http://github.com/{org}/{repo} elif Repo.parse_github_repo_url(url)[0]: - added = user.add_repo(group_name, url)[0] + added = user.add_github_repo(group_name, url)[0] if added: valid_repos.append(url) @@ -46,7 +46,7 @@ def add_org_repo_list(user_id, group_name, urls): elif (match := parse_org_and_repo_name(url)): org, repo = match.groups() repo_url = f"https://github.com/{org}/{repo}/" - added = user.add_repo(group_name, repo_url)[0] + added = user.add_github_repo(group_name, repo_url)[0] if added: valid_repos.append(url) @@ -54,9 +54,17 @@ def add_org_repo_list(user_id, group_name, urls): elif (match := parse_org_name(url)): org = match.group(1) org_url = f"https://github.com/{org}/" - added = user.add_org(group_name, org_url)[0] + added = user.add_github_org(group_name, org_url)[0] if added: valid_orgs.append(url) + + # matches https://gitlab.com/{org}/{repo}/ or http://gitlab.com/{org}/{repo} + elif Repo.parse_gitlab_repo_url(url)[0]: + + #added = user.add_github_repo(group_name, url)[0] + if added: + valid_repos.append(url) + else: invalid_urls.append(url) @@ -66,18 +74,19 @@ def add_org_repo_list(user_id, group_name, urls): - +# TODO: Change to github specific @celery.task def add_repo(user_id, group_name, repo_url): logger = logging.getLogger(add_org.__name__) with GithubTaskSession(logger) as session: - result = UserRepo.add(session, repo_url, user_id, group_name) + result = UserRepo.add_github_repo(session, repo_url, user_id, group_name) print(repo_url, result) +# TODO: Change to github specific @celery.task def add_org(user_id, group_name, org_url): diff --git a/augur/tasks/github/util/github_api_key_handler.py b/augur/tasks/github/util/github_api_key_handler.py index 8a19430e87..20ce07f066 100644 --- a/augur/tasks/github/util/github_api_key_handler.py +++ b/augur/tasks/github/util/github_api_key_handler.py @@ -32,7 +32,7 @@ def __init__(self, session: DatabaseSession): self.logger = session.logger self.config = AugurConfig(self.logger, session) - self.oauth_redis_key = "oauth_keys_list" + self.oauth_redis_key = "github_oauth_keys_list" self.redis_key_list = RedisList(self.oauth_redis_key) diff --git a/augur/tasks/gitlab/gitlab_api_key_handler.py b/augur/tasks/gitlab/gitlab_api_key_handler.py new file mode 100644 index 0000000000..e59a3620ef --- /dev/null +++ b/augur/tasks/gitlab/gitlab_api_key_handler.py @@ -0,0 +1,178 @@ +import httpx +import time +import random + +from typing import Optional, List + +from augur.tasks.util.redis_list import RedisList +from augur.application.db.session import DatabaseSession +from augur.application.config import AugurConfig +from sqlalchemy import func + + +class NoValidKeysError(Exception): + pass + + +class GitlabApiKeyHandler(): + """Handles Gitlab API key retrieval from the database and redis + + Attributes: + session (DatabaseSession): Database connection + logger (logging.Logger): Handles all logs + oauth_redis_key (str): The key where the github api keys are cached in redis + redis_key_list (RedisList): Acts like a python list, and interacts directly with the redis cache + config_key (str): The api key that is stored in the users config table + key: (List[str]): List of keys retrieve from database or cache + """ + + def __init__(self, session: DatabaseSession): + + self.session = session + self.logger = session.logger + self.config = AugurConfig(self.logger, session) + + self.oauth_redis_key = "gitlab_oauth_keys_list" + + self.redis_key_list = RedisList(self.oauth_redis_key) + + self.config_key = self.get_config_key() + + self.keys = self.get_api_keys() + + self.logger.info(f"Retrieved {len(self.keys)} gitlab api keys for use") + + def get_random_key(self): + """Retrieves a random key from the list of keys + + Returns: + A random github api key + """ + + return random.choice(self.keys) + + def get_config_key(self) -> str: + """Retrieves the users github api key from their config table + + Returns: + Github API key from config table + """ + + # TODO: Change to get the gitlab api key + return self.config.get_value("Keys", "github_api_key") + + def get_api_keys_from_database(self) -> List[str]: + """Retieves all github api keys from database + + Note: + It retrieves all the keys from the database except the one defined in the users config + + Returns: + Github api keys that are in the database + """ + from augur.application.db.models import WorkerOauth + + select = WorkerOauth.access_token + # randomizing the order at db time + #select.order_by(func.random()) + # TODO: Change to get gitlab api keys + where = [WorkerOauth.access_token != self.config_key, WorkerOauth.platform == 'github'] + + return [key_tuple[0] for key_tuple in self.session.query(select).filter(*where).order_by(func.random()).all()] + #return [key_tuple[0] for key_tuple in self.session.query(select).filter(*where).all()] + + + def get_api_keys(self) -> List[str]: + """Retrieves all valid Github API Keys + + Note: + It checks to see if the keys are in the redis cache first. + It removes bad keys before returning. + If keys were taken from the database, it caches all the valid keys that were found + + Returns: + Valid Github api keys + """ + + redis_keys = list(self.redis_key_list) + + if redis_keys: + return redis_keys + + attempts = 0 + while attempts < 3: + + try: + keys = self.get_api_keys_from_database() + break + except: + time.sleep(5) + attempts += 1 + + if self.config_key is not None: + keys += [self.config_key] + + if len(keys) == 0: + return [] + + valid_keys = [] + with httpx.Client() as client: + + for key in keys: + + # removes key if it returns "Bad Credentials" + if self.is_bad_api_key(client, key) is False: + valid_keys.append(key) + else: + print(f"WARNING: The key '{key}' is not a valid key. Hint: If valid in past it may have expired") + + # just in case the mulitprocessing adds extra values to the list. + # we are clearing it before we push the values we got + self.redis_key_list.clear() + + # add all the keys to redis + self.redis_key_list.extend(valid_keys) + + if not valid_keys: + raise NoValidKeysError("No valid github api keys found in the config or worker oauth table") + + + # shuffling the keys so not all processes get the same keys in the same order + valid_now = valid_keys + #try: + #self.logger.info(f'valid keys before shuffle: {valid_keys}') + #valid_keys = random.sample(valid_keys, len(valid_keys)) + #self.logger.info(f'valid keys AFTER shuffle: {valid_keys}') + #except Exception as e: + # self.logger.debug(f'{e}') + # valid_keys = valid_now + # pass + + return valid_keys + + # TODO: Change to use gitlab rate limit api + def is_bad_api_key(self, client: httpx.Client, oauth_key: str) -> bool: + """Determines if a Github API is bad + + Args: + client: makes the http requests + oauth_key: github api key that is being tested + + Returns: + True if key is bad. False if the key is good + """ + + # this endpoint allows us to check the rate limit, but it does not use one of our 5000 requests + url = "https://api.github.com/rate_limit" + + headers = {'Authorization': f'token {oauth_key}'} + + data = client.request(method="GET", url=url, headers=headers, timeout=180).json() + + try: + if data["message"] == "Bad credentials": + return True + except KeyError: + pass + + return False \ No newline at end of file diff --git a/augur/tasks/gitlab/gitlab_random_key_auth.py b/augur/tasks/gitlab/gitlab_random_key_auth.py new file mode 100644 index 0000000000..9194a94fe8 --- /dev/null +++ b/augur/tasks/gitlab/gitlab_random_key_auth.py @@ -0,0 +1,28 @@ +"""Defines the GitlabRandomKeyAuth class""" + +from augur.tasks.util.random_key_auth import RandomKeyAuth +from augur.tasks.gitlab.gitlab_api_key_handler import GitlabApiKeyHandler +from augur.application.db.session import DatabaseSession + + +class GitlabRandomKeyAuth(RandomKeyAuth): + """Defines a github specific RandomKeyAuth class so + github collections can have a class randomly selects an api key for each request + """ + + def __init__(self, session: DatabaseSession, logger): + """Creates a GitlabRandomKeyAuth object and initializes the RandomKeyAuth parent class""" + + + # gets the github api keys from the database via the GithubApiKeyHandler + github_api_keys = GitlabApiKeyHandler(session).keys + #github_api_keys = random.sample(github_api_keys, len(github_api_keys)) + + if not github_api_keys: + print("Failed to find github api keys. This is usually because your key has expired") + + # TODO: Change to set key headers how gitlab expects + header_name = "Authorization" + key_format = "token {0}" + + super().__init__(github_api_keys, header_name, session.logger, key_format) \ No newline at end of file diff --git a/augur/tasks/gitlab/gitlab_task_session.py b/augur/tasks/gitlab/gitlab_task_session.py new file mode 100644 index 0000000000..a7a68ec6b5 --- /dev/null +++ b/augur/tasks/gitlab/gitlab_task_session.py @@ -0,0 +1,44 @@ +from logging import Logger + +from augur.tasks.gitlab.gitlab_random_key_auth import GitlabRandomKeyAuth +from augur.application.db.session import DatabaseSession + +class GitlabTaskManifest: + + def __init__(self, logger): + + from augur.tasks.init.celery_app import engine + from augur.application.db.session import DatabaseSession + + self.augur_db = DatabaseSession(logger, engine) + self.key_auth = GitlabRandomKeyAuth(self.augur_db.session, logger) + self.logger = logger + self.platform_id = 2 + + def __enter__(self): + + return self + + def __exit__(self, exception_type, exception_value, exception_traceback): + + self.augur_db.close() + + +class GithubTaskSession(DatabaseSession): + """ORM session used in gitlab tasks. + This class adds the platform_id and the gitlab key authentication class, + to the already existing DatabaseSession so there is a central location to access + api keys and a single platform_id reference + + Attributes: + oauths (GitlabRandomKeyAuth): Class that handles randomly assigning gitlab api keys to httpx requests + platform_id (int): The id that refers to the Gitlab platform + """ + + def __init__(self, logger: Logger, engine=None): + + super().__init__(logger, engine=engine) + + self.oauths = GitlabRandomKeyAuth(self, logger) + self.platform_id = 2 + From 2b25d5989cbd421b19813e394b235b099d6b2fca Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Mon, 27 Nov 2023 20:16:38 -0600 Subject: [PATCH 2/9] Continue changing code to insert gitlab repos Signed-off-by: Andrew Brain --- augur/api/routes/dei.py | 2 +- augur/application/db/models/augur_data.py | 93 ++++++++++++++++++- .../application/db/models/augur_operations.py | 71 +++++++++++++- augur/tasks/frontend.py | 4 +- augur/util/repo_load_controller.py | 2 +- .../test_models/test_augur_data/test_repo.py | 12 +-- .../test_augur_operations/test_user_repo.py | 14 +-- 7 files changed, 177 insertions(+), 21 deletions(-) diff --git a/augur/api/routes/dei.py b/augur/api/routes/dei.py index dea79b79c2..82324a8d62 100644 --- a/augur/api/routes/dei.py +++ b/augur/api/routes/dei.py @@ -52,7 +52,7 @@ def dei_track_repo(application: ClientApplication): return jsonify({"status": "Repo already exists"}) frontend_repo_group: RepoGroup = session.query(RepoGroup).filter(RepoGroup.rg_name == FRONTEND_REPO_GROUP_NAME).first() - repo_id = Repo.insert(session, repo_url, frontend_repo_group.repo_group_id, "API.DEI", repo_type="") + repo_id = Repo.insert_github_repo(session, repo_url, frontend_repo_group.repo_group_id, "API.DEI", repo_type="") if not repo_id: return jsonify({"status": "Error adding repo"}) diff --git a/augur/application/db/models/augur_data.py b/augur/application/db/models/augur_data.py index cbfd8d6afb..9a2f14e8bb 100644 --- a/augur/application/db/models/augur_data.py +++ b/augur/application/db/models/augur_data.py @@ -929,6 +929,50 @@ def is_valid_github_repo(gh_session, url: str) -> bool: return False, {"status": f"Github Error: {data['message']}"} return True, {"status": "Valid repo", "repo_type": data["owner"]["type"]} + + # TODO: Change to use gitlab api + @staticmethod + def is_valid_gitlab_repo(gl_session, url: str) -> bool: + """Determine whether repo url is valid. + + Args: + url: repo_url + + Returns + True if repo url is valid and False if not + """ + from augur.tasks.github.util.github_paginator import hit_api + + REPO_ENDPOINT = "https://api.github.com/repos/{}/{}" + + if not gl_session.oauths.list_of_keys: + return False, {"status": "No valid github api keys to retrieve data with"} + + owner, repo = Repo.parse_gitlab_repo_url(url) + if not owner or not repo: + return False, {"status":"Invalid repo url"} + + url = REPO_ENDPOINT.format(owner, repo) + + attempts = 0 + while attempts < 10: + result = hit_api(gl_session.oauths, url, logger) + + # if result is None try again + if not result: + attempts+=1 + continue + + data = result.json() + # if there was an error return False + if "message" in data.keys(): + + if data["message"] == "Not Found": + return False, {"status": "Invalid repo"} + + return False, {"status": f"Gitlab Error: {data['message']}"} + + return True, {"status": "Valid repo", "repo_type": data["owner"]["type"]} @staticmethod def parse_github_repo_url(url: str) -> tuple: @@ -998,7 +1042,54 @@ def parse_github_org_url(url): return result.groups()[0] @staticmethod - def insert(session, url: str, repo_group_id: int, tool_source, repo_type): + def insert_githlab_repo(session, url: str, repo_group_id: int, tool_source, repo_type): + """Add a repo to the repo table. + + Args: + url: repo url + repo_group_id: group to assign repo to + + Note: + If repo row exists then it will update the repo_group_id if param repo_group_id is not a default. If it does not exist is will simply insert the repo. + """ + + if not isinstance(url, str) or not isinstance(repo_group_id, int) or not isinstance(tool_source, str) or not isinstance(repo_type, str): + return None + + if not RepoGroup.is_valid_repo_group_id(session, repo_group_id): + return None + + if url.endswith("/"): + url = url[:-1] + + url = url.lower() + + owner, repo = Repo.parse_gitlab_repo_url(url) + if not owner or not repo: + return None + + repo_data = { + "repo_group_id": repo_group_id, + "repo_git": url, + "repo_path": f"gitlab.com/{owner}/", + "repo_name": repo, + "repo_type": repo_type, + "tool_source": tool_source, + "tool_version": "1.0", + "data_source": "Git" + } + + repo_unique = ["repo_git"] + return_columns = ["repo_id"] + result = session.insert_data(repo_data, Repo, repo_unique, return_columns, on_conflict_update=False) + + if not result: + return None + + return result[0]["repo_id"] + + @staticmethod + def insert_github_repo(session, url: str, repo_group_id: int, tool_source, repo_type): """Add a repo to the repo table. Args: diff --git a/augur/application/db/models/augur_operations.py b/augur/application/db/models/augur_operations.py index 00d3fa3ab3..37f0e2f420 100644 --- a/augur/application/db/models/augur_operations.py +++ b/augur/application/db/models/augur_operations.py @@ -488,7 +488,7 @@ def add_github_org(self, group_name, org_url): try: with GithubTaskSession(logger) as session: - result = UserRepo.add_org_repos(session, org_url, self.user_id, group_name) + result = UserRepo.add_github_org_repos(session, org_url, self.user_id, group_name) except NoValidKeysError: return False, {"status": "No valid keys"} @@ -782,6 +782,71 @@ def insert(session, repo_id: int, group_id:int = 1) -> bool: return False return data[0]["group_id"] == group_id and data[0]["repo_id"] == repo_id + + @staticmethod + def add_gitlab_repo(session, url: List[str], user_id: int, group_name=None, group_id=None, from_org_list=False, repo_type=None, repo_group_id=None) -> dict: + """Add repo to the user repo table + + Args: + urls: list of repo urls + user_id: id of user_id from users table + group_name: name of group to add repo to. + group_id: id of the group + valid_repo: boolean that indicates whether the repo has already been validated + + Note: + Either the group_name or group_id can be passed not both + + Returns: + Dict that contains the key "status" and additional useful data + """ + + if group_name and group_id: + return False, {"status": "Pass only the group name or group id not both"} + + if not group_name and not group_id: + return False, {"status": "Need group name or group id to add a repo"} + + if from_org_list and not repo_type: + return False, {"status": "Repo type must be passed if the repo is from an organization's list of repos"} + + if group_id is None: + + group_id = UserGroup.convert_group_name_to_id(session, user_id, group_name) + if group_id is None: + return False, {"status": "Invalid group name"} + + if not from_org_list: + result = Repo.is_valid_gitlab_repo(session, url) + if not result[0]: + return False, {"status": result[1]["status"], "repo_url": url} + + repo_type = result[1]["repo_type"] + + # if no repo_group_id is passed then assign the repo to the frontend repo group + if repo_group_id is None: + + frontend_repo_group = session.query(RepoGroup).filter(RepoGroup.rg_name == FRONTEND_REPO_GROUP_NAME).first() + if not frontend_repo_group: + return False, {"status": "Could not find repo group with name 'Frontend Repos'", "repo_url": url} + + repo_group_id = frontend_repo_group.repo_group_id + + + repo_id = Repo.insert_gitlab_repo(session, url, repo_group_id, "Frontend", repo_type) + if not repo_id: + return False, {"status": "Repo insertion failed", "repo_url": url} + + result = UserRepo.insert(session, repo_id, group_id) + if not result: + return False, {"status": "repo_user insertion failed", "repo_url": url} + + #collection_status records are now only added during collection -IM 5/1/23 + #status = CollectionStatus.insert(session, repo_id) + #if not status: + # return False, {"status": "Failed to create status for repo", "repo_url": url} + + return True, {"status": "Repo Added", "repo_url": url} @staticmethod def add_github_repo(session, url: List[str], user_id: int, group_name=None, group_id=None, from_org_list=False, repo_type=None, repo_group_id=None) -> dict: @@ -833,7 +898,7 @@ def add_github_repo(session, url: List[str], user_id: int, group_name=None, grou repo_group_id = frontend_repo_group.repo_group_id - repo_id = Repo.insert(session, url, repo_group_id, "Frontend", repo_type) + repo_id = Repo.insert_github_repo(session, url, repo_group_id, "Frontend", repo_type) if not repo_id: return False, {"status": "Repo insertion failed", "repo_url": url} @@ -875,7 +940,7 @@ def delete(session, repo_id:int, user_id:int, group_name:str) -> dict: return True, {"status": "Repo Removed"} @staticmethod - def add_org_repos(session, url: List[str], user_id: int, group_name: int): + def add_github_org_repos(session, url: List[str], user_id: int, group_name: int): """Add list of orgs and their repos to a users repos. Args: diff --git a/augur/tasks/frontend.py b/augur/tasks/frontend.py index 2adc806534..fffd79d330 100644 --- a/augur/tasks/frontend.py +++ b/augur/tasks/frontend.py @@ -61,7 +61,7 @@ def add_org_repo_list(user_id, group_name, urls): # matches https://gitlab.com/{org}/{repo}/ or http://gitlab.com/{org}/{repo} elif Repo.parse_gitlab_repo_url(url)[0]: - #added = user.add_github_repo(group_name, url)[0] + added = user.add_gitlab_repo(group_name, url)[0] if added: valid_repos.append(url) @@ -93,6 +93,6 @@ def add_org(user_id, group_name, org_url): logger = logging.getLogger(add_org.__name__) with GithubTaskSession(logger) as session: - result = UserRepo.add_org_repos(session, org_url, user_id, group_name) + result = UserRepo.add_github_org_repos(session, org_url, user_id, group_name) print(org_url, result) diff --git a/augur/util/repo_load_controller.py b/augur/util/repo_load_controller.py index b5ff5d07a0..943e3373a6 100644 --- a/augur/util/repo_load_controller.py +++ b/augur/util/repo_load_controller.py @@ -62,7 +62,7 @@ def add_cli_repo(self, repo_data: Dict[str, Any], from_org_list=False, repo_type # if the repo doesn't exist it adds it - repo_id = Repo.insert(self.session, url, repo_group_id, "CLI", repo_type) + repo_id = Repo.insert_github_repo(self.session, url, repo_group_id, "CLI", repo_type) if not repo_id: logger.warning(f"Invalid repo group id specified for {url}, skipping.") diff --git a/tests/test_applicaton/test_db/test_models/test_augur_data/test_repo.py b/tests/test_applicaton/test_db/test_models/test_augur_data/test_repo.py index bf22254244..1c32472f32 100644 --- a/tests/test_applicaton/test_db/test_models/test_augur_data/test_repo.py +++ b/tests/test_applicaton/test_db/test_models/test_augur_data/test_repo.py @@ -77,20 +77,20 @@ def test_insert_repo(test_db_engine): with DatabaseSession(logger, test_db_engine) as session: - assert Repo.insert(session, data["repo_urls"][0], data["rg_id"], data["tool_source"]) is not None - assert Repo.insert(session, data["repo_urls"][1], data["rg_id"], data["tool_source"]) is not None + assert Repo.insert_github_repo(session, data["repo_urls"][0], data["rg_id"], data["tool_source"]) is not None + assert Repo.insert_github_repo(session, data["repo_urls"][1], data["rg_id"], data["tool_source"]) is not None # invalid rg_id - assert Repo.insert(session, data["repo_urls"][0], 12, data["tool_source"]) is None + assert Repo.insert_github_repo(session, data["repo_urls"][0], 12, data["tool_source"]) is None # invalid type for repo url - assert Repo.insert(session, 1, data["rg_id"], data["tool_source"]) is None + assert Repo.insert_github_repo(session, 1, data["rg_id"], data["tool_source"]) is None # invalid type for rg_id - assert Repo.insert(session, data["repo_urls"][1], "1", data["tool_source"]) is None + assert Repo.insert_github_repo(session, data["repo_urls"][1], "1", data["tool_source"]) is None # invalid type for tool_source - assert Repo.insert(session, data["repo_urls"][1], data["rg_id"], 52) is None + assert Repo.insert_github_repo(session, data["repo_urls"][1], data["rg_id"], 52) is None with test_db_engine.connect() as connection: diff --git a/tests/test_applicaton/test_db/test_models/test_augur_operations/test_user_repo.py b/tests/test_applicaton/test_db/test_models/test_augur_operations/test_user_repo.py index 3fc5451791..4b288cbabb 100644 --- a/tests/test_applicaton/test_db/test_models/test_augur_operations/test_user_repo.py +++ b/tests/test_applicaton/test_db/test_models/test_augur_operations/test_user_repo.py @@ -124,7 +124,7 @@ def test_add_frontend_repos_with_invalid_repo(test_db_engine): with GithubTaskSession(logger, test_db_engine) as session: - result = UserRepo.add(session, url, data["user_id"], data["user_group_name"]) + result = UserRepo.add_github_repo(session, url, data["user_id"], data["user_group_name"]) assert result[1]["status"] == "Invalid repo" @@ -163,11 +163,11 @@ def test_add_frontend_repos_with_duplicates(test_db_engine): with GithubTaskSession(logger, test_db_engine) as session: - result = UserRepo.add(session, url, data["user_id"], data["user_group_name"]) - result2 = UserRepo.add(session, url, data["user_id"], data["user_group_name"]) + result = UserRepo.add_github_repo(session, url, data["user_id"], data["user_group_name"]) + result2 = UserRepo.add_github_repo(session, url, data["user_id"], data["user_group_name"]) # add repo with invalid group name - result3 = UserRepo.add(session, url, data["user_id"], "Invalid group name") + result3 = UserRepo.add_github_repo(session, url, data["user_id"], "Invalid group name") assert result[1]["status"] == "Repo Added" assert result2[1]["status"] == "Repo Added" @@ -263,11 +263,11 @@ def test_add_frontend_org_with_invalid_org(test_db_engine): with GithubTaskSession(logger, test_db_engine) as session: url = f"https://github.com/{data['org_name']}/" - result = UserRepo.add_org_repos(session, url, data["user_id"], data["user_group_name"]) + result = UserRepo.add_github_org_repos(session, url, data["user_id"], data["user_group_name"]) assert result[1]["status"] == "Invalid owner url" # test with invalid group name - result = UserRepo.add_org_repos(session, url, data["user_id"], "Invalid group name") + result = UserRepo.add_github_org_repos(session, url, data["user_id"], "Invalid group name") assert result[1]["status"] == "Invalid group name" with test_db_engine.connect() as connection: @@ -305,7 +305,7 @@ def test_add_frontend_org_with_valid_org(test_db_engine): with GithubTaskSession(logger, test_db_engine) as session: url = "https://github.com/{}/".format(data["org_name"]) - result = UserRepo.add_org_repos(session, url, data["user_id"], data["user_group_name"]) + result = UserRepo.add_github_org_repos(session, url, data["user_id"], data["user_group_name"]) assert result[1]["status"] == "Org repos added" with test_db_engine.connect() as connection: From ca7702904de7bb819b3021973bcbdd55a24b62cc Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Tue, 28 Nov 2023 17:19:47 -0600 Subject: [PATCH 3/9] Update gitlab api key handler and gitlab random key auth to use gitlab --- augur/tasks/gitlab/gitlab_api_key_handler.py | 25 +++++++------------- augur/tasks/gitlab/gitlab_random_key_auth.py | 3 +-- 2 files changed, 9 insertions(+), 19 deletions(-) diff --git a/augur/tasks/gitlab/gitlab_api_key_handler.py b/augur/tasks/gitlab/gitlab_api_key_handler.py index e59a3620ef..fabc04543f 100644 --- a/augur/tasks/gitlab/gitlab_api_key_handler.py +++ b/augur/tasks/gitlab/gitlab_api_key_handler.py @@ -57,9 +57,7 @@ def get_config_key(self) -> str: Returns: Github API key from config table """ - - # TODO: Change to get the gitlab api key - return self.config.get_value("Keys", "github_api_key") + return self.config.get_value("Keys", "gitlab_api_key") def get_api_keys_from_database(self) -> List[str]: """Retieves all github api keys from database @@ -75,8 +73,7 @@ def get_api_keys_from_database(self) -> List[str]: select = WorkerOauth.access_token # randomizing the order at db time #select.order_by(func.random()) - # TODO: Change to get gitlab api keys - where = [WorkerOauth.access_token != self.config_key, WorkerOauth.platform == 'github'] + where = [WorkerOauth.access_token != self.config_key, WorkerOauth.platform == 'gitlab'] return [key_tuple[0] for key_tuple in self.session.query(select).filter(*where).order_by(func.random()).all()] #return [key_tuple[0] for key_tuple in self.session.query(select).filter(*where).all()] @@ -150,9 +147,8 @@ def get_api_keys(self) -> List[str]: return valid_keys - # TODO: Change to use gitlab rate limit api def is_bad_api_key(self, client: httpx.Client, oauth_key: str) -> bool: - """Determines if a Github API is bad + """Determines if a Gitlab API key is bad Args: client: makes the http requests @@ -162,17 +158,12 @@ def is_bad_api_key(self, client: httpx.Client, oauth_key: str) -> bool: True if key is bad. False if the key is good """ - # this endpoint allows us to check the rate limit, but it does not use one of our 5000 requests url = "https://api.github.com/rate_limit" - headers = {'Authorization': f'token {oauth_key}'} - - data = client.request(method="GET", url=url, headers=headers, timeout=180).json() - - try: - if data["message"] == "Bad credentials": - return True - except KeyError: - pass + headers = {'Authorization': f'Bearer {oauth_key}'} + response = client.request(method="GET", url=url, headers=headers, timeout=180) + if response.status_code == 401: + return True + return False \ No newline at end of file diff --git a/augur/tasks/gitlab/gitlab_random_key_auth.py b/augur/tasks/gitlab/gitlab_random_key_auth.py index 9194a94fe8..7543f5a310 100644 --- a/augur/tasks/gitlab/gitlab_random_key_auth.py +++ b/augur/tasks/gitlab/gitlab_random_key_auth.py @@ -21,8 +21,7 @@ def __init__(self, session: DatabaseSession, logger): if not github_api_keys: print("Failed to find github api keys. This is usually because your key has expired") - # TODO: Change to set key headers how gitlab expects header_name = "Authorization" - key_format = "token {0}" + key_format = "Bearer {0}" super().__init__(github_api_keys, header_name, session.logger, key_format) \ No newline at end of file From 63b6350fe1b32300c70bf7260bf885d4591560ec Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Tue, 28 Nov 2023 17:29:34 -0600 Subject: [PATCH 4/9] Change is_valid_gitlab_repo to use gitlab api --- augur/application/db/models/augur_data.py | 44 +++++++++++------------ 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/augur/application/db/models/augur_data.py b/augur/application/db/models/augur_data.py index 9a2f14e8bb..a90a2254f4 100644 --- a/augur/application/db/models/augur_data.py +++ b/augur/application/db/models/augur_data.py @@ -930,49 +930,45 @@ def is_valid_github_repo(gh_session, url: str) -> bool: return True, {"status": "Valid repo", "repo_type": data["owner"]["type"]} - # TODO: Change to use gitlab api @staticmethod def is_valid_gitlab_repo(gl_session, url: str) -> bool: - """Determine whether repo url is valid. + """Determine whether a GitLab repo URL is valid. Args: - url: repo_url + gl_session: GitLab session object with API key + url: Repository URL - Returns - True if repo url is valid and False if not + Returns: + True if repo URL is valid, False otherwise """ - from augur.tasks.github.util.github_paginator import hit_api - - REPO_ENDPOINT = "https://api.github.com/repos/{}/{}" - - if not gl_session.oauths.list_of_keys: - return False, {"status": "No valid github api keys to retrieve data with"} + REPO_ENDPOINT = "https://gitlab.com/api/v4/projects/{}/{}" owner, repo = Repo.parse_gitlab_repo_url(url) if not owner or not repo: - return False, {"status":"Invalid repo url"} + return False, {"status": "Invalid repo URL"} - url = REPO_ENDPOINT.format(owner, repo) + # Encode namespace and project name for the API request + project_identifier = f"{owner}%2F{repo}" + url = REPO_ENDPOINT.format(project_identifier) attempts = 0 while attempts < 10: - result = hit_api(gl_session.oauths, url, logger) + response = gl_session.get(url) - # if result is None try again - if not result: - attempts+=1 + if response.status_code != 200: + attempts += 1 continue - data = result.json() - # if there was an error return False - if "message" in data.keys(): + if response.status_code == 404: + return False, {"status": "Invalid repo"} - if data["message"] == "Not Found": - return False, {"status": "Invalid repo"} + if response.status_code != 200: + return False, {"status": f"GitLab Error: {response.json().get('message', 'Unknown error')}"} - return False, {"status": f"Gitlab Error: {data['message']}"} + return True, {"status": "Valid repo"} + + return False, {"status": "Failed to validate repo after multiple attempts"} - return True, {"status": "Valid repo", "repo_type": data["owner"]["type"]} @staticmethod def parse_github_repo_url(url: str) -> tuple: From 8446c04e81a7a2ef9da38efa032ddb316337c63b Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Tue, 28 Nov 2023 17:32:39 -0600 Subject: [PATCH 5/9] Clean up method --- augur/application/db/models/augur_data.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/augur/application/db/models/augur_data.py b/augur/application/db/models/augur_data.py index a90a2254f4..47eeffcdca 100644 --- a/augur/application/db/models/augur_data.py +++ b/augur/application/db/models/augur_data.py @@ -941,6 +941,8 @@ def is_valid_gitlab_repo(gl_session, url: str) -> bool: Returns: True if repo URL is valid, False otherwise """ + from augur.tasks.github.util.github_paginator import hit_api + REPO_ENDPOINT = "https://gitlab.com/api/v4/projects/{}/{}" owner, repo = Repo.parse_gitlab_repo_url(url) @@ -953,19 +955,15 @@ def is_valid_gitlab_repo(gl_session, url: str) -> bool: attempts = 0 while attempts < 10: - response = gl_session.get(url) - - if response.status_code != 200: - attempts += 1 - continue + response = hit_api(gl_session.oauths, url, logger) if response.status_code == 404: return False, {"status": "Invalid repo"} - if response.status_code != 200: - return False, {"status": f"GitLab Error: {response.json().get('message', 'Unknown error')}"} + if response.status_code == 200: + return True, {"status": "Valid repo"} - return True, {"status": "Valid repo"} + attempts += 1 return False, {"status": "Failed to validate repo after multiple attempts"} @@ -988,7 +986,6 @@ def parse_github_repo_url(url: str) -> tuple: capturing_groups = result.groups() - owner = capturing_groups[0] repo = capturing_groups[1] From 9a0fb0845ea9f88aabcc0e7265e7cd4e4d918208 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Tue, 28 Nov 2023 17:37:22 -0600 Subject: [PATCH 6/9] Renaming --- augur/tasks/gitlab/gitlab_random_key_auth.py | 9 ++++----- augur/tasks/gitlab/gitlab_task_session.py | 3 +-- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/augur/tasks/gitlab/gitlab_random_key_auth.py b/augur/tasks/gitlab/gitlab_random_key_auth.py index 7543f5a310..86ad64b056 100644 --- a/augur/tasks/gitlab/gitlab_random_key_auth.py +++ b/augur/tasks/gitlab/gitlab_random_key_auth.py @@ -14,14 +14,13 @@ def __init__(self, session: DatabaseSession, logger): """Creates a GitlabRandomKeyAuth object and initializes the RandomKeyAuth parent class""" - # gets the github api keys from the database via the GithubApiKeyHandler - github_api_keys = GitlabApiKeyHandler(session).keys - #github_api_keys = random.sample(github_api_keys, len(github_api_keys)) + # gets the gitlab api keys from the database via the GitlabApiKeyHandler + gitlab_api_keys = GitlabApiKeyHandler(session).keys - if not github_api_keys: + if not gitlab_api_keys: print("Failed to find github api keys. This is usually because your key has expired") header_name = "Authorization" key_format = "Bearer {0}" - super().__init__(github_api_keys, header_name, session.logger, key_format) \ No newline at end of file + super().__init__(gitlab_api_keys, header_name, session.logger, key_format) \ No newline at end of file diff --git a/augur/tasks/gitlab/gitlab_task_session.py b/augur/tasks/gitlab/gitlab_task_session.py index a7a68ec6b5..1871e46c50 100644 --- a/augur/tasks/gitlab/gitlab_task_session.py +++ b/augur/tasks/gitlab/gitlab_task_session.py @@ -23,8 +23,7 @@ def __exit__(self, exception_type, exception_value, exception_traceback): self.augur_db.close() - -class GithubTaskSession(DatabaseSession): +class GitlabTaskSession(DatabaseSession): """ORM session used in gitlab tasks. This class adds the platform_id and the gitlab key authentication class, to the already existing DatabaseSession so there is a central location to access From dccb8bc9989e92d05b8ae51401c566a63efd00fa Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Tue, 28 Nov 2023 17:42:56 -0600 Subject: [PATCH 7/9] Fix insert logic to work for gitlab --- augur/application/db/models/augur_data.py | 4 ++-- augur/application/db/models/augur_operations.py | 9 ++------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/augur/application/db/models/augur_data.py b/augur/application/db/models/augur_data.py index 47eeffcdca..4f2bfec5ac 100644 --- a/augur/application/db/models/augur_data.py +++ b/augur/application/db/models/augur_data.py @@ -1035,7 +1035,7 @@ def parse_github_org_url(url): return result.groups()[0] @staticmethod - def insert_githlab_repo(session, url: str, repo_group_id: int, tool_source, repo_type): + def insert_gitlab_repo(session, url: str, repo_group_id: int, tool_source): """Add a repo to the repo table. Args: @@ -1066,7 +1066,7 @@ def insert_githlab_repo(session, url: str, repo_group_id: int, tool_source, repo "repo_git": url, "repo_path": f"gitlab.com/{owner}/", "repo_name": repo, - "repo_type": repo_type, + "repo_type": None, "tool_source": tool_source, "tool_version": "1.0", "data_source": "Git" diff --git a/augur/application/db/models/augur_operations.py b/augur/application/db/models/augur_operations.py index 37f0e2f420..a0b57ca4ee 100644 --- a/augur/application/db/models/augur_operations.py +++ b/augur/application/db/models/augur_operations.py @@ -784,7 +784,7 @@ def insert(session, repo_id: int, group_id:int = 1) -> bool: return data[0]["group_id"] == group_id and data[0]["repo_id"] == repo_id @staticmethod - def add_gitlab_repo(session, url: List[str], user_id: int, group_name=None, group_id=None, from_org_list=False, repo_type=None, repo_group_id=None) -> dict: + def add_gitlab_repo(session, url: List[str], user_id: int, group_name=None, group_id=None, from_org_list=False, repo_group_id=None) -> dict: """Add repo to the user repo table Args: @@ -807,9 +807,6 @@ def add_gitlab_repo(session, url: List[str], user_id: int, group_name=None, grou if not group_name and not group_id: return False, {"status": "Need group name or group id to add a repo"} - if from_org_list and not repo_type: - return False, {"status": "Repo type must be passed if the repo is from an organization's list of repos"} - if group_id is None: group_id = UserGroup.convert_group_name_to_id(session, user_id, group_name) @@ -821,8 +818,6 @@ def add_gitlab_repo(session, url: List[str], user_id: int, group_name=None, grou if not result[0]: return False, {"status": result[1]["status"], "repo_url": url} - repo_type = result[1]["repo_type"] - # if no repo_group_id is passed then assign the repo to the frontend repo group if repo_group_id is None: @@ -833,7 +828,7 @@ def add_gitlab_repo(session, url: List[str], user_id: int, group_name=None, grou repo_group_id = frontend_repo_group.repo_group_id - repo_id = Repo.insert_gitlab_repo(session, url, repo_group_id, "Frontend", repo_type) + repo_id = Repo.insert_gitlab_repo(session, url, repo_group_id, "Frontend") if not repo_id: return False, {"status": "Repo insertion failed", "repo_url": url} From ec0cfff791745ea9d02e991125cc54c914359c21 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Tue, 28 Nov 2023 19:02:37 -0600 Subject: [PATCH 8/9] Successfully insert gitlab repo --- augur/api/view/api.py | 13 ++++++++++++- augur/application/db/models/augur_data.py | 4 ++-- augur/application/db/models/augur_operations.py | 4 ++-- augur/tasks/gitlab/gitlab_api_key_handler.py | 2 +- 4 files changed, 17 insertions(+), 6 deletions(-) diff --git a/augur/api/view/api.py b/augur/api/view/api.py index 287b079436..598c0cdb6d 100644 --- a/augur/api/view/api.py +++ b/augur/api/view/api.py @@ -102,7 +102,18 @@ def av_add_user_repo(): if rg_obj: # add the orgs repos to the group add_existing_org_to_group(session, current_user.user_id, group, rg_obj.repo_group_id) - + + # matches https://gitlab.com/{org}/{repo}/ or http://gitlab.com/{org}/{repo} + elif Repo.parse_gitlab_repo_url(url)[0]: + + org_name, repo_name = Repo.parse_github_repo_url(url) + repo_git = f"https://gitlab.com/{org_name}/{repo_name}" + + # TODO: gitlab ensure the whole repo git is inserted so it can be found here + repo_obj = Repo.get_by_repo_git(session, repo_git) + if repo_obj: + add_existing_repo_to_group(session, current_user.user_id, group, repo_obj.repo_id) + else: invalid_urls.append(url) diff --git a/augur/application/db/models/augur_data.py b/augur/application/db/models/augur_data.py index 4f2bfec5ac..cfbcdca1d7 100644 --- a/augur/application/db/models/augur_data.py +++ b/augur/application/db/models/augur_data.py @@ -943,7 +943,7 @@ def is_valid_gitlab_repo(gl_session, url: str) -> bool: """ from augur.tasks.github.util.github_paginator import hit_api - REPO_ENDPOINT = "https://gitlab.com/api/v4/projects/{}/{}" + REPO_ENDPOINT = "https://gitlab.com/api/v4/projects/{}/" owner, repo = Repo.parse_gitlab_repo_url(url) if not owner or not repo: @@ -1046,7 +1046,7 @@ def insert_gitlab_repo(session, url: str, repo_group_id: int, tool_source): If repo row exists then it will update the repo_group_id if param repo_group_id is not a default. If it does not exist is will simply insert the repo. """ - if not isinstance(url, str) or not isinstance(repo_group_id, int) or not isinstance(tool_source, str) or not isinstance(repo_type, str): + if not isinstance(url, str) or not isinstance(repo_group_id, int) or not isinstance(tool_source, str): return None if not RepoGroup.is_valid_repo_group_id(session, repo_group_id): diff --git a/augur/application/db/models/augur_operations.py b/augur/application/db/models/augur_operations.py index a0b57ca4ee..8da2b397fa 100644 --- a/augur/application/db/models/augur_operations.py +++ b/augur/application/db/models/augur_operations.py @@ -463,10 +463,10 @@ def add_github_repo(self, group_name, repo_url): def add_gitlab_repo(self, group_name, repo_url): - from augur.tasks.gitlab.gitlab_task_session import GitLabTaskSession + from augur.tasks.gitlab.gitlab_task_session import GitlabTaskSession from augur.tasks.github.util.github_api_key_handler import NoValidKeysError try: - with GitLabTaskSession(logger) as session: + with GitlabTaskSession(logger) as session: result = UserRepo.add_gitlab_repo(session, repo_url, self.user_id, group_name) except NoValidKeysError: return False, {"status": "No valid keys"} diff --git a/augur/tasks/gitlab/gitlab_api_key_handler.py b/augur/tasks/gitlab/gitlab_api_key_handler.py index fabc04543f..7e6b359f5e 100644 --- a/augur/tasks/gitlab/gitlab_api_key_handler.py +++ b/augur/tasks/gitlab/gitlab_api_key_handler.py @@ -158,7 +158,7 @@ def is_bad_api_key(self, client: httpx.Client, oauth_key: str) -> bool: True if key is bad. False if the key is good """ - url = "https://api.github.com/rate_limit" + url = "https://gitlab.com/api/v4/user" headers = {'Authorization': f'Bearer {oauth_key}'} From 291feae14238a43972d166002f13c5bc8c6b5793 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Tue, 28 Nov 2023 19:31:43 -0600 Subject: [PATCH 9/9] Ensure that gitlab repos aren't being started --- augur/tasks/util/collection_util.py | 39 +++++++++++++++-------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/augur/tasks/util/collection_util.py b/augur/tasks/util/collection_util.py index 4d5b663a20..1d5ddd79c5 100644 --- a/augur/tasks/util/collection_util.py +++ b/augur/tasks/util/collection_util.py @@ -587,27 +587,28 @@ def send_messages(self): for col_hook in self.collection_hooks: self.logger.info(f"Starting collection on {len(col_hook.repo_list)} {col_hook.name} repos") - + for repo_git in col_hook.repo_list: - #repo = self.session.query(Repo).filter(Repo.repo_git == repo_git).one() - #repo_id = repo.repo_id - - augur_collection_sequence = [] - for job in col_hook.phases: - #Add the phase to the sequence in order as a celery task. - #The preliminary task creates the larger task chain - augur_collection_sequence.append(job(repo_git)) - - #augur_collection_sequence.append(core_task_success_util.si(repo_git)) - #Link all phases in a chain and send to celery - augur_collection_chain = chain(*augur_collection_sequence) - task_id = augur_collection_chain.apply_async().task_id - - self.logger.info(f"Setting repo {col_hook.name} status to collecting for repo: {repo_git}") - - #yield the value of the task_id to the calling method so that the proper collectionStatus field can be updated - yield repo_git, task_id, col_hook.name + repo = self.session.query(Repo).filter(Repo.repo_git == repo_git).one() + if "github" in repo.repo_git: + augur_collection_sequence = [] + for job in col_hook.phases: + #Add the phase to the sequence in order as a celery task. + #The preliminary task creates the larger task chain + augur_collection_sequence.append(job(repo_git)) + + #augur_collection_sequence.append(core_task_success_util.si(repo_git)) + #Link all phases in a chain and send to celery + augur_collection_chain = chain(*augur_collection_sequence) + task_id = augur_collection_chain.apply_async().task_id + + self.logger.info(f"Setting repo {col_hook.name} status to collecting for repo: {repo_git}") + + #yield the value of the task_id to the calling method so that the proper collectionStatus field can be updated + yield repo_git, task_id, col_hook.name + else: + print(f"Unable to start collection for {repo.repo_git}") #def start_block_of_repos(logger,session,repo_git_identifiers,phases,repos_type,hook="core"): #