Skip to content

Commit

Permalink
Merge pull request #2658 from chaoss/gitlab-changes
Browse files Browse the repository at this point in the history
Add gitlab merge request and issue collection
  • Loading branch information
sgoggins authored Dec 4, 2023
2 parents 57ea05f + f10fb29 commit 979e501
Show file tree
Hide file tree
Showing 12 changed files with 864 additions and 21 deletions.
9 changes: 6 additions & 3 deletions augur/application/cli/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,12 @@ def start(disable_collection, development, port):
logger.info("Deleting old task schedule")
os.remove("celerybeat-schedule.db")

celery_beat_process = None
celery_command = "celery -A augur.tasks.init.celery_app.celery_app beat -l debug"
celery_beat_process = subprocess.Popen(celery_command.split(" "))
with DatabaseSession(logger) as db_session:
config = AugurConfig(logger, db_session)
log_level = config.get_value("Logging", "log_level")
celery_beat_process = None
celery_command = f"celery -A augur.tasks.init.celery_app.celery_app beat -l {log_level.lower()}"
celery_beat_process = subprocess.Popen(celery_command.split(" "))

if not disable_collection:

Expand Down
79 changes: 76 additions & 3 deletions augur/application/db/data_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@ def extract_needed_pr_message_ref_data(comment: dict, pull_request_id: int, repo
def extract_needed_pr_data(pr, repo_id, tool_source, tool_version):


pr_dict = {
pr = {
'repo_id': repo_id,
'pr_url': pr['url'],
# 1-22-2022 inconsistent casting; sometimes int, sometimes float in bulk_insert
Expand Down Expand Up @@ -367,7 +367,7 @@ def extract_needed_pr_data(pr, repo_id, tool_source, tool_version):
'data_source': 'GitHub API'
}

return pr_dict
return pr

def extract_needed_issue_data(issue: dict, repo_id: int, tool_source: str, tool_version: str, data_source: str):

Expand Down Expand Up @@ -513,8 +513,81 @@ def extract_needed_pr_review_data(review, pull_request_id, repo_id, platform_id,

return review_row

def extract_needed_pr_data_from_gitlab_merge_request(pr, repo_id, tool_source, tool_version):
    """Build a pull request row from a gitlab merge request payload.

    Args:
        pr: Merge request dict. The keys read are 'web_url', 'id', 'iid',
            'state', 'discussion_locked', 'title', 'description',
            'created_at', 'updated_at', 'closed_at', 'merged_at',
            'merge_commit_sha' and 'milestone'.
        repo_id: Value stored under 'repo_id'.
        tool_source: Value stored under 'tool_source'.
        tool_version: Value stored under 'tool_version'.

    Returns:
        A dict keyed by pull request field names. Fields that are not filled
        from the merge request are set to None, and 'data_source' is always
        'Gitlab API'.

    Raises:
        KeyError: If one of the keys listed above is missing from ``pr``.
    """
    # The same url fills both the pr url and the html url fields.
    web_url = pr["web_url"]

    merge_request_row = {
        "repo_id": repo_id,
        "pr_url": web_url,
        "pr_src_id": pr["id"],
        "pr_src_node_id": None,
        "pr_html_url": web_url,
        "pr_diff_url": None,
        "pr_patch_url": None,
        "pr_issue_url": None,
        "pr_augur_issue_id": None,
        "pr_src_number": pr["iid"],
        "pr_src_state": pr["state"],
        "pr_src_locked": pr["discussion_locked"],
        "pr_src_title": pr["title"],
        # TODO: Add contributor logic for gitlab
        "pr_augur_contributor_id": None,
        "pr_body": pr["description"],
        "pr_created_at": pr["created_at"],
        "pr_updated_at": pr["updated_at"],
        "pr_closed_at": pr["closed_at"],
        "pr_merged_at": pr["merged_at"],
        "pr_merge_commit_sha": pr["merge_commit_sha"],
        "pr_teams": None,
        # A missing/empty milestone yields None; otherwise only its title is kept.
        "pr_milestone": (pr["milestone"] or {}).get("title"),
        "pr_commits_url": None,
        "pr_review_comments_url": None,
        "pr_review_comment_url": None,
        "pr_comments_url": None,
        "pr_statuses_url": None,
        "pr_meta_head_id": None,
        "pr_meta_base_id": None,
        "pr_src_issue_url": None,
        "pr_src_comments_url": None,
        "pr_src_review_comments_url": None,
        "pr_src_commits_url": None,
        "pr_src_statuses_url": None,
        "pr_src_author_association": None,
        "tool_source": tool_source,
        "tool_version": tool_version,
        "data_source": "Gitlab API",
    }

    return merge_request_row


def extract_needed_issue_data_from_gitlab_issue(issue: dict, repo_id: int, tool_source: str, tool_version: str, data_source: str):
    """Build an issue row from a gitlab issue payload.

    Args:
        issue: Issue dict. The keys read are 'created_at', 'title',
            'description' (optional), 'user_notes_count', 'updated_at',
            'closed_at', '_links' (with 'project', 'self' and 'notes'),
            'state', 'id', 'iid' and 'author' (with 'id').
        repo_id: Value stored under 'repo_id'.
        tool_source: Value stored under 'tool_source'.
        tool_version: Value stored under 'tool_version'.
        data_source: Value stored under 'data_source'.

    Returns:
        A dict keyed by issue field names. Fields that are not filled from
        the issue are set to None.

    Raises:
        KeyError: If a required key is missing from ``issue``.
    """
    # Fields taken straight from the top level of the payload.
    row = {
        "repo_id": repo_id,
        "reporter_id": None,
        "pull_request": None,
        "pull_request_id": None,
        "created_at": issue["created_at"],
        "issue_title": issue["title"],
        # 'description' is the only key allowed to be absent.
        "issue_body": issue.get("description"),
        "comment_count": issue["user_notes_count"],
        "updated_at": issue["updated_at"],
        "closed_at": issue["closed_at"],
    }

    # Url fields all come out of the '_links' sub-dict; the 'self' link is
    # used for both the issue url and the html url.
    links = issue["_links"]
    self_link = links["self"]
    row.update(
        {
            "repository_url": links["project"],
            "issue_url": self_link,
            "labels_url": None,
            "comments_url": links["notes"],
            "events_url": None,
            "html_url": self_link,
            "issue_state": issue["state"],
            "issue_node_id": None,
            "gh_issue_id": issue["id"],
            "gh_issue_number": issue["iid"],
            "gh_user_id": issue["author"]["id"],
            "tool_source": tool_source,
            "tool_version": tool_version,
            "data_source": data_source,
        }
    )

    return row

17 changes: 15 additions & 2 deletions augur/application/db/models/augur_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -1232,10 +1232,22 @@ def insert(session, repo_id):
repo_git = repo.repo_git

collection_status_unique = ["repo_id"]
pr_issue_count = 0
github_weight = 0
if "github" in repo_git:

try:
pr_issue_count = get_repo_weight_by_issue(session.logger, repo_git)
#session.logger.info(f"date weight: {calculate_date_weight_from_timestamps(repo.repo_added, None)}")
github_weight = pr_issue_count - calculate_date_weight_from_timestamps(repo.repo_added, None)
except Exception as e:
pr_issue_count = None
github_weight = None
session.logger.error(
''.join(traceback.format_exception(None, e, e.__traceback__)))

try:
pr_issue_count = get_repo_weight_by_issue(session.logger, repo_git)
#session.logger.info(f"date weight: {calculate_date_weight_from_timestamps(repo.repo_added, None)}")
pr_issue_count = 0
github_weight = pr_issue_count - calculate_date_weight_from_timestamps(repo.repo_added, None)
except Exception as e:
pr_issue_count = None
Expand All @@ -1251,6 +1263,7 @@ def insert(session, repo_id):
"secondary_weight": github_weight,
"ml_weight": github_weight
}


result = session.insert_data(record, CollectionStatus, collection_status_unique, on_conflict_update=False)

Expand Down
2 changes: 1 addition & 1 deletion augur/tasks/github/pull_requests/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def retrieve_all_pr_data(repo_git: str, logger, key_auth) -> None:

return all_data


def process_pull_requests(pull_requests, task_name, repo_id, logger, augur_db):

tool_source = "Pr Task"
Expand Down
2 changes: 1 addition & 1 deletion augur/tasks/github/util/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def parse_json_response(logger: logging.Logger, response: httpx.Response) -> dic
try:
return response.json()
except json.decoder.JSONDecodeError as e:
logger.warning(f"invalid return from GitHub. Response was: {response.text}. Exception: {e}")
logger.warning(f"invalid return. Response was: {response.text}. Exception: {e}")
return json.loads(json.dumps(response.text))

def get_repo_weight_by_issue(logger,repo_git):
Expand Down
12 changes: 6 additions & 6 deletions augur/tasks/gitlab/gitlab_api_key_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class GitlabApiKeyHandler():
Attributes:
session (DatabaseSession): Database connection
logger (logging.Logger): Handles all logs
oauth_redis_key (str): The key where the github api keys are cached in redis
oauth_redis_key (str): The key where the gitlab api keys are cached in redis
redis_key_list (RedisList): Acts like a python list, and interacts directly with the redis cache
config_key (str): The api key that is stored in the users config table
keys (List[str]): List of keys retrieved from the database or cache
Expand All @@ -46,21 +46,21 @@ def get_random_key(self):
"""Retrieves a random key from the list of keys
Returns:
A random github api key
A random gitlab api key
"""

return random.choice(self.keys)

def get_config_key(self) -> str:
"""Retrieves the users github api key from their config table
"""Retrieves the users gitlab api key from their config table
Returns:
Gitlab API key from config table
"""
return self.config.get_value("Keys", "gitlab_api_key")

def get_api_keys_from_database(self) -> List[str]:
"""Retieves all github api keys from database
"""Retieves all gitlab api keys from database
Note:
It retrieves all the keys from the database except the one defined in the users config
Expand Down Expand Up @@ -131,7 +131,7 @@ def get_api_keys(self) -> List[str]:
self.redis_key_list.extend(valid_keys)

if not valid_keys:
raise NoValidKeysError("No valid github api keys found in the config or worker oauth table")
raise NoValidKeysError("No valid gitlab api keys found in the config or worker oauth table")


# shuffling the keys so not all processes get the same keys in the same order
Expand All @@ -152,7 +152,7 @@ def is_bad_api_key(self, client: httpx.Client, oauth_key: str) -> bool:
Args:
client: makes the http requests
oauth_key: github api key that is being tested
oauth_key: gitlab api key that is being tested
Returns:
True if key is bad. False if the key is good
Expand Down
Loading

0 comments on commit 979e501

Please sign in to comment.