From f0909635e08d1786e1e74d591190a6c1185c9740 Mon Sep 17 00:00:00 2001 From: Samyak S Sarnayak Date: Fri, 29 Apr 2022 00:17:54 +0530 Subject: [PATCH 1/5] feat(connector): add trino connector support required some hacks in data loader and KPI validation Co-authored-by: Rajdeep Sharma --- chaos_genius/connectors/__init__.py | 2 ++ chaos_genius/core/utils/data_loader.py | 10 +++++----- chaos_genius/core/utils/kpi_validation.py | 2 +- .../third_party/integration_server_config.py | 9 +++++++++ chaos_genius/utils/metadata_api_config.py | 12 ++++++++++-- 5 files changed, 27 insertions(+), 8 deletions(-) diff --git a/chaos_genius/connectors/__init__.py b/chaos_genius/connectors/__init__.py index 19682fdb2..fadcf0847 100644 --- a/chaos_genius/connectors/__init__.py +++ b/chaos_genius/connectors/__init__.py @@ -4,6 +4,7 @@ from chaos_genius.connectors.snowflake import SnowflakeDb from chaos_genius.connectors.redshift import Redshift from chaos_genius.connectors.druid import Druid +from chaos_genius.connectors.trino import Trino DB_CLASS_MAPPER = { @@ -13,6 +14,7 @@ "Snowflake": SnowflakeDb, "Redshift": Redshift, "Druid": Druid, + "Trino": Trino, } diff --git a/chaos_genius/core/utils/data_loader.py b/chaos_genius/core/utils/data_loader.py index 115ac8bdd..98c6a30ee 100644 --- a/chaos_genius/core/utils/data_loader.py +++ b/chaos_genius/core/utils/data_loader.py @@ -105,14 +105,14 @@ def _convert_date_to_string(self, date: date, offset: str): # we shouldn't need to take offset as a string, but rather # take in a pytz timezone and skip using strings. date = date.strftime("%Y-%m-%d") - date += f"T00:00:00{offset}" + date += f" 00:00:00{offset}" if not self.kpi_info.get("timezone_aware"): date = ( - pd.Timestamp(datetime.strptime(date, "%Y-%m-%dT%H:%M:%S%z")) + pd.Timestamp(datetime.strptime(date, "%Y-%m-%d %H:%M:%S%z")) .tz_convert(self.connection_info["database_timezone"]) .tz_localize(None) # TODO: We should also use date.isoformat() here - .strftime("%Y-%m-%dT%H:%M:%S") + .strftime("%Y-%m-%d %H:%M:%S") ) return date @@ -130,10 +130,10 @@ def _build_date_filter(self) -> List[str]: filters = [] if self.start_date is not None: start_date_str = self._convert_date_to_string(self.start_date, tz_offset_string) - filters.append(f"{dt_col_str} >= '{start_date_str}'") + filters.append(f"{dt_col_str} >= timestamp '{start_date_str}'") if self.end_date is not None: end_date_str = self._convert_date_to_string(self.end_date, tz_offset_string) - filters.append(f"{dt_col_str} < '{end_date_str}'") + filters.append(f"{dt_col_str} < timestamp '{end_date_str}'") return filters diff --git a/chaos_genius/core/utils/kpi_validation.py b/chaos_genius/core/utils/kpi_validation.py index 518cdce98..f66033a92 100644 --- a/chaos_genius/core/utils/kpi_validation.py +++ b/chaos_genius/core/utils/kpi_validation.py @@ -40,7 +40,7 @@ def validate_kpi(kpi_info: Dict[str, Any], check_tz_aware: bool = False) -> Tupl connection_info = DataSource.get_by_id( kpi_info["data_source"] ).as_dict - supports_date_string_parsing = connection_info["name"] == "Druid" + supports_date_string_parsing = connection_info["connection_type"] in {"Druid", "Trino"} status, message = _validate_kpi_from_df( df, diff --git a/chaos_genius/third_party/integration_server_config.py b/chaos_genius/third_party/integration_server_config.py index bdc003e02..b11e839bd 100644 --- a/chaos_genius/third_party/integration_server_config.py +++ b/chaos_genius/third_party/integration_server_config.py @@ -64,6 +64,7 @@ "e2d65910-8c8b-40a1-ae7d-ee2416b2bfa2": False, # Snowflake "e87ffa8e-a3b5-f69c-9076-6011339de1f6": False, # Redshift "5B45DB62-303C-4E70-92DA-419D3CDBD506": False, # Druid + "ea6b0872-b25b-4591-8829-7e46d3276a5b": False, # Trino # "29b409d9-30a5-4cc8-ad50-886eb846fea3", # Quickbooks } @@ -116,6 +117,14 @@ "username": "username", "password": "password", "db_type": "druid" + }, + "ea6b0872-b25b-4591-8829-7e46d3276a5b": { + "host": "host", + "port": "port", + "username": "username", + "password": "password", + "catalog": "catalog", + "db_type": "trino" } } diff --git a/chaos_genius/utils/metadata_api_config.py b/chaos_genius/utils/metadata_api_config.py index e53d000ce..6838d9d0d 100644 --- a/chaos_genius/utils/metadata_api_config.py +++ b/chaos_genius/utils/metadata_api_config.py @@ -4,7 +4,8 @@ "Redshift": True, "BigQuery": False, "Snowflake": True, - "Druid": False + "Druid": False, + "Trino": True, } TABLE_VIEW_MATERIALIZED_VIEW_AVAILABILITY = { @@ -49,6 +50,13 @@ "materialized_views": True, "supported_aggregations": ["sum", "count"], "supports_multidim_dd": False + }, + "Trino": { + "tables": True, + "views": True, + "materialized_views": True, + "supported_aggregations": ["mean", "sum", "count"], + "supports_multidim_dd": True, } } @@ -66,4 +74,4 @@ if conf["supports_multidim_dd"] ] -NON_THIRD_PARTY_DATASOURCES = TABLE_VIEW_MATERIALIZED_VIEW_AVAILABILITY.keys() \ No newline at end of file +NON_THIRD_PARTY_DATASOURCES = TABLE_VIEW_MATERIALIZED_VIEW_AVAILABILITY.keys() From a82a4649adc607a1c9d8218b080a44aa2dc929a6 Mon Sep 17 00:00:00 2001 From: Samyak S Sarnayak Date: Fri, 29 Apr 2022 00:19:34 +0530 Subject: [PATCH 2/5] feat(connectors): add trino in data_connection_config --- .../third_party/data_connection_config.json | 783 +++++++++++++++++- 1 file changed, 782 insertions(+), 1 deletion(-) diff --git a/chaos_genius/third_party/data_connection_config.json b/chaos_genius/third_party/data_connection_config.json index 7bc65dd28..4b4cbeed1 100644 --- a/chaos_genius/third_party/data_connection_config.json +++ b/chaos_genius/third_party/data_connection_config.json @@ -1 +1,782 @@ -[{"connectionSpecification":{"$schema":"http://json-schema.org/draft-07/schema#","additionalProperties":true,"properties":{"conversion_window_days":{"default":14,"description":"Define the historical replication lookback window in days","examples":[14],"maximum":1095,"minimum":0,"title":"Conversion Window","type":"integer"},"credentials":{"properties":{"client_id":{"description":"Google client id. More instruction on how to find this value in our docs","title":"Client Id","type":"string"},"client_secret":{"airbyte_secret":true,"description":"Google client secret. More instruction on how to find this value in our docs","title":"Client Secret","type":"string"},"developer_token":{"airbyte_secret":true,"description":"Developer token granted by Google to use their APIs. More instruction on how to find this value in our docs","title":"Developer Token","type":"string"},"refresh_token":{"airbyte_secret":true,"description":"Refresh token generated using developer_token, oauth_client_id, and oauth_client_secret. More instruction on how to find this value in our docs","title":"Refresh Token","type":"string"}},"required":["developer_token","client_id","client_secret","refresh_token"],"title":"Google Credentials","type":"object"},"customer_id":{"description":"Customer id must be specified as a 10-digit number without dashes. More instruction on how to find this value in our docs","title":"Customer Id","type":"string"},"login_customer_id":{"description":"If your access to the customer account is through a manager account, this field is required and must be set to the customer ID of the manager account (10-digit number without dashes). More information about this field you can see here","title":"Login Customer ID","type":"string"},"start_date":{"description":"UTC date and time in the format 2017-01-25. Any data before this date will not be replicated.","examples":["2017-01-25"],"pattern":"^[0-9]{4}-[0-9]{2}-[0-9]{2}$","title":"Start Date","type":"string"}},"required":["credentials","start_date","customer_id"],"title":"Google Ads Spec","type":"object"},"dockerImageTag":"0.1.8","dockerRepository":"airbyte/source-google-ads","documentationUrl":"https://hub.docker.com/r/airbyte/source-google-ads","icon":"\r\n\r\n\r\n\r\n\r\n\t\r\n\t\r\n\t\r\n\t\r\n\t\r\n\r\n\r\n","isThirdParty":true,"name":"Google Ads","sourceDefinitionId":"253487c0-2246-43ba-a21f-5116b20a2c50"},{"connectionSpecification":{"$schema":"http://json-schema.org/draft-07/schema#","additionalProperties":false,"properties":{"credentials_json":{"airbyte_secret":true,"description":"The contents of the JSON service account key. Check out the docs if you need help generating this key.","title":"Credentials JSON","type":"string"},"custom_reports":{"description":"A JSON array describing the custom reports you want to sync from GA. Check out the docs to get more information about this field.","title":"Custom Reports","type":"string"},"start_date":{"description":"A date in the format YYYY-MM-DDT00:00:00Z.","examples":["2020-06-01T00:00:00Z"],"title":"Start Date","type":"string"},"view_id":{"description":"The ID for the Google Analytics View you want to fetch data from. This can be found from the Google Analytics Account Explorer.","title":"View ID","type":"string"}},"required":["credentials_json","view_id","start_date"],"title":"Chaos Genius Google Analytics Source Spec","type":"object"},"dockerImageTag":"0.2.6","dockerRepository":"airbyte/source-googleanalytics-singer","documentationUrl":"https://hub.docker.com/r/airbyte/source-googleanalytics-singer","icon":"\n\n\n\n\t\n\t\t\n\t\n\t\n\t\t\n\t\n\t\n\t\t\n\t\n\n\n","isThirdParty":true,"name":"Google Analytics","sourceDefinitionId":"39f092a6-8c87-4f6f-a8d9-5cef45b7dbe1"},{"connectionSpecification":{"$schema":"http://json-schema.org/draft-07/schema#","additionalProperties":false,"properties":{"database":{"description":"Name of the database.","order":2,"type":"string"},"host":{"description":"Hostname of the database.","order":0,"type":"string"},"jdbc_url_params":{"description":"Additional properties to pass to the jdbc url string when connecting to the database formatted as 'key=value' pairs separated by the symbol '&'. (example: key1=value1&key2=value2&key3=value3)","order":5,"type":"string"},"password":{"airbyte_secret":true,"description":"Password associated with the username.","order":4,"type":"string"},"port":{"default":3306,"description":"Port of the database.","examples":["3306"],"maximum":65536,"minimum":0,"order":1,"type":"integer"},"replication_method":{"default":"STANDARD","description":"Replication method to use for extracting data from the database. STANDARD replication requires no setup on the DB side but will not be able to represent deletions incrementally. CDC uses the Binlog to detect inserts, updates, and deletes. This needs to be configured on the source database itself.","enum":["STANDARD","CDC"],"order":6,"title":"Replication Method","type":"string"},"username":{"description":"Username to use to access the database.","order":3,"type":"string"}},"required":["host","port","database","username","replication_method"],"title":"MySql Source Spec","type":"object"},"dockerImageTag":"0.3.9","dockerRepository":"airbyte/source-mysql","documentationUrl":"https://docs.chaosgenius.io/docs/Data_Sources_Catalog/mysql","icon":"\n\n","isThirdParty":false,"name":"MySQL","sourceDefinitionId":"435bb9a5-7887-4809-aa58-28c27df0d7ad"},{"connectionSpecification":{"$schema":"http://json-schema.org/draft-07/schema#","additionalProperties":false,"properties":{"accounts":{"description":"Account selection strategy.","oneOf":[{"additionalProperties":false,"description":"Fetch data for all available accounts.","properties":{"selection_strategy":{"default":"all","enum":["all"],"type":"string"}},"required":["selection_strategy"],"title":"All accounts assigned to your user"},{"additionalProperties":false,"description":"Fetch data for subset of account ids.","properties":{"ids":{"description":"List of accounts from which data will be fetched.","items":{"type":"string"},"minItems":1,"type":"array","uniqueItems":true},"selection_strategy":{"default":"subset","enum":["subset"],"type":"string"}},"required":["ids","selection_strategy"],"title":"Subset of your accounts"}],"title":"Accounts","type":"object"},"client_id":{"airbyte_secret":true,"description":"ID of your Microsoft Advertising client application.","type":"string"},"client_secret":{"airbyte_secret":true,"description":"Secret of your Microsoft Advertising client application.","type":"string"},"customer_id":{"description":"User's customer ID.","type":"string"},"developer_token":{"airbyte_secret":true,"description":"Developer token associated with user.","type":"string"},"refresh_token":{"airbyte_secret":true,"description":"The long-lived Refresh token received via grant_type=refresh_token request.","type":"string"},"user_id":{"description":"Unique user identifier.","type":"string"}},"required":["accounts","client_id","client_secret","customer_id","developer_token","refresh_token","user_id"],"title":"Bing Ads Spec","type":"object"},"dockerImageTag":"0.1.0","dockerRepository":"airbyte/source-bing-ads","documentationUrl":"https://docs.chaosgenius.io/docs/Data_Sources_Catalog/Bing%20Ads","icon":"","isThirdParty":true,"name":"Bing Ads","sourceDefinitionId":"47f25999-dd5e-4636-8c39-e7cea2453331"},{"connectionSpecification":{"$schema":"http://json-schema.org/draft-07/schema#","additionalProperties":false,"properties":{"credentials_json":{"airbyte_secret":true,"description":"The contents of the JSON service account key. See the docs for more information on how to generate this key.","type":"string"},"spreadsheet_id":{"description":"The ID of the spreadsheet to be replicated.","type":"string"}},"required":["spreadsheet_id","credentials_json"],"title":"Stripe Source Spec","type":"object"},"dockerImageTag":"0.2.3","dockerRepository":"airbyte/source-google-sheets","documentationUrl":"https://hub.docker.com/repository/docker/airbyte/source-google-sheets","icon":"\n\n\n\n\t\n\t\n\t\n\n\n","isThirdParty":true,"name":"Google Sheets","sourceDefinitionId":"71607ba1-c0ac-4799-8049-7f4b90dd50f7"},{"connectionSpecification":{"$schema":"http://json-schema.org/draft-07/schema#","additionalProperties":false,"properties":{"api_password":{"airbyte_secret":true,"description":"The API PASSWORD for a private application in Shopify shop.","type":"string"},"shop":{"description":"The name of the shopify store. For https://EXAMPLE.myshopify.com, the shop name is 'EXAMPLE'.","type":"string"},"start_date":{"description":"The date you would like to replicate data. Format: YYYY-MM-DD.","examples":["2021-01-01"],"pattern":"^[0-9]{4}-[0-9]{2}-[0-9]{2}$","type":"string"}},"required":["shop","start_date","api_password"],"title":"Shopify Source CDK Specifications","type":"object"},"dockerImageTag":"0.1.9","dockerRepository":"airbyte/source-shopify","documentationUrl":"https://docs.chaosgenius.io/docs/Data_Sources_Catalog/Shopify","icon":"","isThirdParty":true,"name":"Shopify","sourceDefinitionId":"9da77001-af33-4bcd-be46-6252bf9342b9"},{"connectionSpecification":{"$schema":"http://json-schema.org/draft-07/schema#","additionalProperties":false,"properties":{"credentials_json":{"airbyte_secret":true,"description":"The contents of the JSON service account key. Check out the docs if you need help generating this key.","title":"Credentials JSON","type":"string"},"dataset_id":{"description":"The BigQuery Dataset ID to look for tables to replicate from.","title":"Default Dataset ID","type":"string"},"project_id":{"description":"The GCP project ID for the project containing the target BigQuery dataset.","title":"Project ID","type":"string"}},"required":["project_id","credentials_json"],"title":"BigQuery Source Spec","type":"object"},"dockerImageTag":"0.1.1","dockerRepository":"airbyte/source-bigquery","documentationUrl":"https://docs.chaosgenius.io/docs/Data_Sources_Catalog/bigquery","icon":"","isThirdParty":false,"name":"BigQuery","sourceDefinitionId":"bfd1ddf8-ae8a-4620-b1d7-55597d2ba08c"},{"connectionSpecification":{"$schema":"http://json-schema.org/draft-07/schema#","additionalProperties":false,"properties":{"database":{"description":"Name of the database.","order":2,"title":"DB Name","type":"string"},"host":{"description":"Hostname of the database.","order":0,"title":"Host","type":"string"},"password":{"airbyte_secret":true,"description":"Password associated with the username.","order":4,"title":"Password","type":"string"},"port":{"default":5432,"description":"Port of the database.","examples":["5432"],"maximum":65536,"minimum":0,"order":1,"title":"Port","type":"integer"},"replication_method":{"description":"Replication method to use for extracting data from the database.","oneOf":[{"additionalProperties":false,"description":"Standard replication requires no setup on the DB side but will not be able to represent deletions incrementally.","properties":{"method":{"default":"Standard","enum":["Standard"],"order":0,"type":"string"}},"required":["method"],"title":"Standard"},{"additionalProperties":false,"description":"Logical replication uses the Postgres write-ahead log (WAL) to detect inserts, updates, and deletes. This needs to be configured on the source database itself. Only available on Postgres 10 and above. Read the Postgres Source docs for more information.","properties":{"method":{"default":"CDC","enum":["CDC"],"order":0,"type":"string"},"publication":{"description":"A Postgres publication used for consuming changes.","order":2,"type":"string"},"replication_slot":{"description":"A pgoutput logical replication slot.","order":1,"type":"string"}},"required":["method","replication_slot","publication"],"title":"Logical Replication (CDC)"}],"order":6,"title":"Replication Method","type":"object"},"ssl":{"default":false,"description":"Encrypt client/server communications for increased security.","order":5,"title":"Connect using SSL","type":"boolean"},"username":{"description":"Username to use to access the database.","order":3,"title":"User","type":"string"}},"required":["host","port","database","username"],"title":"Postgres Source Spec","type":"object"},"dockerImageTag":"0.3.5","dockerRepository":"airbyte/source-postgres","documentationUrl":"https://hub.docker.com/r/airbyte/source-postgres","icon":"\r\n\r\n\r\n\r\n\r\n\t\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n","isThirdParty":false,"name":"Postgres","sourceDefinitionId":"decd338e-5647-4c0b-adf4-da0e75f5a750"},{"connectionSpecification":{"$schema":"http://json-schema.org/draft-07/schema#","additionalProperties":false,"properties":{"account_id":{"description":"Your Stripe account ID (starts with 'acct_', find yours here).","type":"string"},"client_secret":{"airbyte_secret":true,"description":"Stripe API key (usually starts with 'sk_live_'; find yours here).","pattern":"^(s|r)k_(live|test)_[a-zA-Z0-9]+$","type":"string"},"start_date":{"description":"UTC date and time in the format 2017-01-25T00:00:00Z. Any data before this date will not be replicated.","examples":["2017-01-25T00:00:00Z"],"pattern":"^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$","type":"string"}},"required":["client_secret","account_id","start_date"],"title":"Stripe Source Spec","type":"object"},"dockerImageTag":"0.1.13","dockerRepository":"airbyte/source-stripe","documentationUrl":"https://hub.docker.com/r/airbyte/source-stripe","icon":"Asset 32Stone Hub","isThirdParty":true,"name":"Stripe","sourceDefinitionId":"e094cb9a-26de-4645-8761-65c0c425d1de"},{"connectionSpecification":{"$schema":"http://json-schema.org/draft-07/schema#","additionalProperties":false,"properties":{"database":{"description":"The database you created for Chaos Genius to access data into.","examples":["CG_DATABASE"],"order":3,"title":"Database","type":"string"},"host":{"description":"Host domain of the snowflake instance (must include the account, region, cloud environment).","examples":["accountname.us-east-2.aws"],"order":0,"title":"Account name","type":"string"},"password":{"airbyte_secret":true,"description":"Password associated with the username.","order":6,"title":"Password","type":"string"},"role":{"description":"The role you created for Chaos Genius to access Snowflake.","examples":["CG_ROLE"],"order":1,"title":"Role","type":"string"},"schema":{"description":"The source Snowflake schema tables.","examples":["CG_SCHEMA"],"order":4,"title":"Schema","type":"string"},"username":{"description":"The username you created to allow Chaos Genius to access the database.","examples":["CG_USER"],"order":5,"title":"Username","type":"string"},"warehouse":{"description":"The warehouse you created for Chaos Genius to access data into.","examples":["CG_WAREHOUSE"],"order":2,"title":"Warehouse","type":"string"}},"required":["host","role","warehouse","database","schema","username","password"],"title":"Snowflake Source Spec","type":"object"},"dockerImageTag":"0.1.0","dockerRepository":"airbyte/source-snowflake","documentationUrl":"https://docs.chaosgenius.io/docs/Data_Sources_Catalog/snowflake","icon":"Logo-SnowflakeCreated with Sketch.","isThirdParty":false,"name":"Snowflake","sourceDefinitionId":"e2d65910-8c8b-40a1-ae7d-ee2416b2bfa2"},{"connectionSpecification":{"properties":{"access_token":{"airbyte_secret":true,"description":"The value of the access token generated. See the docs for more information","title":"Access Token","type":"string"},"account_id":{"description":"The Facebook Ad account ID to use when pulling data from the Facebook Marketing API.","title":"Account Id","type":"string"},"include_deleted":{"default":false,"description":"Include data from deleted campaigns, ads, and adsets.","title":"Include Deleted","type":"boolean"},"insights_days_per_job":{"default":7,"description":"Number of days to sync in one job. The more data you have - the smaller you want this parameter to be.","maximum":30,"minimum":1,"title":"Insights Days Per Job","type":"integer"},"insights_lookback_window":{"default":28,"description":"The attribution window for the actions","maximum":28,"minimum":0,"title":"Insights Lookback Window","type":"integer"},"start_date":{"description":"The date from which you'd like to replicate data for AdCreatives and AdInsights APIs, in the format YYYY-MM-DDT00:00:00Z. All data generated after this date will be replicated.","examples":["2017-01-25T00:00:00Z"],"format":"date-time","pattern":"^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$","title":"Start Date","type":"string"}},"required":["account_id","access_token","start_date"],"title":"Source Facebook Marketing","type":"object"},"dockerImageTag":"0.2.13","dockerRepository":"airbyte/source-facebook-marketing","documentationUrl":"https://hub.docker.com/r/airbyte/source-facebook-marketing","icon":"\nimage/svg+xml\n","isThirdParty":true,"name":"Facebook Marketing","sourceDefinitionId":"e7778cfc-e97c-4458-9ecb-b4f2bba8946c"},{"dockerImageTag":"0.3.2","dockerRepository":"airbyte/source-redshift","documentationUrl":"https://docs.chaosgenius.io/docs/Data_Sources_Catalog/redshift","icon":"","name":"Redshift","sourceDefinitionId":"e87ffa8e-a3b5-f69c-9076-6011339de1f6","connectionSpecification":{"$schema":"http://json-schema.org/draft-07/schema#","title":"Redshift Source Spec","type":"object","required":["host","port","database","username","password"],"additionalProperties":false,"properties":{"host":{"description":"Host Endpoint of the Redshift Cluster (must include the cluster-id, region and end with .redshift.amazonaws.com)","type":"string"},"port":{"description":"Port of the database.","type":"integer","minimum":0,"maximum":65536,"default":5439,"examples":["5439"]},"database":{"description":"Name of the database.","type":"string","examples":["master"]},"username":{"description":"Username to use to access the database.","type":"string"},"password":{"description":"Password associated with the username.","type":"string","airbyte_secret":true}}},"isThirdParty":false},{"connectionSpecification":{"properties":{"host":{"description":"Hostname of the database.","order":0,"title":"Host","type":"string"},"password":{"airbyte_secret":true,"description":"Password associated with the username.","order":4,"title":"Password","type":"string"},"port":{"default":8082,"description":"Port of the database.","examples":["5432"],"maximum":65536,"minimum":0,"order":1,"title":"Port","type":"integer"},"username":{"description":"Username to use to access the database.","order":3,"title":"User","type":"string"}},"required":["host","port"],"title":"Druid Source Spec","type":"object"},"icon":"Apache Druid logo","isThirdParty":false,"name":"Druid","sourceDefinitionId":"5B45DB62-303C-4E70-92DA-419D3CDBD506"}] +[ + { + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": true, + "properties": { + "conversion_window_days": { + "default": 14, + "description": "Define the historical replication lookback window in days", + "examples": [14], + "maximum": 1095, + "minimum": 0, + "title": "Conversion Window", + "type": "integer" + }, + "credentials": { + "properties": { + "client_id": { + "description": "Google client id. More instruction on how to find this value in our docs", + "title": "Client Id", + "type": "string" + }, + "client_secret": { + "airbyte_secret": true, + "description": "Google client secret. More instruction on how to find this value in our docs", + "title": "Client Secret", + "type": "string" + }, + "developer_token": { + "airbyte_secret": true, + "description": "Developer token granted by Google to use their APIs. More instruction on how to find this value in our docs", + "title": "Developer Token", + "type": "string" + }, + "refresh_token": { + "airbyte_secret": true, + "description": "Refresh token generated using developer_token, oauth_client_id, and oauth_client_secret. More instruction on how to find this value in our docs", + "title": "Refresh Token", + "type": "string" + } + }, + "required": [ + "developer_token", + "client_id", + "client_secret", + "refresh_token" + ], + "title": "Google Credentials", + "type": "object" + }, + "customer_id": { + "description": "Customer id must be specified as a 10-digit number without dashes. More instruction on how to find this value in our docs", + "title": "Customer Id", + "type": "string" + }, + "login_customer_id": { + "description": "If your access to the customer account is through a manager account, this field is required and must be set to the customer ID of the manager account (10-digit number without dashes). More information about this field you can see here", + "title": "Login Customer ID", + "type": "string" + }, + "start_date": { + "description": "UTC date and time in the format 2017-01-25. Any data before this date will not be replicated.", + "examples": ["2017-01-25"], + "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}$", + "title": "Start Date", + "type": "string" + } + }, + "required": ["credentials", "start_date", "customer_id"], + "title": "Google Ads Spec", + "type": "object" + }, + "dockerImageTag": "0.1.8", + "dockerRepository": "airbyte/source-google-ads", + "documentationUrl": "https://hub.docker.com/r/airbyte/source-google-ads", + "icon": "\r\n\r\n\r\n\r\n\r\n\t\r\n\t\r\n\t\r\n\t\r\n\t\r\n\r\n\r\n", + "isThirdParty": true, + "name": "Google Ads", + "sourceDefinitionId": "253487c0-2246-43ba-a21f-5116b20a2c50" + }, + { + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": false, + "properties": { + "credentials_json": { + "airbyte_secret": true, + "description": "The contents of the JSON service account key. Check out the docs if you need help generating this key.", + "title": "Credentials JSON", + "type": "string" + }, + "custom_reports": { + "description": "A JSON array describing the custom reports you want to sync from GA. Check out the docs to get more information about this field.", + "title": "Custom Reports", + "type": "string" + }, + "start_date": { + "description": "A date in the format YYYY-MM-DDT00:00:00Z.", + "examples": ["2020-06-01T00:00:00Z"], + "title": "Start Date", + "type": "string" + }, + "view_id": { + "description": "The ID for the Google Analytics View you want to fetch data from. This can be found from the Google Analytics Account Explorer.", + "title": "View ID", + "type": "string" + } + }, + "required": ["credentials_json", "view_id", "start_date"], + "title": "Chaos Genius Google Analytics Source Spec", + "type": "object" + }, + "dockerImageTag": "0.2.6", + "dockerRepository": "airbyte/source-googleanalytics-singer", + "documentationUrl": "https://hub.docker.com/r/airbyte/source-googleanalytics-singer", + "icon": "\n\n\n\n\t\n\t\t\n\t\n\t\n\t\t\n\t\n\t\n\t\t\n\t\n\n\n", + "isThirdParty": true, + "name": "Google Analytics", + "sourceDefinitionId": "39f092a6-8c87-4f6f-a8d9-5cef45b7dbe1" + }, + { + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": false, + "properties": { + "database": { + "description": "Name of the database.", + "order": 2, + "type": "string" + }, + "host": { + "description": "Hostname of the database.", + "order": 0, + "type": "string" + }, + "jdbc_url_params": { + "description": "Additional properties to pass to the jdbc url string when connecting to the database formatted as 'key=value' pairs separated by the symbol '&'. (example: key1=value1&key2=value2&key3=value3)", + "order": 5, + "type": "string" + }, + "password": { + "airbyte_secret": true, + "description": "Password associated with the username.", + "order": 4, + "type": "string" + }, + "port": { + "default": 3306, + "description": "Port of the database.", + "examples": ["3306"], + "maximum": 65536, + "minimum": 0, + "order": 1, + "type": "integer" + }, + "replication_method": { + "default": "STANDARD", + "description": "Replication method to use for extracting data from the database. STANDARD replication requires no setup on the DB side but will not be able to represent deletions incrementally. CDC uses the Binlog to detect inserts, updates, and deletes. This needs to be configured on the source database itself.", + "enum": ["STANDARD", "CDC"], + "order": 6, + "title": "Replication Method", + "type": "string" + }, + "username": { + "description": "Username to use to access the database.", + "order": 3, + "type": "string" + } + }, + "required": [ + "host", + "port", + "database", + "username", + "replication_method" + ], + "title": "MySql Source Spec", + "type": "object" + }, + "dockerImageTag": "0.3.9", + "dockerRepository": "airbyte/source-mysql", + "documentationUrl": "https://docs.chaosgenius.io/docs/Data_Sources_Catalog/mysql", + "icon": "\n\n", + "isThirdParty": false, + "name": "MySQL", + "sourceDefinitionId": "435bb9a5-7887-4809-aa58-28c27df0d7ad" + }, + { + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": false, + "properties": { + "accounts": { + "description": "Account selection strategy.", + "oneOf": [ + { + "additionalProperties": false, + "description": "Fetch data for all available accounts.", + "properties": { + "selection_strategy": { + "default": "all", + "enum": ["all"], + "type": "string" + } + }, + "required": ["selection_strategy"], + "title": "All accounts assigned to your user" + }, + { + "additionalProperties": false, + "description": "Fetch data for subset of account ids.", + "properties": { + "ids": { + "description": "List of accounts from which data will be fetched.", + "items": { "type": "string" }, + "minItems": 1, + "type": "array", + "uniqueItems": true + }, + "selection_strategy": { + "default": "subset", + "enum": ["subset"], + "type": "string" + } + }, + "required": ["ids", "selection_strategy"], + "title": "Subset of your accounts" + } + ], + "title": "Accounts", + "type": "object" + }, + "client_id": { + "airbyte_secret": true, + "description": "ID of your Microsoft Advertising client application.", + "type": "string" + }, + "client_secret": { + "airbyte_secret": true, + "description": "Secret of your Microsoft Advertising client application.", + "type": "string" + }, + "customer_id": { + "description": "User's customer ID.", + "type": "string" + }, + "developer_token": { + "airbyte_secret": true, + "description": "Developer token associated with user.", + "type": "string" + }, + "refresh_token": { + "airbyte_secret": true, + "description": "The long-lived Refresh token received via grant_type=refresh_token request.", + "type": "string" + }, + "user_id": { + "description": "Unique user identifier.", + "type": "string" + } + }, + "required": [ + "accounts", + "client_id", + "client_secret", + "customer_id", + "developer_token", + "refresh_token", + "user_id" + ], + "title": "Bing Ads Spec", + "type": "object" + }, + "dockerImageTag": "0.1.0", + "dockerRepository": "airbyte/source-bing-ads", + "documentationUrl": "https://docs.chaosgenius.io/docs/Data_Sources_Catalog/Bing%20Ads", + "icon": "", + "isThirdParty": true, + "name": "Bing Ads", + "sourceDefinitionId": "47f25999-dd5e-4636-8c39-e7cea2453331" + }, + { + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": false, + "properties": { + "credentials_json": { + "airbyte_secret": true, + "description": "The contents of the JSON service account key. See the docs for more information on how to generate this key.", + "type": "string" + }, + "spreadsheet_id": { + "description": "The ID of the spreadsheet to be replicated.", + "type": "string" + } + }, + "required": ["spreadsheet_id", "credentials_json"], + "title": "Stripe Source Spec", + "type": "object" + }, + "dockerImageTag": "0.2.3", + "dockerRepository": "airbyte/source-google-sheets", + "documentationUrl": "https://hub.docker.com/repository/docker/airbyte/source-google-sheets", + "icon": "\n\n\n\n\t\n\t\n\t\n\n\n", + "isThirdParty": true, + "name": "Google Sheets", + "sourceDefinitionId": "71607ba1-c0ac-4799-8049-7f4b90dd50f7" + }, + { + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": false, + "properties": { + "api_password": { + "airbyte_secret": true, + "description": "The API PASSWORD for a private application in Shopify shop.", + "type": "string" + }, + "shop": { + "description": "The name of the shopify store. For https://EXAMPLE.myshopify.com, the shop name is 'EXAMPLE'.", + "type": "string" + }, + "start_date": { + "description": "The date you would like to replicate data. Format: YYYY-MM-DD.", + "examples": ["2021-01-01"], + "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}$", + "type": "string" + } + }, + "required": ["shop", "start_date", "api_password"], + "title": "Shopify Source CDK Specifications", + "type": "object" + }, + "dockerImageTag": "0.1.9", + "dockerRepository": "airbyte/source-shopify", + "documentationUrl": "https://docs.chaosgenius.io/docs/Data_Sources_Catalog/Shopify", + "icon": "", + "isThirdParty": true, + "name": "Shopify", + "sourceDefinitionId": "9da77001-af33-4bcd-be46-6252bf9342b9" + }, + { + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": false, + "properties": { + "credentials_json": { + "airbyte_secret": true, + "description": "The contents of the JSON service account key. Check out the docs if you need help generating this key.", + "title": "Credentials JSON", + "type": "string" + }, + "dataset_id": { + "description": "The BigQuery Dataset ID to look for tables to replicate from.", + "title": "Default Dataset ID", + "type": "string" + }, + "project_id": { + "description": "The GCP project ID for the project containing the target BigQuery dataset.", + "title": "Project ID", + "type": "string" + } + }, + "required": ["project_id", "credentials_json"], + "title": "BigQuery Source Spec", + "type": "object" + }, + "dockerImageTag": "0.1.1", + "dockerRepository": "airbyte/source-bigquery", + "documentationUrl": "https://docs.chaosgenius.io/docs/Data_Sources_Catalog/bigquery", + "icon": "", + "isThirdParty": false, + "name": "BigQuery", + "sourceDefinitionId": "bfd1ddf8-ae8a-4620-b1d7-55597d2ba08c" + }, + { + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": false, + "properties": { + "database": { + "description": "Name of the database.", + "order": 2, + "title": "DB Name", + "type": "string" + }, + "host": { + "description": "Hostname of the database.", + "order": 0, + "title": "Host", + "type": "string" + }, + "password": { + "airbyte_secret": true, + "description": "Password associated with the username.", + "order": 4, + "title": "Password", + "type": "string" + }, + "port": { + "default": 5432, + "description": "Port of the database.", + "examples": ["5432"], + "maximum": 65536, + "minimum": 0, + "order": 1, + "title": "Port", + "type": "integer" + }, + "replication_method": { + "description": "Replication method to use for extracting data from the database.", + "oneOf": [ + { + "additionalProperties": false, + "description": "Standard replication requires no setup on the DB side but will not be able to represent deletions incrementally.", + "properties": { + "method": { + "default": "Standard", + "enum": ["Standard"], + "order": 0, + "type": "string" + } + }, + "required": ["method"], + "title": "Standard" + }, + { + "additionalProperties": false, + "description": "Logical replication uses the Postgres write-ahead log (WAL) to detect inserts, updates, and deletes. This needs to be configured on the source database itself. Only available on Postgres 10 and above. Read the Postgres Source docs for more information.", + "properties": { + "method": { + "default": "CDC", + "enum": ["CDC"], + "order": 0, + "type": "string" + }, + "publication": { + "description": "A Postgres publication used for consuming changes.", + "order": 2, + "type": "string" + }, + "replication_slot": { + "description": "A pgoutput logical replication slot.", + "order": 1, + "type": "string" + } + }, + "required": ["method", "replication_slot", "publication"], + "title": "Logical Replication (CDC)" + } + ], + "order": 6, + "title": "Replication Method", + "type": "object" + }, + "ssl": { + "default": false, + "description": "Encrypt client/server communications for increased security.", + "order": 5, + "title": "Connect using SSL", + "type": "boolean" + }, + "username": { + "description": "Username to use to access the database.", + "order": 3, + "title": "User", + "type": "string" + } + }, + "required": ["host", "port", "database", "username"], + "title": "Postgres Source Spec", + "type": "object" + }, + "dockerImageTag": "0.3.5", + "dockerRepository": "airbyte/source-postgres", + "documentationUrl": "https://hub.docker.com/r/airbyte/source-postgres", + "icon": "\r\n\r\n\r\n\r\n\r\n\t\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n", + "isThirdParty": false, + "name": "Postgres", + "sourceDefinitionId": "decd338e-5647-4c0b-adf4-da0e75f5a750" + }, + { + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": false, + "properties": { + "account_id": { + "description": "Your Stripe account ID (starts with 'acct_', find yours here).", + "type": "string" + }, + "client_secret": { + "airbyte_secret": true, + "description": "Stripe API key (usually starts with 'sk_live_'; find yours here).", + "pattern": "^(s|r)k_(live|test)_[a-zA-Z0-9]+$", + "type": "string" + }, + "start_date": { + "description": "UTC date and time in the format 2017-01-25T00:00:00Z. Any data before this date will not be replicated.", + "examples": ["2017-01-25T00:00:00Z"], + "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$", + "type": "string" + } + }, + "required": ["client_secret", "account_id", "start_date"], + "title": "Stripe Source Spec", + "type": "object" + }, + "dockerImageTag": "0.1.13", + "dockerRepository": "airbyte/source-stripe", + "documentationUrl": "https://hub.docker.com/r/airbyte/source-stripe", + "icon": "Asset 32Stone Hub", + "isThirdParty": true, + "name": "Stripe", + "sourceDefinitionId": "e094cb9a-26de-4645-8761-65c0c425d1de" + }, + { + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": false, + "properties": { + "database": { + "description": "The database you created for Chaos Genius to access data into.", + "examples": ["CG_DATABASE"], + "order": 3, + "title": "Database", + "type": "string" + }, + "host": { + "description": "Host domain of the snowflake instance (must include the account, region, cloud environment).", + "examples": ["accountname.us-east-2.aws"], + "order": 0, + "title": "Account name", + "type": "string" + }, + "password": { + "airbyte_secret": true, + "description": "Password associated with the username.", + "order": 6, + "title": "Password", + "type": "string" + }, + "role": { + "description": "The role you created for Chaos Genius to access Snowflake.", + "examples": ["CG_ROLE"], + "order": 1, + "title": "Role", + "type": "string" + }, + "schema": { + "description": "The source Snowflake schema tables.", + "examples": ["CG_SCHEMA"], + "order": 4, + "title": "Schema", + "type": "string" + }, + "username": { + "description": "The username you created to allow Chaos Genius to access the database.", + "examples": ["CG_USER"], + "order": 5, + "title": "Username", + "type": "string" + }, + "warehouse": { + "description": "The warehouse you created for Chaos Genius to access data into.", + "examples": ["CG_WAREHOUSE"], + "order": 2, + "title": "Warehouse", + "type": "string" + } + }, + "required": [ + "host", + "role", + "warehouse", + "database", + "schema", + "username", + "password" + ], + "title": "Snowflake Source Spec", + "type": "object" + }, + "dockerImageTag": "0.1.0", + "dockerRepository": "airbyte/source-snowflake", + "documentationUrl": "https://docs.chaosgenius.io/docs/Data_Sources_Catalog/snowflake", + "icon": "Logo-SnowflakeCreated with Sketch.", + "isThirdParty": false, + "name": "Snowflake", + "sourceDefinitionId": "e2d65910-8c8b-40a1-ae7d-ee2416b2bfa2" + }, + { + "connectionSpecification": { + "properties": { + "access_token": { + "airbyte_secret": true, + "description": "The value of the access token generated. See the docs for more information", + "title": "Access Token", + "type": "string" + }, + "account_id": { + "description": "The Facebook Ad account ID to use when pulling data from the Facebook Marketing API.", + "title": "Account Id", + "type": "string" + }, + "include_deleted": { + "default": false, + "description": "Include data from deleted campaigns, ads, and adsets.", + "title": "Include Deleted", + "type": "boolean" + }, + "insights_days_per_job": { + "default": 7, + "description": "Number of days to sync in one job. The more data you have - the smaller you want this parameter to be.", + "maximum": 30, + "minimum": 1, + "title": "Insights Days Per Job", + "type": "integer" + }, + "insights_lookback_window": { + "default": 28, + "description": "The attribution window for the actions", + "maximum": 28, + "minimum": 0, + "title": "Insights Lookback Window", + "type": "integer" + }, + "start_date": { + "description": "The date from which you'd like to replicate data for AdCreatives and AdInsights APIs, in the format YYYY-MM-DDT00:00:00Z. All data generated after this date will be replicated.", + "examples": ["2017-01-25T00:00:00Z"], + "format": "date-time", + "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$", + "title": "Start Date", + "type": "string" + } + }, + "required": ["account_id", "access_token", "start_date"], + "title": "Source Facebook Marketing", + "type": "object" + }, + "dockerImageTag": "0.2.13", + "dockerRepository": "airbyte/source-facebook-marketing", + "documentationUrl": "https://hub.docker.com/r/airbyte/source-facebook-marketing", + "icon": "\nimage/svg+xml\n", + "isThirdParty": true, + "name": "Facebook Marketing", + "sourceDefinitionId": "e7778cfc-e97c-4458-9ecb-b4f2bba8946c" + }, + { + "dockerImageTag": "0.3.2", + "dockerRepository": "airbyte/source-redshift", + "documentationUrl": "https://docs.chaosgenius.io/docs/Data_Sources_Catalog/redshift", + "icon": "", + "name": "Redshift", + "sourceDefinitionId": "e87ffa8e-a3b5-f69c-9076-6011339de1f6", + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Redshift Source Spec", + "type": "object", + "required": ["host", "port", "database", "username", "password"], + "additionalProperties": false, + "properties": { + "host": { + "description": "Host Endpoint of the Redshift Cluster (must include the cluster-id, region and end with .redshift.amazonaws.com)", + "type": "string" + }, + "port": { + "description": "Port of the database.", + "type": "integer", + "minimum": 0, + "maximum": 65536, + "default": 5439, + "examples": ["5439"] + }, + "database": { + "description": "Name of the database.", + "type": "string", + "examples": ["master"] + }, + "username": { + "description": "Username to use to access the database.", + "type": "string" + }, + "password": { + "description": "Password associated with the username.", + "type": "string", + "airbyte_secret": true + } + } + }, + "isThirdParty": false + }, + { + "connectionSpecification": { + "properties": { + "host": { + "description": "Hostname of the database.", + "order": 0, + "title": "Host", + "type": "string" + }, + "password": { + "airbyte_secret": true, + "description": "Password associated with the username.", + "order": 4, + "title": "Password", + "type": "string" + }, + "port": { + "default": 8082, + "description": "Port of the database.", + "examples": ["5432"], + "maximum": 65536, + "minimum": 0, + "order": 1, + "title": "Port", + "type": "integer" + }, + "username": { + "description": "Username to use to access the database.", + "order": 3, + "title": "User", + "type": "string" + } + }, + "required": ["host", "port"], + "title": "Druid Source Spec", + "type": "object" + }, + "icon": "Apache Druid logo", + "isThirdParty": false, + "name": "Druid", + "sourceDefinitionId": "5B45DB62-303C-4E70-92DA-419D3CDBD506" + }, + { + "connectionSpecification": { + "properties": { + "host": { + "description": "Hostname of the database.", + "order": 0, + "title": "Host", + "type": "string" + }, + "password": { + "airbyte_secret": true, + "description": "Password associated with the username.", + "order": 4, + "title": "Password", + "type": "string" + }, + "port": { + "default": 8080, + "description": "Port of the database.", + "examples": ["5432"], + "maximum": 65536, + "minimum": 0, + "order": 1, + "title": "Port", + "type": "integer" + }, + "username": { + "description": "Username to use to access the database.", + "order": 3, + "title": "User", + "type": "string" + }, + "catalog": { + "description": "Data catalog to use.", + "order": 5, + "title": "Catalog", + "type": "string" + } + }, + "required": ["host", "port", "catalog"], + "title": "Trino Source Spec", + "type": "object" + }, + "icon": "Apache Druid logo", + "isThirdParty": false, + "name": "Trino", + "sourceDefinitionId": "ea6b0872-b25b-4591-8829-7e46d3276a5b" + } +] From a2ab8186f5ec3be5df107d87d4185160dcaea6ae Mon Sep 17 00:00:00 2001 From: Samyak S Sarnayak Date: Fri, 29 Apr 2022 00:21:46 +0530 Subject: [PATCH 3/5] fix(connectors): add trino client to requirements --- requirements/prod.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/requirements/prod.txt b/requirements/prod.txt index b67998592..a9b868d99 100644 --- a/requirements/prod.txt +++ b/requirements/prod.txt @@ -21,6 +21,8 @@ snowflake-sqlalchemy==1.2.4 sqlalchemy-redshift==0.8.6 # For apache druid pydruid[sqlalchemy]~=0.6.2 +# For Trino +trino[sqlalchemy]==0.313.0 # Migrations Flask-Migrate==2.7.0 From 64096133f2df7911495fb3312fad61f4c06ec798 Mon Sep 17 00:00:00 2001 From: Samyak S Sarnayak Date: Mon, 23 May 2022 15:49:43 +0530 Subject: [PATCH 4/5] feat(connectors): add missing Trino connector impl forgot to add this file in f0909635e08d1786e1e74d591190a6c1185c9740 lol --- chaos_genius/connectors/trino.py | 86 ++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 chaos_genius/connectors/trino.py diff --git a/chaos_genius/connectors/trino.py b/chaos_genius/connectors/trino.py new file mode 100644 index 000000000..69119f6e0 --- /dev/null +++ b/chaos_genius/connectors/trino.py @@ -0,0 +1,86 @@ +"""Apache Trino connector.""" + +import pandas as pd +from sqlalchemy import create_engine, text + +from chaos_genius.connectors.base_db import BaseDb + +from .connector_utils import merge_dataframe_chunks + + +class Trino(BaseDb): + """Apache Trino connector.""" + + test_db_query = "SELECT 1" + + def get_db_uri(self): + """Create SQLAlchemy URI from data source info.""" + db_info = self.ds_info + if db_info is None: + raise Exception("Datasource info not found for Trino.") + + host = db_info.get("host") + port = int(db_info.get("port")) + username = db_info.get("username") + catalog = db_info.get("catalog") + password = db_info.get("password") + if not (host and port and catalog): + raise Exception("Database Credential not found for Trino.") + + if not (username and password): + self.sqlalchemy_db_uri = f"trino://{host}:{port}/{catalog}" + else: + self.sqlalchemy_db_uri = ( + f"trino://{username}:{password}@{host}:{port}/{catalog}" + ) + return self.sqlalchemy_db_uri + + def get_db_engine(self): + """Create an SQLAlchemy engine from data source info.""" + db_uri = self.get_db_uri() + self.engine = create_engine( + db_uri, echo=self.debug, connect_args={"http_scheme": "https", "verify": False} + ) + return self.engine + + def test_connection(self): + """Test data source connection.""" + if not hasattr(self, "engine") or not self.engine: + self.engine = self.get_db_engine() + query_text = text(self.test_db_query) + status, message = None, "" + try: + with self.engine.connect() as connection: + cursor = connection.execute(query_text) + results = cursor.all() + if results[0][0] == 1: + status = True + else: + status = False + except Exception as err_msg: # noqa: B902 + status = False + message = str(err_msg) + return status, message + + def run_query(self, query, as_df=True): + """Run a SQL query.""" + engine = self.get_db_engine() + if as_df: + return merge_dataframe_chunks( + pd.read_sql_query(query, engine, chunksize=self.CHUNKSIZE) + ) + else: + return [] + + def get_schema(self): + """Get schema name.""" + schema_name = self.ds_info.get("schema") if self.ds_info is not None else None + if schema_name: + self.schema = schema_name + else: + self.schema = "public" + return self.schema + + def get_schema_names_list(self): + data = self.inspector.get_schema_names() + return data From 979657b74baf3da9b5a85349288e96431fd88876 Mon Sep 17 00:00:00 2001 From: Samyak S Sarnayak Date: Wed, 25 May 2022 17:28:54 +0530 Subject: [PATCH 5/5] fix(connectors): use custom datetime formats for Trino Changed to use the refactors in #962, #963 --- chaos_genius/connectors/trino.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/chaos_genius/connectors/trino.py b/chaos_genius/connectors/trino.py index 69119f6e0..35e69f9b4 100644 --- a/chaos_genius/connectors/trino.py +++ b/chaos_genius/connectors/trino.py @@ -12,6 +12,24 @@ class Trino(BaseDb): """Apache Trino connector.""" test_db_query = "SELECT 1" + __SQL_DATE_FORMAT = "timestamp '%Y-%m-%d 00:00:00{}'" + __SQL_STRPTIME_FORMAT = "timestamp '%Y-%m-%d %H:%M:%S%z'" + __SQL_STRFTIME_FORMAT = "timestamp '%Y-%m-%d %H:%M:%S'" + + @property + def sql_date_format(self): + """String format to convert date to datetime along with an offset.""" + return self.__SQL_DATE_FORMAT + + @property + def sql_strptime_format(self): + """Format to convert strings into dates.""" + return self.__SQL_STRPTIME_FORMAT + + @property + def sql_strftime_format(self): + """Format to convert dates into strings.""" + return self.__SQL_STRFTIME_FORMAT def get_db_uri(self): """Create SQLAlchemy URI from data source info."""