feat: add remote table materialization #8

Open · wants to merge 1 commit into dap-main
1 change: 1 addition & 0 deletions dbt/adapters/clickhouse/dbclient.py
@@ -107,6 +107,7 @@ def __init__(self, credentials: ClickHouseCredentials):
"distributed_table": {},
"distributed_incremental": {},
"general": {},
"remote_table": {},
}
if (
not credentials.allow_automatic_deduplication
3 changes: 1 addition & 2 deletions dbt/adapters/clickhouse/impl.py
@@ -117,8 +117,7 @@ def convert_time_type(cls, agate_table: "agate.Table", col_idx: int) -> str:
    @available.parse(lambda *a, **k: {})
    def get_clickhouse_cluster_name(self):
        conn = self.connections.get_if_exists()
        if conn.credentials.cluster:
            return f'"{conn.credentials.cluster}"'
@Magicbeanbuyer (Collaborator) commented on Jan 13, 2025:

ALTER TABLE _admin_portal_aggregation.usage_aggregation_full_period_blue_base
        ON CLUSTER "fal"
        DROP PARTITION tuple(toYYYYMM(toDate('2025-01-12 03:24:00')))

also works :D

        return conn.credentials.cluster

    @available.parse(lambda *a, **k: {})
    def get_clickhouse_local_suffix(self):
1 change: 1 addition & 0 deletions dbt/adapters/clickhouse/relation.py
@@ -44,6 +44,7 @@ class ClickHouseRelation(BaseRelation):
    quote_character: str = '`'
    can_exchange: bool = False
    can_on_cluster: bool = False
    remote_cluster: Optional[str] = None

    def __post_init__(self):
        if self.database != self.schema and self.database:
2 changes: 1 addition & 1 deletion dbt/include/clickhouse/macros/adapters.sql
@@ -32,7 +32,7 @@
db.engine as db_engine,
{%- if adapter.get_clickhouse_cluster_name() -%}
count(distinct _shard_num) > 1 as is_on_cluster
from clusterAllReplicas({{ adapter.get_clickhouse_cluster_name() }}, system.tables) as t
from clusterAllReplicas('{{ adapter.get_clickhouse_cluster_name() }}', system.tables) as t
Collaborator commented:

All three variants work

select * from cluster(default, system.one);
select * from cluster('default', system.one);
select * from cluster("default", system.one);

Author (Collaborator) replied:

I was worried about passing reserved names to the first parameter
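
For illustration (the cluster name here is hypothetical), quoting protects against names that don't parse as a single bare identifier:

    -- a cluster named 'my-cluster': unquoted, the dash is parsed as an expression, not one identifier
    select * from cluster(my-cluster, system.one);   -- error
    select * from cluster('my-cluster', system.one); -- works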

join system.databases as db on t.database = db.name
where schema = '{{ schema_relation.schema }}'
group by name, schema, type, db_engine
@@ -86,24 +86,30 @@

{% endmaterialization %}

{% macro create_distributed_table(relation, local_relation) %}
{%- set cluster = adapter.get_clickhouse_cluster_name() -%}
{% if cluster is none %}
{% do exceptions.raise_compiler_error('Cluster name should be defined for using distributed materializations, current is None') %}
{% endif %}
{% macro create_distributed_table(relation, local_relation, sql=none) %}
{% if adapter.get_clickhouse_cluster_name() is none %}
{% do exceptions.raise_compiler_error('Cluster name should be defined for using distributed materializations, current is None') %}
{% endif %}

{%- set cluster = cluster[1:-1] -%}
{%- set sharding = config.get('sharding_key') -%}

create or replace table {{ relation }} {{ on_cluster_clause(relation) }} as {{ local_relation }}
ENGINE = Distributed('{{ cluster}}', '{{ local_relation.schema }}', '{{ local_relation.name }}'
{%- if sharding is not none and sharding.strip() != '' -%}
, {{ sharding }}
{%- else %}
, rand()
{% endif -%}
)
{% endmacro %}
{%- set remote_cluster = local_relation.remote_cluster or adapter.get_clickhouse_cluster_name() -%}
{%- set sharding = config.get('sharding_key') -%}
{%- set reference = "as " ~ local_relation -%}
{%- if sql -%}
Collaborator commented:

What is the use case for having an SQL argument when creating a distributed table?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You don't rely on an existing local relation for the DDL query; the sql argument allows passing the column names and types directly.
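
A sketch of the two DDL shapes the macro can now emit (table and column names are hypothetical):

    -- without sql: the schema is copied from an existing local relation
    create or replace table db.dist as db.local_table
    engine = Distributed('cluster', 'db', 'local_table', rand());

    -- with sql: the column list is derived from the query via get_column_schema_from_query
    create or replace table db.dist (key1 UInt64, key2 Int64)
    engine = Distributed('cluster', 'db', 'local_table', rand());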

{%- set col_list = [] -%}
{%- for col in adapter.get_column_schema_from_query(sql) -%}
{%- do col_list.append(col.name + ' ' + col.data_type) -%}
{%- endfor -%}
{%- set reference = "(" ~ (col_list | join(', ')) ~ ")" -%}
{%- endif -%}
create or replace table {{ relation }} {{ on_cluster_clause(relation) }} {{ reference }}
engine = Distributed('{{ remote_cluster }}', '{{ local_relation.schema }}', '{{ local_relation.name }}'
{%- if sharding is not none and sharding.strip() != '' -%}
, {{ sharding }}
{%- else %}
, rand()
{% endif -%}
)
{% endmacro %}

{% macro create_empty_table_from_relation(relation, source_relation, sql=none) -%}
{%- set sql_header = config.get('sql_header', none) -%}
27 changes: 27 additions & 0 deletions dbt/include/clickhouse/macros/materializations/remote_table.sql
@@ -0,0 +1,27 @@
{% materialization remote_table, adapter='clickhouse', supported_languages=['python', 'sql'] -%}
{%- set remote_config = config.get('remote_config', {}) -%}
{%- set remote_cluster = remote_config.get('cluster') or adapter.get_clickhouse_cluster_name() -%}
{%- set remote_schema = remote_config.get('schema') or adapter.get_clickhouse_local_db_prefix() + this.schema -%}
{%- set remote_identifier = remote_config.get('identifier') or this.identifier + adapter.get_clickhouse_local_suffix() -%}
Collaborator commented:

Will remote_config.get('schema') be with leading underscore and remote_config.get('identifier') with _base suffix in case the base table is on the ingestion cluster?
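
For reference, a sketch of how the defaults resolve when keys are omitted from remote_config (the prefix and suffix values below are assumptions, not necessarily the adapter's configured defaults):

    -- assuming get_clickhouse_local_db_prefix() = '_' and get_clickhouse_local_suffix() = '_base',
    -- a model `my_model` in schema `analytics` defaults to:
    --   remote_schema     = '_analytics'       (prefix + this.schema)
    --   remote_identifier = 'my_model_base'    (this.identifier + suffix)
    -- explicit values in remote_config override both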


{% set target_relation = this.incorporate(type='table') %}
{% set remote_relation = target_relation.incorporate(path={"identifier": remote_identifier, "schema": remote_schema}, remote_cluster=remote_cluster) %}

{{ run_hooks(pre_hooks, inside_transaction=False) }}
{{ run_hooks(pre_hooks, inside_transaction=True) }}

{% call statement('main') %}
{{ create_distributed_table(target_relation, remote_relation, sql) }}
Collaborator commented:

where is the sql defined?

{% endcall %}

{% set should_revoke = should_revoke(target_relation, full_refresh_mode) %}
{% set grant_config = config.get('grants') %}
{% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %}
{% do persist_docs(target_relation, model) %}

{{ run_hooks(post_hooks, inside_transaction=True) }}
{{ adapter.commit() }}
{{ run_hooks(post_hooks, inside_transaction=False) }}
{{ return({'relations': [target_relation]}) }}

{% endmaterialization %}
97 changes: 97 additions & 0 deletions tests/integration/adapter/remote_table/test_remote_table.py
@@ -0,0 +1,97 @@
import pytest
from dbt.tests.util import run_dbt


model_with_defaults = """
{{
    config(
        materialized='remote_table',
    )
}}
select toUInt64(number) as key1, toInt64(-number) as key2 from numbers(10)
"""


class TestRemoteTable:
    @pytest.fixture(scope="class")
    def models(self):
        return {
            "remote_table.sql": model_with_defaults,
        }

    def test_with_defaults(self, project):
        # initialize a local table on the current cluster
        project.run_sql(f"""
            create table {project.test_schema}.remote_table_local on cluster {project.test_config["cluster"]}
            (key1 UInt64, key2 Int64)
            engine=ReplicatedMergeTree order by key1
        """)

        # this `remote_table` run with default configuration will point to the previously created
        # local table, taking `local_db_prefix` and `local_suffix` settings into account
        run_dbt()
Collaborator commented:

  create or replace table `dbt_clickhouse_3961_test_remote_table_1736978435032`.`remote_table` 
   (key1 UInt64, key2 Int64)
  engine = Distributed('test_shard', 'dbt_clickhouse_3961_test_remote_table_1736978435032', 'remote_table_local'
    , rand()
  )

I copied this DDL from test debugging, and I have two questions.

  1. What is the use case for creating a Distributed table on test_shard when the base table is also on test_shard? Wouldn't this simply overwrite the Distributed table created by distributed_incremental materialization?
  2. I didn't dig deeper, but on_cluster_clause returned nothing in this case; I believe this query should have an on cluster test_shard clause, otherwise the Distributed table will only be available on one shard.
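
For reference, the DDL the second question expects would look roughly like this (a sketch based on the DDL quoted above, not the macro's current output):

    create or replace table `dbt_clickhouse_3961_test_remote_table_1736978435032`.`remote_table`
    on cluster test_shard
    (key1 UInt64, key2 Int64)
    engine = Distributed('test_shard', 'dbt_clickhouse_3961_test_remote_table_1736978435032', 'remote_table_local', rand())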


        # insert into the distributed table
        project.run_sql(f"""
            insert into {project.test_schema}.remote_table
            select toUInt64(number) as key1, toInt64(-number) as key2 from numbers(10)
            settings insert_quorum=1
        """)

        _assert_is_distributed_table(project)
        _assert_correct_distributed_data(project)


model_with_remote_configuration = """
{{
    config(
        materialized='remote_table',
        remote_config={'cluster': 'test_remote', 'schema': this.schema, 'identifier': 'remote_target_table'},
        sharding_key='key1',
    )
}}
select toUInt64(number) as key1, toInt64(-number) as key2 from numbers(10)
"""


class TestRemoteTableRemoteConfig:
    @pytest.fixture(scope="class")
    def models(self):
        return {
            "remote_table.sql": model_with_remote_configuration,
        }

    def test_with_remote_configuration(self, project):
        # initialize a local table on the remote cluster 'test_remote'
        project.run_sql(f"create database if not exists {project.test_schema} on cluster `test_remote`")
        project.run_sql(f"""
            create table {project.test_schema}.remote_target_table on cluster `test_remote`
            engine=MergeTree order by key1
            as select toUInt64(number) as key1, toInt64(-number) as key2 from numbers(10)
        """)

        # the created distributed table should point to the local table defined in the model's `remote_config`
        run_dbt()
Collaborator commented:

 create or replace table `dbt_clickhouse_3076_test_remote_table_1736979527642`.`remote_table` 
   (key1 UInt64, key2 Int64)
  engine = Distributed('test_remote', 'dbt_clickhouse_3076_test_remote_table_1736979527642', 'remote_target_table', key1)

Is on cluster test_shard also missing here?


        _assert_is_distributed_table(project)
        _assert_correct_distributed_data(project)

        # assert correct engine parameters
        result = project.run_sql(f"select create_table_query from system.tables where name='remote_table'", fetch="one")
        assert f"Distributed('test_remote', '{project.test_schema}', 'remote_target_table', key1)" in result[0]


def _assert_is_distributed_table(project):
    # check correct table creation on the current host
    result = project.run_sql(
        f"select engine from system.tables where name='remote_table'",
        fetch="one"
    )
    assert result is not None
    assert result[0] == "Distributed"


def _assert_correct_distributed_data(project):
    # query remote data through the distributed table
    result = project.run_sql("select count(*) as num_rows from remote_table", fetch="one")
    assert result[0] == 10
10 changes: 10 additions & 0 deletions tests/integration/test_config.xml
@@ -7,6 +7,8 @@
<replica from_env="REPLICA_NUM" />
<server_index from_env="SERVER_INDEX" />
</macros>
<default_replica_path>/clickhouse/tables/{uuid}/{shard}</default_replica_path>
<default_replica_name>{replica}</default_replica_name>
<remote_servers>
<test_shard>
<shard>
@@ -44,6 +46,14 @@
</replica>
</shard>
</test_replica>
<test_remote>
<shard>
<replica>
<host>ch2</host>
<port>9000</port>
</replica>
</shard>
</test_remote>
</remote_servers>
<keeper_server>
<tcp_port>9181</tcp_port>