Merge branch 'feast-dev:master' into master

opendatahub-io · Dec 23, 2024 · 22404df · 22404df
2 parents c3eea7f + 3f49517
commit 22404df
Show file tree

Hide file tree

Showing 128 changed files with 7,172 additions and 2,600 deletions.
diff --git a/.github/workflows/java_master_only.yml b/.github/workflows/java_master_only.yml
@@ -72,13 +72,13 @@ jobs:
           java-version: '11'
           java-package: jdk
           architecture: x64
-      - uses: actions/cache@v2
+      - uses: actions/cache@v4
         with:
           path: ~/.m2/repository
           key: ${{ runner.os }}-it-maven-${{ hashFiles('**/pom.xml') }}
           restore-keys: |
             ${{ runner.os }}-it-maven-
-      - uses: actions/cache@v2
+      - uses: actions/cache@v4
         with:
           path: ~/.m2/repository
           key: ${{ runner.os }}-ut-maven-${{ hashFiles('**/pom.xml') }}

diff --git a/.github/workflows/java_pr.yml b/.github/workflows/java_pr.yml
@@ -53,13 +53,13 @@ jobs:
           java-version: '11'
           java-package: jdk
           architecture: x64
-      - uses: actions/cache@v2
+      - uses: actions/cache@v4
         with:
           path: ~/.m2/repository
           key: ${{ runner.os }}-it-maven-${{ hashFiles('**/pom.xml') }}
           restore-keys: |
             ${{ runner.os }}-it-maven-
-      - uses: actions/cache@v2
+      - uses: actions/cache@v4
         with:
           path: ~/.m2/repository
           key: ${{ runner.os }}-ut-maven-${{ hashFiles('**/pom.xml') }}
@@ -97,11 +97,11 @@ jobs:
           python-version: "3.11"
           architecture: x64
       - name: Authenticate to Google Cloud
-        uses: 'google-github-actions/auth@v1'
+        uses: google-github-actions/auth@v2
         with:
           credentials_json: '${{ secrets.GCP_SA_KEY }}'
       - name: Set up gcloud SDK
-        uses: google-github-actions/setup-gcloud@v1
+        uses: google-github-actions/setup-gcloud@v2
         with:
           project_id: ${{ secrets.GCP_PROJECT_ID }}
       - run: gcloud auth configure-docker --quiet
@@ -137,18 +137,18 @@ jobs:
         with:
           python-version: '3.11'
           architecture: 'x64'
-      - uses: actions/cache@v2
+      - uses: actions/cache@v4
         with:
           path: ~/.m2/repository
           key: ${{ runner.os }}-it-maven-${{ hashFiles('**/pom.xml') }}
           restore-keys: |
             ${{ runner.os }}-it-maven-
       - name: Authenticate to Google Cloud
-        uses: 'google-github-actions/auth@v1'
+        uses: google-github-actions/auth@v2
         with:
           credentials_json: '${{ secrets.GCP_SA_KEY }}'
       - name: Set up gcloud SDK
-        uses: google-github-actions/setup-gcloud@v1
+        uses: google-github-actions/setup-gcloud@v2
         with:
           project_id: ${{ secrets.GCP_PROJECT_ID }}
       - name: Use gcloud CLI

diff --git a/.github/workflows/lint_pr.yml b/.github/workflows/lint_pr.yml
@@ -14,7 +14,7 @@ jobs:
     name: Validate PR title
     runs-on: ubuntu-latest
     steps:
-      - uses: amannn/action-semantic-pull-request@v4
+      - uses: amannn/action-semantic-pull-request@v5
         with:
           # Must use uppercase
           subjectPattern: ^(?=[A-Z]).+$

diff --git a/.github/workflows/operator_pr.yml b/.github/workflows/operator_pr.yml
@@ -7,7 +7,7 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Install Go
-        uses: actions/setup-go@v2
+        uses: actions/setup-go@v5
         with:
           go-version: 1.21.x
       - name: Operator tests

diff --git a/.github/workflows/pr_local_integration_tests.yml b/.github/workflows/pr_local_integration_tests.yml
@@ -45,7 +45,7 @@ jobs:
       - name: Get uv cache dir
         id: uv-cache
         run: |
-          echo "::set-output name=dir::$(uv cache dir)"
+          echo "dir=$(uv cache dir)" >> $GITHUB_OUTPUT
       - name: uv cache
         uses: actions/cache@v4
         with:

diff --git a/.github/workflows/smoke_tests.yml b/.github/workflows/smoke_tests.yml
@@ -31,7 +31,7 @@ jobs:
       - name: Get uv cache dir
         id: uv-cache
         run: |
-          echo "::set-output name=dir::$(uv cache dir)"
+          echo "dir=$(uv cache dir)" >> $GITHUB_OUTPUT
       - name: uv cache
         uses: actions/cache@v4
         with:

diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml
@@ -33,8 +33,8 @@ jobs:
           curl -LsSf https://astral.sh/uv/install.sh | sh
       - name: Get uv cache dir
         id: uv-cache
-        run: |
-          echo "::set-output name=dir::$(uv cache dir)"
+        run: |          
+          echo "dir=$(uv cache dir)" >> $GITHUB_OUTPUT
       - name: uv cache
         uses: actions/cache@v4
         with:
@@ -52,7 +52,7 @@ jobs:
       NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
     steps:
       - uses: actions/checkout@v4
-      - uses: actions/setup-node@v3
+      - uses: actions/setup-node@v4
         with:
           node-version-file: './ui/.nvmrc'
           registry-url: 'https://registry.npmjs.org'

diff --git a/Makefile b/Makefile
@@ -96,14 +96,14 @@ test-python-unit:
 	python -m pytest -n 8 --color=yes sdk/python/tests
 
 test-python-integration:
-	python -m pytest -n 8 --integration --color=yes --durations=10 --timeout=1200 --timeout_method=thread --dist loadgroup \
+	python -m pytest --tb=short -v -n 8 --integration --color=yes --durations=10 --timeout=1200 --timeout_method=thread --dist loadgroup \
 		-k "(not snowflake or not test_historical_features_main)" \
 		sdk/python/tests
 
 test-python-integration-local:
 	FEAST_IS_LOCAL_TEST=True \
 	FEAST_LOCAL_ONLINE_CONTAINER=True \
-	python -m pytest -n 8 --color=yes --integration --durations=10 --timeout=1200 --timeout_method=thread --dist loadgroup \
+	python -m pytest --tb=short -v -n 8 --color=yes --integration --durations=10 --timeout=1200 --timeout_method=thread --dist loadgroup \
 		-k "not test_lambda_materialization and not test_snowflake_materialization" \
 		sdk/python/tests
 

diff --git a/docs/README.md b/docs/README.md
@@ -42,12 +42,15 @@ serving system must make a request to the feature store to retrieve feature valu
 
 ## Who is Feast for?
 
-Feast helps ML platform/MLOps teams with DevOps experience productionize real-time models. Feast also helps these teams 
-build a feature platform that improves collaboration between data engineers, software engineers, machine learning 
-engineers, and data scientists.
+Feast helps ML platform/MLOps teams with DevOps experience productionize real-time models. Feast also helps these teams build a feature platform that improves collaboration between data engineers, software engineers, machine learning engineers, and data scientists.
 
-Feast is likely **not** the right tool if you
-* are in an organization that’s just getting started with ML and is not yet sure what the business impact of ML is
+* *For Data Scientists*: Feast is a tool where you can easily define, store, and retrieve your features for both model development and model deployment. By using Feast, you can focus on what you do best: build features that power your AI/ML models and maximize the value of your data.
+
+* *For MLOps Engineers*: Feast is a library that allows you to connect your existing infrastructure (e.g., online database, application server, microservice, analytical database, and orchestration tooling) and enables your Data Scientists to ship features for their models to production using a friendly SDK without having to be concerned with software engineering challenges that arise from serving real-time production systems. By using Feast, you can focus on maintaining a resilient system, instead of implementing features for Data Scientists.
+
+* *For Data Engineers*: Feast provides a centralized catalog for storing feature definitions allowing one to maintain a single source of truth for feature data. It provides the abstraction for reading and writing to many different types of offline and online data stores. Using either the provided python SDK or the feature server service, users can write data to the online and/or offline stores and then read that data out again in either low-latency online scenarios for model inference, or in batch scenarios for model training.
+
+* *For AI Engineers*: Feast provides a platform designed to scale your AI applications by enabling seamless integration of richer data and facilitating fine-tuning. With Feast, you can optimize the performance of your AI models while ensuring a scalable and efficient data pipeline.
 
 ## What Feast is not?
 

diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md
@@ -32,6 +32,7 @@
   * [Registry](getting-started/components/registry.md)
   * [Offline store](getting-started/components/offline-store.md)
   * [Online store](getting-started/components/online-store.md)
+  * [Feature server](getting-started/components/feature-server.md)
   * [Batch Materialization Engine](getting-started/components/batch-materialization-engine.md)
   * [Provider](getting-started/components/provider.md)
   * [Authorization Manager](getting-started/components/authz_manager.md)

diff --git a/docs/getting-started/components/README.md b/docs/getting-started/components/README.md
@@ -12,6 +12,10 @@
 [online-store.md](online-store.md)
 {% endcontent-ref %}
 
+{% content-ref url="feature-server.md" %}
+[feature-server.md](feature-server.md)
+{% endcontent-ref %}
+
 {% content-ref url="batch-materialization-engine.md" %}
 [batch-materialization-engine.md](batch-materialization-engine.md)
 {% endcontent-ref %}

diff --git a/docs/getting-started/components/feature-server.md b/docs/getting-started/components/feature-server.md
@@ -0,0 +1,40 @@
+# Feature Server
+
+The Feature Server is a core architectural component in Feast, designed to provide low-latency feature retrieval and updates for machine learning applications.
+
+It is a REST API server built using [FastAPI](https://fastapi.tiangolo.com/) and exposes a limited set of endpoints to serve features, push data, and support materialization operations. The server is scalable, flexible, and designed to work seamlessly with various deployment environments, including local setups and cloud-based systems.
+
+## Motivation
+
+In machine learning workflows, real-time access to feature values is critical for enabling low-latency predictions. The Feature Server simplifies this requirement by:
+
+1. **Serving Features:** Allowing clients to retrieve feature values for specific entities in real-time, reducing the complexity of direct interactions with the online store.
+2. **Data Integration:** Providing endpoints to push feature data directly into the online or offline store, ensuring data freshness and consistency.
+3. **Scalability:** Supporting horizontal scaling to handle high request volumes efficiently.
+4. **Standardized API:** Exposing HTTP/JSON endpoints that integrate seamlessly with various programming languages and ML pipelines.
+5. **Secure Communication:** Supporting TLS (SSL) for secure data transmission in production environments.
+
+## Architecture
+
+The Feature Server operates as a stateless service backed by two key components:
+
+- **[Online Store](./online-store.md):** The primary data store used for low-latency feature retrieval.
+- **[Registry](./registry.md):** The metadata store that defines feature sets, feature views, and their relationships to entities.
+
+## Key Features
+
+1. **RESTful API:** Provides standardized endpoints for feature retrieval and data pushing.
+2. **CLI Integration:** Easily managed through the Feast CLI with commands like `feast serve`.
+3. **Flexible Deployment:** Can be deployed locally, via Docker, or on Kubernetes using Helm charts.
+4. **Scalability:** Designed for distributed deployments to handle large-scale workloads.
+5. **TLS Support:** Ensures secure communication in production setups.
+
+## Endpoints Overview
+
+| Endpoint                   | Description                                                             |
+| -------------------------- | ----------------------------------------------------------------------- |
+| `/get-online-features`     | Retrieves feature values for specified entities and feature references. |
+| `/push`                    | Pushes feature data to the online and/or offline store.                 |
+| `/materialize`             | Materializes features within a specific time range to the online store. |
+| `/materialize-incremental` | Incrementally materializes features up to the current timestamp.        |
+
diff --git a/docs/getting-started/components/overview.md b/docs/getting-started/components/overview.md
@@ -13,6 +13,7 @@
 * **Deploy Model:** The trained model binary (and list of features) are deployed into a model serving system. This step is not executed by Feast.
 * **Prediction:** A backend system makes a request for a prediction from the model serving service.
 * **Get Online Features:** The model serving service makes a request to the Feast Online Serving service for online features using a Feast SDK.
+* **Feature Retrieval:** The online serving service retrieves the latest feature values from the online store and returns them to the model serving service.
 
 ## Components
 
@@ -24,6 +25,7 @@ A complete Feast deployment contains the following components:
   * Materialize (load) feature values into the online store.
   * Build and retrieve training datasets from the offline store.
   * Retrieve online features.
+* **Feature Server:** The Feature Server is a REST API server that serves feature values for a given entity key and feature reference. The Feature Server is designed to be horizontally scalable and can be deployed in a distributed manner.
 * **Stream Processor:** The Stream Processor can be used to ingest feature data from streams and write it into the online or offline stores. Currently, there's an experimental Spark processor that's able to consume data from Kafka.
 * **Batch Materialization Engine:** The [Batch Materialization Engine](batch-materialization-engine.md) component launches a process which loads data into the online store from the offline store. By default, Feast uses a local in-process engine implementation to materialize data. However, additional infrastructure can be used for a more scalable materialization process.
 * **Online Store:** The online store is a database that stores only the latest feature values for each entity. The online store is either populated through materialization jobs or through [stream ingestion](../../reference/data-sources/push.md).

diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md
@@ -10,6 +10,9 @@ Feast (Feature Store) is an open-source feature store designed to facilitate the
 
 * *For Data Engineers*: Feast provides a centralized catalog for storing feature definitions allowing one to maintain a single source of truth for feature data. It provides the abstraction for reading and writing to many different types of offline and online data stores. Using either the provided python SDK or the feature server service, users can write data to the online and/or offline stores and then read that data out again in either low-latency online scenarios for model inference, or in batch scenarios for model training.
 
+* *For AI Engineers*: Feast provides a platform designed to scale your AI applications by enabling seamless integration of richer data and facilitating fine-tuning. With Feast, you can optimize the performance of your AI models while ensuring a scalable and efficient data pipeline.
+
+
 For more info refer to [Introduction to feast](../README.md)
 
 ## Prerequisites

diff --git a/docs/how-to-guides/starting-feast-servers-tls-mode.md b/docs/how-to-guides/starting-feast-servers-tls-mode.md
@@ -189,3 +189,8 @@ INFO:     Waiting for application startup.
 INFO:     Application startup complete.
 INFO:     Uvicorn running on https://0.0.0.0:8888 (Press CTRL+C to quit)
 ```
+
+
+## Adding the public key to the CA trust store and configuring Feast to use the trust store
+You can pass the public key for SSL verification using the `cert` parameter; however, it is sometimes difficult to maintain individual certificates and pass them individually.
+The alternative recommendation is to add the public certificate to the CA trust store and set the path as an environment variable (e.g., `FEAST_CA_CERT_FILE_PATH`). Feast will use the trust store path in the `FEAST_CA_CERT_FILE_PATH` environment variable.