Warning: This is an experimental feature. To our knowledge, this is stable, but there are still rough edges in the experience. Contributions are welcome!
On demand feature views allow data scientists to use existing features and request-time data (features only available at request time) to transform and create new features. Users define Python transformation logic which is executed in both the historical retrieval and online retrieval paths.
Currently, these transformations are executed locally. This is fine for online serving, but does not scale well offline.
This enables data scientists to easily impact the online feature retrieval path. For example, a data scientist could:
Call get_historical_features to generate a training dataframe
Iterate in notebook on feature engineering in Pandas
Copy transformation logic into on demand feature views and commit to a dev branch of the feature repository
Verify with get_historical_features
There are new CLI commands:
`feast on-demand-feature-views list` lists all registered on demand feature views after `feast apply` is run
`feast on-demand-feature-views describe [NAME]` describes the definition of an on demand feature view
See the code samples below for an example of how to use on demand feature views.
On Demand Transformations support transformations using Pandas and native Python. Note: native Python is much faster, but it is not yet tested for offline retrieval.
We register RequestSource inputs and the transform in on_demand_feature_view:
And then to retrieve historical features, we can call this in a feature service or reference individual features:
And then to retrieve online features, we can call this in a feature service or reference individual features:
Verify with get_online_features on dev branch that the transformation correctly outputs online features
Submit a pull request to the staging / prod branches which impact production traffic
from feast import Field, RequestSource
from feast.types import Float64, Int64
from typing import Any, Dict
import pandas as pd
# Request data source: describes the values that are only available at
# request time (e.g. fields of the user-initiated HTTP request).
input_request = RequestSource(
    name="vals_to_add",
    schema=[
        Field(name="val_to_add", dtype=Int64),
        Field(name="val_to_add_2", dtype=Int64),
    ],
)
# Use the input data and feature view features to create new features (Pandas mode)
@on_demand_feature_view(
    sources=[
        driver_hourly_stats_view,
        input_request,
    ],
    schema=[
        Field(name="conv_rate_plus_val1", dtype=Float64),
        Field(name="conv_rate_plus_val2", dtype=Float64),
    ],
    mode="pandas",
)
def transformed_conv_rate(features_df: pd.DataFrame) -> pd.DataFrame:
    """Add each request-time value to ``conv_rate``.

    Args:
        features_df: Frame containing the ``conv_rate`` column together with
            the ``val_to_add`` and ``val_to_add_2`` columns.

    Returns:
        A new frame with the columns ``conv_rate_plus_val1`` and
        ``conv_rate_plus_val2``, matching the schema declared in the decorator.
    """
    # Build a fresh frame so only the declared output columns are returned.
    df = pd.DataFrame()
    df["conv_rate_plus_val1"] = features_df["conv_rate"] + features_df["val_to_add"]
    df["conv_rate_plus_val2"] = features_df["conv_rate"] + features_df["val_to_add_2"]
    return df
# Use the input data and feature view features to create new features (Python mode)
@on_demand_feature_view(
    sources=[
        driver_hourly_stats_view,
        input_request,
    ],
    schema=[
        Field(name="conv_rate_plus_val1_python", dtype=Float64),
        Field(name="conv_rate_plus_val2_python", dtype=Float64),
    ],
    mode="python",
)
def transformed_conv_rate_python(inputs: Dict[str, Any]) -> Dict[str, Any]:
    """Add each request-time value to ``conv_rate`` using plain Python.

    Args:
        inputs: Mapping that provides iterables under the keys ``conv_rate``,
            ``val_to_add`` and ``val_to_add_2``; values are paired positionally.

    Returns:
        A dict with the keys ``conv_rate_plus_val1_python`` and
        ``conv_rate_plus_val2_python``, each holding a list of sums.
    """
    output: Dict[str, Any] = {
        "conv_rate_plus_val1_python": [
            conv_rate + val_to_add
            for conv_rate, val_to_add in zip(
                inputs["conv_rate"], inputs["val_to_add"]
            )
        ],
        "conv_rate_plus_val2_python": [
            conv_rate + val_to_add_2
            for conv_rate, val_to_add_2 in zip(
                inputs["conv_rate"], inputs["val_to_add_2"]
            )
        ],
    }
    return output


# Historical retrieval: request regular feature view features together with
# the Pandas-mode on demand features by their "view:feature" references.
training_df = store.get_historical_features(
    entity_df=entity_df,
    features=[
        "driver_hourly_stats:conv_rate",
        "driver_hourly_stats:acc_rate",
        "driver_hourly_stats:avg_daily_trips",
        "transformed_conv_rate:conv_rate_plus_val1",
        "transformed_conv_rate:conv_rate_plus_val2",
    ],
).to_df()
# One row per lookup: the entity key plus the request-time values
# (val_to_add, val_to_add_2) consumed by the on demand transformation.
entity_rows = [
    {"driver_id": 1001, "val_to_add": 1, "val_to_add_2": 2},
]
# Online retrieval: request regular feature view features together with the
# Python-mode on demand features, then convert the response to a dict.
online_response = store.get_online_features(
    entity_rows=entity_rows,
    features=[
        "driver_hourly_stats:conv_rate",
        "driver_hourly_stats:acc_rate",
        "transformed_conv_rate_python:conv_rate_plus_val1_python",
        "transformed_conv_rate_python:conv_rate_plus_val2_python",
    ],
).to_dict()