Parquet folder example
Example directory structure
- bucket_root
  - nested_folder/
    - 2021-05-03/ -> Date Partition
      - training_data/ -> Sub Folder
        - data.csv
    - 2021-05-04/ -> Date Partition
      - training_sample/ -> Sub Folder
        - data.csv
- Python
- Scala
from featurestore import Client, ParquetFolder
# Location of the nested folder and the pattern used to filter its contents
ROOT_FOLDER = "s3a://feature-store-test-data/nested_folder"
FILTER_PATTERN = ".*/training.*"

# Connect to the feature store and log in
fs_client = Client("ip:port")
fs_client.auth.login()

# Create the project that will hold the feature set
demo_project = fs_client.projects.create("demo")

# Describe the parquet folder source and extract its schema
source = ParquetFolder(root_folder=ROOT_FOLDER, filter_pattern=FILTER_PATTERN)
schema = fs_client.extract_schema_from_source(source)

# Register the feature set from the extracted schema
feature_set = demo_project.feature_sets.register(
    schema,
    "feature_set_name",
    primary_key=["key_name"],
)

# Ingest to cache
feature_set.ingest(source)

# Retrieve the feature set and download it
reference = feature_set.retrieve()
reference.download()
import ai.h2o.featurestore.Client
import ai.h2o.featurestore.core.sources.ParquetFolder
// Location of the nested folder and the pattern used to filter its contents
val rootFolderPath = "s3a://feature-store-test-data/nested_folder"
val subFolderPattern = ".*/training.*"

// Connect to the feature store and log in
val fsClient = Client("url")
fsClient.auth.login()

// Create the project that will hold the feature set
val demoProject = fsClient.projects.create("demo")

// Describe the parquet folder source and extract its schema
val source = ParquetFolder(rootFolder = rootFolderPath, filterPattern = subFolderPattern)
val schema = fsClient.extractSchemaFromSource(source)

// Register the feature set from the extracted schema
val featureSet = demoProject.featureSets.register(
  schema,
  "featureSetName",
  primaryKey = Seq("keyName")
)

// Ingest to cache
featureSet.ingest(source)

// Retrieve the feature set and download it
val reference = featureSet.retrieve()
reference.download()
Feedback
- Submit and view feedback for this page
- Send feedback about H2O Feature Store to cloud-feedback@h2o.ai