JSON folder example
Example directory structure
- bucket_root
- nested_folder/
- 2021-05-03/ -> Date Partition
- training_data/ -> Sub Folder
- data.json
- training_data/ -> Sub Folder
- 2021-05-04/ -> Date Partition
- training_sample/ -> Sub Folder
- data.json
- training_sample/ -> Sub Folder
- 2021-05-03/ -> Date Partition
- nested_folder/
- Python
- Scala
from featurestore import Client, JSONFolder
# Connect to the feature store and log in
fs_client = Client("ip:port")
fs_client.auth.login()

# Create the project that the feature set will be registered in
demo_project = fs_client.projects.create("demo")

# Define the JSON folder source: a root folder plus a filter pattern
source = JSONFolder(
    root_folder="s3a://feature-store-test-data/nested_folder",
    filter_pattern=".*/training.*",
)

# Extract the schema from the source
source_schema = fs_client.extract_schema_from_source(source)

# Register the feature set under the project using the extracted schema
feature_set = demo_project.feature_sets.register(
    source_schema,
    "feature_set_name",
    primary_key=["key_name"],
)

# Ingest the source data into the feature set
feature_set.ingest(source)

# Retrieve the feature set and download it
retrieved = feature_set.retrieve()
retrieved.download()
import ai.h2o.featurestore.Client
import ai.h2o.featurestore.core.sources.JSONFolder
// Connect to the feature store and log in
val fsClient = Client("url")
fsClient.auth.login()

// Create the project that the feature set will be registered in
val demoProject = fsClient.projects.create("demo")

// Define the JSON folder source: a root folder plus a filter pattern
val source = JSONFolder(
  rootFolder = "s3a://feature-store-test-data/nested_folder",
  filterPattern = ".*/training.*"
)

// Extract the schema from the source
val sourceSchema = fsClient.extractSchemaFromSource(source)

// Register the feature set under the project using the extracted schema
val featureSet = demoProject.featureSets.register(
  sourceSchema,
  "featureSetName",
  primaryKey = Seq("keyName")
)

// Ingest the source data into the feature set
featureSet.ingest(source)

// Retrieve the feature set and download it
val retrieved = featureSet.retrieve()
retrieved.download()
Feedback
- Submit and view feedback for this page
- Send feedback about H2O Feature Store to cloud-feedback@h2o.ai