Searching Events
This guide covers how to search and retrieve audit trail events using various filters and pagination.
Events are always returned from newest to oldest. Search progresses backward in time from `end_event_time` to `start_event_time`. The first page contains the newest events, and the last page contains the oldest events.
When `start_event_time` or `end_event_time` is not specified, default values are applied:
- start_event_time: July 1st, 2025 at 00:00:00 UTC
- end_event_time: Current time (now)
For example, `event_client.search_events()` with no filter is equivalent to:
event_client.search_events(
filter_=SearchEventsRequestFilter(
start_event_time=datetime(2025, 7, 1, 0, 0, 0, tzinfo=timezone.utc),
end_event_time=datetime.now(timezone.utc)
)
)
Basic search
Simple search
The following example demonstrates a basic event search:
import h2o_audit_trail
from h2o_audit_trail.event.search import SearchEventsRequestFilter
from datetime import datetime, timezone
# Initialize client
clients = h2o_audit_trail.login()
event_client = clients.event_client

# Search with basic filter
response = event_client.search_events(
    filter_=SearchEventsRequestFilter(
        principal_exact="users/john.doe"
    ),
    page_size=50
)

# The response holds a single page of results (up to page_size events).
print(f"Found {len(response.events)} events")
for event in response.events:
    print(f"{event.event_time}: {event.action}")
Time-based search
You can filter events by time ranges:
from datetime import datetime, timezone, timedelta
# Search events from the last 24 hours
start_time = datetime.now(timezone.utc) - timedelta(days=1)
end_time = datetime.now(timezone.utc)
response = event_client.search_events(
filter_=SearchEventsRequestFilter(
start_event_time=start_time,
end_event_time=end_time
),
page_size=100
)
Search filters
Exact match filters
Use exact match filters for precise criteria:
filter_ = SearchEventsRequestFilter(
# User-specific events
principal_exact="users/alice.smith",
# Specific event source
event_source_exact="h2oai-enginemanager-server",
# Specific workspace
workspace_exact="workspaces/my-workspace",
# Specific action
action_exact="actions/enginemanager/daiEngines/CREATE",
# Specific resource
resource_exact="//engine-manager/workspaces/ws-123/daiEngines/engine-456",
# Read-only vs. write operations
read_only=True,
# Specific IP address
source_ip_address_exact="192.168.1.100"
)
Regex filters
Use regex filters for pattern matching (Google RE2 syntax):
filter_ = SearchEventsRequestFilter(
# Multiple users with regex
principal_regex="users/(alice|bob|charlie).*",
# All H2O.ai services
event_source_regex="h2oai-.*",
# All CREATE or DELETE actions
action_regex=".*(CREATE|DELETE)$",
# Resources in specific workspace
resource_regex="//engine-manager/workspaces/prod-.*",
# Specific status codes (0=OK, 3=INVALID_ARGUMENT)
status_code_regex="^(0|3)$",
# IP addresses in specific subnet
source_ip_address_regex="192\\.168\\.1\\.[0-9]+"
)
Metadata filters
Search by metadata key-value pairs:
# Exact metadata matching
filter_ = SearchEventsRequestFilter(
metadata_exact={
"environment": "production",
"team": "data-science",
"cost_center": "ml-ops"
}
)
# Regex metadata matching
filter_ = SearchEventsRequestFilter(
metadata_regex={
"operation_id": "op-[0-9]+",
"user_email": ".*@company\\.com$",
"resource_size": "(small|medium|large)"
}
)
# Combine exact and regex metadata filters
filter_ = SearchEventsRequestFilter(
metadata_exact={"environment": "production"},
metadata_regex={"session_id": "sess-[a-f0-9]{8}"}
)
Complex search examples
Multi-criteria search
All filter fields are combined using logical AND conjunction. The filter below matches events where all of the following conditions are true:
- the event time is greater than or equal to `start_event_time` (6 hours ago), AND
- `principal_regex` matches `users/(admin|operator).*`, AND
- `read_only` equals `False`, AND
- `status_code_regex` matches `^[^0].*`, AND
- `action_regex` matches `.*engine.*`, AND
- `workspace_regex` matches `workspaces/prod-.*`
To express OR logic, use regex patterns. For example:
- `principal_regex=users/(admin|operator).*` translates to `principal_regex=users/admin.*` OR `principal_regex=users/operator.*`
- `action_regex=.*(CREATE|DELETE)$` translates to action ends with `CREATE` OR action ends with `DELETE`
OR logic is not supported across different filter fields. For example, searches for events where `read_only=True` OR the workspace matches a specific value require separate queries. To achieve this, create separate queries and merge the results.
from datetime import datetime, timezone, timedelta
# Search for recent failed operations by specific users
filter_ = SearchEventsRequestFilter(
# Time range: last 6 hours
start_event_time=datetime.now(timezone.utc) - timedelta(hours=6),
# Multiple users (OR logic via regex)
principal_regex="users/(admin|operator).*",
# Write operations only
read_only=False,
# Failed operations (non-zero status codes)
status_code_regex="^[^0].*",
# Engine-related actions
action_regex=".*engine.*",
# Production workspace
workspace_regex="workspaces/prod-.*"
)
response = event_client.search_events(filter_=filter_, page_size=100)
print(f"Found {len(response.events)} failed operations")
Audit trail analysis
# Find all administrative actions in the last week
filter_ = SearchEventsRequestFilter(
    start_event_time=datetime.now(timezone.utc) - timedelta(days=7),
    principal_regex="users/admin.*",
    read_only=False,  # Write operations only
    action_regex=".*(CREATE|DELETE|UPDATE|MODIFY).*"
)

# Analyze actions by type. A single response covers only part of the
# requested time range, so walk every page until next_page_token is empty.
action_counts = {}
page_token = ""
while True:
    response = event_client.search_events(
        filter_=filter_,
        page_size=500,
        page_token=page_token
    )
    for event in response.events:
        action_type = event.action.split('/')[-1]  # Extract action type
        action_counts[action_type] = action_counts.get(action_type, 0) + 1
    if not response.next_page_token:
        break
    page_token = response.next_page_token

print("Administrative actions in the last week:")
for action, count in sorted(action_counts.items()):
    print(f" {action}: {count}")
Pagination
Manual pagination
# Build the filter once so that every page request uses the same time window;
# rebuilding it inside the loop would shift start_event_time on each call.
filter_ = SearchEventsRequestFilter(
    start_event_time=datetime.now(timezone.utc) - timedelta(hours=24)
)

all_events = []
page_token = ""  # An empty token requests the first page
while True:
    response = event_client.search_events(
        filter_=filter_,
        page_size=100,
        page_token=page_token
    )
    all_events.extend(response.events)
    print(f"Retrieved page with {len(response.events)} events")
    # An empty next_page_token is the only signal that pagination is finished;
    # an empty page on its own is not.
    if not response.next_page_token:
        break
    page_token = response.next_page_token

print(f"Total events retrieved: {len(all_events)}")
Generator-based pagination
def search_all_events(event_client, filter_, page_size=100):
    """Generator that yields all events matching the filter.

    Calls ``event_client.search_events`` repeatedly with the same ``filter_``
    and ``page_size``, starting with an empty page token and then passing
    each response's ``next_page_token`` to the following call. Events are
    yielded lazily, page by page, in the order each response lists them.

    Args:
        event_client: Object exposing ``search_events(filter_=..., page_size=...,
            page_token=...)`` whose result has ``events`` and
            ``next_page_token`` attributes.
        filter_: Filter forwarded unchanged to every page request.
        page_size: Page size forwarded unchanged to every page request.

    Yields:
        Each event from every page, until a response has an empty
        ``next_page_token``.
    """
    page_token = ""
    while True:
        response = event_client.search_events(
            filter_=filter_,
            page_size=page_size,
            page_token=page_token,
        )
        # A page may be empty without pagination being finished, so the loop
        # is driven by the token, never by the number of events returned.
        yield from response.events
        page_token = response.next_page_token
        if not page_token:
            break
# Usage: stream the last day's events without holding every page in memory
filter_ = SearchEventsRequestFilter(
    start_event_time=datetime.now(timezone.utc) - timedelta(days=1)
)
for event in search_all_events(event_client, filter_):
    print(f"{event.event_time}: {event.action}")
Search response analysis
Response structure
response = event_client.search_events(filter_=filter_)

# Response contains:
print(f"Events: {len(response.events)}")
# A non-empty next_page_token means more pages remain
print(f"Next page available: {'Yes' if response.next_page_token else 'No'}")
print(f"Searched until: {response.searched_until_time}")

# Individual event details
for event in response.events:
    print(f"Event name: {event.name}")
    print(f"Time: {event.event_time}")
    print(f"Source: {event.event_source}")
    print(f"Action: {event.action}")
    print(f"Principal: {event.principal}")
    print(f"Status: {event.status.code}")
    print(f"Metadata: {event.metadata}")
    print("---")
Extracting insights
# Collect every page first: a single response covers only part of the
# requested 7-day range, so statistics from one page would be incomplete.
filter_ = SearchEventsRequestFilter(
    start_event_time=datetime.now(timezone.utc) - timedelta(days=7)
)
collected_events = []
page_token = ""
while True:
    response = event_client.search_events(
        filter_=filter_,
        page_size=1000,
        page_token=page_token
    )
    collected_events.extend(response.events)
    if not response.next_page_token:
        break
    page_token = response.next_page_token

# Group events by user
events_by_user = {}
for event in collected_events:
    events_by_user.setdefault(event.principal, []).append(event)

# Find most active users
user_activity = {user: len(user_events) for user, user_events in events_by_user.items()}
most_active = sorted(user_activity.items(), key=lambda x: x[1], reverse=True)
print("Most active users (last 7 days):")
for user, count in most_active[:10]:
    print(f" {user}: {count} events")

# Analyze error rates (status code 0 means OK)
total_events = len(collected_events)
failed_events = sum(1 for e in collected_events if e.status.code != 0)
error_rate = (failed_events / total_events * 100) if total_events > 0 else 0
print(f"Error rate: {error_rate:.2f}% ({failed_events}/{total_events})")
Performance optimization
Time range and pagination behavior
Search results are automatically paginated by the server. Each page covers a portion of the requested time range, and you must paginate through all pages to retrieve all matching events.
The server splits broad time ranges into smaller chunks and returns results page by page. Each response contains events from only a portion of the requested time range. For example:
# Both requests return results at similar speed for the first page
event_client.search_events(filter_=SearchEventsRequestFilter(
start_event_time=datetime.now(timezone.utc) - timedelta(days=1)
))
event_client.search_events(filter_=SearchEventsRequestFilter(
start_event_time=datetime.now(timezone.utc) - timedelta(days=30)
))
When searching broad time ranges, expect that each page contains results from only a smaller time range. To retrieve all events from a 30-day range, you must paginate through all pages using `next_page_token`. The `searched_until_time` field in each response indicates how far into the time range you've searched.
- Narrow time ranges are faster: Searching in narrow time ranges (for example, 1 minute or 1 hour) returns results faster than broad ranges (for example, 30 days or 1 year), though broad ranges still return the first page quickly.
- Empty pages don't indicate completion: You may receive pages with no events, but this doesn't mean pagination is complete. The only indicator that pagination is finished is when `next_page_token` is an empty string. For example, searching the last 30 days with no events returns multiple empty pages, each representing a portion of the time range.
- Pagination within narrow ranges: Even narrow time ranges may require pagination if they contain many events. For example, a 10-second range with 10,000 events requires at least 10 pages (with maximum page size of 1,000).
- Track progress with `searched_until_time`: Use the `searched_until_time` field in each response to monitor pagination progress through the requested time range.
Efficient filtering
Whenever possible, filter by `event_source` using `event_source_exact` or `event_source_regex`. The `event_source` field is indexed, making it the most important filter for search performance.
Additional performance considerations:
- Prefer exact matches over regex: Exact match filters (for example, `event_source_exact`) are faster than regex filters (for example, `event_source_regex`).
- Use narrow time ranges: Smaller time ranges return results faster.
- Combine multiple filters: Add specific filters to reduce the result set.
# Best: Narrow time range with event_source exact match
filter_ = SearchEventsRequestFilter(
start_event_time=datetime.now(timezone.utc) - timedelta(hours=1),
event_source_exact="h2oai-enginemanager-server", # Indexed field with exact match
read_only=False
)
# Good: Broad time range with event_source exact match
filter_ = SearchEventsRequestFilter(
start_event_time=datetime.now(timezone.utc) - timedelta(days=30),
event_source_exact="h2oai-appstore-server" # Indexed field helps performance
)
# Less efficient: Broad time range with regex and no event_source filter
filter_ = SearchEventsRequestFilter(
start_event_time=datetime.now(timezone.utc) - timedelta(days=30),
principal_regex=".*" # Matches all users, no event_source filter
)
Optimal page sizes
# For quick previews
response = event_client.search_events(filter_=filter_, page_size=10)

# For bulk processing
response = event_client.search_events(filter_=filter_, page_size=1000)

# For streaming processing
for page_size in [100, 500, 1000]:  # Test different sizes
    # Measure performance and choose optimal size
    pass
Common search patterns
Security auditing
# Find privileged operations
security_filter = SearchEventsRequestFilter(
action_regex=".*(DELETE|ADMIN|GRANT|REVOKE|MODIFY).*",
read_only=False,
principal_regex="users/admin.*"
)
# Find failed authentication attempts
auth_failures = SearchEventsRequestFilter(
action_regex=".*auth.*",
status_code_regex="^(7|16)$" # PERMISSION_DENIED, UNAUTHENTICATED
)
Usage analytics
# Daily usage patterns (events from today's midnight until now)
daily_usage = SearchEventsRequestFilter(
start_event_time=datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0),
end_event_time=datetime.now(timezone.utc)
)
# Resource utilization
resource_usage = SearchEventsRequestFilter(
action_regex=".*(CREATE|DELETE).*",
resource_regex=".*/(engines|models|datasets)/.*"
)
Troubleshooting
# Find errors for specific user
user_errors = SearchEventsRequestFilter(
principal_exact="users/problematic-user",
status_code_regex="^[^0].*", # Non-zero status codes
start_event_time=datetime.now(timezone.utc) - timedelta(hours=2)
)
# Service health check
service_health = SearchEventsRequestFilter(
event_source_exact="h2oai-my-service",
status_code_regex="^(13|14)$" # INTERNAL, UNAVAILABLE
)
Next steps
- API Reference - Complete API documentation
- Authentication - Authentication setup guide
- Submit and view feedback for this page
- Send feedback about Audit Trail Documentation to cloud-feedback@h2o.ai