Basic Enrichment
import requests
# Endpoint, auth header and request body for a single-item enrichment call.
enrich_url = "https://catalogapi.rastro.ai/api/public/enrich"
auth_headers = {"Authorization": "Bearer YOUR_API_KEY"}
payload = {
    # The item to enrich, identified by part number and name.
    "items": [{"part_number": "91251A545", "name": "Hex Head Cap Screw"}],
    # Fields requested back for each item.
    "output_schema": [
        {"name": "material", "type": "string", "description": "Material composition"},
        {"name": "thread_size", "type": "string", "description": "Thread size specification"},
        {"name": "length", "type": "string", "description": "Screw length"},
    ],
    "speed": "slow",
}

response = requests.post(enrich_url, headers=auth_headers, json=payload)
print(response.json())
Reuse Configuration with Catalog ID
Use catalog_id to automatically apply a catalog’s schema, taxonomy, and settings without repeating them in every request.
# The catalog (schema and settings) is created in the Rastro dashboard;
# passing its ID replaces the per-request output_schema.
catalog_id = "cat_abc123"  # From dashboard

# Items to enrich against the catalog's configuration.
bearings = [
    {"part_number": "6205-2RS", "name": "Deep Groove Ball Bearing"},
    {"part_number": "6206-2RS", "name": "Deep Groove Ball Bearing"},
]

# Only catalog_id, items and speed are sent - no output_schema.
response = requests.post(
    "https://catalogapi.rastro.ai/api/public/enrich",
    headers={"Authorization": "Bearer YOUR_API_KEY"},
    json={"catalog_id": catalog_id, "items": bearings, "speed": "slow"},
)
print(response.json())
The catalog’s schema is used as the output_schema. Any taxonomy or quality settings configured on the catalog are also applied. Catalogs are created through the Rastro dashboard.
Multiple Items
Items are processed concurrently for speed.response = requests.post(
"https://catalogapi.rastro.ai/api/public/enrich",
headers={"Authorization": "Bearer YOUR_API_KEY"},
json={
"items": [
{"part_number": "6205-2RS", "name": "Deep Groove Ball Bearing"},
{"part_number": "6206-2RS", "name": "Deep Groove Ball Bearing"},
{"part_number": "6207-2RS", "name": "Deep Groove Ball Bearing"}
],
"output_schema": [
{"name": "bore_diameter", "type": "string", "description": "Inner diameter in mm"},
{"name": "outer_diameter", "type": "string", "description": "Outer diameter in mm"},
{"name": "width", "type": "string", "description": "Bearing width in mm"}
],
"speed": "slow"
}
)
Domain Restrictions
Only pull data from specific manufacturer or distributor sites.response = requests.post(
"https://catalogapi.rastro.ai/api/public/enrich",
headers={"Authorization": "Bearer YOUR_API_KEY"},
json={
"items": [{"part_number": "6ES7214-1AG40-0XB0", "name": "SIMATIC S7-1200 CPU"}],
"output_schema": [
{"name": "input_voltage", "type": "string", "description": "Operating voltage range"},
{"name": "digital_inputs", "type": "integer", "description": "Number of digital inputs"},
{"name": "memory", "type": "string", "description": "Work memory size"}
],
"allowed_domains": ["siemens.com", "automation.siemens.com"],
"speed": "slow"
}
)
Data Normalization (No Web Search)
Normalize and structure data from your existing sources without web lookups. Sources trace back to your input fields.response = requests.post(
"https://catalogapi.rastro.ai/api/public/enrich",
headers={"Authorization": "Bearer YOUR_API_KEY"},
json={
"items": [{
"raw_title": "SKF 6205-2RS Deep Groove Ball Bearing 25x52x15mm",
"description": "Sealed bearing for high-speed applications"
}],
"output_schema": [
{"name": "manufacturer", "type": "string", "description": "Brand name"},
{"name": "bore_diameter", "type": "string", "description": "Inner diameter"},
{"name": "outer_diameter", "type": "string", "description": "Outer diameter"},
{"name": "width", "type": "string", "description": "Width"}
],
"web_search": False # Only use input data
}
)
# Response sources will reference INPUT_DATA
{
"after_data": {
"manufacturer": "SKF",
"bore_diameter": "25mm",
"outer_diameter": "52mm",
"width": "15mm",
"sources": {
"manufacturer": ["INPUT_DATA"],
"bore_diameter": ["INPUT_DATA"],
"outer_diameter": ["INPUT_DATA"],
"width": ["INPUT_DATA"]
}
}
}
Async Mode
For large batches, get a job ID immediately and poll for results. Results are available progressively — you can access completed items while the job is still running.import requests
import time
API_KEY = "YOUR_API_KEY"
BASE_URL = "https://catalogapi.rastro.ai/api"
# Start async job
response = requests.post(
f"{BASE_URL}/public/enrich",
headers={"Authorization": f"Bearer {API_KEY}"},
json={
"items": [
{"part_number": "6205-2RS"},
{"part_number": "6206-2RS"},
{"part_number": "6207-2RS"},
# ... hundreds more items
],
"output_schema": [
{"name": "bore_diameter", "type": "string", "description": "Inner diameter"}
],
"async_mode": True,
"speed": "slow"
}
)
job_id = response.json()["job_id"]
print(f"Job started: {job_id}")
# Poll for results — partial results are available while running
while True:
status = requests.get(
f"{BASE_URL}/public/enrich/{job_id}",
headers={"Authorization": f"Bearer {API_KEY}"},
params={"page": 1, "page_size": 50}
).json()
print(f"Status: {status['status']} — "
f"{status.get('completed_items', 0)}/{status['total_items']} items ready")
if status["status"] != "running":
break
time.sleep(5)
# Paginate through all results
all_results = []
page = 1
while True:
resp = requests.get(
f"{BASE_URL}/public/enrich/{job_id}",
headers={"Authorization": f"Bearer {API_KEY}"},
params={"page": page, "page_size": 100}
).json()
all_results.extend(resp["results"])
if page >= resp["total_pages"]:
break
page += 1
print(f"Fetched {len(all_results)} results, {resp['credits_used']} credits used")
Taxonomy Prediction
Automatically classify items into your category hierarchy.response = requests.post(
"https://catalogapi.rastro.ai/api/public/enrich",
headers={"Authorization": "Bearer YOUR_API_KEY"},
json={
"items": [{"name": "SKF 6205-2RS Deep Groove Ball Bearing"}],
"output_schema": [
{"name": "bore_diameter", "type": "string", "description": "Inner diameter"}
],
"predict_taxonomy": True,
"taxonomy": {
"name": "Industrial Parts",
"hierarchy_levels": ["Category", "Type"],
"nodes": {
"bearings": {"name": "Bearings", "parent": None},
"ball_bearings": {
"name": "Ball Bearings",
"parent": "bearings",
"attributes": [
{"name": "Bore Size", "type": "string"},
{"name": "Seal Type", "type": "string"}
]
}
}
}
}
)
{
"category_id": "ball_bearings",
"category_path": "Ball Bearings",
"taxonomy_attributes": {
"Bore Size": "25 mm",
"Seal Type": "2RS (rubber seal)"
}
}
Quality Scoring
Add a quality prompt to get a 1-5 score for each item.response = requests.post(
"https://catalogapi.rastro.ai/api/public/enrich",
headers={"Authorization": "Bearer YOUR_API_KEY"},
json={
"items": [{"part_number": "6205-2RS"}],
"output_schema": [
{"name": "bore_diameter", "type": "string", "description": "Inner diameter"},
{"name": "load_rating", "type": "string", "description": "Dynamic load rating"}
],
"quality_prompt": "Evaluate if product has complete specs for procurement"
}
)
{
"after_data": {
"bore_diameter": "25 mm",
"load_rating": "14.8 kN",
"sources": {
"bore_diameter": ["https://skf.com/products/bearings/6205-2RS"],
"load_rating": ["https://skf.com/products/bearings/6205-2RS"]
}
},
"quality_score": 4,
"quality_result": {
"score": 4,
"explanation": "Complete dimensions and dynamic load rating, missing static load rating",
"issues": ["No static load rating"],
"suggestions": ["Add C0 static load rating"]
}
}
Dry Runs
Use max_rows to test with a subset before processing everything.
# Both requests share the same endpoint and auth header.
enrich_url = "https://catalogapi.rastro.ai/api/public/enrich"
auth_headers = {"Authorization": "Bearer YOUR_API_KEY"}

# Dry run: max_rows caps processing at the first rows of the list.
dry_run_payload = {
    "items": large_item_list,  # 1000+ items
    "output_schema": schema,
    "max_rows": 10,  # Only process first 10
}
response = requests.post(enrich_url, headers=auth_headers, json=dry_run_payload)

# Later, process the rest using source_activity_id
job_id = response.json()["job_id"]
resume_payload = {
    "source_activity_id": job_id,  # Resume from previous job
    "output_schema": schema,
}
response = requests.post(enrich_url, headers=auth_headers, json=resume_payload)
Full Python Client
import requests
import time
class RastroClient:
    """Small client for the Rastro public enrichment endpoints.

    Wraps ``POST /public/enrich`` (start an enrichment) and
    ``GET /public/enrich/{job_id}`` (read job status and results).
    HTTP error statuses raise ``requests.HTTPError`` instead of being
    returned as if they were results.
    """

    def __init__(self, api_key, timeout=None):
        """Store credentials and connection settings.

        Args:
            api_key: Key sent as a Bearer token on every request.
            timeout: Optional ``requests`` timeout (seconds) applied to every
                HTTP call. ``None`` (default) waits indefinitely, which is
                what ``requests`` does when no timeout is given.
        """
        self.api_key = api_key
        self.base_url = "https://catalogapi.rastro.ai/api"
        self.timeout = timeout

    def _headers(self):
        """Return the authorization header sent with every request."""
        return {"Authorization": f"Bearer {self.api_key}"}

    @staticmethod
    def _decode(response):
        """Raise on an HTTP error status, otherwise return the JSON body."""
        response.raise_for_status()
        return response.json()

    @staticmethod
    def _build_payload(items, output_schema, speed, extra):
        """Assemble the enrich request body, leaving out unset fields.

        ``items`` and ``output_schema`` are omitted when ``None`` so that
        requests driven by ``catalog_id`` or ``source_activity_id`` do not
        send them as null. Key order matches the documented examples.
        """
        payload = {}
        if items is not None:
            payload["items"] = items
        if output_schema is not None:
            payload["output_schema"] = output_schema
        payload["speed"] = speed
        payload.update(extra)
        return payload

    def enrich(self, items=None, output_schema=None, speed="slow", **kwargs):
        """Submit an enrichment request and return the decoded response.

        Args:
            items: List of item dicts to enrich; may be omitted, e.g. when
                resuming with ``source_activity_id``.
            output_schema: List of field definitions; may be omitted, e.g.
                when a ``catalog_id`` is passed.
            speed: Value for the request's ``speed`` field.
            **kwargs: Extra request fields passed through unchanged, such as
                ``async_mode``, ``catalog_id`` or ``allowed_domains``.

        Raises:
            requests.HTTPError: If the API answers with an error status.
        """
        response = requests.post(
            f"{self.base_url}/public/enrich",
            headers=self._headers(),
            json=self._build_payload(items, output_schema, speed, kwargs),
            timeout=self.timeout,
        )
        return self._decode(response)

    def poll(self, job_id, page=1, page_size=1000):
        """Poll job status. Returns partial results while running.

        Raises:
            requests.HTTPError: If the API answers with an error status.
        """
        response = requests.get(
            f"{self.base_url}/public/enrich/{job_id}",
            headers=self._headers(),
            params={"page": page, "page_size": page_size},
            timeout=self.timeout,
        )
        return self._decode(response)

    def get_all_results(self, job_id, page_size=100):
        """Paginate through all available results.

        Returns:
            A ``(results, last_response)`` tuple: the results of every page
            concatenated, and the response dict of the last page fetched.
        """
        all_results = []
        page = 1
        while True:
            resp = self.poll(job_id, page=page, page_size=page_size)
            # A page without results contributes nothing.
            all_results.extend(resp.get("results") or [])
            # A response without total_pages is treated as a single page.
            if page >= resp.get("total_pages", 1):
                break
            page += 1
        return all_results, resp

    def enrich_async(self, items, output_schema, speed="slow", poll_interval=5, **kwargs):
        """Start an async job, wait for it to stop running, return all results.

        Args:
            items: List of item dicts to enrich.
            output_schema: List of field definitions.
            speed: Value for the request's ``speed`` field.
            poll_interval: Seconds to sleep between status polls.
            **kwargs: Extra request fields forwarded to ``enrich``.

        Returns:
            The last page's response dict with ``results`` replaced by the
            results of every page.
        """
        # Start job
        result = self.enrich(items, output_schema, speed, async_mode=True, **kwargs)
        job_id = result["job_id"]
        print(f"Started job: {job_id}")

        # Poll until complete — partial results available while running
        while True:
            # page_size=1 keeps the status check light; results come later.
            status = self.poll(job_id, page_size=1)
            completed = status.get("completed_items", 0)
            total = status.get("total_items", "?")
            if status["status"] != "running":
                break
            print(f"Progress: {completed}/{total} items ready")
            time.sleep(poll_interval)

        # Fetch all results with pagination
        all_results, final = self.get_all_results(job_id)
        final["results"] = all_results
        return final
# Example usage
client = RastroClient("YOUR_API_KEY")

# Synchronous enrichment
requested_fields = [
    {"name": "bore_diameter", "type": "string", "description": "Inner diameter"},
    {"name": "manufacturer", "type": "string", "description": "Brand name"},
]
result = client.enrich(
    items=[{"part_number": "6205-2RS"}],
    output_schema=requested_fields,
)

# Keys of after_data that hold provenance metadata rather than field values.
metadata_keys = ("sources", "source_explanations")

for item in result["results"]:
    enriched = item["after_data"]
    provenance = enriched.get("sources", {})
    print(f"\n{item['original_data']}:")
    for field, value in enriched.items():
        if field not in metadata_keys:
            print(f" {field}: {value}")
            # List the sources recorded for this field, when there are any.
            if field in provenance:
                print(f" Sources: {provenance[field]}")
Full TypeScript Client
// API key sent as a Bearer token on every request; replace the placeholder.
const API_KEY = "YOUR_API_KEY";
// Base URL that the endpoint paths below are appended to.
const BASE_URL = "https://catalogapi.rastro.ai/api";
/** One entry of the `output_schema` array sent with an enrich request. */
interface OutputField {
  // Key under which the value is requested.
  name: string;
  // Data type requested for the value.
  type: "string" | "number" | "integer" | "boolean" | "array";
  // Free-text description of the field.
  description: string;
  // NOTE(review): presumably the unit the value should be expressed in; confirm against the API reference.
  unit?: string;
  // NOTE(review): presumably restricts the value to one of these strings; confirm against the API reference.
  enum?: string[];
}
/**
 * Optional top-level fields of an enrich request. They are spread into the
 * request body as-is, so the names match the API's JSON keys.
 */
interface EnrichOptions {
  // Value for the request's `speed` field.
  speed?: "fast" | "medium" | "slow" | "cheap";
  // When true, the response carries a job_id to poll instead of waiting.
  async_mode?: boolean;
  // Only pull data from these sites.
  allowed_domains?: string[];
  // Set to false to use only the input data, with no web lookups.
  web_search?: boolean;
  // Classify each item into the hierarchy given in `taxonomy`.
  predict_taxonomy?: boolean;
  // Category hierarchy (name, hierarchy_levels, nodes) used for classification.
  taxonomy?: object;
  // Prompt used to produce a 1-5 quality score for each item.
  quality_prompt?: string;
  // ID of a catalog whose schema, taxonomy and settings should be applied.
  catalog_id?: string;
  // Process only the first N items (dry run).
  max_rows?: number;
  // Job ID of a previous run to resume from.
  source_activity_id?: string;
}
/**
 * Submit items for enrichment via POST `${BASE_URL}/public/enrich`.
 *
 * @param items        Items to enrich.
 * @param outputSchema Fields requested for each item (sent as `output_schema`).
 * @param options      Extra request fields, spread into the body unchanged.
 * @returns The decoded JSON response body.
 * @throws Error when the API answers with a non-2xx status.
 */
async function enrich(
  items: object[],
  outputSchema: OutputField[],
  options: EnrichOptions = {}
) {
  const response = await fetch(`${BASE_URL}/public/enrich`, {
    method: "POST",
    headers: {
      "Authorization": `Bearer ${API_KEY}`,
      "Content-Type": "application/json"
    },
    body: JSON.stringify({
      items,
      output_schema: outputSchema,
      ...options
    })
  });
  // fetch resolves on HTTP errors too, so check the status explicitly
  // rather than handing an error body back as if it were a result.
  if (!response.ok) {
    throw new Error(`Enrich request failed: ${response.status} ${response.statusText}`);
  }
  return response.json();
}
/**
 * Read the status and one page of results of an enrichment job.
 *
 * @param jobId    Job identifier returned by the enrich endpoint.
 * @param page     1-based page number to fetch.
 * @param pageSize Number of results per page.
 * @returns The decoded JSON response body.
 * @throws Error when the API answers with a non-2xx status.
 */
async function poll(jobId: string, page = 1, pageSize = 1000) {
  const response = await fetch(
    `${BASE_URL}/public/enrich/${jobId}?page=${page}&page_size=${pageSize}`,
    { headers: { "Authorization": `Bearer ${API_KEY}` } }
  );
  // fetch does not reject on HTTP error statuses; surface them here.
  if (!response.ok) {
    throw new Error(`Poll request failed: ${response.status} ${response.statusText}`);
  }
  return response.json();
}
/**
 * Fetch every result page of a job and merge them into one response object.
 *
 * @param jobId    Job identifier returned by the enrich endpoint.
 * @param pageSize Number of results requested per page.
 * @returns The last page's response fields, with `results` replaced by the
 *          concatenation of all pages.
 */
async function getAllResults(jobId: string, pageSize = 100) {
  const allResults: any[] = [];
  let page = 1;
  let lastResp: any;
  while (true) {
    lastResp = await poll(jobId, page, pageSize);
    // A page without a `results` array contributes nothing.
    allResults.push(...(lastResp.results ?? []));
    // A response without total_pages is treated as a single page.
    if (page >= (lastResp.total_pages || 1)) break;
    page++;
  }
  // Spread lastResp first: later properties win, so the accumulated list
  // must come last or the final page's own `results` would replace it.
  return { ...lastResp, results: allResults };
}
/**
 * Start an async enrichment job, wait until it stops running, then return
 * all of its results.
 *
 * @param items        Items to enrich.
 * @param outputSchema Fields requested for each item.
 * @param options      Extra request fields; `async_mode` is forced to true.
 * @param pollInterval Milliseconds to wait between status polls.
 * @returns The merged response produced by `getAllResults`.
 * @throws Error when the enrich response carries no `job_id`.
 */
async function enrichAsync(
  items: object[],
  outputSchema: OutputField[],
  options: EnrichOptions = {},
  pollInterval = 5000
) {
  const result = await enrich(items, outputSchema, { ...options, async_mode: true });
  const jobId = result.job_id;
  // Without a job ID every later call would target ".../enrich/undefined".
  if (!jobId) {
    throw new Error("Enrich response did not include a job_id");
  }
  console.log(`Started job: ${jobId}`);
  // Poll until complete — partial results available while running
  while (true) {
    // A page size of 1 keeps the status check light; results are fetched below.
    const status = await poll(jobId, 1, 1);
    if (status.status !== "running") break;
    console.log(`Progress: ${status.completed_items || 0}/${status.total_items || "?"} items ready`);
    await new Promise(r => setTimeout(r, pollInterval));
  }
  // Fetch all results with pagination
  return getAllResults(jobId);
}
// Example usage
const requestedFields: OutputField[] = [
  { name: "bore_diameter", type: "string", description: "Inner diameter" },
  { name: "manufacturer", type: "string", description: "Brand name" }
];
const result = await enrich(
  [{ part_number: "6205-2RS" }],
  requestedFields,
  { speed: "slow" }
);

for (const item of result.results) {
  // Separate the provenance metadata from the enriched field values.
  const { sources, source_explanations, ...fields } = item.after_data;
  console.log(`\n${JSON.stringify(item.original_data)}:`);
  Object.entries(fields).forEach(([field, value]) => {
    console.log(` ${field}: ${value}`);
    // List the sources recorded for this field, when there are any.
    const fieldSources = sources?.[field];
    if (fieldSources) {
      console.log(` Sources: ${fieldSources.join(", ")}`);
    }
  });
}