Enrich Examples

Basic Enrichment

import requests

# Request body: the item to look up plus the fields we want filled in.
payload = {
    "items": [{"part_number": "91251A545", "name": "Hex Head Cap Screw"}],
    "output_schema": [
        {"name": "material", "type": "string", "description": "Material composition"},
        {"name": "thread_size", "type": "string", "description": "Thread size specification"},
        {"name": "length", "type": "string", "description": "Screw length"},
    ],
    "speed": "slow",
}

response = requests.post(
    "https://catalogapi.rastro.ai/api/public/enrich",
    headers={"Authorization": "Bearer YOUR_API_KEY"},
    json=payload,
)
print(response.json())

Reuse Configuration with Catalog ID

Use catalog_id to automatically apply a catalog’s schema, taxonomy, and settings without repeating them in every request.

# The catalog (schema + settings) is created in the Rastro dashboard; its ID
# stands in for output_schema in the request body.
catalog_id = "cat_abc123"  # From dashboard

bearings = [
    {"part_number": "6205-2RS", "name": "Deep Groove Ball Bearing"},
    {"part_number": "6206-2RS", "name": "Deep Groove Ball Bearing"},
]

# Only catalog_id, items and speed are sent - no output_schema needed.
response = requests.post(
    "https://catalogapi.rastro.ai/api/public/enrich",
    headers={"Authorization": "Bearer YOUR_API_KEY"},
    json={"catalog_id": catalog_id, "items": bearings, "speed": "slow"},
)
print(response.json())

The catalog’s schema fields are automatically used as the output_schema. Any taxonomy or quality settings configured on the catalog are also applied. Catalogs are created through the Rastro dashboard.

Multiple Items

Items are processed concurrently for speed.

# Three bearings that share a name and differ only in part number.
part_numbers = ("6205-2RS", "6206-2RS", "6207-2RS")

# (field name, description) pairs; every field is requested as a string.
dimension_fields = (
    ("bore_diameter", "Inner diameter in mm"),
    ("outer_diameter", "Outer diameter in mm"),
    ("width", "Bearing width in mm"),
)

response = requests.post(
    "https://catalogapi.rastro.ai/api/public/enrich",
    headers={"Authorization": "Bearer YOUR_API_KEY"},
    json={
        "items": [
            {"part_number": number, "name": "Deep Groove Ball Bearing"}
            for number in part_numbers
        ],
        "output_schema": [
            {"name": name, "type": "string", "description": description}
            for name, description in dimension_fields
        ],
        "speed": "slow",
    },
)

Domain Restrictions

Only pull data from specific manufacturer or distributor sites.

# Sites the enrichment is allowed to pull data from.
siemens_domains = ["siemens.com", "automation.siemens.com"]

plc_schema = [
    {"name": "input_voltage", "type": "string", "description": "Operating voltage range"},
    {"name": "digital_inputs", "type": "integer", "description": "Number of digital inputs"},
    {"name": "memory", "type": "string", "description": "Work memory size"},
]

response = requests.post(
    "https://catalogapi.rastro.ai/api/public/enrich",
    headers={"Authorization": "Bearer YOUR_API_KEY"},
    json={
        "items": [{"part_number": "6ES7214-1AG40-0XB0", "name": "SIMATIC S7-1200 CPU"}],
        "output_schema": plc_schema,
        "allowed_domains": siemens_domains,
        "speed": "slow",
    },
)

Data Normalization (No Web Search)

Normalize and structure data from your existing sources without web lookups. Sources trace back to your input fields.

# Free-text input that already contains the values to be extracted.
raw_item = {
    "raw_title": "SKF 6205-2RS Deep Groove Ball Bearing 25x52x15mm",
    "description": "Sealed bearing for high-speed applications",
}

# (field name, description) pairs; every field is requested as a string.
wanted_fields = (
    ("manufacturer", "Brand name"),
    ("bore_diameter", "Inner diameter"),
    ("outer_diameter", "Outer diameter"),
    ("width", "Width"),
)

response = requests.post(
    "https://catalogapi.rastro.ai/api/public/enrich",
    headers={"Authorization": "Bearer YOUR_API_KEY"},
    json={
        "items": [raw_item],
        "output_schema": [
            {"name": name, "type": "string", "description": description}
            for name, description in wanted_fields
        ],
        "web_search": False,  # Only use input data
    },
)

# Response sources will reference INPUT_DATA

Response:

{
  "after_data": {
    "manufacturer": "SKF",
    "bore_diameter": "25mm",
    "outer_diameter": "52mm",
    "width": "15mm",
    "sources": {
      "manufacturer": ["INPUT_DATA"],
      "bore_diameter": ["INPUT_DATA"],
      "outer_diameter": ["INPUT_DATA"],
      "width": ["INPUT_DATA"]
    }
  }
}

Async Mode

For large batches, get a job ID immediately and poll for results. Results are available progressively — you can access completed items while the job is still running.

import requests
import time

API_KEY = "YOUR_API_KEY"
BASE_URL = "https://catalogapi.rastro.ai/api"

# Start async job
response = requests.post(
    f"{BASE_URL}/public/enrich",
    headers={"Authorization": f"Bearer {API_KEY}"},
    json={
        "items": [
            {"part_number": "6205-2RS"},
            {"part_number": "6206-2RS"},
            {"part_number": "6207-2RS"},
            # ... hundreds more items
        ],
        "output_schema": [
            {"name": "bore_diameter", "type": "string", "description": "Inner diameter"}
        ],
        "async_mode": True,
        "speed": "slow"
    }
)
# Fail fast on 4xx/5xx: otherwise an error body is parsed as if it were the
# job payload and the script dies with a confusing KeyError on "job_id".
response.raise_for_status()
job_id = response.json()["job_id"]
print(f"Job started: {job_id}")

# Poll for results — partial results are available while running
while True:
    poll_response = requests.get(
        f"{BASE_URL}/public/enrich/{job_id}",
        headers={"Authorization": f"Bearer {API_KEY}"},
        params={"page": 1, "page_size": 50}
    )
    poll_response.raise_for_status()
    status = poll_response.json()

    print(f"Status: {status['status']} — "
          f"{status.get('completed_items', 0)}/{status['total_items']} items ready")

    # Any status other than "running" ends the polling loop.
    if status["status"] != "running":
        break
    time.sleep(5)

# Paginate through all results
all_results = []
page = 1
while True:
    page_response = requests.get(
        f"{BASE_URL}/public/enrich/{job_id}",
        headers={"Authorization": f"Bearer {API_KEY}"},
        params={"page": page, "page_size": 100}
    )
    page_response.raise_for_status()
    resp = page_response.json()

    all_results.extend(resp["results"])
    # total_pages comes from the response itself, so the loop stops on the last page.
    if page >= resp["total_pages"]:
        break
    page += 1

print(f"Fetched {len(all_results)} results, {resp['credits_used']} credits used")

Taxonomy Prediction

Automatically classify items into your category hierarchy.

# Category tree: "bearings" is a root node, "ball_bearings" is its child and
# declares the attributes to fill for items classified under it.
taxonomy = {
    "name": "Industrial Parts",
    "hierarchy_levels": ["Category", "Type"],
    "nodes": {
        "bearings": {"name": "Bearings", "parent": None},
        "ball_bearings": {
            "name": "Ball Bearings",
            "parent": "bearings",
            "attributes": [
                {"name": "Bore Size", "type": "string"},
                {"name": "Seal Type", "type": "string"},
            ],
        },
    },
}

response = requests.post(
    "https://catalogapi.rastro.ai/api/public/enrich",
    headers={"Authorization": "Bearer YOUR_API_KEY"},
    json={
        "items": [{"name": "SKF 6205-2RS Deep Groove Ball Bearing"}],
        "output_schema": [
            {"name": "bore_diameter", "type": "string", "description": "Inner diameter"}
        ],
        "predict_taxonomy": True,
        "taxonomy": taxonomy,
    },
)

Response includes:

{
  "category_id": "ball_bearings",
  "category_path": "Ball Bearings",
  "taxonomy_attributes": {
    "Bore Size": "25 mm",
    "Seal Type": "2RS (rubber seal)"
  }
}

Quality Scoring

Add a quality prompt to get a 1-5 score for each item.

# Prompt the quality score is evaluated against.
quality_prompt = "Evaluate if product has complete specs for procurement"

bearing_schema = [
    {"name": "bore_diameter", "type": "string", "description": "Inner diameter"},
    {"name": "load_rating", "type": "string", "description": "Dynamic load rating"},
]

response = requests.post(
    "https://catalogapi.rastro.ai/api/public/enrich",
    headers={"Authorization": "Bearer YOUR_API_KEY"},
    json={
        "items": [{"part_number": "6205-2RS"}],
        "output_schema": bearing_schema,
        "quality_prompt": quality_prompt,
    },
)

Response includes:

{
  "after_data": {
    "bore_diameter": "25 mm",
    "load_rating": "14.8 kN",
    "sources": {
      "bore_diameter": ["https://skf.com/products/bearings/6205-2RS"],
      "load_rating": ["https://skf.com/products/bearings/6205-2RS"]
    }
  },
  "quality_score": 4,
  "quality_result": {
    "score": 4,
    "explanation": "Complete dimensions and dynamic load rating, missing static load rating",
    "issues": ["No static load rating"],
    "suggestions": ["Add the C0 static load rating"]
  }
}

Dry Runs

Use max_rows to test with a subset before processing everything.

enrich_url = "https://catalogapi.rastro.ai/api/public/enrich"
auth_headers = {"Authorization": "Bearer YOUR_API_KEY"}

# Trial run: max_rows limits processing to a subset of the submitted items.
trial_body = {
    "items": large_item_list,  # 1000+ items
    "output_schema": schema,
    "max_rows": 10,  # Only process first 10
}
response = requests.post(enrich_url, headers=auth_headers, json=trial_body)

# Later, process the rest using source_activity_id
job_id = response.json()["job_id"]
resume_body = {
    "source_activity_id": job_id,  # Resume from previous job
    "output_schema": schema,
}
response = requests.post(enrich_url, headers=auth_headers, json=resume_body)

Full Python Client

import requests
import time

class RastroClient:
    """Small client for the Rastro public enrichment endpoints.

    Args:
        api_key: API key sent as a Bearer token with every request.
        timeout: Optional timeout in seconds (or a ``(connect, read)`` tuple)
            passed to every ``requests`` call. ``None`` (the default) keeps
            the ``requests`` default of waiting indefinitely.
    """

    def __init__(self, api_key, timeout=None):
        self.api_key = api_key
        self.base_url = "https://catalogapi.rastro.ai/api"
        self.timeout = timeout

    def _headers(self):
        """Return the Authorization header sent with every request."""
        return {"Authorization": f"Bearer {self.api_key}"}

    @staticmethod
    def _parse(response):
        """Return the decoded JSON body, raising on 4xx/5xx responses.

        Raises:
            requests.HTTPError: if the server answered with an error status.
        """
        # Without this check an error body is handed back as if it were a
        # result and only fails later with a confusing KeyError.
        response.raise_for_status()
        return response.json()

    def enrich(self, items, output_schema, speed="slow", **kwargs):
        """POST an enrichment request and return the decoded JSON response.

        Args:
            items: List of item dicts to enrich.
            output_schema: List of field definitions to fill in.
            speed: Value sent as the request's ``speed`` option.
            **kwargs: Extra request-body keys (e.g. ``async_mode``,
                ``allowed_domains``), merged in after the named ones.

        Raises:
            requests.HTTPError: if the server answered with an error status.
        """
        response = requests.post(
            f"{self.base_url}/public/enrich",
            headers=self._headers(),
            json={
                "items": items,
                "output_schema": output_schema,
                "speed": speed,
                **kwargs
            },
            timeout=self.timeout,
        )
        return self._parse(response)

    def poll(self, job_id, page=1, page_size=1000):
        """Poll job status. Returns partial results while running."""
        response = requests.get(
            f"{self.base_url}/public/enrich/{job_id}",
            headers=self._headers(),
            params={"page": page, "page_size": page_size},
            timeout=self.timeout,
        )
        return self._parse(response)

    def get_all_results(self, job_id, page_size=100):
        """Paginate through all available results.

        Returns:
            A ``(results, last_response)`` tuple: the concatenated ``results``
            of every page, and the decoded body of the last page fetched.
        """
        all_results = []
        page = 1
        while True:
            resp = self.poll(job_id, page=page, page_size=page_size)
            all_results.extend(resp["results"])
            # A response without total_pages is treated as a single page.
            if page >= resp.get("total_pages", 1):
                break
            page += 1
        return all_results, resp

    def enrich_async(self, items, output_schema, speed="slow", poll_interval=5, **kwargs):
        """Start an async job, wait for it to stop running, return all results.

        Args:
            items: List of item dicts to enrich.
            output_schema: List of field definitions to fill in.
            speed: Value sent as the request's ``speed`` option.
            poll_interval: Seconds to sleep between status polls.
            **kwargs: Extra request-body keys forwarded to :meth:`enrich`.

        Returns:
            The last page's response dict with ``results`` replaced by the
            results of every page.
        """
        # Force async mode by assignment so a caller-supplied async_mode does
        # not raise a duplicate-keyword TypeError.
        kwargs["async_mode"] = True

        # Start job
        result = self.enrich(items, output_schema, speed, **kwargs)
        job_id = result["job_id"]
        print(f"Started job: {job_id}")

        # Poll until complete — partial results available while running
        while True:
            # A one-item page is enough to read the job status.
            status = self.poll(job_id, page_size=1)
            if status["status"] != "running":
                break

            completed = status.get("completed_items", 0)
            total = status.get("total_items", "?")
            print(f"Progress: {completed}/{total} items ready")
            time.sleep(poll_interval)

        # Fetch all results with pagination
        all_results, final = self.get_all_results(job_id)
        final["results"] = all_results
        return final


# Example usage
client = RastroClient("YOUR_API_KEY")

# Synchronous enrichment
requested_fields = [
    {"name": "bore_diameter", "type": "string", "description": "Inner diameter"},
    {"name": "manufacturer", "type": "string", "description": "Brand name"},
]
result = client.enrich(
    items=[{"part_number": "6205-2RS"}],
    output_schema=requested_fields,
)

# Keys inside after_data that hold provenance rather than enriched values.
metadata_keys = ("sources", "source_explanations")

for entry in result["results"]:
    enriched = entry["after_data"]
    provenance = enriched.get("sources", {})
    print(f"\n{entry['original_data']}:")
    for name in enriched:
        if name in metadata_keys:
            continue
        print(f"  {name}: {enriched[name]}")
        if name in provenance:
            print(f"    Sources: {provenance[name]}")

Full TypeScript Client

// API key and endpoint root used by the request helpers in this example.
const API_KEY = "YOUR_API_KEY";
const BASE_URL = "https://catalogapi.rastro.ai/api";

// One requested output field; an array of these is sent as `output_schema`.
interface OutputField {
  // Key under which the field is requested.
  name: string;
  // Value type requested for the field.
  type: "string" | "number" | "integer" | "boolean" | "array";
  // Free-text description of what the field should contain.
  description: string;
  // NOTE(review): presumably the unit of measure for the value — confirm against the API reference.
  unit?: string;
  // NOTE(review): presumably the set of allowed values — confirm against the API reference.
  enum?: string[];
}

// Optional request-body keys for the enrich endpoint. Every property is
// spread into the JSON body as-is, so names match the API's snake_case keys.
interface EnrichOptions {
  speed?: "fast" | "medium" | "slow" | "cheap";
  // When true the API returns a job ID to poll instead of results.
  async_mode?: boolean;
  // Restrict lookups to these sites.
  allowed_domains?: string[];
  // Set to false to use only the submitted item data.
  web_search?: boolean;
  predict_taxonomy?: boolean;
  taxonomy?: object;
  quality_prompt?: string;
  // The three keys below are sent by the Python examples in this document
  // but were missing here, so typed callers could not pass them.
  // ID of a catalog whose schema and settings should be applied.
  catalog_id?: string;
  // Process only the first N items (dry run).
  max_rows?: number;
  // Job ID of a previous run to resume from.
  source_activity_id?: string;
}

// POST an enrichment request and return the decoded JSON response.
// `options` is spread last, so its keys are added to (or override) the
// `items` / `output_schema` keys of the request body.
// Throws if the server answers with a non-2xx status.
async function enrich(
  items: object[],
  outputSchema: OutputField[],
  options: EnrichOptions = {}
) {
  const response = await fetch(`${BASE_URL}/public/enrich`, {
    method: "POST",
    headers: {
      "Authorization": `Bearer ${API_KEY}`,
      "Content-Type": "application/json"
    },
    body: JSON.stringify({
      items,
      output_schema: outputSchema,
      ...options
    })
  });
  // fetch() only rejects on network failure; without this check an HTTP
  // error body would be returned as if it were a result.
  if (!response.ok) {
    throw new Error(`Enrich request failed: ${response.status} ${response.statusText}`);
  }
  return response.json();
}

// Fetch one page of a job's status and results.
// Throws if the server answers with a non-2xx status.
async function poll(jobId: string, page = 1, pageSize = 1000) {
  // Escape the ID so an unexpected character cannot alter the request path.
  const response = await fetch(
    `${BASE_URL}/public/enrich/${encodeURIComponent(jobId)}?page=${page}&page_size=${pageSize}`,
    { headers: { "Authorization": `Bearer ${API_KEY}` } }
  );
  // fetch() only rejects on network failure; without this check an HTTP
  // error body would be returned as if it were a job status.
  if (!response.ok) {
    throw new Error(`Poll request failed: ${response.status} ${response.statusText}`);
  }
  return response.json();
}

// Fetch every page of a job and return the last page's response with
// `results` replaced by the concatenated results of all pages.
async function getAllResults(jobId: string, pageSize = 100) {
  const allResults: any[] = [];
  let page = 1;
  let lastResp: any;
  while (true) {
    lastResp = await poll(jobId, page, pageSize);
    allResults.push(...lastResp.results);
    // A response without total_pages is treated as a single page.
    if (page >= (lastResp.total_pages || 1)) break;
    page++;
  }
  // Spread first, then set `results`: later properties win in an object
  // literal, so the reverse order would return only the last page.
  return { ...lastResp, results: allResults };
}

// Start an async enrichment job, wait until it is no longer "running",
// then return every page of results.
async function enrichAsync(
  items: object[],
  outputSchema: OutputField[],
  options: EnrichOptions = {},
  pollInterval = 5000
) {
  // async_mode is set last so it always wins over the caller's options.
  const asyncOptions: EnrichOptions = { ...options, async_mode: true };
  const started = await enrich(items, outputSchema, asyncOptions);
  const jobId = started.job_id;
  console.log(`Started job: ${jobId}`);

  const sleep = (ms: number) => new Promise(resolve => setTimeout(resolve, ms));

  // Poll until complete — partial results available while running.
  // A one-item page is enough to read the job status.
  for (
    let status = await poll(jobId, 1, 1);
    status.status === "running";
    status = await poll(jobId, 1, 1)
  ) {
    console.log(`Progress: ${status.completed_items || 0}/${status.total_items || "?"} items ready`);
    await sleep(pollInterval);
  }

  // Fetch all results with pagination
  return getAllResults(jobId);
}

// Example usage
const requestedFields: OutputField[] = [
  { name: "bore_diameter", type: "string", description: "Inner diameter" },
  { name: "manufacturer", type: "string", description: "Brand name" }
];

const result = await enrich(
  [{ part_number: "6205-2RS" }],
  requestedFields,
  { speed: "slow" }
);

for (const { after_data: afterData, original_data: originalData } of result.results) {
  // Split the provenance keys off from the enriched values.
  const { sources, source_explanations, ...fields } = afterData;
  console.log(`\n${JSON.stringify(originalData)}:`);
  Object.entries(fields).forEach(([field, value]) => {
    console.log(`  ${field}: ${value}`);
    const fieldSources = sources?.[field];
    if (fieldSources) {
      console.log(`    Sources: ${fieldSources.join(", ")}`);
    }
  });
}

Basic Enrichment

Reuse Configuration with Catalog ID

Multiple Items

Domain Restrictions

Data Normalization (No Web Search)

Async Mode

Taxonomy Prediction

Quality Scoring

Dry Runs

Full Python Client

Full TypeScript Client