server
server copied to clipboard
User-defined pipeline configuration
Here's a starting point:
task_id:
- Enums with our own mappings.
- Versions with the rules/logic.
# User-defined processing pipeline: one extract task, two iterate tasks
# (text and image) that each run a generate and an embed sub-task, and an
# output section naming the destination and the fields to include.
#
# NOTE(review): the nesting below was reconstructed from a paste that had
# lost all indentation. Confirm against the consumer's schema that the
# sub-`tasks` lists belong under `iterator` and that `output` belongs
# under `pipeline`.
pipeline:
  tasks:
    # Task 1: extract, configured with a field list and a strategy list.
    # NOTE(review): `:latest` is a floating tag; consider pinning a version
    # or digest for reproducible runs (applies to every `:latest` below).
    - task: "extract"
      container: "docker.io/mixpeek/data-extractor:latest"
      parameters:
        field:
          - "document_url"
        strategy:
          - "file_url"

    # Task 2: iterate over items of type "text".
    - task: "iterate"
      description: "Iterate through text data for processing."
      iterator:
        type: "text"
        tasks:
          - task: "generate"
            description: "Generate summaries and tags from extracted text."
            container: "docker.io/mixpeek/text-processor:latest"
            parameters:
              operations:
                - "summarize"
                - "tag"
          # NOTE(review): this `container` value looks like a model
          # identifier rather than an image reference; confirm the
          # consumer accepts it in this field.
          - task: "embed"
            description: "Embed textual data using a Sentence Transformer model."
            container: "sentence-transformers/all-MiniLM-L6-v2"
            parameters:
              input_format: "text"
              # NOTE(review): quoted, so this is the string "true", not a
              # boolean; confirm which type the consumer expects.
              output_vector: "true"

    # Task 3: iterate over items of type "image".
    - task: "iterate"
      description: "Iterate through image data for processing."
      iterator:
        type: "image"
        tasks:
          - task: "generate"
            description: "Generate tags from extracted images."
            container: "docker.io/mixpeek/image-tagger:latest"
            parameters:
              model: "resnet50"
          # NOTE(review): as with the text embed task, this `container`
          # value looks like a model identifier; confirm it is valid here.
          - task: "embed"
            description: "Embed image data using a deep neural network model."
            container: "openai/clip-vit-base-patch32"
            parameters:
              input_format: "image"
              # NOTE(review): string "true", not a boolean; confirm the
              # expected type.
              output_vector: "true"

  # Output section: a destination URI plus the structure of the result.
  # NOTE(review): the URI names only a host; presumably the database and
  # credentials are supplied elsewhere. Verify.
  output:
    destination: "mongodb://target-db"
    structure:
      type: "json"
      includes:
        - "text_summaries"
        - "text_tags"
        - "text_vectors"
        - "image_tags"
        - "image_vectors"