diff --git a/.github/workflows/validate-xml.yml b/.github/workflows/validate-xml.yml
new file mode 100644
index 0000000..f107f90
--- /dev/null
+++ b/.github/workflows/validate-xml.yml
@@ -0,0 +1,51 @@
+name: Validate XML Samples
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+
+jobs:
+  validate-xml:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Install libxml2-utils
+        run: sudo apt-get update && sudo apt-get install -y libxml2-utils
+
+      - name: Validate XML against local XSD
+        run: |
+          # IFS= and -r keep each file name intact (no whitespace trimming, no backslash interpretation).
+          find SampleDomains -type f -name "*.xml" | while IFS= read -r xml_file; do
+            echo "INFO: Processing '$xml_file'"
+
+            # Extract the schema URL from the XML file
+            schema_url=$(grep -o 'xsi:noNamespaceSchemaLocation="[^"]*"' "$xml_file" | cut -d'"' -f2)
+
+            if [ -z "$schema_url" ]; then
+              echo "WARN: No schemaLocation found in '$xml_file'. Skipping."
+              continue
+            fi
+
+            # Convert the GitHub raw URL to a local file path.
+            # This handles different branch names in the URL.
+            schema_file=$(echo "$schema_url" | sed 's|https://raw.githubusercontent.com/ARCOS-System/ARCOS/[^/]\+/\(.*\)|\1|')
+
+            if [ ! -f "$schema_file" ]; then
+              echo "ERROR: Could not find local schema file '$schema_file' referenced in '$xml_file'. Failing the check."
+              exit 1
+            fi
+
+            echo "INFO: Validating '$xml_file' against '$schema_file'..."
+            if xmllint --noout --schema "$schema_file" "$xml_file"; then
+              echo "SUCCESS: '$xml_file' is valid."
+            else
+              echo "ERROR: Validation of '$xml_file' failed."
+              exit 1
+            fi
+            echo ""
+          done
\ No newline at end of file
diff --git a/docs/_config.yml b/docs/_config.yml
new file mode 100644
index 0000000..fce904e
--- /dev/null
+++ b/docs/_config.yml
@@ -0,0 +1,3 @@
+theme: jekyll-theme-minimal
+title: ARCOS Documentation
+description: "Official documentation for the AI Rule-Constrained Orchestration System."
\ No newline at end of file diff --git a/docs/architecture.md b/docs/architecture.md new file mode 100644 index 0000000..4df9dcc --- /dev/null +++ b/docs/architecture.md @@ -0,0 +1,13 @@ +--- +layout: default +title: Architecture +nav_order: 2 +--- + +# ARCOS Architecture + +The following diagram illustrates the high-level architecture of the ARCOS system. It shows the interaction between the **Maestro** coordinator and the various agents: **Speculus**, **Producer**, **Validator**, and **Post-Processor**. + +All communication between these components is handled via standardized XML messages, which are validated against the official ARCOS schemas. + +![ARCOS Architecture Diagram](./assets/ARCOS-Architecture.svg) \ No newline at end of file diff --git a/docs/assets/ARCOS-Architecture.svg b/docs/assets/ARCOS-Architecture.svg new file mode 100644 index 0000000..e1f065e --- /dev/null +++ b/docs/assets/ARCOS-Architecture.svg @@ -0,0 +1,354 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Page-1 + + + artefact_manifest.xsd + Artefact Manifest + + + + + Artefact Manifest + + Domain_Inputs + + + + Domain + + + + Domain_loop + + + + Producer + Producer + + Producer + + Validator + Validator + + Validator + + Post-Processor + Post-Processor + + Post-Processor + + ARCOS_Speculus + ARCOS-Speculus + + ARCOS-Speculus + + Domain_Speculus + Domain Speculus + + Domain Speculus + + Maestro_ARCOS_Speculus_Messaging.xsd + + + + + + ARCOS_Speculus_Response.xsd + + + + + + Maestro_Domain_Speculus_Messaging.xsd + + + + + + Domain_Speculus_Response.xsd + + + + + + Maestro_Producer_Messaging.xsd + + + + + + Producer_Response.xsd + + + + + + Maestro_Validator_Messaging.xsd + + + + + + Validator_Response.xsd + + + + + + Maestro_Post_Processor_Messaging.xsd + + + + + + Post_Processor_Response.xsd + + + + + + User + User + + User + + ARCOS_Speculus_User_Interactions + + + + Domain_Speculus_User_Interactions + + + + Maestro_User_Interactions + + + + 
ARCOS_Project.xsd + ARCOS_Project.XSD + + + + + ARCOS_Project.XSD + + ARCOS_Project.xml + ARCOS_Project.XML + + + + + ARCOS_Project.XML + + Domain_Rules.xsd + Domain_Rules.XSD + + + + + Domain_Rules.XSD + + Domain_Rules.xml + Domain_Rules.XML + + + + + Domain_Rules.XML + + Predefined_Domain_Rules.xsd + Pre Defined Domain Rules.XSD + + + + + Pre DefinedDomain Rules.XSD + + BLEU_Predefined_CRUD_Rules.xml + Pre Defined Domain Rules.XML + + + + + Pre DefinedDomain Rules.XML + + BLEU_parts_v5.xsd + Domain.XSD + + + + + Domain.XSD + + BLEU_inventory_v5_sample.xml + Inventory.XML + + + + + Inventory.XML + + Output.zip + Output.zip + + + + + + Output.zip + + Validator_Report.xml + Validator_Report.XML + + + + + Validator_Report.XML + + Validator_Report.xsd + Validator_Report.XSD + + + + + Validator_Report.XSD + + Post_Processor_Report.xml + Post_Processor_Report.XML + + + + + Post_Processor_Report.XML + + Post_Processor_Report.xsd + Post_Processor_Report.XSD + + + + + Post_Processor_Report.XSD + + Sheet.37 + Domain - Loop + + Domain - Loop + + Sheet.38 + Domain + + Domain + + Domain_Vocabulary.xsd + Domain_Vocabulary.xsd + + + + + Domain_Vocabulary.xsd + + Domain_Vocabulary.xml + Domain_Vocabulary.XML + + + + + Domain_Vocabulary.XML + + Focus Extractor + Focus Extractor + + FocusExtractor + + focus_manifest.xsd + Focus Manifest + + + + + Focus Manifest + + Artefact Extractor + Artefact Extractor + + ArtefactExtractor + + Orchestrator + Orchestrator + + Orchestrator + + \ No newline at end of file diff --git a/docs/getting-started.md b/docs/getting-started.md new file mode 100644 index 0000000..5e381ba --- /dev/null +++ b/docs/getting-started.md @@ -0,0 +1,65 @@ +--- +layout: default +title: Getting Started +nav_order: 4 +--- + +# Getting Started Tutorial + +This tutorial provides a hands-on walkthrough of the ARCOS repository. You will learn how to clone the project, locate key files, and perform a basic validation of a sample XML file against its schema. 
+ +## Prerequisites + +Before you begin, ensure you have the following tools installed: +* **Git:** For cloning the repository. +* **An XML Schema Validator:** We will use `xmllint`, which is a standard command-line tool for XML validation, often pre-installed on Linux and macOS. + +## 1. Clone the Repository + +First, clone the ARCOS repository to your local machine using the following command: + +```bash +git clone https://github.com/ARCOS-System/ARCOS.git +cd ARCOS +``` + +## 2. Locate the Sample Files + +This repository includes sample domains to help you understand how ARCOS works. For this tutorial, we will use the `BLEU` sample domain. + +Navigate to the directory containing the sample XML and its corresponding schema: + +```bash +cd SampleDomains/BLEU/v5/ +``` + +You will find two key files here: +* `BLEU_parts_v5.xsd`: The XML Schema Definition (XSD) that defines the structure and rules for the inventory of a fictional parts manufacturer. +* `BLEU_inventory_v5_sample.xml`: A sample XML file containing inventory data that conforms to the schema. + +## 3. Understand the Schema and XML + +The **XSD file (`BLEU_parts_v5.xsd`)** acts as a blueprint, defining the expected structure of the data. It specifies what elements are allowed (e.g., `Bolt`, `Nut`, `Washer`), their attributes (e.g., `id`, `code`), and their data types. + +The **XML file (`BLEU_inventory_v5_sample.xml`)** is an instance of this blueprint. It contains the actual inventory data, structured according to the rules defined in the XSD. Notice the `xsi:noNamespaceSchemaLocation` attribute in the XML, which points to the location of the schema file that should be used for validation. + +## 4. Validate the XML + +The core principle of ARCOS is schema-driven validation. 
You can test this yourself by validating the sample XML against its schema using `xmllint`: + +```bash +xmllint --noout --schema BLEU_parts_v5.xsd BLEU_inventory_v5_sample.xml +``` + +If the validation is successful, the command will output: +``` +BLEU_inventory_v5_sample.xml validates +``` + +This confirms that the sample data adheres to the rules defined in the schema. + +## Conclusion + +Congratulations! You have successfully cloned the ARCOS repository and validated a sample XML file. This simple exercise demonstrates the fundamental concept of ARCOS: ensuring that all data and messages conform to a predefined, verifiable structure. + +From here, you can explore the other documentation sections to learn more about the system's [Architecture](./architecture.md) and [Component Guides](./guides.md). \ No newline at end of file diff --git a/docs/guides.md b/docs/guides.md new file mode 100644 index 0000000..07dabd8 --- /dev/null +++ b/docs/guides.md @@ -0,0 +1,12 @@ +--- +layout: default +title: Guides +nav_order: 3 +has_children: true +--- + +# Component Guides + +This section provides detailed guides for each of the major components in the ARCOS system. + +Select a guide from the navigation menu to learn more about a specific component. 
\ No newline at end of file diff --git a/docs/guides/arcos-speculus.md b/docs/guides/arcos-speculus.md new file mode 100644 index 0000000..4b49c35 --- /dev/null +++ b/docs/guides/arcos-speculus.md @@ -0,0 +1,12 @@ +--- +layout: default +title: ARCOS Speculus Guide +parent: Guides +nav_order: 1 +--- + +# ARCOS Speculus Guide + +*This guide provides a detailed overview of the ARCOS Speculus component.* + +**(Content to be migrated from `3-Domain and ARCOS_Speculus_Guide.pdf`)** \ No newline at end of file diff --git a/docs/guides/post-processor.md b/docs/guides/post-processor.md new file mode 100644 index 0000000..8bd0b1a --- /dev/null +++ b/docs/guides/post-processor.md @@ -0,0 +1,12 @@ +--- +layout: default +title: Domain Post-Processor Guide +parent: Guides +nav_order: 4 +--- + +# Domain Post-Processor Guide + +*This guide provides a detailed overview of the Domain Post-Processor component.* + +**(Content to be migrated from `6-Domain_Post-Processor_Guide.pdf`)** \ No newline at end of file diff --git a/docs/guides/producer.md b/docs/guides/producer.md new file mode 100644 index 0000000..c381c8d --- /dev/null +++ b/docs/guides/producer.md @@ -0,0 +1,12 @@ +--- +layout: default +title: Domain Producer Guide +parent: Guides +nav_order: 2 +--- + +# Domain Producer Guide + +*This guide provides a detailed overview of the Domain Producer component.* + +**(Content to be migrated from `4-Domain_Producer_Guide.pdf`)** \ No newline at end of file diff --git a/docs/guides/validator.md b/docs/guides/validator.md new file mode 100644 index 0000000..cb6922b --- /dev/null +++ b/docs/guides/validator.md @@ -0,0 +1,12 @@ +--- +layout: default +title: Domain Validator Guide +parent: Guides +nav_order: 3 +--- + +# Domain Validator Guide + +*This guide provides a detailed overview of the Domain Validator component.* + +**(Content to be migrated from `5-Domain_Validator_Guide.pdf`)** \ No newline at end of file diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 
0000000..13cfdb8 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,15 @@ +--- +layout: default +title: Home +nav_order: 1 +--- + +# Welcome to ARCOS Documentation + +**ARCOS** is a schema-driven framework for orchestrating AI agents under strict specification and validation rules. + +This site provides the official documentation for the ARCOS project. Use the navigation to explore the different components and concepts. + +## Getting Started + +To get started, check out the [Getting Started](./getting-started.md) page. \ No newline at end of file diff --git a/docs/introduction.md b/docs/introduction.md new file mode 100644 index 0000000..a900d32 --- /dev/null +++ b/docs/introduction.md @@ -0,0 +1,18 @@ +--- +layout: default +title: Introduction +nav_order: 1 +--- + +# Introduction to ARCOS + +**ARCOS (AI Rule-Constrained Orchestration System)** is a schema-driven framework for orchestrating AI agents under strict specification and validation rules. It defines how a Coordinator (**Maestro**) interacts with **Speculus**, **Producer**, **Validator**, and **Post-Processor** agents through domain-agnostic XML messaging validated against schemas. + +## Why ARCOS? + +- **Deterministic AI orchestration**: Every interaction is validated against XSD contracts. +- **Composable agents**: Swap in your own Producers, Validators, or Post-Processors. +- **Domain-agnostic**: Bring your own schema; ARCOS passes it to every domain component. +- **Fail-fast philosophy**: Invalid XML is rejected immediately. + +This documentation site will guide you through the core concepts, architecture, and components of the ARCOS system. 
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..fcba197
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+lxml~=5.2.0
\ No newline at end of file
diff --git a/src/arcos/__init__.py b/src/arcos/__init__.py
new file mode 100644
index 0000000..7f016be
--- /dev/null
+++ b/src/arcos/__init__.py
@@ -0,0 +1 @@
+# This file marks the 'arcos' directory as a Python package.
\ No newline at end of file
diff --git a/src/arcos/__pycache__/__init__.cpython-312.pyc b/src/arcos/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000..ff48379
Binary files /dev/null and b/src/arcos/__pycache__/__init__.cpython-312.pyc differ
diff --git a/src/arcos/__pycache__/xml_utils.cpython-312.pyc b/src/arcos/__pycache__/xml_utils.cpython-312.pyc
new file mode 100644
index 0000000..51b7e18
Binary files /dev/null and b/src/arcos/__pycache__/xml_utils.cpython-312.pyc differ
diff --git a/src/arcos/agents.py b/src/arcos/agents.py
new file mode 100644
index 0000000..74a0e73
--- /dev/null
+++ b/src/arcos/agents.py
@@ -0,0 +1,82 @@
+from abc import ABC, abstractmethod
+
+class Maestro(ABC):
+    """
+    Abstract base class for the Maestro orchestrator.
+
+    The Maestro is responsible for coordinating the workflow between the
+    Producer, Validator, and PostProcessor agents.
+    """
+
+    @abstractmethod
+    def orchestrate(self, spec_path: str, output_path: str):
+        """
+        Executes the main orchestration logic.
+
+        Args:
+            spec_path: Path to the specification for the Producer.
+            output_path: Path to store the final, post-processed output.
+        """
+        pass
+
+class Producer(ABC):
+    """
+    Abstract base class for a Producer agent.
+
+    A Producer generates an output artifact based on a given specification.
+    """
+
+    @abstractmethod
+    def produce(self, spec: dict) -> str:
+        """
+        Generates an artifact based on the provided specification.
+
+        Args:
+            spec: A dictionary representing the production specification.
+
+        Returns:
+            A string representing the generated artifact (e.g., XML content).
+        """
+        pass
+
+class Validator(ABC):
+    """
+    Abstract base class for a Validator agent.
+
+    A Validator checks if a given artifact conforms to a set of rules or a schema.
+    """
+
+    @abstractmethod
+    def validate(self, artifact_path: str, rules_path: str) -> bool:
+        """
+        Validates an artifact against a set of rules.
+
+        Args:
+            artifact_path: The path to the artifact to be validated.
+            rules_path: The path to the rules or schema to validate against.
+
+        Returns:
+            True if the artifact is valid, False otherwise.
+        """
+        pass
+
+class PostProcessor(ABC):
+    """
+    Abstract base class for a Post-Processor agent.
+
+    A Post-Processor performs some action on a validated artifact, such as
+    transformation, enrichment, or reporting.
+    """
+
+    @abstractmethod
+    def process(self, artifact_path: str) -> str:
+        """
+        Processes a validated artifact.
+
+        Args:
+            artifact_path: The path to the validated artifact.
+
+        Returns:
+            A string representing the result of the post-processing.
+        """
+        pass
\ No newline at end of file
diff --git a/src/arcos/maestro.py b/src/arcos/maestro.py
new file mode 100644
index 0000000..a8c0921
--- /dev/null
+++ b/src/arcos/maestro.py
@@ -0,0 +1,31 @@
+from .agents import Maestro, Producer, Validator, PostProcessor
+
+class BasicMaestro(Maestro):
+    """
+    A basic implementation of the Maestro orchestrator.
+
+    This implementation provides a simple, linear workflow for demonstration.
+    """
+    def __init__(self, producer: Producer, validator: Validator, post_processor: PostProcessor):
+        self.producer = producer
+        self.validator = validator
+        self.post_processor = post_processor
+
+    def orchestrate(self, spec_path: str, output_path: str):
+        """
+        A placeholder for the orchestration logic.
+
+        In a real implementation, this would involve:
+        1. Reading and parsing the spec.
+        2. Calling the producer.
+        3. Calling the validator on the produced artifact.
+        4. Calling the post-processor on the validated artifact.
+        5. Writing the final result to the output path.
+        """
+        print("--- BasicMaestro: Orchestration process started. ---")
+        print(f" - Specification file: {spec_path}")
+        print(f" - Producer: {self.producer.__class__.__name__}")
+        print(f" - Validator: {self.validator.__class__.__name__}")
+        print(f" - Post-Processor: {self.post_processor.__class__.__name__}")
+        print(f" - Final output will be at: {output_path}")
+        print("--- Orchestration logic is not yet implemented. ---")
\ No newline at end of file
diff --git a/src/arcos/producers.py b/src/arcos/producers.py
new file mode 100644
index 0000000..fcda264
--- /dev/null
+++ b/src/arcos/producers.py
@@ -0,0 +1,35 @@
+from .agents import Producer
+
+class BleuProducer(Producer):
+    """
+    A sample Producer for the BLEU parts inventory domain.
+
+    This producer generates a sample XML artifact based on a predefined
+    template file.
+    """
+
+    def __init__(self, template_path: str):
+        """
+        Initializes the producer with a path to a template XML file.
+
+        Args:
+            template_path: The path to the sample XML file to use as a template.
+        """
+        self.template_path = template_path
+
+    def produce(self, spec: dict) -> str:
+        """
+        "Produces" an XML artifact by reading it from a template file.
+
+        In a real implementation, this method would dynamically generate
+        the XML based on the content of the `spec` dictionary.
+
+        Args:
+            spec: A dictionary representing the production specification (unused in this sample).
+
+        Returns:
+            A string containing the content of the template XML file.
+        """
+        print(f"--- BleuProducer: 'Producing' artifact from template: {self.template_path} ---")
+        with open(self.template_path, 'r', encoding='utf-8') as f:
+            return f.read()
\ No newline at end of file
diff --git a/src/arcos/xml_utils.py b/src/arcos/xml_utils.py
new file mode 100644
index 0000000..dabfdbd
--- /dev/null
+++ b/src/arcos/xml_utils.py
@@ -0,0 +1,78 @@
+from lxml import etree
+
+def _add_namespace_to_tree(element, namespace):
+    """
+    Recursively adds a namespace to an element and all its child elements.
+    This is for validating a no-namespace XML against a schema with a targetNamespace.
+
+    Comments and processing instructions are skipped: lxml gives them a
+    non-string ``tag``, so ``etree.QName`` cannot compute a local name for them.
+
+    Args:
+        element: The lxml node to rewrite in place.
+        namespace: The namespace URI to apply to every element tag.
+    """
+    if not isinstance(element.tag, str):
+        return
+    element.tag = f"{{{namespace}}}{etree.QName(element).localname}"
+    for child in element:
+        _add_namespace_to_tree(child, namespace)
+
+def validate_xml(xml_path: str, xsd_path: str) -> bool:
+    """
+    Validates an XML file against an XSD schema.
+
+    This function handles the case where the XML file has no namespace but the
+    schema defines a targetNamespace by adding the namespace to the XML tree
+    in memory before validation.
+
+    Args:
+        xml_path: The path to the XML file to validate.
+        xsd_path: The path to the XSD schema file.
+
+    Returns:
+        True if the XML is valid against the schema.
+
+    Raises:
+        etree.XMLSyntaxError: If the XML or XSD file is not well-formed.
+        etree.DocumentInvalid: If the XML document is not valid against the schema.
+        Exception: Any other error (for example an unreadable file) is reported and re-raised unchanged.
+    """
+    try:
+        # Parse the XSD schema and get its target namespace from the root element
+        with open(xsd_path, 'rb') as f:
+            schema_doc = etree.parse(f)
+        target_namespace = schema_doc.getroot().get('targetNamespace')
+
+        # Compile the schema
+        schema = etree.XMLSchema(schema_doc)
+
+        # Parse the XML file
+        with open(xml_path, 'rb') as f:
+            xml_doc = etree.parse(f)
+
+        root = xml_doc.getroot()
+
+        # If the root element has no namespace, assume it should be in the schema's target namespace.
+        if etree.QName(root).namespace is None and target_namespace:
+            _add_namespace_to_tree(root, target_namespace)
+
+        # Validate the (potentially modified) XML against the schema
+        schema.assertValid(xml_doc)
+
+        print(f"SUCCESS: '{xml_path}' is valid against '{xsd_path}'.")
+        return True
+
+    except etree.XMLSyntaxError:
+        print(f"ERROR: XML or XSD syntax error in '{xml_path}' or '{xsd_path}'.")
+        # Re-raise the active exception unchanged.
+        raise
+    except etree.DocumentInvalid:
+        print(f"ERROR: XML document '{xml_path}' is invalid against schema '{xsd_path}'.")
+        # Print the detailed validation errors for better debugging
+        for error in schema.error_log:
+            print(f" - Line {error.line}, Column {error.column}: {error.message}")
+        raise
+    except Exception:
+        print("ERROR: An unexpected error occurred during validation.")
+        raise
\ No newline at end of file
diff --git a/tests/__pycache__/test_xml_utils.cpython-312.pyc b/tests/__pycache__/test_xml_utils.cpython-312.pyc
new file mode 100644
index 0000000..8461e32
Binary files /dev/null and b/tests/__pycache__/test_xml_utils.cpython-312.pyc differ
diff --git a/tests/test_xml_utils.py b/tests/test_xml_utils.py
new file mode 100644
index 0000000..9d00144
--- /dev/null
+++ b/tests/test_xml_utils.py
@@ -0,0 +1,35 @@
+import unittest
+import os
+from src.arcos.xml_utils import validate_xml
+from lxml import etree
+
+class TestXmlUtils(unittest.TestCase):
+
+    def setUp(self):
+        """Set up test files and paths."""
+        self.valid_xml_path = "SampleDomains/BLEU/v5/BLEU_inventory_v5_sample.xml"
+        self.xsd_path = "SampleDomains/BLEU/v5/BLEU_parts_v5.xsd"
+
+        # Create a temporary invalid XML file for testing failure cases.
+        # It must be well-formed (an empty file raises XMLSyntaxError during
+        # parsing) but use a root element the schema does not declare.
+        self.invalid_xml_path = "tests/invalid_sample.xml"
+        with open(self.invalid_xml_path, "w", encoding="utf-8") as f:
+            f.write("<NotDeclaredInSchema/>")
+
+    def tearDown(self):
+        """Clean up temporary files."""
+        if os.path.exists(self.invalid_xml_path):
+            os.remove(self.invalid_xml_path)
+
+    def test_validate_xml_success(self):
+        """Test that a valid XML file passes validation."""
+        self.assertTrue(validate_xml(self.valid_xml_path, self.xsd_path))
+
+    def test_validate_xml_failure(self):
+        """Test that an invalid XML file raises DocumentInvalid exception."""
+        with self.assertRaises(etree.DocumentInvalid):
+            validate_xml(self.invalid_xml_path, self.xsd_path)
+
+if __name__ == '__main__':
+    unittest.main()
\ No newline at end of file