# Source code for gxformat2.schema.gxformat2

# Auto-generated by schema-salad-plus-pydantic — do not edit.

from __future__ import annotations

import json
from enum import Enum
from pathlib import Path
from typing import Any, Annotated, Literal, Union

from pydantic import BaseModel, ConfigDict, Field, Discriminator, Tag



[docs]
def parser_info() -> str:
    """Return the parser identification string; this module reports an empty one."""
    info = ""
    return info





[docs]
class PrimitiveType(str, Enum):
    """Salad data types are based on Avro schema declarations.  Refer to the
[Avro schema declaration documentation](https://avro.apache.org/docs/current/spec.html#schemas) for
detailed information.

null: no value
boolean: a binary value
int: 32-bit signed integer
long: 64-bit signed integer
float: single precision (32-bit) IEEE 754 floating-point number
double: double precision (64-bit) IEEE 754 floating-point number
string: Unicode character sequence"""

    # The `str` mixin makes every member a string equal to its value, so a member
    # compares equal to the plain schema spelling (PrimitiveType.int == "int").
    # Member names such as `int` and `float` only shadow the builtins inside this
    # class body.
    null = "null"
    boolean = "boolean"
    int = "int"
    long = "long"
    float = "float"
    double = "double"
    string = "string"




[docs]
class GalaxyType(str, Enum):
    """Extends primitive types with the native Galaxy concepts such as datasets and collections.
Normalized gxformat2 workflow input declaration spellings are ``data``, ``collection``, ``string``, ``int``, ``float``, and ``boolean``. Other spellings are accepted as compatibility aliases on import but normalized gxformat2 output emits the normalized spellings.
data: one Galaxy dataset input. Native Galaxy ``data_input`` converts to this spelling.
File: accepted alias for ``data``, but normalized gxformat2 output emits ``data``. Note: workflow **test job** YAML uses ``type: File`` to mean 'stage this file as test input data', which is a separate concept from workflow input declaration.
collection: one Galaxy dataset collection input. Native Galaxy ``data_collection_input`` converts to this spelling.
string: normalized gxformat2 spelling for native Galaxy text workflow parameters.
text: accepted alias for ``string`` because native Galaxy parameter state and Galaxy tool XML terminology use ``text``.
int: normalized gxformat2 spelling for native Galaxy integer workflow parameters.
integer: accepted alias for ``int`` because native Galaxy parameter state and Galaxy tool XML terminology use ``integer``."""

    # The same seven spellings that PrimitiveType declares.
    null = "null"
    boolean = "boolean"
    int = "int"
    long = "long"
    float = "float"
    double = "double"
    string = "string"
    # Compatibility aliases described in the docstring:
    # integer -> int, text -> string, File -> data.
    integer = "integer"
    text = "text"
    File = "File"
    # Galaxy-native dataset and dataset-collection inputs.
    data = "data"
    collection = "collection"




[docs]
class WorkflowStepType(str, Enum):
    """Module types used by Galaxy steps. Galaxy's native format allows additional types such as data_input, data_input_collection, and parameter_type
but these should be represented as ``inputs`` in Format2.

tool: Run a tool.
subworkflow: Run a subworkflow.
pause: Pause computation on this branch of workflow until user allows it to continue.
pick_value: Select the first non-null value from multiple inputs. Used to merge branches of conditional or optional workflow paths."""

    # Only these four module types are modelled; per the docstring, the native
    # input-like step types are expressed as workflow `inputs` instead.
    tool = "tool"
    subworkflow = "subworkflow"
    pause = "pause"
    pick_value = "pick_value"



# Maps every accepted spelling of an input's ``type`` to the tag of the model
# that should validate it.  Built once at import time rather than on every call.
# NOTE(review): "data_input", "data_collection", "data_collection_input" and
# "color" are not listed in the ``type`` Literals of the target models visible in
# this file, so such inputs are routed here but would presumably fail validation
# unless normalized upstream - confirm.
_INPUT_TYPE_TAGS: dict[str, str] = {
    "data": "WorkflowDataParameter",
    "File": "WorkflowDataParameter",
    "data_input": "WorkflowDataParameter",
    "collection": "WorkflowCollectionParameter",
    "data_collection": "WorkflowCollectionParameter",
    "data_collection_input": "WorkflowCollectionParameter",
    "integer": "WorkflowIntegerParameter",
    "int": "WorkflowIntegerParameter",
    "text": "WorkflowTextParameter",
    "string": "WorkflowTextParameter",
    "float": "WorkflowFloatParameter",
    "boolean": "WorkflowBooleanParameter",
    "color": "WorkflowTextParameter",
}


def _discriminate_inputs(v: Any) -> str:
    """Return the union tag naming the model that should validate input ``v``.

    Args:
        v: Either a raw mapping (its ``"type"`` key is read) or any other object,
            typically an already-built model (its ``type_`` attribute is read).

    Returns:
        The tag registered for the type spelling, or ``"WorkflowDataParameter"``
        when the type is missing, ``None`` or not a known spelling.
    """
    raw_type = v.get("type", "") if isinstance(v, dict) else getattr(v, "type_", "")
    # str() of a (str, Enum) member yields "ClassName.member", which would miss
    # the table and silently fall back to the data parameter; use the value.
    if isinstance(raw_type, Enum):
        raw_type = raw_type.value
    return _INPUT_TYPE_TAGS.get(str(raw_type), "WorkflowDataParameter")



[docs]
class Documented(BaseModel):
    # Mixin that contributes the optional `doc` field.
    # populate_by_name lets aliased fields also be set by their attribute name;
    # extra="allow" keeps unknown keys on the instance instead of rejecting them.
    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # Either one string or a list of string fragments; defaults to None.
    doc: None | str | list[str] = Field(default=None, description="A documentation string for this object, or an array of strings which should be concatenated.")



[docs]
class RecordField(Documented):
    """A field of a record."""

    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # Required: no default is given.
    name: str = Field(description="The name of the field")
    # Required. Read from the "type" key through the alias. Accepts one type
    # spelling or a list of them. RecordSchema, EnumSchema and ArraySchema are
    # declared further down; naming them here relies on the deferred annotations
    # enabled by `from __future__ import annotations`.
    type_: PrimitiveType | RecordSchema | EnumSchema | ArraySchema | str | list[PrimitiveType | RecordSchema | EnumSchema | ArraySchema | str] = Field(alias="type", description="The field type")



[docs]
class RecordSchema(BaseModel):
    # Schema node describing a record; unlike its siblings it has no docstring.
    model_config = ConfigDict(populate_by_name=True, extra="allow")

    fields: None | list[RecordField] = Field(default=None, description="Defines the fields of the record.")
    # Fixed marker: only "record" is accepted and it is also the default.
    type_: Literal["record"] = Field(default="record", alias="type", description="Must be `record`")



[docs]
class EnumSchema(BaseModel):
    """Define an enumerated type."""

    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # Required: an enum schema must list its symbols.
    symbols: list[str] = Field(description="Defines the set of valid symbols.")
    # Fixed marker: only "enum" is accepted and it is also the default.
    type_: Literal["enum"] = Field(default="enum", alias="type", description="Must be `enum`")



[docs]
class ArraySchema(BaseModel):
    # Schema node describing an array; `items` may itself be an ArraySchema.
    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # Required. Same set of accepted type spellings as RecordField.type_.
    items: PrimitiveType | RecordSchema | EnumSchema | ArraySchema | str | list[PrimitiveType | RecordSchema | EnumSchema | ArraySchema | str] = Field(description="Defines the type of the array elements.")
    # Fixed marker: only "array" is accepted and it is also the default.
    type_: Literal["array"] = Field(default="array", alias="type", description="Must be `array`")



[docs]
class Labeled(BaseModel):
    # Mixin that contributes the optional `label` field.
    model_config = ConfigDict(populate_by_name=True, extra="allow")

    label: None | str = Field(default=None, description="A short, human-readable label of this object.")



[docs]
class Identified(BaseModel):
    # Mixin that contributes the optional `id` field.
    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # The name `id` shadows the builtin only inside this class body.
    id: None | str = Field(default=None, description="The unique identifier for this object.")



[docs]
class Parameter(Labeled, Documented, Identified):
    """Define an input or output parameter to a process."""

    # Declares no fields of its own: it combines `label` (Labeled),
    # `doc` (Documented) and `id` (Identified).
    model_config = ConfigDict(populate_by_name=True, extra="allow")




[docs]
class InputParameter(Parameter):
    # Parameter plus an optional `default` value.
    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # `id`, `label` and `doc` restate the inherited declarations with the same
    # annotation and default.
    id: None | str = Field(default=None, description="The unique identifier for this object.")
    label: None | str = Field(default=None, description="A short, human-readable label of this object.")
    doc: None | str | list[str] = Field(default=None, description="A documentation string for this object, or an array of strings which should be concatenated.")
    # New in this class. Any value is accepted; None doubles as "no default".
    default: None | Any = Field(default=None, description="The default value to use for this parameter if the parameter is missing from the input object, or if the value of the parameter in the input object is `null`.  Default values are applied before eva...")



[docs]
class OutputParameter(Parameter):
    # Output-side counterpart of InputParameter; it adds no new fields.
    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # All three fields restate the inherited declarations unchanged.
    id: None | str = Field(default=None, description="The unique identifier for this object.")
    label: None | str = Field(default=None, description="A short, human-readable label of this object.")
    doc: None | str | list[str] = Field(default=None, description="A documentation string for this object, or an array of strings which should be concatenated.")



[docs]
class Process(Identified, Labeled, Documented):
    """The base executable type in CWL is the `Process` object defined by the
document.  Note that the `Process` object is abstract and cannot be
directly executed."""

    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # Required (no default). Three accepted shapes:
    #   * a list whose items are routed to one parameter model by the callable
    #     discriminator `_discriminate_inputs`, which returns one of the Tag names;
    #   * a dict whose values are a parameter model or a bare string;
    #   * any dict with string keys, as a catch-all.
    # Only the list form is discriminated.
    # Discriminated union on 'type'
    inputs: list[Annotated[Annotated[WorkflowDataParameter, Tag("WorkflowDataParameter")] | Annotated[WorkflowCollectionParameter, Tag("WorkflowCollectionParameter")] | Annotated[WorkflowIntegerParameter, Tag("WorkflowIntegerParameter")] | Annotated[WorkflowFloatParameter, Tag("WorkflowFloatParameter")] | Annotated[WorkflowTextParameter, Tag("WorkflowTextParameter")] | Annotated[WorkflowBooleanParameter, Tag("WorkflowBooleanParameter")], Discriminator(_discriminate_inputs)]] | dict[str, WorkflowDataParameter | WorkflowCollectionParameter | WorkflowIntegerParameter | WorkflowFloatParameter | WorkflowTextParameter | WorkflowBooleanParameter | str] | dict[str, Any]
    # Required (no default). A list of output models, a dict whose values are an
    # output model or a bare string, or any dict with string keys.
    outputs: list[WorkflowOutputParameter] | dict[str, WorkflowOutputParameter | str] | dict[str, Any] = Field(description="Defines the parameters representing the output of the process.  May be used to generate and/or validate the output object.")



[docs]
class HasUUID(BaseModel):
    # Mixin that contributes the optional `uuid` field.
    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # Typed as a plain string; no UUID format check is declared here.
    uuid: None | str = Field(default=None, description="UUID uniquely representing this element.")



[docs]
class HasStepErrors(BaseModel):
    # Mixin that contributes the optional `errors` message.
    model_config = ConfigDict(populate_by_name=True, extra="allow")

    errors: None | str = Field(default=None, description="During Galaxy export there may be some problem validating the tool state, tool used, etc.. that will be indicated by this field. The Galaxy user should be warned of these problems before the workfl...")



[docs]
class HasStepPosition(BaseModel):
    # Mixin that contributes the optional editor `position`.
    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # StepPosition is declared right after this class; the forward reference
    # relies on the deferred annotations enabled at the top of the file.
    position: None | StepPosition = Field(default=None)



[docs]
class StepPosition(BaseModel):
    """This field specifies the location of the step's node when rendered in the workflow editor."""

    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # Both coordinates are required and accept either a float or an int.
    top: float | int = Field(description="Relative vertical position of the step's node when rendered in the workflow editor.")
    left: float | int = Field(description="Relative horizontal position of the step's node when rendered in the workflow editor.")



[docs]
class ReferencesTool(BaseModel):
    # Mixin holding the tool reference of a step; every field is optional.
    model_config = ConfigDict(populate_by_name=True, extra="allow")

    tool_id: None | str = Field(default=None, description="The tool ID used to run this step of the workflow (e.g. 'cat1' or 'toolshed.g2.bx.psu.edu/repos/nml/collapse_collections/collapse_dataset/4.0').")
    # ToolShedRepository is declared later in the module (forward reference).
    tool_shed_repository: None | ToolShedRepository = Field(default=None, description="The Galaxy Tool Shed repository that should be installed in order to use this tool.")
    tool_version: None | str = Field(default=None, description="The tool version corresponding used to run this step of the workflow. For tool shed installed tools, the ID generally uniquely specifies a version and this field is optional.")



[docs]
class SampleSheetColumnDefinition(BaseModel):
    """Describes one column of a sample-sheet collection input.
Used in `column_definitions` on a `collection_type: sample_sheet[:<type>]`
workflow input."""

    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # NOTE(review): the descriptions below refer to a pydantic post-validator and
    # to a name pattern, but this class declares no validator and no pattern
    # constraint - presumably they are attached elsewhere; confirm.
    name: str = Field(description="Column name. Must not contain special characters (matches `^[\\w\\-_ \\?]*$`).")
    description: None | str = Field(default=None, description="Optional human-readable column description.")
    # Read from the "type" key through the alias; defaults to "string".
    type_: Literal["string", "int", "float", "boolean", "element_identifier"] = Field(default="string", alias="type", description="Value type for this column. One of `string`, `int`, `float`, `boolean`, or `element_identifier`. Mirrors Galaxy's runtime `SampleSheetColumnType`.")
    # Required: no default, so every column must state it explicitly.
    optional: bool = Field(description="If true, rows may omit a value for this column.")
    default_value: None | str | int | float | bool = Field(default=None, description="Default value used when a row omits this column. Type must be compatible with `type` - validated by the pydantic post-validator.")
    # Items are untyped (Any), so they are not validated by this model.
    validators: None | list[Any] = Field(default=None, description="Galaxy-style parameter validators. Modelled as opaque records here - full validator schema lives in galaxy.tool_util_models.")
    restrictions: None | list[str | int | float | bool] = Field(default=None, description="Closed set of permitted values for this column. Item type must be compatible with the column `type` (post-validated).")
    suggestions: None | list[str | int | float | bool] = Field(default=None, description="Open suggestion list for this column.")



[docs]
class RecordFieldDefinition(BaseModel):
    """Describes one field of a `record` collection input.
Used in `fields` on a `collection_type` containing `record` (e.g.
`record`, `list:record`, `sample_sheet:record`). Mirrors a subset of
the CWL `InputRecordSchema` shape that Galaxy persists on
`DatasetCollection.fields`."""

    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # Required: no default is given.
    name: str = Field(description="Field name. Must equal the corresponding element identifier in the materialized record collection.")
    # One spelling or a list of spellings (a union); defaults to "File".
    type_: Literal["File", "null", "boolean", "int", "float", "string"] | list[Literal["File", "null", "boolean", "int", "float", "string"]] = Field(default="File", alias="type", description="Field value type. A subset of the CWL primitive types: `File`, `null`, `boolean`, `int`, `float`, `string`. May be a list to express a union (e.g. `[\"File\", \"null\"]` for an optional file).")
    # The name `format` shadows the builtin only inside this class body.
    format: None | str = Field(default=None, description="Optional Galaxy datatype hint for `File`-typed fields.")



[docs]
class WorkflowTextOption(BaseModel):
    """A `{value, label}` option used in `restrictions` or `suggestions` on a
text workflow parameter. Plain strings are also accepted in those
arrays as shorthand for `{value: <str>, label: <str>}`."""

    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # Required: no default is given.
    value: str = Field(description="Machine value submitted to the connected tool input.")
    # Stored as None when omitted; this model does not copy `value` into it.
    label: None | str = Field(default=None, description="Human label shown in Galaxy. Defaults to `value` when omitted.")



[docs]
class ToolShedRepository(BaseModel):
    # Coordinates of a Tool Shed repository; all four fields are required.
    model_config = ConfigDict(populate_by_name=True, extra="allow")

    name: str = Field(description="The name of the tool shed repository this tool can be found in.")
    changeset_revision: str = Field(description="The revision of the tool shed repository this tool can be found in.")
    owner: str = Field(description="The owner of the tool shed repository this tool can be found in.")
    tool_shed: str = Field(description="The URI of the tool shed containing the repository this tool can be found in - typically this should be toolshed.g2.bx.psu.edu.")



[docs]
class BaseInputParameter(InputParameter, HasStepPosition):
    # Common base of the workflow input models: InputParameter plus an editor
    # position (HasStepPosition) plus the `optional` flag.
    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # `id`, `label` and `doc` restate the inherited declarations unchanged;
    # `default` and `position` are inherited without being restated.
    id: None | str = Field(default=None, description="The unique identifier for this object.")
    label: None | str = Field(default=None, description="A short, human-readable label of this object.")
    doc: None | str | list[str] = Field(default=None, description="A documentation string for this object, or an array of strings which should be concatenated.")
    # New in this class. None means the flag was not specified at all.
    optional: bool | None = Field(default=None, description="Controls whether Galaxy allows invocation of the workflow without a user-supplied value for this input. If ``true``, the input may be omitted at invocation time. ``optional`` and ``default`` are in...")



[docs]
class BaseDataParameter(BaseInputParameter):
    # Base of the dataset-like inputs: BaseInputParameter plus `format`.
    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # The first five fields restate inherited declarations unchanged;
    # `optional` is inherited without being restated.
    id: None | str = Field(default=None, description="The unique identifier for this object.")
    label: None | str = Field(default=None, description="A short, human-readable label of this object.")
    doc: None | str | list[str] = Field(default=None, description="A documentation string for this object, or an array of strings which should be concatenated.")
    default: None | Any = Field(default=None, description="The default value to use for this parameter if the parameter is missing from the input object, or if the value of the parameter in the input object is `null`.  Default values are applied before eva...")
    position: None | StepPosition = Field(default=None)
    # New in this class. Typed as a list of strings only, so a bare string is
    # not part of the declared type.
    format: None | list[str] = Field(default=None, description="Specify datatype extensions for valid input datasets.")



[docs]
class WorkflowDataParameter(BaseDataParameter):
    """A data input parameter for a Galaxy workflow. Represents one Galaxy dataset.
Normalized gxformat2 output uses ``type: data``. ``type: File`` is accepted as
an alias, but should not be confused with workflow test job syntax where
``type: File`` means stage a file as test input data."""

    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # Everything up to `optional` restates an inherited declaration unchanged;
    # `format` is inherited from BaseDataParameter without being restated.
    id: None | str = Field(default=None, description="The unique identifier for this object.")
    label: None | str = Field(default=None, description="A short, human-readable label of this object.")
    doc: None | str | list[str] = Field(default=None, description="A documentation string for this object, or an array of strings which should be concatenated.")
    default: None | Any = Field(default=None, description="The default value to use for this parameter if the parameter is missing from the input object, or if the value of the parameter in the input object is `null`.  Default values are applied before eva...")
    position: None | StepPosition = Field(default=None)
    optional: bool | None = Field(default=None, description="Controls whether Galaxy allows invocation of the workflow without a user-supplied value for this input. If ``true``, the input may be omitted at invocation time. ``optional`` and ``default`` are in...")
    # The only parameter model whose `type` may be absent (None). This matches
    # `_discriminate_inputs`, which falls back to this model for an unknown or
    # missing type.
    type_: Literal["data", "File"] | None = Field(default=None, alias="type", description="Specify valid types of data that may be assigned to this parameter.")



[docs]
class WorkflowCollectionParameter(BaseDataParameter):
    """A collection input parameter for a Galaxy workflow - represents a dataset collection."""

    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # Everything up to `optional` restates an inherited declaration unchanged;
    # `format` is inherited from BaseDataParameter without being restated.
    id: None | str = Field(default=None, description="The unique identifier for this object.")
    label: None | str = Field(default=None, description="A short, human-readable label of this object.")
    doc: None | str | list[str] = Field(default=None, description="A documentation string for this object, or an array of strings which should be concatenated.")
    default: None | Any = Field(default=None, description="The default value to use for this parameter if the parameter is missing from the input object, or if the value of the parameter in the input object is `null`.  Default values are applied before eva...")
    position: None | StepPosition = Field(default=None)
    optional: bool | None = Field(default=None, description="Controls whether Galaxy allows invocation of the workflow without a user-supplied value for this input. If ``true``, the input may be omitted at invocation time. ``optional`` and ``default`` are in...")
    # NOTE(review): `_discriminate_inputs` also routes "data_collection" and
    # "data_collection_input" to this model, but only "collection" is accepted
    # here - presumably those spellings are normalized before validation; confirm.
    type_: Literal["collection"] = Field(default="collection", alias="type", description="Must be ``collection``.")
    # Stored as None when omitted; the "list" default mentioned in the
    # description is not applied by this model.
    collection_type: None | str = Field(default=None, description="Collection type (defaults to `list` if `type` is `collection`). Nested collection types are separated with colons, e.g. `list:list:paired`.")
    # NOTE(review): the description refers to a pydantic post-validator that is
    # not declared in this class - presumably attached elsewhere; confirm.
    column_definitions: None | list[SampleSheetColumnDefinition] = Field(default=None, description="Column schema for sample-sheet collection inputs. Only meaningful when `collection_type` begins with `sample_sheet` - cross-field validation is applied in the pydantic post-validator.")
    fields: None | list[RecordFieldDefinition] = Field(default=None, description="Field schema for `record` collection inputs. Only meaningful when `collection_type` contains `record` (e.g. `record`, `list:record`, `sample_sheet:record`).")



[docs]
class MinMax(BaseModel):
    # Mixin that contributes optional inclusive numeric bounds. The field names
    # shadow the `min`/`max` builtins only inside this class body. This class
    # does not check that min <= max.
    model_config = ConfigDict(populate_by_name=True, extra="allow")

    min: int | float | None = Field(default=None, description="Minimum allowed value (inclusive).")
    max: int | float | None = Field(default=None, description="Maximum allowed value (inclusive).")



[docs]
class WorkflowIntegerParameter(BaseInputParameter, MinMax):
    """A scalar integer workflow parameter. Normalized gxformat2 output uses
``type: int``. ``type: integer`` is accepted for compatibility with native
Galaxy parameter state and Galaxy tool XML terminology."""

    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # Everything up to `position` restates an inherited declaration unchanged.
    # `optional` (BaseInputParameter) and `min`/`max` (MinMax) are inherited
    # without being restated.
    id: None | str = Field(default=None, description="The unique identifier for this object.")
    label: None | str = Field(default=None, description="A short, human-readable label of this object.")
    doc: None | str | list[str] = Field(default=None, description="A documentation string for this object, or an array of strings which should be concatenated.")
    default: None | Any = Field(default=None, description="The default value to use for this parameter if the parameter is missing from the input object, or if the value of the parameter in the input object is `null`.  Default values are applied before eva...")
    position: None | StepPosition = Field(default=None)
    # Accepts both spellings. The field default is the alias "integer", not the
    # normalized "int" named in the docstring.
    type_: Literal["integer", "int"] = Field(default="integer", alias="type", description="Must be ``integer`` or ``int``.")



[docs]
class WorkflowFloatParameter(BaseInputParameter, MinMax):
    """A float input parameter for a Galaxy workflow."""

    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # Everything up to `position` restates an inherited declaration unchanged.
    # `optional` (BaseInputParameter) and `min`/`max` (MinMax) are inherited
    # without being restated.
    id: None | str = Field(default=None, description="The unique identifier for this object.")
    label: None | str = Field(default=None, description="A short, human-readable label of this object.")
    doc: None | str | list[str] = Field(default=None, description="A documentation string for this object, or an array of strings which should be concatenated.")
    default: None | Any = Field(default=None, description="The default value to use for this parameter if the parameter is missing from the input object, or if the value of the parameter in the input object is `null`.  Default values are applied before eva...")
    position: None | StepPosition = Field(default=None)
    # Fixed marker: only "float" is accepted and it is also the default.
    type_: Literal["float"] = Field(default="float", alias="type", description="Must be ``float``.")



[docs]
class WorkflowTextParameter(BaseInputParameter):
    """A scalar text workflow parameter. Normalized gxformat2 output uses
``type: string``. ``type: text`` is accepted for compatibility with native
Galaxy parameter state and Galaxy tool XML terminology."""

    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # Everything up to `position` restates an inherited declaration unchanged;
    # `optional` is inherited from BaseInputParameter without being restated.
    id: None | str = Field(default=None, description="The unique identifier for this object.")
    label: None | str = Field(default=None, description="A short, human-readable label of this object.")
    doc: None | str | list[str] = Field(default=None, description="A documentation string for this object, or an array of strings which should be concatenated.")
    default: None | Any = Field(default=None, description="The default value to use for this parameter if the parameter is missing from the input object, or if the value of the parameter in the input object is `null`.  Default values are applied before eva...")
    position: None | StepPosition = Field(default=None)
    # Accepts both spellings. The field default is the alias "text", not the
    # normalized "string" named in the docstring.
    type_: Literal["text", "string"] = Field(default="text", alias="type", description="Must be ``text`` or ``string``.")
    # Items are plain strings or {value, label} option records.
    restrictions: None | list[str | WorkflowTextOption] = Field(default=None, description="Closed set of permitted values. When present, Galaxy renders the runtime input as a select. Items may be plain strings or `{value, label}` records.")
    suggestions: None | list[str | WorkflowTextOption] = Field(default=None, description="Open suggestion list. Galaxy still treats the input as text but offers these as suggestions.")
    # camelCase name kept as declared; it is the key read from input documents.
    restrictOnConnections: None | bool = Field(default=None, description="Ask Galaxy to derive valid choices from connected tool or subworkflow select inputs at runtime. Falls back to free text when derivation fails.")



[docs]
class WorkflowBooleanParameter(BaseInputParameter):
    """A boolean input parameter for a Galaxy workflow."""

    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # Everything up to `position` restates an inherited declaration unchanged;
    # `optional` is inherited from BaseInputParameter without being restated.
    id: None | str = Field(default=None, description="The unique identifier for this object.")
    label: None | str = Field(default=None, description="A short, human-readable label of this object.")
    doc: None | str | list[str] = Field(default=None, description="A documentation string for this object, or an array of strings which should be concatenated.")
    default: None | Any = Field(default=None, description="The default value to use for this parameter if the parameter is missing from the input object, or if the value of the parameter in the input object is `null`.  Default values are applied before eva...")
    position: None | StepPosition = Field(default=None)
    # Fixed marker: only "boolean" is accepted and it is also the default.
    type_: Literal["boolean"] = Field(default="boolean", alias="type", description="Must be ``boolean``.")



[docs]
class WorkflowInputParameter(BaseDataParameter, MinMax):
    """An input parameter to a Galaxy workflow. This is the catch-all type used
by the Schema Salad codegen. The pydantic layer uses a discriminated union
of the specific parameter types instead."""

    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # Everything up to `optional` restates an inherited declaration unchanged.
    # `format` (BaseDataParameter) and `min`/`max` (MinMax) are inherited without
    # being restated.
    id: None | str = Field(default=None, description="The unique identifier for this object.")
    label: None | str = Field(default=None, description="A short, human-readable label of this object.")
    doc: None | str | list[str] = Field(default=None, description="A documentation string for this object, or an array of strings which should be concatenated.")
    default: None | Any = Field(default=None, description="The default value to use for this parameter if the parameter is missing from the input object, or if the value of the parameter in the input object is `null`.  Default values are applied before eva...")
    position: None | StepPosition = Field(default=None)
    optional: bool | None = Field(default=None, description="Controls whether Galaxy allows invocation of the workflow without a user-supplied value for this input. If ``true``, the input may be omitted at invocation time. ``optional`` and ``default`` are in...")
    # Unlike the specific parameter models, `type` uses the GalaxyType enum and
    # may also be a list of GalaxyType values.
    type_: GalaxyType | None | list[GalaxyType] = Field(default=None, alias="type", description="Specify valid types of data that may be assigned to this parameter.")
    # The remaining fields carry the same names and annotations as the
    # collection-specific and text-specific fields of
    # WorkflowCollectionParameter and WorkflowTextParameter.
    collection_type: None | str = Field(default=None, description="Collection type (defaults to `list` if `type` is `collection`). Nested collection types are separated with colons, e.g. `list:list:paired`.")
    column_definitions: None | list[SampleSheetColumnDefinition] = Field(default=None, description="Column schema for sample-sheet collection inputs. Only meaningful when `collection_type` begins with `sample_sheet`.")
    fields: None | list[RecordFieldDefinition] = Field(default=None, description="Field schema for `record` collection inputs. Only meaningful when `collection_type` contains `record`.")
    restrictions: None | list[str | WorkflowTextOption] = Field(default=None, description="Closed set of permitted values for text-typed inputs. See `WorkflowTextParameter.restrictions`.")
    suggestions: None | list[str | WorkflowTextOption] = Field(default=None, description="Open suggestion list for text-typed inputs.")
    restrictOnConnections: None | bool = Field(default=None, description="For text-typed inputs - derive runtime choices from connected tool/subworkflow select inputs.")



[docs]
class WorkflowOutputParameter(OutputParameter):
    """Describe an output parameter of a workflow.  The parameter must be
connected to one parameter defined in the workflow that
will provide the value of the output parameter. It is legal to
connect a WorkflowInputParameter to a WorkflowOutputParameter."""

    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # `id`, `label` and `doc` restate the inherited declarations unchanged.
    id: None | str = Field(default=None, description="The unique identifier for this object.")
    label: None | str = Field(default=None, description="A short, human-readable label of this object.")
    doc: None | str | list[str] = Field(default=None, description="A documentation string for this object, or an array of strings which should be concatenated.")
    # Optional single source reference; the docstring's "must be connected"
    # requirement is not enforced by this field's type.
    outputSource: None | str = Field(default=None, description="Specifies workflow parameter that supply the value of to the output parameter.")
    # A single GalaxyType or None; unlike WorkflowInputParameter, no list form.
    type_: None | GalaxyType = Field(default=None, alias="type", description="Specify valid types of data that may be assigned to this parameter.")



[docs]
class WorkflowStep(Identified, Labeled, Documented, HasStepPosition, ReferencesTool, HasStepErrors, HasUUID):
    """This represents a non-input step of a Galaxy Workflow.

# A note about `state` and `tool_state` fields.

Only one or the other should be specified. These are two ways to represent the "state"
of a tool at this workflow step. Both are essentially maps from parameter names to
parameter values.

`tool_state` is much more low-level and expects a flat dictionary with each value a JSON
dump. Nested tool structures such as conditionals and repeats should have all their values
in the JSON dumped string. In general `tool_state` may be present in workflows exported from
Galaxy but shouldn't be written by humans.

`state` can contain a typed map. Repeat values can be represented as YAML arrays. An alternative
to representing `state` this way is defining inputs with default values."""

    # Every field declared here is optional. `id`, `label`, `doc`, `position`,
    # the tool reference fields, `errors` and `uuid` come from the mixins.
    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # Named `in_` because `in` is a Python keyword; read from the "in" key via
    # the alias. Either a list of step inputs, or a dict whose values are a
    # step input, a source string, or a list of source strings.
    in_: list[WorkflowStepInput] | dict[str, WorkflowStepInput | str | list[str]] | None = Field(default=None, alias="in", description="Defines the input parameters of the workflow step.  The process is ready to run when all required input parameters are associated with concrete values.  Input parameters include a schema for each p...")
    # No alias is declared for the legacy "outputs" spelling mentioned in the
    # description.
    out: list[WorkflowStepOutput | str] | dict[str, WorkflowStepOutput | str] | None = Field(default=None, description="Defines the parameters representing the output of the process.  May be used to generate and/or validate the output object.  This can also be called 'outputs' for legacy reasons - but the resulting ...")
    # The docstring says only one of `state` / `tool_state` should be given;
    # that exclusivity is not enforced by these field declarations.
    state: dict[str, Any] | None = Field(default=None, description="Structured tool state.")
    tool_state: str | dict[str, Any] | None = Field(default=None, description="Unstructured tool state.")
    post_job_actions: dict[str, Any] | None = Field(default=None, description="Optional dict of post-job actions keyed by ``{ActionType}{OutputName}`` compound strings.  Same shape as the native ``post_job_actions`` field; each value is a record with ``action_type``, ``output...")
    # Stored as None when omitted; the "tool" default mentioned in the
    # description is not applied by this model.
    type_: None | WorkflowStepType = Field(default=None, alias="type", description="Workflow step module's type (defaults to 'tool').")
    # GalaxyWorkflow is not declared in the portion of the module shown here;
    # presumably it is defined later in the file.
    run: GalaxyWorkflow | str | dict[str, Any] | None = Field(default=None, description="Specifies a subworkflow to run. May be an inline workflow definition, a URL string, or an @import reference dict.")
    runtime_inputs: None | list[str] = Field(default=None)
    # Kept as a plain string; the expression is not parsed or evaluated here.
    when: None | str = Field(default=None, description="If defined, only run the step when the expression evaluates to `true`.  If `false` the step is skipped.  A skipped step produces a `null` on each output.  Expression should be an ecma5.1 expression.")



[docs]
class Sink(BaseModel):
    # Mixin that contributes the optional `source` reference(s).
    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # One source string or a list of them; defaults to None.
    source: None | str | list[str] = Field(default=None, description="Specifies one or more workflow parameters that will provide input to the underlying step parameter.")



[docs]
class WorkflowStepInput(Identified, Sink, Labeled):
    """TODO:"""

    # The docstring is a placeholder. From the declarations: a step input
    # combines `id` (Identified), `source` (Sink) and `label` (Labeled) with the
    # optional `default` declared below.
    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # Any value is accepted; None doubles as "no default".
    default: None | Any = Field(default=None, description="The default value for this parameter to use if either there is no `source` field, or the value produced by the `source` is `null`.  The default must be applied prior to scattering or evaluating `va...")



[docs]
class Report(BaseModel):
    """Definition of an invocation report for this workflow. Currently the only
field is 'markdown'."""

    # Accept attribute names as well as aliases, and keep undeclared keys.
    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # Required field: Field() is given no default value here.
    markdown: str = Field(description="Galaxy flavored Markdown to define an invocation report.")



[docs]
class WorkflowStepOutput(Identified):
    """Associate an output parameter of the underlying process with a workflow
parameter.  The workflow parameter (given in the `id` field) may be used
as a `source` to connect with input parameters of other workflow steps, or
with an output parameter of the process.

A unique identifier for this workflow output parameter.  This is
the identifier to use in the `source` field of `WorkflowStepInput`
to connect the output value to downstream parameters."""

    # Accept attribute names as well as aliases, and keep undeclared keys.
    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # Every field below is optional and defaults to None (i.e. "not specified").
    add_tags: None | list[str] = Field(default=None)
    change_datatype: None | str = Field(default=None)
    delete_intermediate_datasets: None | bool = Field(default=None)
    hide: None | bool = Field(default=None)
    remove_tags: None | list[str] = Field(default=None)
    rename: None | str = Field(default=None)
    # Free-form mapping; the value shape is not constrained by the annotation.
    set_columns: dict[str, Any] | None = Field(default=None)



[docs]
class BaseComment(BaseModel):
    """Base fields shared by all comment types."""

    # Accept attribute names as well as aliases, and keep undeclared keys.
    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # NOTE: the annotations below are plain list[float]; the two-element
    # ``[x, y]`` / ``[width, height]`` shape is documented but not enforced here.
    position: list[float] | None = Field(default=None, description="Position of the comment on the editor canvas as ``[x, y]`` coordinates.")
    size: list[float] | None = Field(default=None, description="Size of the comment as ``[width, height]``.")
    # Free-form string; the set of colour names is not restricted by the type.
    color: None | str = Field(default=None, description="Display color of the comment (e.g. ``\"none\"``, ``\"blue\"``).")
    label: None | str = Field(default=None, description="Optional label for referencing this comment from frame ``contains_comments`` fields or for use as a map key when comments are represented as a mapping.")



[docs]
class TextComment(BaseComment):
    """A plain text annotation in the workflow editor."""

    # Accept attribute names as well as aliases, and keep undeclared keys.
    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # Stored on the attribute ``type_`` but read from the document key "type"
    # (alias).  The Literal makes "text" the only accepted value, and it is also
    # the default when the key is absent.
    type_: Literal["text"] = Field(default="text", alias="type", description="Comment type (``text``).")
    text: None | str = Field(default=None, description="The text content.")
    bold: None | bool = Field(default=None, description="Whether the text is displayed in bold.")
    italic: None | bool = Field(default=None, description="Whether the text is displayed in italic.")
    # Either a float or an int is accepted.
    text_size: None | float | int = Field(default=None, description="Font size of the text.")



[docs]
class MarkdownComment(BaseComment):
    """A Markdown-rendered annotation in the workflow editor."""

    # Accept attribute names as well as aliases, and keep undeclared keys.
    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # Read from the document key "type" (alias); only "markdown" is accepted and
    # it is the default when the key is absent.
    type_: Literal["markdown"] = Field(default="markdown", alias="type", description="Comment type (``markdown``).")
    text: None | str = Field(default=None, description="Markdown content.")



[docs]
class FrameComment(BaseComment):
    """A rectangular grouping box that visually contains steps and other comments."""

    # Accept attribute names as well as aliases, and keep undeclared keys.
    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # Read from the document key "type" (alias); only "frame" is accepted and it
    # is the default when the key is absent.
    type_: Literal["frame"] = Field(default="frame", alias="type", description="Comment type (``frame``).")
    title: None | str = Field(default=None, description="Title displayed on the frame header.")
    # Members may be referenced by label (str) or by index (int), mixed freely.
    contains_steps: None | list[str | int] = Field(default=None, description="Step labels or indices contained within this frame.")
    contains_comments: None | list[str | int] = Field(default=None, description="Comment labels or indices contained within this frame.")



[docs]
class FreehandComment(BaseComment):
    """A freehand drawn line on the editor canvas."""

    # Accept attribute names as well as aliases, and keep undeclared keys.
    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # Read from the document key "type" (alias); only "freehand" is accepted and
    # it is the default when the key is absent.
    type_: Literal["freehand"] = Field(default="freehand", alias="type", description="Comment type (``freehand``).")
    thickness: None | float | int = Field(default=None, description="Line thickness.")
    # NOTE: typed as a list of float lists; the two-element ``[x, y]`` shape of
    # each inner list is documented but not enforced by the annotation.
    line: list[list[float]] | None = Field(default=None, description="Array of ``[x, y]`` coordinate pairs defining the freehand line path.")



[docs]
class BaseCreator(BaseModel):
    """Base fields shared by all creator types, corresponding to schema.org
Thing properties common to both Person and Organization."""

    # Accept attribute names as well as aliases, and keep undeclared keys.
    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # All fields are optional free-form strings; none of them is validated as a
    # URL, e-mail address or phone number by its annotation.
    name: None | str = Field(default=None, description="Full name of the person or organization.")
    identifier: None | str = Field(default=None, description="Persistent identifier, typically an ORCID URL (e.g. ``https://orcid.org/0000-0001-2345-6789``) or bare ORCID.")
    url: None | str = Field(default=None, description="Website or profile URL.")
    email: None | str = Field(default=None, description="Email address. May include a ``mailto:`` prefix.")
    image: None | str = Field(default=None, description="URL to an image or avatar.")
    address: None | str = Field(default=None, description="Physical or mailing address.")
    # camelCase attribute names are used as-is for the fields below (no alias).
    alternateName: None | str = Field(default=None, description="An alternate name or alias.")
    telephone: None | str = Field(default=None, description="Telephone number.")
    faxNumber: None | str = Field(default=None, description="Fax number.")



[docs]
class CreatorPerson(BaseCreator):
    """A person who created or contributed to the workflow.
Corresponds to a `schema.org Person <https://schema.org/Person>`_."""

    # Accept attribute names as well as aliases, and keep undeclared keys.
    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # ``class`` is a Python keyword, so the attribute is ``class_`` and the
    # document key "class" is mapped through the alias.  Only "Person" is
    # accepted; it is also the default when the key is absent.
    class_: Literal["Person"] = Field(default="Person", alias="class", description="Creator type discriminator (``Person``).")
    givenName: None | str = Field(default=None, description="Given (first) name.")
    familyName: None | str = Field(default=None, description="Family (last) name.")
    honorificPrefix: None | str = Field(default=None, description="Honorific prefix (e.g. ``Dr``, ``Prof``).")
    honorificSuffix: None | str = Field(default=None, description="Honorific suffix (e.g. ``M.D.``, ``PhD``).")
    jobTitle: None | str = Field(default=None, description="Job title or role.")



[docs]
class CreatorOrganization(BaseCreator):
    """An organization that created or contributed to the workflow.
Corresponds to a `schema.org Organization <https://schema.org/Organization>`_."""

    # Accept attribute names as well as aliases, and keep undeclared keys.
    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # ``class`` is a Python keyword, so the attribute is ``class_`` and the
    # document key "class" is mapped through the alias.  Only "Organization" is
    # accepted; it is also the default when the key is absent.
    class_: Literal["Organization"] = Field(default="Organization", alias="class", description="Creator type discriminator (``Organization``).")



[docs]
class GalaxyWorkflow(Process, HasUUID):
    """A Galaxy workflow description. This record corresponds to the description of a workflow that should be executable
on a Galaxy server that includes the contained tool definitions.

The workflows API or the user interface of Galaxy instances that are of version 19.09 or newer should be able to
import a document defining this record.

## A note about `label` field.

This is the name of the workflow in the Galaxy user interface. This is the mechanism that
users will primarily identify the workflow using. Legacy support - this may also be called 'name' and Galaxy will
consume the workflow document fine and treat this attribute correctly - however in order to validate against this
workflow definition schema the attribute should be called `label`."""

    # populate_by_name=True: a field may be supplied under its attribute name as
    # well as under its alias.  extra="allow": keys that are not declared on the
    # model are accepted instead of being rejected.
    model_config = ConfigDict(populate_by_name=True, extra="allow")

    id: None | str = Field(default=None, description="The unique identifier for this object.")
    # ``class`` is a Python keyword, so the attribute is ``class_`` and the
    # document key "class" is mapped through the alias.  Only "GalaxyWorkflow"
    # is accepted; it is also the default when the key is absent.
    class_: Literal["GalaxyWorkflow"] = Field(default="GalaxyWorkflow", alias="class")
    label: None | str = Field(default=None, description="A short, human-readable label of this object.")
    doc: None | str | list[str] = Field(default=None, description="A documentation string for this object, or an array of strings which should be concatenated.")
    # Required (no default).  Steps may be given as a list or as a mapping.
    steps: list[WorkflowStep] | dict[str, WorkflowStep] = Field(description="The individual steps that make up the workflow. Each step is executed when all of its input data links are fulfilled.")
    report: None | Report = Field(default=None, description="Workflow invocation report template.")
    tags: list[str] | None = Field(default=None, description="Tags for the workflow.")
    # Comments may be given as a list or as a mapping; each entry is one of the
    # four comment models.
    comments: list[TextComment | MarkdownComment | FrameComment | FreehandComment] | dict[str, TextComment | MarkdownComment | FrameComment | FreehandComment] | None = Field(default=None, description="Visual annotations for the workflow editor canvas. Comments are non-functional and do not affect workflow execution. May be specified as a list or as a mapping keyed by label.")
    creator: list[CreatorPerson | CreatorOrganization] | None = Field(default=None, description="Workflow creators. Can be schema.org Person (https://schema.org/Person) or Organization (https://schema.org/Organization) entities.")
    # Free-form string: the SPDX requirement is stated in the description only.
    license: None | str = Field(default=None, description="Must be a valid license listed at https://spdx.org/licenses/")
    # NOTE(review): description typo fixed ("reposiory" -> "repository").  This
    # file is marked auto-generated, so mirror the fix in the upstream schema.
    release: None | str = Field(default=None, description="If listed should correspond to the release of the workflow in its source repository.")


# Rebuild models for forward references.
# This module uses ``from __future__ import annotations``, so field annotations
# are kept as strings, and some of them name classes that are declared further
# down the file (for example WorkflowStep refers to WorkflowStepOutput and
# GalaxyWorkflow).  ``model_rebuild()`` is called on every model here, after all
# classes exist, so those names can be resolved.  The calls follow the order in
# which the classes are declared.
Documented.model_rebuild()
RecordField.model_rebuild()
RecordSchema.model_rebuild()
EnumSchema.model_rebuild()
ArraySchema.model_rebuild()
Labeled.model_rebuild()
Identified.model_rebuild()
Parameter.model_rebuild()
InputParameter.model_rebuild()
OutputParameter.model_rebuild()
Process.model_rebuild()
HasUUID.model_rebuild()
HasStepErrors.model_rebuild()
HasStepPosition.model_rebuild()
StepPosition.model_rebuild()
ReferencesTool.model_rebuild()
SampleSheetColumnDefinition.model_rebuild()
RecordFieldDefinition.model_rebuild()
WorkflowTextOption.model_rebuild()
ToolShedRepository.model_rebuild()
BaseInputParameter.model_rebuild()
BaseDataParameter.model_rebuild()
WorkflowDataParameter.model_rebuild()
WorkflowCollectionParameter.model_rebuild()
MinMax.model_rebuild()
WorkflowIntegerParameter.model_rebuild()
WorkflowFloatParameter.model_rebuild()
WorkflowTextParameter.model_rebuild()
WorkflowBooleanParameter.model_rebuild()
WorkflowInputParameter.model_rebuild()
WorkflowOutputParameter.model_rebuild()
WorkflowStep.model_rebuild()
Sink.model_rebuild()
WorkflowStepInput.model_rebuild()
Report.model_rebuild()
WorkflowStepOutput.model_rebuild()
BaseComment.model_rebuild()
TextComment.model_rebuild()
MarkdownComment.model_rebuild()
FrameComment.model_rebuild()
FreehandComment.model_rebuild()
BaseCreator.model_rebuild()
CreatorPerson.model_rebuild()
CreatorOrganization.model_rebuild()
GalaxyWorkflow.model_rebuild()



[docs]
def load_document(path: str | Path) -> GalaxyWorkflow | list[GalaxyWorkflow]:
    """Load and validate a document from a JSON file.

    Args:
        path: Location of the JSON file to read.

    Returns:
        A list with one validated ``GalaxyWorkflow`` per element when the
        top-level JSON value is an array; otherwise the top-level value
        validated as a single ``GalaxyWorkflow``.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON exchanged between systems is UTF-8 (RFC 8259), so decode it
    # explicitly instead of relying on the locale-dependent default encoding,
    # which would mis-read non-ASCII text on platforms that default to
    # another code page.
    with open(path, encoding="utf-8") as f:
        data = json.load(f)
    if isinstance(data, list):
        return [_load_single(item) for item in data]
    return _load_single(data)



def _load_single(data: dict[str, Any]) -> GalaxyWorkflow:
    """Validate one decoded document mapping and return it as a ``GalaxyWorkflow``."""
    workflow = GalaxyWorkflow.model_validate(data)
    return workflow


# Maps each accepted spelling of a workflow input ``type`` to the parameter
# model class used for it.  Several spellings share one class; spellings that
# are not listed here are handled by the fallback in input_parameter_class().
_INPUT_TYPE_TO_CLASS: dict[str, type[BaseInputParameter]] = {
    # Dataset inputs.
    "data": WorkflowDataParameter,
    "File": WorkflowDataParameter,
    "data_input": WorkflowDataParameter,
    # Dataset collection inputs.
    "collection": WorkflowCollectionParameter,
    "data_collection": WorkflowCollectionParameter,
    "data_collection_input": WorkflowCollectionParameter,
    # Integer parameters.
    "integer": WorkflowIntegerParameter,
    "int": WorkflowIntegerParameter,
    # Text parameters.
    "text": WorkflowTextParameter,
    "string": WorkflowTextParameter,
    "float": WorkflowFloatParameter,
    "boolean": WorkflowBooleanParameter,
    # "color" has no dedicated class in this table; it shares the text class.
    "color": WorkflowTextParameter,
}



[docs]
def input_parameter_class(type_value: str | None) -> type[BaseInputParameter]:
    """Resolve a Format2 input ``type`` string to its parameter model class.

    A ``None`` value, or a string that is not a key of the lookup table,
    resolves to ``WorkflowDataParameter``.
    """
    if type_value is not None and type_value in _INPUT_TYPE_TO_CLASS:
        return _INPUT_TYPE_TO_CLASS[type_value]
    # Missing or unrecognised type: treat the input as a dataset parameter.
    return WorkflowDataParameter