Updated on 2024-08-14 GMT+08:00

Examples

There are seven scenarios:

  • Using an algorithm subscribed to in AI Gallery
  • Using an algorithm in Algorithm Management
  • Using a custom algorithm (code directory+boot file+official image)
  • Using a custom algorithm (code directory+boot command+official image)
  • Creating a job phase based on the dataset release phase
  • Job phase with visualization
  • Using the DataSelector object as the input, which supports OBS or datasets

Using an Algorithm Subscribed to in AI Gallery

from modelarts import workflow as wf

# Create an OutputStorage object to centrally manage training output directories.
# Only name is mandatory.
storage = wf.data.OutputStorage(
    name="storage_name",
    title="title_info",
    description="description_info",
)

# Define the input dataset.
dataset = wf.data.DatasetPlaceholder(name="input_dataset")

# Algorithm used for training. An algorithm subscribed to in AI Gallery is used in this example.
# If the value of an algorithm hyperparameter does not need to be changed, you do not need to
# configure the hyperparameter in parameters. Hyperparameter values will be automatically filled.
training_algorithm = wf.AIGalleryAlgorithm(
    subscription_id="subscription_id",  # Subscription ID of the subscribed algorithm.
    item_version_id="item_version_id",  # Algorithm version ID. You can also enter the version number instead.
    parameters=[
        # Algorithm hyperparameters are represented using placeholders, which can be
        # integer, bool, float, or string.
        wf.AlgorithmParameters(
            name="parameter_name",
            value=wf.Placeholder(
                name="parameter_name",
                placeholder_type=wf.PlaceholderType.STR,
                default="fake_value",
                description="description_info",
            ),
        ),
    ],
)

# JobStep inputs are configured when the workflow is running. You can also use
# wf.data.Dataset(dataset_name="fake_dataset_name", version_name="fake_version_name") for the data field.
job_input = wf.steps.JobInput(name="data_url", data=dataset)

# JobStep outputs.
job_output = wf.steps.JobOutput(
    name="train_url",
    obs_config=wf.data.OBSOutputConfig(obs_path=storage.join("directory_path")),
)

# Training flavors.
job_spec = wf.steps.JobSpec(
    resource=wf.steps.JobResource(
        flavor=wf.Placeholder(
            name="train_flavor",
            placeholder_type=wf.PlaceholderType.JSON,
            description="Training flavor",
        ),
    ),
)

# Use JobStep to define a training phase. Use a dataset as the input, and use OBS to store the output.
job_step = wf.steps.JobStep(
    # Name of a training phase. The name contains a maximum of 64 characters, including only
    # letters, digits, underscores (_), and hyphens (-). It must start with a letter and must be
    # unique in a workflow.
    name="training_job",
    title="Image classification training",  # Title, which defaults to the value of name.
    algorithm=training_algorithm,
    inputs=job_input,
    outputs=job_output,
    spec=job_spec,
)

workflow = wf.Workflow(
    name="job-step-demo",
    desc="this is a demo workflow",
    steps=[job_step],
    storages=[storage],
)

Using an Algorithm in Algorithm Management

from modelarts import workflow as wf

# Create an OutputStorage object to centrally manage training output directories.
# Only name is mandatory.
storage = wf.data.OutputStorage(
    name="storage_name",
    title="title_info",
    description="description_info",
)

# Define the input dataset.
dataset = wf.data.DatasetPlaceholder(name="input_dataset")

# Algorithm used for training. An algorithm from Algorithm Management is used in this example.
# If the value of an algorithm hyperparameter does not need to be changed, you do not need to
# configure the hyperparameter in parameters. Hyperparameter values will be automatically filled.
training_algorithm = wf.Algorithm(
    algorithm_id="algorithm_id",  # Algorithm ID
    parameters=[
        # Algorithm hyperparameters are represented using placeholders, which can be
        # integer, bool, float, or string.
        wf.AlgorithmParameters(
            name="parameter_name",
            value=wf.Placeholder(
                name="parameter_name",
                placeholder_type=wf.PlaceholderType.STR,
                default="fake_value",
                description="description_info",
            ),
        ),
    ],
)

# JobStep inputs are configured when the workflow is running. You can also use
# wf.data.Dataset(dataset_name="fake_dataset_name", version_name="fake_version_name") for the data field.
job_input = wf.steps.JobInput(name="data_url", data=dataset)

# JobStep outputs.
job_output = wf.steps.JobOutput(
    name="train_url",
    obs_config=wf.data.OBSOutputConfig(obs_path=storage.join("directory_path")),
)

# Training flavors.
job_spec = wf.steps.JobSpec(
    resource=wf.steps.JobResource(
        flavor=wf.Placeholder(
            name="train_flavor",
            placeholder_type=wf.PlaceholderType.JSON,
            description="Training flavor",
        ),
    ),
)

# Use JobStep to define a training phase. Use a dataset as the input, and use OBS to store the output.
job_step = wf.steps.JobStep(
    # Name of a training phase. The name contains a maximum of 64 characters, including only
    # letters, digits, underscores (_), and hyphens (-). It must start with a letter and must be
    # unique in a workflow.
    name="training_job",
    title="Image classification training",  # Title, which defaults to the value of name.
    algorithm=training_algorithm,
    inputs=job_input,
    outputs=job_output,
    spec=job_spec,
)

workflow = wf.Workflow(
    name="job-step-demo",
    desc="this is a demo workflow",
    steps=[job_step],
    storages=[storage],
)

Using a Custom Algorithm (Code Directory+Boot File+Official Image)

from modelarts import workflow as wf

# Create an OutputStorage object to centrally manage training output directories.
# Only name is mandatory.
storage = wf.data.OutputStorage(
    name="storage_name",
    title="title_info",
    description="description_info",
)

# Define the input dataset.
dataset = wf.data.DatasetPlaceholder(name="input_dataset")

# The custom algorithm is implemented using the code directory, boot file, and official image.
training_algorithm = wf.BaseAlgorithm(
    code_dir="fake_code_dir",  # Code directory
    boot_file="fake_boot_file",  # Boot file path, which must be in the code directory
    # Name and version of the official image
    engine=wf.steps.JobEngine(
        engine_name="fake_engine_name",
        engine_version="fake_engine_version",
    ),
    parameters=[
        # Algorithm hyperparameters are represented using placeholders, which can be
        # integer, bool, float, or string.
        wf.AlgorithmParameters(
            name="parameter_name",
            value=wf.Placeholder(
                name="parameter_name",
                placeholder_type=wf.PlaceholderType.STR,
                default="fake_value",
                description="description_info",
            ),
        ),
    ],
)

# JobStep inputs are configured when the workflow is running. You can also use
# wf.data.Dataset(dataset_name="fake_dataset_name", version_name="fake_version_name") for the data field.
job_input = wf.steps.JobInput(name="data_url", data=dataset)

# JobStep outputs.
job_output = wf.steps.JobOutput(
    name="train_url",
    obs_config=wf.data.OBSOutputConfig(obs_path=storage.join("directory_path")),
)

# Training flavors.
job_spec = wf.steps.JobSpec(
    resource=wf.steps.JobResource(
        flavor=wf.Placeholder(
            name="train_flavor",
            placeholder_type=wf.PlaceholderType.JSON,
            description="Training flavor",
        ),
    ),
)

# Use JobStep to define a training phase. Use a dataset as the input, and use OBS to store the output.
job_step = wf.steps.JobStep(
    # Name of a training phase. The name contains a maximum of 64 characters, including only
    # letters, digits, underscores (_), and hyphens (-). It must start with a letter and must be
    # unique in a workflow.
    name="training_job",
    title="Image classification training",  # Title, which defaults to the value of name.
    algorithm=training_algorithm,
    inputs=job_input,
    outputs=job_output,
    spec=job_spec,
)

workflow = wf.Workflow(
    name="job-step-demo",
    desc="this is a demo workflow",
    steps=[job_step],
    storages=[storage],
)

Using a Custom Algorithm (Code Directory+Boot Command+Official Image)

from modelarts import workflow as wf

# Create an OutputStorage object to centrally manage training output directories.
# Only name is mandatory.
storage = wf.data.OutputStorage(
    name="storage_name",
    title="title_info",
    description="description_info",
)

# Define the input dataset.
dataset = wf.data.DatasetPlaceholder(name="input_dataset")

# The custom algorithm is implemented using the code directory, boot command, and official image.
training_algorithm = wf.BaseAlgorithm(
    code_dir="fake_code_dir",  # Code directory
    command="fake_command",  # Boot command
    # Custom image URL, in the format of Organization name/Image name:Version name.
    # Do not include the domain name. If image_url must be configurable in the running state,
    # use the following instead:
    # image_url=wf.Placeholder(name="image_url", placeholder_type=wf.PlaceholderType.STR, placeholder_format="swr", description="Custom image")
    engine=wf.steps.JobEngine(image_url="fake_image_url"),
    parameters=[
        # Algorithm hyperparameters are represented using placeholders, which can be
        # integer, bool, float, or string.
        wf.AlgorithmParameters(
            name="parameter_name",
            value=wf.Placeholder(
                name="parameter_name",
                placeholder_type=wf.PlaceholderType.STR,
                default="fake_value",
                description="description_info",
            ),
        ),
    ],
)

# JobStep inputs are configured when the workflow is running. You can also use
# wf.data.Dataset(dataset_name="fake_dataset_name", version_name="fake_version_name") for the data field.
job_input = wf.steps.JobInput(name="data_url", data=dataset)

# JobStep outputs.
job_output = wf.steps.JobOutput(
    name="train_url",
    obs_config=wf.data.OBSOutputConfig(obs_path=storage.join("directory_path")),
)

# Training flavors.
job_spec = wf.steps.JobSpec(
    resource=wf.steps.JobResource(
        flavor=wf.Placeholder(
            name="train_flavor",
            placeholder_type=wf.PlaceholderType.JSON,
            description="Training flavor",
        ),
    ),
)

# Use JobStep to define a training phase. Use a dataset as the input, and use OBS to store the output.
job_step = wf.steps.JobStep(
    # Name of a training phase. The name contains a maximum of 64 characters, including only
    # letters, digits, underscores (_), and hyphens (-). It must start with a letter and must be
    # unique in a workflow.
    name="training_job",
    title="Image classification training",  # Title, which defaults to the value of name.
    algorithm=training_algorithm,
    inputs=job_input,
    outputs=job_output,
    spec=job_spec,
)

workflow = wf.Workflow(
    name="job-step-demo",
    desc="this is a demo workflow",
    steps=[job_step],
    storages=[storage],
)

The preceding four methods use a dataset as the input. If you want to use an OBS path as the input, set data of JobInput to data=wf.data.OBSPlaceholder(name="obs_placeholder_name", object_type="directory") or data=wf.data.OBSPath(obs_path="fake_obs_path").

In addition, you can specify a dataset or OBS path when creating a workflow to reduce configuration operations and facilitate debugging in the development state. You are advised to use placeholders to create a workflow you want to publish to the running state or AI Gallery. In this case, you can configure parameters before workflow execution.

Creating a Job Phase Based on the Dataset Release Phase

Scenario: The output of the dataset release phase is used as the input of the job phase.

from modelarts import workflow as wf

# Define the dataset object.
dataset = wf.data.DatasetPlaceholder(name="input_dataset")

# Define the split ratio between the training set and validation set.
train_ratio = wf.Placeholder(
    name="placeholder_name",
    placeholder_type=wf.PlaceholderType.STR,
    default="0.8",
)

release_version_step = wf.steps.ReleaseDatasetStep(
    # Name of the dataset release phase. The name contains a maximum of 64 characters, including
    # only letters, digits, underscores (_), and hyphens (-). It must start with a letter and must
    # be unique in a workflow.
    name="release_dataset",
    title="Dataset version release",  # Title, which defaults to the value of name
    # ReleaseDatasetStep inputs. The dataset object is configured when the workflow is running.
    # You can also use wf.data.Dataset(dataset_name="dataset_name") for the data field.
    inputs=wf.steps.ReleaseDatasetInput(name="input_name", data=dataset),
    # ReleaseDatasetStep outputs
    outputs=wf.steps.ReleaseDatasetOutput(
        name="output_name",
        dataset_version_config=wf.data.DatasetVersionConfig(
            # Labeling job type for dataset version release
            label_task_type=wf.data.LabelTaskTypeEnum.IMAGE_CLASSIFICATION,
            # Split ratio between the training set and validation set
            train_evaluate_sample_ratio=train_ratio,
        ),
    ),
)

# Create an OutputStorage object to centrally manage training output directories.
# Only name is mandatory.
storage = wf.data.OutputStorage(
    name="storage_name",
    title="title_info",
    description="description_info",
)

# Algorithm used for training. An algorithm subscribed to in AI Gallery is used in this example.
# If the value of an algorithm hyperparameter does not need to be changed, you do not need to
# configure the hyperparameter in parameters. Hyperparameter values will be automatically filled.
training_algorithm = wf.AIGalleryAlgorithm(
    subscription_id="subscription_id",  # Subscription ID of the subscribed algorithm
    item_version_id="item_version_id",  # Version ID of the subscribed algorithm
    parameters=[
        # Algorithm hyperparameters are represented using placeholders, which can be
        # integer, bool, float, or string.
        wf.AlgorithmParameters(
            name="parameter_name",
            value=wf.Placeholder(
                name="parameter_name",
                placeholder_type=wf.PlaceholderType.STR,
                default="fake_value",
                description="description_info",
            ),
        ),
    ],
)

# The output of the dataset release phase is used as the input of JobStep.
# release_version_step is an instance object of wf.steps.ReleaseDatasetStep and output_name is
# the value of the name field of wf.steps.ReleaseDatasetOutput.
job_input = wf.steps.JobInput(
    name="data_url",
    data=release_version_step.outputs["output_name"].as_input(),
)

# JobStep outputs.
job_output = wf.steps.JobOutput(
    name="train_url",
    obs_config=wf.data.OBSOutputConfig(obs_path=storage.join("directory_path")),
)

# Training flavors.
job_spec = wf.steps.JobSpec(
    resource=wf.steps.JobResource(
        flavor=wf.Placeholder(
            name="train_flavor",
            placeholder_type=wf.PlaceholderType.JSON,
            description="Training flavor",
        ),
    ),
)

# Use JobStep to define a training phase. Use a dataset as the input, and use OBS to store the output.
job_step = wf.steps.JobStep(
    # Name of a training phase. The name contains a maximum of 64 characters, including only
    # letters, digits, underscores (_), and hyphens (-). It must start with a letter and must be
    # unique in a workflow.
    name="training_job",
    title="Image classification training",  # Title, which defaults to the value of name.
    algorithm=training_algorithm,
    inputs=job_input,
    outputs=job_output,
    spec=job_spec,
    depend_steps=release_version_step,  # Preceding dataset release phase
)

workflow = wf.Workflow(
    name="job-step-demo",
    desc="this is a demo workflow",
    steps=[release_version_step, job_step],
    storages=[storage],
)

Job Phase With Visualization

Phase visualization enables you to view the metrics generated by your workflows in real time. You can also display the external disks of each phase separately. To use phase visualization, add an output for showing metrics to the original job phase and configure that output through the MetricsConfig object.

Table 1 MetricsConfig

Parameter

Description

Mandatory

Data Type

metric_files

Metric files

Yes

List. Elements in the list support string, placeholder, or storage.

realtime_visualization

Whether to display the output metrics in real time

No

Bool. The default value is False.

visualization

Whether to display visualization phases separately

No

Bool. The default value is True.

The output metrics file must contain standard JSON data with a maximum size of 1 MB. The data must be in one of the following supported formats:

  • Key-value pair data
    [
        {
            "key": "loss",
            "title": "loss",
            "type": "float",
            "data": {
                "value": 1.2
            }
        },
        {
            "key": "accuracy",
            "title": "accuracy",
            "type": "float",
            "data": {
                "value": 1.6
            }
        }
    ]

  • Line chart data
    [
        {
            "key": "metric",
            "title": "metric",
            "type": "line chart",
            "data": {
                "x_axis": [
                    {
                        "title": "step/epoch",
                        "value": [
                            1,
                            2,
                            3
                        ]
                    }
                ],
                "y_axis": [
                    {
                        "title": "value",
                        "value": [
                            0.5,
                            0.4,
                            0.3
                        ]
                    }
                ]
            }
        }
    ]

  • Histogram data
    [
        {
            "key": "metric",
            "title": "metric",
            "type": "histogram",
            "data": {
                "x_axis": [
                    {
                        "title": "step/epoch",
                        "value": [
                            1,
                            2,
                            3
                        ]
                    }
                ],
                "y_axis": [
                    {
                        "title": "value",
                        "value": [
                            0.5,
                            0.4,
                            0.3
                        ]
                    }
                ]
            }
        }
    ]
  • Confusion matrix
    [
        {
            "key": "confusion_matrix",
            "title": "confusion_matrix",
            "type": "table",
            "data": {
                "cell_value": [
                    [
                        1,
                        2
                    ],
                    [
                        2,
                        3
                    ]
                ],
                "col_labels": {
                    "title": "labels",
                    "value": [
                        "daisy",
                        "dandelion"
                    ]
                },
                "row_labels": {
                    "title": "predictions",
                    "value": [
                        "daisy",
                        "dandelion"
                    ]
                }
            }
        }
    ]

  • One-dimensional table
    [
        {
            "key": "Application Evaluation Results",
            "title": "Application Evaluation Results",
            "type": "one-dimensional-table",
            "data": {
                "cell_value": [
                    [
                        10,
                        2,
                        0.5
                    ]
                ],
                "labels": [
                    "samples",
                    "maxResTine",
                    "p99"
                ]
            }
        }
    ]
    Example:
    from modelarts import workflow as wf
    
    # Create a Storage object to centrally manage training output directories.
    # Only name is mandatory.
    storage = wf.data.Storage(
        name="storage_name",
        title="title_info",
        description="description_info",
        with_execution_id=True,
        create_dir=True,
    )

    # Define the input dataset.
    dataset = wf.data.DatasetPlaceholder(name="input_dataset")

    # Algorithm used for training. An algorithm subscribed to in AI Gallery is used in this example.
    # If the value of an algorithm hyperparameter does not need to be changed, you do not need to
    # configure the hyperparameter in parameters. Hyperparameter values will be automatically filled.
    training_algorithm = wf.AIGalleryAlgorithm(
        subscription_id="subscription_id",  # Subscription ID of the subscribed algorithm
        item_version_id="item_version_id",  # Algorithm version ID. You can also enter the version number instead.
        parameters=[
            # Algorithm hyperparameters are represented using placeholders, which can be
            # integer, bool, float, or string.
            wf.AlgorithmParameters(
                name="parameter_name",
                value=wf.Placeholder(
                    name="parameter_name",
                    placeholder_type=wf.PlaceholderType.STR,
                    default="fake_value",
                    description="description_info",
                ),
            ),
        ],
    )

    # JobStep inputs are configured when the workflow is running. You can also use
    # wf.data.Dataset(dataset_name="fake_dataset_name", version_name="fake_version_name") for the data field.
    job_input = wf.steps.JobInput(name="data_url", data=dataset)

    job_outputs = [
        # JobStep outputs
        wf.steps.JobOutput(
            name="train_url",
            obs_config=wf.data.OBSOutputConfig(obs_path=storage.join("directory_path")),
        ),
        # Metrics are output to the configured path by the job script.
        wf.steps.JobOutput(
            name="metrics_output",
            metrics_config=wf.data.MetricsConfig(
                metric_files=storage.join("directory_path/metrics.json", create_dir=False),
            ),
        ),
    ]

    # Training flavors.
    job_spec = wf.steps.JobSpec(
        resource=wf.steps.JobResource(
            flavor=wf.Placeholder(
                name="train_flavor",
                placeholder_type=wf.PlaceholderType.JSON,
                description="Training flavor",
            ),
        ),
    )

    # Use JobStep to define a training phase. Use a dataset as the input, and use OBS to store the output.
    job_step = wf.steps.JobStep(
        # Name of a training phase. The name contains a maximum of 64 characters, including only
        # letters, digits, underscores (_), and hyphens (-). It must start with a letter and must be
        # unique in a workflow.
        name="training_job",
        title="Image classification training",  # Title, which defaults to the value of name.
        algorithm=training_algorithm,
        inputs=job_input,
        outputs=job_outputs,
        spec=job_spec,
    )

    workflow = wf.Workflow(
        name="job-step-demo",
        desc="this is a demo workflow",
        steps=[job_step],
        storages=[storage],
    )

Workflow does not automatically retrieve the metrics produced by training. You need to extract the metrics from the algorithm code, create the metrics.json file in the required data format, and upload the file to the OBS path specified in MetricsConfig. Workflow only reads, renders, and displays the data.

Using the DataSelector Object as the Input, Which Supports OBS or Datasets

Use this method when the input type needs to be selectable. The DataSelector object allows you to select either a dataset object or an OBS object as the training input. Here is a code example:

from modelarts import workflow as wf

# Create an OutputStorage object to centrally manage training output directories.
# Only name is mandatory.
storage = wf.data.OutputStorage(
    name="storage_name",
    title="title_info",
    description="description_info",
)

# Define the DataSelector object.
data_selector = wf.data.DataSelector(name="input_data", data_type_list=["dataset", "obs"])

# Algorithm used for training. An algorithm subscribed to in AI Gallery is used in this example.
# If the value of an algorithm hyperparameter does not need to be changed, you do not need to
# configure the hyperparameter in parameters. Hyperparameter values will be automatically filled.
training_algorithm = wf.AIGalleryAlgorithm(
    subscription_id="subscription_id",  # Subscription ID of the subscribed algorithm.
    item_version_id="item_version_id",  # Algorithm version ID. You can also enter the version number instead.
    parameters=[
        # Algorithm hyperparameters are represented using placeholders, which can be
        # integer, bool, float, or string.
        wf.AlgorithmParameters(
            name="parameter_name",
            value=wf.Placeholder(
                name="parameter_name",
                placeholder_type=wf.PlaceholderType.STR,
                default="fake_value",
                description="description_info",
            ),
        ),
    ],
)

# JobStep inputs are configured when the workflow is running. You can choose OBS or datasets as the input.
job_input = wf.steps.JobInput(name="data_url", data=data_selector)

# JobStep outputs.
job_output = wf.steps.JobOutput(
    name="train_url",
    obs_config=wf.data.OBSOutputConfig(obs_path=storage.join("directory_path")),
)

# Training flavors.
job_spec = wf.steps.JobSpec(
    resource=wf.steps.JobResource(
        flavor=wf.Placeholder(
            name="train_flavor",
            placeholder_type=wf.PlaceholderType.JSON,
            description="Training flavor",
        ),
    ),
)

# Use JobStep to define a training phase. Use the DataSelector object as the input, and use OBS to store the output.
job_step = wf.steps.JobStep(
    # Name of a training phase. The name contains a maximum of 64 characters, including only
    # letters, digits, underscores (_), and hyphens (-). It must start with a letter and must be
    # unique in a workflow.
    name="training_job",
    title="Image classification training",  # Title, which defaults to the value of name.
    algorithm=training_algorithm,
    inputs=job_input,
    outputs=job_output,
    spec=job_spec,
)

workflow = wf.Workflow(
    name="job-step-demo",
    desc="this is a demo workflow",
    steps=[job_step],
    storages=[storage],
)

When using DataSelector as the input, ensure that the algorithm input supports both datasets and OBS.