I want to add dependency packages to my SageMaker pipeline so that they are available in the Preprocess step.
I tried adding them to `required_packages` in the `setup.py` file, but that does not work.
As far as I can tell, the `setup.py` file is not used at all.
# Requirement specifiers listed for the project.
# NOTE(review): presumably consumed by setup.py; whether they ever reach the
# processing job's environment is not visible from here -- confirm.
required_packages = [
    "sagemaker==2.93.0",
    "matplotlib",
]
Preprocessing steps:
# Processor used to run the preprocessing script. Instance type/count, role
# and session all come from variables defined outside this snippet.
processing_job_prefix = f"{base_job_prefix}/job-name"
sklearn_processor = SKLearnProcessor(
    framework_version="0.23-1",
    role=role,
    instance_type=processing_instance_type,
    instance_count=processing_instance_count,
    base_job_name=processing_job_prefix,
    sagemaker_session=pipeline_session,
)
# One ProcessingOutput per data split; each output is named after the split
# and sourced from the matching directory under /opt/ml/processing/.
split_outputs = [
    ProcessingOutput(output_name=split, source=f"/opt/ml/processing/{split}")
    for split in ("train", "validation", "test")
]

# Script executed by the processor, resolved relative to BASE_DIR.
preprocess_script = os.path.join(BASE_DIR, "preprocess.py")

# The script receives the input location via its --input-data argument.
step_args = sklearn_processor.run(
    outputs=split_outputs,
    code=preprocess_script,
    arguments=["--input-data", input_data],
)
# Wrap the arguments returned by sklearn_processor.run() in a named pipeline step.
step_process = ProcessingStep(name="PreprocessSidData", step_args=step_args)
Pipeline definition:
# Parameters registered on the pipeline, in declaration order.
pipeline_parameters = [
    processing_instance_type,
    processing_instance_count,
    training_instance_type,
    model_approval_status,
    input_data,
]

# Only the preprocessing step is wired into the pipeline here.
pipeline = Pipeline(
    name=pipeline_name,
    parameters=pipeline_parameters,
    steps=[step_process],
    sagemaker_session=pipeline_session,
)