Unable to deploy a trained model using Azure Machine Learning SDK v2
I have created and trained the model. The main model script is 'OpenTag2018', and it imports other scripts from a subfolder of the model directory.
I have also created the score.py that runs when the model is deployed -
import logging
import os
import json
import mlflow
from io import StringIO
from mlflow.pyfunc.scoring_server import infer_and_parse_json_input, predictions_to_json
import sys
from time import strftime, localtime
from collections import Counter
from config import opt
from pytorch_transformers import BertTokenizer
import random
import numpy as np
import torch
from tqdm import tqdm
def init():
    global model
    # "model" is the path of the mlflow artifacts when the model was registered. For automl
    # models, this is generally "mlflow-model".
    model_path = os.path.join(os.getenv("AZUREML_MODEL_DIR"), "use-case1-model")
    model = mlflow.pyfunc.load_model(model_path)
    logging.info("Init complete")

def run(raw_data):
    data = json.loads(raw_data)
    title = data["title"]
    att = data["attributes"]
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    def is_english_char(cp):
        """Checks whether cp is the codepoint of an English (Latin) character."""
        if ((cp >= 0x0041 and cp <= 0x005A) or      # uppercase A-Z
                (cp >= 0x0061 and cp <= 0x007A) or  # lowercase a-z
                (cp >= 0x00C0 and cp <= 0x00FF) or  # Latin-1 Supplement
                (cp >= 0x0100 and cp <= 0x017F) or  # Latin Extended-A
                (cp >= 0x0180 and cp <= 0x024F) or  # Latin Extended-B
                (cp >= 0x1E00 and cp <= 0x1EFF) or  # Latin Extended Additional
                (cp >= 0x2C60 and cp <= 0x2C7F) or  # Latin Extended-C
                (cp >= 0xA720 and cp <= 0xA7FF) or  # Latin Extended-D
                (cp >= 0xAB30 and cp <= 0xAB6F) or  # Latin Extended-E
                (cp >= 0xFB00 and cp <= 0xFB06)):   # Alphabetic Presentation Forms
            return True
        return False

    max_len = 40

    def X_padding(ids):
        # Truncate or zero-pad the title token ids to exactly max_len.
        if len(ids) >= max_len:
            return ids[:max_len]
        ids.extend([0] * (max_len - len(ids)))
        return ids

    tag_max_len = 6

    def tag_padding(ids):
        # Truncate or zero-pad the attribute token ids to exactly tag_max_len.
        if len(ids) >= tag_max_len:
            return ids[:tag_max_len]
        ids.extend([0] * (tag_max_len - len(ids)))
        return ids

    def nobert4token(tokenizer, title, attribute):
        # Split the text into single Latin characters and runs of other characters,
        # then map the resulting tokens to BERT vocabulary ids.
        def get_char(sent):
            tmp = []
            s = ''
            for char in sent.strip():
                if char.strip():
                    cp = ord(char)
                    if is_english_char(cp):
                        if s:
                            tmp.append(s)
                        tmp.append(char)
                        s = ''
                    else:
                        s += char
                elif s:
                    tmp.append(s)
                    s = ''
            if s:
                tmp.append(s)
            return tmp

        title_list = get_char(title)
        attribute_list = get_char(attribute)
        print(title_list)
        title_list = tokenizer.convert_tokens_to_ids(title_list)
        attribute_list = tokenizer.convert_tokens_to_ids(attribute_list)
        print(title_list)
        return title_list, attribute_list

    t, a = nobert4token(tokenizer, title.lower(), att[0])
    x = X_padding(t)
    y = tag_padding(a)
    tensor_a = torch.tensor(y, dtype=torch.int32)
    tensor_a = torch.unsqueeze(tensor_a, dim=0).to('cuda')
    tensor_t = torch.tensor(x, dtype=torch.int32)
    tensor_t = torch.unsqueeze(tensor_t, dim=0).to('cuda')

    output = model.predict([tensor_t, tensor_a])
    predict_list = output.tolist()[0]
    for i in range(len(predict_list)):
        start_p, end_p = 0, 0
        for index, value in enumerate(predict_list[i]):
            if value == 1:  # tag 1 marks the start of a predicted span
                start_p = index
                j = index
                while predict_list[i][j] != 3:  # tag 3 marks the end of the span
                    j = j + 1
                end_p = j
        preds = tensor_t[i][start_p:end_p]

    words_p = tokenizer.convert_ids_to_tokens([i.item() for i in preds.cpu() if i.item() > 0])
    return ' '.join(words_p)
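For reference, run() expects a JSON body with a "title" string and an "attributes" list; a request I would send looks something like this (the values here are made up) -

import json

# A made-up example of the payload run() parses: "title" is the product title and
# "attributes" holds the attribute whose value should be extracted from it.
raw_data = json.dumps({
    "title": "apple iphone 12 pro max 256gb",
    "attributes": ["brand"],
})
# run(raw_data) tokenizes the title, calls model.predict, and returns the
# extracted attribute value as a string.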
This is what my deployment calls look like -
from azure.ai.ml import MLClient
from azure.ai.ml.entities import (
    ManagedOnlineEndpoint,
    ManagedOnlineDeployment,
    Model,
    CodeConfiguration,
    Environment,
)
from azure.identity import DefaultAzureCredential

# Picking the model to deploy. Here we use the latest version of our registered model.
model = ml_client.models.get(name="use-case1-model", version=latest_model_version)

# Create an online deployment.
red_deployment = ManagedOnlineDeployment(
    name="blue",
    endpoint_name=online_endpoint_name,
    model=model,
    code_configuration=CodeConfiguration(
        code="./dependencies",
        scoring_script="score.py",
    ),
    environment=env,
    instance_type="Standard_F4s_v2",
    instance_count=1,
)

red_deployment_results = ml_client.online_deployments.begin_create_or_update(
    red_deployment
).result()

print(
    f"Deployment {red_deployment_results.name} provisioning state: {red_deployment_results.provisioning_state}"
)
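For context, ml_client, online_endpoint_name, and env come from earlier cells along these lines (a sketch - the subscription, resource group, workspace, and endpoint names are placeholders) -

from azure.ai.ml import MLClient
from azure.ai.ml.entities import Environment, ManagedOnlineEndpoint
from azure.identity import DefaultAzureCredential

# Placeholder identifiers; the real values come from my workspace.
ml_client = MLClient(
    DefaultAzureCredential(),
    subscription_id="<subscription-id>",
    resource_group_name="<resource-group>",
    workspace_name="<workspace>",
)

online_endpoint_name = "use-case1-endpoint"

# The managed online endpoint that the deployment above attaches to.
endpoint = ManagedOnlineEndpoint(name=online_endpoint_name, auth_mode="key")
ml_client.online_endpoints.begin_create_or_update(endpoint).result()

# Environment built from the conda file shown below on a curated base image.
env = Environment(
    name="model-env",
    conda_file="./dependencies/conda.yaml",
    image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest",
)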
This is how I saved the model using MLflow -
# Registering the model to the workspace
mlflow.pytorch.log_model(
    pytorch_model=model,
    registered_model_name="use-case1-model",
    artifact_path="use-case1-model",
    input_example=df[['Title', 'Attributes']],
    conda_env=os.path.join("./dependencies", "conda.yaml"),
)

# Saving the model to a file
mlflow.pytorch.save_model(
    pytorch_model=model,
    conda_env=os.path.join("./dependencies", "conda.yaml"),
    input_example=df[['Title', 'Attributes']],
    path=os.path.join(args.model, "use-case1-model"),
)
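A quick local sanity check on the artifact itself would be to load it back with plain MLflow, mirroring what init() does in score.py - a sketch, with the local path assumed -

import mlflow.pyfunc

# Hypothetical local path to the folder written by mlflow.pytorch.save_model above.
model_path = "./model/use-case1-model"

# Same call that init() makes, minus the AZUREML_MODEL_DIR indirection.
model = mlflow.pyfunc.load_model(model_path)
print(model.metadata)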
My environment file (conda.yaml) -
channels:
  - conda-forge
dependencies:
  - python=3.8
  - pip=22.1.2
  - numpy=1.21.2
  - scikit-learn=0.24.2
  - scipy=1.7.1
  - 'pandas>=1.1,<1.2'
  - pytorch=1.10.0
  - pip:
      - 'inference-schema[numpy-support]==1.5.0'
      - xlrd==2.0.1
      - mlflow==1.26.1
      - azureml-mlflow==1.42.0
      - tqdm==4.63.0
      - pytorch-transformers==1.2.0
      - pytorch-lightning==2.0.2
      - seqeval==1.2.2
      - azureml-inference-server-http==0.8.0
name: model-env
And lastly, the deployment logs show this:
Instance status:
SystemSetup: Succeeded
UserContainerImagePull: Succeeded
ModelDownload: Succeeded
UserContainerStart: InProgress
Container events:
Kind: Pod, Name: Pulling, Type: Normal, Time: 2023-05-23T01:17:31.726303Z, Message: Start pulling container image
Kind: Pod, Name: Downloading, Type: Normal, Time: 2023-05-23T01:17:32.697829Z, Message: Start downloading models
Kind: Pod, Name: Pulled, Type: Normal, Time: 2023-05-23T01:20:06.535632Z, Message: Container image is pulled successfully
Kind: Pod, Name: Downloaded, Type: Normal, Time: 2023-05-23T01:20:06.535632Z, Message: Models are downloaded successfully
Kind: Pod, Name: Created, Type: Normal, Time: 2023-05-23T01:20:06.691742Z, Message: Created container inference-server
Kind: Pod, Name: Started, Type: Normal, Time: 2023-05-23T01:20:06.755508Z, Message: Started container inference-server
Container logs:
2023-05-23T01:20:06,767937802+00:00 - rsyslog/run
2023-05-23T01:20:06,772188056+00:00 - gunicorn/run
2023-05-23T01:20:06,773563973+00:00 - nginx/run
2023-05-23T01:20:06,774047779+00:00 | gunicorn/run |
2023-05-23T01:20:06,775608299+00:00 | gunicorn/run | ###############################################
2023-05-23T01:20:06,777286120+00:00 | gunicorn/run | AzureML Container Runtime Information
2023-05-23T01:20:06,779026742+00:00 | gunicorn/run | ###############################################
2023-05-23T01:20:06,780637662+00:00 | gunicorn/run |
2023-05-23T01:20:06,782440485+00:00 | gunicorn/run |
2023-05-23T01:20:06,786468236+00:00 | gunicorn/run | AzureML image information: openmpi4.1.0-ubuntu20.04, Materializaton Build:20230509.v1
2023-05-23T01:20:06,788041356+00:00 | gunicorn/run |
2023-05-23T01:20:06,789705877+00:00 | gunicorn/run |
2023-05-23T01:20:06,791375398+00:00 | gunicorn/run | PATH environment variable: /azureml-envs/azureml_d587e0800be72e17d773ddca63762cd1/bin:/opt/miniconda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
2023-05-23T01:20:06,793025919+00:00 | gunicorn/run | PYTHONPATH environment variable:
2023-05-23T01:20:06,794927543+00:00 | gunicorn/run |
2023-05-23T01:20:07,287148945+00:00 | gunicorn/run | CONDAPATH environment variable: /opt/miniconda
# conda environments:
#
* /azureml-envs/azureml_d587e0800be72e17d773ddca63762cd1
base /opt/miniconda
2023-05-23T01:20:08,175873674+00:00 | gunicorn/run |
2023-05-23T01:20:08,177405680+00:00 | gunicorn/run | Pip Dependencies (before dynamic installation)
adal==1.2.7
aiohttp==3.8.4
aiosignal==1.3.1
alembic==1.11.1
argcomplete==2.1.2
async-timeout==4.0.2
attrs==23.1.0
azure-common==1.1.28
azure-core==1.22.1
azure-graphrbac==0.61.1
azure-identity==1.13.0
azure-mgmt-authorization==2.0.0
azure-mgmt-containerregistry==9.1.0
azure-mgmt-core==1.3.0
azure-mgmt-keyvault==9.3.0
azure-mgmt-resource==21.0.0
azure-mgmt-storage==20.0.0
azureml-core==1.42.0.post1
azureml-inference-server-http==0.8.0
azureml-mlflow==1.42.0
backports.tempfile==1.0
backports.weakref==1.0.post1
bcrypt==4.0.1
boto3==1.26.138
botocore==1.29.138
cachetools==5.3.0
certifi==2023.5.7
cffi @ file:///home/conda/feedstock_root/build_artifacts/cffi_1671179356964/work
charset-normalizer==3.1.0
click==8.1.3
cloudpickle==2.2.1
cmake==3.26.3
contextlib2==21.6.0
cryptography==36.0.2
databricks-cli==0.17.7
docker==5.0.3
entrypoints==0.4
filelock==3.12.0
Flask==2.2.5
Flask-Cors==3.0.10
frozenlist==1.3.3
fsspec==2023.5.0
future @ file:///home/conda/feedstock_root/build_artifacts/future_1673596611778/work
gitdb==4.0.10
GitPython==3.1.31
google-api-core==2.11.0
google-auth==2.18.1
googleapis-common-protos==1.59.0
greenlet==2.0.2
gunicorn==20.1.0
humanfriendly==10.0
idna==3.4
importlib-metadata==6.6.0
importlib-resources==5.12.0
inference-schema==1.5
isodate==0.6.1
itsdangerous==2.1.2
jeepney==0.8.0
Jinja2==3.1.2
jmespath==1.0.0
joblib @ file:///home/conda/feedstock_root/build_artifacts/joblib_1663332044897/work
jsonpickle==2.2.0
knack==0.9.0
lightning-utilities==0.8.0
lit==16.0.5
Mako==1.2.4
MarkupSafe==2.1.2
mlflow==1.26.1
mlflow-skinny==2.3.2
mpmath==1.3.0
msal==1.22.0
msal-extensions==1.0.0
msrest==0.6.21
msrestazure==0.6.4
multidict==6.0.4
ndg-httpsclient==0.5.1
networkx==3.1
numpy @ file:///home/conda/feedstock_root/build_artifacts/numpy_1629092056723/work
nvidia-cublas-cu11==11.10.3.66
nvidia-cuda-cupti-cu11==11.7.101
nvidia-cuda-nvrtc-cu11==11.7.99
nvidia-cuda-runtime-cu11==11.7.99
nvidia-cudnn-cu11==8.5.0.96
nvidia-cufft-cu11==10.9.0.58
nvidia-curand-cu11==10.2.10.91
nvidia-cusolver-cu11==11.4.0.1
nvidia-cusparse-cu11==11.7.4.91
nvidia-nccl-cu11==2.14.3
nvidia-nvtx-cu11==11.7.91
oauthlib==3.2.2
opencensus==0.11.2
opencensus-context==0.1.3
opencensus-ext-azure==1.1.9
packaging==21.3
pandas==1.1.5
paramiko==2.12.0
pathspec==0.11.1
pkginfo==1.9.6
portalocker==2.7.0
prometheus-client==0.16.0
prometheus-flask-exporter==0.22.4
protobuf==4.23.1
psutil==5.9.5
pyasn1==0.5.0
pyasn1-modules==0.3.0
pycparser @ file:///home/conda/feedstock_root/build_artifacts/pycparser_1636257122734/work
pydantic==1.10.7
Pygments==2.15.1
PyJWT==2.7.0
PyNaCl==1.5.0
pyOpenSSL==22.0.0
pyparsing==3.0.9
PySocks==1.7.1
python-dateutil @ file:///home/conda/feedstock_root/build_artifacts/python-dateutil_1626286286081/work
pytorch-lightning==2.0.2
pytorch-transformers==1.2.0
pytz @ file:///home/conda/feedstock_root/build_artifacts/pytz_1680088766131/work
PyYAML==6.0
querystring-parser==1.2.4
regex==2023.5.5
requests==2.31.0
requests-oauthlib==1.3.1
rsa==4.9
s3transfer==0.6.1
sacremoses==0.0.53
scikit-learn @ file:///home/conda/feedstock_root/build_artifacts/scikit-learn_1630910537183/work
scipy @ file:///home/conda/feedstock_root/build_artifacts/scipy_1628206382406/work
SecretStorage==3.3.3
sentencepiece==0.1.99
seqeval==1.2.2
six @ file:///home/conda/feedstock_root/build_artifacts/six_1620240208055/work
smmap==5.0.0
SQLAlchemy==2.0.15
sqlparse==0.4.4
sympy==1.12
tabulate==0.9.0
threadpoolctl @ file:///home/conda/feedstock_root/build_artifacts/threadpoolctl_1643647933166/work
torch==2.0.1
torchmetrics==0.11.4
tqdm==4.63.0
triton==2.0.0
typing_extensions @ file:///home/conda/feedstock_root/build_artifacts/typing_extensions_1678559861143/work
urllib3==1.26.9
websocket-client==1.5.2
Werkzeug==2.3.4
wrapt==1.12.1
xlrd==2.0.1
yarl==1.9.2
zipp==3.15.0
2023-05-23T01:20:08,836735094+00:00 | gunicorn/run |
2023-05-23T01:20:08,838657201+00:00 | gunicorn/run | ###############################################
2023-05-23T01:20:08,840420508+00:00 | gunicorn/run | Checking if the Python package azureml-inference-server-http is installed
2023-05-23T01:20:08,842133215+00:00 | gunicorn/run | ###############################################
2023-05-23T01:20:08,843869922+00:00 | gunicorn/run |
2023-05-23T01:20:09,830826034+00:00 | gunicorn/run |
2023-05-23T01:20:09,832438140+00:00 | gunicorn/run | ###############################################
2023-05-23T01:20:09,833918246+00:00 | gunicorn/run | AzureML Inference Server
2023-05-23T01:20:09,835327752+00:00 | gunicorn/run | ###############################################
2023-05-23T01:20:09,836711457+00:00 | gunicorn/run |
2023-05-23T01:20:10,875997877+00:00 | gunicorn/run | Starting AzureML Inference Server HTTP.
2023-05-23 01:20:11,049 I [10] azmlinfsrv - Loaded logging config from /azureml-envs/azureml_d587e0800be72e17d773ddca63762cd1/lib/python3.8/site-packages/azureml_inference_server_http/logging.json
2023-05-23 01:20:11,143 I [10] gunicorn.error - Starting gunicorn 20.1.0
2023-05-23 01:20:11,144 I [10] gunicorn.error - Listening at: http://0.0.0.0:31311 (10)
2023-05-23 01:20:11,144 I [10] gunicorn.error - Using worker: sync
2023-05-23 01:20:11,146 I [70] gunicorn.error - Booting worker with pid: 70
Azure ML Inferencing HTTP server v0.8.0
Server Settings
---------------
Entry Script Name: /var/azureml-app/dependencies/score.py
Model Directory: /var/azureml-app/azureml-models/use-case1-model/3
Worker Count: 1
Worker Timeout (seconds): 300
Server Port: 31311
Application Insights Enabled: false
Application Insights Key: None
Inferencing HTTP server version: azmlinfsrv/0.8.0
CORS for the specified origins: None
Server Routes
---------------
Liveness Probe: GET 127.0.0.1:31311/
Score: POST 127.0.0.1:31311/score
Initializing logger
2023-05-23 01:20:11,423 I [70] azmlinfsrv - Starting up app insights client
2023-05-23 01:20:12,970 E [70] azmlinfsrv - Traceback (most recent call last):
File "/azureml-envs/azureml_d587e0800be72e17d773ddca63762cd1/lib/python3.8/site-packages/azureml_inference_server_http/server/user_script.py", line 74, in load_script
main_module_spec.loader.exec_module(user_module)
File "<frozen importlib._bootstrap_external>", line 843, in exec_module
File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
File "/var/azureml-app/dependencies/score.py", line 10, in <module>
from config import opt
File "/azureml-envs/azureml_d587e0800be72e17d773ddca63762cd1/lib/python3.8/site-packages/azureml_inference_server_http/server/config.py", line 8, in <module>
from ..constants import DEFAULT_APP_ROOT
ImportError: attempted relative import with no known parent package
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/azureml-envs/azureml_d587e0800be72e17d773ddca63762cd1/lib/python3.8/site-packages/azureml_inference_server_http/server/aml_blueprint.py", line 88, in setup
self.user_script.load_script(config.app_root)
File "/azureml-envs/azureml_d587e0800be72e17d773ddca63762cd1/lib/python3.8/site-packages/azureml_inference_server_http/server/user_script.py", line 76, in load_script
raise UserScriptImportException(ex) from ex
azureml_inference_server_http.server.user_script.UserScriptImportException: Failed to import user script because it raised an unhandled exception
2023-05-23 01:20:12,970 I [70] gunicorn.error - Worker exiting (pid: 70)
2023-05-23 01:20:13,162 I [10] gunicorn.error - Shutting down: Master
2023-05-23 01:20:13,163 I [10] gunicorn.error - Reason: Worker failed to boot.
Azure ML Inferencing HTTP server v0.8.0
Server Settings
---------------
Entry Script Name: /var/azureml-app/dependencies/score.py
Model Directory: /var/azureml-app/azureml-models/use-case1-model/3
Worker Count: 1
Worker Timeout (seconds): 300
Server Port: 31311
Application Insights Enabled: false
Application Insights Key: None
Inferencing HTTP server version: azmlinfsrv/0.8.0
CORS for the specified origins: None
Server Routes
---------------
Liveness Probe: GET 127.0.0.1:31311/
Score: POST 127.0.0.1:31311/score
2023-05-23T01:20:13,206072314+00:00 - gunicorn/finish 3 0
2023-05-23T01:20:13,207564233+00:00 - Exit code 3 is not normal. Killing image.
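From the traceback, the worker dies while importing score.py: 'from config import opt' (line 10) fails with "attempted relative import with no known parent package", and the config.py it actually picks up is the inference server's own module, not mine. How do I make the scoring script's local imports (config.py and the other helper scripts from the model folder) resolve inside the deployment container? (I realize the .to('cuda') calls will also be a problem on a CPU SKU like Standard_F4s_v2, but the container never gets that far.)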