diff --git a/.github/workflows/build-test-deploy.yml b/.github/workflows/build-test-deploy.yml index a0072348..87fac0e2 100644 --- a/.github/workflows/build-test-deploy.yml +++ b/.github/workflows/build-test-deploy.yml @@ -22,8 +22,7 @@ jobs: strategy: matrix: python-version: ['3.8', '3.9', '3.10', '3.11'] - os-version: ['ubuntu-20.04', 'windows-latest', 'macos-latest'] -# Pinned Ubuntu version to 20.04 since no Python 3.6 builds available on ubuntu-latest (22.04) as of 2022-12-7. + os-version: ['ubuntu-latest', 'windows-latest', 'macos-latest'] # os-version: [ubuntu-latest, windows-latest, macos-latest] steps: diff --git a/CHANGELOG.md b/CHANGELOG.md index f955f4e9..41a38060 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,35 @@ +v1.11.7 (2026-03-12) +------------- +**Improvements** +- Added `CodeFile` class to pzmm module for creating and uploading Python code files to SAS Intelligent Decisioning + - New method `write_id_code_file()` uploads a Python code file to a specified Viya folder and registers it with the Decisions service + - Accepts code as a raw string, file path, or `Path` object + - Validates code format via the SAS Viya API before upload; can be disabled with `validate_code=False` + - Raises `ValueError` if the file already exists in the target folder, if the folder is not found, or if validation fails + - Cleans up the uploaded file if Decisions service registration fails + - See `examples/pzmm_id_code_file_example.ipynb` for usage examples + +v1.11.6 (2025-11-18) +-------------------- +**Improvements** +- Added `create_requirements_txt` parameter to `create_requirements_json()` function in `write_json_files.py` to optionally generate a requirements.txt file alongside the requirements.json file. + +v1.11.5 (2025-06-27) +-------------------- +**Improvements** +- Added model versioning methods to `model_repository.py` to handle model version endpoints. +- Allow for user to set custom timeout length for score testing in `score_model_with_cas`. 
+ +v1.11.4 (2025-05-02) +-------------------- +**Improvements** +- Improved `upload_local_model` to allow for SAS Model Manager to properly intake local ASTORE models. + +v1.11.3 (2025-04-29) +-------------------- +**Improvements** +- Added `upload_local_model` to `tasks.py`, which can be used to upload local directories to SAS Model Manager without any file generation. + v1.11.2 (2025-04-08) -------------------- **Bugfixes** diff --git a/examples/pzmm_generate_complete_model_card.ipynb b/examples/pzmm_generate_complete_model_card.ipynb index 3a68271b..60124580 100644 --- a/examples/pzmm_generate_complete_model_card.ipynb +++ b/examples/pzmm_generate_complete_model_card.ipynb @@ -578,7 +578,7 @@ " \"MartialStatus_Married_AF_spouse\", 'MartialStatus_Married_civ_spouse', 'MartialStatus_Never_married', 'MartialStatus_Divorced', 'MartialStatus_Separated', \n", " 'MartialStatus_Widowed', 'Race_White', 'Race_Black', 'Race_Asian_Pac_Islander', 'Race_Amer_Indian_Eskimo', 'Race_Other', 'Relationship_Husband', \n", " 'Relationship_Not_in_family', 'Relationship_Own_child', 'Relationship_Unmarried', 'Relationship_Wife', 'Relationship_Other_relative', 'WorkClass_Private',\n", - " 'Education_Bachelors'\n", + " 'Education_Bachelors', 'Education_Some_college', 'Education_HS_grad'\n", " ]\n", " # OHE columns must be removed after data combination\n", " predictor_columns = ['Age', 'HoursPerWeek', 'WorkClass_Private', 'WorkClass_Self', 'WorkClass_Gov', \n", @@ -1716,12 +1716,14 @@ ], "source": [ "# Step 13: Generate requirements files\n", - "requirements_json = pzmm.JSONFiles.create_requirements_json(output_path)\n", + "requirements_json = pzmm.JSONFiles.create_requirements_json(output_path, create_requirements_txt=False)\n", "\n", "import json\n", "print(json.dumps(requirements_json, sort_keys=True, indent=4))\n", "\n", "for requirement in requirements_json:\n", + " # Example: Replace sklearn with scikit-learn in requirements\n", + " # (This is redundant in newer versions but shows 
how to modify package names)\n", " if 'sklearn' in requirement['step']:\n", " requirement['command'] = requirement[\"command\"].replace('sklearn', 'scikit-learn')\n", " requirement['step'] = requirement['step'].replace('sklearn', 'scikit-learn')\n", diff --git a/examples/pzmm_generate_requirements_json.ipynb b/examples/pzmm_generate_requirements_json.ipynb index 604ae800..afd6096c 100644 --- a/examples/pzmm_generate_requirements_json.ipynb +++ b/examples/pzmm_generate_requirements_json.ipynb @@ -14,16 +14,18 @@ "id": "e9b8cb7c-1974-4af5-8992-d51f90fcfe5b", "metadata": {}, "source": [ - "# Automatic Generation of the requirements.json File\n", + "# Automatic Generation of the requirements.json or requirements.txt File\n", "In order to validate Python models within a container publishing destination, the Python packages which contain the modules that are used in the Python score code file and its score resource files must be installed in the run-time container. You can install the packages when you publish a Python model or decision that contains a Python model to a container publishing destination by adding a `requirements.json` file that includes the package install statements to your model.\n", "\n", "This notebook provides an example execution and assessment of the create_requirements_json() function added in python-sasctl v1.8.0. The aim of this function is help to create the instructions (aka the `requirements.json` file) for a lightweight Python container in SAS Model Manager. Lightweight here meaning that the container will only install the packages found in the model's pickle files and python scripts.\n", "\n", + "Additionally, the create_requirements_json() function provides an optional parameter `create_requirements_txt` which when set to `True` will generate a requirements.txt file alongside the requirements.json file. By default this option is set to `False`. 
The requirements.txt file is needed when consuming Python models in SAS Event Stream Processing, which requires this format for package installation in their environment. While SAS Model Manager continues to use the requirements.json format, adding the requirements.txt file ensures compatibility across both platforms. \n", + "\n", "### **User Warnings**\n", "The methods utilized in this function can determine package dependencies and versions from provided scripts and pickle files, but there are some stipulations that need to be considered:\n", "\n", "1. If run outside of the development environment that the model was created in, the create_requirements_json() function **CANNOT** determine the required package _versions_ accurately. \n", - "2. Not all Python packages have matching import and install names and as such some of the packages added to the requirements.json file may be incorrectly named (i.e. `import sklearn` vs `pip install scikit-learn`).\n", + "2. Not all Python packages have matching import and install names and as such some of the packages added to the requirements.json file may be incorrectly named (i.e. `import sklearn` vs `pip install scikit-learn`). Some of the major packages with differing import and install names are automatically converted. \n", "\n", "As such, it is recommended that the user check over the requirements.json file for package name and version accuracy before deploying to a run-time container in SAS Model Manager." 
] @@ -63,7 +65,7 @@ "outputs": [], "source": [ "model_dir = Path.cwd() / \"data/hmeqModels/DecisionTreeClassifier\"\n", - "requirements_json = pzmm.JSONFiles.create_requirements_json(model_dir)" + "requirements_json = pzmm.JSONFiles.create_requirements_json(model_dir, create_requirements_txt=False)" ] }, { @@ -145,6 +147,8 @@ ], "source": [ "for requirement in requirements_json:\n", + " # Example: Replace sklearn with scikit-learn in requirements\n", + " # (This is redundant in newer versions but shows how to modify package names)\n", " if 'sklearn' in requirement['step']:\n", " requirement['command'] = requirement[\"command\"].replace('sklearn', 'scikit-learn')\n", " requirement['step'] = requirement['step'].replace('sklearn', 'scikit-learn')\n", diff --git a/examples/pzmm_id_code_file_example.ipynb b/examples/pzmm_id_code_file_example.ipynb new file mode 100644 index 00000000..72891021 --- /dev/null +++ b/examples/pzmm_id_code_file_example.ipynb @@ -0,0 +1,421 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "567032e0", + "metadata": {}, + "source": [ + "# Creating Python Code Files for SAS Intelligent Decisioning\n", + "\n", + "This notebook demonstrates how to use the `CodeFile` class to upload Python code\n", + "files that are properly formatted for use with SAS Intelligent Decisioning.\n", + "\n", + "## Overview\n", + "\n", + "SAS Intelligent Decisioning requires Python code files to follow a specific format.\n", + "\n", + "Here is a high-level summary of the formatting requirements:\n", + "\n", + "- An `execute` function is required\n", + "- An `Output:` docstring listing output variables as the first line in the execute function\n", + "- A `DependentPackages:` docstring listing required packages at the top of the file including packages that are needed but are not built-in\n", + "- Must return standard Python data types\n", + "\n", + "\n", + "The `CodeFile` class validates and uploads properly formatted Python code to SAS Viya.\n", + "\n", + "For 
more information about formatting requirements for Python code files, see the [Rules\n", +    "For Developing Python Code\n", +    "Files](https://documentation.sas.com/?cdcId=edmcdc&cdcVersion=default&docsetId=edmug&docsetTarget=n04vfc1flrz8jsn1o5jblnbgx6i3.htm#n0jrohir6wzvd0n11omfautducm3)\n", +    "in _SAS Intelligent Decisioning: User's Guide_.\n", +    "\n", +    "## Prerequisites\n", +    "\n", +    "- SAS Viya environment with SAS Intelligent Decisioning\n", +    "- Appropriate permissions to create files in the target folder\n", +    "- sasctl package installed\n", +    "- Python code already formatted according to SAS Intelligent Decisioning requirements" +   ] +  }, +  { +   "cell_type": "markdown", +   "id": "9da5894f", +   "metadata": {}, +   "source": [ +    "## Setup: Connect to SAS Viya" +   ] +  }, +  { +   "cell_type": "code", +   "execution_count": null, +   "id": "e27dcadc", +   "metadata": {}, +   "outputs": [], +   "source": [ +    "from sasctl import Session\n", +    "from sasctl.pzmm import CodeFile\n", +    "from sasctl.services import folders as folder_service\n", +    "\n", +    "\n", +    "# Replace with your SAS Viya connection information\n", +    "HOST = 'your-viya-host.com'\n", +    "USERNAME = 'your-username'\n", +    "PASSWORD = 'your-password'\n", +    "\n", +    "# Create a session\n", +    "sess = Session(HOST, USERNAME, PASSWORD, verify_ssl=False)\n", +    "print(f\"Connected to {HOST}\")\n", +    "\n", +    "try:\n", +    "    folder_service.create_folder('ID_python_files', \"/Public\")\n", +    "except Exception as error:\n", +    "    print(f\"Folder already exists. {error}\")" +   ] +  }, +  { +   "cell_type": "markdown", +   "id": "1e0f64d1", +   "metadata": {}, +   "source": [ +    "## Example 1: Simple Code File\n", +    "\n", +    "Here is a simple example that performs a basic calculation."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa33286d", + "metadata": {}, + "outputs": [], + "source": [ + "# Define properly formatted Python code\n", + "simple_code = \"\"\"\n", + "def execute(input_value):\n", + " '''Output: score, category'''\n", + " # Calculate a simple score\n", + " score = input_value * 2 + 10\n", + " category = 'High' if score > 50 else 'Low'\n", + " return score, category\n", + "\"\"\"\n", + "\n", + "# Upload the code file to SAS Viya\n", + "file_obj = CodeFile.write_id_code_file(\n", + " code=simple_code,\n", + " file_name='simple_calculator.py',\n", + " folder='/Public/ID_python_files'\n", + ")\n", + "\n", + "print(f\"The file was uploaded successfully.\")\n", + "print(f\"File ID: {file_obj.id}\")\n", + "print(f\"File Name: {file_obj.name}\")" + ] + }, + { + "cell_type": "markdown", + "id": "4073e537", + "metadata": {}, + "source": [ + "## Example 2: Code File with API Call\n", + "\n", + "Here is an example of how to create a code file that makes an API call to retrieve data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6608730a", + "metadata": {}, + "outputs": [], + "source": [ + "api_code = \"\"\"\n", + "'''DependentPackages: requests'''\n", + "def execute(customer_id):\n", + " '''Output: risk_score, status'''\n", + " import requests\n", + " import json\n", + "\n", + " # Make an API call\n", + " url = f\"https://api.example.com/data?id={customer_id}\"\n", + " response = requests.get(url)\n", + "\n", + " if response.status_code == 200:\n", + " data = response.json()\n", + " risk_score = data.get('risk_score', 0)\n", + " status = 'Success'\n", + " else:\n", + " risk_score = -1\n", + " status = 'Failed'\n", + " \n", + " return risk_score, status\n", + "\"\"\"\n", + "\n", + "# Upload the code file\n", + "file_obj = CodeFile.write_id_code_file(\n", + " code=api_code,\n", + " file_name='risk_score_api.py',\n", + " folder='/Public/ID_python_files'\n", + ")\n", + "\n", + "print(f\"The file was uploaded successfully.\")\n", + "print(f\"File ID: {file_obj.id}\")\n", + "print(f\"File name: {file_obj.name}\")" + ] + }, + { + "cell_type": "markdown", + "id": "d3658f6f", + "metadata": {}, + "source": [ + "## Example 3: Code with Multiple Dependencies\n", + "\n", + "Here is an example of specifying multiple packages in the `DependentPackages` docstring." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "48f441ff", + "metadata": {}, + "outputs": [], + "source": [ + "data_processing_code = \"\"\"\n", + "'''DependentPackages: pandas, numpy'''\n", + "def execute(value1, value2, value3, threshold):\n", + " '''Output: mean_value, std_value, result'''\n", + " import pandas as pd\n", + " import numpy as np\n", + "\n", + " # Create a simple dataframe\n", + " data = pd.DataFrame({\n", + " 'values': [value1, value2, value3]\n", + " })\n", + "\n", + " # Calculate statistics\n", + " mean_value = float(np.mean(data['values']))\n", + " std_value = float(np.std(data['values']))\n", + " result = 'Pass' if mean_value > threshold else 'Fail'\n", + "\n", + " return mean_value, std_value, result\n", + "\"\"\"\n", + "\n", + "# Upload the code file\n", + "file_obj = CodeFile.write_id_code_file(\n", + " code=data_processing_code,\n", + " file_name='data_processor.py',\n", + " folder='/Public/ID_python_files'\n", + ")\n", + "\n", + "print(f\"This file was uploaded successfully: {file_obj.name}\")" + ] + }, + { + "cell_type": "markdown", + "id": "76aa2f42", + "metadata": {}, + "source": [ + "## Example 4: Reading Code from a File\n", + "\n", + "Here is an example of reading Python code from an existing file." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eb8ad79d", + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "# Create a properly formatted Python file\n", + "temp_code_file = Path('temp_code.py')\n", + "temp_code_file.write_text(\"\"\"\n", + "def execute(income, assets, debt):\n", + " '''Output: credit_score, decision, confidence'''\n", + " # Business logic for credit decision\n", + " credit_score = income * 0.3 + assets * 0.2 - debt * 0.5\n", + " decision = 'Approved' if credit_score > 650 else 'Denied'\n", + " confidence = min(credit_score / 850, 1.0)\n", + " \n", + " return credit_score, decision, confidence\n", + "\"\"\")\n", + "\n", + "# Upload code from file (pass Path object)\n", + "file_obj = CodeFile.write_id_code_file(\n", + " code=temp_code_file,\n", + " file_name='credit_decision.py',\n", + " folder='/Public/ID_python_files'\n", + ")\n", + "\n", + "# Clean up\n", + "temp_code_file.unlink()\n", + "\n", + "print(f\"Code uploaded from file: {file_obj.name}\")" + ] + }, + { + "cell_type": "markdown", + "id": "a0223909", + "metadata": {}, + "source": [ + "## Example 5: Code File with No Parameters\n", + "\n", + "Here is an example of creating code files that do not require input parameters." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460f264f", + "metadata": {}, + "outputs": [], + "source": [ + "from sasctl.services import files as file_service\n", + "from sasctl.services import folders as folder_service\n", + "\n", + "config_code = \"\"\"\n", + "def execute():\n", + " '''Output: current_date, environment, version'''\n", + " import datetime\n", + "\n", + " # Get current configuration\n", + " current_date = datetime.datetime.now().strftime('%Y-%m-%d')\n", + " environment = 'production'\n", + " version = '1.0.0'\n", + "\n", + " return current_date, environment, version\n", + "\"\"\"\n", + "\n", + "# Check if file already exists and delete it.\n", + "# Warning: Deleting files might result in loss of important data or configurations.\n", + "# Ensure you have backups or that the file can be safely removed before proceeding.\n", + "\n", + "file_name = 'config_info.py'\n", + "folder_path = '/Public/ID_python_files'\n", + "\n", + "try:\n", + " folder_obj = folder_service.get_folder(folder_path)\n", + "\n", + " file_filter = f\"and(eq(name, '{file_name}'), eq(contentType, 'file'))\"\n", + " existing_file = folder_service.get(\n", + " f\"/folders/{folder_obj.id}/members\",\n", + " params={\"filter\": file_filter}\n", + " )\n", + " if len(existing_file) > 0:\n", + " print(f\"Warning: You are about to delete this file: {file_name}\")\n", + " print(\"This action might result in loss of sensitive data or configurations.\")\n", + "\n", + " file_service.delete_file({\"id\": existing_file['uri'].split('/')[-1]})\n", + " print(f\"Deleted file: {file_name}\")\n", + "except Exception as e:\n", + " print(f\"This file was not found: {file_name} {e}\")\n", + "\n", + "\n", + "file_obj = CodeFile.write_id_code_file(\n", + " code=config_code,\n", + " file_name=file_name,\n", + " folder=folder_path\n", + ")\n", + "\n", + "print(f\"Configuration code file created: {file_name}\")" + ] + }, + { + "cell_type": "markdown", + "id": "510f7855", + 
"metadata": {}, + "source": [ + "## Example 6: Disable Validation\n", + "\n", + "Here is an example of skipping pre-upload validation.\n", + "\n", + "**Note:** The file will still be uploaded even if it contains formatting errors.\n", + "The errors appear later when you try to use the file in a decision. You can\n", + "view the code file in SAS Intelligent Decisioning and validate it to check for errors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "95855524", + "metadata": {}, + "outputs": [], + "source": [ + "fast_code = \"\"\"\n", + "def execute(input_a, input_b):\n", + " '''Output: result'''\n", + " result = input_a + input_b\n", + " return result\n", + "\"\"\"\n", + "\n", + "# Skip pre-upload validation for faster upload\n", + "# File is still created when there are formatting errors\n", + "file_obj = CodeFile.write_id_code_file(\n", + " code=fast_code,\n", + " file_name='fast_calculator.py',\n", + " folder='/Public/ID_python_files',\n", + " validate_code=False # Skip pre-upload validation\n", + ")\n", + "\n", + "print(f\"File uploaded without pre-validation: {file_obj.name}\")\n", + "print(\"Warning: If there are formatting errors, they will appear when you use the file in a decision.\")" + ] + }, + { + "cell_type": "markdown", + "id": "396bc5f0", + "metadata": {}, + "source": [ + "## Clean Up\n", + "\n", + "Close the SAS Viya session when finished." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4a1f6b08", + "metadata": {}, + "outputs": [], + "source": [ + "# Close the session\n", + "sess.close()\n", + "print(\"Session closed\")" + ] + }, + { + "cell_type": "markdown", + "id": "12a60696", + "metadata": {}, + "source": [ + "## Additional Resources\n", + "\n", + "- [SAS Intelligent Decisioning Documentation](https://documentation.sas.com/?cdcId=edmcdc&cdcVersion=default&docsetId=edmug&docsetTarget=n04vfc1flrz8jsn1o5jblnbgx6i3.htm)\n", + "- [Rules For Developing Python Code Files](https://documentation.sas.com/?cdcId=edmcdc&cdcVersion=default&docsetId=edmug&docsetTarget=n04vfc1flrz8jsn1o5jblnbgx6i3.htm#n0jrohir6wzvd0n11omfautducm3)\n", + "- [python-sasctl Documentation](https://sassoftware.github.io/python-sasctl/)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv3.11", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/sasctl/__init__.py b/src/sasctl/__init__.py index e4c7a0e1..408dfe25 100644 --- a/src/sasctl/__init__.py +++ b/src/sasctl/__init__.py @@ -4,7 +4,7 @@ # Copyright © 2019, SAS Institute Inc., Cary, NC, USA. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0 -__version__ = "1.11.2" +__version__ = "1.11.7" __author__ = "SAS" __credits__ = [ "Yi Jian Ching", @@ -16,6 +16,7 @@ "Scott Lindauer", "DJ Moore", "Samya Potlapalli", + "Samuel Babak", ] __license__ = "Apache 2.0" __copyright__ = ( diff --git a/src/sasctl/_services/files.py b/src/sasctl/_services/files.py index f439d9d5..16b0c3d7 100644 --- a/src/sasctl/_services/files.py +++ b/src/sasctl/_services/files.py @@ -20,8 +20,8 @@ class Files(Service): The file can be associated with the URI of another identifiable object (for example, a parentUri). Every file must have an assigned content type and name. Files can be retrieved individually by using the file's - identifier or as a list of files by using a parentUri. Each file has its - content stream associated with it. After creation, the metadata that is + identifier or as a list of files by using a parentUri. Each file is + associated with its content stream. After creation, the metadata that is associated with the file or the actual content can be updated. A single file can be deleted by using a specific ID. Multiple files can be deleted by specifying a parentUri. A file can be uploaded via raw request or @@ -61,7 +61,8 @@ def create_file(cls, file, folder=None, filename=None, expiration=None): with open(file, "rb") as f: file = f.read() - else: + + elif not isinstance(file, bytes): if filename is None: raise ValueError( "`filename` must be specified if `file` is not a path." 
diff --git a/src/sasctl/_services/model_management.py b/src/sasctl/_services/model_management.py index e91b1aa6..7950d95e 100644 --- a/src/sasctl/_services/model_management.py +++ b/src/sasctl/_services/model_management.py @@ -28,7 +28,13 @@ class ModelManagement(Service): # TODO: set ds2MultiType @classmethod def publish_model( - cls, model, destination, name=None, force=False, reload_model_table=False + cls, + model, + destination, + model_version="latest", + name=None, + force=False, + reload_model_table=False, ): """ @@ -38,6 +44,8 @@ def publish_model( The name or id of the model, or a dictionary representation of the model. destination : str Name of destination to publish the model to. + model_version : str or dict, optional + Provide the version id, name, or dict to publish. Defaults to 'latest'. name : str, optional Provide a custom name for the published model. Defaults to None. force : bool, optional @@ -68,6 +76,23 @@ def publish_model( # TODO: Verify allowed formats by destination type. # As of 19w04 MAS throws HTTP 500 if name is in invalid format. 
+ if model_version != "latest": + if isinstance(model_version, dict) and "modelVersionName" in model_version: + model_version_name = model_version["modelVersionName"] + elif ( + isinstance(model_version, dict) + and "modelVersionName" not in model_version + ): + raise ValueError("Model version is not recognized.") + elif isinstance(model_version, str) and cls.is_uuid(model_version): + model_version_name = mr.get_model_or_version(model, model_version)[ + "modelVersionName" + ] + else: + model_version_name = model_version + else: + model_version_name = "" + model_name = name or "{}_{}".format( model_obj["name"].replace(" ", ""), model_obj["id"] ).replace("-", "") @@ -79,6 +104,7 @@ def publish_model( { "modelName": mp._publish_name(model_name), "sourceUri": model_uri.get("uri"), + "modelVersionID": model_version_name, "publishLevel": "model", } ], @@ -104,6 +130,7 @@ def create_performance_definition( table_prefix, project=None, models=None, + modelVersions=None, library_name="Public", name=None, description=None, @@ -136,6 +163,8 @@ def create_performance_definition( The name or id of the model(s), or a dictionary representation of the model(s). For multiple models, input a list of model names, or a list of dictionaries. If no models are specified, all models in the project specified will be used. Defaults to None. + modelVersions: str, list, optional + The name of the model version(s). Defaults to None, so all models are latest. library_name : str The library containing the input data, default is 'Public'. name : str, optional @@ -239,10 +268,13 @@ def create_performance_definition( "property set." 
% project.name ) + # Creating the new array of modelIds with version names appended + updated_models = cls.check_model_versions(models, modelVersions) + request = { "projectId": project.id, "name": name or project.name + " Performance", - "modelIds": [model.id for model in models], + "modelIds": updated_models, "championMonitored": monitor_champion, "challengerMonitored": monitor_challenger, "maxBins": max_bins, @@ -279,7 +311,6 @@ def create_performance_definition( for v in project.get("variables", []) if v.get("role") == "output" ] - return cls.post( "/performanceTasks", json=request, @@ -288,6 +319,57 @@ def create_performance_definition( }, ) + @classmethod + def check_model_versions(cls, models, modelVersions): + """ + Checking if the model version(s) are valid and append to model id accordingly. + + Parameters + ---------- + models: list of str + List of models. + modelVersions : list of str + List of model versions associated with models. + + Returns + ------- + String list + """ + if not modelVersions: + return [model.id for model in models] + + updated_models = [] + if not isinstance(modelVersions, list): + modelVersions = [modelVersions] + + if len(models) < len(modelVersions): + raise ValueError( + "There are too many versions for the amount of models specified." 
+ ) + + modelVersions = modelVersions + [""] * (len(models) - len(modelVersions)) + for model, modelVersionName in zip(models, modelVersions): + + if ( + isinstance(modelVersionName, dict) + and "modelVersionName" in modelVersionName + ): + + modelVersionName = modelVersionName["modelVersionName"] + elif ( + isinstance(modelVersionName, dict) + and "modelVersionName" not in modelVersionName + ): + + raise ValueError("Model version is not recognized.") + + if modelVersionName != "": + updated_models.append(model.id + ":" + modelVersionName) + else: + updated_models.append(model.id) + + return updated_models + @classmethod def execute_performance_definition(cls, definition): """Launches a job to run a performance definition. diff --git a/src/sasctl/_services/model_publish.py b/src/sasctl/_services/model_publish.py index c3fa225f..90f665ad 100644 --- a/src/sasctl/_services/model_publish.py +++ b/src/sasctl/_services/model_publish.py @@ -10,6 +10,7 @@ from .model_repository import ModelRepository from .service import Service +from ..utils.decorators import deprecated class ModelPublish(Service): @@ -90,7 +91,7 @@ def delete_destination(cls, item): return cls.delete("/destinations/{name}".format(name=item)) - @classmethod + @deprecated("Use publish_model in model_management.py instead.", "1.11.5") def publish_model(cls, model, destination, name=None, code=None, notes=None): """Publish a model to an existing publishing destination. 
diff --git a/src/sasctl/_services/model_repository.py b/src/sasctl/_services/model_repository.py index dfbbb95d..a68415e6 100644 --- a/src/sasctl/_services/model_repository.py +++ b/src/sasctl/_services/model_repository.py @@ -8,8 +8,14 @@ import datetime from warnings import warn +import requests +from requests.exceptions import HTTPError +import urllib -from ..core import HTTPError, current_session, delete, get, sasctl_command +# import traceback +# import sys + +from ..core import current_session, delete, get, sasctl_command, RestObj from .service import Service FUNCTIONS = { @@ -154,7 +160,7 @@ def get_model_contents(cls, model): contents = cls.request_link(link, "contents") # By default, request_link() will unwrap a length-1 list. - # If that happens, re-wrap so a list is always returned. + # If that happens, re-wrap so that a list is always returned. if isinstance(contents, list): return contents @@ -179,7 +185,7 @@ def get_repository(cls, repository, refresh=False): Notes ------- - If `repository` is a complete representation of the repository it will be + If `repository` is a complete representation of the repository, it will be returned unless `refresh` is set. This prevents unnecessary REST calls when data is already available on the client. @@ -196,7 +202,7 @@ def get_repository(cls, repository, refresh=False): if cls.is_uuid(repository): try: - # Attempt to GET the repository directly. Access may be restricted, so allow HTTP 403 errors + # Attempt to GET the repository directly. Access might be restricted, so allow HTTP 403 errors # and fall back to using list_repositories() instead. 
return cls.get("/repositories/{id}".format(id=repository)) except HTTPError as e: @@ -615,11 +621,222 @@ def list_model_versions(cls, model): list """ - model = cls.get_model(model) - if cls.get_model_link(model, "modelVersions") is None: - raise ValueError("Unable to retrieve versions for model '%s'" % model) - return cls.request_link(model, "modelVersions") + if current_session().version_info() < 4: + model = cls.get_model(model) + if cls.get_model_link(model, "modelVersions") is None: + raise ValueError("Unable to retrieve versions for model '%s'" % model) + + return cls.request_link(model, "modelVersions") + else: + link = cls.get_model_link(model, "modelHistory") + if link is None: + raise ValueError( + "Cannot find link for version history for model '%s'" % model + ) + + modelHistory = cls.request_link( + link, + "modelHistory", + headers={"Accept": "application/vnd.sas.collection+json"}, + ) + + if isinstance(modelHistory, RestObj): + return [modelHistory] + return modelHistory + + @classmethod + def get_model_version(cls, model, version_id): + """Get a specific version of a model. + + Parameters + ---------- + model : str or dict + The name, id, or dictionary representation of a model. + version_id: str + The id of a model version. + + Returns + ------- + RestObj + + """ + + model_history = cls.list_model_versions(model) + + for item in model_history: + if item["id"] == version_id: + return cls.request_link( + item, + "self", + headers={"Accept": "application/vnd.sas.models.model.version+json"}, + ) + + raise ValueError("The version id specified could not be found.") + + @classmethod + def get_model_with_versions(cls, model): + """Get the current model with its version history. + + Parameters + ---------- + model : str or dict + The name, id, or dictionary representation of a model. 
+ + Returns + ------- + list + + """ + + if cls.is_uuid(model): + model_id = model + elif isinstance(model, dict) and "id" in model: + model_id = model["id"] + else: + model = cls.get_model(model) + if not model: + raise HTTPError( + "This model may not exist in a project or the model may not exist at all." + ) + model_id = model["id"] + + versions_uri = f"/models/{model_id}/versions" + try: + version_history = cls.request( + "GET", + versions_uri, + headers={"Accept": "application/vnd.sas.collection+json"}, + ) + except urllib.error.HTTPError as e: + raise HTTPError( + f"Request failed: Model id may be referencing a non-existing model." + ) from None + + if isinstance(version_history, RestObj): + return [version_history] + + return version_history + + @classmethod + def get_model_or_version(cls, model, version_id): + """Get a specific version of a model but if model id and version id are the same, the current model is returned. + + Parameters + ---------- + model : str or dict + The name, id, or dictionary representation of a model. + version_id: str + The id of a model version. + + Returns + ------- + RestObj + + """ + + version_history = cls.get_model_with_versions(model) + + for item in version_history: + if item["id"] == version_id: + return cls.request_link( + item, + "self", + headers={ + "Accept": "application/vnd.sas.models.model.version+json, application/vnd.sas.models.model+json" + }, + ) + + raise ValueError("The version id specified could not be found.") + + @classmethod + def get_model_version_contents(cls, model, version_id): + """Get the contents of a model version. + + Parameters + ---------- + model : str or dict + The name, id, or dictionary representation of a model. + version_id: str + The id of a model version. 
+ + Returns + ------- + list + + """ + model_version = cls.get_model_version(model, version_id) + version_contents = cls.request_link( + model_version, + "contents", + headers={"Accept": "application/vnd.sas.collection+json"}, + ) + + if isinstance(version_contents, RestObj): + return [version_contents] + + return version_contents + + @classmethod + def get_model_version_content_metadata(cls, model, version_id, content_id): + """Get the content metadata header information for a model version. + + Parameters + ---------- + model : str or dict + The name, id, or dictionary representation of a model. + version_id: str + The id of a model version. + content_id: str + The id of the content file. + + Returns + ------- + RestObj + + """ + model_version_contents = cls.get_model_version_contents(model, version_id) + + for item in model_version_contents: + if item["id"] == content_id: + return cls.request_link( + item, + "self", + headers={"Accept": "application/vnd.sas.models.model.content+json"}, + ) + + raise ValueError("The content id specified could not be found.") + + @classmethod + def get_model_version_content(cls, model, version_id, content_id): + """Get the specific content inside the content file for a model version. + + Parameters + ---------- + model : str or dict + The name, id, or dictionary representation of a model. + version_id: str + The id of a model version. + content_id: str + The id of the specific content file. 
+ + Returns + ------- + list + + """ + + metadata = cls.get_model_version_content_metadata(model, version_id, content_id) + version_content_file = cls.request_link( + metadata, "content", headers={"Accept": "text/plain"} + ) + + if version_content_file is None: + raise HTTPError("Something went wrong while accessing the metadata file.") + + if isinstance(version_content_file, RestObj): + return [version_content_file] + return version_content_file @classmethod def copy_analytic_store(cls, model): diff --git a/src/sasctl/_services/score_definitions.py b/src/sasctl/_services/score_definitions.py index 448a28c5..05733d2b 100644 --- a/src/sasctl/_services/score_definitions.py +++ b/src/sasctl/_services/score_definitions.py @@ -46,7 +46,7 @@ def create_score_definition( description: str = "", server_name: str = "cas-shared-default", library_name: str = "Public", - model_version: str = "latest", + model_version: Union[str, dict] = "latest", ): """Creates the score definition service. @@ -69,7 +69,7 @@ def create_score_definition( library_name: str, optional The library within the CAS server the table exists in. Defaults to "Public". model_version: str, optional - The user-chosen version of the model with the specified model_id. Defaults to "latest". + The user-chosen version of the model. Deafaults to "latest". Returns ------- @@ -83,9 +83,7 @@ def create_score_definition( else: object_descriptor_type = "sas.models.model.ds2" - if cls._model_repository.is_uuid(model): - model_id = model - elif isinstance(model, dict) and "id" in model: + if isinstance(model, dict) and "id" in model: model_id = model["id"] else: model = cls._model_repository.get_model(model) @@ -118,7 +116,7 @@ def create_score_definition( table = cls._cas_management.get_table(table_name, library_name, server_name) if not table and not table_file: raise HTTPError( - f"This table may not exist in CAS. Please include the `table_file` argument in the function call if it doesn't exist." 
+ "This table may not exist in CAS. Include the `table_file` argument." ) elif not table and table_file: cls._cas_management.upload_file( @@ -127,16 +125,19 @@ def create_score_definition( table = cls._cas_management.get_table(table_name, library_name, server_name) if not table: raise HTTPError( - f"The file failed to upload properly or another error occurred." + "The file failed to upload properly or another error occurred." ) # Checks if the inputted table exists, and if not, uploads a file to create a new table + object_uri, model_version = cls.check_model_version(model_id, model_version) + # Checks if the model version is valid and how to find the name + save_score_def = { "name": model_name, # used to be score_def_name "description": description, "objectDescriptor": { - "uri": f"/modelManagement/models/{model_id}", - "name": f"{model_name}({model_version})", + "uri": object_uri, + "name": f"{model_name} ({model_version})", "type": f"{object_descriptor_type}", }, "inputData": { @@ -151,7 +152,7 @@ def create_score_definition( "projectUri": f"/modelRepository/projects/{model_project_id}", "projectVersionUri": f"/modelRepository/projects/{model_project_id}/projectVersions/{model_project_version_id}", "publishDestination": "", - "versionedModel": f"{model_name}({model_version})", + "versionedModel": f"{model_name} ({model_version})", }, "mappings": inputMapping, } @@ -163,3 +164,37 @@ def create_score_definition( "/definitions", data=json.dumps(save_score_def), headers=headers_score_def ) # The response information of the score definition can be seen as a JSON as well as a RestOBJ + + @classmethod + def check_model_version(cls, model_id: str, model_version: Union[str, dict]): + """Checks if the model version is valid. + + Parameters + ---------- + model_version : str or dict + The model version to check. 
+ + Returns + ------- + String tuple + """ + if model_version != "latest": + + if isinstance(model_version, dict) and "modelVersionName" in model_version: + model_version = model_version["modelVersionName"] + elif ( + isinstance(model_version, dict) + and "modelVersionName" not in model_version + ): + raise ValueError("Model version cannot be found.") + elif isinstance(model_version, str) and cls.is_uuid(model_version): + model_version = cls._model_repository.get_model_or_version( + model_id, model_version + )["modelVersionName"] + + object_uri = f"/modelManagement/models/{model_id}/versions/@{model_version}" + + else: + object_uri = f"/modelManagement/models/{model_id}" + + return object_uri, model_version diff --git a/src/sasctl/pzmm/__init__.py b/src/sasctl/pzmm/__init__.py index 4667bc65..d3eb4de0 100644 --- a/src/sasctl/pzmm/__init__.py +++ b/src/sasctl/pzmm/__init__.py @@ -1,6 +1,7 @@ # Copyright (c) 2021, SAS Institute Inc., Cary, NC, USA. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 +from .code_file import CodeFile from .git_integration import GitIntegrate from .import_model import ImportModel from .mlflow_model import MLFlowModel diff --git a/src/sasctl/pzmm/code_file.py b/src/sasctl/pzmm/code_file.py new file mode 100644 index 00000000..45882e0b --- /dev/null +++ b/src/sasctl/pzmm/code_file.py @@ -0,0 +1,242 @@ +# Copyright (c) 2026, SAS Institute Inc., Cary, NC, USA. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +Tools for creating and uploading Python code files for SAS Intelligent Decisioning. +""" + +# Standard Library Imports +from pathlib import Path +from typing import Union + +# Package Imports +from ..core import RestObj +from ..services import files as file_service +from ..services import folders as folder_service +from .._services.service import Service + + +class CodeFile(Service): + """ + A class for creating Python code files formatted for SAS Intelligent Decisioning. 
+ + SAS Intelligent Decisioning requires Python code files to follow a specific format + with an execute function that includes docstrings for output variables and + dependent packages. + """ + + _SERVICE_ROOT = "/decisions" + + @classmethod + def _validate_code_format_via_api(cls, code: str) -> bool: + """ + Validate code format using the SAS Viya validation endpoint. + + This validates Output docstring position, return statements, execute function, + and other ID-specific formatting requirements. + + Parameters + ---------- + code : str + Python code to validate. + + Raises + ------ + ValueError + If the code does not meet ID formatting requirements. + """ + try: + response = cls.post( + "/commons/validations/codeFiles", + json={"content": code, "type": "decisionPythonFile"}, + ) + + # If validation fails, the response will contain an error + if not response.get("valid", True): + error = response.get("error", {}) + if isinstance(error, dict): + error_message = error.get("message", str(error)) + else: + error_message = str(error) + raise ValueError(error_message) + + except Exception as e: + # Re-raise ValueError as-is, wrap other exceptions + if isinstance(e, ValueError): + raise + raise ValueError(f"Code validation failed: {str(e)}") + + @classmethod + def _find_file_in_folder( + cls, folder_id: str, file_name: str + ) -> Union[RestObj, None]: + """ + Find a file in a specific folder by name. + + Parameters + ---------- + folder_id : str + The ID of the folder to search in. + file_name : str + Name of the file to find. + + Returns + ------- + RestObj or None + File details if found, None otherwise. + """ + + # Search for the file in the folder + file_filter = f"and(eq(name, '{file_name}'), eq(contentType, 'file'))" + response = folder_service.get( + f"/folders/{folder_id}/members", params={"filter": file_filter} + ) + + if len(response) <= 0: + # No files with file_name were found. 
+ return None + + file_uri = response.get("uri") + + if file_uri: + return response + + return None + + @classmethod + def _load_python_code(cls, code: Union[str, Path]) -> str: + """ + Load and prepare a Python code file for SAS Intelligent Decisioning. + + This method loads code from a string or file path and performs basic checks. + Actual validation against ID format requirements happens during upload. + + Parameters + ---------- + code : str or pathlib.Path + Python code as a string or path to a Python file. + + Returns + ------- + str + The Python code file content. + + Raises + ------ + ValueError + If code is empty or file is not found. + """ + # Check for empty string first + if isinstance(code, str) and (not code or not code.strip()): + raise ValueError("Code cannot be empty") + + # Convert string path to Path object if needed (with error handling for invalid paths) + try: + if isinstance(code, str) and Path(code).exists(): + code = Path(code) + except OSError: + # Path is invalid (for example, too long or malformed) - treat as raw code string + pass + + if isinstance(code, Path): + if not code.exists(): + raise ValueError(f"Code file not found: {code}") + code = code.read_text() + + if not code or not code.strip(): + raise ValueError("Code cannot be empty") + + return code + + @classmethod + def write_id_code_file( + cls, + code: Union[str, Path], + file_name: str, + folder: Union[str, dict], + validate_code: bool = True, + ) -> RestObj: + """ + Validate and upload a Python code file to SAS Intelligent Decisioning. + + This method validates a properly formatted ID Python code file and uploads + it to a specified folder in SAS Viya, it then registers it with the Decisions service. + + Parameters + ---------- + code : str or pathlib.Path + Python code as a string or path to a Python file. The code must already + be formatted for ID with an execute function and proper docstrings. + file_name : str + Name for the code file (for example, 'my_code.py'). 
Must end with .py + folder : str or dict + Target folder in SAS Viya. Can be a folder name, path (for example, + '/Public/MyFolder'), or folder object returned by folders.get_folder(). + validate_code: bool + If True, validates code format via API before upload. If False, skips validation. + + Returns + ------- + RestObj + Code file object returned by the Decisions service. + + Raises + ------ + ValueError + If file_name does not end with .py, if folder is not found, if code + does not contain required docstrings, or if code is invalid. + SyntaxError + If the provided code has syntax errors. + """ + # Validate file_name + if not file_name.endswith(".py"): + raise ValueError("file_name must end with .py extension") + + # Load the code (handles file paths, empty checks, etc.) + loaded_code = cls._load_python_code(code) + + # Validate code format if requested + if validate_code: + cls._validate_code_format_via_api(loaded_code) + + # Verify that the folder exists + folder_obj = folder_service.get_folder(folder) + if not folder_obj: + raise ValueError(f"Folder '{folder}' not found") + + # Verify that a file with that name does not exist + file_obj = cls._find_file_in_folder(folder_obj.id, file_name) + if file_obj: + raise ValueError(f"File '{file_name}' already exists in this folder.") + + # Upload the file to SAS Viya Files service + file_obj = file_service.create_file( + file=loaded_code.encode("utf-8"), + folder=folder, + filename=file_name, + ) + + data = { + "name": file_name, + "fileUri": f"/files/files/{file_obj.id}", + "type": "decisionPythonFile", + } + + try: + code_file = cls.post("/codeFiles", json=data) + except Exception as post_error: + # Try to clean up the uploaded file since code file creation failed + try: + # There is no response from deleting a file object + file_service.delete_file({"id": file_obj["id"]}) + + except Exception as delete_error: + raise RuntimeError( + f"There was an error creating the code file: {post_error}." 
+ f"Also, could not delete the orphaned file: {delete_error}" + ) + raise RuntimeError( + f"There was an error with creating the code file: {post_error}" + ) + + return code_file diff --git a/src/sasctl/pzmm/write_json_files.py b/src/sasctl/pzmm/write_json_files.py index 1c0c560d..c5974c0d 100644 --- a/src/sasctl/pzmm/write_json_files.py +++ b/src/sasctl/pzmm/write_json_files.py @@ -14,7 +14,7 @@ from pathlib import Path from typing import Any, Generator, List, Optional, Type, Union -# Third Party Imports +# Third-Party Imports import pandas as pd from pandas import DataFrame, Series @@ -22,7 +22,7 @@ from sasctl.pzmm.write_score_code import ScoreCode as sc from ..core import current_session from ..utils.decorators import deprecated, experimental -from ..utils.misc import check_if_jupyter +from ..utils.misc import check_if_jupyter, IMPORT_TO_INSTALL_MAPPING try: # noinspection PyPackageRequirements @@ -45,7 +45,7 @@ class NpEncoder(json.JSONEncoder): pass -# TODO: add converter for any type of dataset (list, dataframe, numpy array) +# TODO: add converter for any type of data set (list, dataframe, numpy array) # Constants INPUT = "inputVar.json" @@ -300,14 +300,14 @@ def write_model_properties_json( Model Manager. If these values are detected, they will be supplied as custom user properties. - If a json_path is supplied, this function outputs a JSON file named + If a json_path is supplied, this function writes a JSON file named "ModelProperties.json". Otherwise, a dict is returned. Parameters ---------- model_name : str User-defined model name. This value is overwritten by SAS Model Manager - based on the name of the zip file used for importing the model. + based on the name of the ZIP file used for importing the model. target_variable : str Target variable to be predicted by the model. target_values : list, optional @@ -477,7 +477,7 @@ def write_file_metadata_json( """ Writes a file metadata JSON file pointing to all relevant files. 
- This function outputs a JSON file named "fileMetadata.json". + This function writes a JSON file named "fileMetadata.json". Parameters ---------- @@ -651,7 +651,7 @@ def add_tuple_to_fitstat( Raises ------ ValueError - If an parameter within the tuple list is not a tuple or has a length + If a parameter within the tuple list is not a tuple or has a length different from the expected three. """ @@ -696,7 +696,7 @@ def user_input_fitstat(cls, data: List[dict]) -> List[dict]: Returns ------- list of dict - List of dicts with the user provided values inputted. + List of dicts with the user provided values entered. """ while True: input_param_name = input("What is the parameter name?\n") @@ -706,7 +706,7 @@ def user_input_fitstat(cls, data: List[dict]) -> List[dict]: f"{input_param_name} is not a valid parameter.", category=UserWarning, ) - if input("Would you like to input more parameters? (Y/N)") == "N": + if input("Would you like to enter more parameters? (Y/N)") == "N": break continue param_value = input("What is the parameter's value?\n") @@ -723,7 +723,7 @@ def user_input_fitstat(cls, data: List[dict]) -> List[dict]: f"1, 2, or 3 or TRAIN, TEST, or VALIDATE respectively.", category=UserWarning, ) - if input("Would you like to input more parameters? (Y/N)") == "N": + if input("Would you like to enter more parameters? 
(Y/N)") == "N": break continue data[data_role - 1]["dataMap"][param_name] = param_value @@ -929,7 +929,7 @@ def assess_model_bias( maxdiff_dfs=maxdiff_dfs, datarole=datarole ) - # getting json files + # Getting JSON files json_files = cls.bias_dataframes_to_json( groupmetrics=group_metrics, maxdifference=max_differences, @@ -973,7 +973,7 @@ def format_max_differences( Returns ------- pandas.DataFrame - A singluar DataFrame containing all max differences data + A singular DataFrame containing all max differences data """ maxdiff_df = pd.concat(maxdiff_dfs) maxdiff_df = maxdiff_df.rename( @@ -1102,7 +1102,7 @@ def bias_dataframes_to_json( conventions (no spaces and the name cannot begin with a number or symbol). Required for regression problems. The default value is None. json_path : str or pathlib.Path, optional - Location for the output JSON files. If a path is passed, the json files will populate in the directory and + Location for the output JSON files. If a path is passed, the JSON files will populate in the directory and the function will return None, unless return_dataframes is True. Otherwise, the function will return the json strings in a dictionary (dict["maxDifferences.json"] and dict["groupMetrics.json"]). The default value is None. @@ -1200,19 +1200,19 @@ def calculate_model_statistics( cutoff: Optional[float] = None, ) -> Union[dict, None]: """ - Calculates fit statistics (including ROC and Lift curves) from datasets and then + Calculates fit statistics (including ROC and Lift curves) from data sets and then either writes them to JSON files or returns them as a single dictionary. Calculations are performed using a call to SAS CAS via the swat package. An error will be raised if the swat package is not installed or if a connection to a SAS Viya system is not possible. 
- Datasets must contain the actual and predicted values and may optionally contain + Data sets must contain the actual and predicted values and can optionally contain the predicted probabilities. If no probabilities are provided, a dummy - probability dataset is generated based on the predicted values and normalized by + probability data set is generated based on the predicted values and normalized by the target value. - Datasets can be provided in the following forms, with the assumption that data + Data sets can be provided in the following forms, with the assumption that data is ordered as `actual`, `predict`, and `probability` respectively: * pandas dataframe: the actual and predicted values are their own columns @@ -1220,7 +1220,7 @@ def calculate_model_statistics( * numpy array: the actual and predicted values are their own columns or rows \ and ordered such that the actual values come first and the predicted second - If a json_path is supplied, then this function outputs a set of JSON files named + If a json_path is supplied, then this function writes a set of JSON files named "dmcas_fitstat.json", "dmcas_roc.json", "dmcas_lift.json". Parameters @@ -1228,11 +1228,11 @@ def calculate_model_statistics( target_value : str, int, or float Target event value for model prediction events. validate_data : pandas.DataFrame, list of list, or numpy.ndarray, optional - Dataset pertaining to the validation data. The default value is None. + Data set pertaining to the validation data. The default value is None. train_data : pandas.DataFrame, list of list, or numpy.ndarray, optional - Dataset pertaining to the training data. The default value is None. + Data set pertaining to the training data. The default value is None. test_data : pandas.DataFrame, list of list, or numpy.ndarray, optional - Dataset pertaining to the test data. The default value is None. + Data set pertaining to the test data. The default value is None. 
json_path : str or pathlib.Path, optional Location for the output JSON files. The default value is None. target_type: str, optional @@ -1242,7 +1242,7 @@ def calculate_model_statistics( Returns ------- dict - Dictionary containing a key-value pair representing the files name and json + Dictionary containing a key-value pair representing the files name and JSON dumps respectively. Raises @@ -1375,21 +1375,21 @@ def check_for_data( test: Union[DataFrame, List[list], Type["numpy.ndarray"]] = None, ) -> list: """ - Check which datasets were provided and return a list of flags. + Check which data sets were provided and return a list of flags. Parameters ---------- validate : pandas.DataFrame, list of list, or numpy.ndarray, optional - Dataset pertaining to the validation data. The default value is None. + Data set pertaining to the validation data. The default value is None. train : pandas.DataFrame, list of list, or numpy.ndarray, optional - Dataset pertaining to the training data. The default value is None. + Data set pertaining to the training data. The default value is None. test : pandas.DataFrame, list of list, or numpy.ndarray, optional - Dataset pertaining to the test data. The default value is None. + Data set pertaining to the test data. The default value is None. Returns ------- data_partitions : list - A list of flags indicating which partitions have datasets. + A list of flags indicating which partitions have data sets. Raises ------ @@ -1416,7 +1416,7 @@ def stat_dataset_to_dataframe( target_type: str = "classification", ) -> DataFrame: """ - Convert the user supplied statistical dataset from either a pandas DataFrame, + Convert the user supplied statistical data set from either a pandas DataFrame, list of lists, or numpy array to a DataFrame formatted for SAS CAS upload. 
If the prediction probabilities are not provided, the prediction data will be @@ -1428,7 +1428,7 @@ def stat_dataset_to_dataframe( Parameters ---------- data : pandas.DataFrame, list of list, or numpy.ndarray - Dataset representing the actual and predicted values of the model. May also + Data set representing the actual and predicted values of the model. May also include the prediction probabilities. target_value : str, int, or float, optional Target event value for model prediction events. Used for creating a binary @@ -1438,7 +1438,7 @@ def stat_dataset_to_dataframe( Returns ------- data : pandas.DataFrame - Dataset formatted for SAS CAS upload. + Data set formatted for SAS CAS upload. Raises ------ @@ -1493,12 +1493,12 @@ def apply_dataframe_to_json( ) -> dict: """ Map the values of the ROC or Lift charts from SAS CAS to the dictionary - representation of the respective json file. + representation of the respective JSON file. Parameters ---------- json_dict : dict - Dictionary representation of the ROC or Lift chart json file. + Dictionary representation of the ROC or Lift chart JSON file. partition : int Numerical representation of the data partition. Either 0, 1, or 2. stat_df : pandas.DataFrame @@ -1510,7 +1510,7 @@ def apply_dataframe_to_json( Returns ------- json_dict : dict - Dictionary representation of the ROC or Lift chart json file, with the + Dictionary representation of the ROC or Lift chart JSON file, with the values from the SAS CAS percentile action set added in. """ for row_num in range(len(stat_df)): @@ -1614,6 +1614,7 @@ def create_requirements_json( cls, model_path: Union[str, Path, None] = Path.cwd(), output_path: Union[str, Path, None] = None, + create_requirements_txt: bool = False, ) -> Union[dict, None]: """ Searches the model directory for Python scripts and pickle files and @@ -1623,9 +1624,9 @@ def create_requirements_json( current working environment. Then the package and version are written to a requirements.json file. 
- WARNING: The methods utilized in this function can determine package + WARNING: The methods used in this function can determine package dependencies from provided scripts and pickle files, but CANNOT determine the - required package versions without being in the development environment which + required package versions without being in the development environment, in which they were originally created. This function works best when run in the model development environment and is @@ -1635,8 +1636,12 @@ def create_requirements_json( the requirements.json file's package versions to match the model development environment. - When provided with an output_path argument, this function outputs a JSON file - named "requirements.json". Otherwise, a list of dicts is returned. + When provided with an output_path argument, this function writes a JSON file + named "requirements.json". If create_requirements_txt is True, it will also + create a requirements.txt file. Otherwise, a list of dicts is returned. + + Note: The requirements.txt file is created only when the both output_path and + create_requirements_txt are specified. Parameters ---------- @@ -1644,11 +1649,15 @@ def create_requirements_json( The path to a Python project, by default the current working directory. output_path : str or pathlib.Path, optional The path for the output requirements.json file. The default value is None. + create_requirements_txt : bool, optional + Whether to also create a requirements.txt file in addition to the + requirements.json file. This is useful for SAS Event Stream Processing + environments. The default value is False. Returns ------- list of dict - List of dictionary representations of the json file contents, split into + List of dictionary representations of the JSON file contents, split into each package and/or warning. 
""" pickle_packages = [] @@ -1662,11 +1671,34 @@ def create_requirements_json( package_list = list(set(list(_flatten(package_list)))) package_list = cls.remove_standard_library_packages(package_list) package_and_version = cls.get_local_package_version(package_list) + # Identify packages with missing versions missing_package_versions = [ item[0] for item in package_and_version if not item[1] ] + # Map import names to their corresponding package installation names + package_and_version = [ + (IMPORT_TO_INSTALL_MAPPING.get(name, name), version) + for name, version in package_and_version + ] + + if create_requirements_txt: + requirements_txt = "" + if missing_package_versions: + requirements_txt += "# Warning- The existence and/or versions for the following packages could not be determined:\n" + requirements_txt += "# " + ", ".join(missing_package_versions) + "\n" + + for package, version in package_and_version: + if version: + requirements_txt += f"{package}=={version}\n" + + if output_path: + with open( # skipcq: PTC-W6004 + Path(output_path) / "requirements.txt", "w" + ) as file: + file.write(requirements_txt) + # Create a list of dicts related to each package or warning json_dicts = [] if missing_package_versions: @@ -1757,7 +1789,7 @@ def get_code_dependencies( Get the package dependencies for all Python scripts in the provided directory path. - Note that currently this functionality only works for .py files. + Note that currently this functionality works only for .py files. 
Parameters ---------- @@ -1800,16 +1832,16 @@ def find_imports(file_path: Union[str, Path]) -> List[str]: file_text = file.read() # Parse the file to get the abstract syntax tree representation tree = ast.parse(file_text) - modules = [] + modules = set() # Walk through each node in the ast to find import calls for node in ast.walk(tree): # Determine parent module for `from * import *` calls if isinstance(node, ast.ImportFrom): - modules.append(node.module) + modules.add(node.module.split(".")[0]) elif isinstance(node, ast.Import): for name in node.names: - modules.append(name.name) + modules.add(name.name.split(".")[0]) modules = list(set(modules)) try: @@ -1902,7 +1934,7 @@ def get_package_names(stream: Union[bytes, str]) -> List[str]: # Convert to a pandas dataframe for ease of conditional filtering df_pickle = pd.DataFrame({"opcode": opcode, "arg": arg, "pos": pos}) - # For all opcodes labelled GLOBAL or STACK_GLOBAL pull out the package names + # For all opcodes labeled GLOBAL or STACK_GLOBAL pull out the package names global_stack = df_pickle[ (df_pickle.opcode == "GLOBAL") | (df_pickle.opcode == "STACK_GLOBAL") ] @@ -1912,12 +1944,12 @@ def get_package_names(stream: Union[bytes, str]) -> List[str]: global_stack.arg.str.split().str[0].str.split(".").str[0].unique().tolist() ) - # For all opcodes labelled BINUNICODE or SHORT_BINUNICODE grab the package names + # For all opcodes labeled BINUNICODE or SHORT_BINUNICODE grab the package names binunicode = df_pickle[ (df_pickle.opcode == "BINUNICODE") | (df_pickle.opcode == "SHORT_BINUNICODE") ] - # From the argument column, split the string by `.`, then return only unique + # From the argument column, split the string by `.`, and then return only unique # cells with at least one split arg_binunicode = binunicode.arg.str.split(".") unicode_packages = ( @@ -2364,7 +2396,7 @@ def generate_model_card( target_value=target_value, ) - # Formats all new ModelProperties information into one dictionary that can be used to 
update the json file + # Formats all new ModelProperties information into one dictionary that can be used to update the JSON file update_dict["trainTable"] = training_table update_dict["selectionStatistic"] = selection_statistic update_dict["algorithm"] = algorithm @@ -2465,7 +2497,7 @@ def generate_outcome_average( Returns ------- dict - Returns a dictionary with a key value pair that represents the outcome average. + Returns a dictionary with a key-value pair that represents the outcome average. """ import numbers @@ -2508,7 +2540,7 @@ def get_selection_statistic_value( Returns ------- float - Returns the numerical value assoicated with the chosen selection statistic. + Returns the numerical value associated with the chosen selection statistic. """ if isinstance(model_files, dict): if FITSTAT not in model_files: @@ -2523,7 +2555,7 @@ def get_selection_statistic_value( or fitstat["dataMap"][selection_statistic] == None ): raise RuntimeError( - "The chosen selection statistic was not generated properly. Please ensure the value has been " + "The chosen selection statistic was not generated properly. Please ensure that the value has been " "properly created then try again." ) return fitstat["dataMap"][selection_statistic] @@ -2542,7 +2574,7 @@ def get_selection_statistic_value( or fitstat["dataMap"][selection_statistic] == None ): raise RuntimeError( - "The chosen selection statistic was not generated properly. Please ensure the value has been " + "The chosen selection statistic was not generated properly. Please ensure that the value has been " "properly created then try again." 
) return fitstat["dataMap"][selection_statistic] diff --git a/src/sasctl/pzmm/write_score_code.py b/src/sasctl/pzmm/write_score_code.py index afe7e468..0bb5dc9c 100644 --- a/src/sasctl/pzmm/write_score_code.py +++ b/src/sasctl/pzmm/write_score_code.py @@ -766,10 +766,16 @@ def impute_missing_values(data): """ impute_values = \\\n + {"var1": 0, "var2": "", "var3": 125.3} """ - self.score_code += f"\n{'':4}return data.replace(' .', np.nan).fillna(impute_values).apply(pd.to_numeric, errors='ignore')\n" - """ - - return data.replace(' .', np.nan).fillna(impute_values).apply(pd.to_numeric, errors='ignore') + self.score_code += ( + f"\n\n{'':4}# Specify downcasting behavior for pandas 2.x to avoid warnings\n" + + f"{'':4}if int(pd.__version__.split('.')[0]) == 2:\n{'':8}pd.set_option('future.no_silent_downcasting', True)\n" + + f"{'':4}return data.replace(r'^\\s*\\.$', np.nan, regex=True).fillna(impute_values).infer_objects()\n" + ) + """ + # Specify downcasting behavior for pandas 2.x to avoid warnings + if int(pd.__version__.split('.')[0]) == 2: + pd.set_option('future.no_silent_downcasting', True) + return data.replace(r'^\s*\.$', np.nan, regex=True).fillna(impute_values).infer_objects() """ # TODO: Needs unit test diff --git a/src/sasctl/tasks.py b/src/sasctl/tasks.py index d466c10f..ca659630 100644 --- a/src/sasctl/tasks.py +++ b/src/sasctl/tasks.py @@ -15,6 +15,7 @@ from pathlib import Path from typing import Union from warnings import warn +import zipfile import pandas as pd @@ -264,10 +265,9 @@ def _register_sas_model( out_var = [] in_var = [] import copy - import zipfile as zp zip_file_copy = copy.deepcopy(zip_file) - tmp_zip = zp.ZipFile(zip_file_copy) + tmp_zip = zipfile.ZipFile(zip_file_copy) if "outputVar.json" in tmp_zip.namelist(): out_var = json.loads( tmp_zip.read("outputVar.json").decode("utf=8") @@ -327,8 +327,8 @@ def _register_sas_model( if current_session().version_info() < 4: # Upload the model as a ZIP file if using Viya 3. 
- zipfile = utils.create_package(model, input=input) - model = mr.import_model_from_zip(name, project, zipfile, version=version) + zip_file = utils.create_package(model, input=input) + model = mr.import_model_from_zip(name, project, zip_file, version=version) else: # If using Viya 4, just upload the raw AStore and Model Manager will handle inspection. astore = cas.astore.download(rstore=model) @@ -981,6 +981,7 @@ def score_model_with_cas( library_name: str = "Public", model_version: str = "latest", use_cas_gateway: bool = False, + timeout: int = 300, ): score_definition = sd.create_score_definition( score_def_name, @@ -994,7 +995,85 @@ def score_model_with_cas( use_cas_gateway=use_cas_gateway, ) score_execution = se.create_score_execution(score_definition.id) - score_execution_poll = se.poll_score_execution_state(score_execution) + score_execution_poll = se.poll_score_execution_state(score_execution, timeout) print(score_execution_poll) score_results = se.get_score_execution_results(score_execution, use_cas_gateway) return score_results + + +def upload_local_model( + path: Union[str, Path], + model_name: str, + project_name: str, + repo_name: Union[str, dict] = None, + version: str = "latest", +): + """A function to upload a model and any associated files to the model repository. + Parameters + ---------- + path : Union[str, Path] + The path to the model and any associated files. + model_name : str + The name of the model. + project_name : str + The name of the project to which the model will be uploaded. + repo_name : Union[str, dict], optional + repository in which to create the project + version: str, optional + The version of the model being uploaded. Defaults to 'latest'. For new model version, use 'new'. 
+ """ + # Use default repository if not specified + try: + if repo_name is None: + repository = mr.default_repository() + else: + repository = mr.get_repository(repo_name) + except HTTPError as e: + if e.code == 403: + raise AuthorizationError( + "Unable to register model. User account does not have read permissions " + "for the /modelRepository/repositories/ URL. Please contact your SAS " + "Viya administrator." + ) + raise e + + # Unable to find or create the repo. + if not repository and not repo_name: + raise ValueError("Unable to find a default repository") + if not repository: + raise ValueError(f"Unable to find repository '{repo_name}'") + + # Get project from repo if it exists; if it doesn't, create a new one + p = mr.get_project(project_name) + if p is None: + p = mr.create_project(project_name, repository) + + # zip up all files in directory (except any previous zip files) + zip_name = str(Path(path) / (model_name + ".zip")) + file_names = sorted(Path(path).glob("*[!(zip|sasast)]")) + sasast_file = next(Path(path).glob("*.sasast"), None) + if sasast_file: + # If a sasast file is present, upload it as well + with open(sasast_file, "rb") as sasast: + sasast_model = sasast.read() + data = { + "name": model_name, + "projectId": p.id, + "type": "ASTORE", + "versionOption": version, + } + files = {"files": (sasast_file.name, sasast_model)} + model = mr.post("/models", files=files, data=data) + for file in file_names: + with open(file, "r") as f: + mr.add_model_content(model, f, file.name) + else: + with zipfile.ZipFile(str(zip_name), mode="w") as zFile: + for file in file_names: + zFile.write(str(file), arcname=file.name) + # upload zipped model + with open(zip_name, "rb") as zip_file: + model = mr.import_model_from_zip( + model_name, project_name, zip_file, version=version + ) + return model diff --git a/src/sasctl/utils/misc.py b/src/sasctl/utils/misc.py index b2a33658..5c8536a5 100644 --- a/src/sasctl/utils/misc.py +++ b/src/sasctl/utils/misc.py @@ -6,9 
+6,34 @@ import random import string +import warnings from .decorators import versionadded +# Mapping of Python import names to their PyPI installation names +IMPORT_TO_INSTALL_MAPPING = { + # Data Science & ML Core + "sklearn": "scikit-learn", + "skimage": "scikit-image", + "cv2": "opencv-python", + "PIL": "Pillow", + # Data Formats & Parsing + "yaml": "PyYAML", + "bs4": "beautifulsoup4", + "docx": "python-docx", + "pptx": "python-pptx", + # Date & Time Utilities + "dateutil": "python-dateutil", + # Database Connectors + "MySQLdb": "MySQL-python", + "psycopg2": "psycopg2-binary", + # System & Platform + "win32api": "pywin32", + "win32com": "pywin32", + # Scientific Libraries + "Bio": "biopython", +} + def installed_packages(): """List Python packages installed in the current environment. @@ -18,10 +43,10 @@ def installed_packages(): Notes ----- - Uses pip freeze functionality so pip module must be present. For pip + Uses pip freeze functionality, so pip module must be present. For pip versions >=20.1, this functionality fails to provide versions for some - conda installed, locally installed, and url installed packages. Instead - uses the pkg_resources package which is typically bundled with pip. + conda installed, locally installed, and url installed packages. Instead, + uses the importlib package, which is typically bundled with python. 
""" from packaging import version @@ -30,14 +55,14 @@ def installed_packages(): import pip if version.parse(pip.__version__) >= version.parse("20.1"): - import pkg_resources + from importlib.metadata import distributions - return [ - p.project_name + "==" + p.version for p in pkg_resources.working_set - ] + output = [p.name + "==" + p.version for p in distributions()] + return output else: from pip._internal.operations import freeze except ImportError: + try: from pip.operations import freeze except ImportError: @@ -49,7 +74,7 @@ def installed_packages(): @versionadded(version="1.5.1") def random_string(length): - """Generates a random alpha-numeric string of a given length. + """Generates a random alphanumeric string of a given length. Parameters ---------- @@ -62,7 +87,7 @@ def random_string(length): """ - # random.choices() wasn't added until Python 3.6, so repeatedly call .choice() instead + # random.choices() was not added until Python 3.6, so repeatedly call .choice() instead chars = string.ascii_letters + string.digits return "".join(random.choice(chars) for _ in range(length)) @@ -70,7 +95,7 @@ def random_string(length): @versionadded(version="1.9.0") def check_if_jupyter() -> bool: """ - Check if the code is being executed from a jupyter notebook. + Check if the code is being executed from a Jupyter notebook. Source: https://stackoverflow.com/questions/47211324/check-if-module-is-running-in- jupyter-or-not @@ -78,7 +103,7 @@ def check_if_jupyter() -> bool: Returns ------- bool - True if a jupyter notebook is detected. False otherwise. + True if a Jupyter notebook is detected. False otherwise. 
""" try: shell = get_ipython().__class__.__name__ diff --git a/tests/integration/test_pymas.py b/tests/integration/test_pymas.py index 7973211b..aef90971 100644 --- a/tests/integration/test_pymas.py +++ b/tests/integration/test_pymas.py @@ -10,7 +10,6 @@ import pytest - pytest.skip( "PyMAS functionality is deprecated and will be removed in a future release.", allow_module_level=True, @@ -241,9 +240,7 @@ def test_from_pickle(train_data, pickle_file): end; endpackage; -""".lstrip( - "\n" - ) +""".lstrip("\n") assert isinstance(p, PyMAS) @@ -343,9 +340,7 @@ def hello_world(): end; endpackage; -""".lstrip( - "\n" - ) +""".lstrip("\n") f = tmpdir.join("model.py") f.write(code) diff --git a/tests/unit/test_code_file.py b/tests/unit/test_code_file.py new file mode 100644 index 00000000..112fc7dc --- /dev/null +++ b/tests/unit/test_code_file.py @@ -0,0 +1,517 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# Copyright © 2026, SAS Institute Inc., Cary, NC, USA. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +from unittest import mock +import pytest +import tempfile +from pathlib import Path + +from sasctl.pzmm import CodeFile + + +class TestValidateCodeFormatViaAPI: + """Tests for _validate_code_format_via_api method.""" + + @mock.patch("sasctl.pzmm.code_file.CodeFile.post") + def test_validate_code_format_success(self, mock_post): + """Test successful code validation via API.""" + mock_post.return_value = {"valid": True} + + code = """ +def execute(): + 'Output:result' + 'DependentPackages:' + result = 'test' + return result +""" + # Should not raise any exception + CodeFile._validate_code_format_via_api(code) + + mock_post.assert_called_once_with( + "/commons/validations/codeFiles", + json={"content": code, "type": "decisionPythonFile"}, + ) + + @mock.patch("sasctl.pzmm.code_file.CodeFile.post") + def test_validate_code_format_with_error_message(self, mock_post): + """Test validation failure with error message.""" + mock_post.return_value = { + "valid": 
False, + "error": { + "message": "Output docstring must be the first line in execute function" + }, + } + + code = """ +def execute(): + result = 'test' + 'Output:result' + return result +""" + with pytest.raises(ValueError, match="Output docstring must be the first line"): + CodeFile._validate_code_format_via_api(code) + + @mock.patch("sasctl.pzmm.code_file.CodeFile.post") + def test_validate_code_format_with_error_no_message(self, mock_post): + """Test validation failure with error but no message.""" + mock_post.return_value = {"valid": False, "error": "Validation failed"} + + code = "invalid code" + + with pytest.raises(ValueError, match="Validation failed"): + CodeFile._validate_code_format_via_api(code) + + @mock.patch("sasctl.pzmm.code_file.CodeFile.post") + def test_validate_code_format_api_exception(self, mock_post): + """Test handling of API exceptions during validation.""" + mock_post.side_effect = RuntimeError("API connection failed") + + code = "def execute():\n return 1" + + with pytest.raises( + ValueError, match="Code validation failed: API connection failed" + ): + CodeFile._validate_code_format_via_api(code) + + +class TestFindFileInFolder: + """Tests for _find_file_in_folder method.""" + + @mock.patch("sasctl.services.folders.get") + def test_find_file_in_folder_found(self, mock_get): + """Test finding an existing file in a folder.""" + mock_get.return_value = { + "uri": "files/files/acde070d-8c4c-4f0d-9d8a-162843c10333" + } + + result = CodeFile._find_file_in_folder("folder-456", "test.py") + + assert result is not None + assert result == mock_get.return_value + mock_get.assert_called_once_with( + "/folders/folder-456/members", + params={"filter": "and(eq(name, 'test.py'), eq(contentType, 'file'))"}, + ) + + @mock.patch("sasctl.services.folders.get") + def test_find_file_in_folder_not_found(self, mock_get): + """Test when file is not found in folder.""" + mock_response = mock.MagicMock() + mock_response.__len__ = mock.MagicMock(return_value=0) + 
mock_get.return_value = mock_response + + result = CodeFile._find_file_in_folder("folder-456", "nonexistent.py") + + assert result is None + + @mock.patch("sasctl.services.folders.get") + def test_find_file_in_folder_no_uri(self, mock_get): + """Test when response has no URI.""" + mock_get.return_value = {"id": "unique-id"} + + result = CodeFile._find_file_in_folder("folder-456", "test.py") + + assert result is None + + +class TestLoadPythonCode: + """Tests for _load_python_code method.""" + + def test_load_python_code_from_string(self): + """Test loading code from a string.""" + code = "def execute():\n return 'test'" + result = CodeFile._load_python_code(code) + assert result == code + + def test_load_python_code_from_file(self): + """Test loading code from a file path.""" + with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f: + f.write("def execute():\n return 'test'") + temp_path = Path(f.name) + + try: + result = CodeFile._load_python_code(temp_path) + assert result == "def execute():\n return 'test'" + finally: + temp_path.unlink() + + def test_load_python_code_from_string_path(self): + """Test loading code from a string path.""" + with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f: + f.write("def test():\n pass") + temp_path = f.name + + try: + result = CodeFile._load_python_code(temp_path) + assert result == "def test():\n pass" + finally: + Path(temp_path).unlink() + + def test_load_python_code_empty_string(self): + """Test that empty string raises ValueError.""" + with pytest.raises(ValueError, match="Code cannot be empty"): + CodeFile._load_python_code("") + + def test_load_python_code_whitespace_only(self): + """Test that whitespace-only string raises ValueError.""" + with pytest.raises(ValueError, match="Code cannot be empty"): + CodeFile._load_python_code(" \n\t ") + + def test_load_python_code_file_not_found(self): + """Test that non-existent file raises ValueError.""" + with pytest.raises(ValueError, 
match="Code file not found"): + CodeFile._load_python_code(Path("/nonexistent/path/to/file.py")) + + def test_load_python_code_empty_file(self): + """Test that empty file raises ValueError.""" + with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f: + temp_path = Path(f.name) + + try: + with pytest.raises(ValueError, match="Code cannot be empty"): + CodeFile._load_python_code(temp_path) + finally: + temp_path.unlink() + + def test_load_python_code_whitespace_only_file(self): + """Test that file with only whitespace raises ValueError.""" + with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f: + f.write(" \n\n\t ") + temp_path = Path(f.name) + + try: + with pytest.raises(ValueError, match="Code cannot be empty"): + CodeFile._load_python_code(temp_path) + finally: + temp_path.unlink() + + def test_load_python_code_invalid_path_string(self): + """Test that invalid path string is treated as raw code.""" + # A string that looks like it could be a path but is actually invalid + code = "/some/path/that/does/not/exist.py but is actually code" + result = CodeFile._load_python_code(code) + assert result == code + + +class TestWriteIDCodeFile: + """Tests for write_id_code_file method.""" + + @mock.patch("sasctl.pzmm.code_file.CodeFile.post") + @mock.patch("sasctl.services.files.create_file") + @mock.patch("sasctl.services.folders.get_folder") + @mock.patch("sasctl.pzmm.code_file.CodeFile._find_file_in_folder") + def test_write_id_code_file_success( + self, mock_find_file, mock_get_folder, mock_create_file, mock_post + ): + """Test successful upload of a code file to Viya.""" + mock_folder_obj = mock.MagicMock() + mock_folder_obj.id = "folder-123" + mock_get_folder.return_value = mock_folder_obj + + mock_find_file.return_value = None + + mock_file_obj = mock.MagicMock() + mock_file_obj.id = "12345" + mock_file_obj.name = "test_code.py" + mock_create_file.return_value = mock_file_obj + + mock_code_file = mock.MagicMock() + 
mock_code_file.name = "test_code.py" + mock_code_file.id = "cf-12345" + mock_post.return_value = mock_code_file + + code = """ +def execute(): + 'Output:result' + 'DependentPackages:' + result = 'test' + return result +""" + + result = CodeFile.write_id_code_file( + code=code, + file_name="test_code.py", + folder="/Public/TestFolder", + validate_code=False, + ) + + assert mock_create_file.called + assert mock_post.called + assert result.name == "test_code.py" + + # Verify post was called with correct data + mock_post.assert_called_once_with( + "/codeFiles", + json={ + "name": "test_code.py", + "fileUri": "/files/files/12345", + "type": "decisionPythonFile", + }, + ) + + @mock.patch("sasctl.pzmm.code_file.CodeFile._validate_code_format_via_api") + @mock.patch("sasctl.pzmm.code_file.CodeFile.post") + @mock.patch("sasctl.services.files.create_file") + @mock.patch("sasctl.services.folders.get_folder") + @mock.patch("sasctl.pzmm.code_file.CodeFile._find_file_in_folder") + def test_write_id_code_file_with_validation( + self, + mock_find_file, + mock_get_folder, + mock_create_file, + mock_post, + mock_validate, + ): + """Test upload with validation enabled.""" + mock_folder_obj = mock.MagicMock() + mock_folder_obj.id = "folder-123" + mock_get_folder.return_value = mock_folder_obj + mock_find_file.return_value = None + + mock_file_obj = mock.MagicMock() + mock_file_obj.id = "12345" + mock_create_file.return_value = mock_file_obj + + mock_code_file = mock.MagicMock() + mock_post.return_value = mock_code_file + + code = "def execute():\n return 'test'" + + result = CodeFile.write_id_code_file( + code=code, + file_name="test_code.py", + folder="/Public/TestFolder", + validate_code=True, + ) + + # Verify validation was called + mock_validate.assert_called_once_with(code) + assert result == mock_code_file + + def test_write_id_code_file_invalid_filename(self): + """Test that invalid file names are rejected.""" + code = """ +def execute(): + 'Output:result' + 
'DependentPackages:' + result = 42 +""" + + with pytest.raises(ValueError, match="file_name must end with .py"): + CodeFile.write_id_code_file( + code=code, file_name="test_code.txt", folder="/Public/TestFolder" + ) + + @mock.patch("sasctl.services.folders.get_folder") + @mock.patch("sasctl.pzmm.code_file.CodeFile._find_file_in_folder") + def test_write_id_code_file_already_exists(self, mock_find_file, mock_get_folder): + """Test that uploading a file that already exists raises error.""" + mock_folder_obj = mock.MagicMock() + mock_folder_obj.id = "folder-123" + mock_get_folder.return_value = mock_folder_obj + + mock_existing_file = mock.MagicMock() + mock_existing_file.id = "existing-file-id" + mock_existing_file.name = "duplicate.py" + mock_find_file.return_value = mock_existing_file + + code = """ +def execute(): + 'Output:result' + 'DependentPackages:' + result = 'test' + return result +""" + + with pytest.raises( + ValueError, match="File 'duplicate.py' already exists in this folder" + ): + CodeFile.write_id_code_file( + code=code, + file_name="duplicate.py", + folder="/Public/TestFolder", + validate_code=False, + ) + + @mock.patch("sasctl.services.folders.get_folder") + def test_write_id_code_file_folder_not_found(self, mock_get_folder): + """Test that referencing a non-existent folder raises error.""" + mock_get_folder.return_value = None + + code = """ +def execute(): + 'Output:result' + 'DependentPackages:' + result = 'test' + return result +""" + + with pytest.raises(ValueError, match="Folder '/NonExistent' not found"): + CodeFile.write_id_code_file( + code=code, + file_name="test_code.py", + folder="/NonExistent", + validate_code=False, + ) + + def test_write_id_code_file_empty_code(self): + """Test that empty code raises error.""" + with pytest.raises(ValueError, match="Code cannot be empty"): + CodeFile.write_id_code_file( + code="", + file_name="test_code.py", + folder="/Public/TestFolder", + validate_code=False, + ) + + 
@mock.patch("sasctl.pzmm.code_file.CodeFile.post") + @mock.patch("sasctl.services.files.create_file") + @mock.patch("sasctl.services.folders.get_folder") + @mock.patch("sasctl.pzmm.code_file.CodeFile._find_file_in_folder") + def test_write_id_code_file_from_path( + self, mock_find_file, mock_get_folder, mock_create_file, mock_post + ): + """Test uploading code from a file path.""" + with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f: + f.write("def execute():\n return 'test'") + temp_path = Path(f.name) + + try: + mock_folder_obj = mock.MagicMock() + mock_folder_obj.id = "folder-123" + mock_get_folder.return_value = mock_folder_obj + mock_find_file.return_value = None + + mock_file_obj = mock.MagicMock() + mock_file_obj.id = "12345" + mock_create_file.return_value = mock_file_obj + + mock_code_file = mock.MagicMock() + mock_post.return_value = mock_code_file + + result = CodeFile.write_id_code_file( + code=temp_path, + file_name="test_code.py", + folder="/Public/TestFolder", + validate_code=False, + ) + + assert result == mock_code_file + mock_create_file.assert_called_once() + finally: + temp_path.unlink() + + @mock.patch("sasctl.pzmm.code_file.CodeFile.post") + @mock.patch("sasctl.services.files.delete_file") + @mock.patch("sasctl.services.files.create_file") + @mock.patch("sasctl.services.folders.get_folder") + @mock.patch("sasctl.pzmm.code_file.CodeFile._find_file_in_folder") + def test_write_id_code_file_post_fails_cleanup_success( + self, + mock_find_file, + mock_get_folder, + mock_create_file, + mock_delete_file, + mock_post, + ): + """Test that file is cleaned up when post fails.""" + mock_folder_obj = mock.MagicMock() + mock_folder_obj.id = "folder-123" + mock_get_folder.return_value = mock_folder_obj + mock_find_file.return_value = None + + mock_file_obj = mock.MagicMock() + mock_file_obj.id = "12345" + mock_file_obj.__getitem__ = mock.MagicMock(return_value="12345") + mock_create_file.return_value = mock_file_obj + + 
mock_post.side_effect = RuntimeError("API error") + + code = "def execute():\n return 'test'" + + with pytest.raises( + RuntimeError, + match="There was an error with creating the code file: API error", + ): + CodeFile.write_id_code_file( + code=code, + file_name="test_code.py", + folder="/Public/TestFolder", + validate_code=False, + ) + + # Verify cleanup was attempted + mock_delete_file.assert_called_once_with({"id": "12345"}) + + @mock.patch("sasctl.pzmm.code_file.CodeFile.post") + @mock.patch("sasctl.services.files.delete_file") + @mock.patch("sasctl.services.files.create_file") + @mock.patch("sasctl.services.folders.get_folder") + @mock.patch("sasctl.pzmm.code_file.CodeFile._find_file_in_folder") + def test_write_id_code_file_post_fails_cleanup_fails( + self, + mock_find_file, + mock_get_folder, + mock_create_file, + mock_delete_file, + mock_post, + ): + """Test error handling when both post and cleanup fail.""" + mock_folder_obj = mock.MagicMock() + mock_folder_obj.id = "folder-123" + mock_get_folder.return_value = mock_folder_obj + mock_find_file.return_value = None + + mock_file_obj = mock.MagicMock() + mock_file_obj.id = "12345" + mock_file_obj.__getitem__ = mock.MagicMock(return_value="12345") + mock_create_file.return_value = mock_file_obj + + mock_post.side_effect = RuntimeError("API error") + mock_delete_file.side_effect = RuntimeError("Delete failed") + + code = "def execute():\n return 'test'" + + with pytest.raises(RuntimeError): + CodeFile.write_id_code_file( + code=code, + file_name="test_code.py", + folder="/Public/TestFolder", + validate_code=False, + ) + + @mock.patch("sasctl.pzmm.code_file.CodeFile.post") + @mock.patch("sasctl.services.files.create_file") + @mock.patch("sasctl.services.folders.get_folder") + @mock.patch("sasctl.pzmm.code_file.CodeFile._find_file_in_folder") + def test_write_id_code_file_with_folder_object( + self, mock_find_file, mock_get_folder, mock_create_file, mock_post + ): + """Test uploading with folder object instead 
of path.""" + mock_folder_obj = mock.MagicMock() + mock_folder_obj.id = "folder-123" + mock_get_folder.return_value = mock_folder_obj + mock_find_file.return_value = None + + mock_file_obj = mock.MagicMock() + mock_file_obj.id = "12345" + mock_create_file.return_value = mock_file_obj + + mock_code_file = mock.MagicMock() + mock_post.return_value = mock_code_file + + code = "def execute():\n return 'test'" + folder_dict = {"id": "folder-123", "name": "TestFolder"} + + result = CodeFile.write_id_code_file( + code=code, + file_name="test_code.py", + folder=folder_dict, + validate_code=False, + ) + + assert result == mock_code_file + mock_get_folder.assert_called_once_with(folder_dict) diff --git a/tests/unit/test_misc_utils.py b/tests/unit/test_misc_utils.py index 939e6ef6..622b2ba1 100644 --- a/tests/unit/test_misc_utils.py +++ b/tests/unit/test_misc_utils.py @@ -14,7 +14,8 @@ def test_list_packages(): packages = installed_packages() # We know that these packages should always be present - assert any(re.match("requests==.*", p) for p in packages) + assert packages is not None + assert any( re.match("sasctl.*", p) for p in packages ) # sasctl may be installed from disk so no '==' diff --git a/tests/unit/test_model_management.py b/tests/unit/test_model_management.py index fbd4fc36..834b0ecc 100644 --- a/tests/unit/test_model_management.py +++ b/tests/unit/test_model_management.py @@ -23,6 +23,8 @@ def test_create_performance_definition(): RestObj({"name": "Test Model 2", "id": "67890", "projectId": PROJECT["id"]}), ] USER = "username" + VERSION_MOCK = {"modelVersionName": "1.0"} + VERSION_MOCK_NONAME = {} with mock.patch("sasctl.core.Session._get_authorization_token"): current_session("example.com", USER, "password") @@ -111,6 +113,32 @@ def test_create_performance_definition(): table_prefix="TestData", ) + with pytest.raises(ValueError): + # Model verions exceeds models + get_model.side_effect = copy.deepcopy(MODELS) + _ = mm.create_performance_definition( + 
models=["model1", "model2"], + modelVersions=["1.0", "2.0", "3.0"], + library_name="TestLibrary", + table_prefix="TestData", + max_bins=3, + monitor_challenger=True, + monitor_champion=True, + ) + + with pytest.raises(ValueError): + # Model version dictionary missing modelVersionName + get_model.side_effect = copy.deepcopy(MODELS) + _ = mm.create_performance_definition( + models=["model1", "model2"], + modelVersions=VERSION_MOCK_NONAME, + library_name="TestLibrary", + table_prefix="TestData", + max_bins=3, + monitor_challenger=True, + monitor_champion=True, + ) + get_project.return_value = copy.deepcopy(PROJECT) get_project.return_value["targetVariable"] = "target" get_project.return_value["targetLevel"] = "interval" @@ -125,21 +153,68 @@ def test_create_performance_definition(): monitor_challenger=True, monitor_champion=True, ) + url, data = post_models.call_args + assert post_models.call_count == 1 + assert PROJECT["id"] == data["json"]["projectId"] + assert MODELS[0]["id"] in data["json"]["modelIds"] + assert MODELS[1]["id"] in data["json"]["modelIds"] + assert "TestLibrary" == data["json"]["dataLibrary"] + assert "TestData" == data["json"]["dataPrefix"] + assert "cas-shared-default" == data["json"]["casServerId"] + assert data["json"]["name"] + assert data["json"]["description"] + assert data["json"]["maxBins"] == 3 + assert data["json"]["championMonitored"] is True + assert data["json"]["challengerMonitored"] is True - assert post_models.call_count == 1 - url, data = post_models.call_args - - assert PROJECT["id"] == data["json"]["projectId"] - assert MODELS[0]["id"] in data["json"]["modelIds"] - assert MODELS[1]["id"] in data["json"]["modelIds"] - assert "TestLibrary" == data["json"]["dataLibrary"] - assert "TestData" == data["json"]["dataPrefix"] - assert "cas-shared-default" == data["json"]["casServerId"] - assert data["json"]["name"] - assert data["json"]["description"] - assert data["json"]["maxBins"] == 3 - assert data["json"]["championMonitored"] is True 
- assert data["json"]["challengerMonitored"] is True + get_model.side_effect = copy.deepcopy(MODELS) + _ = mm.create_performance_definition( + # One model version as a string name + models=["model1", "model2"], + modelVersions="1.0", + library_name="TestLibrary", + table_prefix="TestData", + max_bins=3, + monitor_challenger=True, + monitor_champion=True, + ) + + assert post_models.call_count == 2 + url, data = post_models.call_args + assert f"{MODELS[0]['id']}:1.0" in data["json"]["modelIds"] + assert MODELS[1]["id"] in data["json"]["modelIds"] + + get_model.side_effect = copy.deepcopy(MODELS) + # List of string type model versions + _ = mm.create_performance_definition( + models=["model1", "model2"], + modelVersions=["1.0", "2.0"], + library_name="TestLibrary", + table_prefix="TestData", + max_bins=3, + monitor_challenger=True, + monitor_champion=True, + ) + assert post_models.call_count == 3 + url, data = post_models.call_args + assert f"{MODELS[0]['id']}:1.0" in data["json"]["modelIds"] + assert f"{MODELS[1]['id']}:2.0" in data["json"]["modelIds"] + + get_model.side_effect = copy.deepcopy(MODELS) + # List of dictionary type and string type model versions + _ = mm.create_performance_definition( + models=["model1", "model2"], + modelVersions=[VERSION_MOCK, "2.0"], + library_name="TestLibrary", + table_prefix="TestData", + max_bins=3, + monitor_challenger=True, + monitor_champion=True, + ) + assert post_models.call_count == 4 + url, data = post_models.call_args + assert f"{MODELS[0]['id']}:1.0" in data["json"]["modelIds"] + assert f"{MODELS[1]['id']}:2.0" in data["json"]["modelIds"] with mock.patch( "sasctl._services.model_management.ModelManagement" ".post" @@ -160,20 +235,39 @@ def test_create_performance_definition(): monitor_champion=True, ) - assert post_project.call_count == 1 - url, data = post_project.call_args - - assert PROJECT["id"] == data["json"]["projectId"] - assert MODELS[0]["id"] in data["json"]["modelIds"] - assert MODELS[1]["id"] in 
data["json"]["modelIds"] - assert "TestLibrary" == data["json"]["dataLibrary"] - assert "TestData" == data["json"]["dataPrefix"] - assert "cas-shared-default" == data["json"]["casServerId"] - assert data["json"]["name"] - assert data["json"]["description"] - assert data["json"]["maxBins"] == 3 - assert data["json"]["championMonitored"] is True - assert data["json"]["challengerMonitored"] is True + # one extra test for project with version id + + assert post_project.call_count == 1 + url, data = post_project.call_args + + assert PROJECT["id"] == data["json"]["projectId"] + assert MODELS[0]["id"] in data["json"]["modelIds"] + assert MODELS[1]["id"] in data["json"]["modelIds"] + assert "TestLibrary" == data["json"]["dataLibrary"] + assert "TestData" == data["json"]["dataPrefix"] + assert "cas-shared-default" == data["json"]["casServerId"] + assert data["json"]["name"] + assert data["json"]["description"] + assert data["json"]["maxBins"] == 3 + assert data["json"]["championMonitored"] is True + assert data["json"]["challengerMonitored"] is True + + get_model.side_effect = copy.deepcopy(MODELS) + # Project with model version + _ = mm.create_performance_definition( + project="project", + modelVersions="2.0", + library_name="TestLibrary", + table_prefix="TestData", + max_bins=3, + monitor_challenger=True, + monitor_champion=True, + ) + + assert post_project.call_count == 2 + url, data = post_project.call_args + assert f"{MODELS[0]['id']}:2.0" in data["json"]["modelIds"] + assert MODELS[1]["id"] in data["json"]["modelIds"] def test_table_prefix_format(): with pytest.raises(ValueError): diff --git a/tests/unit/test_model_repository.py b/tests/unit/test_model_repository.py index 9232896b..bf4f9284 100644 --- a/tests/unit/test_model_repository.py +++ b/tests/unit/test_model_repository.py @@ -13,6 +13,10 @@ from sasctl import current_session from sasctl.services import model_repository as mr +from sasctl.core import RestObj, VersionInfo, request +from requests import HTTPError 
+import urllib.error + def test_create_model(): MODEL_NAME = "Test Model" @@ -230,3 +234,343 @@ def test_add_model_content(): assert post.call_args[1]["files"] == { "files": ("test.pkl", binary_data, "application/image") } + + +def test_create_model_version(): + model_mock = {"id": 12345} + new_model_mock = {"id": 34567} + with mock.patch( + "sasctl._services.model_repository.ModelRepository.get_model", + side_effect=[ + model_mock, + model_mock, + new_model_mock, + model_mock, + new_model_mock, + ], + ) as get_model: + with mock.patch( + "sasctl._services.model_repository.ModelRepository.get_model_link" + ) as get_model_link: + with mock.patch( + "sasctl._services.model_repository.ModelRepository.request_link" + ) as request_link: + get_model_link_mock = { + "method": "GET", + "rel": "modelHistory", + "href": "/modelRepository/models/12345/history", + "uri": "/modelRepository/models/12345/history", + "type": "application/vnd.sas.collection", + "responseItemType": "application/vnd.sas.models.model.version", + } + + get_model_link.return_value = None + with pytest.raises(ValueError): + mr.create_model_version(model=model_mock, minor=False) + + get_model_link.return_value = get_model_link_mock + response = mr.create_model_version(model=model_mock, minor=False) + + request_link.assert_called_with( + model_mock, "addModelVersion", json={"option": "major"} + ) + assert response == new_model_mock + + response = mr.create_model_version(model=model_mock, minor=True) + request_link.assert_called_with( + model_mock, "addModelVersion", json={"option": "minor"} + ) + assert response == new_model_mock + + +def test_list_model_versions(): + with mock.patch( + "sasctl._services.model_repository.ModelRepository.get_model_link" + ) as get_model_link: + with mock.patch( + "sasctl._services.model_repository.ModelRepository.request_link" + ) as request_link: + with mock.patch("sasctl.core.Session.version_info") as version: + version.return_value = VersionInfo(4) + 
get_model_link.return_value = None + with pytest.raises(ValueError): + mr.list_model_versions( + model="12345", + ) + + get_model_link_mock = { + "method": "GET", + "rel": "modelHistory", + "href": "/modelRepository/models/12345/history", + "uri": "/modelRepository/models/12345/history", + "type": "application/vnd.sas.collection", + "responseItemType": "application/vnd.sas.models.model.version", + } + + get_model_link.return_value = get_model_link_mock + + response = mr.list_model_versions(model="12345") + assert response + + request_link.return_value = RestObj({"id": "12345"}) + response = mr.list_model_versions(model="12345") + assert isinstance(response, list) + + request_link.return_value = [ + RestObj({"id": "12345"}), + RestObj({"id": "3456"}), + ] + response = mr.list_model_versions(model="12345") + assert isinstance(response, list) + + +def test_get_model_version(): + with mock.patch( + "sasctl._services.model_repository.ModelRepository.list_model_versions" + ) as list_model_versions: + with mock.patch( + "sasctl._services.model_repository.ModelRepository.request_link" + ) as request_link: + + list_model_versions_mock = [ + { + "id": "123", + "links": [ + { + "method": "GET", + "rel": "self", + "href": "/modelRepository/models/abc/history/123", + "uri": "/modelRepository/models/abc/history/123", + "type": "demo", + } + ], + }, + {"id": "345", "links": []}, + ] + + list_model_versions.return_value = list_model_versions_mock + + with pytest.raises(ValueError): + mr.get_model_version(model="000", version_id="000") + + response = mr.get_model_version(model="000", version_id="123") + request_link.assert_called_once_with( + list_model_versions_mock[0], + "self", + headers={"Accept": "application/vnd.sas.models.model.version+json"}, + ) + + +def test_get_model_with_versions(): + with mock.patch( + "sasctl._services.model_repository.ModelRepository.is_uuid" + ) as is_uuid: + with mock.patch( + "sasctl._services.model_repository.ModelRepository.get_model" + ) as 
get_model: + with mock.patch( + "sasctl._services.model_repository.ModelRepository.request" + ) as request: + + is_uuid.return_value = True + response = mr.get_model_with_versions(model="12345") + assert response + + is_uuid.return_value = False + get_model.return_value = None + response = mr.get_model_with_versions(model={"id": "12345"}) + assert response + + is_uuid.return_value = False + get_model.return_value = None + with pytest.raises(HTTPError): + mr.get_model_with_versions(model=RestObj) + + is_uuid.return_value = False + get_model.return_value = RestObj({"id": "123456"}) + request.side_effect = urllib.error.HTTPError( + url="http://demo.sas.com", + code=404, + msg="Not Found", + hdrs=None, + fp=None, + ) + with pytest.raises(HTTPError): + mr.get_model_with_versions(model=RestObj) + + request.side_effect = None + request.return_value = RestObj({"id": "12345"}) + response = mr.get_model_with_versions(model=RestObj) + assert isinstance(response, list) + + request.return_value = [ + RestObj({"id": "12345"}), + RestObj({"id": "3456"}), + ] + response = mr.get_model_with_versions(model=RestObj) + assert isinstance(response, list) + + request.assert_any_call( + "GET", + "/models/123456/versions", + headers={"Accept": "application/vnd.sas.collection+json"}, + ) + + request.assert_any_call( + "GET", + "/models/12345/versions", + headers={"Accept": "application/vnd.sas.collection+json"}, + ) + + +def test_get_model_or_version(): + with mock.patch( + "sasctl._services.model_repository.ModelRepository.get_model_with_versions" + ) as get_model_with_versions: + with mock.patch( + "sasctl._services.model_repository.ModelRepository.request_link" + ) as request_link: + + get_model_with_versions_mock = [ + { + "id": "123", + "links": [ + { + "method": "GET", + "rel": "self", + "href": "/modelRepository/models/abc/history/123", + "uri": "/modelRepository/models/abc/history/123", + "type": "demo", + } + ], + }, + {"id": "345", "links": []}, + ] + + 
get_model_with_versions.return_value = [] + with pytest.raises(ValueError): + mr.get_model_or_version(model="000", version_id="000") + + get_model_with_versions.return_value = get_model_with_versions_mock + with pytest.raises(ValueError): + mr.get_model_or_version(model="000", version_id="000") + + response = mr.get_model_or_version(model="000", version_id="123") + request_link.assert_called_once_with( + get_model_with_versions_mock[0], + "self", + headers={ + "Accept": "application/vnd.sas.models.model.version+json, application/vnd.sas.models.model+json" + }, + ) + + +def test_get_model_version_contents(): + with mock.patch( + "sasctl._services.model_repository.ModelRepository.get_model_version" + ) as get_model_version: + with mock.patch( + "sasctl._services.model_repository.ModelRepository.request_link" + ) as request_link: + + get_model_version.return_value = {"id": "000"} + request_link.return_value = RestObj({"id": "12345"}) + response = mr.get_model_version_contents(model="12345", version_id="3456") + assert isinstance(response, list) + + request_link.return_value = [ + RestObj({"id": "12345"}), + RestObj({"id": "3456"}), + ] + response = mr.get_model_version_contents(model="12345", version_id="3456") + assert isinstance(response, list) + + request_link.assert_any_call( + {"id": "000"}, + "contents", + headers={"Accept": "application/vnd.sas.collection+json"}, + ) + + +def test_get_model_version_content_metadata(): + with mock.patch( + "sasctl._services.model_repository.ModelRepository.get_model_version_contents" + ) as get_model_version_contents: + with mock.patch( + "sasctl._services.model_repository.ModelRepository.request_link" + ) as request_link: + + get_model_with_metadata_mock = [ + { + "id": "123", + "links": [ + { + "method": "GET", + "rel": "self", + "href": "/modelRepository/models/abc/history/123", + "uri": "/modelRepository/models/abc/history/123", + "type": "demo", + } + ], + }, + {"id": "345", "links": []}, + ] + + 
get_model_version_contents.return_value = [] + with pytest.raises(ValueError): + mr.get_model_version_content_metadata( + model="000", version_id="123", content_id="000" + ) + + get_model_version_contents.return_value = get_model_with_metadata_mock + with pytest.raises(ValueError): + mr.get_model_version_content_metadata( + model="abc", version_id="123", content_id="000" + ) + + response = mr.get_model_version_content_metadata( + model="abc", version_id="123", content_id="345" + ) + assert response + request_link.assert_called_once_with( + get_model_with_metadata_mock[1], + "self", + headers={"Accept": "application/vnd.sas.models.model.content+json"}, + ) + + +def test_get_model_version_content(): + with mock.patch( + "sasctl._services.model_repository.ModelRepository.get_model_version_content_metadata" + ) as get_model_version_content_metadata: + with mock.patch( + "sasctl._services.model_repository.ModelRepository.request_link" + ) as request_link: + + get_model_version_content_metadata.return_value = {"id": 000} + request_link.return_value = None + with pytest.raises(HTTPError): + mr.get_model_version_content( + model="abc", version_id="123", content_id="345" + ) + + request_link.return_value = RestObj({"id": "12345"}) + response = mr.get_model_version_content( + model="abc", version_id="123", content_id="345" + ) + assert isinstance(response, list) + + request_link.return_value = [ + RestObj({"id": "12345"}), + RestObj({"id": "3456"}), + ] + response = mr.get_model_version_content( + model="abc", version_id="123", content_id="345" + ) + assert isinstance(response, list) + + request_link.assert_any_call( + {"id": 000}, + "content", + headers={"Accept": "text/plain"}, + ) diff --git a/tests/unit/test_score_definitions.py b/tests/unit/test_score_definitions.py index d1210866..1ebdc462 100644 --- a/tests/unit/test_score_definitions.py +++ b/tests/unit/test_score_definitions.py @@ -63,89 +63,190 @@ def test_create_score_definition(): 
"sasctl._services.cas_management.CASManagement.upload_file" ) as upload_file: with mock.patch( - "sasctl._services.score_definitions.ScoreDefinitions.post" - ) as post: - # Invalid model id test case - get_model.return_value = None - with pytest.raises(HTTPError): - sd.create_score_definition( - score_def_name="test_create_sd", - model="12345", - table_name="test_table", - ) - # Valid model id but invalid table name with no table_file argument test case - get_model_mock = { - "id": "12345", - "projectId": "54321", - "projectVersionId": "67890", - "name": "test_model", - } - get_model.return_value = get_model_mock - get_table.return_value = None - with pytest.raises(HTTPError): - sd.create_score_definition( - score_def_name="test_create_sd", - model="12345", - table_name="test_table", - ) - - # Invalid table name with a table_file argument that doesn't work test case - get_table.return_value = None - upload_file.return_value = None - get_table.return_value = None - with pytest.raises(HTTPError): - sd.create_score_definition( - score_def_name="test_create_sd", - model="12345", - table_name="test_table", - table_file="test_path", - ) - - # Valid table_file argument that successfully creates a table test case - get_table.return_value = None - upload_file.return_value = RestObj - get_table_mock = {"tableName": "test_table"} - get_table.return_value = get_table_mock - response = sd.create_score_definition( - score_def_name="test_create_sd", - model="12345", - table_name="test_table", - table_file="test_path", - ) - assert response - - # Valid table_name argument test case - get_table.return_value = get_table_mock - response = sd.create_score_definition( - score_def_name="test_create_sd", - model="12345", - table_name="test_table", - table_file="test_path", - ) - assert response - - # Checking response with inputVariables in model elements - get_model_mock = { - "id": "12345", - "projectId": "54321", - "projectVersionId": "67890", - "name": "test_model", - 
"inputVariables": [ - {"name": "first"}, - {"name": "second"}, - {"name": "third"}, - ], - } - get_model.return_value = get_model_mock - get_table.return_value = get_table_mock - response = sd.create_score_definition( - score_def_name="test_create_sd", - model="12345", - table_name="test_table", - ) - assert response - assert post.call_count == 3 - - data = post.call_args - json_data = json.loads(data.kwargs["data"]) - assert json_data["mappings"] != [] + "sasctl._services.model_repository.ModelRepository.get_model_or_version" + ) as get_model_or_version: + with mock.patch( + "sasctl._services.score_definitions.ScoreDefinitions.is_uuid" + ) as is_uuid: + with mock.patch( + "sasctl._services.score_definitions.ScoreDefinitions.post" + ) as post: + + # Invalid model id test case + get_model.return_value = None + with pytest.raises(HTTPError): + sd.create_score_definition( + score_def_name="test_create_sd", + model="12345", + table_name="test_table", + ) + # Valid model id but invalid table name with no table_file argument test case + get_model_mock = { + "id": "12345", + "projectId": "54321", + "projectVersionId": "67890", + "name": "test_model", + } + get_model.return_value = get_model_mock + get_table.return_value = None + with pytest.raises(HTTPError): + sd.create_score_definition( + score_def_name="test_create_sd", + model="12345", + table_name="test_table", + ) + + # Invalid table name with a table_file argument that doesn't work test case + get_table.return_value = None + upload_file.return_value = None + get_table.return_value = None + with pytest.raises(HTTPError): + sd.create_score_definition( + score_def_name="test_create_sd", + model="12345", + table_name="test_table", + table_file="test_path", + ) + + # Valid table_file argument that successfully creates a table test case + get_table.return_value = None + upload_file.return_value = RestObj + get_table_mock = {"tableName": "test_table"} + get_table.return_value = get_table_mock + response = 
sd.create_score_definition( + score_def_name="test_create_sd", + model="12345", + table_name="test_table", + table_file="test_path", + ) + assert response + + # Valid table_name argument test case + get_table.return_value = get_table_mock + response = sd.create_score_definition( + score_def_name="test_create_sd", + model="12345", + table_name="test_table", + table_file="test_path", + ) + assert response + + # Checking response with inputVariables in model elements + get_model_mock = { + "id": "12345", + "projectId": "54321", + "projectVersionId": "67890", + "name": "test_model", + "inputVariables": [ + {"name": "first"}, + {"name": "second"}, + {"name": "third"}, + ], + } + get_model.return_value = get_model_mock + get_table.return_value = get_table_mock + response = sd.create_score_definition( + score_def_name="test_create_sd", + model="12345", + table_name="test_table", + ) + assert response + assert post.call_count == 3 + + data = post.call_args + json_data = json.loads(data.kwargs["data"]) + assert json_data["mappings"] != [] + assert ( + json_data["objectDescriptor"]["name"] + == "test_model (latest)" + ) + assert ( + json_data["properties"]["versionedModel"] + == "test_model (latest)" + ) + + # Model version dictionary with no model version name + with pytest.raises(ValueError): + response = sd.create_score_definition( + score_def_name="test_create_sd", + model="12345", + table_name="test_table", + model_version={}, + ) + + # Model version as a model version name string, not UUID + get_model.return_value = get_model_mock + get_table.return_value = get_table_mock + is_uuid.return_value = False + response = sd.create_score_definition( + score_def_name="test_create_sd", + model="12345", + table_name="test_table", + model_version="1.0", + ) + assert response + assert post.call_count == 4 + + data = post.call_args + json_data = json.loads(data.kwargs["data"]) + assert ( + json_data["objectDescriptor"]["name"] + == "test_model (1.0)" + ) + assert ( + 
json_data["properties"]["versionedModel"] + == "test_model (1.0)" + ) + + # Model version as a dict with modelVersionName key + get_model.return_value = get_model_mock + get_table.return_value = get_table_mock + is_uuid.return_value = False + response = sd.create_score_definition( + score_def_name="test_create_sd", + model="12345", + table_name="test_table", + model_version={"modelVersionName": "1.0"}, + ) + assert response + assert post.call_count == 5 + + data = post.call_args + json_data = json.loads(data.kwargs["data"]) + assert ( + json_data["objectDescriptor"]["name"] + == "test_model (1.0)" + ) + assert ( + json_data["properties"]["versionedModel"] + == "test_model (1.0)" + ) + + # Model version as a dictionary with model version name key + get_version_mock = { + "id": "3456", + "modelVersionName": "1.0", + } + get_model.return_value = get_model_mock + get_table.return_value = get_table_mock + is_uuid.return_value = True + get_model_or_version.return_value = get_version_mock + response = sd.create_score_definition( + score_def_name="test_create_sd", + model="12345", + table_name="test_table", + model_version="3456", + ) + assert response + assert post.call_count == 6 + + data = post.call_args + json_data = json.loads(data.kwargs["data"]) + assert ( + json_data["objectDescriptor"]["name"] + == "test_model (1.0)" + ) + assert ( + json_data["properties"]["versionedModel"] + == "test_model (1.0)" + ) diff --git a/tests/unit/test_write_json_files.py b/tests/unit/test_write_json_files.py index b0a3c6a0..3321fc30 100644 --- a/tests/unit/test_write_json_files.py +++ b/tests/unit/test_write_json_files.py @@ -699,8 +699,9 @@ def test_create_requirements_json(change_dir): dtc = dtc.fit(x_train, y_train) with open(tmp_dir / "DecisionTreeClassifier.pickle", "wb") as pkl_file: pickle.dump(dtc, pkl_file) - jf.create_requirements_json(tmp_dir, Path(tmp_dir)) + jf.create_requirements_json(tmp_dir, Path(tmp_dir), True) assert (Path(tmp_dir) / "requirements.json").exists() + 
assert (Path(tmp_dir) / "requirements.txt").exists() json_dict = jf.create_requirements_json(tmp_dir) expected = [ @@ -709,13 +710,20 @@ def test_create_requirements_json(change_dir): "command": f"pip install numpy=={np.__version__}", }, { - "step": "install sklearn", - "command": f"pip install sklearn=={sk.__version__}", + "step": "install scikit-learn", + "command": f"pip install scikit-learn=={sk.__version__}", }, ] unittest.TestCase.maxDiff = None unittest.TestCase().assertCountEqual(json_dict, expected) + # Verify requirements.txt content + with open(Path(tmp_dir) / "requirements.txt", "r") as file: + requirements_content = [line.strip() for line in file.readlines()] + + assert f"numpy=={np.__version__}" in requirements_content + assert f"scikit-learn=={sk.__version__}" in requirements_content + class TestAssessBiasHelpers(unittest.TestCase): md_1 = pd.DataFrame({"Value": [0], "Base": ["A"], "Compare": ["C"]})