diff --git a/.github/workflows/ibm-ce-tests.yml b/.github/workflows/ibm-ce-tests.yml
new file mode 100644
index 000000000..038793f28
--- /dev/null
+++ b/.github/workflows/ibm-ce-tests.yml
@@ -0,0 +1,91 @@
+name: IBM CE Tests
+
+on:
+ workflow_dispatch:
+    # this allows running the workflow manually through the GitHub dashboard
+
+env:
+ HAVE_LITHOPS_CONFIG: ${{ secrets.LITHOPS_CONFIG != '' }}
+ LITHOPS_CONFIG_FILE: /tmp/lithops_config.yaml
+
+jobs:
+
+ determine_runnable_test_jobs:
+ runs-on: ubuntu-latest
+ if: github.event_name == 'pull_request' || github.event_name == 'workflow_dispatch'
+
+ outputs:
+ localhost: ${{ steps.script.outputs.localhost }}
+ code_engine: ${{ steps.script.outputs.code_engine }}
+
+ steps:
+ - name: Set jobs to run
+ id: script
+ run: |
+ echo "localhost=true" >> $GITHUB_OUTPUT
+ echo "code_engine=false" >> $GITHUB_OUTPUT
+
+
+ localhost_tests:
+ runs-on: ubuntu-latest
+ needs: determine_runnable_test_jobs
+ if: needs.determine_runnable_test_jobs.outputs.localhost == 'true'
+
+ steps:
+ - name: Clone Lithops repository
+ uses: actions/checkout@v4
+
+ - name: Install Python 3.10
+ uses: actions/setup-python@v5
+ with:
+ python-version: '3.10'
+
+ - name: Install Lithops
+ run: |
+ pip3 install -U .[tests]
+
+ - name: Run Lithops tests
+ run: |
+ cd lithops/tests
+ pytest -v --backend localhost --storage localhost
+
+
+ ibm_ce_cos_tests:
+ runs-on: ubuntu-latest
+ needs: determine_runnable_test_jobs
+ if: needs.determine_runnable_test_jobs.outputs.code_engine == 'true'
+
+ steps:
+ - name: Clone Lithops repository
+ uses: actions/checkout@v4
+
+ - name: Install Python 3.10
+ uses: actions/setup-python@v5
+ with:
+ python-version: '3.10'
+
+ - name: Install Lithops
+ run: |
+ pip3 install -U .[tests]
+
+ - name: Install Lithops config
+ id: config
+ run: |
+ echo -n -e "${{ secrets.LITHOPS_CONFIG }}" > $LITHOPS_CONFIG_FILE
+
+ - name: Build new runtime
+ run: |
+ docker login -u ${{ secrets.DOCKER_USER }} -p ${{ secrets.DOCKER_TOKEN }}
+ cd runtime/code_engine
+ lithops runtime build -f Dockerfile.githubci ${{ secrets.DOCKER_USER }}/lithops-ce-gihub-ci:${{ github.run_id }} -b code_engine
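+          # Point the 'runtime:' key in the Lithops config file to the CI image built above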
+ sed -i '/runtime: lithops-ce/c\ runtime: '${{ secrets.DOCKER_USER }}'/lithops-ce-gihub-ci:'${{ github.run_id }} $LITHOPS_CONFIG_FILE
+
+ - name: Run Lithops tests
+ run: |
+ cd lithops/tests
+ pytest -v --backend code_engine --storage ibm_cos
+
+ - name: Delete Lithops CE runtime
+ if: needs.determine_runnable_test_jobs.outputs.code_engine == 'true'
+ run: |
+ lithops runtime delete ${{ secrets.DOCKER_USER }}/lithops-ce-gihub-ci:${{ github.run_id }} -b code_engine -s ibm_cos
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
deleted file mode 100644
index 89ce84e6b..000000000
--- a/.github/workflows/main.yml
+++ /dev/null
@@ -1,134 +0,0 @@
-name: "on-pull-request"
-
-on:
- pull_request:
- branches:
- - master
-
- workflow_dispatch:
- # this allows to run the workflow manually through the github dashboard
-
-env:
- HAVE_LITHOPS_CONFIG: ${{ secrets.LITHOPS_CONFIG != '' }}
- LITHOPS_CONFIG_FILE: /tmp/lithops_config.yaml
-
-jobs:
-
- determine_runnable_jobs:
- runs-on: ubuntu-latest
- if: github.event_name == 'pull_request' || github.event_name == 'workflow_dispatch'
-
- outputs:
- localhost: ${{ steps.script.outputs.localhost }}
- ibm_cf: ${{ steps.script.outputs.ibm_cf }}
- code_engine: ${{ steps.script.outputs.code_engine }}
-
- steps:
- - name: Set jobs to run
- id: script
- run: |
- echo "localhost=true" >> $GITHUB_OUTPUT
- echo "ibm_cf=false" >> $GITHUB_OUTPUT
- echo "code_engine=false" >> $GITHUB_OUTPUT
-
-
- localhost_tests:
- runs-on: ubuntu-latest
- needs: determine_runnable_jobs
- if: needs.determine_runnable_jobs.outputs.localhost == 'true'
-
- steps:
- - name: Clone Lithops repository
- uses: actions/checkout@v3
-
- - name: Install Python 3.10
- uses: actions/setup-python@v4
- with:
- python-version: '3.10'
-
- - name: Install Lithops
- run: |
- pip3 install -U .
-
- - name: Run Lithops tests
- run: |
- lithops test -b localhost -s localhost
-
-
- ibm_ce_cos_tests:
- runs-on: ubuntu-latest
- needs: determine_runnable_jobs
- if: needs.determine_runnable_jobs.outputs.code_engine == 'true'
-
- steps:
- - name: Clone Lithops repository
- uses: actions/checkout@v3
-
- - name: Install Python 3.10
- uses: actions/setup-python@v4
- with:
- python-version: '3.10'
-
- - name: Install Lithops
- run: |
- pip3 install -U .
-
- - name: Install Lithops config
- id: config
- run: |
- echo -n -e "${{ secrets.LITHOPS_CONFIG }}" > $LITHOPS_CONFIG_FILE
-
- - name: Build new runtime
- run: |
- docker login -u ${{ secrets.DOCKER_USER }} -p ${{ secrets.DOCKER_TOKEN }}
- cd runtime/code_engine
- lithops runtime build -f Dockerfile.githubci ${{ secrets.DOCKER_USER }}/lithops-ce-gihub-ci:${{ github.run_id }} -b code_engine
- sed -i '/runtime: lithops-ce/c\ runtime: '${{ secrets.DOCKER_USER }}'/lithops-ce-gihub-ci:'${{ github.run_id }} $LITHOPS_CONFIG_FILE
-
- - name: Run Lithops tests
- run: |
- lithops test -b code_engine -s ibm_cos -k
-
- - name: Delete Lithops CE runtime
- if: needs.determine_runnable_jobs.outputs.code_engine == 'true'
- run: |
- lithops runtime delete ${{ secrets.DOCKER_USER }}/lithops-ce-gihub-ci:${{ github.run_id }} -b code_engine -s ibm_cos
-
-
- ibm_cf_cos_tests:
- runs-on: ubuntu-latest
- needs: determine_runnable_jobs
- if: needs.determine_runnable_jobs.outputs.ibm_cf == 'true'
-
- steps:
- - name: Clone Lithops repository
- uses: actions/checkout@v3
-
- - name: Install Python 3.10
- uses: actions/setup-python@v4
- with:
- python-version: '3.10'
-
- - name: Install Lithops
- run: |
- pip3 install -U .
-
- - name: Install Lithops config
- id: config
- run: |
- echo -n -e "${{ secrets.LITHOPS_CONFIG }}" > $LITHOPS_CONFIG_FILE
-
- - name: Build new runtime
- run: |
- docker login -u ${{ secrets.DOCKER_USER }} -p ${{ secrets.DOCKER_TOKEN }}
- cd runtime/ibm_cf
- lithops runtime build -f Dockerfile.githubci ${{ secrets.DOCKER_USER }}/lithops-cf-gihub-ci:${{ github.run_id }} -b ibm_cf
- sed -i '/runtime: lithops-cf/c\ runtime: '${{ secrets.DOCKER_USER }}'/lithops-cf-gihub-ci:'${{ github.run_id }} $LITHOPS_CONFIG_FILE
-
- - name: Run Lithops tests
- run: |
- lithops test -b ibm_cf -s ibm_cos -k
-
- - name: Delete Lithops CF runtime
- run: |
- lithops runtime delete ${{ secrets.DOCKER_USER }}/lithops-cf-gihub-ci:${{ github.run_id }} -b ibm_cf -s ibm_cos
diff --git a/.github/workflows/python-linting.yml b/.github/workflows/python-linting.yml
new file mode 100644
index 000000000..0e953752b
--- /dev/null
+++ b/.github/workflows/python-linting.yml
@@ -0,0 +1,39 @@
+name: Python Linting
+
+on:
+ pull_request:
+ branches:
+ - master
+ paths:
+ - 'setup.py'
+ - 'lithops/**'
+
+ workflow_dispatch:
+    # this allows running the workflow manually through the GitHub dashboard
+
+jobs:
+
+ flake8:
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Clone Lithops repository
+ uses: actions/checkout@v4
+
+ - name: Install Python 3.10
+ uses: actions/setup-python@v5
+ with:
+ python-version: '3.10'
+
+ - name: Install dependencies
+ run: |
+ python3 -m pip install --upgrade pip
+ pip3 install -U flake8
+
+ - name: Install Lithops
+ run: |
+ pip3 install -U .
+
+ - name: Lint with flake8
+ run: |
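+          # Ignored checks: W605 (invalid escape sequence) and W503 (line break before binary operator)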
+ flake8 lithops --count --max-line-length=180 --statistics --ignore W605,W503
diff --git a/.github/workflows/tests-all-os.yml b/.github/workflows/tests-all-os.yml
new file mode 100644
index 000000000..7459380f8
--- /dev/null
+++ b/.github/workflows/tests-all-os.yml
@@ -0,0 +1,94 @@
+name: Tests all OS
+
+on:
+ workflow_dispatch:
+
+jobs:
+ localhost_tests:
+ runs-on: ${{ matrix.os }}
+ timeout-minutes: 6
+ env:
+ OBJC_DISABLE_INITIALIZE_FORK_SAFETY: YES
+
+ strategy:
+ fail-fast: False
+ matrix:
+ include:
+ # Linux
+ - os: ubuntu-latest
+ python-version: "3.10"
+ - os: ubuntu-latest
+ python-version: "3.11"
+ - os: ubuntu-latest
+ python-version: "3.12"
+ - os: ubuntu-latest
+ python-version: "3.13"
+ - os: ubuntu-22.04
+ python-version: "3.10"
+ - os: ubuntu-22.04
+ python-version: "3.11"
+
+ # macOS
+ - os: macos-latest
+ python-version: "3.10"
+ - os: macos-latest
+ python-version: "3.11"
+ - os: macos-latest
+ python-version: "3.12"
+ - os: macos-latest
+ python-version: "3.13"
+ - os: macos-15
+ python-version: "3.10"
+ - os: macos-15
+ python-version: "3.11"
+
+ # Windows
+ - os: windows-latest
+ python-version: "3.10"
+ - os: windows-latest
+ python-version: "3.11"
+ - os: windows-latest
+ python-version: "3.12"
+ - os: windows-latest
+ python-version: "3.13"
+
+ steps:
+ - name: Clone Lithops repository
+ uses: actions/checkout@v4
+
+ - name: Install Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+
+ - name: Install Lithops
+ run: |
+ pip3 install -U .[tests]
+
+ - name: Create Lithops config file
+ run: |
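+          # Write a minimal config: localhost backend v2, fast monitoring interval, and DEBUG logging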
+ mkdir -p $HOME/.lithops
+ echo "lithops:" >> $HOME/.lithops/config
+ echo " monitoring_interval: 0.1" >> $HOME/.lithops/config
+ echo " log_level: DEBUG" >> $HOME/.lithops/config
+ echo " include_modules: None" >> $HOME/.lithops/config
+ echo "localhost:" >> $HOME/.lithops/config
+ echo " version: 2" >> $HOME/.lithops/config
+
+ - name: Run Lithops tests
+ run: |
+ cd lithops/tests
+ # pytest -v --durations=0 --backend localhost --storage localhost
+ pytest -v --durations=0 -o log_cli=true --log-cli-level=DEBUG --backend localhost --storage localhost
+
+ - name: Display last 500 lines of the Lithops log file
+ if: cancelled() || failure()
+ shell: bash
+ run: |
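+          # The localhost runner log is written to the OS-specific temp directory, so the path differs per runner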
+ if [ "$RUNNER_OS" == "Linux" ]; then
+ tail -n 500 /tmp/lithops-runner/localhost-runner.log
+ elif [ "$RUNNER_OS" == "macOS" ]; then
+ tail -n 500 $TMPDIR/lithops-runner/localhost-runner.log
+ elif [ "$RUNNER_OS" == "Windows" ]; then
+ tail -n 500 "C:\Users\RUNNER~1\AppData\Local\Temp\lithops-root\localhost-runner.log"
+ fi
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
new file mode 100644
index 000000000..315f74ba1
--- /dev/null
+++ b/.github/workflows/tests.yml
@@ -0,0 +1,41 @@
+name: Tests
+
+on:
+ pull_request:
+ branches:
+ - master
+ paths:
+ - 'setup.py'
+ - 'lithops/**'
+
+ workflow_dispatch:
+    # this allows running the workflow manually through the GitHub dashboard
+
+jobs:
+
+ localhost_tests:
+ runs-on: ubuntu-22.04
+ timeout-minutes: 5
+
+ strategy:
+ fail-fast: false
+ matrix:
+ python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
+
+ steps:
+ - name: Clone Lithops repository
+ uses: actions/checkout@v4
+
+ - name: Install Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+
+ - name: Install Lithops
+ run: |
+ pip3 install -U .[tests]
+
+ - name: Run Lithops tests
+ run: |
+ cd lithops/tests
+ pytest -v --backend localhost --storage localhost
diff --git a/.gitignore b/.gitignore
index 252671700..1817e5ee3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,10 @@ dist/
plots/
*.egg-info
lithops_*.zip
+*.log
+*.txt
+*.csv
+*.coverage*
# Virtual environments
.env
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 484adc067..de44092d8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,224 @@
# Changelog
-## [v3.0.1.dev0]
+## [v3.6.3.dev0]
+
+### Added
+-
+
+### Changed
+-
+
+### Fixed
+-
+
+
+## [v3.6.2]
+
+### Fixed
+- [Localhost] Fix shutil.Error caused by existing __pycache__ directory when copying files in the runner
+- [Executor] Make retry count configurable in RetryingFunctionExecutor
+
+
+## [v3.6.1]
+
+### Fixed
+- [GCP Functions] Poll long-running operations for GCP Function deployment
+- [GCP Functions] Retry function creation on failure to improve reliability
+- [IBM CE] Sanitize user_key in IBM CE to be RFC 1123 compliant
+- [CLI] Fix storage list error
+- [K8s] Fixed bug with first execution of K8s and Singularity
+- [Core] Prevent job monitor from stopping abruptly on iteration error causing hanging jobs
+
+
+## [v3.6.0]
+
+### Added
+- [Core] Added support for python 3.13
+- [AWS EC2] Add support for configuring EBS volumes in EC2 lithops workers
+- [AWS EC2] Add support for specifying CIDR block in EC2 public subnet
+
+### Fixed
+- [Standalone] Fixed an issue causing workers to stop prematurely in Consume mode
+- [Invoker] Reduced the number of threads used in the async FaaS Invoker
+- [Monitoring] Fixed token bucket issue that prevented generating the correct number of tokens
+- [Code Engine] Allow to build the default runtime with Python 3.13
+- [Monitoring] Fixed race condition in RMQ Monitor
+- [AWS S3] Fixed InvalidLocationConstraint error handling in AWS S3
+
+
+## [v3.5.1]
+
+### Fixed
+- [Core] Fix issue with the "if self._call_output" check in future.py for objects with ambiguous truth values
+- [Standalone] Consume execution mode failing to run the installation script thus failing all the jobs
+- [Azure VMs] Consume execution mode failing to execute jobs
+
+
+## [v3.5.0]
+
+### Added
+- [Singularity] Added new singularity compute backend
+- [Oracle Functions] Added support for python 3.11
+- [k8s] Added 'master_timeout' parameter to k8s backend
+- [AWS Lambda] Added user_tags to the runtime deployment
+
+### Fixed
+- [Storage] Fixed "KeyError: 'monitoring_interval'" error when instantiating Storage() class
+- [k8s] Fixed bug between threads when there are multiple executions
+- [OpenWhisk] Fixed issue in the list_runtimes method
+- [OpenWhisk] Fixed runtime name formatting for self hosted container registries
+
+
+## [v3.4.1]
+
+### Added
+- [Localhost] Added error capturing and logging for job/task process failures
+
+### Fixed
+- [Worker] Fixed potential issue that can appear during 'func_obj' loading from cache
+
+
+## [v3.4.0]
+
+### Added
+- [CLI] Allow to pass a name in the "lithops runtime list" command
+- [Ceph] Added extra region parameter to Ceph backend
+
+### Changed
+- [Setup] Moved IBM and AWS deps to lithops[ibm] and lithops[aws] extra
+- [Setup] Moved kubernetes and knative deps to lithops[kubernetes] and lithops[knative] extra
+- [Setup] Moved minio, ceph and redis deps to lithops[minio], lithops[ceph] and lithops[redis] extra
+- [Setup] Moved matplotlib, seaborn, numpy and pandas dependencies to lithops[plotting] extra
+- [Setup] Removed unused 'lxml', 'docker' and 'python-dateutil' packages from the setup.py
+- [Core] Detached progress bar from INFO logs
+- [Future] Exposed 'wait_dur_sec' and 'retries' in future.wait() and future.get_result() methods
+- [Localhost] Upgraded localhost backend v2 and set it as the default localhost backend
+- [Localhost] Set monitoring_interval to 0.1 in the localhost storage backend
+- [AWS Batch] Updated CPU and Memory resource limits
+
+### Fixed
+- [AWS Lambda] Fixed wrong AWS Lambda delete runtime_name match semantics
+- [Worker] Fixed potential issue that can appear during 'func_obj' loading from cache
+- [Monitor] Fixed potential 'keyerror' exceptions
+- [Swift] Fixed OpenStack Swift parameters and authentication by adding domain information
+- [AWS Batch] Fixed missing ecsTaskJobRole
+
+
+## [v3.3.0]
+
+### Added
+- [Core] Added a mechanism to automatically retry failed tasks
+- [Azure Containers] Automatically login to the container registry if the credentials are in the config
+
+### Changed
+- [AWS] Eliminated the need for access and secret keys in the configuration
+- [Tests] Moved tests from unittest to pytest
+
+### Fixed
+- [AWS Lambda] Fixed runtime deletion with "lithops runtime delete"
+- [Localhost] Fixed issue with the job manager
+- [Serializer] Fix serialization bug which triggers side effects on dynamic attributes
+- [Worker] Removed "distutils" lib imports as it is deprecated in python 3.12
+- [Serverless] Allow to build container runtimes with the MacBook Mx chip
+- [K8s] Fixed task granularity calculation and memory units issue (RabbitMQ version)
+- [AWS Lambda] Fixed AWS Lambda function Name for SSO users
+- [AWS] Fixed generated user-key for SSO users
+- [Azure VMs] Fixed worker creation and communication
+
+
+## [v3.2.0]
+
+### Added
+- [Lithops] Added support for Python 3.12
+- [CLI] Added "--include" parameter in "lithops image build" to allow users to upload local files to the VM image
+
+### Changed
+- [Standalone] Use redis in the master VM to store all the relevant data about jobs and workers
+- [Standalone] Use redis to store the work queues
+- [Standalone] Improved resiliency and worker granularity
+- [CLI] Show the timestamp in the local timezone format on "lithops job list"
+- [CLI] Show worker creation timestamp and time-to-dismantle on "lithops worker list"
+
+### Fixed
+- [SSH Cli] Fixed minor error with the "err" variable
+- [Cli] Fixed job status on "lithops job list" for standalone backends
+- [Standalone] Fixed issue in the "lithops image build" that appears when the vpc is already created
+- [Future] Fixed issue with missing 'worker_end_tstamp' variable
+
+
+## [v3.1.2]
+
+### Added
+- [Plots] Allow to set the figure size of the plots
+- [Stats] Added new CPU, Memory and Network statistics in the function results
+- [IBM VPC] Added a new parameter to enable/disable resource existence check in the platform
+
+### Changed
+- [Config] Renamed 'customized_runtime' to 'runtime_include_function'
+- [IBM VPC] Increased the total number of available IPs in the private network
+- [Standalone] Do not stop the VM immediately after a job in the Consume mode
+
+### Fixed
+- [Standalone] Fixed issue that appears when the invocation payload is too big
+- [Invoker] Fixed "runtime_include_function" function/modules path
+- [AWS EC2] Reset the public IP address of the master VM on stop
+
+
+## [v3.1.1]
+
+### Added
+- [k8s] Added a new way of invoking functions using a RabbitMQ work queue
+- [IBM VPC] Added "zone" config parameter
+- [IBM Code Engine] Get and print an error message in case of container execution failure
+
+### Changed
+- [OpenWhisk] Updated default runtimes
+
+### Fixed
+- [Standalone] Fixed issue with a wrong value of "chunksize"
+- [IBM Code Engine] Fixed missing parameter on clean
+- [Executor] Fixed potential deadlock in wait() and get_result() when an exception is produced in a function activation
+
+
+## [v3.1.0]
+
+### Added
+- [Cli] Added new 'lithops image delete' command for standalone backends
+- [Cli] Added new 'lithops job list' command for standalone backends
+- [Cli] Added new 'lithops worker list' command for standalone backends
+- [AWS EC2] Added delete_image() method for deleting VM images through the cli
+- [IBM VPC] Added delete_image() method for deleting VM images through the cli
+- [localhost] New localhost backend v2 to maximize resource utilization when multiple maps are executed from the same FunctionExecutor
+- [Standalone] Automatically retrieve the CPU_COUNT from the VM in case worker_processes is not set in config
+- [Standalone] Keep track of the worker and job status
+- [Storage] Include "Config" parameter to download_file() and upload_file() methods for boto3 related backends
+- [Cli] Include 'worker name' in the 'lithops runtime list' cmd
+- [AWS Lambda] Created 'namespace' config key to virtually separate worker deployments
+
+### Changed
+- [Standalone] Changed default mode of execution from 'consume' to 'reuse'
+- [Joblib] Updated the joblib backend to make it compatible with new versions of joblib
+- [Joblib] Spawn only one function when 'prefer' is set to 'threads'
+- [AWS EC2] Changed default image name from "lithops-worker-default" to "lithops-ubuntu-jammy-22.04-amd64-server"
+- [IBM VPC] Changed default image name from "lithops-worker-default" to "lithops-ubuntu-22-04-3-minimal-amd64-1"
+- [Serializer] Improve serializer performance when include_modules is set in config
+- [SSH Client] Do not raise LithopsValidationError on authentication failure
+- [AWS Lambda] Renamed function name to "lithops-worker-xxxx"
+
+### Fixed
+- [Job] Fixed max data size in the invocation payload
+- [Multiprocessing] Fixed cpu_count
+- [Standalone] Start new workers when the VM instance type changes (in reuse mode)
+- [GCP Functions] Fixed issue with "function_url" variable
+- [Standalone] Fixed multiple runtime usage at the same time in master VM
+- [localhost] Get the correct docker/podman path for jobs that run in a container
+- [k8s] Limit the size of the "user" label as the maximum allowed is 63 chars
+- [Joblib] Fix shared objects utility when multiple maps run from the same executor
+- [Azure VMs] Fix wrong exception when trying to connect to the master VM for the first time
+- [Partitioner] Fix partitioner
+
+
+## [v3.0.1]
### New
- [OCI Functions] Added new 'Oracle Cloud Functions' serverless backend
@@ -8,7 +226,7 @@
### Added
- [Kubernetes] Added Redis server in master pod for shared data between workers
-- [Kubernetes] Allow to set "conntext" and "namespace" in lithops config
+- [Kubernetes] Allow to set "context" and "namespace" in lithops config
### Changed
- [CodeEngine] Create the CE project only when necessary instead of creating it always
@@ -26,15 +244,8 @@
- [Azure Virtual Machines] Added new 'Azure Virtual Machines' standalone backend
### Added
-- [AWS Lambda] Added support for python 3.10 runtimes
-- [AWS Lambda] Added support for python 3.11 runtimes
-- [Azure Functions] Added support for python 3.10 runtimes
-- [Azure Functions] Added support for python 3.11 runtimes
-- [Google Cloud Functions] Added support for python 3.11 runtimes
-- [IBM CF] Added support for python 3.11 runtimes
-- [Openwhisk] Added support for python 3.11 runtimes
-- [Aliyun Functions] Added support for python 3.10 runtimes
-- [Executor] Allow to set all the compute backend params programatically in the FunctionExecutor()
+- [Serverless] Added support for python 3.10 and 3.11 runtimes
+- [Executor] Allow to set all the compute backend params programmatically in the FunctionExecutor()
- [AWS EC2] Allow to automatically create the VPC and all the necessary resources
- [IBM VPC & AWS EC2] General fixes and Improvements
- [Executor] Allow to pass the config file location in the FunctionExecutor()
@@ -44,10 +255,10 @@
- [Cli] Added new 'lithops image build' command for standalone backends
- [Cli] Added new 'lithops image list' command for standalone backends
- [IBM VPC] Added build_image() method for automatically building VM images
-- [IBM VPC] Added list_image() method for listing the availabe VM images
+- [IBM VPC] Added list_image() method for listing the available VM images
- [AWS EC2] Added build_image() method for automatically building VM images
-- [AWS EC2] Added list_image() method for listing the availabe VM images
-- [Azure VMS] Added list_image() method for listing the availabe VM images
+- [AWS EC2] Added list_image() method for listing the available VM images
+- [Azure VMS] Added list_image() method for listing the available VM images
- [IBM CF] Automatically create a CF namespace if not provided in config
- [IBM VPC] Added Madrid (Spain) region
- [Code Engine] Automatically create a new project if not provided in config
@@ -77,6 +288,7 @@
- [Multiprocessing] Check redis connection before starting to submit jobs
- [Redis] Fixed redis backend exception regarding storage_bucket
+
## [v2.9.0]
### Added
@@ -142,7 +354,7 @@
- [Code Engine] Add CE conflict exception to retriables
- [Core] Show logs from module_dependency.py
- [GCP Functions] Fix runtime_build command
-- [Infinispan] Fix infinispan storage backend
+- [Infinispan] Fix Infinispan storage backend
- [Core] Detect a Class if passed as a lithops input function
@@ -221,7 +433,7 @@
- [Standalone] Fix cloudinit initialization script
- [Future] Fix host_status_query_count stat
- [Google Cloud Run] Fixed wrong variable name 'runtime_cpus'
-- [Google Cloud] Changed docs for Google cloud backend refering to id instead of name
+- [Google Cloud] Changed docs for the Google Cloud backend to refer to id instead of name
## [v2.5.8]
@@ -238,7 +450,7 @@
### Added
- [AWS Batch] Added AWS Batch backend
- [Standalone] Allow to start workers using a public key instead of using a password
-- [Standalone] Added diferent levels of worker verification
+- [Standalone] Added different levels of worker verification
- [Infinispan] Added new Infinispan Hot Rod storage backend
### Fixed
@@ -256,7 +468,7 @@
### Added
- [AWS_EC2] Added AWS EC2 Standalone backend
- [AWS_EC2] Allow to start workers using Spot instances in AWS EC2 Standalone backend
-- [Standalone] Added the logic to create the missing deleta of workers in reuse mode
+- [Standalone] Added the logic to create the missing delta of workers in reuse mode
- [Standalone] Cancel running job tasks on ctrl-c
- [Standalone] New logic to verify that the master VM is correctly setup
- [Standalone] Added new command "lithops attach" that allows to create live ssh connections to the master VM
@@ -269,7 +481,7 @@
- [Standalone] Fixed VM initial installation script
- [Standalone] Fixed get_workers method on master
- [Standalone] Deleted unnecessary extra worker
-- [Standalone] Ensure all workers are proppery started on reuse mode
+- [Standalone] Ensure all workers are properly started on reuse mode
- [Localhost] Fixed storage delete_objects method that was deleting the entire folder of a file
- [IBM VPC] General fixes in IBM VPC backend
@@ -277,24 +489,24 @@
## [v2.5.5]
### Added
-- [CLI] Allow to pass all availbe 'docker' parameter to 'lithops runtime build' command
+- [CLI] Allow to pass all available 'docker' parameters to the 'lithops runtime build' command
- [Multiprocessing] Add example file with different argument passing examples for Pool and Process
### Fixed
-- [Localhost] Fixed minnor issue when deleting completed jobs
+- [Localhost] Fixed minor issue when deleting completed jobs
- [Multiprocessing] Fixed args mismatch error when passing list of tuples to Pool.map
-- [Standalone] Fixed cloud-init script that ocasionaly fails to set ssh credentials
+- [Standalone] Fixed cloud-init script that occasionally fails to set ssh credentials
## [v2.5.4]
-### Fixes
+### Fixed
- [Standalone] Avoid deleting the master VM on consume mode
## [v2.5.3]
-### Fixes
+### Fixed
- [Core] Fixed lithops.map_reduce() jobs. Sometimes jobs where not finishing
- [Core] Spawn lithops.cleaner only once in the same execution instance
- [Tests] Fix when running 'lithops verify' command
@@ -309,12 +521,12 @@
- [Core] Allow to spawn the reduce function in map_reduce() after a configurable percentage of completed map activations
### Changed
-- [Config] 'max_workers' and 'worker_processess' keys must be set at backend level in config
+- [Config] 'max_workers' and 'worker_processes' keys must be set at backend level in config
- [Config] 'remote_invoker' key must be set at backend level in config
- [Config] 'customized_runtime' key must be set at lithops level in config
- [Config] 'serverless' section in config is no longer required
-### Fixes
+### Fixed
- [CodeEngine] Fixed 'max_workers' parameter to limit the number of max workers per map invocation
- [IBM CF] Create the runtime if not deployed when invoked
- [Localhost] Fix localhost paths for windows hosts
@@ -329,7 +541,7 @@
- [Localhost] Start container with user's uid:gid
- [Localhost] Extended default execution timeout to 3600 seconds
-### Fixes
+### Fixed
- [Standalone] Fixed standalone execution on consume mode
- [Aliyun FC] Fixed Aliyun Function compute backend
- [Core] Fixed 'lithops runtime build' command when the backend is not configured in config
@@ -340,7 +552,7 @@
### Added
- [CLI] Add new command in cli to list deployed runtimes
- [Standalone] Add reuse mode that allows to reuse the same VMs for all the maps
-- [Config] alow to configure worker_processes parameter in serverless and standalone sections
+- [Config] Allow to configure worker_processes parameter in serverless and standalone sections
- [Localhost] Prevent multiple jobs in the same executor to run at the same time
- [Standalone] Prevent multiple jobs submitted to the same master VM to run at the same time
- [CE] Added COS Direct endpoints for free bandwidth from/to CodeEngine
@@ -354,7 +566,7 @@
- [AWS Lambda] Add support for Python3.9
- [Standalone] ssh VM password is now a 37 chars random and dynamic password (for create and resue modes)
-### Fixes
+### Fixed
- [CE] Create a new token when it expires after 20 minutes when using the same FunctionExecutor
- [CE] Prevent exception when detecting the docker username in k8s and CE backends
- [Core] Fix minor issue in jobrunner
@@ -363,7 +575,7 @@
## [v2.4.1]
-### Fixes
+### Fixed
- [IBM VPC] Fixed a data inconsistency on consume mode
## [v2.4.0]
@@ -380,8 +592,9 @@
### Changed
- [Core] Improved performance and efficiency of the lithops cleaner background process
- [AWS Lambda] Use layer from Klayers API for pre-compiled Amazon Linux numpy binaries
+- [Core] Moved invoke_pool_threads param from map and map_reduce calls. Now it must be set at backend level in config
-### Fixes
+### Fixed
- [Localhost] Fixed error when processing localhost objects
- [Localhost] Allow to create a localhost storage instance when a config file exists with a cloud configuration
- [Core] Fixed an unusual inconsistency in configuration between 'backend' and 'mode' parameters
@@ -391,9 +604,6 @@
- [Core] Fixed 'lithops storage list' CLI when a bucket is empty
- [Standalone] Fixed execution
-### Deleted
-- [Core] Deleted invoke_pool_threads param from map and map_reduce calls. Now it must be set at backend level in config
-
## [v2.3.5]
@@ -408,7 +618,7 @@
- [Core] Add 'key' and 'bucket' attrs in localhost partitioner for compatibility with OS
- [Serverless] runtime, runtime_memory and runtime_timeout can only be set at backend level
-### Fixes
+### Fixed
- [Standalone] Fix execution
- [Core] Avoid loading the config file twice
@@ -431,7 +641,7 @@
- [multiprocessing] Improved nanomsg Pipe implementation
- [joblib] Optimized joblib backend (concurrent args data upload/download)
-### Fixes
+### Fixed
- [Core] Fixed module analyzer
- [Core] Clear only present jobs instead of all after wait() or get_result()
- [multiprocessing] Fix put/get slice to/from mp.Array or mp.RawArray
@@ -439,7 +649,7 @@
## [v2.3.3]
-### Fixes
+### Fixed
- [Core] Allow to execute class methods as lithops function
@@ -453,7 +663,7 @@
- [Core] New monitoring system
- [Core] Deleted strong dependency to pika==0.13.1
-### Fixes
+### Fixed
- [Partitioner] Fixed partitioner when obj url contains more than one subfolder
- [Cli] Fixed serverless runtime lifecycle methods
@@ -484,7 +694,7 @@
- [Core] Improved worker when chunksize is set to values > 1
- [Core] Check lithops version mismatch in host instead of in worker
-### Fixes
+### Fixed
- [Core] Overwrite the runtime set in config with the runtime set in the FunctionExecutor
- [Cli] Fixed --config param in lithops cli
- [Standalone] Fixed internal executions
@@ -521,12 +731,18 @@
- [IBM VPC] Improved IBM VPC backend
- [AWS Lambda] Lambda layer modules update
-### Fixes
+### Fixed
- [Multiprocessing] Fix issues related to Pipes and Queues
- [Multiprocessing] Fix multiprocessing.context methods
- [CodeEngine/knative] Fix getting docker username in MAC OS hosts
+## [v2.2.16]
+
+### Fixed
+- [Code Engine] Fixed the Code Engine docker image
+
+
## [v2.2.15]
### Added
@@ -630,7 +846,7 @@
### Added
- [Core] Add joblib backend for scikit-learn
-- [Cli] Add more config paramters in lithops cli
+- [Cli] Add more config parameters in lithops cli
- [IBM COS] Add 'region' config param
- [Knative] Add 'min_instances', 'max_instances' and 'concurrency' config params
@@ -750,18 +966,24 @@
- [Core] IBM VPC service client lib
- [Docker] Docker backend compatible with IBM VPC VM
-### Fixed
-- [Ceph] Fix in ceph endpoint
-
### Changed
- [Docker] Improved Docker executor
+### Fixed
+- [Ceph] Fix in Ceph endpoint
+
+
## [v1.7.2]
### Added
- [GCR] Added Google Cloud Run Backend
+
+### Changed
+- [Core] Improved Storage abstraction
+- [Core] InternalStorage uses storage abstraction
+
### Fixed
- [Core] Fixed invoker token bucket when quota limit is reached
- [Core] Fixed logging
@@ -770,11 +992,6 @@
- [Localhost] Fixed invocations ability to launch subprocesses
- [Docker] Fixed docker running as user and not root
-### Changed
-- [Core] Improved Storage abstraction
-- [Core] InternalStorage uses storage abstraction
-
-
## [v1.7.0]
### Added
@@ -868,6 +1085,7 @@
- [Core] Fixed issue with windows hosts
- [Core] Some other Internal fixes
+
## [v1.4.2]
### Added
@@ -1405,7 +1623,7 @@
- Moved some info prints to debug
- improved remote function invocation mechanism
-### Fixes
+### Fixed
- Fixing flask security issues CVE-2018-1000656
- Fixed minor issue when futures is not a list
- Fixed default config exception. API KEY is not mandatory.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 0eaaab558..bfd81394a 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -16,23 +16,39 @@ To contribute a patch:
1. Break your work into small, single-purpose patches if possible. It's much
harder to merge in a large change with a lot of disjoint features.
2. Submit the patch as a GitHub pull request against the master branch.
-3. Make sure that your code passes the unit tests.
-4. Make sure that your code passes the linter.
-5. Add new unit tests for your code.
+3. Make sure that your code passes the tests.
+4. Make sure that your code passes the linter. Install `flake8` with `pip3 install flake8` and run the following command until it reports no linting errors:
+ ```bash
+ flake8 lithops --count --max-line-length=180 --statistics --ignore W605,W503
+ ```
+5. Add new tests for your code.
-Unit testing
-------------
+Testing
+-------
-To test that all is working as expected, run either:
+To verify that everything works as expected, install `pytest`, navigate to the tests folder `lithops/tests/`, and execute:
+```bash
+pytest -v
+```
+If you made changes to a specific backend, please run tests on that backend.
+For example, if you made changes to the AWS Lambda backend, execute the tests with:
```bash
-$ lithops test
+pytest -v --backend aws_lambda --storage aws_s3
```
-or
+You can list all the available tests using:
```bash
-$ python3 -m lithops.tests.tests_main
+pytest --collect-only
```
-Please follow the guidelines in [docs/testing.md](docs/source/testing.rst) for more details.
\ No newline at end of file
+To run a specific test or group of tests, use the `-k` parameter, for example:
+```bash
+pytest -v --backend localhost --storage localhost -k test_map
+```
+
+To view all the Lithops logs in DEBUG mode while the tests run, execute:
+```bash
+pytest -o log_cli=true --log-cli-level=DEBUG --backend localhost --storage localhost
+```
diff --git a/README.md b/README.md
index 97d5e63b0..d48fc74a7 100644
--- a/README.md
+++ b/README.md
@@ -5,22 +5,12 @@
-Lithops is a Python multi-cloud distributed computing framework. It allows you to run unmodified local python code at massive scale in the main
-serverless computing platforms. Lithops delivers the user’s code into the cloud without requiring knowledge of how it is deployed and run. Moreover, its multicloud-agnostic architecture ensures portability across cloud providers.
-
-Lithops is specially suited for highly-parallel programs with little or no need for communication between processes, but it also supports parallel applications that need to share state among processes. Examples of applications that run with Lithops include Monte Carlo simulations, deep learning and machine learning processes, metabolomics computations, and geospatial analytics, to name a few.
+Lithops is a Python multi-cloud distributed computing framework that lets you run unmodified Python code at massive scale across cloud, HPC, and on-premise platforms. It supports major cloud providers and Kubernetes platforms, running your code transparently without requiring you to manage deployment or infrastructure.
+Lithops is ideal for highly parallel workloads—such as Monte Carlo simulations, machine learning, metabolomics, or geospatial analytics—and lets you tailor execution to your priorities: you can optimize for performance using AWS Lambda to launch hundreds of functions in milliseconds, or reduce costs by running the same code on AWS Batch with Spot Instances.
## Installation
@@ -37,7 +27,7 @@ Lithops is specially suited for highly-parallel programs with little or no need
```
## Configuration
-Lithops provides an extensible backend architecture (compute, storage) that is designed to work with different Cloud providers and on-premise backends. In this sense, you can code in python and run it unmodified in IBM Cloud, AWS, Azure, Google Cloud, Aliyun and Kubernetes or OpenShift.
+Lithops provides an extensible backend architecture (compute, storage) designed to work with various cloud providers and on-premise platforms. You can write your code in Python and run it unmodified across major cloud providers and Kubernetes environments.
[Follow these instructions to configure your compute and storage backends](config/)
@@ -71,12 +61,12 @@ Lithops is shipped with 2 different high-level Compute APIs, and 2 high-level St
```python
from lithops import FunctionExecutor
-def hello(name):
- return f'Hello {name}!'
+def double(i):
+ return i * 2
with FunctionExecutor() as fexec:
- fut = fexec.call_async(hello, 'World')
- print(fut.result())
+    futures = fexec.map(double, [1, 2, 3, 4])
+    print(fexec.get_result(futures))
```
```python
-from lithops.storage.cloud_proxy import os
+from lithops.storage.cloud_proxy import os
if __name__ == "__main__":
filepath = 'bar/foo.txt'
@@ -146,47 +136,36 @@ if __name__ == "__main__":
You can find more usage examples in the [examples](/examples) folder.
-## Execution Modes
-
-Lithops is shipped with 3 different modes of execution. The execution mode allows you to decide where and how the functions are executed.
-
-* [Localhost Mode](docs/source/execution_modes.rst#localhost-mode)
-
- This mode allows you to execute functions on the local machine using processes, providing a convenient and efficient way to leverage Lithops' distributed computing capabilities without relying on cloud resources. This mode is particularly useful for development, testing, and debugging purposes. This is the default mode of execution if no configuration is provided.
-
-* [Serverless Mode](docs/source/execution_modes.rst#serverless-mode)
-
- This mode allows you to efficiently execute functions on popular serverless compute services, leveraging the scalability, isolation, and automatic resource provisioning provided by these platforms. With serverless mode, you can easily parallelize task execution, harness the elastic nature of serverless environments, and simplify the development and deployment of scalable data processing workloads and parallel applications.
-
-* [Standalone Mode](docs/source/execution_modes.rst#standalone-mode)
-
- This mode provides the capability to execute functions on one or multiple virtual machines (VMs) simultaneously, in a serverless-like fashion, without requiring manual provisioning as everything is automatically created. This mode can be used in a private cluster or in the cloud, where functions within each VM are executed using parallel processes, similar to the functionality offered in localhost mode.
-
-
## Documentation
-For documentation on using Lithops, see [latest release documentation](https://lithops-cloud.github.io/docs/) or [current github docs](docs/user_guide.md).
+For documentation on using Lithops, see the [latest release documentation](https://lithops-cloud.github.io/docs/).
If you are interested in contributing, see [CONTRIBUTING.md](./CONTRIBUTING.md).
## Additional resources
### Blogs and Talks
+
+* [How to run Lithops over EC2 VMs using the new K8s backend](https://danielalecoll.medium.com/how-to-run-lithops-over-ec2-vms-using-the-new-k8s-backend-4b0a4377c4e9)
* [Simplify the developer experience with OpenShift for Big Data processing by using Lithops framework](https://medium.com/@gvernik/simplify-the-developer-experience-with-openshift-for-big-data-processing-by-using-lithops-framework-d62a795b5e1c)
* [Speed-up your Python applications using Lithops and Serverless Cloud resources](https://itnext.io/speed-up-your-python-applications-using-lithops-and-serverless-cloud-resources-a64beb008bb5)
-* [Serverless Without Constraints](https://www.ibm.com/cloud/blog/serverless-without-constraints)
* [Lithops, a Multi-cloud Serverless Programming Framework](https://itnext.io/lithops-a-multi-cloud-serverless-programming-framework-fd97f0d5e9e4)
* [CNCF Webinar - Toward Hybrid Cloud Serverless Transparency with Lithops Framework](https://www.youtube.com/watch?v=-uS-wi8CxBo)
+* [Your easy move to serverless computing and radically simplified data processing](https://www.slideshare.net/gvernik/your-easy-move-to-serverless-computing-and-radically-simplified-data-processing-238929020) Strata Data Conference, NY 2019. See video of Lithops usage [here](https://www.youtube.com/watch?v=EYa95KyYEtg&list=PLpR7f3Www9KCjYisaG7AMaR0C2GqLUh2G&index=3&t=0s) and the example of Monte Carlo [here](https://www.youtube.com/watch?v=vF5HI2q5VKw&list=PLpR7f3Www9KCjYisaG7AMaR0C2GqLUh2G&index=2&t=0s)
+
+
### Papers
-
+* [Serverful Functions: Leveraging Servers in Complex Serverless Workflows](https://dl.acm.org/doi/10.1145/3700824.3701095) - ACM Middleware Industrial Track 2024
+* [Transparent serverless execution of Python multiprocessing applications](https://dl.acm.org/doi/10.1016/j.future.2022.10.038) - Elsevier Future Generation Computer Systems 2023
* [Outsourcing Data Processing Jobs with Lithops](https://ieeexplore.ieee.org/document/9619947) - IEEE Transactions on Cloud Computing 2022
* [Towards Multicloud Access Transparency in Serverless Computing](https://www.computer.org/csdl/magazine/so/5555/01/09218932/1nMMkpZ8Ko8) - IEEE Software 2021
* [Primula: a Practical Shuffle/Sort Operator for Serverless Computing](https://dl.acm.org/doi/10.1145/3429357.3430522) - ACM/IFIP International Middleware Conference 2020. [See presentation here](https://www.youtube.com/watch?v=v698iu5YfWM)
@@ -195,4 +174,4 @@ If you are interested in contributing, see [CONTRIBUTING.md](./CONTRIBUTING.md).
# Acknowledgements
-This project has received funding from the European Union's Horizon 2020 research and innovation programme under grant agreement No 825184.
+This project has received funding from the European Union's Horizon 2020 research and innovation programme under grant agreement No 825184 (CloudButton).
diff --git a/config/README.md b/config/README.md
index e94d2576a..dbf4fd1d7 100644
--- a/config/README.md
+++ b/config/README.md
@@ -46,29 +46,30 @@ Storage Backends
@@ -112,41 +113,50 @@ if __name__ == '__main__':
```
### Providing configuration in runtime
-Example of providing configuration keys for IBM Cloud Functions and IBM Cloud Object Storage
+Example of providing configuration keys for IBM Code Engine and IBM Cloud Object Storage
```python
import lithops
-config = {'lithops': {'backend': 'ibm_cf', 'storage': 'ibm_cos'},
- 'ibm': {'region': 'REGION',
- 'iam_api_key': 'IAM_API_KEY',
- 'resource_group_id': 'RESOURCE_GROUP_ID'},
- 'ibm_cos': {'storage_bucket': 'STORAGE_BUCKET'}}
-
-def hello_world(name):
- return f'Hello {name}!'
+config = {
+ 'lithops': {
+ 'backend': 'code_engine',
+ 'storage': 'ibm_cos'
+ },
+ 'ibm': {
+ 'region': 'REGION',
+ 'iam_api_key': 'IAM_API_KEY',
+ 'resource_group_id': 'RESOURCE_GROUP_ID'
+ },
+ 'ibm_cos': {
+ 'storage_bucket': 'STORAGE_BUCKET'
+ }
+}
+
+def hello_world(number):
+ return f'Hello {number}!'
if __name__ == '__main__':
fexec = lithops.FunctionExecutor(config=config)
- fexec.call_async(hello_world, 'World')
+ fexec.map(hello_world, [1, 2, 3, 4])
print(fexec.get_result())
```
## Summary of configuration keys for Lithops
-|Group|Key|Default|Mandatory|Additional info|
-|---|---|---|---|---|
-|lithops | backend | ibm_cf | no | Compute backend implementation. IBM Cloud Functions is the default |
-|lithops | storage | ibm_cos | no | Storage backend implementation. IBM Cloud Object Storage is the default |
-|lithops | data_cleaner | True | no |If set to True, then the cleaner will automatically delete all the temporary data that was written into `storage_bucket/lithops.jobs`|
-|lithops | monitoring | storage | no | Monitoring system implementation. One of: **storage** or **rabbitmq** |
-|lithops | monitoring_interval | 2 | no | Monitoring check interval in seconds in case of **storage** monitoring |
-|lithops | data_limit | 4 | no | Max (iter)data size (in MB). Set to False for unlimited size |
-|lithops | execution_timeout | 1800 | no | Functions will be automatically killed if they exceed this execution time (in seconds). Alternatively, it can be set in the `call_async()`, `map()` or `map_reduce()` calls using the `timeout` parameter.|
-|lithops | include_modules | [] | no | Explicitly pickle these dependencies. All required dependencies are pickled if default empty list. No one dependency is pickled if it is explicitly set to None |
-|lithops | exclude_modules | [] | no | Explicitly keep these modules from pickled dependencies. It is not taken into account if you set include_modules |
-|lithops | log_level | INFO |no | Logging level. One of: WARNING, INFO, DEBUG, ERROR, CRITICAL, Set to None to disable logging |
-|lithops | log_format | "%(asctime)s [%(levelname)s] %(name)s -- %(message)s" |no | Logging format string |
-|lithops | log_stream | ext://sys.stderr |no | Logging stream. eg.: ext://sys.stderr, ext://sys.stdout|
-|lithops | log_filename | |no | Path to a file. log_filename has preference over log_stream. |
-|lithops | customized_runtime | False | no | Enables to build a new runtime with the map() function and its dependencies integrated. Only docker-based backends support this feature. |
+| Group | Key | Default | Mandatory | Additional info |
+|---------|---------------------|--------------|-----------|--------------------------------------------------------------------------------------------------|
+| lithops | backend             | aws_lambda   | no        | Compute backend implementation. `localhost` is the default if no configuration dict or config file is provided. |
+| lithops | storage             | aws_s3       | no        | Storage backend implementation. `localhost` is the default if no configuration dict or config file is provided. |
+| lithops | data_cleaner | True | no | If True, automatically deletes temporary data written to `storage_bucket/lithops.jobs`. |
+| lithops | monitoring | storage | no | Monitoring system implementation. Options: **storage** or **rabbitmq**. |
+| lithops | monitoring_interval | 2 | no | Interval in seconds for monitoring checks when using **storage** monitoring. |
+| lithops | data_limit | 4 | no | Maximum size (in MB) for iterator data chunks. Set to False for unlimited size. |
+| lithops | execution_timeout | 1800 | no | Maximum execution time in seconds for functions. Functions exceeding this time are terminated. Can also be set per call via the `timeout` parameter. |
+| lithops | include_modules | [] | no | List of dependencies to explicitly include for pickling. If empty, all required dependencies are included. If set to None, no dependencies are included. |
+| lithops | exclude_modules | [] | no | List of dependencies to exclude from pickling. Ignored if `include_modules` is set. |
+| lithops | log_level | INFO | no | Logging level. Options: WARNING, INFO, DEBUG, ERROR, CRITICAL. Set to None to disable logging. |
+| lithops | log_format | "%(asctime)s [%(levelname)s] %(name)s -- %(message)s" | no | Format string for log messages. |
+| lithops | log_stream | ext://sys.stderr | no | Logging output stream, e.g., ext://sys.stderr or ext://sys.stdout. |
+| lithops | log_filename | (empty) | no | File path for logging output. Overrides `log_stream` if set. |
+| lithops | retries | 0 | no | Number of retries for failed function invocations when using the `RetryingFunctionExecutor`. Default is 0. Can be overridden per API call. |
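+
+As a quick illustration of how these keys can be combined, below is a minimal sketch of a configuration dict passed to `FunctionExecutor` (the backend sections and the bucket name are placeholders; each backend has its own additional keys, as described in its documentation page):
+
+```python
+import lithops
+
+config = {
+    'lithops': {
+        'backend': 'aws_lambda',       # compute backend
+        'storage': 'aws_s3',           # storage backend
+        'log_level': 'DEBUG',          # WARNING, INFO, DEBUG, ERROR or CRITICAL
+        'monitoring_interval': 2,      # seconds between storage monitoring checks
+        'data_limit': 4,               # max (iter)data size in MB
+        'retries': 2                   # used by RetryingFunctionExecutor
+    },
+    'aws_s3': {
+        'storage_bucket': 'STORAGE_BUCKET'
+    }
+}
+
+fexec = lithops.FunctionExecutor(config=config)
+```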
diff --git a/config/config_template.yaml b/config/config_template.yaml
index 11b1e81fa..33bc462a9 100644
--- a/config/config_template.yaml
+++ b/config/config_template.yaml
@@ -1,6 +1,6 @@
#lithops:
- #backend: ibm_cf
- #storage: ibm_cos
+ #backend: aws_lambda
+ #storage: aws_s3
#data_cleaner:
#monitoring: storage
#monitoring_interval: 2
@@ -12,13 +12,11 @@
#log_format: "%(asctime)s [%(levelname)s] %(name)s -- %(message)s"
#log_stream: ext://sys.stdout
#log_filename
- #customized_runtime:
#localhost:
#runtime: python3
#worker_processes: CPU_COUNT
-
#ibm:
#iam_api_key:
#region :
@@ -36,6 +34,7 @@
#remote_invoker:
#max_workers: Default is 1200
#worker_processes: Default is 1
+ #runtime_include_function:
#code_engine:
#namespace:
@@ -51,6 +50,7 @@
#max_workers: Default is 1000
#worker_processes: Default is 1
#connection_retries:
+ #runtime_include_function:
#ibm_vpc:
#region :
@@ -72,9 +72,8 @@
#singlesocket: # Optional, default is False
#runtime:
- #exec_mode: consume
+ #exec_mode: reuse
#auto_dismantle: True
- #pull_runtime:
#hard_dismantle_timeout: 3600
#soft_dismantle_timeout: 300
#workers_policy: # Optional, strict/permissive. Default is 'permissive'
@@ -100,9 +99,8 @@
#worker_processes: Default is 2
#runtime:
- #exec_mode: consume
+ #exec_mode: reuse
#auto_dismantle: True
- #pull_runtime:
#hard_dismantle_timeout: 3600
#soft_dismantle_timeout: 300
#workers_policy: # Optional, strict/permissive. Default is 'permissive'
@@ -143,6 +141,7 @@
#ibm_cos:
#storage_bucket:
#region:
+ #service_instance_id:
#endpoint:
#private_endpoint:
#api_key:
diff --git a/docs/api_futures.md b/docs/api_futures.md
deleted file mode 100644
index 4eb0d4798..000000000
--- a/docs/api_futures.md
+++ /dev/null
@@ -1,425 +0,0 @@
-# Lithops Futures API Details
-
-## Executor
-The primary object in Lithops is the executor. The standard way to get everything set up is to import `lithops`, and create an instance of one of the available modes of executions.
-
-Lithops is shipped with 3 modes of execution: **Localhost**, **Serverless** and **Standalone**. In this sense, each mode of execution has its own executor class:
-
-* `lithops.LocalhostExecutor()`: Executor that uses local processes to run jobs in the local machine.
-* `lithops.ServerlessExecutor()`: Executor to run jobs in one of the available serverless compute backends.
-* `lithops.StandaloneExecutor()`: Executor to run jobs in one of the available standalone compute backends.
-
-Additionally, Lithops includes a top-level function executor, which encompasses all three previous executors:
-
-* `lithops.FunctionExecutor()`: Generic executor that will use the configuration to determine its mode of execution, i.e., based on the configuration it will be **localhost**, **serverless** or **standalone**.
-
-
-By default, the executor load the configuration from the config file. Alternatively, you can pass the configuration with a python dictionary. In any case, note that all the parameters set in the executor will overwrite those set in the configuration.
-
-
-The available calls within an executor are:
-
-|API Call| Type | Description|
-|---|---|---|
-|[call_async()](api_futures.md#executorcall_async) | Async. | Method used to spawn one function activation |
-|[map()](api_futures.md#executormap) | Async. | Method used to spawn multiple function activations |
-|[map_reduce()](api_futures.md#executormap_reduce) | Async. | Method used to spawn multiple function activations with one (or multiple) reducers|
-|[wait()](api_futures.md#executorwait) | Sync. | Wait for the function activations to complete. It blocks the local execution until all the function activations finished their execution (configurable)|
-|[get_result()](api_futures.md#executorget_result) | Sync. | Method used to retrieve the results of all function activations. The results are returned within an ordered list, where each element of the list is the result of one activation|
-|[plot()](api_futures.md#executorplot) | Sync. | Method used to create execution plots |
-|[job_summary()](api_futures.md#jobsummary) | Sync. | Method used to create a summary file of the executed jobs. It includes times and money |
-|[clean()](api_futures.md#executorclean) | Async. | Method used to clean the temporary data generated by Lithops|
-
-
-**LocalhostExecutor(\*\*kwargs)**
-
-Initialize and return Localhost executor object.
-
-|Parameter | Default | Description|
-|---|---|---|
-|config | None | Settings passed in here will override those in lithops_config|
-|runtime | None | Name of the docker image to run the functions |
-|workers | cpu_count | Max number of parallel workers |
-|storage | localhost | Storage backend to store temp data|
-|monitoring | storage | Monitoring system implementation. One of: storage, rabbitmq |
-|log_level | INFO | Log level printing (INFO, DEBUG, ...). Set it to None to hide all logs. If this is param is set, all logging params in config are disabled|
-
-Usage:
-
-```python
-import lithops
-fexec = lithops.LocalhostExecutor()
-```
-
-**ServerlessExecutor(\*\*kwargs)**
-
-Initialize and return a Serverless executor object.
-
-|Parameter | Default | Description|
-|---|---|---|
-|config | None | Settings passed in here will override those in lithops_config|
-|backend | ibm_cf | Serverless compute backend to run the functions|
-|runtime | None | Name of the docker image to run the functions |
-|runtime_memory | 256 | Memory (in MB) to use to run the functions |
-|storage | ibm_cos | Storage backend to store temp data|
-|workers | *depends of the backend* | Max number of parallel workers |
-|monitoring | storage | Monitoring system implementation. One of: storage, rabbitmq |
-|remote_invoker | False | Spawn a function that will perform the actual job invocation (True/False) |
-|log_level | INFO | Log level printing (INFO, DEBUG, ...). Set it to None to hide all logs. If this is param is set, all logging params in config are disabled|
-
-Usage:
-
-```python
-import lithops
-fexec = lithops.ServerlessExecutor()
-```
-
-**StandaloneExecutor(\*\*kwargs)**
-
-Initialize and return an Standalone executor object.
-
-|Parameter | Default | Description|
-|---|---|---|
-|config | None | Settings passed in here will override those in lithops_config|
-|backend | ibm_vpc | Standalone compute backend to run the functions|
-|runtime | python3 | Name of the runtime to run the functions. It can be a docker image or *python3* |
-|workers | cpu_count | Max number of parallel workers |
-|storage | ibm_cos | Storage backend to store temp data|
-|monitoring | storage | Monitoring system implementation. One of: storage, rabbitmq |
-|log_level | INFO | Log level printing (INFO, DEBUG, ...). Set it to None to hide all logs. If this is param is set, all logging params in config are disabled|
-
-Usage:
-
-```python
-import lithops
-fexec = lithops.StandaloneExecutor()
-```
-
-**FunctionExecutor(\*\*kwargs)**
-
-Initialize and return a generic function executor.
-
-|Parameter | Default | Description|
-|---|---|---|
-|mode | serverless | Execution mode. One of: localhost, serverless or standalone|
-|config | None | Settings passed in here will override those in lithops_config|
-|backend | None | Compute backend to run the functions|
-|runtime | None | Name of the runtime to run the functions. |
-|runtime_memory | None | Memory (in MB) to use to run the functions |
-|workers | None | Max number of parallel workers |
-|storage | ibm_cos | Storage backend to store temp data|
-|monitoring | storage | Monitoring system implementation. One of: storage, rabbitmq |
-|remote_invoker | False | Spawn a function that will perform the actual job invocation (True/False) |
-|log_level | INFO | Log level printing (INFO, DEBUG, ...). Set it to None to hide all logs. If this is param is set, all logging params in config are disabled|
-
-Usage:
-
-```python
-import lithops
-fexec = lithops.FunctionExecutor()
-```
-
-
-## Executor.call_async()
-
-Spawn only one function activation.
-
-**call_async**(func, data, \*\*kwargs)
-
-|Parameter | Default |Description|
-|---|---|---|
-|func | |The function to map over the data |
-|data | |A single value of data |
-|extra_env| None |Additional environment variables for CF environment|
-|runtime_memory| 256 |Memory (in MB) to use to run the functions|
-|timeout| 600 |Max time per function activation (seconds)|
-|include_modules| [] |Explicitly pickle these dependencies. All required dependencies are pickled if default empty list. No one dependency is pickled if it is explicitly set to None |
-|exclude_modules| [] |Explicitly keep these modules from pickled dependencies. It is not taken into account if you set include_modules |
-
-* **Returns**: One future for each job (Futures are also internally stored by Lithops).
-
-* **Usage**:
-
- ```python
- future = fexec.call_async(foo, data)
- ```
-
-* **Code example**: [call_async.py](../examples/call_async.py)
-
-## Executor.map()
-
-Spawn multiple function activations based on the items of an input list.
-
-**map**(map_function, map_iterdata, \*\*kwargs)
-
-|Parameter| Default |Description|
-|---|---|---|
-|map_function | |The function to map over the data |
-|map_iterdata | |An iterable of input data (e.g python list) |
-|chunksize | 1 | Split map_iterdata into chunks of this size. Lithops spawns 1 worker per resulting chunk |
-|worker_processes | 1 | Number of concurrent/parallel processes in each worker|
-|extra_args| None | Additional arguments to pass to each map_function activation |
-|extra_env| None |Additional environment variables for CF environment |
-|runtime_memory| 256 |Memory (in MB) to use to run the functions |
-|timeout| 600 |Max time per function activation (seconds) |
-|include_modules| [] |Explicitly pickle these dependencies. With the default empty list, all required dependencies are pickled. If explicitly set to None, no dependencies are pickled |
-|exclude_modules| [] |Explicitly exclude these modules from the pickled dependencies. It is ignored if include_modules is set |
-|obj_chunk_size| None | Used for data processing. Chunk size, in bytes, to split each object into. Must be >= 1MiB. 'None' to process the whole file in one function activation|
-|obj_chunk_number| None | Used for data processing. Number of chunks to split each object into. 'None' to process the whole file in one function activation. obj_chunk_number takes precedence over obj_chunk_size if both parameters are set|
-|obj_newline| '\n' | Newline character used to keep the line integrity of partitions. 'None' to disable the line integrity logic and get partitions of exactly the same size|
-
-* **Returns**: A list with size len(map_iterdata) of futures for each job (Futures are also internally stored by Lithops).
-
-* **Usage**:
-
- ```python
- iterdata = [1, 2, 3, 4]
- futures = fexec.map(foo, iterdata)
- ```
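-
-* **Usage with chunking** (a sketch built only from the parameters documented above; `foo` is the same placeholder function):
-
-  ```python
-  iterdata = list(range(100))
-  # One worker is spawned per chunk of 10 items, and each worker
-  # runs 2 processes in parallel
-  futures = fexec.map(foo, iterdata, chunksize=10, worker_processes=2)
-  ```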
-
-* **Code example**: [map.py](../examples/map.py)
-
-## Executor.map_reduce()
-
-Spawn multiple *map_function* activations, based on the items of an input list, eventually spawning one (or multiple) *reduce_function* activations over the results of the map phase.
-
-**map_reduce**(map_function, map_iterdata, reduce_function, \*\*kwargs)
-
-|Parameter| Default |Description|
-|---|---|---|
-|map_function| |The function to map over the data |
-|map_iterdata | |An iterable of input data (e.g python list)|
-|chunksize | 1 | Split map_iterdata into chunks of this size. Lithops spawns 1 worker per resulting chunk |
-|worker_processes | 1 | Number of concurrent/parallel processes in each worker|
-|extra_args| None | Additional arguments to pass to each map_function activation |
-|reduce_function| |The function to run over the results of map_function |
-|spawn_reducer| 20 | Percentage of completed map activations required before spawning the reduce function. By default, the reducer is spawned once 20% of the map activations are done. |
-|extra_env| None | Additional environment variables for CF environment|
-|map_runtime_memory| 256 | Memory (in MB) to use to run the map_function|
-|reduce_runtime_memory| 256| Memory (in MB) to use to run the reduce_function|
-|timeout| 600 | Max time per function activation (seconds)|
-|include_modules| [] |Explicitly pickle these dependencies. With the default empty list, all required dependencies are pickled. If explicitly set to None, no dependencies are pickled |
-|exclude_modules| [] |Explicitly exclude these modules from the pickled dependencies. It is ignored if include_modules is set |
-|obj_chunk_size| None | Used for data processing. Chunk size, in bytes, to split each object into. Must be >= 1MiB. 'None' to process the whole file in one function activation|
-|obj_chunk_number| None | Used for data processing. Number of chunks to split each object into. 'None' to process the whole file in one function activation. obj_chunk_number takes precedence over obj_chunk_size if both parameters are set|
-|obj_newline| '\n' | Newline character used to keep the line integrity of partitions. 'None' to disable the line integrity logic and get partitions of exactly the same size|
-|obj_reduce_by_key| False| Used for data processing. Set one reducer per object after running the partitioner (reduce-by-key) |
-
-
-* **Returns**: A list with size len(map_iterdata) of futures for each job (Futures are also internally stored by Lithops).
-
-* **Usage**:
-
- ```python
- iterdata = [1, 2, 3, 4]
- futures = fexec.map_reduce(foo, iterdata, bar)
- ```
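-
-* **Usage with a delayed reducer** (a sketch built only from the parameters documented above; `foo` and `bar` are the same placeholder functions):
-
-  ```python
-  iterdata = [1, 2, 3, 4]
-  # Spawn the reducer only once all map activations have finished,
-  # and give it more memory than the default
-  futures = fexec.map_reduce(foo, iterdata, bar,
-                             spawn_reducer=100,
-                             reduce_runtime_memory=1024)
-  ```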
-
-* **Code example**: [map_reduce.py](../examples/map_reduce.py)
-
-
-## Executor.wait()
-
-Waits for the function activations to finish.
-
-**wait**(\*\*kwargs)
-
-|Parameter| Default |Description|
-|---|---|---|
-|fs| None | List of futures to wait on. If None, Lithops uses the internally stored futures |
-|throw_except | True | Re-raise the exception if a call raised one|
-|return_when| ALL_COMPLETED | One of 'ALL_COMPLETED', 'ANY_COMPLETED', 'ALWAYS' |
-|download_results| False | Whether or not to download the results while monitoring the activations |
-|timeout| None | Timeout for waiting for results (in seconds)|
-|THREADPOOL_SIZE| 128 | Number of threads to use while waiting for results|
-|WAIT_DUR_SEC| 1 | Time interval between checks (seconds) if the RabbitMQ monitor is not activated |
-|show_progressbar| True | Whether or not to show the progress bar |
-
-
-* **Returns**: `(fs_done, fs_notdone)` where `fs_done` is a list of futures that have completed and `fs_notdone` is a list of futures that have not completed.
-
-* **Usage**:
-
- ```python
- iterdata = [1, 2, 3, 4]
- futures = fexec.map(foo, iterdata)
- fexec.wait()
- ```
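-
-* **Usage with early return** (a sketch; it assumes the `ANY_COMPLETED` constant is importable from `lithops.wait`):
-
-  ```python
-  from lithops.wait import ANY_COMPLETED
-
-  futures = fexec.map(foo, [1, 2, 3, 4])
-  # Return as soon as any activation completes, without re-raising
-  # exceptions from failed activations
-  fs_done, fs_notdone = fexec.wait(fs=futures, return_when=ANY_COMPLETED,
-                                   throw_except=False)
-  ```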
-
-* **Code example**: [wait.py](../examples/wait.py)
-
-## Executor.get_result()
-
-Gets the results from all the function activations. It internally makes use of the `Executor.wait()` method.
-
-**get_result**(\*\*kwargs)
-
-|Parameter| Default |Description|
-|---|---|---|
-|fs| None | List of futures to get the results from. If None, Lithops uses the internally stored futures |
-|throw_except | True | Re-raise the exception if a call raised one|
-|timeout| None | Timeout for waiting for results (in seconds)|
-|THREADPOOL_SIZE| 128 | Number of threads to use while waiting for results|
-|WAIT_DUR_SEC| 1 | Time interval between checks (seconds) if the RabbitMQ monitor is not activated |
-|show_progressbar| True | Whether or not to show the progress bar |
-
-* **Returns**: The results are returned within an ordered list, where each element of the list is the result of one activation.
-
-* **Usage**:
-
- ```python
- iterdata = [1, 2, 3, 4]
- futures = fexec.map(foo, iterdata)
- results = fexec.get_result()
- ```
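-
-* **Usage with a subset of futures** (a sketch built only from the parameters documented above):
-
-  ```python
-  iterdata = [1, 2, 3, 4]
-  futures = fexec.map(foo, iterdata)
-  # Fetch only the results of the first two activations, and do not
-  # re-raise exceptions raised inside the functions
-  results = fexec.get_result(fs=futures[:2], throw_except=False)
-  ```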
-
-* **Code example**: [call_async.py](../examples/call_async.py), [map.py](../examples/map.py), [map_reduce.py](../examples/map_reduce.py)
-
-## Executor.plot()
-
-Creates two detailed execution plots: a timeline plot and a histogram plot.
-
-**plot**(\*\*kwargs)
-
-|Parameter| Default |Description|
-|---|---|---|
-|fs| None | List of futures to plot. If None, Lithops uses the internally stored futures|
-|dst| None | Path to destination file, either absolute or relative. If set, you must specify the path + the file prefix (see example below), then lithops will generate the *prefix*_histogram.png and *prefix*_timeline.png files. If None, Lithops will create a new folder called *plots* in the current directory and use the current timestamp as file *prefix* |
-
-* **Returns**: *Nothing*. It stores 2 different plots in the selected `dst` path.
-
-* **Usage**:
-
- ```python
- iterdata = [1, 2, 3, 4]
- fexec.map(foo, iterdata)
- results = fexec.get_result() # or fexec.wait()
- # The next command will generate test_timeline.png and test_histogram.png in ~/lithops_plots
- fexec.plot(dst='~/lithops_plots/test')
- ```
-
-* **Example**:
-
-
-
-
-
-
-## Executor.clean()
-
-Cleans the temporary data generated by Lithops in IBM COS. This process runs asynchronously to the main execution since Lithops starts another process to do the task. If `data_cleaner=True` (default), this method is executed automatically after calling `get_result()`.
-
-**clean**(\*\*kwargs)
-
-|Parameter| Default |Description|
-|---|---|---|
-|fs| None | List of futures to clean temp data. If None, Lithops uses the internally stored futures |
-|cs| None | List of cloudobjects to clean |
-|clean_cloudobjects| True | Whether or not to clean the cloudobjects generated in the executor |
-|spawn_cleaner| True | Spawn the cleaner process. If False, it stores the data to be cleaned in a temp dir |
-
-* **Returns**: *Nothing*.
-
-* **Usage**:
-
- ```python
- iterdata = [1, 2, 3, 4]
- futures = fexec.map(foo, iterdata)
- results = fexec.get_result()
- fexec.clean()
- ```
-
-* **Code example**: [map.py](../examples/map.py)
-
-
-# Function chaining
-
-Function chaining is a pattern where multiple functions are called consecutively on the same executor. Using the same `lithops.FunctionExecutor` object reference, multiple functions can be invoked one after another, which improves code readability and reduces redundancy: there is no need to reference the `lithops.FunctionExecutor` object separately for each function call.
-
-This pattern is especially useful when the output of one invocation is the input of another invocation. In this case, Lithops does not download the intermediate results to the local client; instead, the intermediate results are read directly by the next function.
-
-It currently works with the Futures API, and you can chain the `map()`, `map_reduce()`, `wait()` and `get_result()` methods. Note that the return value of one function must match the signature of the next function when chaining multiple `map()` calls. See the following examples:
-
-
-Getting the result from a single `map()` call:
-
-```python
-import lithops
-
-def my_func1(x):
- return x*2
-
-iterdata = [1, 2, 3]
-
-fexec = lithops.FunctionExecutor()
-res = fexec.map(my_func1, iterdata).get_result()
-print(res)
-```
-
-
-Chain multiple map() calls and get the final result:
-
-```python
-import lithops
-
-
-def my_func1(x):
- return x*2, 5
-
-def my_func2(x, y):
- return x+y
-
-iterdata = [1, 2, 3]
-
-fexec = lithops.FunctionExecutor()
-res = fexec.map(my_func1, iterdata).map(my_func2).get_result()
-print(res)
-```
-
-There is no limit on the number of map() calls that can be chained:
-
-```python
-def my_func1(x):
- return x+2, 5
-
-
-def my_func2(x, y):
- return x+y, 5, 2
-
-
-def my_func3(x, y, z):
- return x+y+z
-
-
-iterdata = [1, 2, 3]
-
-fexec = lithops.FunctionExecutor()
-res = fexec.map(my_func1, iterdata).map(my_func2).map(my_func3).get_result()
-print(res)
-```
-
-Alternatively, you can pass the `futures` generated in a `map()` or `map_reduce()` call to the `iterdata` parameter with the same effect. Note that in this case you will only get the results of the last `map()` execution; the results of intermediate `map()`s are never downloaded:
-
-```python
-def my_func1(x):
- return x+2, 5
-
-
-def my_func2(x, y):
- return x+y, 5, 2
-
-
-def my_func3(x, y, z):
- return x+y+z
-
-
-iterdata = [1, 2, 3]
-
-fexec = lithops.FunctionExecutor()
-futures1 = fexec.map(my_func1, iterdata)
-futures2 = fexec.map(my_func2, futures1)
-futures3 = fexec.map(my_func3, futures2)
-final_result = fexec.get_result()
-
-print(final_result)
-```
\ No newline at end of file
diff --git a/docs/api_storage.md b/docs/api_storage.md
deleted file mode 100644
index 5d126201c..000000000
--- a/docs/api_storage.md
+++ /dev/null
@@ -1,322 +0,0 @@
-# Lithops Storage API Details
-
-Lithops allows you to create a **Storage** instance that abstracts away the backend implementation details. The standard way to get a Storage object set up is to import the lithops `Storage` class and create an instance.
-
-
-**Storage(\*\*kwargs)**
-
-Initialize and return a Storage object.
-
-|Parameter | Default | Description|
-|---|---|---|
-|config | None | Lithops configuration dictionary |
-|backend | None | Name of the backend |
-
-
-
-By default, the configuration is loaded from the lithops config file, so there is no need to provide any parameter to create a Storage instance:
-
-```python
-from lithops import Storage
-
-storage = Storage()
-```
-
-Alternatively, you can pass the lithops configuration through a dictionary. In this case, it will load the storage backend set in the `storage` key of the `lithops` section:
-
-```python
-from lithops import Storage
-
-config = {'lithops' : {'storage' : 'ibm_cos'},
- 'ibm_cos': {'region': 'REGION', 'api_key': 'API_KEY'}}
-
-storage = Storage(config=config)
-```
-
-If you have multiple storage backends set in your configuration, you can force a specific one by using the `backend` parameter:
-
-```python
-from lithops import Storage
-
-storage = Storage(backend='redis') # this will create a redis Storage instance
-```
-
-or:
-
-```python
-from lithops import Storage
-
-config = {'lithops' : {'storage' : 'ibm_cos'},
-          'ibm_cos': {'region': 'REGION', 'api_key': 'API_KEY'},
-          'redis': {'host': 'HOST', 'port': 'PORT'}}
-
-
-storage = Storage(config=config) # this will create an ibm_cos Storage instance
-storage = Storage(config=config, backend='redis') # this will create a redis Storage instance
-```
-
-## Storage API Calls
-
-### `Storage.put_object()`
-
-Adds an object to a bucket of the storage backend.
-
-**put_object**(bucket, key, data)
-
-|Parameter | Description|
-|---|---|
-|bucket | Name of the bucket (String)|
-|key | Name of the object (String)|
-|data| Object data (bytes/string or seekable file-like object)|
-
-* **Usage**:
-
- ```python
- storage = Storage()
- # Bytes/string data
- storage.put_object('my_bucket', 'test.txt', 'Hello World')
- ```
-
- ```python
- storage = Storage()
- # Seekable file-like object
- with open('/tmp/my_big_file.csv', 'rb') as fl:
- storage.put_object('my_bucket', 'my_big_file.csv', fl)
- ```
-
-
-### `Storage.get_object()`
-
-Retrieves objects from the storage backend.
-
-**get_object**(bucket, key, \*\*kwargs)
-
-|Parameter | Description|
-|---|---|
-|bucket | Name of the bucket (String)|
-|key | Name of the object (String)|
-|stream | Get the object data or a file-like object (True/False) |
-|extra_get_args | Extra get arguments to be passed to the underlying backend implementation (dict). For example, to specify the byte-range to read: `extra_get_args={'Range': 'bytes=0-100'}`|
-
-* **Usage**:
-
- ```python
- storage = Storage()
- data = storage.get_object('my_bucket', 'test.txt')
- ```
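-
-  A sketch of the remaining documented parameters (byte-range read and streaming):
-
-  ```python
-  storage = Storage()
-  # Read only the first 101 bytes of the object
-  header = storage.get_object('my_bucket', 'test.txt',
-                              extra_get_args={'Range': 'bytes=0-100'})
-  # Get a file-like object instead of the raw data
-  data_stream = storage.get_object('my_bucket', 'test.txt', stream=True)
-  data = data_stream.read()
-  ```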
-
-
-### `Storage.head_object()`
-The HEAD operation retrieves metadata from an object without returning the object itself. This operation is useful if you're only interested in an object's metadata.
-
-**head_object**(bucket, key)
-
-|Parameter | Description|
-|---|---|
-|bucket | Name of the bucket (String)|
-|key | Name of the object (String)|
-
-* **Usage**:
-
- ```python
- storage = Storage()
- obj_metadata = storage.head_object('my_bucket', 'test.txt')
- ```
-
-
-### `Storage.delete_object()`
-
-Removes an object from the storage backend.
-
-**delete_object**(bucket, key)
-
-|Parameter | Description|
-|---|---|
-|bucket | Name of the bucket (String)|
-|key | Name of the object (String)|
-
-* **Usage**:
-
- ```python
- storage = Storage()
- storage.delete_object('my_bucket', 'test.txt')
- ```
-
-### `Storage.delete_objects()`
-
-This operation enables you to delete multiple objects from a bucket using a single HTTP request. If you know the object keys that you want to delete, then this operation provides a suitable alternative to sending individual delete requests, reducing per-request overhead.
-
-**delete_objects**(bucket, key_list)
-
-|Parameter | Description|
-|---|---|
-|bucket | Name of the bucket (String)|
-|key_list | Name of the objects (list)|
-
-* **Usage**:
-
- ```python
- storage = Storage()
- storage.delete_objects('my_bucket', ['test1.txt', 'test2.txt'])
- ```
-
-
-### `Storage.head_bucket()`
-
-This operation is useful to determine if a bucket exists and you have permission to access it. The operation returns a 200 OK if the bucket exists and you have permission to access it. Otherwise, the operation might return responses such as 404 Not Found and 403 Forbidden.
-
-**head_bucket**(bucket)
-
-|Parameter | Description|
-|---|---|
-|bucket | Name of the bucket (String)|
-
-* **Usage**:
-
- ```python
- storage = Storage()
- storage.head_bucket('my_bucket')
- ```
-
-
-### `Storage.list_objects()`
-
-Returns all of the objects in a bucket. For each object, the list contains the name of the object (key) and the size.
-
-**list_objects**(bucket, \*\*kwargs)
-
-|Parameter | Description|
-|---|---|
-|bucket | Name of the bucket (String)|
-|prefix | key prefix for filtering (String)|
-
-* **Usage**:
-
- ```python
- storage = Storage()
- storage.list_objects('my_bucket', prefix='temp/')
- ```
-
-
-### `Storage.list_keys()`
-
-Similar to list_objects(), it returns all of the objects in a bucket. For each object, the list contains only the names of the objects (keys).
-
-**list_keys**(bucket, \*\*kwargs)
-
-|Parameter | Description|
-|---|---|
-|bucket | Name of the bucket (String)|
-|prefix | key prefix for filtering (String)|
-
-* **Usage**:
-
- ```python
- storage = Storage()
- storage.list_keys('my_bucket')
- ```
-
-
-### `Storage.get_client()`
-Returns the underlying storage backend client. For example, if `Storage` is an instance built on top of AWS S3, it returns a boto3 client.
-
-**get_client**()
-
-* **Usage**:
-
- ```python
- storage = Storage()
- boto3_client = storage.get_client()
- ```
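-
-  Since the returned object is the native backend client, backend-specific features become available. A sketch assuming an S3/boto3-based backend (bucket and key names are placeholders):
-
-  ```python
-  storage = Storage()
-  boto3_client = storage.get_client()
-  # Generate a temporary, pre-signed download link (boto3-only feature)
-  url = boto3_client.generate_presigned_url(
-      'get_object',
-      Params={'Bucket': 'my_bucket', 'Key': 'test.txt'},
-      ExpiresIn=3600)
-  ```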
-
-### `Storage.put_cloudobject()`
-
-Adds objects to a bucket of the storage backend. Returns a **cloudobject** that is a reference to the object.
-
-**put_cloudobject**(body, \*\*kwargs)
-
-|Parameter | Description|
-|---|---|
-|body| Object data (bytes/string or seekable file-like object)|
-|bucket | Name of the bucket (String). By default it uses the `storage_bucket`|
-|key | Name of the object (String). By default it creates a random key|
-
-If the `bucket` parameter is not provided, it will use the `storage_bucket` set in the lithops config. If `key` is not provided, it will create a random temporary key.
-
-* **Usage**:
-
- ```python
- storage = Storage()
- # Bytes/string
- cobj = storage.put_cloudobject('Hello World!')
- ```
-
- ```python
- storage = Storage()
- # Seekable file-like object
- with open('/tmp/my_big_file.csv', 'rb') as fl:
- cobj = storage.put_cloudobject(fl)
- ```
-
-
-### `Storage.get_cloudobject()`
-
-Retrieves CloudObjects from a bucket of the storage backend.
-
-**get_cloudobject**(cloudobject, \*\*kwargs)
-
-
-|Parameter | Description|
-|---|---|
-|cloudobject| CloudObject Instance|
-|stream | Get the object data or a file-like object (True/False) |
-
-
-* **Usage**:
-
- ```python
- storage = Storage()
- cobj = storage.put_cloudobject('Hello World!', 'my-bucket', 'test.txt')
- data = storage.get_cloudobject(cobj)
- ```
-
-
-### `Storage.delete_cloudobject()`
-
-Removes CloudObjects from a bucket of the storage backend.
-
-**delete_cloudobject**(cloudobject)
-
-
-|Parameter | Description|
-|---|---|
-|cloudobject| CloudObject Instance|
-
-
-* **Usage**:
-
- ```python
- storage = Storage()
-  cobj = storage.put_cloudobject('Hello World!', key='test.txt')
- storage.delete_cloudobject(cobj)
- ```
-
-### `Storage.delete_cloudobjects()`
-
-This operation enables you to delete multiple objects from a bucket using a single HTTP request. If you know the object keys that you want to delete, then this operation provides a suitable alternative to sending individual delete requests, reducing per-request overhead.
-
-**delete_cloudobjects**(cloudobjects, \*\*kwargs)
-
-
-|Parameter | Description|
-|---|---|
-|cloudobjects| CloudObject Instances (list)|
-
-
-* **Usage**:
-
- ```python
- storage = Storage()
-  cobj1 = storage.put_cloudobject('Hello World!', key='test1.txt')
-  cobj2 = storage.put_cloudobject('Hello World!', key='test2.txt')
- storage.delete_cloudobjects([cobj1, cobj2])
\ No newline at end of file
diff --git a/docs/conf.py b/docs/conf.py
index db7db47a1..20e19b567 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -12,6 +12,7 @@
import os
import sys
+import lithops
sys.path.insert(0, os.path.abspath('.'))
sys.path.insert(0, os.path.abspath("../"))
@@ -72,8 +73,6 @@
# html_theme = 'sphinx_material'
# html_theme = 'karma_sphinx_theme'
html_theme = 'sphinx_book_theme'
-
-html_logo = "_static/lithops_logo_readme.png"
html_favicon = '_static/favicon.png'
language = 'en'
@@ -81,16 +80,23 @@
html_theme_options = {
'repository_url': 'https://github.com/lithops-cloud/lithops',
'repository_branch': 'master',
- 'google_analytics_id': 'G-7YKZHZYDCR',
'use_issues_button': False,
'use_download_button': True,
'use_fullscreen_button': False,
'use_repository_button': True,
- 'show_navbar_depth': 1
+ 'show_navbar_depth': 1,
+ # https://pydata-sphinx-theme.readthedocs.io/en/latest/user_guide/branding.html
+ "logo": {
+ "text": f"Lithops v{lithops.__version__}",
+ "image_light": "_static/lithops_logo_readme.png",
+ "image_dark": "_static/lithops_logo_readme.png",
+ },
+ # https://pydata-sphinx-theme.readthedocs.io/en/latest/user_guide/analytics.html
+ "analytics": {
+ "google_analytics_id": "G-7YKZHZYDCR",
+ }
}
-# html_title = f"Lithops v{lithops.__version__}"
-html_title = ''
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
diff --git a/docs/index.rst b/docs/index.rst
index 4f4f4264d..762d48d15 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,92 +1,198 @@
-What is Lithops?
-****************
+Welcome to Lithops!
+********************
+
+**Lithops is a Python multi-cloud serverless computing framework** that empowers you to **run unmodified Python code at massive scale** on leading serverless platforms and beyond.
+
+Whether you're processing terabytes of data or launching thousands of parallel tasks, Lithops lets you **focus on your code, not infrastructure**. It brings simplicity, performance, and flexibility to cloud-native computing.
+
+
+Why Lithops?
+============
+
+Serverless computing makes it easy to run code in the cloud — but scaling data-intensive workloads across clouds is hard. Lithops solves this by providing:
+
+- ✅ **Zero-configuration scale-out**: Run your Python functions on thousands of cloud workers with no infrastructure management.
+- 🌍 **True multi-cloud portability**: Move seamlessly between AWS, GCP, Azure, IBM Cloud, and more.
+- 💡 **Developer-first experience**: Write standard Python code, including NumPy, pandas, and scikit-learn — no cloud-specific boilerplate required.
+- 🧠 **Optimized for big data and AI**: Efficiently process massive datasets stored in object storage services with automatic partitioning.
+
+
+What You Can Build
+===================
+
+Lithops is ideal for **highly parallel, data-heavy workloads**. These include:
+
+- 🔁 Monte Carlo simulations
+- 🧬 Metabolomics and genomics pipelines
+- 🗺️ Geospatial analytics
+- 🧠 Deep learning and hyperparameter tuning
+- 📊 Big Data ETL and analytics workflows
+
+If your problem can be broken down into many small, independent tasks, Lithops will help you solve it at scale — fast.
+
+Key Features
+============
-.. image:: source/images/lithops_logo_readme.png
- :alt: Lithops
+Compute Anywhere
+----------------
+**Lithops features a modular and extensible backend architecture**, allowing you to run workloads across:
+
+- Serverless functions
+- Cloud VMs and Kubernetes clusters
+- On-premise compute resources
+
+No matter where your data lives, Lithops can execute your code right next to it.
+
+.. image:: source/images/multicloud.jpg
+ :alt: Available backends
:align: center
-|
-**Lithops is a Python multi-cloud serverless computing framework. It allows to run unmodified local python code at massive scale in the main serverless computing platforms.**
+Object Storage Made Easy
+-------------------------
-Lithops delivers the user’s code into the cloud without requiring knowledge of how it is deployed and run.
-Moreover, its multicloud-agnostic architecture ensures portability across cloud providers, overcoming vendor lock-in.
+**Seamlessly process large-scale data stored in object storage.**
-------------
+Lithops simplifies working with data lakes and object storage by providing:
-**Lithops provides great value for data-intensive applications like Big Data analytics and embarrassingly parallel jobs.**
+- 🔍 **Automatic data discovery**: Detects and lists files across nested directories.
+- 📂 **Transparent data partitioning**: Splits large files (e.g., CSV, Parquet, JSON) into chunks for efficient parallel processing.
+- 🧰 **Unified, Pythonic API**: Interact with your data using a single interface, regardless of where it's stored.
-It is specially suited for highly-parallel programs with little or no need for communication between processes.
+You write simple Python code — Lithops handles the complexity of parallel I/O, data distribution, and storage backends under the hood.
-Examples of applications that run with Lithops include Monte Carlo simulations, deep learning and machine learning processes, metabolomics computations, and geospatial
-analytics, to name a few.
-------------
+Get Started Quickly
+====================
-**Lithops facilitates consuming data from object storage (like AWS S3, GCP Storage or IBM Cloud Object Storage) by providing automatic partitioning and data discovery for common data formats like CSV.**
+To start using Lithops:
-Lithops abstracts away the underlying cloud-specific APIs for accessing storage and provides an intuitive and easy to use interface to process high volumes of data.
+1. Install via pip:
+ .. code-block:: bash
-Quick Start
-***********
+ pip install lithops
-Lithops is available for Python 3.6 and up. Install it using ``pip``:
+2. Configure your cloud credentials (see the :doc:`Configuration Guide `)
-.. code-block::
+3. Write and run your first parallel job:
- pip install -U lithops
+ .. code-block:: python
-You're ready to execute a simple example!
+ import lithops
-.. code:: python
+ def my_function(x):
+ return x * 2
- from lithops import FunctionExecutor
+ fexec = lithops.FunctionExecutor()
+ fexec.map(my_function, range(10))
+ print(fexec.get_result())
- def hello(name):
- return 'Hello {}!'.format(name)
+You're now running massively parallel workloads with just a few lines of code!
- with FunctionExecutor() as fexec:
- fut = fexec.call_async(hello, 'World')
- print(fut.result())
-Use any Cloud
-*************
-**Lithops provides an extensible backend architecture that is designed to work with different compute and storage services available on Cloud providers and on-premise backends.**
+Success stories
+===============
-In this sense, you can code your application in Python and run it unmodified wherever your data is located at: IBM Cloud, AWS, Azure, Google Cloud and Alibaba Aliyun...
+* `Metaspace Metabolomics Platform `_ is running in production in AWS with hundreds of users.
+ MetaSpace is using Lithops over Lambda Functions and EC2 VMs to access metabolomics data in Amazon S3.
+ MetaSpace moved from Spark to Lithops to simplify dynamic and elastic resource provisioning.
-.. image:: source/images/multicloud.jpg
- :alt: Available backends
- :align: center
+* `OpenNebula Open Source Cloud and Edge Computing platform `_ integrates Lithops as an easy-to-use appliance
+ for data analytics. OpenNebula also deploys MinIO storage and Lithops Kubernetes backend to facilitate data analytics
+ in on-premise and edge deployments.
-|
+* `Cubed `_ is a popular library for scalable multidimensional array processing with bounded memory.
+ Cubed is a drop-in replacement for Dask's Array API.
+ Cubed integrates Lithops as a fast compute backend enabling scalable array processing in the Cloud.
+
+* `BSC Marenostrum 5 SuperComputer `_ is a pre-exascale EuroHPC supercomputer with
+ a peak computational power of 314 PFlops. A new Lithops HPC compute backend has been created enabling large-scale computing
+ reaching tens of thousands of concurrent functions. LithopsHPC is now being used in the neardata.eu project for extreme
+ data analytics of genomics pipelines.
-Additional resources
-********************
Blogs and Talks
----------------
-* `Simplify the developer experience with OpenShift for Big Data processing by using Lithops framework `_
-* `Speed-up your Python applications using Lithops and Serverless Cloud resources `_
-* `Serverless Without Constraints `_
-* `Lithops, a Multi-cloud Serverless Programming Framework `_
-* `CNCF Webinar - Toward Hybrid Cloud Serverless Transparency with Lithops Framework `_
-* `Using Serverless to Run Your Python Code on 1000 Cores by Changing Two Lines of Code `_
-* `Decoding dark molecular matter in spatial metabolomics with IBM Cloud Functions `_
-* `Your easy move to serverless computing and radically simplified data processing `_ Strata Data Conference, NY 2019
-* `Speed up data pre-processing with Lithops in deep learning `_
-* `Predicting the future with Monte Carlo simulations over IBM Cloud Functions `_
-* `Process large data sets at massive scale with Lithops over IBM Cloud Functions `_
-* `Industrial project in Technion on Lithops `_
+===============
+
+* `Simplify the developer experience with OpenShift for Big Data processing by using Lithops framework
+ `_
+
+* `Speed-up your Python applications using Lithops and Serverless Cloud resources
+ `_
+
+* `Serverless Without Constraints
+ `_
+
+* `Lithops, a Multi-cloud Serverless Programming Framework
+ `_
+
+* `CNCF Webinar - Toward Hybrid Cloud Serverless Transparency with Lithops Framework
+ `_
+
+* `Using Serverless to Run Your Python Code on 1000 Cores by Changing Two Lines of Code
+ `_
+
+* `Decoding dark molecular matter in spatial metabolomics with IBM Cloud Functions
+ `_
+
+* `Your easy move to serverless computing and radically simplified data processing
+ `_
+ Strata Data Conference, NY 2019
+
+* `Speed up data pre-processing with Lithops in deep learning
+ `_
+
+* `Predicting the future with Monte Carlo simulations over IBM Cloud Functions
+ `_
+
+* `Process large data sets at massive scale with Lithops over IBM Cloud Functions
+ `_
+
+* `Industrial project in Technion on Lithops
+ `_
+
Papers
-------
-* `Outsourcing Data Processing Jobs with Lithops `_ - IEEE Transactions on Cloud Computing 2022
-* `Towards Multicloud Access Transparency in Serverless Computing `_ - IEEE Software 2021
-* `Primula: a Practical Shuffle/Sort Operator for Serverless Computing `_ - ACM/IFIP International Middleware Conference 2020. `See Primula presentation here `_
-* `Bringing scaling transparency to Proteomics applications with serverless computing `_ - 6th International Workshop on Serverless Computing (WoSC6) 2020. `See Workshop presentation here `_
-* `Serverless data analytics in the IBM Cloud `_ - ACM/IFIP International Middleware Conference 2018
+======
+
+* `Serverful Functions: Leveraging Servers in Complex Serverless Workflows
+ `_ - ACM Middleware Industrial Track 2024
+
+* `Transparent serverless execution of Python multiprocessing applications
+ `_ - Elsevier Future Generation Computer Systems 2023
+
+* `Outsourcing Data Processing Jobs with Lithops
+ `_ - IEEE Transactions on Cloud Computing 2022
+
+* `Towards Multicloud Access Transparency in Serverless Computing
+ `_ - IEEE Software 2021
+
+* `Primula: a Practical Shuffle/Sort Operator for Serverless Computing
+ `_ - ACM/IFIP International Middleware Conference 2020.
+ `See Primula presentation here `_
+
+* `Bringing scaling transparency to Proteomics applications with serverless computing
+ `_ - 6th International Workshop on Serverless Computing (WoSC6) 2020.
+ `See Workshop presentation here `_
+
+* `Serverless data analytics in the IBM Cloud
+ `_ - ACM/IFIP International Middleware Conference 2018
+
+
+Join the Community
+==================
+
+Lithops is an open-source project, actively maintained and supported by a community of contributors and users. You can:
+
+- 💬 Join the discussion on `GitHub Discussions `_
+- 🐞 Report issues or contribute on `GitHub `_
+- 📖 Read more in the full documentation
+
+
+----
+
+**Start writing scalable cloud applications — with Lithops.**
.. toctree::
@@ -123,6 +229,7 @@ Papers
source/api_futures.rst
source/functions.md
+ source/worker_granularity.rst
source/notebooks/function_chaining.ipynb
source/api_stats.rst
@@ -156,10 +263,8 @@ Papers
:maxdepth: 0
:caption: Advanced Features
- source/worker_granularity.rst
source/monitoring.rst
- Custom Runtime
- source/customized_runtime.rst
+ Custom Runtimes
.. toctree::
@@ -178,5 +283,4 @@ Papers
Applications
source/contributing.rst
- source/testing.rst
Changelog
diff --git a/docs/source/api_futures.rst b/docs/source/api_futures.rst
index a2cee0c5f..b42999795 100644
--- a/docs/source/api_futures.rst
+++ b/docs/source/api_futures.rst
@@ -3,19 +3,43 @@
Lithops Futures API
===================
-The primary object in Lithops is the executor. The standard way to get everything set up is to import `lithops`, and create an instance of one of the available modes of executions.
+The core abstraction in Lithops is the **executor**, responsible for orchestrating the execution of your functions across different environments.
-Lithops is shipped with 3 modes of execution: **Localhost**, **Serverless** and **Standalone**. In this sense, each mode of execution has its own executor class:
+To get started, you typically import `lithops` and create an executor instance to run your code. Lithops provides a flexible set of executors to suit different needs.
-* `lithops.LocalhostExecutor()`: Executor that uses local processes to run jobs in the local machine.
-* `lithops.ServerlessExecutor()`: Executor to run jobs in one of the available serverless compute backends.
-* `lithops.StandaloneExecutor()`: Executor to run jobs in one of the available standalone compute backends.
+Primary Executors
+-----------------
-Additionally, Lithops includes a top-level function executor, which encompasses all three previous executors:
+* **FunctionExecutor** (`lithops.FunctionExecutor()`):
+ The main, generic executor that automatically selects its execution mode based on the provided configuration.
+ This lets you write your code once and run it seamlessly on localhost, serverless, or standalone backends without changing your code.
-* `lithops.FunctionExecutor()`: Generic executor that will use the configuration to determine its mode of execution, i.e., based on the configuration it will be **localhost**, **serverless** or **standalone**.
+* **RetryingFunctionExecutor** (`lithops.RetryingFunctionExecutor()`):
+ A robust wrapper around `FunctionExecutor` that transparently handles retries on failed tasks.
+  It supports all features of `FunctionExecutor` with added automatic retry logic, improving fault tolerance and reliability in environments prone to transient failures (see the sketch below).
+
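+A minimal usage sketch follows. The exact method signatures are an assumption here; refer to the API reference at the bottom of this page for the authoritative interface:
+
+.. code-block:: python
+
+    import lithops
+
+    def double(x):
+        return x * 2
+
+    # Wrap a regular FunctionExecutor to get automatic retries (assumed API)
+    with lithops.RetryingFunctionExecutor(lithops.FunctionExecutor()) as fexec:
+        futures = fexec.map(double, range(10), retries=2)
+        done, pending = fexec.wait(futures, throw_except=False)
+        print([f.result() for f in done])
+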
+Secondary Executors
+-------------------
+
+For more specialized use cases, Lithops also provides explicit executors for each execution mode:
+
+* **LocalhostExecutor** (`lithops.LocalhostExecutor()`):
+ Runs jobs locally using multiple processes on your machine. Ideal for development, debugging, or small-scale workloads.
+
+* **ServerlessExecutor** (`lithops.ServerlessExecutor()`):
+ Executes jobs on serverless compute platforms, managing scaling and deployment automatically. Best for massively parallel, ephemeral workloads.
+
+* **StandaloneExecutor** (`lithops.StandaloneExecutor()`):
+ Runs jobs on standalone compute backends such as clusters or virtual machines, suitable for long-running or resource-heavy tasks.
+
+
+Configuration and Initialization
+================================
+
+By default, executors load configuration from the Lithops configuration file (e.g., `lithops_config.yaml`). You can also supply configuration parameters programmatically via a Python dictionary when creating an executor instance. Parameters passed explicitly override those in the config file, allowing for flexible customization on the fly.
+
+This layered executor design lets Lithops provide a powerful, unified API for parallel function execution — from local development to multi-cloud production deployments with fault tolerance and retries built-in.
-By default, the executor load the configuration from the config file. Alternatively, you can pass the configuration with a python dictionary. In any case, note that all the parameters set in the executor will overwrite those set in the configuration.
Futures API Reference
---------------------
@@ -24,3 +48,8 @@ Futures API Reference
:members:
:undoc-members:
:show-inheritance:
+
+.. autoclass:: lithops.retries.RetryingFunctionExecutor
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/source/api_multiprocessing.rst b/docs/source/api_multiprocessing.rst
index 67cf5588d..8ee24c5d2 100644
--- a/docs/source/api_multiprocessing.rst
+++ b/docs/source/api_multiprocessing.rst
@@ -3,6 +3,13 @@ Multiprocessing API
Lithops implements Python's `multiprocessing API `_ to transparently run local-parallel applications but using serverless functions for Processes and a Redis instance for shared state and Inter-Process Communication (IPC).
+Before utilizing this API, you will need to install its dependencies:
+
+.. code-block:: bash
+
+ python3 -m pip install lithops[multiprocessing]
+
+
Process and Pool
----------------
@@ -50,9 +57,9 @@ The Redis credentials (host, password...) is loaded from the ``redis`` section o
The fastest way to deploy a Redis instance is using Docker in a VM located in the cloud of your choice:
-.. code::
+.. code:: bash
- $ docker run --rm -d --network host --name redis redis:6.2.1 --requirepass redispassword
+ docker run --rm -d --network host --name redis redis:6.2.1 --requirepass redispassword
To have lower latency, you can deploy the functions and the VM in the same VPC and use route through internal traffic instead of the internet.
For example, in AWS, the functions and VM can be deployed in the same VPC: Lambdas go to a private subnet and the VM in a public subnet. This way, the VM has access to the internet and the local Lithops process can also access it.
@@ -79,7 +86,7 @@ For this reason, to set specific configuration in runtime, the ``Lithops.multipr
# To set a config parameter, use the set_parameter
# function and specify the parameter and the desired value
- mp_config.set_parameter(mp_config.LITHOPS_CONFIG, {'lithops': {'mode': 'localhost'}})
+ mp_config.set_parameter(mp_config.LITHOPS_CONFIG, {'lithops': {'backend': 'localhost'}})
mp_config.set_parameter(mp_config.STREAM_STDOUT, True)
mp_config.set_parameter(mp_config.REDIS_EXPIRY_TIME, 1800)
mp_config.set_parameter(mp_config.PIPE_CONNECTION_TYPE, 'redislist')
@@ -120,5 +127,4 @@ Multiprocessing configuration keys
- ``None``
-
* To use nanomsg for Pipes, you must still deploy a Redis instance (used for pipe directory). Note that this feature only works in environments where functions can open a port and communicate with each other.
diff --git a/docs/source/api_stats.rst b/docs/source/api_stats.rst
index afcdcf466..6da164720 100644
--- a/docs/source/api_stats.rst
+++ b/docs/source/api_stats.rst
@@ -10,6 +10,12 @@ Execution summary plots
The :code:`plot()` method from :code:`FunctionExecutor` creates a scatter plot and a histogram plot showing a summary of the tasks executed by a :code:`FunctionExecutor`. By default, lithops creates a :code:`plots/` directory in the working directory path containing both plots in PNG format. For more details refer to the `FunctionExecutor API reference `_.
+To get started, first install Lithops and the plotting dependencies with:
+
+.. code-block:: bash
+
+ python3 -m pip install lithops[plotting]
+
* **Scatter Plot**: the scatter plot shows a timeline on the horizontal axis where the stages of all invocations are arranged on the vertical axis.
- :code:`host submit` indicates the time that the orchestrator process has invoked the function.
- :code:`call start` indicates the timestamp at which the function starts its execution.
@@ -85,13 +91,21 @@ The user can obtain these statistics through the future object:
'worker_cold_start': True,
'worker_end_tstamp': 1647526902.397567,
'worker_exec_time': 0.23604679,
+ 'worker_func_cpu_usage': [0.0, 25.0],
+ 'worker_func_cpu_user_time': 70566.78125,
+ 'worker_func_cpu_system_time': 16418.34375,
'worker_func_end_tstamp': 1647526902.2985177,
'worker_func_exec_time': 1.91e-06,
+ 'worker_func_recv_net_io': 5968,
+ 'worker_func_sent_net_io': 1223,
'worker_func_start_tstamp': 1647526902.2985158,
- 'worker_result_upload_time': 0.07001352,
- 'worker_start_tstamp': 1647526902.1615202,
+ 'worker_func_rss': 60678144,
+ 'worker_func_uss': 44838912,
+ 'worker_func_vms': 552267776,
'worker_peak_memory_start': 88469504,
- 'worker_peak_memory_end': 126469504}
+ 'worker_peak_memory_end': 126469504,
+ 'worker_result_upload_time': 0.07001352,
+ 'worker_start_tstamp': 1647526902.1615202}
.. list-table::
@@ -119,11 +133,11 @@ The user can obtain these statistics through the future object:
* - :code:`host_result_done_tstamp`
- Timestamp of when host received the function result from cloud object storage.
* - :code:`host_result_query_count`
- - Number of queries to the object storage to get the status object (synchronize the completion of the function)
+ - Number of queries to the object storage to get the result object.
* - :code:`host_status_done_tstamp`
- Timestamp of when the host received the signal that the function has finished its execution.
* - :code:`host_status_query_count`
- - Number of queries to the object storage to get the result object
+ - Number of queries to the object storage to get the status object (synchronize the completion of the function).
* - :code:`host_submit_tstamp`
- Timestamp of function invocation.
* - :code:`worker_cold_start`
@@ -132,12 +146,28 @@ The user can obtain these statistics through the future object:
- Timestamp in which the worker function had finished its execution.
* - :code:`worker_exec_time`
- Total execution time of the worker function (lithops wrapper + user defined funtion execution time).
+ * - :code:`worker_func_cpu_usage`
+ - Array of CPU usage percentages, with each element representing the average usage of each CPU core during user-defined function execution.
+ * - :code:`worker_func_cpu_user_time`
+ - CPU user time during the execution of the user-defined function.
+ * - :code:`worker_func_cpu_system_time`
+ - CPU system time during the execution of the user-defined function.
* - :code:`worker_func_end_tstamp`
- Timestamp of the end of execution of the user-defined function.
* - :code:`worker_func_exec_time`
- Total execution time of the user-defined function.
+ * - :code:`worker_func_recv_net_io`
+ - Network I/O bytes received during the execution of the user-defined function.
+ * - :code:`worker_func_sent_net_io`
+ - Network I/O bytes sent during the execution of the user-defined function.
* - :code:`worker_func_start_tstamp`
- Timestamp of the start of execution of the user-defined function.
+ * - :code:`worker_func_rss`
+ - Resident Set Size (RSS) in bytes, indicating the amount of physical memory occupied by the user-defined function during its execution.
+ * - :code:`worker_func_uss`
+ - Unique Set Size (USS) in bytes, representing the memory exclusively used by the function that is not shared with other processes.
+ * - :code:`worker_func_vms`
+ - Virtual Memory Size (VMS) in bytes used by the user-defined function. This metric quantifies the total virtual memory allocated.
* - :code:`worker_result_upload_time`
- Total time taken for the function to upload the result to cloud object storage.
* - :code:`worker_start_tstamp`
diff --git a/docs/source/cli.rst b/docs/source/cli.rst
index e953d317d..d2c7fd317 100644
--- a/docs/source/cli.rst
+++ b/docs/source/cli.rst
@@ -10,7 +10,7 @@ Lithops management
------------------
``lithops hello``
-~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~
Runs a *hello-world* function.
@@ -31,7 +31,7 @@ Runs a *hello-world* function.
- **Usage example**: ``lithops hello -b ibm_cf -s ibm_cos``
``lithops test``
-~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~
Runs the unit testing suite. For more instructions about testing `view
this page `__.
@@ -58,7 +58,7 @@ this page `__.
| --keep\_datasets, -k | Keeps datasets in storage after the test run (Flag) |
+------------------------+----------------------------------------------------------------+
-- **Usage example**: ``lithops test -b ibm_cf -s ibm_cos -f``
+- **Usage example**: ``lithops test -b ibm_cf -s ibm_cos``
``lithops clean``
~~~~~~~~~~~~~~~~~
@@ -108,12 +108,50 @@ Open an ssh connection to the master VM (Only available for standalone backends)
- **Usage example**: ``lithops attach -b ibm_vpc``
+``lithops worker list``
+~~~~~~~~~~~~~~~~~~~~~~~
+
+Lists the available workers in the master VM (Only available for standalone backends)
+
++------------------------+----------------------------------------------------------------+
+| Parameter | Description |
++========================+================================================================+
+| --config, -c | Path to your config file |
++------------------------+----------------------------------------------------------------+
+| --backend, -b | Compute backend name |
++------------------------+----------------------------------------------------------------+
+| --region, -r | Compute backend region |
++------------------------+----------------------------------------------------------------+
+| --debug, -d | Activate debug logs (Flag) |
++------------------------+----------------------------------------------------------------+
+
+- **Usage example**: ``lithops worker list -b ibm_vpc``
+
+``lithops job list``
+~~~~~~~~~~~~~~~~~~~~
+
+Lists the jobs submitted to the master VM (Only available for standalone backends)
+
++------------------------+----------------------------------------------------------------+
+| Parameter | Description |
++========================+================================================================+
+| --config, -c | Path to your config file |
++------------------------+----------------------------------------------------------------+
+| --backend, -b | Compute backend name |
++------------------------+----------------------------------------------------------------+
+| --region, -r | Compute backend region |
++------------------------+----------------------------------------------------------------+
+| --debug, -d | Activate debug logs (Flag) |
++------------------------+----------------------------------------------------------------+
+
+- **Usage example**: ``lithops job list -b ibm_vpc``
+
Runtime management
------------------
For complete instructions on how to build runtimes for Lithops, please
-refer to ``runtime/`` folder and choose your compute backend.
+refer to the ``runtime/`` folder and choose your compute backend.
``lithops runtime build ``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -121,7 +159,7 @@ refer to ``runtime/`` folder and choose your compute backend.
Build a new runtime image. Depending of the compute backend, there must
be a Dockerfile located in the same folder you run the command,
otherwise use ``-f`` parameter. Note that this command only builds the
-image and puts it to a container registry. This command do not deploy
+image and puts it into a container registry. This command does not deploy
the runtime to the compute backend.
+-----------------+-----------------------------------+
@@ -409,7 +447,7 @@ Deletes objects from a given bucket.
``lithops storage list ``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Deletes objects from a given bucket.
+Lists objects from a given bucket.
+-----------------+---------------------------------+
| Parameter | Description |
@@ -431,5 +469,3 @@ Deletes objects from a given bucket.
- To list all objects that start with given prefix
:``lithops storage list -b ibm_cos cloudbucket -p test/``
-
-
diff --git a/docs/source/comparing_lithops.rst b/docs/source/comparing_lithops.rst
index 64bd843b7..07565ce7b 100644
--- a/docs/source/comparing_lithops.rst
+++ b/docs/source/comparing_lithops.rst
@@ -1,77 +1,61 @@
-Comparing Lithops with other distributed computing frameworks
+Comparing Lithops with Other Distributed Computing Frameworks
=============================================================
-In a nutshell, Lithops differs from other distributed computing frameworks in that Lithops leverages serverless
-functions to compute massively parallel computations.
+Lithops introduces a novel approach to distributed computing by leveraging **serverless functions** for massively parallel computations. Unlike traditional frameworks that require managing a cluster of nodes, Lithops utilizes Function-as-a-Service (FaaS) platforms to dynamically scale execution resources — down to zero when idle and massively up when needed.
-In addition, Lithops provides a simple and easy-to-use interface to access and process data stored in Object Storage
-from your serverless functions.
-
-Moreover, Lithops abstract design allows seamlessly portability between clouds and FaaS services, avoiding vendor
-lock-in.
+In addition, Lithops offers a simple and consistent programming interface to transparently process data stored in **Object Storage** from within serverless functions. Its **modular and cloud-agnostic architecture** enables seamless portability across different cloud providers and FaaS platforms, effectively avoiding vendor lock-in.
PyWren
------
-.. image:: https://www.faasification.com/assets/img/tools/pywren-logo-big.png
- :align: center
- :width: 250
+`PyWren `_ is the precursor to Lithops. Initially designed to run exclusively on AWS Lambda using a Conda runtime and supporting only Python 2.7, it served as a proof of concept for using serverless functions in scientific computing.
+In 2018, the Lithops team forked PyWren to adapt it for **IBM Cloud Functions**, which offered a Docker-based runtime. This evolution also introduced support for **Object Storage as a primary data source** and opened the door to more advanced use cases such as Big Data analytics.
-`PyWren `_ is Lithops' "father" project. PyWren was only designed to run in AWS Lambda with a
-Conda environment and only supported Python 2.7. In 2018, Lithops' creators forked PyWren and adapted it to IBM Cloud
-Functions, which, in contrast, uses a Docker runtime. The authors also explored new usages for PyWren, like processing Big Data from
-Object Storage. Then, on September 2020, IBM PyWren authors decided that the project had evolved enough to no longer be
-considered a simple fork of PyWren for IBM cloud and became Lithops. With this change, the project would no longer be
-tied to the old PyWren model and could move to more modern features such as mulit-cloud support or the transparent
-multiprocessing interface.
+By September 2020, the IBM PyWren fork had diverged significantly. The maintainers rebranded the project as **Lithops**, reflecting its broader goals — including multi-cloud compatibility, improved developer experience, and support for modern Python environments and distributed computing patterns.
-You can read more about PyWren IBM Cloud at the Middleware'18 industry paper `Serverless Data Analytics in the IBM Cloud `_.
+For more details, refer to the Middleware'18 industry paper:
+`Serverless Data Analytics in the IBM Cloud `_.
Ray and Dask
------------
-.. image:: https://warehouse-camo.ingress.cmh1.psfhosted.org/98ae79911b7a91517ba16ef2dc7dc3b972214820/68747470733a2f2f6769746875622e636f6d2f7261792d70726f6a6563742f7261792f7261772f6d61737465722f646f632f736f757263652f696d616765732f7261795f6865616465725f6c6f676f2e706e67
- :align: center
+.. image:: https://github.com/ray-project/ray/raw/master/doc/source/images/ray_logo.png
:width: 250
-
.. image:: https://docs.dask.org/en/stable/_images/dask_horizontal.svg
- :align: center
:width: 250
-In comparison with Lithops, both `Ray `_ and `Dask `_ leverage a cluster of nodes for distributed computing, while Lithops
-mainly leverages serverless functions. This restraint makes Ray much less flexible than Lithops in terms of scalability.
+`Ray `_ and `Dask `_ are distributed computing frameworks designed to operate on a **predefined cluster of nodes** (typically virtual machines). In contrast, Lithops relies on **serverless runtimes**, which allows for *elastic and fine-grained scaling* — including scaling to zero — with no idle infrastructure costs.
-Although Dask and Ray can scale and adapt the resources to the amount of computation needed, they don't scale to zero since
-they must keep a "head node" or "master" that controls the cluster and must be kept up.
+While Ray and Dask provide dynamic task scheduling and can autoscale within an IaaS environment, they always require a **centralized "head node" or controller** to manage the cluster, making them less suitable for ephemeral and cost-efficient cloud-native computing.
-In any case, the capacity and scalability of Ray or Dask in IaaS using virtual machines is not comparable to that of serverless functions.
+Additionally, the performance and elasticity of Ray and Dask in IaaS environments are not directly comparable to Lithops' **fully serverless model**, which benefits from the near-infinite parallelism offered by cloud functions.
PySpark
-------
.. image:: https://upload.wikimedia.org/wikipedia/commons/thumb/f/f3/Apache_Spark_logo.svg/2560px-Apache_Spark_logo.svg.png
- :align: center
:width: 250
+`PySpark `_ is the Python interface for Apache Spark, a well-established distributed computing engine. Spark is typically deployed on a **static cluster of machines**, either on-premises or in cloud environments using HDFS or cloud-native file systems.
-Much like Ray or Dask, PySpark is a distributed computing framework that uses cluster technologies. PySpark provides Python bindings for Spark.
-Spark is designed to work with a fixed-size node cluster, and it is typically used to process data from on-prem HDFS
-and analyze it using SparkSQL and Spark DataFrame.
-
+PySpark is optimized for **batch analytics** using DataFrames and SparkSQL, but it lacks native integration with FaaS models. Its operational model is not inherently elastic and requires continuous management of a Spark cluster, which may not align with modern, fully managed, or serverless computing paradigms.
Serverless Framework
--------------------
.. image:: https://cdn.diegooo.com/media/20210606183353/serverless-framework-icon.png
- :align: center
:width: 250
+`Serverless Framework `_ is a deployment toolchain designed primarily for **building and deploying serverless web applications**, especially on AWS, GCP, and Azure. It is widely used to manage HTTP APIs, event-driven services, and infrastructure-as-code (IaC) for cloud-native apps.
-Serverless Framework is a tool to develop serverless applications (mainly NodeJS) and deploy them seemlessly on AWS, GCP
-or Azure.
+Although both Lithops and Serverless Framework leverage **serverless functions**, their objectives are fundamentally different:
+
+- **Serverless Framework** focuses on application deployment (e.g., microservices, REST APIs).
+- **Lithops** targets **parallel and data-intensive workloads**, enabling large-scale execution of Python functions over scientific datasets, data lakes, and unstructured data in object storage.
+
+Summary
+-------
-Although both Serverless Framework and Lithops use serverless functions, their objective is completely different:
-Serverless Framework aims to provide an easy-to-use tool to develop applications related to web services, like HTTP APIs,
-while Lithops aims to develop applications related to highly parallel scientific computation and Big Data processing.
+Lithops stands out as a **cloud-native, serverless-first framework** purpose-built for **parallel computing, data analytics, and scientific workloads**. By abstracting away infrastructure management and providing built-in object storage integration, it delivers a unique balance of **simplicity**, **performance**, and **multi-cloud compatibility** — distinguishing it from traditional cluster-based frameworks and generic serverless tools alike.
diff --git a/docs/source/compute_backends.rst b/docs/source/compute_backends.rst
index 437a7d711..49c52f14f 100644
--- a/docs/source/compute_backends.rst
+++ b/docs/source/compute_backends.rst
@@ -7,27 +7,34 @@ Compute Backends
compute_config/localhost.md
-**Serverless Compute Backends:**
+**Serverless (FaaS) Backends:**
.. toctree::
:glob:
:maxdepth: 1
- compute_config/ibm_cf.md
- compute_config/code_engine.md
compute_config/aws_lambda.md
- compute_config/aws_batch.md
compute_config/gcp_functions.md
- compute_config/gcp_cloudrun.md
compute_config/azure_functions.md
- compute_config/azure_containers.md
compute_config/oracle_functions.md
compute_config/aliyun_functions.md
+ compute_config/openwhisk.md
+
+**Serverless (CaaS) Backends:**
+
+.. toctree::
+ :glob:
+ :maxdepth: 1
+
+ compute_config/code_engine.md
+ compute_config/aws_batch.md
+ compute_config/gcp_cloudrun.md
+ compute_config/azure_containers.md
compute_config/kubernetes.md
compute_config/knative.md
- compute_config/openwhisk.md
+ compute_config/singularity.md
-**Standalone Compute Backends:**
+**Standalone Backends:**
.. toctree::
:glob:
diff --git a/docs/source/compute_config/aliyun_functions.md b/docs/source/compute_config/aliyun_functions.md
index 5702fb60f..8d15966f5 100644
--- a/docs/source/compute_config/aliyun_functions.md
+++ b/docs/source/compute_config/aliyun_functions.md
@@ -6,11 +6,11 @@ Lithops with *Aliyun Function Compute* as serverless compute backend.
1. Install Alibaba Cloud backend dependencies:
-```
+```bash
python3 -m pip install lithops[aliyun]
```
-1. Access to your [console](https://homenew-intl.console.aliyun.com/) and activate your Functions service instance.
+2. Access your [console](https://homenew-intl.console.aliyun.com/) and activate your Functions service instance.
## Configuration
diff --git a/docs/source/compute_config/aws_batch.md b/docs/source/compute_config/aws_batch.md
index 530340695..d03642b99 100644
--- a/docs/source/compute_config/aws_batch.md
+++ b/docs/source/compute_config/aws_batch.md
@@ -4,54 +4,104 @@ Lithops with *AWS Batch* as serverless batch compute backend.
## Installation
-1. Install Amazon Web Services backend dependencies:
+1. Install AWS backend dependencies:
-```
+```bash
python3 -m pip install lithops[aws]
```
-2. [Login](https://console.aws.amazon.com/?nc2=h_m_mc) to Amazon Web Services Console (or signup if you don't have an account)
+## Configuration
+
+1. [Log in](https://console.aws.amazon.com/?nc2=h_m_mc) to the Amazon Web Services Console (or sign up if you don't have an account)
-3. Navigate to **IAM > Roles** to create the ECS Task Execution Role. AWS provides a defualt role named `ecsTaskExecutionRole`, which can be used instead. If you want to create another role or it is missing, create a new role attached to `Elastic Container Service Task`, and add the following policies:
+2. Navigate to **IAM > Roles** to create the ECS Task Execution Role. AWS provides a default role named `ecsTaskExecutionRole`, which can be used instead. If you want to create another role or it is missing, create a new role attached to `Elastic Container Service Task`, and add the following policies:
- `SecretsManagerReadWrite`
- `AmazonEC2ContainerRegistryFullAccess`
- `CloudWatchFullAccess`
- `AmazonECSTaskExecutionRolePolicy`
-4. Navigate to **IAM > Roles** to create the ECS Instance Role. AWS provides a defualt role named `ecsInstanceRole`, which can be used instead. If you want to create another role or it is missing, create a new role attached to `EC2`, and add the following policy:
+3. Navigate to **IAM > Roles** to create the ECS Instance Role. AWS provides a default role named `ecsInstanceRole`, which can be used instead. If you want to create another role or it is missing, create a new role attached to `EC2`, and add the following policy:
- `AmazonEC2ContainerServiceforEC2Role`
-## Configuration
-
-5. Edit your lithops config and add the following keys:
-
-```yaml
-aws:
- region:
- access_key_id:
- secret_access_key:
-
-aws_batch:
- runtime :
- runtime_timeout:
- runtime_memory:
- worker_processes:
- container_vcpus:
- execution_role:
- instance_role:
- env_type:
- env_max_cpus:
- assign_public_ip:
- subnets:
- -
- -
- - ...
- security_groups:
- -
- -
- - ...
+4. Navigate to **IAM > Policies**. Click on **Create policy**. If you already created this policy for the AWS Lambda or AWS EC2 backend, jump to step 7.
+
+5. Select **JSON** tab and paste the following JSON policy:
+```json
+{
+ "Version": "2012-10-17",
+ "Statement": [
+ {
+ "Effect": "Allow",
+ "Action": [
+ "s3:*",
+ "lambda:*",
+ "ec2:*",
+ "ecr:*",
+ "sts:GetCallerIdentity",
+ "logs:CreateLogGroup",
+ "logs:CreateLogStream",
+ "logs:PutLogEvents"
+ ],
+ "Resource": "*"
+ }
+ ]
+}
```
+6. Click **Next: Tags** and **Next: Review**. Fill the policy name field (you can name it `lithops-policy` or similar) and create the policy.
+
+7. Go back to **IAM** and navigate to **Roles** tab. Click **Create role**.
+
+8. Choose **Elastic Container Service** on the use case list and then click on **Elastic Container Service Task**. Click **Next: Permissions**. Select the policy created before (`lithops-policy`). Click **Next: Tags** and **Next: Review**. Type a role name, for example `ecsTaskJobRole`. Click on **Create Role**.
+
+## AWS Credential setup
+
+Lithops loads AWS credentials as specified in the [boto3 configuration guide](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html).
+
+In summary, you can use one of the following settings:
+
+1. Provide the credentials via the `~/.aws/config` file, or set the `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` environment variables.
+
+   You can run the `aws configure` command to set up the credentials if the AWS CLI is installed. Then set the following in the Lithops config file:
+ ```yaml
+ lithops:
+ backend: aws_batch
+
+ aws_batch:
+ region :
+ execution_role:
+ job_role:
+ subnets:
+ -
+ -
+ - ...
+ security_groups:
+ -
+ - ...
+ ```
+
+2. Provide the credentials in the `aws` section of the Lithops config file. In this case you can omit setting the `job_role`:
+ ```yaml
+ lithops:
+ backend: aws_batch
+
+ aws:
+ access_key_id:
+ secret_access_key:
+ region:
+
+ aws_batch:
+ execution_role:
+ job_role: # Not mandatory if the credentials are in the aws section
+ subnets:
+ -
+ -
+ - ...
+ security_groups:
+ -
+ - ...
+ ```
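+
+Instead of a YAML file, the same settings can also be passed programmatically as a Python dictionary (a minimal sketch; the region, role ARNs, subnet and security group IDs below are placeholders that must be replaced with your own values):
+
+```python
+import lithops
+
+config = {
+    'lithops': {'backend': 'aws_batch'},
+    'aws': {'region': 'us-east-1'},  # credentials resolved by boto3 (~/.aws or env vars)
+    'aws_batch': {
+        'execution_role': 'arn:aws:iam::123456789012:role/ecsTaskExecutionRole',  # placeholder
+        'job_role': 'arn:aws:iam::123456789012:role/ecsTaskJobRole',              # placeholder
+        'subnets': ['subnet-0123456789abcdef0'],                                  # placeholder
+        'security_groups': ['sg-0123456789abcdef0'],                              # placeholder
+    },
+    # a storage backend (e.g. aws_s3) is configured in the same way
+}
+
+fexec = lithops.FunctionExecutor(config=config)
+```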
+
## Summary of configuration keys for AWS
### AWS
@@ -59,8 +109,8 @@ aws_batch:
|Group|Key|Default|Mandatory|Additional info|
|---|---|---|---|---|
|aws | region | |yes | AWS region name. For example `us-east-1` |
-|aws | access_key_id | |yes | Account access key to AWS services. To find them, navigate to *My Security Credentials* and click *Create Access Key* if you don't already have one. |
-|aws | secret_access_key | |yes | Account secret access key to AWS services. To find them, navigate to *My Security Credentials* and click *Create Access Key* if you don't already have one. |
+|aws | access_key_id | |no | Account access key to AWS services. To find them, navigate to *My Security Credentials* and click *Create Access Key* if you don't already have one. |
+|aws | secret_access_key | |no | Account secret access key to AWS services. To find them, navigate to *My Security Credentials* and click *Create Access Key* if you don't already have one. |
|aws | session_token | |no | Session token for temporary AWS credentials |
|aws | account_id | |no | *This field will be used if present to retrieve the account ID instead of using AWS STS. The account ID is used to format full image names for container runtimes. |
@@ -69,17 +119,18 @@ aws_batch:
|Group|Key|Default|Mandatory|Additional info|
|---|---|---|---|---|
| aws_batch | execution_role | | yes | ARN of the execution role used to execute AWS Batch tasks on ECS for Fargate environments |
-| aws_batch | instance_role | | yes | ARN of the execution role used to execute AWS Batch tasks on ECS for EC2 environments |
+| aws_batch | job_role | | yes | ARN of the job role used to execute AWS Batch tasks on ECS for Fargate environments. Not mandatory if the credentials are in the `aws` section of the configuration|
| aws_batch | security_groups | | yes | List of Security groups to attach for ECS task containers. By default, you can use a security group that accepts all outbound traffic but blocks all inbound traffic. |
-| aws_batch | subnets | | yes | List of subnets from a VPC where to deploy the ECS task containers. Note that if you are using a **private subnet**, you can set `assing_public_ip` to `false` but make sure containers can reach other AWS services like ECR, Secrets service, etc., by, for example, using a NAT gateway. If you are using a **public subnet** you must set `assing_public_ip` to `true` |
+| aws_batch | subnets | | yes | List of subnets of a VPC in which to deploy the ECS task containers. Note that if you are using a **private subnet**, you can set `assign_public_ip` to `false`, but make sure containers can reach other AWS services like ECR, Secrets service, etc., by, for example, using a NAT gateway. If you are using a **public subnet** you must set `assign_public_ip` to `true` |
+| aws_batch | instance_role | | no | ARN of the execution role used to execute AWS Batch tasks on ECS for EC2 environments. Mandatory if using the **EC2** or **SPOT** `env_type` |
| aws_batch | region | | no | Region name (like `us-east-1`) where to deploy the ECS cluster. Lithops will use the region set under the `aws` section if it is not set here |
-| aws_batch | assign_public_ip | `true` | no | Assing public IPs to ECS task containers. Set to `true` if the tasks are being deployed in a public subnet. Set to `false` when deploying on a private subnet. |
-| aws_batch | runtime | `default_runtime-v3X` | no | Runtime name |
-| aws_batch | runtime_timeout | 180 | no | Runtime timeout |
-| aws_batch | runtime_memory | 1024 | no | Runtime memory |
-| aws_batch | worker_processes | 1 | no | Worker processes |
-| aws_batch | container_vcpus | 0.5 | no | Number of vCPUs assigned to each task container. It can be different from `worker_processes`. Use it to run a task that uses multiple processes within a container. |
-| aws_batch | service_role | `None` | no | Service role for AWS Batch. Leave empty for use a service-linked execution role. More info [here](https://docs.aws.amazon.com/batch/latest/userguide/using-service-linked-roles.html) |
+| aws_batch | assign_public_ip | `true` | no | Assign public IPs to ECS task containers. Set to `true` if the tasks are being deployed in a public subnet. Set to `false` when deploying on a private subnet. |
+| aws_batch | runtime | | no | Container runtime name in ECR. If not provided Lithops will automatically build a default runtime |
+| aws_batch | runtime_timeout | 180 | no | Runtime timeout managed by the cloud provider. |
+| aws_batch | runtime_memory | 1024 | no | Runtime memory assigned to each task container. |
+| aws_batch | runtime_cpu | 0.5 | no | Number of vCPUs assigned to each task container. It can be different from `worker_processes`. |
+| aws_batch | worker_processes | 1 | no | Number of parallel Lithops processes in a worker. This is used to parallelize function activations within the worker. |
+| aws_batch | service_role | | no | Service role for AWS Batch. Leave empty to use a service-linked execution role. More info [here](https://docs.aws.amazon.com/batch/latest/userguide/using-service-linked-roles.html) |
| aws_batch | env_max_cpus | 10 | no | Maximum total CPUs of the compute environment |
| aws_batch | env_type | FARGATE_SPOT | no | Compute environment type, one of: `["EC2", "SPOT", "FARGATE", "FARGATE_SPOT"]` |
@@ -97,4 +148,4 @@ You can view the function executions logs in your local machine using the *litho
```bash
lithops logs poll
-```
\ No newline at end of file
+```
diff --git a/docs/source/compute_config/aws_ec2.md b/docs/source/compute_config/aws_ec2.md
index 45a19efab..0b19b61ef 100644
--- a/docs/source/compute_config/aws_ec2.md
+++ b/docs/source/compute_config/aws_ec2.md
@@ -1,124 +1,240 @@
# AWS Elastic Compute Cloud (EC2)
-The AWS EC2 client of Lithops can provide a truely serverless user experience on top of EC2 where Lithops creates new Virtual Machines (VMs) dynamically in runtime and scale Lithops jobs against them. Alternatively Lithops can start and stop an existing VM instances.
+The AWS EC2 client of Lithops can provide a truly serverless user experience on top of EC2, where Lithops creates new Virtual Machines (VMs) dynamically at runtime and scales Lithops jobs across them (Create & Reuse modes). Alternatively, Lithops can start and stop an existing VM instance (Consume mode).
## AWS
We assume that you are already familiar with AWS and that you have AUTH credentials for your account (HMAC Credentials).
### Choose an operating system image for the VM
-Any Virtual Machine (VM) need to define the instance’s operating system and version. Lithops support both standard operating system choices provided by the VPC or using pre-defined custom images that already contains all dependencies required by Lithops.
+Any Virtual Machine (VM) needs to define the instance’s operating system and version. Lithops supports both the standard operating system images provided by the VPC and pre-defined custom images that already contain all dependencies required by Lithops.
-- Option 1: By default, Lithops uses an Ubuntu 22.04 image. In this case, no further action is required and you can continue to the next step. Lithops will install all required dependencies in the VM by itself. Notice this can consume about 3 min to complete all installations.
+- Option 1: By default, Lithops uses an Ubuntu 22.04 image. In this case, no further action is required and you can continue to the next step. Lithops will install all required dependencies in the VM by itself. Note that this can take about 3 minutes to complete all installations.
- Option 2: Alternatively, you can use a pre-built custom image that will greatly improve VM creation time for Lithops jobs. To benefit from this approach, navigate to [runtime/aws_ec2](https://github.com/lithops-cloud/lithops/tree/master/runtime/aws_ec2), and follow the instructions.
-## Lithops and the VM consume mode
+## Installation
-In this mode, Lithops can start and stop an existing VM, and deploy an entire job to that VM. The partition logic in this scenario is different from the `create/reuse` modes, since the entire job is executed in the same VM.
-
-### Lithops configuration for the consume mode
+1. Install AWS backend dependencies:
-Edit your lithops config and add the relevant keys:
+```bash
+python3 -m pip install lithops[aws]
+```
-```yaml
-lithops:
- backend: aws_ec2
+## Create and reuse modes
+In the `create` mode, Lithops automatically creates new worker VM instances at runtime, scales the Lithops job across the generated VMs, and automatically deletes the VMs when the job is completed.
+Alternatively, you can set the `reuse` mode to keep the started worker VMs running and reuse them for further executions. In the `reuse` mode, Lithops checks all the available worker VMs and starts new workers only if necessary.
-aws:
- region :
- access_key_id:
- secret_access_key:
-aws_ec2:
- instance_id :
+### Configuration
+
+1. Navigate to **IAM > Policies**. Click on **Create policy**. If you already created this policy for the AWS Lambda or AWS Batch backend, jump to step 4.
+
+2. Select **JSON** tab and paste the following JSON policy:
+```json
+{
+ "Version": "2012-10-17",
+ "Statement": [
+ {
+ "Effect": "Allow",
+ "Action": [
+ "s3:*",
+ "lambda:*",
+ "ec2:*",
+ "ecr:*",
+ "sts:GetCallerIdentity",
+ "logs:CreateLogGroup",
+ "logs:CreateLogStream",
+ "logs:PutLogEvents"
+ ],
+ "Resource": "*"
+ }
+ ]
+}
```
+3. Click **Next: Tags** and **Next: Review**. Fill the policy name field (you can name it `lithops-policy` or similar) and create the policy.
+
+4. Go back to **IAM** and navigate to **Roles** tab. Click **Create role**.
+
+5. Choose **EC2** on the use case list. Click **Next: Permissions**. Select the policy created before (`lithops-policy`). Click **Next: Tags** and **Next: Review**. Type a role name, for example `ec2LithopsInstanceRole`. Click on **Create Role**.
+
+
+### AWS Credential setup
+
+Lithops loads AWS credentials as specified in the [boto3 configuration guide](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html).
+
+In summary, you can use one of the following settings:
+
+1. Provide the credentials via the `~/.aws/config` file, or set the `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` environment variables.
+
+   You can run the `aws configure` command to set up the credentials if the AWS CLI is installed. Then set the following in the Lithops config file:
+ ```yaml
+ lithops:
+ backend: aws_ec2
+
+ aws_ec2:
+ region :
+ instance_role:
+ exec_mode: reuse
+ ```
+
+2. Provide the credentials in the `aws` section of the Lithops config file:
+ ```yaml
+ lithops:
+ backend: aws_ec2
+
+ aws:
+ access_key_id:
+ secret_access_key:
+ region:
+
+ aws_ec2:
+ instance_role:
+ exec_mode: reuse
+ ```
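+
+With `exec_mode: reuse`, consecutive executions can share the worker VMs that are already running. A minimal sketch (the function and input data are only illustrative):
+
+```python
+import lithops
+
+def inc(x):
+    return x + 1
+
+fexec = lithops.FunctionExecutor(backend='aws_ec2')   # exec_mode is taken from the config
+
+futures = fexec.map(inc, range(8))    # first call creates/starts the worker VMs
+print(fexec.get_result(futures))
+
+futures = fexec.map(inc, range(8))    # in reuse mode, running workers are reused if available
+print(fexec.get_result(futures))
+```
+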
### Summary of configuration keys for AWS
|Group|Key|Default|Mandatory|Additional info|
|---|---|---|---|---|
|aws | region | |yes | AWS Region. For example `us-east-1` |
-|aws | access_key_id | |yes | Account access key to AWS services. To find them, navigate to *My Security Credentials* and click *Create Access Key* if you don't already have one. |
-|aws | secret_access_key | |yes | Account secret access key to AWS services. To find them, navigate to *My Security Credentials* and click *Create Access Key* if you don't already have one. |
+|aws | access_key_id | |no | Account access key to AWS services. To find them, navigate to *My Security Credentials* and click *Create Access Key* if you don't already have one. |
+|aws | secret_access_key | |no | Account secret access key to AWS services. To find them, navigate to *My Security Credentials* and click *Create Access Key* if you don't already have one. |
|aws | session_token | |no | Session token for temporary AWS credentials |
|aws | account_id | |no | *This field will be used if present to retrieve the account ID instead of using AWS STS. The account ID is used to format full image names for container runtimes. |
-### EC2 - Consume Mode
+### EC2 - Create and Reuse Modes
|Group|Key|Default|Mandatory|Additional info|
|---|---|---|---|---|
-|aws_ec2 | instance_id | | yes | virtual server instance ID |
-|aws_ec2 | region | |no | Region name of the VPC. For example `us-east-1`. Lithops will use the region set under the `aws` section if it is not set here |
+|aws_ec2 | region | |no | Region name, for example: `eu-west-1`. Lithops will use the `region` set under the `aws` section if it is not set here |
+|aws_ec2 | instance_role | | yes | EC2 Instance role name created in the configuration section above. Do not use the full ARN here; only the role name is required. For example: `ec2LithopsInstanceRole`|
+|aws_ec2 | vpc_id | | no | VPC id. You can find all the available VPCs in the [VPC Console page](https://console.aws.amazon.com/vpc/v2/home#vpcs:). If not provided, Lithops will create a new VPC |
+|aws_ec2 | public_subnet_id | | no | Public subnet id. You can find all the available Subnets in the [VPC Console page](https://console.aws.amazon.com/vpc/v2/home#subnets:). If not provided, Lithops will create a new public subnet |
+|aws_ec2 | public_subnet_cidr_block | 10.0.1.0/24 | no | In case a `public_subnet_id` is not provided, Lithops will create a new subnet with this CIDR block |
+|aws_ec2 | security_group_id | | no | Security group ID. You can find the available security groups in the [VPC console page](https://console.aws.amazon.com/vpc/v2/home#SecurityGroups:). The security group must have ports 22, 6379, 8080 and 8081 open. If not provided, Lithops will create a new security group |
+|aws_ec2 | ssh_key_name | | no | SSH Key name. You can find the available keys in the [EC2 console page](https://console.aws.amazon.com/ec2/v2/home#KeyPairs:). Create a new one or upload your own key if it does not exist|
|aws_ec2 | ssh_username | ubuntu |no | Username to access the VM |
-|aws_ec2 | ssh_key_filename | ~/.ssh/id_rsa | no | Path to the ssh key file provided to create the VM. It will use the default path if not provided |
-|aws_ec2 | worker_processes | 2 | no | Number of Lithops processes within a given worker. This can be used to parallelize function activations within a worker. It is recommendable to set this value to the same number of CPUs of the VM. |
-|aws_ec2 | runtime | python3 | no | Runtime name to run the functions. Can be a container image name. If not set Lithops will use the defeuv python3 interpreter of the VM |
+|aws_ec2 | ssh_password | |no | Password for accessing the worker VMs. If not provided, it is created randomly|
+|aws_ec2 | ssh_key_filename | ~/.ssh/id_rsa | no | Path to the ssh key file provided to access the VPC. If not provided, Lithops will use the default path and create a new ssh key for the VPC |
+|aws_ec2 | request_spot_instances | True | no | Request spot instance for worker VMs|
+|aws_ec2 | target_ami | | no | Virtual machine image id. Default is Ubuntu Server 22.04 |
+|aws_ec2 | master_instance_type | t2.micro | no | Profile name for the master VM |
+|aws_ec2 | worker_instance_type | t2.medium | no | Profile name for the worker VMs |
+|aws_ec2 | delete_on_dismantle | True | no | Delete the worker VMs when they are stopped. Master VM is never deleted when stopped |
+|aws_ec2 | max_workers | 100 | no | Max number of workers per `FunctionExecutor()`|
+|aws_ec2 | worker_processes | AUTO | no | Number of parallel Lithops processes in a worker. This is used to parallelize function activations within the worker. By default it detects the amount of CPUs in the `worker_instance_type` VM|
+|aws_ec2 | runtime | python3 | no | Runtime name to run the functions. Can be a container image name. If not set Lithops will use the default python3 interpreter of the VM |
|aws_ec2 | auto_dismantle | True |no | If False then the VM is not stopped automatically.|
|aws_ec2 | soft_dismantle_timeout | 300 |no| Time in seconds to stop the VM instance after a job **completed** its execution |
|aws_ec2 | hard_dismantle_timeout | 3600 | no | Time in seconds to stop the VM instance after a job **started** its execution |
+|aws_ec2 | exec_mode | reuse | no | One of: **consume**, **create** or **reuse**. If set to **create**, Lithops will automatically create new VMs for each map() call based on the number of elements in `iterdata`. If set to **reuse**, Lithops will try to reuse running workers if they exist |
-## Lithops and the VM auto create|reuse mode
-In the `create` mode, Lithops will automatically create new worker VM instances in runtime, scale Lithops job against generated VMs, and automatically delete the VMs when the job is completed.
-Alternatively, you can set the `reuse` mode to keep running the started worker VMs, and reuse them for further executions. In the `reuse` mode, Lithops checks all the available worker VMs and start new workers only if necessary.
+## Additional configuration
-### Lithops configuration for the auto create mode
+### Elastic Block Store (EBS)
-Edit your lithops config and add the relevant keys:
+To attach EBS volumes to an EC2 instance in Lithops, you can configure the `aws_ec2` section as follows.
```yaml
-lithops:
- backend: aws_ec2
-
-aws:
- access_key_id:
- secret_access_key:
- region:
-
aws_ec2:
- iam_role:
- exec_mode: reuse
+    instance_role:
+ region:
+ ...
+ ebs_volumes:
+ - device_name: /dev/xvda
+ ebs:
+ volume_size: 100
+ volume_type: gp2
+ delete_on_termination: true
+ encrypted: false
+ kms_key_id:
+ - device_name: /dev/xvdf
+ ebs:
+ volume_size: 50
+ volume_type: gp3
+ delete_on_termination: true
+ encrypted: false
+ iops: 3000
+ throughput: 125
+ ...
```
+|Group|Key|Default|Mandatory|Additional info|
+|---|---|---|---|---|
+| ebs | volume_size | 8 | No | Size of the volume in GiB |
+| ebs | volume_type | gp2 | No | Type of volume. Options: `gp2`, `gp3`, `io1`, `io2`, `sc1`, `st1`, `standard`|
+| ebs | delete_on_termination| True | No | Whether the volume is deleted automatically when the instance is terminated |
+| ebs | encrypted | False | No | Whether the volume is encrypted |
+| ebs | kms_key_id | | No | ARN of the KMS key used for encryption. If not provided, the default AWS-managed key is used |
+| ebs | iops | | No | Provisioned IOPS for `io1`, `io2`, or `gp3` volumes |
+| ebs | throughput | | No | Throughput in MiB/s for `gp3` volumes |
+
+
+## Consume mode
+
+In this mode, Lithops can start and stop an existing VM, and deploy an entire job to that VM. The partition logic in this scenario is different from the `create/reuse` modes, since the entire job is executed in the same VM.
+
+### AWS Credential setup
+
+Lithops loads AWS credentials as specified in the [boto3 configuration guide](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html).
+
+In summary, you can use one of the following settings:
+
+1. Provide the credentials via the `~/.aws/config` file, or set the `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` environment variables.
+
+   You can run the `aws configure` command to set up the credentials if the AWS CLI is installed. Then set the following in the Lithops config file:
+ ```yaml
+ lithops:
+ backend: aws_ec2
+
+ aws_ec2:
+ region :
+ exec_mode: consume
+ instance_id :
+ ```
+
+2. Provide the credentials in the `aws` section of the Lithops config file:
+ ```yaml
+ lithops:
+ backend: aws_ec2
+
+ aws:
+ access_key_id:
+ secret_access_key:
+ region:
+
+ aws_ec2:
+ exec_mode: consume
+ instance_id :
+ ```
+
### Summary of configuration keys for AWS
|Group|Key|Default|Mandatory|Additional info|
|---|---|---|---|---|
-|aws | region | |yes | AWS Region. For example `us-east-1` |
-|aws | access_key_id | |yes | Account access key to AWS services. To find them, navigate to *My Security Credentials* and click *Create Access Key* if you don't already have one. |
-|aws | secret_access_key | |yes | Account secret access key to AWS services. To find them, navigate to *My Security Credentials* and click *Create Access Key* if you don't already have one. |
+|aws | region | |no | AWS Region. For example `us-east-1` |
+|aws | access_key_id | |no | Account access key to AWS services. To find them, navigate to *My Security Credentials* and click *Create Access Key* if you don't already have one. |
+|aws | secret_access_key | |no | Account secret access key to AWS services. To find them, navigate to *My Security Credentials* and click *Create Access Key* if you don't already have one. |
|aws | session_token | |no | Session token for temporary AWS credentials |
|aws | account_id | |no | *This field will be used if present to retrieve the account ID instead of using AWS STS. The account ID is used to format full image names for container runtimes. |
-### EC2 - Create and Reuse Modes
+### Summary of configuration keys for the consume mode
|Group|Key|Default|Mandatory|Additional info|
|---|---|---|---|---|
-|aws_ec2 | region | |yes | Region name, for example: `eu-west-1`. Lithops will use the `region` set under the `aws` section if it is not set here |
-|aws_ec2 | iam_role | | yes | IAM EC2 role name. You can find it in the [IAM Console page](https://console.aws.amazon.com/iamv2/home#/roles). Create a new EC2 role if it does not exist|
-|aws_ec2 | vpc_id | | no | VPC id. You can find all the available VPCs in the [VPC Console page](https://console.aws.amazon.com/vpc/v2/home#vpcs:) |
-|aws_ec2 | subnet_id | | no | Subnet id. You can find all the available Subnets in the [VPC Console page](https://console.aws.amazon.com/vpc/v2/home#subnets:) |
-|aws_ec2 | security_group_id | | no | Security group ID. You can find the available security groups in the [VPC console page](https://console.aws.amazon.com/vpc/v2/home#SecurityGroups:). The security group must have ports 22 and 8080 open |
-|aws_ec2 | ssh_key_name | | no | SSH Key name. You can find the available keys in the [EC2 console page](https://console.aws.amazon.com/ec2/v2/home#KeyPairs:). Create a new one or upload your own key if it does not exist|
+|aws_ec2 | instance_id | | yes | virtual server instance ID |
+|aws_ec2 | region | |yes | Region name of the VPC. For example `us-east-1`. Lithops will use the region set under the `aws` section if it is not set here |
|aws_ec2 | ssh_username | ubuntu |no | Username to access the VM |
-|aws_ec2 | ssh_password | |no | Password for accessing the worker VMs. If not provided, it is created randomly|
-|aws_ec2 | ssh_key_filename | ~/.ssh/id_rsa | no | Path to the ssh key file provided to access the VPC. It will use the default path if not provided |
-|aws_ec2 | request_spot_instances | True | no | Request spot instance for worker VMs|
-|aws_ec2 | target_ami | | no | Virtual machine image id. Default is Ubuntu Server 20.04 |
-|aws_ec2 | master_instance_type | t2.micro | no | Profile name for the master VM |
-|aws_ec2 | worker_instance_type | t2.medium | no | Profile name for the worker VMs |
-|aws_ec2 | delete_on_dismantle | True | no | Delete the worker VMs when they are stopped. Master VM is never deleted when stopped |
-|aws_ec2 | max_workers | 100 | no | Max number of workers per `FunctionExecutor()`|
-|aws_ec2 | worker_processes | 2 | no | Number of Lithops processes within a given worker. This can be used to parallelize function activations within a worker. It is recommendable to set this value to the same number of CPUs of a worker VM. |
-|aws_ec2 | runtime | python3 | no | Runtime name to run the functions. Can be a container image name. If not set Lithops will use the default python3 interpreter of the VM |
+|aws_ec2 | ssh_key_filename | ~/.ssh/id_rsa | no | Path to the ssh key file provided to create the VM. It will use the default path if not provided |
+|aws_ec2 | worker_processes | AUTO | no | Number of parallel Lithops processes in a worker. This is used to parallelize function activations within the worker. By default it detects the amount of CPUs in the VM|
+|aws_ec2 | runtime | python3 | no | Runtime name to run the functions. Can be a container image name. If not set Lithops will use the default python3 interpreter of the VM |
|aws_ec2 | auto_dismantle | True |no | If False then the VM is not stopped automatically.|
|aws_ec2 | soft_dismantle_timeout | 300 |no| Time in seconds to stop the VM instance after a job **completed** its execution |
|aws_ec2 | hard_dismantle_timeout | 3600 | no | Time in seconds to stop the VM instance after a job **started** its execution |
-|aws_ec2 | exec_mode | consume | no | One of: **consume**, **create** or **reuse**. If set to **create**, Lithops will automatically create new VMs for each map() call based on the number of elements in iterdata. If set to **reuse** will try to reuse running workers if exist |
-|aws_ec2 | pull_runtime | False | no | If set to True, Lithops will execute the command `docker pull ` in each VSI before executing the a job (in case of using a docker runtime)|
-|aws_ec2 | workers_policy | permissive | no | One of: **permissive**, **strict**. If set to **strict** will force creation of required workers number |
+
## Test Lithops
Once you have your compute and storage backends configured, you can run a hello world function with:
@@ -135,10 +251,40 @@ You can view the function executions logs in your local machine using the *litho
lithops logs poll
```
-The master and worker VMs contain the Lithops service logs in `/tmp/lithops-root/service.log`
+## VM Management
+
+Lithops for AWS EC2 follows a Master-Worker architecture (1:N).
+
+All the VMs, including the master VM, are automatically stopped after a configurable timeout (see hard/soft dismantle timeouts).
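+
+The dismantle behaviour is controlled by ordinary configuration keys, so it can be tuned like any other setting. A minimal sketch using a Python config dict (the role name and timeout values are only examples):
+
+```python
+import lithops
+
+config = {
+    'lithops': {'backend': 'aws_ec2'},
+    'aws_ec2': {
+        'region': 'us-east-1',                  # placeholder
+        'instance_role': 'ec2LithopsInstanceRole',  # placeholder
+        'exec_mode': 'reuse',
+        'auto_dismantle': True,          # stop the VMs automatically
+        'soft_dismantle_timeout': 600,   # seconds after a job completes
+        'hard_dismantle_timeout': 7200,  # seconds after a job starts
+    },
+}
+
+fexec = lithops.FunctionExecutor(config=config)
+```
+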
You can login to the master VM and get a live ssh connection with:
```bash
lithops attach -b aws_ec2
```
+
+The master and worker VMs contain the Lithops service logs in `/tmp/lithops-root/*-service.log`
+
+To list all the currently available workers, use the following command:
+
+```bash
+lithops worker list -b aws_ec2
+```
+
+You can also list all the submitted jobs with:
+
+```bash
+lithops job list -b aws_ec2
+```
+
+You can delete all the workers with:
+
+```bash
+lithops clean -b aws_ec2 -s aws_s3
+```
+
+You can delete all the workers including the Master VM with the `--all` flag:
+
+```bash
+lithops clean -b aws_ec2 -s aws_s3 --all
+```
diff --git a/docs/source/compute_config/aws_lambda.md b/docs/source/compute_config/aws_lambda.md
index 57696f287..8653d72c8 100644
--- a/docs/source/compute_config/aws_lambda.md
+++ b/docs/source/compute_config/aws_lambda.md
@@ -4,23 +4,24 @@ Lithops with *AWS Lambda* as serverless compute backend.
## Installation
-1. Install Amazon Web Services backend dependencies:
+1. Install AWS backend dependencies:
-```
+```bash
python3 -m pip install lithops[aws]
```
-2. [Login](https://console.aws.amazon.com/?nc2=h_m_mc) to Amazon Web Services Console (or signup if you don't have an account)
+## Configuration
+
+1. [Log in](https://console.aws.amazon.com/?nc2=h_m_mc) to the Amazon Web Services Console (or sign up if you don't have an account)
-3. Navigate to **IAM > Policies**. Click on **Create policy**.
+2. Navigate to **IAM > Policies**. Click on **Create policy**.
-4. Select **JSON** tab and paste the following JSON policy:
+3. Select **JSON** tab and paste the following JSON policy:
```json
{
"Version": "2012-10-17",
"Statement": [
{
- "Sid": "VisualEditor0",
"Effect": "Allow",
"Action": [
"s3:*",
@@ -38,28 +39,43 @@ python3 -m pip install lithops[aws]
}
```
-5. Click **Next: Tags** and **Next: Review**. Fill the policy name field (you can name it `lithops-policy` or simmilar) and create the policy.
+4. Click **Next: Tags** and **Next: Review**. Fill the policy name field (you can name it `lithops-policy` or similar) and create the policy.
-6. Go back to **IAM** and navigate to **Roles** tab. Click **Create role**.
+5. Go back to **IAM** and navigate to **Roles** tab. Click **Create role**.
-7. Choose **Lambda** on the use case list and click **Next: Permissions**. Select the policy created before (`lithops-policy`). Click **Next: Tags** and **Next: Review**. Type a role name, for example `lithops-execution-role`. Click on *Create Role*.
+6. Choose **Lambda** on the use case list and click **Next: Permissions**. Select the policy created before (`lithops-policy`). Click **Next: Tags** and **Next: Review**. Type a role name, for example `lambdaLithopsExecutionRole`. Click on *Create Role*.
-## Configuration
+## AWS Credential setup
-6. Edit your lithops config and add the following keys:
+Lithops loads AWS credentials as specified in the [boto3 configuration guide](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html).
-```yaml
-lithops:
- backend: aws_lambda
+In summary, you can use one of the following settings:
-aws:
- region:
- access_key_id:
- secret_access_key:
+1. Provide the credentials via the `~/.aws/config` file, or set the `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` environment variables.
-aws_lambda:
- execution_role:
-```
+   You can run the `aws configure` command to set up the credentials if the AWS CLI is installed. Then set the following in the Lithops config file:
+ ```yaml
+ lithops:
+ backend: aws_lambda
+
+ aws_lambda:
+ execution_role:
+ region:
+ ```
+
+2. Provide the credentials in the `aws` section of the Lithops config file:
+ ```yaml
+ lithops:
+ backend: aws_lambda
+
+ aws:
+ access_key_id:
+ secret_access_key:
+ region:
+
+ aws_lambda:
+ execution_role:
+ ```
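+
+Once the role and credentials are in place, the backend can also be selected (and some settings overridden) per executor from Python. A minimal sketch (the memory value is only an example):
+
+```python
+import lithops
+
+def hello(name):
+    return f'Hello {name}!'
+
+# Values passed here override those in the Lithops config file
+fexec = lithops.FunctionExecutor(backend='aws_lambda', runtime_memory=512)
+fexec.map(hello, ['World', 'Lambda'])
+print(fexec.get_result())
+```
+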
## Summary of configuration keys for AWS
@@ -68,28 +84,32 @@ aws_lambda:
|Group|Key|Default|Mandatory|Additional info|
|---|---|---|---|---|
|aws | region | |yes | AWS Region. For example `us-east-1` |
-|aws | access_key_id | |yes | Account access key to AWS services. To find them, navigate to *My Security Credentials* and click *Create Access Key* if you don't already have one. |
-|aws | secret_access_key | |yes | Account secret access key to AWS services. To find them, navigate to *My Security Credentials* and click *Create Access Key* if you don't already have one. |
+|aws | access_key_id | |no | Account access key to AWS services. To find them, navigate to *My Security Credentials* and click *Create Access Key* if you don't already have one. |
+|aws | secret_access_key | |no | Account secret access key to AWS services. To find them, navigate to *My Security Credentials* and click *Create Access Key* if you don't already have one. |
|aws | session_token | |no | Session token for temporary AWS credentials |
|aws | account_id | |no | *This field will be used if present to retrieve the account ID instead of using AWS STS. The account ID is used to format full image names for container runtimes. |
### AWS Lambda
-| Group | Key | Default | Mandatory | Additional info |
-|------------|---------------------|---------|-----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| aws_lambda | execution_role | | yes | ARN of the execution role created at step 3. You can find it in the Role page at the *Roles* list in the *IAM* section (e.g. `arn:aws:iam::1234567890:role/lithops-execution-role` |
-| aws_lambda | region | | no | Region where the S3 bucket is located and where Lambda functions will be invoked (e.g. `us-east-1`). Lithops will use the `region` set under the `aws` section if it is not set here |
-| aws_lambda | max_workers | 1000 | no | Max number of workers per `FunctionExecutor()` |
-| aws_lambda | worker_processes | 1 | no | Number of Lithops processes within a given worker. This can be used to parallelize function activations within a worker |
-| aws_lambda | runtime | | no | Docker image name |
-| aws_lambda | runtime_memory | 256 | no | Memory limit in MB. Default 256MB |
-| aws_lambda | runtime_timeout | 180 | no | Runtime timeout in seconds. Default 3 minutes |
-| aws_lambda | invoke_pool_threads | 64 | no | Number of concurrent threads used for invocation |
-| aws_lambda | remote_invoker | False | no | Activate the remote invoker feature that uses one cloud function to spawn all the actual `map()` activations |
-| aws_lambda | architecture | x86_64 | no | Runtime architecture. One of **x86_64** or **arm64** |
-| aws_lambda | ephemeral_storage | 512 | no | Ephemeral storage (`/tmp`) size in MB (must be between 512 MB and 10240 MB) |
-| aws_lambda | env_vars | {} | no | List of {name: ..., value: ...} pairs for Lambda instance environment variables |
-
+| Group | Key | Default | Mandatory | Additional info |
+| --- | --- | --- | --- | --- |
+| aws_lambda | execution_role | | yes | ARN of the execution role created at step 6. You can find it in the Role page at the *Roles* list in the *IAM* section (e.g. `arn:aws:iam::1234567890:role/lambdaLithopsExecutionRole`) |
+| aws_lambda | region | | no | Region where Lambda functions will be invoked (e.g. `us-east-1`). Lithops will use the `region` set under the `aws` section if it is not set here |
+| aws_lambda | max_workers | 1000 | no | Max number of workers per `FunctionExecutor()` |
+| aws_lambda | worker_processes | 1 | no | Number of Lithops processes within a given worker. This can be used to parallelize function activations within a worker |
+| aws_lambda | runtime | | no | Docker image name |
+| aws_lambda | runtime_memory | 256 | no | Memory limit in MB. Default 256MB |
+| aws_lambda | runtime_timeout | 180 | no | Runtime timeout in seconds. Default 3 minutes |
+| aws_lambda | invoke_pool_threads | 64 | no | Number of concurrent threads used for invocation |
+| aws_lambda | remote_invoker | False | no | Activate the remote invoker feature that uses one cloud function to spawn all the actual `map()` activations |
+| aws_lambda | architecture | x86_64 | no | Runtime architecture. One of **x86_64** or **arm64** |
+| aws_lambda | ephemeral_storage | 512 | no | Ephemeral storage (`/tmp`) size in MB (must be between 512 MB and 10240 MB) |
+| aws_lambda | user_tags | {} | no | List of {name: ..., value: ...} pairs for Lambda instance user tags |
+| aws_lambda | env_vars | {} | no | List of {name: ..., value: ...} pairs for Lambda instance environment variables |
+| aws_lambda | namespace | | no | Virtual namespace. This can be useful to virtually group Lithops function workers. The functions deployed by lithops will be prefixed by this namespace. For example you can set it to differentiate between `prod`, `dev` and `stage` environments. |
+| aws_lambda | runtime_include_function | False | no | If set to true, Lithops will automatically build a new runtime, including the function's code, instead of transferring it through the storage backend at invocation time. This is useful when the function's code size is large (in the order of 10s of MB) and the code does not change frequently |
+
+
## Additional configuration
### VPC
@@ -155,4 +175,4 @@ You can view the function executions logs in your local machine using the *litho
```bash
lithops logs poll
-```
\ No newline at end of file
+```
diff --git a/docs/source/compute_config/azure_containers.md b/docs/source/compute_config/azure_containers.md
index 656e805d6..637042d35 100644
--- a/docs/source/compute_config/azure_containers.md
+++ b/docs/source/compute_config/azure_containers.md
@@ -6,7 +6,7 @@ Lithops with Azure Container Apps as serverless compute backend.
1. Install Microsoft Azure backend dependencies:
-```
+```bash
python3 -m pip install lithops[azure]
```
@@ -95,7 +95,7 @@ az containerapp env create --name lithops --resource-group LithopsResourceGroup
|---|---|---|---|---|
|azure_containers| resource_group | |no | Name of a resource group, for example: `LithopsResourceGroup`. Lithops will use the `resource_group` set under the `azure` section if it is not set here |
|azure_containers| region | |no | The location where you created the `lithops` Container APP environment. For example: `westeurope`, `westus2`, etc. Lithops will use the `region` set under the `azure` section if it is not set here|
-|azure_containers| environment | lithops |no | The environemnt name you created in the step 5 of the installation |
+|azure_containers| environment | lithops |no | The environment name you created in the step 5 of the installation |
|azure_containers | docker_server | index.docker.io |no | Container registry URL |
|azure_containers | docker_user | |no | Container registry user name |
|azure_containers | docker_password | |no | Container registry password/token. In case of Docker hub, login to your docker hub account and generate a new access token [here](https://hub.docker.com/settings/security)|
@@ -106,6 +106,8 @@ az containerapp env create --name lithops --resource-group LithopsResourceGroup
|azure_containers | runtime_timeout | 600 |no | Runtime timeout in seconds. Default 10 minutes |
|azure_containers| trigger | pub/sub | no | Currently it supports pub/sub invocation|
|azure_containers | invoke_pool_threads | 32 |no | Number of concurrent threads used for invocation |
+|azure_containers | runtime_include_function | False | no | If set to true, Lithops will automatically build a new runtime, including the function's code, instead of transferring it through the storage backend at invocation time. This is useful when the function's code size is large (in the order of 10s of MB) and the code does not change frequently |
+
## Test Lithops
diff --git a/docs/source/compute_config/azure_functions.md b/docs/source/compute_config/azure_functions.md
index 561a14bd7..d48d83160 100644
--- a/docs/source/compute_config/azure_functions.md
+++ b/docs/source/compute_config/azure_functions.md
@@ -6,7 +6,7 @@ Lithops with Azure Functions as serverless compute backend.
1. Install Microsoft Azure backend dependencies:
-```
+```bash
python3 -m pip install lithops[azure]
```
@@ -89,7 +89,7 @@ az login
|---|---|---|---|---|
|azure_functions| resource_group | |no | Name of a resource group, for example: `LithopsResourceGroup`. Lithops will use the `resource_group` set under the `azure` section if it is not set here |
|azure_functions| region | |no | The location of the consumption plan for the runtime. Use `az functionapp list-consumption-locations` to view the available locations. For example: `westeurope`, `westus2`, etc. Lithops will use the `region` set under the `azure` section if it is not set here|
-|azure_functions | max_workers | 1000 | no | Max number of parallel workers. Altough Azure limits the number of workrs to 200, it is convenient to keep this value high|
+|azure_functions | max_workers | 1000 | no | Max number of parallel workers. Although Azure limits the number of workers to 200, it is convenient to keep this value high|
|azure_functions | worker_processes | 1 | no | Number of Lithops processes within a given worker. This can be used to parallelize function activations within a worker |
|azure_functions| runtime | |no | Runtime name already deployed in the service|
|azure_functions | runtime_timeout | 300 |no | Runtime timeout in seconds. Default 5 minutes |
diff --git a/docs/source/compute_config/azure_vms.md b/docs/source/compute_config/azure_vms.md
index cdfcd5ab3..53916033c 100644
--- a/docs/source/compute_config/azure_vms.md
+++ b/docs/source/compute_config/azure_vms.md
@@ -1,18 +1,58 @@
-# Azure Virtual Machines (Beta)
+# Azure Virtual Machines
-The Azure Virtual Machines client of Lithops can provide a truely serverless user experience on top of Azure VMs where Lithops creates new Virtual Machines (VMs) dynamically in runtime and scale Lithops jobs against them. Alternatively Lithops can start and stop an existing VM instances.
+The Azure Virtual Machines client of Lithops can provide a truly serverless user experience on top of Azure VMs, where Lithops creates new Virtual Machines (VMs) dynamically at runtime and scales Lithops jobs across them (Create & Reuse modes). Alternatively, Lithops can start and stop an existing VM instance (Consume mode).
+## Installation
-### Choose an operating system image for the VM
+1. Install Microsoft Azure backend dependencies:
+
+```bash
+python3 -m pip install lithops[azure]
+```
+
+2. Install [Azure CLI](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli?view=azure-cli-latest)
+
+3. Sign in with the Azure CLI:
+
+```bash
+az login
+```
+
+4. Create a Resource Group and a Storage Account:
+
+ Option 1:
+
+ 1. Access to the [Azure portal Resource Groups](https://portal.azure.com/#view/HubsExtension/BrowseResourceGroups) and create a new Resource group named **LithopsResourceGroup** (or similar) in your preferred region. If you already have a resource group, omit this step.
+
+ 2. Access to the [Azure portal Storage Accounts](https://portal.azure.com/#view/HubsExtension/BrowseResource/resourceType/Microsoft.Storage%2FStorageAccounts) and create a new Storage Account with a unique name, for example: **lithops0sa25s1**. If you already have a storage account, omit this step.
+
+ Option 2:
+
+ 1. Create a Resource Group in a specific location. If you already have a resource group, omit this step.
+
+ ```bash
+ az group create --name LithopsResourceGroup --location westeurope
+ ```
+
+ 2. Create a Storage Account with a unique name. If you already have a storage account, omit this step.
+
+ ```bash
+ storage_account_name=lithops$(openssl rand -hex 3)
+ echo $storage_account_name
+ az storage account create --name $storage_account_name --location westeurope \
+ --resource-group LithopsResourceGroup --sku Standard_LRS
+ ```
+
+## Choose an operating system image for the VM
- Option 1: By default, Lithops uses an Ubuntu 22.04 image. In this case, no further action is required and you can continue to the next step. Lithops will install all required dependencies in the VM by itself. Notice this can consume about 3 min to complete all installations.
- Option 2: Alternatively, you can use a pre-built custom image that will greatly improve VM creation time for Lithops jobs. To benefit from this approach, navigate to [runtime/azure_vms](https://github.com/lithops-cloud/lithops/tree/master/runtime/azure_vms), and follow the instructions.
-## Lithops and the VM consume mode
-
-In this mode, Lithops can start and stop an existing VM, and deploy an entire job to that VM. The partition logic in this scenario is different from the `create/reuse` modes, since the entire job is executed in the same VM.
+## Create and reuse modes
+In the `create` mode, Lithops automatically creates new worker VM instances at runtime, scales the Lithops job across the generated VMs, and automatically deletes the VMs when the job is completed.
+Alternatively, you can set the `reuse` mode to keep the started worker VMs running and reuse them for further executions. In the `reuse` mode, Lithops checks all the available worker VMs and starts new workers only if necessary.
-### Lithops configuration for the consume mode
+### Lithops configuration for the create or reuse modes
Edit your lithops config and add the relevant keys:
@@ -26,9 +66,7 @@ Edit your lithops config and add the relevant keys:
subscription_id:
azure_vms:
- instance_name:
- ssh_username:
- ssh_key_filename:
+ exec_mode: reuse
```
@@ -40,26 +78,32 @@ Edit your lithops config and add the relevant keys:
|azure| region | |yes | Location of the resource group, for example: `westeurope`, `westus2`, etc|
|azure| subscription_id | |yes | Subscription ID from your account. Find it [here](https://portal.azure.com/#view/Microsoft_Azure_Billing/SubscriptionsBlade)|
-### Azure VMs - Consume Mode
+### Azure VMs - Create and Reuse Modes
|Group|Key|Default|Mandatory|Additional info|
|---|---|---|---|---|
-|azure_vms | instance_name | | yes | virtual server instance Name. The instance must exists in your resource group |
-|azure_vms | ssh_username | ubuntu | yes | Username to access the VM. It will use `ubuntu` if not provided |
-|azure_vms | ssh_key_filename | ~/.ssh/id_rsa | yes | Path to the ssh key file provided to create the VM. It will use the default path if not provided |
-|azure_vms | region | |no | Location of the resource group, for example: `westeurope`, `westus2`, etc. Lithops will use the region set under the `azure` section if it is not set here |
-|azure_vms | worker_processes | 2 | no | Number of Lithops processes within a given worker. This can be used to parallelize function activations within a worker. It is recommendable to set this value to the same number of CPUs of the VM. |
-|azure_vms | runtime | python3 | no | Runtime name to run the functions. Can be a container image name. If not set Lithops will use the defeuv python3 interpreter of the VM |
+|azure_vms| region | |no | Azure location for deploying the VMS. For example: `westeurope`, `westus2`, etc. Lithops will use the `region` set under the `azure` section if it is not set here|
+|azure_vms | image_id | Canonical:0001-com-ubuntu-server-jammy:22_04-lts-gen2:latest |no | Image ID. ARM resource identifier |
+|azure_vms | ssh_username | ubuntu |no | Username to access the VM |
+|azure_vms | ssh_password | |no | Password for accessing the worker VMs. If not provided, it is created randomly|
+|azure_vms | ssh_key_filename | ~/.ssh/id_rsa | no | Path to the ssh key file provided to access the VPC. It will use the default path if not provided |
+|azure_vms | master_instance_type | Standard_B1s | no | Profile name for the master VM |
+|azure_vms | worker_instance_type | Standard_B2s | no | Profile name for the worker VMs |
+|azure_vms | delete_on_dismantle | False | no | Delete the worker VMs when they are stopped. Master VM is never deleted when stopped. `True` is NOT YET SUPPORTED |
+|azure_vms | max_workers | 100 | no | Max number of workers per `FunctionExecutor()`|
+|azure_vms | worker_processes | AUTO | no | Number of parallel Lithops processes in a worker. This is used to parallelize function activations within the worker. By default it detects the amount of CPUs in the `worker_instance_type` VM|
+|azure_vms | runtime | python3 | no | Runtime name to run the functions. Can be a container image name. If not set Lithops will use the default python3 interpreter of the VM |
|azure_vms | auto_dismantle | True |no | If False then the VM is not stopped automatically.|
|azure_vms | soft_dismantle_timeout | 300 |no| Time in seconds to stop the VM instance after a job **completed** its execution |
|azure_vms | hard_dismantle_timeout | 3600 | no | Time in seconds to stop the VM instance after a job **started** its execution |
+|azure_vms | exec_mode | reuse | no | One of: **consume**, **create** or **reuse**. If set to **create**, Lithops will automatically create new VMs for each map() call based on the number of elements in `iterdata`. If set to **reuse**, Lithops will try to reuse running workers if they exist |
-## Lithops and the VM auto create and reuse modes
-In the `create` mode, Lithops will automatically create new worker VM instances in runtime, scale Lithops job against generated VMs, and automatically delete the VMs when the job is completed.
-Alternatively, you can set the `reuse` mode to keep running the started worker VMs, and reuse them for further executions. In the `reuse` mode, Lithops checks all the available worker VMs and start new workers only if necessary.
+## Consume mode
-### Lithops configuration for the create or reuse modes
+In this mode, Lithops can start and stop an existing VM, and deploy an entire job to that VM. The partition logic in this scenario is different from the `create/reuse` modes, since the entire job is executed in the same VM.
+
+### Lithops configuration for the consume mode
Edit your lithops config and add the relevant keys:
@@ -73,7 +117,10 @@ Edit your lithops config and add the relevant keys:
subscription_id:
azure_vms:
- exec_mode: reuse
+ exec_mode: consume
+ instance_name:
+ ssh_username:
+ ssh_key_filename:
```
@@ -85,27 +132,20 @@ Edit your lithops config and add the relevant keys:
|azure| region | |yes | Location of the resource group, for example: `westeurope`, `westus2`, etc|
|azure| subscription_id | |yes | Subscription ID from your account. Find it [here](https://portal.azure.com/#view/Microsoft_Azure_Billing/SubscriptionsBlade)|
-### Azure VMs - Create and Reuse Modes
+### Azure VMs - Consume Mode
|Group|Key|Default|Mandatory|Additional info|
|---|---|---|---|---|
-|azure_vms| region | |no | Azure location for deploying the VMS. For example: `westeurope`, `westus2`, etc. Lithops will use the `region` set under the `azure` section if it is not set here|
-|azure_vms | image_id | Canonical:0001-com-ubuntu-server-jammy:22_04-lts-gen2:latest |no | Image ID. ARM resource identifier |
-|azure_vms | ssh_username | ubuntu |no | Username to access the VM |
-|azure_vms | ssh_password | |no | Password for accessing the worker VMs. If not provided, it is created randomly|
-|azure_vms | ssh_key_filename | ~/.ssh/id_rsa | no | Path to the ssh key file provided to access the VPC. It will use the default path if not provided |
-|azure_vms | master_instance_type | Standard_B1s | no | Profile name for the master VM |
-|azure_vms | worker_instance_type | Standard_B2s | no | Profile name for the worker VMs |
-|azure_vms | delete_on_dismantle | False | no | Delete the worker VMs when they are stopped. Master VM is never deleted when stopped. `True` is NOT YET SUPPORTED |
-|azure_vms | max_workers | 100 | no | Max number of workers per `FunctionExecutor()`|
-|azure_vms | worker_processes | 2 | no | Number of Lithops processes within a given worker. This can be used to parallelize function activations within a worker. It is recommendable to set this value to the same number of CPUs of a worker VM. |
-|azure_vms | runtime | python3 | no | Runtime name to run the functions. Can be a container image name. If not set Lithops will use the default python3 interpreter of the VM |
+|azure_vms | instance_name | | yes | Virtual server instance name. The instance must exist in your resource group |
+|azure_vms | ssh_username | ubuntu | yes | Username to access the VM. It will use `ubuntu` if not provided |
+|azure_vms | ssh_key_filename | ~/.ssh/id_rsa | yes | Path to the ssh key file provided to create the VM. It will use the default path if not provided |
+|azure_vms | region | |no | Location of the resource group, for example: `westeurope`, `westus2`, etc. Lithops will use the region set under the `azure` section if it is not set here |
+|azure_vms | worker_processes | AUTO | no | Number of parallel Lithops processes in a worker. This is used to parallelize function activations within the worker. By default it detects the amount of CPUs in the VM|
+|azure_vms | runtime | python3 | no | Runtime name to run the functions. Can be a container image name. If not set, Lithops will use the default python3 interpreter of the VM |
|azure_vms | auto_dismantle | True |no | If False then the VM is not stopped automatically.|
|azure_vms | soft_dismantle_timeout | 300 |no| Time in seconds to stop the VM instance after a job **completed** its execution |
|azure_vms | hard_dismantle_timeout | 3600 | no | Time in seconds to stop the VM instance after a job **started** its execution |
-|azure_vms | exec_mode | consume | no | One of: **consume**, **create** or **reuse**. If set to **create**, Lithops will automatically create new VMs for each map() call based on the number of elements in iterdata. If set to **reuse** will try to reuse running workers if exist |
-|azure_vms | pull_runtime | False | no | If set to True, Lithops will execute the command `docker pull ` in each VSI before executing the a job (in case of using a docker runtime)|
-|azure_vms | workers_policy | permissive | no | One of: **permissive**, **strict**. If set to **strict** will force creation of required workers number |
+
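+A minimal configuration sketch for the consume mode could look like the following (the instance name and region values are illustrative placeholders, and the `azure` section with your subscription credentials is assumed to be configured as described above):
+
+```yaml
+lithops:
+    backend: azure_vms
+
+azure_vms:
+    instance_name: my-lithops-vm
+    ssh_username: ubuntu
+    ssh_key_filename: ~/.ssh/id_rsa
+    region: westeurope
+```
+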
## Test Lithops
Once you have your compute and storage backends configured, you can run a hello world function with:
@@ -122,10 +162,40 @@ You can view the function executions logs in your local machine using the *litho
lithops logs poll
```
-The master and worker VMs contain the Lithops service logs in `/tmp/lithops-root/service.log`
+## VM Management
+
+Lithops for Azure VMs follows a Master-Worker architecture (1:N).
+
+All the VMs, including the master VM, are automatically stopped after a configurable timeout (see hard/soft dismantle timeouts).
You can login to the master VM and get a live ssh connection with:
```bash
lithops attach -b azure_vms
```
+
+The master and worker VMs contain the Lithops service logs in `/tmp/lithops-root/*-service.log`
+
+To list all the currently available workers, use the following command:
+
+```bash
+lithops worker list -b azure_vms
+```
+
+You can also list all the submitted jobs with:
+
+```bash
+lithops job list -b azure_vms
+```
+
+You can delete all the workers with:
+
+```bash
+lithops clean -b azure_vms -s azure_storage
+```
+
+You can delete all the workers including the Master VM with the `--all` flag:
+
+```bash
+lithops clean -b azure_vms -s azure_storage --all
+```
diff --git a/docs/source/compute_config/code_engine.md b/docs/source/compute_config/code_engine.md
index fd927788d..e34e3542a 100644
--- a/docs/source/compute_config/code_engine.md
+++ b/docs/source/compute_config/code_engine.md
@@ -2,8 +2,13 @@
[IBM Code Engine](https://cloud.ibm.com/codeengine/overview) allows you to run your application, job or container on a managed serverless platform. Auto-scale workloads and only pay for the resources you consume.
-IBM Code Engine exposes both Knative and Kubernetes Job Descriptor API. Lithops supports both of them. Follow IBM Code Engine documentation to get more details on the difference between those APIs.
+## Installation
+1. Install IBM Cloud backend dependencies:
+
+```bash
+python3 -m pip install lithops[ibm]
+```
## Configuration
@@ -13,7 +18,7 @@ IBM Code Engine exposes both Knative and Kubernetes Job Descriptor API. Lithops
3. Copy the generated IAM API key (You can only see the key the first time you create it, so make sure to copy it).
-4. Naviagete to the [resource groups dashboard](https://cloud.ibm.com/account/resource-groups), and copy the desired resource group ID.
+4. Navigate to the [resource groups dashboard](https://cloud.ibm.com/account/resource-groups), and copy the desired resource group ID.
5. Edit your lithops config and add the following keys:
@@ -27,6 +32,37 @@ IBM Code Engine exposes both Knative and Kubernetes Job Descriptor API. Lithops
resource_group_id:
```
+
+## Summary of configuration keys for IBM Cloud:
+
+### IBM IAM:
+
+|Group|Key|Default|Mandatory|Additional info|
+|---|---|---|---|---|
+|ibm | iam_api_key | |yes | IBM Cloud IAM API key to authenticate against IBM services. Obtain the key [here](https://cloud.ibm.com/iam/apikeys) |
+|ibm | region | |yes | IBM Region. One of: `eu-gb`, `eu-de`, `us-south`, `us-east`, `br-sao`, `ca-tor`, `jp-tok`, `jp-osa`, `au-syd` |
+|ibm | resource_group_id | | yes | Resource group id from your IBM Cloud account. Get it from [here](https://cloud.ibm.com/account/resource-groups) |
+
+### Code Engine:
+
+|Group|Key|Default|Mandatory|Additional info|
+|---|---|---|---|---|
+|code_engine | project_name | |no | Project name that already exists in Code Engine. If not provided, Lithops will automatically create a new project|
+|code_engine | namespace | |no | Alternatively to `project_name`, you can provide `namespace`. Get it from your Code Engine k8s config file.|
+|code_engine | region | | no | Cluster region. One of: `eu-gb`, `eu-de`, `us-south`, `us-east`, `br-sao`, `ca-tor`, `jp-tok`, `jp-osa`, `au-syd`. Lithops will use the `region` set under the `ibm` section if it is not set here |
+|code_engine | docker_server | docker.io |no | Container registry URL |
+|code_engine | docker_user | |no | Container registry user name |
+|code_engine | docker_password | |no | Container registry password/token. In case of Docker hub, login to your docker hub account and generate a new access token [here](https://hub.docker.com/settings/security)|
+|code_engine | max_workers | 1000 | no | Max number of workers per `FunctionExecutor()`|
+|code_engine | worker_processes | 1 | no | Number of Lithops processes within a given worker. This can be used to parallelize function activations within a worker. It is recommendable to set this value to the same number of CPUs of the container. |
+|code_engine | runtime | |no | Docker image name.|
+|code_engine | runtime_cpu | 0.125 |no | CPU limit. Default 0.125vCPU. See [valid combinations](https://cloud.ibm.com/docs/codeengine?topic=codeengine-mem-cpu-combo) |
+|code_engine | runtime_memory | 256 |no | Memory limit in MB. Default 256Mi. See [valid combinations](https://cloud.ibm.com/docs/codeengine?topic=codeengine-mem-cpu-combo) |
+|code_engine | runtime_timeout | 600 |no | Runtime timeout in seconds. Default 600 seconds |
+|code_engine | connection_retries | |no | If specified, number of job invoke retries in case of connection failure with error code 500 |
+|code_engine | runtime_include_function | False | no | If set to true, Lithops will automatically build a new runtime, including the function's code, instead of transferring it through the storage backend at invocation time. This is useful when the function's code size is large (in the order of 10s of MB) and the code does not change frequently |
+
+
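+As a brief illustration, a minimal configuration sketch combining some of these optional keys might look like the following (all values are placeholders; the CPU/memory pair should be one of the valid combinations linked above):
+
+```yaml
+lithops:
+    backend: code_engine
+    storage: ibm_cos
+
+ibm:
+    iam_api_key: <IAM_API_KEY>
+    region: us-south
+    resource_group_id: <RESOURCE_GROUP_ID>
+
+code_engine:
+    runtime_cpu: 1
+    runtime_memory: 2048
+    runtime_timeout: 1200
+    runtime_include_function: true
+```
+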
## Runtime
### Use your own runtime
@@ -54,7 +90,7 @@ code_engine:
....
docker_server : docker.io
docker_user :
- docker_password :
+ docker_password :
```
#### Configure IBM Container Registry
@@ -66,43 +102,10 @@ code_engine:
docker_server : us.icr.io # Change-me if you have the CR in another region
docker_user : iamapikey
docker_password :
+ docker_namespace : # namespace name from https://cloud.ibm.com/registry/namespaces
```
-## Summary of configuration keys for IBM Cloud:
-
-### IBM IAM:
-
-|Group|Key|Default|Mandatory|Additional info|
-|---|---|---|---|---|
-|ibm | iam_api_key | |yes | IBM Cloud IAM API key to authenticate against IBM services. Obtain the key [here](https://cloud.ibm.com/iam/apikeys) |
-|ibm | region | |yes | IBM Region. One of: `eu-gb`, `eu-de`, `us-south`, `us-east`, `br-sao`, `ca-tor`, `jp-tok`, `jp-osa`, `au-syd` |
-|ibm | resource_group_id | | yes | Resource group id from your IBM Cloud account. Get it from [here](https://cloud.ibm.com/account/resource-groups) |
-
-## Code Engine:
-
-|Group|Key|Default|Mandatory|Additional info|
-|---|---|---|---|---|
-|code_engine | project_name | |no | Project name that already exists in Code Engine. If not provided lithops will automatically create a new project|
-|code_engine | namespace | |no | Alternatively to `project_name`, you can provide `namespace`. Get it from you code engine k8s config file.|
-|code_engine | region | | no | Cluster region. One of: `eu-gb`, `eu-de`, `us-south`, `us-east`, `br-sao`, `ca-tor`, `jp-tok`, `jp-osa`, `au-syd`. Lithops will use the `region` set under the `ibm` section if it is not set here |
-|code_engine | docker_server | docker.io |no | Container registry URL |
-|code_engine | docker_user | |no | Container registry user name |
-|code_engine | docker_password | |no | Container registry password/token. In case of Docker hub, login to your docker hub account and generate a new access token [here](https://hub.docker.com/settings/security)|
-|code_engine | max_workers | 1000 | no | Max number of workers per `FunctionExecutor()`|
-|code_engine | worker_processes | 1 | no | Number of Lithops processes within a given worker. This can be used to parallelize function activations within a worker. It is recommendable to set this value to the same number of CPUs of the container. |
-|code_engine | runtime | |no | Docker image name.|
-|code_engine | runtime_cpu | 0.125 |no | CPU limit. Default 0.125vCPU. See [valid combinations](https://cloud.ibm.com/docs/codeengine?topic=codeengine-mem-cpu-combo) |
-|code_engine | runtime_memory | 256 |no | Memory limit in MB. Default 256Mi. See [valid combinations](https://cloud.ibm.com/docs/codeengine?topic=codeengine-mem-cpu-combo) |
-|code_engine | runtime_timeout | 600 |no | Runtime timeout in seconds. Default 600 seconds |
-|code_engine | connection_retries | |no | If specified, number of job invoke retries in case of connection failure with error code 500 |
-
-
-## Lithops using Knative API of Code Engine
-
-The preferable way to run Lithops in Code Engine is by using the JOB API. However, Lithops can be also executed in Code Engine using the Knative API. To configure this mode of execution refer to the [Knative documentation](https://github.com/lithops-cloud/lithops/blob/master/config/compute/knative.md#configuration) and follow the steps to configure Knative.
-
-
## Test Lithops
Once you have your compute and storage backends configured, you can run a hello world function with:
@@ -118,4 +121,3 @@ You can view the function executions logs in your local machine using the *litho
```bash
lithops logs poll
```
-
diff --git a/docs/source/compute_config/gcp_cloudrun.md b/docs/source/compute_config/gcp_cloudrun.md
index 9777ad095..a36454356 100644
--- a/docs/source/compute_config/gcp_cloudrun.md
+++ b/docs/source/compute_config/gcp_cloudrun.md
@@ -7,7 +7,7 @@ Lithops with *GCP Cloud Run* as serverless compute backend.
1. Install Google Cloud Platform backend dependencies:
```bash
-python3 -m install lithops[gcp]
+python3 -m pip install lithops[gcp]
```
2. [Login](https://console.cloud.google.com) to Google Cloud Console (or sign up if you don't have an account).
@@ -29,6 +29,8 @@ python3 -m install lithops[gcp]
9. Enable the **Cloud Run API** : Navigate to *APIs & services* tab on the menu. Click *ENABLE APIS AND SERVICES*. Look for "Cloud Run API" at the search bar. Click *Enable*.
+10. Enable the **Artifact Registry API**: Navigate to *APIs & services* tab on the menu. Click *ENABLE APIS AND SERVICES*. Look for "Artifact Registry API" at the search bar. Click *Enable*.
+
## Configuration
1. Edit your lithops config and add the following keys:
@@ -64,7 +66,7 @@ python3 -m install lithops[gcp]
|gcp_cloudrun | runtime_timeout | 300 |no | Runtime timeout in seconds. Default 5 minutes |
|gcp_cloudrun | trigger | https | no | Currently it supports 'https' trigger|
|gcp_cloudrun | invoke_pool_threads | 100 |no | Number of concurrent threads used for invocation |
-
+|gcp_cloudrun | runtime_include_function | False | no | If set to true, Lithops will automatically build a new runtime, including the function's code, instead of transferring it through the storage backend at invocation time. This is useful when the function's code size is large (in the order of 10s of MB) and the code does not change frequently |
## Test Lithops
Once you have your compute and storage backends configured, you can run a hello world function with:
diff --git a/docs/source/compute_config/gcp_functions.md b/docs/source/compute_config/gcp_functions.md
index a3a22b2d9..e1cc082e3 100644
--- a/docs/source/compute_config/gcp_functions.md
+++ b/docs/source/compute_config/gcp_functions.md
@@ -7,7 +7,7 @@ Lithops with *GCP Functions* as serverless compute backend.
1. Install Google Cloud Platform backend dependencies:
```bash
-python3 -m install lithops[gcp]
+python3 -m pip install lithops[gcp]
```
2. [Login](https://console.cloud.google.com) to Google Cloud Console (or sign up if you don't have an account).
@@ -30,6 +30,8 @@ python3 -m install lithops[gcp]
9. Enable the **Cloud Functions API** : Navigate to *APIs & services* tab on the menu. Click *ENABLE APIS AND SERVICES*. Look for "Cloud Functions API" at the search bar. Click *Enable*.
+10. Enable the **Artifact Registry API**: Navigate to *APIs & services* tab on the menu. Click *ENABLE APIS AND SERVICES*. Look for "Artifact Registry API" at the search bar. Click *Enable*.
+
## Configuration
1. Edit your lithops config and add the following keys:
diff --git a/docs/source/compute_config/ibm_cf.md b/docs/source/compute_config/ibm_cf.md
index 5dc906d3f..60f58646d 100644
--- a/docs/source/compute_config/ibm_cf.md
+++ b/docs/source/compute_config/ibm_cf.md
@@ -2,6 +2,16 @@
Lithops with *IBM Cloud Functions* as compute backend.
+**Note**: This backend is deprecated. See the [deprecation overview](https://cloud.ibm.com/docs/openwhisk?topic=openwhisk-dep-overview)
+
+## Installation
+
+1. Install IBM Cloud backend dependencies:
+
+```bash
+python3 -m pip install lithops[ibm]
+```
+
## Configuration
1. Login to IBM Cloud and open up your [dashboard](https://cloud.ibm.com/).
@@ -12,7 +22,7 @@ Lithops with *IBM Cloud Functions* as compute backend.
4. Copy the generated IAM API key (You can only see the key the first time you create it, so make sure to copy it).
-5. Naviagete to the [resource groups dashboard](https://cloud.ibm.com/account/resource-groups), and copy the desired resource group ID.
+5. Navigate to the [resource groups dashboard](https://cloud.ibm.com/account/resource-groups), and copy the desired resource group ID.
5. Edit your lithops config and add the following keys:
@@ -40,8 +50,8 @@ Lithops with *IBM Cloud Functions* as compute backend.
|Group|Key|Default|Mandatory|Additional info|
|---|---|---|---|---|
-|ibm_cf| namespace | |no | Value of CURRENT NAMESPACE from [here](https://cloud.ibm.com/functions/namespace-settings). Provide it if you want to use an existsing `namespace`. Lithops will automatically create a new namespace if not provided.|
-|ibm_cf| namespace_id | |no | Value of 'GUID' from [here](https://cloud.ibm.com/functions/namespace-settings). Provide it if you want to use an existsing `namespace`. Provide it along with `namespace`.|
+|ibm_cf| namespace | |no | Value of CURRENT NAMESPACE from [here](https://cloud.ibm.com/functions/namespace-settings). Provide it if you want to use an existing `namespace`. Lithops will automatically create a new namespace if not provided.|
+|ibm_cf| namespace_id | |no | Value of 'GUID' from [here](https://cloud.ibm.com/functions/namespace-settings). Provide it if you want to use an existing `namespace`. Provide it along with `namespace`.|
|ibm_cf | region | |no | Service region. One of: `jp-tok`, `au-syd`, `eu-gb`, `eu-de`, `us-south`, `us-east`. Lithops will use the `region` set under the `ibm` section if it is not set here |
|ibm_cf| endpoint | |no | IBM Cloud Functions endpoint (if region not provided). Make sure to use https:// prefix, for example: https://us-east.functions.cloud.ibm.com |
|ibm_cf | max_workers | 1200 | no | Max number of workers per `FunctionExecutor()`|
@@ -51,6 +61,8 @@ Lithops with *IBM Cloud Functions* as compute backend.
|ibm_cf | runtime_timeout | 600 |no | Runtime timeout in seconds. Default 600 seconds |
|ibm_cf | invoke_pool_threads | 500 |no | Number of concurrent threads used for invocation |
|ibm_cf | remote_invoker | False | no | Activate the remote invoker feature that uses one cloud function to spawn all the actual `map()` activations |
+|ibm_cf | runtime_include_function | False | no | If set to true, Lithops will automatically build a new runtime, including the function's code, instead of transferring it through the storage backend at invocation time. This is useful when the function's code size is large (in the order of 10s of MB) and the code does not change frequently |
+
## Test Lithops
diff --git a/docs/source/compute_config/ibm_vpc.md b/docs/source/compute_config/ibm_vpc.md
index 8ac8aa614..0930f8f34 100644
--- a/docs/source/compute_config/ibm_vpc.md
+++ b/docs/source/compute_config/ibm_vpc.md
@@ -1,21 +1,30 @@
# IBM Virtual Private Cloud
-The IBM VPC client of Lithops can provide a truely serverless user experience on top of IBM VPC where Lithops creates new VSIs (Virtual Server Instance) dynamically in runtime, and scale Lithops jobs against them. Alternatively Lithops can start and stop an existing VSI instances.
+The IBM VPC client of Lithops can provide a truly serverless user experience on top of IBM VPC, where Lithops dynamically creates new VSIs (Virtual Server Instances) at runtime and scales Lithops jobs against them (Create & Reuse modes). Alternatively, Lithops can start and stop an existing VSI instance (Consume mode).
+
+## Installation
+
+1. Install IBM Cloud backend dependencies:
+
+```bash
+python3 -m pip install lithops[ibm]
+```
## IBM VPC
The assumption is that you are already familiar with IBM Cloud, have your IBM IAM API key created (you can create new keys [here](https://cloud.ibm.com/iam/apikeys)), and have a valid IBM COS account, region and resource group.
-Follow [IBM VPC setup](https://cloud.ibm.com/vpc-ext/overview) if you need to create IBM Virtual Private Cloud. Decide the region for your VPC. The best practice is to use the same region both for VPC and IBM COS, hoewever there is no requirement to keep them in the same region.
+Follow [IBM VPC setup](https://cloud.ibm.com/vpc-ext/overview) if you need to create IBM Virtual Private Cloud. Decide the region for your VPC. The best practice is to use the same region both for VPC and IBM COS, however there is no requirement to keep them in the same region.
## Choose an operating system image for VSI
-Any Virtual Service Instance (VSI) need to define the instance’s operating system and version. Lithops support both standard Ubuntu operting system choices provided by the VPC and using pre-defined custom images that already contains all dependencies required by Lithops.
+Any Virtual Server Instance (VSI) needs to define the instance’s operating system and version. Lithops supports both the standard Ubuntu operating system choices provided by the VPC and pre-defined custom images that already contain all the dependencies required by Lithops.
- Option 1: Lithops is compatible with any Ubuntu 22.04 image provided in IBM Cloud. In this case, no further action is required and you can continue to the next step. Lithops will install all required dependencies in the VSI by itself. Notice this can consume about 3 min to complete all installations.
- Option 2: Alternatively, you can use a pre-built custom image (based on Ubuntu) that will greatly improve VSI creation time for Lithops jobs. To benefit from this approach, navigate to [runtime/ibm_vpc](https://github.com/lithops-cloud/lithops/tree/master/runtime/ibm_vpc), and follow the instructions.
-## Lithops and the VM auto create|reuse mode
+## Create and reuse modes
+
In the `create` mode, Lithops will automatically create new worker VM instances in runtime, scale Lithops job against generated VMs, and automatically delete the VMs when the job is completed.
Alternatively, you can set the `reuse` mode to keep running the started worker VMs, and reuse them for further executions. In the `reuse` mode, Lithops checks all the available worker VMs and start new workers only if necessary.
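+
+As a brief illustration, a minimal configuration sketch for the `reuse` mode might look like the following (all values are placeholders; the VPC network keys such as `vpc_id`, `subnet_id`, and `ssh_key_id` are optional, per the table below, and are resolved by Lithops when not provided):
+
+```yaml
+lithops:
+    backend: ibm_vpc
+
+ibm:
+    iam_api_key: <IAM_API_KEY>
+    region: us-south
+    resource_group_id: <RESOURCE_GROUP_ID>
+
+ibm_vpc:
+    exec_mode: reuse
+    worker_profile_name: cx2-2x4
+    max_workers: 10
+```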
@@ -56,6 +65,7 @@ ibm_vpc:
docker_server : us.icr.io # Change-me if you have the CR in another region
docker_user : iamapikey
docker_password :
+ docker_namespace : # namespace name from https://cloud.ibm.com/registry/namespaces
```
@@ -73,36 +83,34 @@ ibm_vpc:
|Group|Key|Default|Mandatory|Additional info|
|---|---|---|---|---|
-|ibm_vpc | region | |no | VPC Region. For example `us-south`. Choose one region from [here](https://cloud.ibm.com/docs/vpc?topic=vpc-service-endpoints-for-vpc). Lithops will use the `region` set under the `ibm` section if it is not set here |
+|ibm_vpc | region | |no | VPC Region. For example `us-south`. Choose one region from [here](https://cloud.ibm.com/docs/vpc?topic=vpc-service-endpoints-for-vpc). Lithops will use the `region` set under the `ibm` section if it is not set here. Alternatively you can specify a Zone, for example: `eu-gb-2` |
|ibm_vpc | vpc_id | | no | VPC id of an existing VPC. Get it from [here](https://cloud.ibm.com/vpc-ext/network/vpcs) |
|ibm_vpc | vpc_name | | no | VPC name of an existing VPC (if `vpc_id` is not provided) |
|ibm_vpc | security_group_id | | no | Security group id of an existing VPC. Get it from [here](https://cloud.ibm.com/vpc-ext/network/securityGroups)|
|ibm_vpc | subnet_id | | no | Subnet id of an existing VPC. Get it from [here](https://cloud.ibm.com/vpc-ext/network/subnets)|
|ibm_vpc | ssh_key_id | | no | SSH public key id. Get it from [here](https://cloud.ibm.com/vpc-ext/compute/sshKeys)|
|ibm_vpc | gateway_id | | no | Gateway id. Get it from [here](https://cloud.ibm.com/vpc-ext/network/publicGateways)|
-|ibm_vpc | image_name | lithops-worker-default | no | Virtual machine image name |
-|ibm_vpc | image_id | | no | Virtual machine image id |
+|ibm_vpc | image_id | | no | Virtual machine image id. Default is Ubuntu Server 22.04 |
+|ibm_vpc | runtime | python3 | no | Runtime name to run the functions. Can be a container image name. If not set Lithops will use the default python3 interpreter of the VM |
|ibm_vpc | ssh_username | root |no | Username to access the VM |
|ibm_vpc | ssh_password | |no | Password for accessing the worker VMs. If not provided, it is created randomly|
|ibm_vpc | ssh_key_filename | ~/.ssh/id_rsa | no | Path to the ssh key file provided to access the VPC. It will use the default path if not provided |
|ibm_vpc | boot_volume_profile | general-purpose | no | Virtual machine boot volume profile |
-|ibm_vpc | boot_volume_capacity | 100 | no | Virtual machine boot volume capacity in GB. Set it to 10 if using a custom image. |
+|ibm_vpc | boot_volume_capacity | 100 | no | Virtual machine boot volume capacity in GB. |
|ibm_vpc | worker_profile_name | cx2-2x4 | no | Profile name for the worker VMs |
|ibm_vpc | master_profile_name | cx2-2x4 | no | Profile name for the master VM |
-|ibm_vpc | delete_on_dismantle | True | no | Delete the worekr VMs when they are stopped |
+|ibm_vpc | verify_resources | True | no | Verify the resources that are stored in the local cache, and expected to be already created (VPC, subnet, floating IP, etc.), exist every time a `FunctionExecutor()` is created |
+|ibm_vpc | delete_on_dismantle | True | no | Delete the worker VMs when they are stopped |
|ibm_vpc | max_workers | 100 | no | Max number of workers per `FunctionExecutor()`|
-|ibm_vpc | worker_processes | 2 | no | Number of Lithops processes within a given worker. This can be used to parallelize function activations within a worker. It is recommendable to set this value to the same number of CPUs of a worker VM. |
-|ibm_vpc | singlesocket | False | no | Try to allocate workers with single socket CPU. If eventually running on multiple socket, a warning message printed to user. Is **True** standalone **workers_policy** must be set to **strict** to trace workers states|
-|ibm_vpc | runtime | python3 | no | Runtime name to run the functions. Can be a container image name. If not set Lithops will use the default python3 interpreter of the VM |
+|ibm_vpc | worker_processes | AUTO | no | Number of Lithops processes within a given worker. This is used to parallelize function activations within a worker. By default it detects the amount of CPUs in the worker VM|
|ibm_vpc | auto_dismantle | True |no | If False then the VM is not stopped automatically.|
|ibm_vpc | soft_dismantle_timeout | 300 |no| Time in seconds to stop the VM instance after a job **completed** its execution |
|ibm_vpc | hard_dismantle_timeout | 3600 | no | Time in seconds to stop the VM instance after a job **started** its execution |
-|ibm_vpc | exec_mode | consume | no | One of: **consume**, **create** or **reuse**. If set to **create**, Lithops will automatically create new VMs for each map() call based on the number of elements in iterdata. If set to **reuse** will try to reuse running workers if exist |
-|ibm_vpc | pull_runtime | False | no | If set to True, Lithops will execute the command `docker pull ` in each VSI before executing the a job (in case of using a docker runtime)|
-|ibm_vpc | workers_policy | permissive | no | One of: **permissive**, **strict**. If set to **strict** will force creation of required workers number |
-|ibm_vpc | gpu | False | no | If True docker started with gpu support. Requires host to have neccessary hardware and software preconfigured and docker image runtime with gpu support specified |
+|ibm_vpc | exec_mode | reuse | no | One of: **consume**, **create** or **reuse**. If set to **create**, Lithops will automatically create new VMs for each map() call based on the number of elements in iterdata. If set to **reuse** will try to reuse running workers if exist |
+|ibm_vpc | singlesocket | False | no | Try to allocate workers on single-socket CPUs. If a worker eventually runs on multiple sockets, a warning message is printed to the user. If set to **True**, the standalone **workers_policy** must be set to **strict** to trace worker states|
+|ibm_vpc | gpu | False | no | If `True`, Docker is started with GPU support. Requires the host to have the necessary hardware and software pre-configured, and a docker image runtime with GPU support specified |
-## Lithops and the VSI consume mode
+## Consume mode
In this mode, Lithops can start and stop an existing VM, and deploy an entire job to that VM. The partition logic in this scenario is different from the `create/reuse` modes, since the entire job is executed in the same VM.
@@ -110,18 +118,19 @@ In this mode, Lithops can start and stop an existing VM, and deploy an entire jo
Edit your lithops config and add the relevant keys:
- ```yaml
- lithops:
- backend: ibm_vpc
+```yaml
+lithops:
+ backend: ibm_vpc
- ibm:
- iam_api_key:
+ibm:
+ iam_api_key:
- ibm_vpc:
- region :
- instance_id :
- floating_ip :
- ```
+ibm_vpc:
+ exec_mode: consume
+ region :
+ instance_id :
+ floating_ip :
+```
If you need to create new VM, then follow the steps to create and update Lithops configuration:
@@ -133,7 +142,7 @@ If you need to create new VM, then follow the steps to create and update Lithops
## Summary of configuration keys for IBM Cloud:
-### IBM IAM:
+### IBM:
|Group|Key|Default|Mandatory|Additional info|
|---|---|---|---|---|
@@ -146,15 +155,14 @@ If you need to create new VM, then follow the steps to create and update Lithops
|---|---|---|---|---|
|ibm_vpc | region | |yes | VPC Region. For example `us-south`. Choose one region from [here](https://cloud.ibm.com/docs/vpc?topic=vpc-service-endpoints-for-vpc). Lithops will use the region set under the `ibm` section if it is not set here |
|ibm_vpc | instance_id | | yes | virtual server instance ID |
-|ibm_vpc | floating_ip | | yes | Floatting IP address atached to your VM instance|
+|ibm_vpc | floating_ip | | yes | Floating IP address attached to your VM instance|
|ibm_vpc | ssh_username | root |no | Username to access the VM |
|ibm_vpc | ssh_key_filename | ~/.ssh/id_rsa | no | Path to the ssh key file provided to create the VM. It will use the default path if not provided |
-|ibm_vpc | worker_processes | 2 | no | Number of Lithops processes within a given worker. This can be used to parallelize function activations within a worker. It is recommendable to set this value to the same number of CPUs of the VM. |
-|ibm_vpc | runtime | python3 | no | Runtime name to run the functions. Can be a container image name. If not set Lithops will use the defeuv python3 interpreter of the VM |
+|ibm_vpc | worker_processes | AUTO | no | Number of Lithops processes within a given worker. This is used to parallelize function activations within the worker. By default it detects the number of CPUs in the VM|
+|ibm_vpc | runtime | python3 | no | Runtime name to run the functions. Can be a container image name. If not set Lithops will use the default `python3` interpreter of the VM |
|ibm_vpc | auto_dismantle | True |no | If False then the VM is not stopped automatically.|
|ibm_vpc | soft_dismantle_timeout | 300 |no| Time in seconds to stop the VM instance after a job **completed** its execution |
|ibm_vpc | hard_dismantle_timeout | 3600 | no | Time in seconds to stop the VM instance after a job **started** its execution |
-|ibm_vpc | pull_runtime | False | no | If set to True, Lithops will execute the command `docker pull ` in each VSI before executing the a job (in case of using a docker runtime)|
## Test Lithops
@@ -173,10 +181,40 @@ You can view the function executions logs in your local machine using the *litho
lithops logs poll
```
-The master and worker VMs contain the Lithops service logs in `/tmp/lithops-root/service.log`
+## VM Management
+
+Lithops for IBM VPC follows a Master-Worker architecture (1:N).
+
+All the VMs, including the master VM, are automatically stopped after a configurable timeout (see hard/soft dismantle timeouts).
You can login to the master VM and get a live ssh connection with:
```bash
lithops attach -b ibm_vpc
```
+
+The master and worker VMs contain the Lithops service logs in `/tmp/lithops-root/*-service.log`
+
+To list all the currently available workers, use the following command:
+
+```bash
+lithops worker list -b ibm_vpc
+```
+
+You can also list all the submitted jobs with:
+
+```bash
+lithops job list -b ibm_vpc
+```
+
+You can delete all the workers with:
+
+```bash
+lithops clean -b ibm_vpc -s ibm_cos
+```
+
+You can delete all the workers including the Master VM with the `--all` flag:
+
+```bash
+lithops clean -b ibm_vpc -s ibm_cos --all
+```
diff --git a/docs/source/compute_config/knative.md b/docs/source/compute_config/knative.md
index 315fef72b..51f51a4b9 100644
--- a/docs/source/compute_config/knative.md
+++ b/docs/source/compute_config/knative.md
@@ -6,51 +6,57 @@ Lithops with *Knative* as serverless compute backend. Lithops also supports vani
Note that Lithops automatically builds the default runtime the first time you run a script. For this task it uses the **docker** command installed locally in your machine.
-1. [Install the Docker CE version](https://docs.docker.com/get-docker/).
+1. Install Knative backend dependencies:
-2. Login to your docker account:
+```bash
+python3 -m pip install lithops[knative]
+```
+
+2. [Install the Docker CE version](https://docs.docker.com/get-docker/).
+
+3. Login to your docker account:
```bash
docker login
```
-3. Choose one of these 3 installation options:
+4. Choose one of these 3 installation options:
### Option 1 - Minikube:
-4. Start minikube with the 'ingress' addon:
+5. Start minikube with the 'ingress' addon:
```bash
minikube start --addons=ingress
```
-5. [Follow this instructions to install knative serving.](https://knative.dev/docs/install/yaml-install/serving/install-serving-with-yaml/)
+6. [Follow these instructions to install Knative Serving.](https://knative.dev/docs/install/yaml-install/serving/install-serving-with-yaml/)
-6. Install a networking layer. Currently Lithops supports **Kourier**. [Follow these instructions to install Kourier.](https://knative.dev/docs/install/yaml-install/serving/install-serving-with-yaml/#install-a-networking-layer)
+7. Install a networking layer. Currently Lithops supports **Kourier**. [Follow these instructions to install Kourier.](https://knative.dev/docs/install/yaml-install/serving/install-serving-with-yaml/#install-a-networking-layer)
-7. Edit your lithops config and add:
+8. Edit your lithops config and add:
```yaml
knative:
ingress_endpoint : http://127.0.0.1:80
```
-8. On a separate terminal, keep running:
+9. On a separate terminal, keep running:
```bash
minikube tunnel
```
### Option 2 - IBM IKS:
-4. Access to the [IBM dashboard](https://cloud.ibm.com/kubernetes/landing) and create a new Kubernetes cluster.
+5. Access the [IBM dashboard](https://cloud.ibm.com/kubernetes/landing) and create a new Kubernetes cluster.
-5. Once the cluster is running, follow the instructions of the "Actions"--> "Connect via CLI" option of the dashboard to configure the *kubectl* client in your local machine.
+6. Once the cluster is running, follow the instructions of the "Actions"--> "Connect via CLI" option of the dashboard to configure the *kubectl* client in your local machine.
-6. [Follow this instructions to install knative serving.](https://knative.dev/docs/install/yaml-install/serving/install-serving-with-yaml/)
+7. [Follow these instructions to install Knative Serving.](https://knative.dev/docs/install/yaml-install/serving/install-serving-with-yaml/)
-7. Install a networking layer. Currently Lithops supports **Kourier**. [Follow these instructions to install Kourier.](https://knative.dev/docs/install/yaml-install/serving/install-serving-with-yaml/#install-a-networking-layer)
+8. Install a networking layer. Currently Lithops supports **Kourier**. [Follow these instructions to install Kourier.](https://knative.dev/docs/install/yaml-install/serving/install-serving-with-yaml/#install-a-networking-layer)
### Option 3 - IBM IKS or any other Kubernetes Cluster:
-4. Install Kubernetes >= v1.16 and make sure the *kubectl* client is running.
+5. Install Kubernetes >= v1.16 and make sure the *kubectl* client is running.
6. [Follow this instructions to install knative serving.](https://knative.dev/docs/install/yaml-install/serving/install-serving-with-yaml/)
@@ -92,6 +98,7 @@ knative:
docker_server : us.icr.io
docker_user : iamapikey
docker_password :
+ docker_namespace : # namespace name from https://cloud.ibm.com/registry/namespaces
```
## Summary of configuration keys for Knative:
diff --git a/docs/source/compute_config/kubernetes.md b/docs/source/compute_config/kubernetes.md
index 8c158b804..796f67d89 100644
--- a/docs/source/compute_config/kubernetes.md
+++ b/docs/source/compute_config/kubernetes.md
@@ -2,6 +2,13 @@
Lithops with kubernetes as serverless compute backend.
+## Installation
+
+1. Install kubernetes backend dependencies:
+
+```bash
+python3 -m pip install lithops[kubernetes]
+```
## Configuration
@@ -36,7 +43,7 @@ k8s:
....
docker_server : docker.io
docker_user :
- docker_password :
+ docker_password :
```
### Configure IBM Container Registry
@@ -48,6 +55,7 @@ k8s:
docker_server : us.icr.io
docker_user : iamapikey
docker_password :
+ docker_namespace : # namespace name from https://cloud.ibm.com/registry/namespaces
```
## Summary of configuration keys for kubernetes:
@@ -60,12 +68,14 @@ k8s:
|k8s | docker_server | docker.io |no | Container registry URL |
|k8s | docker_user | |no | Container registry user name |
|k8s | docker_password | |no | Container registry password/token. In case of Docker hub, login to your docker hub account and generate a new access token [here](https://hub.docker.com/settings/security)|
+|k8s | rabbitmq_executor | False | no | Alternative K8s backend that accelerates parallel function execution (map) thanks to RabbitMQ group calls and warm-state pods of higher granularity. More information [here](./kubernetes_rabbitmq.md).|
|k8s | max_workers | 100 | no | Max number of workers per `FunctionExecutor()`|
|k8s | worker_processes | 1 | no | Number of Lithops processes within a given worker. This can be used to parallelize function activations within a worker. It is recommendable to set this value to the same number of CPUs of the container. |
|k8s | runtime | |no | Docker image name.|
|k8s | runtime_cpu | 1 |no | CPU limit. Default 1vCPU |
|k8s | runtime_memory | 512 |no | Memory limit in MB. Default 512MB |
|k8s | runtime_timeout | 600 |no | Runtime timeout in seconds. Default 600 seconds |
+|k8s | master_timeout | 600 |no | Master pod timeout in seconds. Default 600 seconds |
## Test Lithops
diff --git a/docs/source/compute_config/kubernetes_rabbitmq.md b/docs/source/compute_config/kubernetes_rabbitmq.md
new file mode 100644
index 000000000..33db5533c
--- /dev/null
+++ b/docs/source/compute_config/kubernetes_rabbitmq.md
@@ -0,0 +1,131 @@
+# Kubernetes RabbitMQ (batch/job)
+
+**Lithops for Kubernetes RabbitMQ** is an **experimental k8s backend** designed to leverage the capabilities of RabbitMQ for more efficient execution of **group invocations**. It introduces the use of pods for **warm starts**, optimizes resource allocation, and offers various enhancements.
+
+All of these changes are **ideal** for pipelines where launching **hundreds of parallel tasks as quickly as possible** is a critical requirement, on a fixed-size heterogeneous cluster.
+
+## Changes of K8s RabbitMQ
+
+* **Utilization of RabbitMQ:** Within this architecture, RabbitMQ is employed to launch group invocations in a single call, avoiding the need for multiple calls for each function execution. Additionally, it enables data exchange between the client and running pods, bypassing the Storage Backend as an intermediary, which is slower. This accelerates and streamlines communication significantly.
+
+* **Warm Start Capability:** Unlike K8s, Lithops K8s RabbitMQ introduces the ability to perform warm starts on the worker pods. Previously created worker pods keep listening to RabbitMQ for new tasks, further reducing invocation time to almost 0.
+
+* **Improved Invocation Time:** Lithops K8s RabbitMQ offers a significant enhancement of **up to 4x** in cold start time, effectively reducing the delay before your functions start executing.
+
+* **Resource Utilization:** In this backend, CPU assignment is employed by allocating identifiers to each CPU. This approach facilitates more effective resource management within our cluster, enabling the creation of pods that by default match the entire capacity of a machine (worker_processes) and allowing the launch of precisely the number of tasks that can run concurrently.
+
+## Installation
+
+1. Install kubernetes backend dependencies:
+
+```bash
+python3 -m pip install lithops[kubernetes]
+```
+
+## Configuration
+
+1. Edit your Lithops config and add the following keys:
+
+```yaml
+ lithops:
+ backend : k8s
+
+ k8s:
+ ....
+ docker_server : docker.io
+ docker_user :
+ docker_password :
+ ....
+ rabbitmq_executor : True
+```
+
+2. Make sure you have a kubernetes cluster configuration file.
+ - Option 1: You have the config file in `~/.kube/config`
+
+ - Option 2: You have the config file in another location, and you exported the KUBECONFIG variable:
+ ```bash
+ export KUBECONFIG=
+ ```
+
+ - Option 3: You have the config file in another location, and you set the `kubecfg_path` var in the Lithops config:
+ ```yaml
+ k8s:
+ kubecfg_path:
+ ```
+
+ 3. For this version, a connection to [RabbitMQ](../monitoring.rst) is required.
+ To enable Lithops to use this service, add the AMQP_URL key into the rabbitmq section in the configuration, for example:
+ ```yaml
+ rabbitmq:
+ amqp_url: # amqp://
+ ```
+ In addition, you need to activate the monitoring service in the configuration (Lithops section):
+
+ ```yaml
+ lithops:
+ monitoring: rabbitmq
+ ```
+
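+Once the backend is configured, it is used through the standard Lithops API. The following is a minimal usage sketch (the function and the input data are illustrative):
+
+```python
+import lithops
+
+def double(x):
+    return x * 2
+
+if __name__ == '__main__':
+    # rabbitmq_executor and the monitoring backend are read from the Lithops config
+    fexec = lithops.FunctionExecutor(backend='k8s')
+    fexec.map(double, range(100))
+    print(fexec.get_result())
+```
+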
+## Comparison
+
+In these graphs, we will compare the usage of the original K8s architecture with the RabbitMQ implementation.
+
+All of these tests consist of running 225 functions on a 2-node cluster, each with 128 CPUs. The executed function involves a 5-second sleep.
+
+In this scenario, it is evident that the invocation time is consistently reduced by a factor of **up to 5x** on cold start and **up to 7x** on warm start. This represents a significant enhancement for parallel function execution.
+
+- Plot 1: Kubernetes K8s original.
+
+*Elapsed time = 16.9 sec.*
+
+
+
+- Plot 2: Kubernetes K8s original with master on Warm Start.
+
+*Elapsed time = 8.1 sec.*
+
+
+
+- Plot 3: Kubernetes K8s RabbitMQ.
+
+*Elapsed time = 8 sec.*
+
+
+
+- Plot 4: Kubernetes K8s RabbitMQ with workers on Warm Start.
+
+*Elapsed time = 5.9 sec.*
+
+
+
+## Summary of configuration keys for kubernetes:
+
+|Group|Key|Default|Mandatory|Additional info|
+|---|---|---|---|---|
+|k8s | kubecfg_path | |no | Path to kubecfg file. Mandatory if config file not in `~/.kube/config` or KUBECONFIG env var not present|
+|k8s | kubecfg_context | |no | kubernetes context to use from your kubeconfig file. It will use the default active context if not provided |
+|k8s | namespace | default |no | Kubernetes namespace to use for lithops execution |
+|k8s | docker_server | docker.io |no | Container registry URL |
+|k8s | docker_user | |no | Container registry user name |
+|k8s | docker_password | |no | Container registry password/token. In case of Docker hub, login to your docker hub account and generate a new access token [here](https://hub.docker.com/settings/security)|
+|k8s | rabbitmq_executor | False | yes | Alternative K8s backend accelerating parallel function execution (map) thanks to rabbitmq group calls and warm-state pods of higher granularity.|
+|k8s | worker_processes | |no | CPUs per pod. This enables pod granularity. By default it uses all the CPUs of each node. |
+|k8s | runtime | |no | Docker image name.|
+|k8s | runtime_cpu | 1 |no | CPU limit. Default 1vCPU |
+|k8s | runtime_memory | 512 |no | Memory limit in MB per pod. Default 512MB |
+
+## Test Lithops
+
+Once you have your compute and storage backends configured, you can run a hello world function with:
+
+```bash
+lithops hello -b k8s -s ibm_cos
+```
+
+## Viewing the execution logs
+
+You can view the function executions logs in your local machine using the *lithops client*:
+
+```bash
+lithops logs poll
+```
\ No newline at end of file
diff --git a/docs/source/compute_config/localhost.md b/docs/source/compute_config/localhost.md
index a6fc9a511..6d0f4abe3 100644
--- a/docs/source/compute_config/localhost.md
+++ b/docs/source/compute_config/localhost.md
@@ -14,7 +14,7 @@ lithops:
## Execution Environments
-The localhost backend can run functions both using the local ``python3`` interpreter, or using a ``docker container`` image. The environment is automatically chosen depending on whether or not you provided a Docker image as a runtime.
+The localhost backend can run functions either using the local ``python3`` interpreter, or using a ``container`` image. The environment is automatically chosen depending on whether or not you provided a Docker image as a runtime.
In both cases, you can view the executions logs in your local machine using the *lithops client*:
@@ -24,7 +24,7 @@ lithops logs poll
### Default Environment
-By default Lithops uses the local python interpreter to run the functions. That is, if for example you executed the main script with ``python3.8``, your functions will run with ``python3.8``. in this case, you must ensure that all the dependencies of your script are installed in your machine.
+By default, Lithops uses the local python interpreter to run the functions. That is, if for example you executed the main script with ``python3.12``, your functions will run with ``python3.12``. In this case, you must ensure that all the dependencies of your script are installed in your machine.
```python
# As we use the default FunctionExecutor(), backend must be set to localhost in config
@@ -38,25 +38,25 @@ or alternatively, you can force the Localhost executor with:
fexec = lithops.LocalhostExecutor()
```
-### Docker Environment
+### Container Environment
-The Docker environment runs the functions within a ``docker container``. In this case you must [install the Docker CE version](https://docs.docker.com/get-docker/) in your machine. This environment is automatically activated when you provide a docker image as a runtime. For example, by adding the following keys in the config:
+The Container environment runs the functions within a ``docker container``. In this case you must [install the Docker CE version](https://docs.docker.com/get-docker/) in your machine. This environment is automatically activated when you provide a docker image as a runtime. For example, by adding the following keys in the config:
```yaml
localhost:
- runtime: ibmfunctions/action-python-v3.8
+ runtime: docker.io/lithopscloud/ibmcf-python-v312
```
or by using the ``runtime`` param in a function executor:
```python
-# As we use the default FunctionExecutor(), backend must be set to localhost in config
-fexec = lithops.FunctionExecutor(runtime='jsampe/action-python-v3.8')
+# As we use the default FunctionExecutor(), the "backend" config parameter must be set to localhost in config
+fexec = lithops.FunctionExecutor(runtime='docker.io/lithopscloud/ibmcf-python-v312')
```
```python
-# As we use/force the LocalhostExecutor(), backend does not need to be set to localhost in config
-fexec = lithops.LocalhostExecutor(runtime='jsampe/action-python-v3.8')
+# As we use/force the LocalhostExecutor(), the "backend" config parameter does not need to be set to localhost in config
+fexec = lithops.LocalhostExecutor(runtime='docker.io/lithopscloud/ibmcf-python-v312')
```
In this mode of execution, you can use any docker image that contains all the required dependencies. For example, the IBM Cloud Functions and Knative runtimes are compatible with it.
@@ -65,7 +65,8 @@ In this mode of execution, you can use any docker image that contains all the re
|Group|Key|Default|Mandatory|Additional info|
|---|---|---|---|---|
-|localhost | runtime | python3 | no | Docker image name |
+|localhost | runtime | python3 | no | By default it uses the `python3` interpreter. It can be a container image name |
+|localhost | version | 2 | no | There are 2 different localhost implementations. Use '1' to select the alternative version |
|localhost | worker_processes | CPU_COUNT | no | Number of Lithops processes. This is used to parallelize function activations. By default it is set to the number of CPUs of your machine |
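+
+As a brief illustration, a minimal configuration sketch using these keys might look like the following (the values shown are only examples):
+
+```yaml
+lithops:
+    backend: localhost
+
+localhost:
+    version: 1             # switch to the alternative localhost implementation
+    worker_processes: 4    # limit parallelism to 4 local processes
+```
+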
## Test Lithops
@@ -82,4 +83,6 @@ You can view the function executions logs in your local machine using the *litho
```bash
lithops logs poll
-```
\ No newline at end of file
+```
+
+You can view the localhost runner logs in `/tmp/lithops-*/localhost-runner.log`
\ No newline at end of file
diff --git a/docs/source/compute_config/openwhisk.md b/docs/source/compute_config/openwhisk.md
index a700e3145..ce32361f6 100644
--- a/docs/source/compute_config/openwhisk.md
+++ b/docs/source/compute_config/openwhisk.md
@@ -80,6 +80,7 @@ Lithops with *OpenWhisk* as serverless compute backend. Lithops can also run fun
|openwhisk | runtime_memory | 256 |no | Memory limit in MB. Default 256MB |
|openwhisk | runtime_timeout | 600 |no | Runtime timeout in seconds. Default 10 minutes |
|openwhisk | invoke_pool_threads | 500 |no | Number of concurrent threads used for invocation |
+|openwhisk | runtime_include_function | False | no | If set to true, Lithops will automatically build a new runtime, including the function's code, instead of transferring it through the storage backend at invocation time. This is useful when the function's code size is large (in the order of 10s of MB) and the code does not change frequently |
## Test Lithops
diff --git a/docs/source/compute_config/oracle_functions.md b/docs/source/compute_config/oracle_functions.md
index 6b7fbbce8..f7a999960 100644
--- a/docs/source/compute_config/oracle_functions.md
+++ b/docs/source/compute_config/oracle_functions.md
@@ -1,4 +1,4 @@
-# Oracle Functions (beta)
+# Oracle Functions
Lithops with *Oracle Functions* as serverless compute backend.
@@ -7,7 +7,8 @@ Lithops with *Oracle Functions* as serverless compute backend.
## Installation
1. Install Oracle Cloud backend dependencies:
-```
+
+```bash
python3 -m pip install lithops[oracle]
```
@@ -117,6 +118,7 @@ docker login .ocir.io -u / -p :@:/
+ ```
+
+ Replace `<user>`, `<password>`, `<hostname>`, `<port>`, and `<vhost>` with your RabbitMQ credentials.
+
+### Configure Singularity backend
+
+ ```yaml
+ singularity:
+ worker_processes:
+ runtime:
+ sif_path:
+ ```
+
+## Summary of Configuration Keys for Singularity
+
+| Group | Key | Default | Mandatory | Additional info |
+|-------------|----------------------|---------|-----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| singularity | worker_processes | 1 | no | Number of functions sent in each RabbitMQ package. Ideally, set to a multiple of the node's CPU count. |
+| singularity | runtime | | yes | Name of the Singularity runtime image (`.sif`) file containing the Lithops runtime environment. |
+| singularity | sif_path | /tmp | no | Directory path where the Singularity runtime image `.sif` will be stored. |
+
+## Deploying the Runtime Image
+
+Since Lithops doesn't directly manage Singularity instances on your cluster, you need to ensure the runtime image is available on **each** node:
+
+1. **Transfer:** Manually copy the built `.sif` runtime image to each node in your cluster.
+
+2. **Start:** Start a new Singularity instance on each node using the `.sif` file. Then run the instance and add the RabbitMQ server details to the environment variables.
+
+ ```bash
+ singularity instance start --fakeroot /path/to/sif/your-singularity-runtime.sif <instance-name>
+ singularity run instance://<instance-name> --env AMQP_URL=amqp://<user>:<password>@<hostname>:<port>/<vhost>
+ ```
+
+Depending on your cluster setup, you might need to adjust permissions of the `.sif` file or the [singularity flags](https://docs.sylabs.io/guides/latest/user-guide/cli/singularity_exec.html#singularity-exec) to ensure that the user running the Lithops worker can access and execute it.
+
+
+## Test Lithops
+Once you have your compute and storage backends configured, you can run a hello world function with:
+
+```bash
+lithops hello -b singularity
+```
+
+## Viewing the execution logs
+
+You can view the function executions logs in your local machine using the *lithops client*:
+
+```bash
+lithops logs poll
+```
diff --git a/docs/source/compute_config/vm.md b/docs/source/compute_config/vm.md
index 4e8d77da4..c828c0c1d 100644
--- a/docs/source/compute_config/vm.md
+++ b/docs/source/compute_config/vm.md
@@ -1,6 +1,6 @@
# Virtual Machine
-Lithops can run functions using a remote host or a virtual machine (VM). In this backend, Lithops uses all the available VM CPUs to parallelize the tasks of a job. For testing purposes, it is preferable to have an Ubuntu > 20.04 host.
+Lithops can run functions using a remote host or a virtual machine (VM). In this backend, Lithops uses all the available VM CPUs to parallelize the tasks of a job. For testing purposes, it is preferable to have an Ubuntu > 22.04 host.
## Configuration
@@ -33,13 +33,13 @@ The Docker environment runs the functions within a ``docker container``. In this
```yaml
vm:
- runtime: lithopscloud/ibmcf-python-v38
+ runtime: lithopscloud/ibmcf-python-v312
```
or by using the ``runtime`` param in a function executor:
```python
-fexec = lithops.FunctionExecutor(runtime='lithopscloud/ibmcf-python-v38')
+fexec = lithops.FunctionExecutor(runtime='lithopscloud/ibmcf-python-v312')
```
In this backend, you can use any docker image that contains all the required dependencies. For example, the IBM Cloud Functions and Knative runtimes are compatible with it.
diff --git a/docs/source/configuration.rst b/docs/source/configuration.rst
index 00f20f11d..273bb5254 100644
--- a/docs/source/configuration.rst
+++ b/docs/source/configuration.rst
@@ -29,8 +29,9 @@ Choose your compute and storage engines from the table below:
|| `Azure Container Apps `_ || `Redis `_ |
|| `Aliyun Function Compute `_ || `OpenStack Swift `_ |
|| `Oracle Functions `_ || `Oracle Object Storage `_ |
-|| `Kubernetes `_ || |
+|| `Kubernetes `_ || |
|| `Knative `_ || |
+|| `Singularity `_ || |
|| `OpenWhisk `_ || |
|| `Remote Host / Virtual Machine `_ || |
|| `IBM Virtual Private Cloud `_ || |
@@ -44,7 +45,7 @@ Configuration File
To configure Lithops through a `configuration file `_
you have multiple options:
-1. Create e new file called ``config`` in the ``~/.lithops`` folder.
+1. Create a new file called ``config`` in the ``~/.lithops`` folder.
2. Create a new file called ``.lithops_config`` in the root directory of your project from where you will execute your
Lithops scripts.
@@ -70,22 +71,30 @@ Here is an example of providing configuration keys for IBM Cloud Functions and I
.. code:: python
- import lithops
-
-
- config = {'lithops': {'backend': 'ibm_cf', 'storage': 'ibm_cos'},
- 'ibm': {'region': 'REGION',
- 'iam_api_key': 'IAM_API_KEY',
- 'resource_group_id': 'RESOURCE_GROUP_ID'}
- 'ibm_cos': {'storage_bucket': 'STORAGE_BUCKET'}}
-
- def hello_world(name):
- return 'Hello {}!'.format(name)
-
- if __name__ == '__main__':
- fexec = lithops.FunctionExecutor(config=config)
- fexec.call_async(hello_world, 'World')
- print(fexec.get_result())
+ import lithops
+
+ config = {
+ 'lithops': {
+ 'backend': 'code_engine',
+ 'storage': 'ibm_cos'
+ },
+ 'ibm': {
+ 'region': 'REGION',
+ 'iam_api_key': 'IAM_API_KEY',
+ 'resource_group_id': 'RESOURCE_GROUP_ID'
+ },
+ 'ibm_cos': {
+ 'storage_bucket': 'STORAGE_BUCKET'
+ }
+ }
+
+ def hello_world(number):
+ return f'Hello {number}!'
+
+ if __name__ == '__main__':
+ fexec = lithops.FunctionExecutor(config=config)
+ fexec.map(hello_world, [1, 2, 3, 4])
+ print(fexec.get_result())
.. _config-reference-label:
diff --git a/docs/source/contributing.rst b/docs/source/contributing.rst
index d0bb84ec4..01aa4e074 100644
--- a/docs/source/contributing.rst
+++ b/docs/source/contributing.rst
@@ -18,24 +18,39 @@ To contribute a patch
1. Break your work into small, single-purpose patches if possible. It's much
harder to merge in a large change with a lot of disjoint features.
2. Submit the patch as a GitHub pull request against the master branch.
-3. Make sure that your code passes the unit tests.
-4. Make sure that your code passes the linter.
-5. Add new unit tests for your code.
-
-
-Unit testing
-------------
-
-To test that all is working as expected, run either:
-
-.. code::
-
- $ lithops test
-
-
-.. code::
-
- $ python3 -m lithops.tests.tests_main
-
-
-Please follow the guidelines in :ref:`testing` for more details.
\ No newline at end of file
+3. Make sure that your code passes the tests.
+4. Make sure that your code passes the linter. Install `flake8` with `pip3 install flake8` and run the following command until you don't see any linting errors:
+ ```bash
+ flake8 lithops --count --max-line-length=180 --statistics --ignore W605,W503
+ ```
+5. Add new tests for your code.
+
+
+Testing
+-------
+
+To test that all is working as expected, you must install `pytest`, navigate to the tests folder `lithops/tests/`, and execute:
+```bash
+pytest -v
+```
+
+If you made changes to a specific backend, please run tests on that backend.
+For example, if you made changes to the AWS Lambda backend, execute the tests with:
+```bash
+pytest -v --backend aws_lambda --storage aws_s3
+```
+
+You can list all the available tests using:
+```bash
+pytest --collect-only
+```
+
+To run a specific test or group of tests, use the `-k` parameter, for example:
+```bash
+pytest -v --backend localhost --storage localhost -k test_map
+```
+
+To view all the Lithops logs during the tests, and in DEBUG mode, execute:
+```bash
+pytest -o log_cli=true --log-cli-level=DEBUG --backend localhost --storage localhost
+```
diff --git a/docs/source/customized_runtime.rst b/docs/source/customized_runtime.rst
deleted file mode 100644
index ab1143408..000000000
--- a/docs/source/customized_runtime.rst
+++ /dev/null
@@ -1,19 +0,0 @@
-
-Dynamic Runtime Customization
-=============================
-
-.. note:: Currently this feature only works with dcoker-based backends.
-
-This feature enables early preparation of Lithops workers with the map function and custom Lithops
-runtime already deployed, and ready to be used in consequent computations. This can reduce overall map/reduce
-computation latency significantly, especially when the computation overhead (pickle stage) is longer compared to
-the actual computation performed at the workers.
-
-.. warning:: To protect your privacy, use a private docker registry instead of public docker hub.
-
-To activate this mode, set to True the ``customized_runtime`` property under ``lithops`` section of the config file.
-
-.. code:: yaml
-
- lithops:
- customized_runtime: True
diff --git a/docs/source/data_processing.rst b/docs/source/data_processing.rst
index 1fa0d48a1..4ddfb9479 100644
--- a/docs/source/data_processing.rst
+++ b/docs/source/data_processing.rst
@@ -1,156 +1,292 @@
.. _data-processing:
-Processing data from the Cloud
-===========================================
+Processing Data from the Cloud
+==============================
-Lithops has built-in logic for processing data objects from public URLs and object storage services. This logic is automatically activated with the reseverd parameter named **obj**. When you write in the parameters of a function the parameter name **obj**, you are telling to Lithops that you want to process objects located in an object storage service, public urls, or localhost files.
+Lithops provides built-in support for reading and processing data from **object storage**, **public URLs**, and **local files**. This functionality is automatically enabled when your function includes a reserved parameter named **obj**.
-Additionally, the built-in data-processing logic integrates a **data partitioner** system that allows to automatically split the dataset in smallest chunks. Splitting a file into smaller chunks permit to leverage the parallelism provided by the compute backends to process the data. We designed the partitioner within the ``map()`` and ``map_reduce()`` API calls, an it is configurable by specifying the *size of the chunk*, or the *number of chunks* to split each file. The current implementation of the data partitioner supports to split files that contain multiple lines (or rows) ended by '\n', for example, a .txt book or a common .csv file among others. More data-types will be supported in future releases.
+When you define a function with the parameter `obj`, Lithops knows to pass in a special object representing a file (or a chunk of a file) from an external data source. This allows you to write scalable data processing workflows with minimal boilerplate.
+Data Partitioning
+-----------------
-Cloud Object Storage
---------------------
-For processing data from a cloud object storage service, the input data must be either a list of buckets, a list of buckets with object prefix, or a list of data objects. If you set the *size of the chunk* or the *number of chunks*, the partitioner is activated inside Lithops and it is responsible to split the objects into smaller chunks, eventually running one function activation for each generated chunk. If *size of the chunk* and *number of chunks* are not set, chunk is an entire object, so one function activation is executed for each individual object.
+Lithops includes an integrated **data partitioner** that allows you to automatically split large datasets into smaller, more manageable chunks. This partitioning enables massive parallelism across the compute backend, accelerating data processing tasks.
-The **obj** parameter is a python class from where you can access all the information related to the object (or chunk) that the function is processing. For example, consider the following function that shows all the available attributes in **obj** when you are processing objects from an object store:
+Partitioning is supported directly within the :meth:`map()` and :meth:`map_reduce()` APIs and can be controlled via:
+- **`obj_chunk_size`**: The size (in bytes) of each chunk to split the object into.
+- **`obj_chunk_number`**: The total number of chunks to split each object into.
-.. code:: python
+Currently, the partitioner supports **text-based files** where rows are separated by newline characters (`\n`), such as `.txt` and `.csv`. Support for additional data types is planned in future releases.
+
+Cloud Object Storage Integration
+--------------------------------
+
+When processing data from cloud object storage, your input must be one of the following:
+
+1. A single bucket or a list of buckets
+2. A bucket prefix (e.g., a folder path)
+3. A list of specific object keys
+
+Based on your configuration:
+
+- If `obj_chunk_size` or `obj_chunk_number` is set, **each object is automatically split into smaller chunks**, and Lithops runs one function activation per chunk.
+- If chunking is not configured, Lithops runs one function activation per full object.
+
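+For example, with an illustrative bucket prefix and a map function like the one shown in the next section, the two behaviors can be requested as follows:
+
+.. code-block:: python
+
+    import lithops
+
+    fexec = lithops.FunctionExecutor()
+
+    # One activation per 16 MB chunk of every object under the prefix
+    fexec.map(my_map_function, 'my-bucket/data/', obj_chunk_size=16 * 1024 ** 2)
+
+    # One activation per full object (no chunking parameters set)
+    fexec.map(my_map_function, 'my-bucket/data/')
+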
+Accessing Object Metadata
+--------------------------
+
+Inside your function, the `obj` parameter gives you access to metadata and data for the current chunk being processed.
+
+Example:
+
+.. code-block:: python
def my_map_function(obj):
- print(obj.bucket)
- print(obj.key)
- print(obj.part)
- print(obj.data_byte_range)
- print(obj.chunk_size)
-
- data = obj.data_stream.read()
+ print(obj.bucket) # Bucket name
+ print(obj.key) # Object key
+ print(obj.part) # Chunk number
+ print(obj.data_byte_range) # Byte range for this chunk
+ print(obj.chunk_size) # Chunk size in bytes
+
+ data = obj.data_stream.read() # Read the data for this chunk
+
+Accepted Input Formats
+-----------------------
-The allowed inputs of a function can be:
+Lithops accepts **only one type** of input format per execution. Do not mix formats in the same list. The supported formats are:
-- Input data is a bucket or a list of buckets. See an example in [map_reduce_cos_bucket.py](../../examples/map_reduce_cos_bucket.py):
+- **Buckets**: One or more buckets
+ *(See: `map_reduce_cos_bucket.py <../../examples/map_reduce_cos_bucket.py>`_)*
-.. code:: python
+ .. code-block:: python
- iterdata = 'bucket1'
+ iterdata = ['my-bucket-1', 'my-bucket-2']
-- Input data is a bucket(s) with object prefix. See an example in [map_cos_prefix.py](../../examples/map_cos_prefix.py):
+- **Object Prefixes**: Folder-like paths ending with `/`
+ *(See: `map_cos_prefix.py <../../examples/map_cos_prefix.py>`_)*
-.. code:: python
+ .. code-block:: python
- iterdata = ['bucket1/images/', 'bucket1/videos/']
+ iterdata = ['my-bucket/data/csvs/', 'my-bucket/logs/']
-Notice that you must write the end slash (/) to inform partitioner you are providing an object prefix.
+ ⚠️ Prefixes must end with a `/` to indicate to the partitioner that you're specifying a folder-like path.
-- Input data is a list of object keys. See an example in [map_reduce_cos_key.py](../../examples/map_reduce_cos_key.py):
+- **Object Keys**: Specific file paths
+ *(See: `map_reduce_cos_key.py <../../examples/map_reduce_cos_key.py>`_)*
-.. code:: python
+ .. code-block:: python
- iterdata = ['bucket1/object1', 'bucket1/object2', 'bucket1/object3']
+ iterdata = ['my-bucket/file1.csv', 'my-bucket/file2.csv']
-Notice that *iterdata* must be only one of the previous 3 types. Intermingled types are not allowed. For example, you cannot set in the same *iterdata* a bucket and some object keys:
+❌ **Mixing formats is not allowed**:
-.. code:: python
+.. code-block:: python
- iterdata = ['bucket1', 'bucket1/object2', 'bucket1/object3'] # Not allowed
+ # This will raise an error
+ iterdata = ['my-bucket', 'my-bucket/file2.csv']
-Once iterdata is defined, you can execute Lithops as usual, either using *map()* or *map_reduce()* calls. If you need to split the files in smaller chunks, you can set (optionally) the *obj_chunk_size* or *obj_chunk_number* parameters.
+Putting It All Together
+------------------------
-.. code:: python
+Once you've defined your input and function, you can run Lithops as usual with optional chunking:
+
+.. code-block:: python
import lithops
- object_chunksize = 4*1024**2 # 4MB
+ object_chunksize = 4 * 1024 ** 2 # 4 MB per chunk
fexec = lithops.FunctionExecutor()
fexec.map_reduce(my_map_function, iterdata, obj_chunk_size=object_chunksize)
result = fexec.get_result()
-Processing data from public URLs
---------------------------------
-For processing data from public URLs, the input data must be either a single URL or a list of URLs. As in the previous case, if you set the *size of the chunk* or the *number of chunks*, the partitioner is activated inside Lithops and it is responsible to split the objects into smaller chunks, as long as the remote storage server allows requests in chunks (ranges). If range requests are not allowed in the remote storage server, each URL is treated as a single object.
-The **obj** parameter is a python class from where you can access all the information related to the object (or chunk) that the function is processing. For example, consider the following function that shows all the available attributes in **obj** when you are processing URLs:
+Processing Data from Public URLs
+================================
+
+Lithops also supports processing data directly from **public URLs**. The input can be a single URL or a list of URLs.
+If you set the `obj_chunk_size` or `obj_chunk_number`, Lithops activates its internal partitioner to split each file into smaller chunks—**provided that the remote server supports HTTP range requests**. If range requests are not supported, each URL is processed as a single object.
-.. code:: python
+As with other data sources, the special **`obj`** parameter gives you access to metadata and the content of the chunk being processed.
+
+Example:
+
+.. code-block:: python
import lithops
def my_map_function(obj):
- print(obj.url)
- print(obj.part)
- print(obj.data_byte_range)
- print(obj.chunk_size)
+ print(obj.url) # Full URL of the object
+ print(obj.part) # Chunk number
+ print(obj.data_byte_range) # Byte range for this chunk
+ print(obj.chunk_size) # Size of this chunk (in bytes)
data = obj.data_stream.read()
for line in data.splitlines():
- # Do some process
- return partial_intersting_data
+ # Process each line
+ pass
+
+ return partial_result
def my_reduce_function(results):
- for partial_intersting_data in results:
- # Do some process
+ for partial_result in results:
+ # Aggregate results
+ pass
+
return final_result
- iterdata = ['http://myurl/my_file_1.csv', 'http://myurl/my_file_2.csv']
- object_chunk_number= 2
+ iterdata = ['http://example.com/file1.csv', 'http://example.com/file2.csv']
+ chunk_number = 2
fexec = lithops.FunctionExecutor()
fexec.map_reduce(my_map_function, iterdata, my_reduce_function,
- obj_chunk_number=object_chunk_number)
+ obj_chunk_number=chunk_number)
result = fexec.get_result()
-See a complete example in `map_reduce_url.py `_
+📄 See the full example in:
+`map_reduce_url.py `_
+
+
+Processing Data from Localhost Files
+====================================
+.. note:: This feature is only available when using the **localhost backend**.
-Processing data from localhost files
-------------------------------------
+Lithops can also process files stored on the local filesystem. The input can be:
-.. note:: This is only allowed when running Lithops with the localhost backend
+- A single file path
+- A list of file paths
+- A directory path
+- A list of directory paths
-For processing data from localhost files, the input data must be either a directory path, a list of directory paths, a file path a list of file paths. As in the previous cases, if you set the *size of the chunk* or the *number of chunks*, the partitioner is activated inside Lithops and it is responsible to split the objects into smaller chunks, eventually spawning one function for each generated chunk. If *size of the chunk* and *number of chunks* are not set, chunk is an entire object, so one function activation is executed for each individual object.
+As in other cases, if you set `obj_chunk_size` or `obj_chunk_number`, the file(s) will be split into chunks and processed in parallel. If not set, each file is processed as a single object.
-The **obj** parameter is a python class from where you can access all the information related to the object (or chunk) that the function is processing. For example, consider the following function that shows all the available attributes in **obj** when you are processing localhost files:
+The **`obj`** parameter again exposes the metadata and content of the chunk.
-.. code:: python
+Example:
+
+.. code-block:: python
import lithops
def my_map_function(obj):
- print(obj.path)
- print(obj.part)
- print(obj.data_byte_range)
- print(obj.chunk_size)
+ print(obj.path) # Full local file path
+ print(obj.part) # Chunk number
+ print(obj.data_byte_range) # Byte range for this chunk
+ print(obj.chunk_size) # Size of this chunk (in bytes)
data = obj.data_stream.read()
for line in data.splitlines():
- # Do some process
- return partial_intersting_data
+ # Process each line
+ pass
+
+ return partial_result
def my_reduce_function(results):
- for partial_intersting_data in results:
- # Do some process
+ for partial_result in results:
+ # Aggregate results
+ pass
+
return final_result
- iterdata = ['/home/user/data/my_file_1.csv', '/home/user/data/my_file_2.csv']
- object_chunk_number= 2
+ iterdata = ['/home/user/file1.csv', '/home/user/file2.csv']
+ chunk_number = 2
fexec = lithops.FunctionExecutor()
fexec.map_reduce(my_map_function, iterdata, my_reduce_function,
- obj_chunk_number=object_chunk_number)
+ obj_chunk_number=chunk_number)
result = fexec.get_result()
-See a complete example in `map_reduce_localhost.py `_.
+📄 See the full example in:
+`map_reduce_localhost.py `_
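+
+Directories can also be passed directly. A minimal sketch, assuming the localhost backend and an illustrative directory path:
+
+.. code-block:: python
+
+    # Process every file found under a local directory,
+    # spawning one activation per 4 MB chunk
+    fexec = lithops.FunctionExecutor()
+    fexec.map(my_map_function, '/home/user/data/', obj_chunk_size=4 * 1024 ** 2)
+    results = fexec.get_result()
+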
-Reducer granularity
+Reducer Granularity
-------------------
-When using the ``map_reduce()`` API call with ``obj_chunk_size`` or ``obj_chunk_number``, by default there will be only one reducer for all the object chunks from all the objects. Alternatively, you can spawn one reducer for each object by setting the parameter ``obj_reduce_by_key=True``.
-.. code:: python
+When using the :meth:`map_reduce()` API along with `obj_chunk_size` or `obj_chunk_number`, Lithops defaults to using **a single reducer** to aggregate results across **all chunks and objects**.
+
+If you'd prefer to reduce results **per original object** (e.g., one reducer per file), you can set the parameter `obj_reduce_by_key=True`.
+
+Example:
+
+.. code-block:: python
fexec.map_reduce(my_map_function, bucket_name, my_reduce_function,
- obj_chunk_size=obj_chunk_size, obj_reduce_by_key=True)
+ obj_chunk_size=obj_chunk_size,
+ obj_reduce_by_key=True)
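+
+For illustration, a minimal sketch of the expected behavior (bucket and function names are illustrative; with ``obj_reduce_by_key=True``, ``get_result()`` should return one reduced value per original object rather than a single global value):
+
+.. code-block:: python
+
+    def my_reduce_function(results):
+        # 'results' holds the partial results of the chunks of one object
+        return sum(results)
+
+    fexec = lithops.FunctionExecutor()
+    fexec.map_reduce(my_map_function, 'my-bucket/data/', my_reduce_function,
+                     obj_chunk_size=4 * 1024 ** 2,
+                     obj_reduce_by_key=True)
+    per_object_results = fexec.get_result()  # one entry per object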
+
+
+Elastic Data Processing and Cloud-Optimized Formats
+===================================================
+
+Lithops is especially powerful for **massively parallel data processing**. When the input to `map()` or `map_reduce()` is a **storage bucket** or a collection of large files, Lithops will automatically:
+
+- Launch one function per file, or
+- Partition large files into chunks and assign each chunk to a different function
+
+This behavior enables **elastic scaling** that fully utilizes the underlying compute backend.
+
+Cloud-Optimized Formats
+------------------------
+
+Lithops is ideally suited for processing **cloud-optimized data formats** such as:
+
+- **ZARR**
+- **COG** (Cloud Optimized GeoTIFF)
+- **COPC** (Cloud Optimized Point Clouds)
+- **FlatGeoBuf**
+
+These formats are designed to support **random access via HTTP range requests**, making them a perfect match for cloud object storage and serverless computing.
+
+By leveraging HTTP range primitives, Lithops enables fast and scalable parallel processing — distributing workload across many concurrent function activations, each fetching only the data it needs. This approach takes full advantage of the **high aggregate bandwidth** provided by modern object storage systems.
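+
+As a small illustration of this primitive, the Lithops Storage API can fetch just a byte range of an object (bucket and key names are illustrative, and this assumes the storage backend honors the ``Range`` header):
+
+.. code-block:: python
+
+    from lithops import Storage
+
+    storage = Storage()
+    # Fetch only the first KiB of the object via an HTTP range request
+    first_kb = storage.get_object('my-bucket', 'scenes/tile_001.tif',
+                                  extra_get_args={'Range': 'bytes=0-1023'})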
+
+Partitioning Non-Optimized Formats with Dataplug
+-------------------------------------------------
+
+Thanks to the `DATAPLUG `_ library, Lithops also supports **on-the-fly partitioning** of data formats that are **not cloud-optimized**. Supported formats include:
+
+- Genomics: **FASTA**, **FASTQ**, **FASTQ.GZ**
+- Metabolomics: **mzML**
+- Geospatial: **LIDAR (.laz)**
+
+Dataplug wraps these formats into cloud-native interfaces and exposes partitioning strategies that Lithops can consume directly.
+
+Example: Parallel Processing of a Cloud-Hosted LIDAR File
+----------------------------------------------------------
+
+In the example below, we use Dataplug to wrap a COPC (Cloud Optimized Point Cloud) file stored in S3, partition it into spatial chunks, and process each chunk in parallel using Lithops:
+
+.. code-block:: python
+
+ from dataplug import CloudObject
+ from dataplug.formats.geospatial.copc import CloudOptimizedPointCloud, square_split_strategy
+ import laspy
+ import lithops
+
+ # Function to process each LiDAR slice
+ def process_lidar_slice(data_slice):
+ las_data = data_slice.get()
+ lidar_file = laspy.open(las_data)
+ ...
+
+ # Load the COPC file from S3 using Dataplug
+ co = CloudObject.from_s3(
+ CloudOptimizedPointCloud,
+ "s3://geospatial/copc/CA_YosemiteNP_2019/USGS_LPC_CA_YosemiteNP_2019_D19_11SKB6892.laz",
+        s3_config=local_minio,  # your S3 client configuration (e.g., endpoint and credentials)
+ )
+
+ # Partition the point cloud into 9 spatial chunks
+ slices = co.partition(square_split_strategy, num_chunks=9)
+
+ # Process slices in parallel using Lithops
+ with lithops.FunctionExecutor() as executor:
+ futures = executor.map(process_lidar_slice, slices)
+ results = executor.get_result(futures)
+
+This enables truly **elastic and serverless geospatial processing pipelines**, with no infrastructure overhead and full cloud-native efficiency.
diff --git a/docs/source/design.rst b/docs/source/design.rst
index 0f1eaa80f..2286c8463 100644
--- a/docs/source/design.rst
+++ b/docs/source/design.rst
@@ -43,7 +43,7 @@ In Lithops, each map or reduce computation is executed as a separate compute *jo
As mentioned above, the ``FunctionExecutor`` class is responsible for orchestrating the computation in Lithops. One ``FunctionExecutor`` object is instantiated prior to any use of Lithops. Its initialization includes these important steps: 1. It sets up the workers (depending on the specific compute backend), such as constructing docker images, defining IBM Cloud Functions, etc. This step may not include actually creating the workers, as this may be done automatically by the backend on-demand. 2. It defines a bucket in object storage (depending on the storage backend) in which each job will store job and call data (prior to computation) and results (when computation is complete). 3. It creates a ``FunctionInvoker`` object, which is responsible for executing a job as a set of independent per-worker calls.
-Compute jobs are created in the functions of the ``job`` module (see chart above), invoked from the respective API method of ``FunctionExecutor``. Map jobs are created in ``create_map_job()`` and reduce jobs in ``create_reduce_job()``. The flow in both functions is quite similar. First, data is partitioned, with the intention of each partition be processed by one worker. For map jobs, this is done by invoking the ``create_partitions()`` function of the ``partitioner`` module, yielding a partition map.
+Compute jobs are created in the functions of the ``job`` module (see chart above), invoked from the respective API method of ``FunctionExecutor``. Map jobs are created in ``create_map_job()`` and reduce jobs in ``create_reduce_job()``. The flow in both functions is quite similar. First, data is partitioned, with the intention that each partition be processed by one worker. For map jobs, this is done by invoking the ``create_partitions()`` function of the ``partitioner`` module, yielding a partition map.
For reduce jobs, Lithops currently supports two modes: reduce per object, where each object is processed by a reduce function, and global (default) reduce, where all data is processed by a single reduce function. Respectively, data is partitioned as either one partition per storage object, or one global partition with all data. This process yields a partition map similar to map jobs. Additionally, ``create_reduce_job()`` wraps the reduce function in a special wrapper function that forces waiting for data before the actual reduce function is invoked. This is because reduce jobs follow map jobs, so the output of the map jobs needs to finish before reduce can run.
@@ -58,4 +58,4 @@ Completion of a computation job in Lithops is detected in one of two techniques:
**RabbitMQ**: A unique RabbitMQ topic is defined for each job. combining the executor id and job id. Each worker, once completes a call, posts a notification message on that topic (code in ``function_handler()`` in ``handler`` module, called from ``entry_point`` module of the worker). The ``wait_rabbitmq()`` function from ``wait_rabbitmq`` module, which is called from ``FunctionExecutor.wait()``, consumes a number of messages on that topic equal to ``total_calls`` and determines completion.
-**Object Storage**: As explained above, each call persists its computation results in a specific object. Determining completion of a job is by the ``FunctionExecutor.wait()`` invoking the ``wait_storage()`` function from the ``wait_storage`` module. This function repeatedly, once per fixed period (controllable), polls the executor’s bucket for status objects of a subset of calls that have still not completed. This allows control of resource usage and eventual detection of all calls.
\ No newline at end of file
+**Object Storage**: As explained above, each call persists its computation results in a specific object. Determining completion of a job is by the ``FunctionExecutor.wait()`` invoking the ``wait_storage()`` function from the ``wait_storage`` module. This function repeatedly, once per fixed period (controllable), polls the executor’s bucket for status objects of a subset of calls that have still not completed. This allows control of resource usage and eventual detection of all calls.
diff --git a/docs/source/execution_modes.rst b/docs/source/execution_modes.rst
index 8649bffc6..1d280d883 100644
--- a/docs/source/execution_modes.rst
+++ b/docs/source/execution_modes.rst
@@ -1,20 +1,25 @@
Execution Modes
===============
-Lithops compute backends can be classified in 3 different execution modes depending on the backend you choose.
+Lithops compute backends can be classified into three different execution modes,
+depending on the backend you choose.
Localhost mode
--------------
-The "localhost mode" in Lithops is a convenient feature that enables you to execute functions on your local machine using processes. It serves as the default execution mode if no specific configuration is provided.
+The "localhost mode" in Lithops is a convenient feature that enables you to execute
+functions on your local machine using processes, without relying on cloud resources
+or serverless computing environments. It serves as the default execution
+mode if no specific configuration is provided.
-In localhost mode, you can run your code locally without relying on cloud resources or serverless computing environments. It allows you to leverage the power of Lithops and its distributed computing capabilities right on your own machine.
+To use localhost mode, you can simply write your functions using the Lithops programming
+model and execute them locally. Lithops will handle the distribution and coordination
+of the function executions, optimizing performance by leveraging multiple processes.
-By utilizing processes, Lithops efficiently manages the execution of functions in parallel, taking advantage of the available resources on your local system. This mode is particularly useful for development, testing, and debugging purposes, as it eliminates the need to deploy code to a cloud environment during the development phase.
+This mode is particularly useful for development, testing, and debugging purposes,
+as it eliminates the need to deploy code to a cloud environment during the
+development phase.
-To use localhost mode, you can simply write your functions using the Lithops programming model and execute them locally. Lithops will handle the distribution and coordination of the function executions, optimizing performance by leveraging multiple processes.
-
-Whether you're exploring Lithops for the first time or working on local development tasks, the localhost mode offers a seamless experience, empowering you to harness the capabilities of Lithops without the need for cloud infrastructure.
.. note:: This is the preferable option for starting with Lithops, and for testing (debugging) your applications.
@@ -28,33 +33,49 @@ Whether you're exploring Lithops for the first time or working on local developm
Serverless mode
---------------
-The "serverless mode" in Lithops is designed to execute functions using publicly accessible serverless compute services, including IBM Cloud Functions, Amazon Lambda, Google Cloud Functions, and more, enabling parallel task execution in isolated cloud environments.
-
-In serverless mode, Lithops leverages the power of these serverless platforms to execute functions as independent tasks. Each function invocation is treated as a separate parallel task, benefiting from the scalability, automatic provisioning of resources, and isolation provided by the serverless compute service.
-
-By utilizing serverless platforms, developers can offload the burden of managing infrastructure and focus solely on writing and deploying their functions. The serverless mode in Lithops abstracts away the complexities of configuring and scaling embarrassingly parallel applications, making it easier to develop and deploy large-scale data processing workloads.
-
-This execution mode offers flexibility and elasticity, as resources are dynamically allocated based on workload demands, ensuring efficient utilization of compute power. It allows developers to seamlessly leverage the scalability and reliability of serverless platforms while benefiting from Lithops' programming model and distributed computing capabilities.
-
-Whether you're processing large datasets, handling real-time event-driven tasks, or building serverless applications, Lithops' serverless mode provides a convenient and scalable approach to execute functions on popular serverless compute services, simplifying the development and deployment process.
+The "serverless mode" in Lithops is designed to execute functions using publicly
+accessible serverless compute services, including IBM Cloud Functions, Amazon Lambda,
+Google Cloud Functions, and more, enabling parallel task execution in isolated cloud
+environments.
+
+In serverless mode, Lithops leverages the power of these serverless platforms to execute
+functions as independent tasks. Each function invocation is treated as a separate parallel
+task, benefiting from the scalability, automatic provisioning of resources, and isolation
+provided by the serverless compute service.
+
+By utilizing serverless platforms, developers can offload the burden of managing
+infrastructure and focus solely on writing and deploying their functions.
+The serverless mode in Lithops abstracts away the complexities of configuring and
+scaling embarrassingly parallel applications, making it easier to develop and deploy
+large-scale data processing workloads.
+
+This execution mode offers flexibility and elasticity, as resources are dynamically
+allocated based on workload demands, ensuring efficient utilization of compute power.
+It allows developers to seamlessly leverage the scalability and reliability of
+serverless platforms while benefiting from Lithops' programming model and distributed
+computing capabilities.
.. code:: python
fexec = lithops.ServerlessExecutor()
-- Available backends: `IBM Cloud Functions `_, `IBM Code Engine `_, `AWS Lambda `_, `AWS Batch `_, `Google Cloud Functions `_, `Google Cloud Run `_, `Azure Functions `_, `Azure Container APPs `_, `Aliyun Function Compute `_, `Oracle Functions `_, `Kubernetes Jobs `_, `Knative `_, `OpenWhisk `_
+- Available backends: `IBM Cloud Functions `_, `IBM Code Engine `_, `AWS Lambda `_, `AWS Batch `_, `Google Cloud Functions `_, `Google Cloud Run `_, `Azure Functions `_, `Azure Container APPs `_, `Aliyun Function Compute `_, `Oracle Functions `_, `Kubernetes Jobs `_, `Knative `_, `Singularity `_, `OpenWhisk