Spaces:
Runtime error
Runtime error
# Azure ML managed online deployment definition for serving BLOOM through a
# text-generation-inference container.
# NOTE(review): nesting was reconstructed from a flattened copy using the
# managedOnlineDeployment schema referenced below — confirm against the
# original file before deploying.
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
name: bloom-deployment
endpoint_name: bloom-inference

# Registered model (name:version) and the path where it is mounted in the container.
model: azureml:bloom-safetensors:1
model_mount_path: /var/azureml-model

# Environment variables passed to the serving container.
environment_variables:
  # Points inside model_mount_path, so weights are read from the mounted model.
  WEIGHTS_CACHE_OVERRIDE: /var/azureml-model/bloom-safetensors
  MODEL_ID: bigscience/bloom
  # Quoted so the value is a string rather than a YAML integer.
  # NOTE(review): presumably matches the GPU count of instance_type — confirm.
  NUM_SHARD: "8"

# Custom container image and the HTTP routes the platform calls on it.
environment:
  image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:0.2.0
  inference_config:
    # Liveness and readiness share the same health route.
    liveness_route:
      port: 80
      path: /health
    readiness_route:
      port: 80
      path: /health
    # Route that receives scoring (inference) requests.
    scoring_route:
      port: 80
      path: /generate

instance_type: Standard_ND96amsr_A100_v4

request_settings:
  request_timeout_ms: 90000
  max_concurrent_requests_per_instance: 256

# Both probes use identical settings. initial_delay is long,
# presumably to allow time for model loading — confirm.
# NOTE(review): probe durations are in seconds per the Azure ML schema docs — confirm.
liveness_probe:
  initial_delay: 600
  timeout: 90
  period: 120
  success_threshold: 1
  failure_threshold: 5
readiness_probe:
  initial_delay: 600
  timeout: 90
  period: 120
  success_threshold: 1
  failure_threshold: 5

instance_count: 1