-
Notifications
You must be signed in to change notification settings - Fork 133
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Could not find host server definition #221
Comments
@egormcobakaster This seems to indicate the environment in which the clearml-agent running your pipeline is deployed is not properly configured. Where are you running this clearml-agent? Did you complete |
i am running clearml-agent on the same machine as the clearml-server. when I start a new agent with a new queue: clearml-agent daemon --queue 6c86514d67014415967bc1d319f03fac this error disappears and individual tasks are launched from the ui, but when I start pipline, the first task gets queued and does not leave the queue |
Hi @egormcobakaster, Can you share the log of the pipeline task and your pipeline code? Also, do you only have a single clearml-agent running? and what is the queue name it listens to? |
Hi @jkhenning, pipeline log: Environment setup completed successfully
the first task only gets queued and is not executed: |
@jkhenning , @ainoam Thanks for the answers, it helped me to create another queue. one for the pipeline and the other for tasks |
when i run pipline from ui appears error:
clearml_agent: ERROR: Could not find host server definition (missing
~/clearml.conf
or Environment CLEARML_API_HOST)To get started with ClearML: setup your own
clearml-server
, or create a free account at https://app.clear.ml and runclearml-agent init
##docker-compose.yaml:
version: "3.6"
services:
apiserver:
command:
- apiserver
container_name: clearml-apiserver
image: allegroai/clearml:latest
restart: unless-stopped
volumes:
- ./logs:/var/log/clearml
- ./config:/opt/clearml/config
- ./data/fileserver:/mnt/fileserver
depends_on:
- redis
- mongo
- elasticsearch
- fileserver
environment:
CLEARML_ELASTIC_SERVICE_HOST: elasticsearch
CLEARML_ELASTIC_SERVICE_PORT: 9200
CLEARML_ELASTIC_SERVICE_PASSWORD: ${ELASTIC_PASSWORD}
CLEARML_MONGODB_SERVICE_HOST: mongo
CLEARML_MONGODB_SERVICE_PORT: 27017
CLEARML_REDIS_SERVICE_HOST: redis
CLEARML_REDIS_SERVICE_PORT: 6379
CLEARML_SERVER_DEPLOYMENT_TYPE: ${CLEARML_SERVER_DEPLOYMENT_TYPE:-linux}
CLEARML__apiserver__pre_populate__enabled: "true"
CLEARML__apiserver__pre_populate__zip_files: "/opt/clearml/db-pre-populate"
CLEARML__apiserver__pre_populate__artifacts_path: "/mnt/fileserver"
CLEARML__services__async_urls_delete__enabled: "true"
CLEARML__services__async_urls_delete__fileserver__url_prefixes: "[${CLEARML_FILES_HOST:-}]"
ports:
- "8008:8008"
networks:
- backend
- frontend
elasticsearch:
networks:
- backend
container_name: clearml-elastic
environment:
ES_JAVA_OPTS: -Xms2g -Xmx2g -Dlog4j2.formatMsgNoLookups=true
ELASTIC_PASSWORD: ${ELASTIC_PASSWORD}
bootstrap.memory_lock: "true"
cluster.name: clearml
cluster.routing.allocation.node_initial_primaries_recoveries: "500"
cluster.routing.allocation.disk.watermark.low: 500mb
cluster.routing.allocation.disk.watermark.high: 500mb
cluster.routing.allocation.disk.watermark.flood_stage: 500mb
discovery.zen.minimum_master_nodes: "1"
discovery.type: "single-node"
http.compression_level: "7"
node.ingest: "true"
node.name: clearml
reindex.remote.whitelist: '.'
xpack.monitoring.enabled: "false"
xpack.security.enabled: "false"
ulimits:
memlock:
soft: -1
hard: -1
nofile:
soft: 65536
hard: 65536
image: docker.elastic.co/elasticsearch/elasticsearch:7.17.7
restart: unless-stopped
volumes:
- ./data/elastic_7:/usr/share/elasticsearch/data
- /usr/share/elasticsearch/logs
fileserver:
networks:
- backend
- frontend
command:
- fileserver
container_name: clearml-fileserver
image: allegroai/clearml:latest
environment:
CLEARML__fileserver__delete__allow_batch: "true"
restart: unless-stopped
volumes:
- ./logs:/var/log/clearml
- ./data/fileserver:/mnt/fileserver
- ./config:/opt/clearml/config
ports:
- "8081:8081"
mongo:
networks:
- backend
container_name: clearml-mongo
image: mongo:4.4.9
restart: unless-stopped
command: --setParameter internalQueryMaxBlockingSortMemoryUsageBytes=196100200
volumes:
- ./data/mongo_4/db:/data/db
- ./data/mongo_4/configdb:/data/configdb
redis:
networks:
- backend
container_name: clearml-redis
image: redis:5.0
restart: unless-stopped
volumes:
- ./data/redis:/data
webserver:
command:
- webserver
container_name: clearml-webserver
# environment:
# CLEARML_SERVER_SUB_PATH : clearml-web # Allow Clearml to be served with a URL path prefix.
image: allegroai/clearml:latest
restart: unless-stopped
depends_on:
- apiserver
ports:
- "8080:80"
networks:
- backend
- frontend
async_delete:
depends_on:
- apiserver
- redis
- mongo
- elasticsearch
- fileserver
container_name: async_delete
image: allegroai/clearml:latest
networks:
- backend
restart: unless-stopped
environment:
CLEARML_ELASTIC_SERVICE_HOST: elasticsearch
CLEARML_ELASTIC_SERVICE_PORT: 9200
CLEARML_ELASTIC_SERVICE_PASSWORD: ${ELASTIC_PASSWORD}
CLEARML_MONGODB_SERVICE_HOST: mongo
CLEARML_MONGODB_SERVICE_PORT: 27017
CLEARML_REDIS_SERVICE_HOST: redis
CLEARML_REDIS_SERVICE_PORT: 6379
PYTHONPATH: /opt/clearml/apiserver
CLEARML__services__async_urls_delete__fileserver__url_prefixes: "[${CLEARML_FILES_HOST:-}]"
entrypoint:
- python3
- -m
- jobs.async_urls_delete
- --fileserver-host
- http://fileserver:8081
volumes:
- ./logs:/var/log/clearml
agent-services:
networks:
- backend
container_name: clearml-agent-services
image: allegroai/clearml-agent-services:latest
deploy:
restart_policy:
condition: on-failure
privileged: true
environment:
CLEARML_HOST_IP: ${CLEARML_HOST_IP}
CLEARML_WEB_HOST: ${CLEARML_WEB_HOST:-}
CLEARML_API_HOST: http://apiserver:8008
CLEARML_FILES_HOST: ${CLEARML_FILES_HOST:-}
CLEARML_API_ACCESS_KEY: KN8KN262M335YBUKM5UH
CLEARML_API_SECRET_KEY: X5slLqO7Lnq5IfRpt1rwqOBAWekipI9GC1e3LjtcG1DT1geDI0
CLEARML_AGENT_GIT_USER: ${CLEARML_AGENT_GIT_USER}
CLEARML_AGENT_GIT_PASS: ${CLEARML_AGENT_GIT_PASS}
CLEARML_AGENT_UPDATE_VERSION: ${CLEARML_AGENT_UPDATE_VERSION:->=0.17.0}
CLEARML_AGENT_DEFAULT_BASE_DOCKER: "ubuntu:18.04"
AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID:-}
AWS_SECRET_ACCESS_KEY: ${AWS_SECRET_ACCESS_KEY:-}
AWS_DEFAULT_REGION: ${AWS_DEFAULT_REGION:-}
AZURE_STORAGE_ACCOUNT: ${AZURE_STORAGE_ACCOUNT:-}
AZURE_STORAGE_KEY: ${AZURE_STORAGE_KEY:-}
GOOGLE_APPLICATION_CREDENTIALS: ${GOOGLE_APPLICATION_CREDENTIALS:-}
CLEARML_WORKER_ID: "clearml-services"
CLEARML_AGENT_DOCKER_HOST_MOUNT: "/opt/clearml/agent:/root/.clearml"
SHUTDOWN_IF_NO_ACCESS_KEY: 1
volumes:
- /var/run/docker.sock:/var/run/docker.sock
- ./agent:/root/.clearml
depends_on:
- apiserver
entrypoint: >
bash -c "curl --retry 10 --retry-delay 10 --retry-connrefused 'http://apiserver:8008/debug.ping' && /usr/agent/entrypoint.sh"
networks:
backend:
driver: bridge
frontend:
driver: bridge
The text was updated successfully, but these errors were encountered: