diff --git a/daemons/management/commands/health_check.py b/daemons/management/commands/health_check.py
new file mode 100644
index 0000000..dd10204
--- /dev/null
+++ b/daemons/management/commands/health_check.py
@@ -0,0 +1,67 @@
+from time import sleep
+
+from django.core.management.base import BaseCommand
+from django.db import connections, InterfaceError, OperationalError
+from requests import RequestException, get
+
+from daemons.management.commands.bot import bot
+
+# Telegram chat that receives the outage alerts.
+ALERT_CHAT_ID = 84367486
+WEB_URL = "http://dev.sprinthub.ru/"
+STORAGE_URL = "http://dev.sprinthub.ru:5555"
+# Without a timeout, requests can block forever on an unresponsive host.
+REQUEST_TIMEOUT = 10  # seconds
+STARTUP_DELAY = 60 * 5  # seconds
+CHECK_INTERVAL = 60 * 30  # seconds
+
+
+class Command(BaseCommand):
+    """Periodically probe the database, the site and the file storage.
+
+    The first failing probe of a round is reported to ``ALERT_CHAT_ID`` via
+    the Telegram bot; the remaining probes of that round are skipped.
+    """
+
+    help = "starts health check"
+
+    @staticmethod
+    def _database_alive():
+        """Return True if a trivial query succeeds on the default database."""
+        db_conn = connections["default"]
+        try:
+            # Creating a cursor alone may not reach the server, so run a query.
+            with db_conn.cursor() as cursor:
+                cursor.execute("SELECT 1")
+        except (OperationalError, InterfaceError):
+            return False
+        finally:
+            # Do not hold an idle connection between rounds; Django
+            # reconnects lazily on the next use.
+            db_conn.close()
+        return True
+
+    @staticmethod
+    def _responds(url, require_ok=False):
+        """Return True if ``url`` answers (status 200 when ``require_ok``)."""
+        try:
+            response = get(url, timeout=REQUEST_TIMEOUT)
+        except RequestException:
+            return False
+        return response.status_code == 200 or not require_ok
+
+    def go(self):
+        """Run one round of checks and alert on the first failure."""
+        if not self._database_alive():
+            bot.send_message(ALERT_CHAT_ID, "База сдохла")
+        elif not self._responds(WEB_URL, require_ok=True):
+            bot.send_message(ALERT_CHAT_ID, "Сайт сдох")
+        elif not self._responds(STORAGE_URL):
+            bot.send_message(ALERT_CHAT_ID, "Файловое хранилище сдохло")
+
+    def handle(self, *args, **options):
+        """Sleep for the startup delay, then run checks forever."""
+        sleep(STARTUP_DELAY)
+        while True:
+            self.go()
+            sleep(CHECK_INTERVAL)
diff --git a/docker-compose-deploy.yaml b/docker-compose-deploy.yaml
index 4bd5f52..de2b0ff 100644
--- a/docker-compose-deploy.yaml
+++ b/docker-compose-deploy.yaml
@@ -106,6 +106,26 @@ services:
         parallelism: 1
         order: start-first
 
+  health_check:
+    image: mathwave/sprint-repo:sprint
+    command: ./manage.py health_check
+    environment:
+      SOLUTIONS_ROOT_EXTERNAL: "/sprint-data/data/solutions"
+      DB_HOST: $DB_HOST
+      DB_PASSWORD: $DB_PASSWORD
+      RABBIT_HOST: $RABBIT_HOST
+      FS_HOST: $FS_HOST
+      DEBUG: $DEBUG
+      TELEGRAM_TOKEN: $TELEGRAM_TOKEN
+      FS_TOKEN: $FS_TOKEN
+    deploy:
+      mode: replicated
+      restart_policy:
+        condition: on-failure
+      update_config:
+        parallelism: 1
+        order: start-first
+
   bot:
     image: mathwave/sprint-repo:sprint
     environment: