health check

This commit is contained in:
Egor Matveev 2022-03-24 23:04:34 +03:00
parent f12d2ea5d2
commit 3c7e5fb7b8
2 changed files with 63 additions and 0 deletions

View File

@ -0,0 +1,43 @@
from time import sleep
from django.core.management.base import BaseCommand
from django.db import connections, OperationalError
from requests import get
from daemons.management.commands.bot import bot
class Command(BaseCommand):
    """Long-running health check that reports outages through the bot.

    After an initial delay the command probes, in order, the default
    database, the web site and the file storage.  For the first probe that
    fails it sends one message via ``bot.send_message`` and skips the
    remaining probes of that cycle.  The cycle repeats forever.
    """

    help = "starts health check"

    # First argument handed to bot.send_message for every alert
    # (presumably the Telegram chat id of the maintainer -- TODO confirm).
    ALERT_CHAT_ID = 84367486
    # Endpoints probed over HTTP.
    SITE_URL = "http://dev.sprinthub.ru/"
    STORAGE_URL = "http://dev.sprinthub.ru:5555"
    # Seconds to wait for each HTTP probe.  requests applies no timeout by
    # default, so a hung endpoint would otherwise stall the loop forever.
    REQUEST_TIMEOUT = 10
    # Seconds to wait before the first cycle and between cycles.
    STARTUP_DELAY = 60 * 5
    CHECK_INTERVAL = 60 * 30

    def _database_alive(self):
        """Return True if a trivial query succeeds on the default database."""
        db_conn = connections["default"]
        try:
            # Run a real query: creating a cursor only opens a connection
            # when none exists yet, so on its own it may not notice a server
            # that went away after an earlier successful check.
            # NOTE(review): assumes the backend accepts ``SELECT 1``.
            with db_conn.cursor() as cursor:
                cursor.execute("SELECT 1")
        except OperationalError:
            # Drop the broken connection so the next cycle reconnects
            # instead of reusing a dead handle.
            db_conn.close()
            return False
        return True

    def _site_alive(self):
        """Return True if the web site answers with HTTP 200."""
        try:
            response = get(self.SITE_URL, timeout=self.REQUEST_TIMEOUT)
        except Exception:
            # Any failure to obtain a response counts as "down", but
            # KeyboardInterrupt/SystemExit must still stop the daemon.
            return False
        return response.status_code == 200

    def _storage_alive(self):
        """Return True if the file storage accepts an HTTP request.

        Only reachability is checked; the status code is ignored, exactly
        as before.
        """
        try:
            get(self.STORAGE_URL, timeout=self.REQUEST_TIMEOUT)
        except Exception:
            return False
        return True

    def go(self):
        """Run one check cycle and report the first failing component."""
        if not self._database_alive():
            bot.send_message(self.ALERT_CHAT_ID, "База сдохла")
            return
        if not self._site_alive():
            bot.send_message(self.ALERT_CHAT_ID, "Сайт сдох")
            return
        if not self._storage_alive():
            bot.send_message(self.ALERT_CHAT_ID, "Файловое хранилище сдохло")

    def handle(self, *args, **options):
        """Entry point: wait for the startup delay, then check forever."""
        sleep(self.STARTUP_DELAY)
        while True:
            self.go()
            sleep(self.CHECK_INTERVAL)

View File

@ -106,6 +106,26 @@ services:
parallelism: 1
order: start-first
# Service that runs `./manage.py health_check` from the application image.
health_check:
image: mathwave/sprint-repo:sprint
command: ./manage.py health_check
# Every `$NAME` value below is substituted from the environment in which
# the stack is deployed; only SOLUTIONS_ROOT_EXTERNAL is a fixed path.
environment:
SOLUTIONS_ROOT_EXTERNAL: "/sprint-data/data/solutions"
DB_HOST: $DB_HOST
DB_PASSWORD: $DB_PASSWORD
RABBIT_HOST: $RABBIT_HOST
FS_HOST: $FS_HOST
DEBUG: $DEBUG
TELEGRAM_TOKEN: $TELEGRAM_TOKEN
FS_TOKEN: $FS_TOKEN
deploy:
mode: replicated
restart_policy:
# Restart the container only when it exits with an error.
condition: on-failure
update_config:
# Update one task at a time.
parallelism: 1
# Start the new task before stopping the old one during an update.
order: start-first
bot:
image: mathwave/sprint-repo:sprint
environment: