Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce pg dump metrics #69

Merged
merged 6 commits into from
Feb 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
---
repos:
- repo: git://github.com/Lucas-C/pre-commit-hooks
- repo: https://github.com/Lucas-C/pre-commit-hooks
rev: v1.1.10
hooks:
- id: remove-tabs

- repo: git://github.com/pre-commit/pre-commit-hooks
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.4.0
hooks:
- id: trailing-whitespace
Expand All @@ -22,7 +22,7 @@ repos:
- id: check-ast
- id: debug-statements

- repo: git://github.com/pycqa/pydocstyle.git
- repo: https://github.com/pycqa/pydocstyle.git
rev: 6.0.0
hooks:
- id: pydocstyle
Expand Down
21 changes: 21 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,24 @@ Thoth Graph Metrics Exporter
:alt: Quay - Build

Periodic job that exports metrics out of the main database asynchronously.


Run single task
===============

You can run a single task by selecting its name from the allowed methods:

.. list-table::
:widths: 25 25
:header-rows: 1

* - Task name
- Description
* - ``graph_corruption_check``
- Check if the database is corrupted.
* - ``graph_table_bloat_data_check``
- Check if the database tables are bloated.
* - ``graph_index_bloat_data_check``
- Check if the database index tables are bloated.
* - ``graph_database_dumps_check``
- Check if database dumps are correctly created.
88 changes: 82 additions & 6 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,14 @@

import os
import logging

import click
from datetime import datetime, timedelta
from enum import Enum

import click
from prometheus_client import CollectorRegistry, Gauge, push_to_gateway

from thoth.common import init_logging
from thoth.storages import GraphDatabase
from thoth.storages import GraphDatabase, GraphBackupStore
from thoth.storages import __version__ as __storages_version__
from thoth.common import __version__ as __common_version__

Expand All @@ -43,13 +43,16 @@
THOTH_METRICS_PUSHGATEWAY_URL = os.environ["PROMETHEUS_PUSHGATEWAY_URL"]
THOTH_DEPLOYMENT_NAME = os.environ["THOTH_DEPLOYMENT_NAME"]

GRAPH_BACKUP_CHECK_DATE = int(os.getenv("THOTH_GRAPH_BACKUP_CHECK_DAYS", 7))


class TaskEnum(Enum):
    """Enumerate the tasks this exporter is able to run.

    Each member's value doubles as the string accepted by the ``--task``
    CLI option, so values must stay stable.
    """

    CORRUPTION_CHECK = "graph_corruption_check"
    TABLE_BLOAT_DATA = "graph_table_bloat_data_check"
    INDEX_BLOAT_DATA = "graph_index_bloat_data_check"
    DATABASE_DUMPS = "graph_database_dumps_check"


init_logging()
Expand Down Expand Up @@ -96,6 +99,38 @@ class TaskEnum(Enum):
registry=PROMETHEUS_REGISTRY,
)

# Gauge: total number of pg dumps currently stored on Ceph, per deployment.
graphdb_dump_count = Gauge(
    "thoth_graphdb_dump_count",
    "Number of pg dumps stored on Ceph.",
    ["env"],
    registry=PROMETHEUS_REGISTRY,
)

# Gauge: records the date of the most recent dump stored on Ceph.
# NOTE(review): label order is ["date", "env"] — callers using positional
# .labels(...) must pass the date first and the deployment name second.
# Carrying the date as a label value creates a new time series per dump
# date; verify this cardinality is acceptable for the Pushgateway setup.
graphdb_last_dump = Gauge(
    "thoth_graphdb_last_dump",
    "Last dump date stored on Ceph.",
    ["date", "env"],
    registry=PROMETHEUS_REGISTRY,
)

# Gauge acting as a boolean flag (0/1): set to 1 when more dumps exist on
# Ceph than the rotation policy should retain, i.e. cleanup did not run.
graphdb_dump_not_cleaned = Gauge(
    "thoth_graphdb_dump_not_cleaned",
    "Check if the number of dumps on Ceph is higher than expected.",
    ["env"],
    registry=PROMETHEUS_REGISTRY,
)

# Gauge acting as a boolean flag (0/1): set to 1 when the newest dump is
# older than the expected backup interval, i.e. a scheduled dump was missed.
graphdb_dump_missed = Gauge(
    "thoth_graphdb_dump_missed",
    "Check if the last expected dump is missing.",
    ["env"],
    registry=PROMETHEUS_REGISTRY,
)


def _create_common_metrics():
"""Create common metrics to pushgateway."""
Expand Down Expand Up @@ -192,6 +227,41 @@ def _graph_index_bloat_data(graph: GraphDatabase):
_LOGGER.info("thoth_graphdb_mb_index_bloat_data_table is empty")


def _graph_database_dumps(adapter: GraphBackupStore) -> None:
    """Expose metrics about the pg dumps stored on Ceph.

    Sets the dump count, flags failed rotation (more dumps stored than the
    rotation policy keeps), records the newest dump date, and flags a missed
    dump when the newest one is older than GRAPH_BACKUP_CHECK_DATE days.

    :param adapter: connected GraphBackupStore used to list dump documents.
    """
    pg_dumps = []
    for pg_dump in adapter.get_document_listing():
        # Document names are assumed to follow "pg_dump-<timestamp>" with the
        # timestamp in GraphBackupStore._BACKUP_FILE_DATETIME_FORMAT.
        pg_dumps.append(
            datetime.strptime(pg_dump[len("pg_dump-") :], GraphBackupStore._BACKUP_FILE_DATETIME_FORMAT).date()
        )

    pg_dumps_number = len(pg_dumps)
    graphdb_dump_count.labels(THOTH_DEPLOYMENT_NAME).set(pg_dumps_number)
    _LOGGER.info(f"Number of database dumps available on Ceph is: {pg_dumps_number}")

    pg_dumps_expected = GraphBackupStore.GRAPH_BACKUP_STORE_ROTATE
    _LOGGER.info(f"Number of database dumps expected: {pg_dumps_expected}")

    # Rotation check: more dumps than the store rotates means cleanup failed.
    if pg_dumps_number > pg_dumps_expected:
        graphdb_dump_not_cleaned.labels(THOTH_DEPLOYMENT_NAME).set(1)
    else:
        graphdb_dump_not_cleaned.labels(THOTH_DEPLOYMENT_NAME).set(0)

    if not pg_dumps:
        # No dumps at all: max() below would raise ValueError, and the last
        # expected dump is certainly missing.
        _LOGGER.warning("No database dumps found on Ceph.")
        graphdb_dump_missed.labels(THOTH_DEPLOYMENT_NAME).set(1)
        return

    # Consider only the most recently uploaded pg dump.
    last_dump_date = max(pg_dumps)

    _LOGGER.info(f"Last database dump was stored on: {last_dump_date}")
    # The gauge is declared with label names ["date", "env"]: pass the date
    # first, then the deployment name.
    graphdb_last_dump.labels(last_dump_date, THOTH_DEPLOYMENT_NAME).inc()

    last_expected_dump_date = datetime.utcnow().date() - timedelta(days=GRAPH_BACKUP_CHECK_DATE)

    _LOGGER.info(f"Last expected database dump date is: {last_expected_dump_date}")

    if last_dump_date < last_expected_dump_date:
        graphdb_dump_missed.labels(THOTH_DEPLOYMENT_NAME).set(1)
    else:
        graphdb_dump_missed.labels(THOTH_DEPLOYMENT_NAME).set(0)


@click.command()
@click.option(
"--task", "-t", type=click.Choice([entity.value for entity in TaskEnum], case_sensitive=False), required=False
Expand All @@ -202,14 +272,17 @@ def main(task):

_create_common_metrics()

graph = GraphDatabase()
graph.connect()

if task:
_LOGGER.info(f"{task} task starting...")
else:
_LOGGER.info("No specific task selected, all tasks will be run...")

graph = GraphDatabase()
graph.connect()

adapter = GraphBackupStore()
adapter.connect()

if task == TaskEnum.CORRUPTION_CHECK.value or not task:
_graph_corruption_check(graph=graph)

Expand All @@ -219,6 +292,9 @@ def main(task):
if task == TaskEnum.INDEX_BLOAT_DATA.value or not task:
_graph_index_bloat_data(graph=graph)

if task == TaskEnum.DATABASE_DUMPS.value or not task:
_graph_database_dumps(adapter=adapter)

_send_metrics()
_LOGGER.info("Graph metrics exporter finished.")

Expand Down