#!/usr/bin/env python3
# _*_ coding: utf-8 _*_
# pylint: disable=invalid-name,consider-using-with,broad-except,too-many-branches,too-many-statements
"""
Ceph check script for use with Nagios
Copyright 2021- Mark Schouten <mark@tuxis.nl>

Inspired by https://github.com/ceph/ceph-nagios-plugins/blob/master/src/check_ceph_health

# Copyright 2023, Mark Schouten <mark@tuxis.nl>, Tuxis B.V.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""

import subprocess
import sys
import re
import json
import argparse
import copy

# Absolute paths of the external commands this check shells out to.
CEPH_COMMAND = "/usr/bin/ceph"
IP = "/bin/ip"

# Nagios exit codes, keyed by state name. The position of each name in the
# tuple is its exit code: OK=0, WARNING=1, CRITICAL=2, UNKNOWN=3.
EXIT_CODES = {
    state: code
    for code, state in enumerate(('OK', 'WARNING', 'CRITICAL', 'UNKNOWN'))
}

def fetch_our_ips():
    """Return every IP address configured on this host.

    Runs `ip -j addr list` and collects the `local` field of each entry in
    every interface's `addr_info` list, so that the other checks can restrict
    themselves to services bound to this machine.

    Returns:
        A list of address strings (empty when the command printed nothing).

    Raises:
        Exception: with the decoded stderr text when the command wrote
            anything to stderr. Stdout is parsed first, so a JSON error in
            stdout takes precedence.
    """
    result = subprocess.run(
        [IP, '-j', 'addr', 'list'],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        check=False,
    )

    found = []
    if result.stdout:
        # Interfaces without addresses have a missing or empty `addr_info`.
        found = [
            entry['local']
            for iface in json.loads(result.stdout)
            for entry in (iface.get('addr_info') or [])
        ]

    if result.stderr:
        raise Exception(result.stderr.decode())

    return found

def match_addr(pool, search):
    """Tell whether any address in `pool` occurs in `search` as `<addr>:`.

    `search` is converted with `str()` before matching, so it may be a plain
    "address:port/nonce" string or a nested structure containing such strings.
    Addresses containing a colon are treated as IPv6 and must appear in
    brackets (`[addr]:`); all others must be directly followed by a colon.
    Matching is case-insensitive.

    Args:
        pool: iterable of address strings to look for.
        search: object whose string representation is searched.

    Returns:
        True if at least one address matches, False otherwise (including
        when `pool` is empty).
    """
    haystack = str(search)

    for addr in pool:
        # The address is literal text, not a pattern: without escaping, the
        # dots of an IPv4 address match any character (1.2.3.4 would match
        # "172.3.4").
        literal = re.escape(addr)
        if ':' in addr:
            # The brackets already delimit an IPv6 address on both sides.
            regexp = r'\[%s\]:' % (literal)
        else:
            # The lookbehind stops 10.0.0.1 from matching inside 110.0.0.1
            # or 1.10.0.0.1-style longer dotted strings.
            regexp = r'(?<![0-9.])%s:' % (literal)

        if re.search(regexp, haystack, re.I):
            return True

    return False

def test_mons(ceph_exec):
    """Report monitors on this node that are not part of the quorum.

    Runs `<ceph_exec> quorum_status` and looks at every monitor in the monmap
    whose `public_addr` or `public_addrs` matches one of the module-level
    ADDRESSES. Such a monitor is reported when its name is missing from
    `quorum_names`.

    Args:
        ceph_exec: the ceph command line (executable plus global options) as
            a list; it is not modified.

    Returns:
        A list of problem messages, empty when nothing is wrong or when the
        command produced no output at all.

    Raises:
        Exception: with the decoded stderr text when the command printed
            nothing on stdout but did write to stderr.
    """
    command = [*ceph_exec, 'quorum_status']
    proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = proc.communicate()

    problems = []

    if not stdout:
        if stderr:
            raise Exception(stderr.decode())
        return problems

    status = json.loads(stdout)
    quorum = status['quorum_names']
    for mon in status['monmap']['mons']:
        # A monitor is "ours" when either address field matches a local IP;
        # `public_addr` is tried first, then `public_addrs`.
        is_local = any(
            key in mon and match_addr(ADDRESSES, mon[key])
            for key in ('public_addr', 'public_addrs')
        )
        if is_local and mon['name'] not in quorum:
            problems.append("Monitor %s not in quorum" % (mon['name']))

    return problems

def test_osds(ceph_exec):
    """Report OSDs on this node that are not both `up` and `in`.

    Runs `<ceph_exec> osd dump --format json` and looks at every OSD whose
    `public_addr` or `public_addrs` matches one of the module-level ADDRESSES.
    An OSD can yield two messages: one when `in` is not 1 and one when `up`
    is not 1.

    Args:
        ceph_exec: the ceph command line (executable plus global options) as
            a list; it is not modified.

    Returns:
        A list of problem messages, empty when nothing is wrong or when the
        command produced no output at all.

    Raises:
        Exception: with the decoded stderr text when the command printed
            nothing on stdout but did write to stderr.
    """
    command = [*ceph_exec, 'osd', 'dump', '--format', 'json']
    proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = proc.communicate()

    problems = []

    if not stdout:
        if stderr:
            raise Exception(stderr.decode())
        return problems

    for osd in json.loads(stdout)['osds']:
        # An OSD is "ours" when either address field matches a local IP;
        # `public_addr` is tried first, then `public_addrs`.
        is_local = any(
            key in osd and match_addr(ADDRESSES, osd[key])
            for key in ('public_addr', 'public_addrs')
        )
        if not is_local:
            continue

        if osd['in'] != 1:
            problems.append("OSD %s is out" % (osd['osd']))
        if osd['up'] != 1:
            problems.append("OSD %s is down" % (osd['osd']))

    return problems

def test_generic(ceph_exec, detail=False):
    """Collect the cluster health checks that are not MON_* or OSD_* checks.

    Runs `<ceph_exec> health detail --format json`. Checks whose name starts
    with `MON_` or `OSD_` are skipped here. For every remaining check the
    summary message is collected and, when `detail` is true, each of its
    detail messages as well.

    Args:
        ceph_exec: the ceph command line (executable plus global options) as
            a list; it is not modified.
        detail: also include the per-check detail messages.

    Returns:
        A `(state, messages)` tuple. `state` is the `status` field of the
        health report when at least one check was collected, and
        'HEALTH_OK' otherwise; `messages` is the list of collected messages.

    Raises:
        Exception: with the decoded stderr text when the command printed
            nothing on stdout but did write to stderr.
    """
    command = [*ceph_exec, 'health', 'detail', '--format', 'json']
    proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = proc.communicate()

    state = 'HEALTH_OK'
    messages = []

    if not stdout:
        if stderr:
            raise Exception(stderr.decode())
        return (state, messages)

    health = json.loads(stdout)
    for name, check in health['checks'].items():
        if name.startswith(('MON_', 'OSD_')):
            continue

        # Only a check that is actually reported lets the cluster status
        # replace the default HEALTH_OK.
        state = health['status']
        messages.append(check['summary']['message'])
        if detail:
            messages.extend(entry['message'] for entry in check['detail'])

    return (state, messages)


def main():
    """Parse the command line, run all checks and print one Nagios status line.

    The OSD and monitor checks for this node are combined with the generic
    cluster health checks:

    * any local OSD or monitor problem makes the result CRITICAL;
    * otherwise, when generic checks were reported, the cluster status decides
      (HEALTH_ERR -> CRITICAL, HEALTH_WARN -> WARNING, HEALTH_OK -> OK, and
      anything else -> UNKNOWN);
    * otherwise the result is OK.

    The printed line has the form `<STATE>: [<ceph status>] <msg>; <msg>...`.
    If any check raises, `UNKNOWN: <error>` is printed instead.

    Returns:
        The Nagios exit code (a value of EXIT_CODES) for the resulting state.
    """
    parser = argparse.ArgumentParser(description="Tuxis' ceph nagios plugin.")
    parser.add_argument('-e', '--exe', help='ceph executable [%s]' % CEPH_COMMAND)
    parser.add_argument('-c', '--conf', help='alternative ceph conf file')
    parser.add_argument('-m', '--monaddress', help='ceph monitor address[:port]')
    parser.add_argument('-i', '--id', help='ceph client id')
    parser.add_argument('-n', '--name', help='ceph client name')
    parser.add_argument('-k', '--keyring', help='ceph client keyring file')
    parser.add_argument('-d', '--detail', help="exec 'ceph health detail'", action='store_true')
    args = parser.parse_args()

    # Honour --exe; it used to be accepted but silently ignored.
    ceph_exec = [args.exe or CEPH_COMMAND]
    optional_flags = (
        ('-m', args.monaddress),
        ('-c', args.conf),
        ('--id', args.id),
        ('--name', args.name),
        ('--keyring', args.keyring),
    )
    for flag, value in optional_flags:
        if value:
            ceph_exec.extend([flag, value])

    # Top-level boundary: whatever goes wrong while querying ceph is reported
    # to Nagios as UNKNOWN instead of a traceback.
    try:
        osd_state = test_osds(ceph_exec)
        mon_state = test_mons(ceph_exec)
        (ceph_state, gen_state) = test_generic(ceph_exec, args.detail)
    except Exception as err:
        print("UNKNOWN: %s" % (err))
        return EXIT_CODES['UNKNOWN']

    details = osd_state + mon_state + gen_state

    if osd_state or mon_state:
        # A problem with a local OSD or monitor is always CRITICAL. It must
        # not be downgraded to OK just because no generic check was reported.
        check_state = 'CRITICAL'
    elif gen_state:
        check_state = {
            'HEALTH_ERR': 'CRITICAL',
            'HEALTH_WARN': 'WARNING',
            'HEALTH_OK': 'OK',
        }.get(ceph_state, 'UNKNOWN')
    else:
        check_state = 'OK'

    # One print call so the line is newline-terminated even without details.
    print("%s: [%s] " % (check_state, ceph_state) + '; '.join(details))

    return EXIT_CODES[check_state]

if __name__ == "__main__":
    # ADDRESSES is read as a module-level global by test_mons() and
    # test_osds(), so it has to be populated before main() runs.
    try:
        ADDRESSES = fetch_our_ips()
    except Exception as exc:
        # Unhandled, this would end in a traceback and exit code 1, which
        # Nagios interprets as WARNING; report UNKNOWN explicitly instead.
        print("UNKNOWN: %s" % (exc))
        sys.exit(EXIT_CODES['UNKNOWN'])

    sys.exit(main())
