#!/bin/bash

#
# CONSTANTS
#
# Diagnostic tool that is launched, and the argument template handed to it.
# The template keeps $dev, $port and $log_dir unexpanded (single quotes);
# execute_command expands them with eval on every run.
IBDIAGM_EXEC_COMMAND='ibdiagnet'
IBDIAGM_EXEC_ARGS='-i $dev -p $port -o $log_dir'
# Directory created below IBDIAGM_LOGS_DIR_LOCATION, and the file inside each
# per-run directory that receives the command line and its output.
IBDIAGM_LOGS_DIR_SUFFIX='ibdiagm'
IBDIAGM_LOGS_STD_LOG='ibdiagm.output'
# NOTE(review): the doubled "/etc/etc" looks like a typo for "/etc" -- confirm
# against the packaging before changing it.
DEF_CONFIG_FILE='/etc/etc/ibutils2/ibdiagm.conf'
# Command-line validation: positive decimal integers without leading zeros.
REGEX_PID='^[1-9][0-9]*$'
REGEX_PORT='^[1-9][0-9]*$'

#
# DEFAULTS
#
# Fallback values applied by update_defaults to settings that neither the
# environment nor the configuration file provided.
IBDIAGM_EXEC_INTERVAL_SEC_DEFAULT=$(( 24 * 60 * 60 )) # one day, in seconds
IBDIAGM_LOGS_DIR_LOCATION_DEFAULT='/var/log'
IBDIAGM_LOGS_MAX_DAYS_DEFAULT='10'
IBDIAGM_ENABLE_DEFAULT='0'

# Settings accepted from the configuration file, stored as a flat list of
# (name, validation regex) pairs: even indexes hold the variable name, the
# following odd index holds the extended regex its value must match.
CONFIGURABLE_PARAMS=()
CONFIGURABLE_PARAMS+=(IBDIAGM_USER_EXEC_ARGS    '^.*$')
CONFIGURABLE_PARAMS+=(IBDIAGM_EXEC_INTERVAL_SEC '^[1-9][0-9]*$')
CONFIGURABLE_PARAMS+=(IBDIAGM_LOGS_DIR_LOCATION '^[0-9a-zA-Z_."\-\/]+$')
CONFIGURABLE_PARAMS+=(IBDIAGM_LOGS_MAX_DAYS     '^[1-9][0-9]*$')
CONFIGURABLE_PARAMS+=(IBDIAGM_ENABLE            '^[01]$')

#
# INPUT VARIABLES
#
# PID given with -p/--pid; stays empty when no process is being watched.
opensm_pid=
# Configuration file to load; -f/--file replaces this default.
configuration_file="${DEF_CONFIG_FILE}"

#
# FUNCTIONS
#

# Write error message
#
# Prints the given words on standard output, prefixed with "ERROR: ".
# Arguments: $@ - message words; echo joins them with single spaces.
echo_error() {
    # Quoted so the text is printed verbatim: unquoted, the shell would
    # glob-expand wildcards and collapse whitespace before echo sees it.
    echo "ERROR: " "$@"
}


# Parse configuration file
#
# Reads NAME=VALUE lines from the file named by $1.  Only names listed in
# CONFIGURABLE_PARAMS are considered, and a value is accepted only when it
# matches the regex paired with its name.  A variable that already holds a
# non-empty value (from the environment or an earlier line) is left alone.
# Lines with unknown names are ignored; an invalid value is reported with
# echo_error and skipped.
# Arguments: $1 - path of the configuration file
# Outputs:   error messages, then one empty line on success
# Returns:   0 on success, 1 when the file cannot be read
load_configuration() {
    local name value param_name c

    # IFS is overridden for read only, so NAME is cut from VALUE at the first
    # "=" while word splitting everywhere else stays untouched.  The
    # "|| [[ -n ... ]]" part keeps a final line that has no trailing newline.
    while IFS="=" read -r name value || [[ -n "$name" ]]; do
        for ((c = 0; c < ${#CONFIGURABLE_PARAMS[@]}; c = c + 2)); do
            param_name=${CONFIGURABLE_PARAMS[$c]}
            # Quoted right-hand side: the name read from the file is compared
            # literally, never interpreted as a glob pattern.
            if [[ "$param_name" != "$name" ]]; then
                continue
            fi

            if [[ ! $value =~ ${CONFIGURABLE_PARAMS[$c + 1]} ]]; then
                echo_error "Invalid value for $param_name"
                break
            fi

            # param_name comes from the whitelist above, so indirect
            # read/write through it is safe and needs no eval.
            if [[ -z "${!param_name}" ]]; then
                printf -v "$param_name" '%s' "$value"
            fi
            break
        done
    done < "$1"
    # A failed redirection (missing or unreadable file) is what makes the
    # loop report a non-zero status here.
    if [ $? != 0 ]; then
        echo_error "Failed to load configuration"
        return 1
    fi

    echo
    return 0
}

# Show parameters
#
# Prints the current value of every setting named in CONFIGURABLE_PARAMS,
# one per line, followed by an empty line.
# Returns: 0 always
show_parameters() {
    local c param_name

    for ((c = 0; c < ${#CONFIGURABLE_PARAMS[@]}; c = c + 2)); do
        param_name=${CONFIGURABLE_PARAMS[$c]}
        # ${!param_name} reads the variable whose name is stored in param_name
        echo "Parameter $param_name value set to \"${!param_name}\""
    done
    echo
    return 0
}

# Update defaults
#
# Gives every setting that is still unset or empty its *_DEFAULT value;
# settings that already hold a value are not touched.
update_defaults() {
    : "${IBDIAGM_EXEC_INTERVAL_SEC:=$IBDIAGM_EXEC_INTERVAL_SEC_DEFAULT}"
    : "${IBDIAGM_LOGS_DIR_LOCATION:=$IBDIAGM_LOGS_DIR_LOCATION_DEFAULT}"
    : "${IBDIAGM_LOGS_MAX_DAYS:=$IBDIAGM_LOGS_MAX_DAYS_DEFAULT}"
    : "${IBDIAGM_ENABLE:=$IBDIAGM_ENABLE_DEFAULT}"
}





# Check that process is responsive
#
# Probes the process with signal 0, which delivers nothing and only reports
# whether the signal could have been sent.
# Arguments: $1 - process ID
# Returns:   0 when the probe succeeds, 1 otherwise
check_process() {
    # Quoted so kill receives exactly one argument: an empty or multi-word
    # value is rejected instead of being re-split into several targets.
    if kill -0 "$1" &> /dev/null; then
        return 0
    fi

    return 1
}


# Get SM state on local port
#
# Runs sminfo for the given device/port and extracts the number that follows
# the word "state" in its output.
# Arguments: $1 - device (Ca) name, $2 - port number
# Globals:   writes sminfo_result (matching sminfo output) and sm_state
#            (the extracted number, or 0 on any failure)
# Returns:   0 on success, 1 when sminfo printed no "state <n>" line,
#            2 when the number could not be extracted
get_sm_state() {
    # grep -E replaces the obsolescent egrep; $? is the status of grep, the
    # last command of the pipeline.
    sminfo_result=$(sminfo -C "$1" -P "$2" -D 0 | grep -E '.*state\s[0-9]+.*')
    if [[ $? != 0 || -z "$sminfo_result" ]]; then
        sm_state=0
        return 1
    fi

    # Several matching lines are joined with spaces so that sed yields a
    # single number (the last "state <n>" occurrence, as .* is greedy).
    sm_state=$(printf '%s\n' "${sminfo_result//$'\n'/ }" | sed -e 's/.*state\s\([0-9]\+\).*/\1/')
    if [[ $? != 0 || -z "$sm_state" ]]; then
        sm_state=0
        return 2
    fi

    return 0
}


# Get list of device names and port numbers
#
# Parses the "Infiniband device '<name>' port <n>" lines printed by ibstatus.
# Globals:   writes local_ports_arr as a flat list: name, port, name, port...
#            The array is emptied first, so a failed call never leaves the
#            result of an earlier call behind.
# Returns:   0 when at least one port was found, 1 otherwise
get_local_ports() {
    local local_ports entry_name entry_port

    local_ports_arr=()

    # sed -n ... p prints only the lines that match the full pattern, so
    # unrelated ibstatus lines cannot shift the name/port pairing.
    local_ports=$(ibstatus | sed -n "s/^Infiniband device '\([^']\+\)' port \([0-9]\+\).*/\1 \2/p")
    if [[ -z "$local_ports" ]]; then
        return 1
    fi

    while read -r entry_name entry_port; do
        local_ports_arr+=("$entry_name" "$entry_port")
    done <<< "$local_ports"

    return 0
}


# Get local port state
#
# Extracts the value of the "State:" line printed by ibstat for one port.
# Arguments: $1 - device (Ca) name, $2 - port number
# Globals:   writes port_state (empty on failure).  It is deliberately not
#            declared local: a local would vanish on return and the caller
#            could never read the result.
# Returns:   0 when a state was found, 1 otherwise
get_port_state() {
    # grep is case-sensitive, so "Physical state:" lines are not selected.
    port_state=$(ibstat "$1" "$2" | grep State | sed 's/^[[:space:]]*State:[[:space:]]*//')
    if [[ $? != 0 || -z "$port_state" ]]; then
        port_state=""
        return 1
    fi

    return 0
}


# Get port capability mask
#
# Takes the "Capability mask:" line from the ibstat output for the given
# device ($1) and port ($2) and stores what follows the label in the global
# capability_mask.  Returns 0 on success; on failure capability_mask is
# emptied and 1 is returned.
get_capability_mask() {
    # $1 and $2 are left unquoted as before: an empty argument is dropped
    # rather than handed to ibstat as an empty string.
    capability_mask=$(ibstat $1 $2 | grep Capability | sed 's/Capability mask:\s//g')
    if [[ $? == 0 && -n "$capability_mask" ]]; then
        return 0
    fi

    capability_mask=""
    return 1
}


# Create directory for log files directories
#
# Makes sure $IBDIAGM_LOGS_DIR_LOCATION/$IBDIAGM_LOGS_DIR_SUFFIX exists.
# Globals:   writes p_logs_dir (the full path of that directory)
# Returns:   0 when the directory exists or was created, 1 otherwise
create_parent_logs_dir() {
    p_logs_dir=$IBDIAGM_LOGS_DIR_LOCATION/$IBDIAGM_LOGS_DIR_SUFFIX
    if [[ -d "$p_logs_dir" ]]; then
        return 0
    fi

    # Quoted, with "--": a location containing whitespace or starting with
    # "-" must produce this one directory and nothing else.
    if ! mkdir -p -- "$p_logs_dir"; then
        echo_error "Failed to create logs dir ($p_logs_dir)"
        return 1
    fi

    return 0
}


# Create directory for log files
#
# Creates <parent logs dir>/<YYYYmmdd_HHMMSS>_<$1>_<$2> for a single run.
# Arguments: $1 - device (Ca) name, $2 - port number
# Globals:   writes log_dir (path of the new directory, empty on failure);
#            p_logs_dir is set by create_parent_logs_dir
# Returns:   0 on success, 1 when the parent directory is unavailable,
#            2 when no timestamp could be obtained, 3 when mkdir fails
create_log_dir() {
    local stamp

    # Cleared up front so a failed call never exposes the previous run's path.
    log_dir=""

    if ! create_parent_logs_dir; then
        return 1
    fi

    stamp=$(date +%Y%m%d_%H%M%S)
    if [[ $? != 0 || -z "$stamp" ]]; then
        return 2
    fi

    log_dir="${p_logs_dir}/${stamp}_${1}_${2}"
    if ! mkdir -p -- "$log_dir"; then
        log_dir=""
        return 3
    fi

    return 0
}


# Execute command
#
# Builds the command line from IBDIAGM_EXEC_COMMAND, IBDIAGM_EXEC_ARGS and
# IBDIAGM_USER_EXEC_ARGS, writes it to $log_dir/$IBDIAGM_LOGS_STD_LOG and
# then runs it with stdout and stderr appended to the same file.
# Arguments: $1 - device (Ca) name, $2 - port number
# Globals:   reads log_dir and the IBDIAGM_* settings named above
# Outputs:   "Executing... [<command line>]" on stdout
# Returns:   exit status of the executed command
#
# NOTE(review): the command line is assembled and run with eval, so
# IBDIAGM_USER_EXEC_ARGS is interpreted as shell code and must only come
# from a trusted configuration.  A log_dir containing whitespace is still
# split when it is passed through the "-o $log_dir" template.
execute_command() {
    # dev and port are the names the argument template refers to; locals are
    # visible to eval and keep the script-wide "port" from being overwritten.
    local dev=$1
    local port=$2
    local cmd
    local std_log=$log_dir/$IBDIAGM_LOGS_STD_LOG

    eval "cmd=\"$IBDIAGM_EXEC_COMMAND $IBDIAGM_EXEC_ARGS $IBDIAGM_USER_EXEC_ARGS\""
    echo "Executing... [$cmd]"
    echo "$cmd" > "$std_log"
    # The redirection sits outside the evaluated string so the log path is
    # used verbatim, whatever characters it contains.
    eval "$cmd" &>> "$std_log"
    return $?
}


# Remove old logs
#
# Deletes the entries directly inside the parent logs directory whose
# modification time is more than IBDIAGM_LOGS_MAX_DAYS days old.  The parent
# directory itself is never a candidate.  All output of the function is
# discarded (see the redirection after the closing brace).
# Globals:   reads IBDIAGM_LOGS_MAX_DAYS, IBDIAGM_LOGS_DIR_LOCATION and
#            IBDIAGM_LOGS_DIR_SUFFIX; writes p_logs_dir
# Returns:   0 when cleanup is disabled (0 days) or entries were removed,
#            1 when nothing qualified or the directory could not be scanned
remove_old_logs() {
    local old_log_dirs=()
    local old_logs_dir

    if [[ "$IBDIAGM_LOGS_MAX_DAYS" == 0 ]]; then
        return 0
    fi

    p_logs_dir=$IBDIAGM_LOGS_DIR_LOCATION/$IBDIAGM_LOGS_DIR_SUFFIX

    # -mindepth 1 excludes the parent directory from the results; the
    # NUL-delimited read keeps paths with whitespace in one piece.
    while IFS= read -r -d '' old_logs_dir; do
        old_log_dirs+=("$old_logs_dir")
    done < <(find "$p_logs_dir" -mindepth 1 -maxdepth 1 \
                 -mtime "+$IBDIAGM_LOGS_MAX_DAYS" -print0)

    if (( ${#old_log_dirs[@]} == 0 )); then
        return 1
    fi

    rm -rf -- "${old_log_dirs[@]}"

    return 0
} &> /dev/null


# Main function
#
# Monitoring loop.  Each pass sleeps IBDIAGM_EXEC_INTERVAL_SEC seconds, stops
# if the watched process (opensm_pid, when given) no longer answers, prunes
# old logs and then runs the diagnostic command:
#   - for the explicit $ca/$port pair when a Ca was given, otherwise
#   - for every local port whose capability mask has the is_sm bit set and
#     whose SM state is 3 (presumably MASTER, going by the usage text).
# Globals:   reads opensm_pid, ca, port, IBDIAGM_EXEC_INTERVAL_SEC and the
#            results published by the helpers (local_ports_arr,
#            capability_mask, sm_state, log_dir)
# Returns:   when the watched process is gone; exits the script with status
#            1 if the parent logs directory cannot be created
start_monitor() {
    local c dev_name dev_port is_sm

    if ! create_parent_logs_dir; then
        exit 1
    fi

    while true; do
        sleep "$IBDIAGM_EXEC_INTERVAL_SEC"

        if [[ -n "$opensm_pid" ]] && ! check_process "$opensm_pid"; then
            echo "Process with PID $opensm_pid is not running"
            break
        fi

        remove_old_logs

        # Explicit Ca/port: no port enumeration is needed.
        if [[ -n "$ca" ]]; then
            if create_log_dir "$ca" "$port" && [[ -n "$log_dir" ]]; then
                execute_command "$ca" "$port"
            fi
            continue
        fi

        # Skip this pass when the ports cannot be listed; going on would
        # act on whatever list an earlier pass left behind.
        if ! get_local_ports; then
            continue
        fi

        # local_ports_arr is a flat list of name/port pairs.
        for ((c = 0; c + 1 < ${#local_ports_arr[@]}; c = c + 2)); do
            dev_name=${local_ports_arr[$c]}
            dev_port=${local_ports_arr[$c + 1]}

            if ! get_capability_mask "$dev_name" "$dev_port" || [[ -z "$capability_mask" ]]; then
                continue
            fi

            # Check the is_sm bit of the port's capability mask in order to
            # know whether an SM is bound to it.  let returns non-zero when
            # the result is 0 or the mask is not a valid number.
            let is_sm="$capability_mask & 2"
            if [[ $? != 0 || "$is_sm" != "2" ]]; then
                continue
            fi

            if ! get_sm_state "$dev_name" "$dev_port" || [[ "$sm_state" != "3" ]]; then
                continue
            fi

            if ! create_log_dir "$dev_name" "$dev_port" || [[ -z "$log_dir" ]]; then
                continue
            fi

            execute_command "$dev_name" "$dev_port"
        done
    done
}


# Show usage
#
# Prints the command-line synopsis and the option list on stdout.  The
# synopsis names -C for the Ca (the flag the option parser accepts; -c means
# --create) and shows the path argument that --create consumes.
usage() {
    printf '%s\n' \
        "" \
        "Usage: ${0##*/} <-p <PID> || -C <Ca> -P <port>> [-f < configuration file >]" \
        "" \
        "Wake up every few seconds and runs ibdiagnet from all local ports with master SM" \
        "or from specified port. The process continues running until the process" \
        "with the given PID stops or will never stop in case if process id wasn't provided." \
        "" \
        "Parameters" \
        "    -p | --pid <pid>      PID of the process to watch." \
        "" \
        "    -C | --Ca <ca>        Ca name to use." \
        "    -P | --Port <port>    Ca port number to use." \
        "" \
        "    -f | --file <path>    Configuration file." \
        "    -h | --help           Show help." \
        "    -c | --create <path>  Create default configuration file." \
        "" \
        ""
}

# Extended help: the regular usage text followed by the list of settings
# understood in the configuration file.
usage_verbose() {
    usage
    printf '%s\n' \
        "Parameters in configuration file:" \
        "    IBDIAGM_USER_EXEC_ARGS            User arguments for ibdiagnet (default: empty)" \
        "    IBDIAGM_LOGS_DIR_LOCATION         Location for log files (default: /var/log)" \
        "    IBDIAGM_LOGS_MAX_DAYS             Delete logs older than the given number of days (default: 10)" \
        "    IBDIAGM_EXEC_INTERVAL_SEC         Command execution interval in seconds (default: once a day)" \
        "    IBDIAGM_ENABLE                    Enable/disable the utility, 0 - disable, 1 - enable (default: 0)" \
        "" \
        "Notice. All configuration fields can be overloaded by environment variables." \
        ""
}

# Write a configuration file holding the current value of every setting
# named in CONFIGURABLE_PARAMS, one NAME=VALUE line each.  The file is
# truncated first and starts with one empty line.
# Arguments: $1 - path of the file to write
# Outputs:   a progress line on stdout; echo_error messages on failure
# Returns:   0 on success, 1 when the file cannot be written
create_config() {
    local c param_name

    echo "Create configuration file..."
    if ! echo > "$1"; then
        echo_error "Failed create configuration file"
        return 1
    fi

    for ((c = 0; c < ${#CONFIGURABLE_PARAMS[@]}; c = c + 2)); do
        param_name=${CONFIGURABLE_PARAMS[$c]}
        # printf with quoted arguments stores the value verbatim; an
        # unquoted echo would expand wildcards and collapse whitespace.
        if ! printf '%s=%s\n' "$param_name" "${!param_name}" >> "$1"; then
            echo_error "Failed create configuration file"
            return 1
        fi
    done

    return 0
}

#
# MAIN
#
# Command-line parsing.  Options that take a value shift once to reach it;
# the shift at the bottom of the loop then moves to the next option.
# Parsing stops at the first empty argument.
while [[ -n "$1" ]]; do
    case "$1" in
        -p | --pid)
            shift
            opensm_pid=$1
            ;;

        -C | --Ca)
            shift
            ca=$1
            ;;

        -P | --Port)
            shift
            port=$1
            ;;

        -f | --file)
            shift
            configuration_file=$1
            ;;

        -h | --help)
            usage_verbose
            exit
            ;;

        -c | --create)
            shift
            update_defaults
            # Quoted so a target path containing whitespace stays in one
            # piece; the exit status is the one create_config returned.
            create_config "$1"
            exit
            ;;

        *)
            usage
            exit 1
            ;;
    esac
    shift
done

# Either a PID to watch or a complete Ca/port pair is required.
if [[ "$opensm_pid"  == "" && ( $ca == "" || $port == "" ) ]]; then
    echo_error "PID or Ca/Port should be provided."
    usage
    exit 1
fi

if [[ "$opensm_pid"  != "" && ! $opensm_pid  =~ $REGEX_PID ]]; then
    echo_error "Invalid PID ($opensm_pid)"
    usage
    exit 1
fi

if [[ "$port"  != "" && ! $port  =~ $REGEX_PORT ]]; then
    echo_error "Invalid port ($port)"
    usage
    exit 1
fi

if [[ ! -f $configuration_file ]]; then
    # ">&2" sends the message to standard error; a bare "> 2" would write it
    # into a file named "2" in the current directory.
    echo_error "Configuration file not found ($configuration_file)" >&2
    usage
    exit 1
fi

echo
echo "Loading configuration file ($configuration_file)"
echo
# Quoted so a path containing whitespace reaches the loader as one argument.
if ! load_configuration "$configuration_file"; then
    echo_error "Failed to load configuration file"
    usage
    exit 1
fi

# Fill in whatever neither the environment nor the file provided, then
# report the effective settings.
update_defaults
show_parameters

if [[ "$IBDIAGM_ENABLE" == "0" ]]; then
    echo "IBDIAGM is disabled"
    exit 0
fi

start_monitor

