#!/bin/bash

###############################################################################
# MCC Monitoring Stack - Diagnostic Script
# This script checks all components of the monitoring stack
# Usage: sudo ./diagnose.sh
###############################################################################

# ANSI color escape sequences. They are stored unexpanded and only turn into
# real escape characters when printed with `echo -e`.
NC='\033[0m'         # reset to the terminal default
RED='\033[0;31m'     # failures
GREEN='\033[0;32m'   # successes
YELLOW='\033[1;33m'  # section headings and warnings
BLUE='\033[0;34m'    # banners

# Pick the Compose front end: the standalone `docker-compose` binary wins,
# otherwise the `docker compose` plugin is used. Abort when neither works.
# The value is expanded unquoted later so "docker compose" splits into two words.
DOCKER_COMPOSE=""
if command -v docker-compose > /dev/null 2>&1; then
    DOCKER_COMPOSE="docker-compose"
elif docker compose version > /dev/null 2>&1; then
    DOCKER_COMPOSE="docker compose"
fi

if [ -z "$DOCKER_COMPOSE" ]; then
    echo -e "${RED}Docker Compose not found!${NC}"
    exit 1
fi

# Report banner (%b expands the color escapes exactly like `echo -e`).
printf '%b\n' \
    "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" \
    "${BLUE}🔍 MCC Monitoring Stack Diagnosis${NC}" \
    "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" \
    ""

# 1. Container status as reported by Compose
printf '%b\n' "${YELLOW}1️⃣  Container Status:${NC}"
printf '%s\n' "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
# Deliberately unquoted: "docker compose" must split into command + subcommand.
$DOCKER_COMPOSE ps
printf '\n'

# 2. Service health (heading only; the probes follow)
printf '%b\n' "${YELLOW}2️⃣  Service Health Checks:${NC}"
printf '%s\n' "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

#######################################
# Probe an HTTP health endpoint and print a one-line verdict.
# Globals:   GREEN, RED, NC (read) - color escapes for the status marker
# Arguments: $1 - display name of the service
#            $2 - URL to request
# Outputs:   "<name> is healthy" or "<name> is NOT responding" on stdout
#######################################
check_service() {
    local name=$1
    local url=$2
    # --fail makes curl exit non-zero on HTTP status >= 400. Without it a
    # service that answers 503 (e.g. still starting up) would count as healthy.
    if curl --silent --fail --max-time 5 "$url" > /dev/null 2>&1; then
        echo -e "  ${GREEN}✓${NC} $name is healthy"
    else
        echo -e "  ${RED}✗${NC} $name is NOT responding"
    fi
}

# Probe every HTTP endpoint the stack exposes on the host.
# Each entry is "<display name>=<health URL>".
health_endpoints=(
    "Grafana=http://localhost:3000/api/health"
    "Loki=http://localhost:3100/ready"
    "InfluxDB=http://localhost:8086/health"
    "Prometheus=http://localhost:9090/-/healthy"
)
for health_entry in "${health_endpoints[@]}"; do
    # Split on the first "=" only; the URLs themselves contain none.
    check_service "${health_entry%%=*}" "${health_entry#*=}"
done
printf '\n'

# 3. Check MCC logs
echo -e "${YELLOW}3️⃣  MCC Log Files:${NC}"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

#######################################
# Print the host directory that a Compose file mounts at /var/log/mcc.
# Arguments: $1 - Compose file to inspect (default: docker-compose.yml)
# Outputs:   the host path on stdout; /media/data/node1/logs when the file
#            is missing or has no such mount
#######################################
detect_mcc_log_path() {
    local compose_file=${1:-docker-compose.yml}
    local host_path
    # Several services may mount the same log directory; keep only the first
    # hit so the result is always a single path rather than a multi-line string.
    host_path=$(grep -oP '\- \K[^:]+(?=:/var/log/mcc)' "$compose_file" 2>/dev/null | head -n 1)
    # A quoted YAML entry (- "/path:/var/log/mcc") leaves its opening quote behind.
    host_path=${host_path#\"}
    host_path=${host_path#\'}
    printf '%s\n' "${host_path:-/media/data/node1/logs}"
}

MCC_LOG_PATH=$(detect_mcc_log_path docker-compose.yml)
echo "  Log path: $MCC_LOG_PATH"

if [ -f "$MCC_LOG_PATH/access.log" ]; then
    echo -e "  ${GREEN}✓${NC} access.log exists"
    echo "  Size: $(ls -lh "$MCC_LOG_PATH/access.log" | awk '{print $5}')"
    echo "  Last 2 lines:"
    tail -n 2 "$MCC_LOG_PATH/access.log" | sed 's/^/    /'
else
    echo -e "  ${RED}✗${NC} access.log NOT found at $MCC_LOG_PATH"
fi
echo ""

# 4. Promtail: is the container up, can it see the logs, how far has it read?
printf '%b\n' "${YELLOW}4️⃣  Promtail Status:${NC}"
printf '%s\n' "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
if ! docker ps --format '{{.Names}}' | grep -q "mcc-promtail"; then
    printf '%b\n' "  ${RED}✗${NC} Promtail container NOT running"
else
    printf '%b\n' "  ${GREEN}✓${NC} Promtail container running"

    # Directory listing as seen from inside the container (first 5 lines).
    printf '%s\n' "  Log access check:"
    docker exec mcc-promtail ls -lh /var/log/mcc/ 2>/dev/null \
        | head -n 5 \
        | sed 's/^/    /'
    printf '\n'

    # Dump of the positions file Promtail keeps inside the container.
    printf '%s\n' "  Position file:"
    docker exec mcc-promtail cat /var/lib/promtail/positions.yaml 2>/dev/null \
        | sed 's/^/    /'
fi
printf '\n'

# 5. Check Loki data
echo -e "${YELLOW}5️⃣  Loki Data Check:${NC}"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

#######################################
# Print, one per line, the strings held in the "data" array of a Loki label
# API response such as {"status":"success","data":["filename","job"]}.
# Inputs:    JSON response on stdin
# Outputs:   array elements on stdout; nothing when "data" is absent or empty
#######################################
loki_data_values() {
    # Isolate the array body first, then take complete "..." tokens so the
    # commas and brackets between them never show up as values.
    grep -oP '"data"\s*:\s*\[\K[^\]]*' | grep -o '"[^"]*"' | tr -d '"'
}

LOKI_API="http://localhost:3100/loki/api/v1"
# --max-time keeps a hung Loki from stalling the whole diagnosis.
LOKI_LABELS=$(curl -s --max-time 5 "$LOKI_API/labels" 2>/dev/null)
if [ -n "$LOKI_LABELS" ]; then
    echo "  Available labels:"
    printf '%s\n' "$LOKI_LABELS" | loki_data_values | sed 's/^/    /'

    # /labels returns label *names* only; job names are the values of the
    # "job" label and need a separate request.
    LOKI_JOBS=$(curl -s --max-time 5 "$LOKI_API/label/job/values" 2>/dev/null | loki_data_values)
    if [ -n "$LOKI_JOBS" ]; then
        echo "  Job values:"
        printf '%s\n' "$LOKI_JOBS" | sed 's/^/    /'
    fi

    # Check for MCC job (label names are still searched, as before)
    if printf '%s\n%s\n' "$LOKI_LABELS" "$LOKI_JOBS" | grep -qi "mcc"; then
        echo -e "  ${GREEN}✓${NC} MCC job found in Loki"
    else
        echo -e "  ${YELLOW}⚠${NC} MCC job not found - logs may not be ingesting"
    fi
else
    echo -e "  ${RED}✗${NC} Cannot query Loki"
fi
echo ""

# 6. Check InfluxDB data
echo -e "${YELLOW}6️⃣  InfluxDB Data Check:${NC}"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

#######################################
# Print the `_value` column of an InfluxDB CSV query response.
# Inputs:    CSV on stdin (CRLF line endings and "#" annotation rows allowed)
# Outputs:   one value per line on stdout; nothing when the input has no
#            header naming a `_value` column (e.g. a JSON error body)
#######################################
influx_csv_values() {
    tr -d '\r' | awk -F, '
        /^#/ || NF == 0 { next }
        col == 0 {
            # Header row: remember which column is called _value.
            for (i = 1; i <= NF; i++) if ($i == "_value") col = i
            next
        }
        # Skip empty cells and repeated header rows of additional tables.
        $col != "" && $col != "_value" { print $col }
    '
}

# Token from the provisioned datasource; surrounding quotes and trailing
# whitespace are stripped so it can be used verbatim in the header.
INFLUX_TOKEN=$(grep -oP 'token: \K.*' grafana/provisioning/datasources/influxdb.yml 2>/dev/null \
    | head -n 1 \
    | sed -e 's/[[:space:]]*$//' -e "s/^[\"']//" -e "s/[\"']\$//")
# NOTE(review): hard-coded fallback credential kept from the original script.
INFLUX_TOKEN=${INFLUX_TOKEN:-mcc-monitoring-token-2025}

# The v2 query endpoint takes the Flux script as the raw request body with
# Content-Type application/vnd.flux; a form-encoded "query=" field is rejected.
MEASUREMENTS=$(curl -s --max-time 10 \
    -H "Authorization: Token $INFLUX_TOKEN" \
    -H "Accept: application/csv" \
    -H "Content-Type: application/vnd.flux" \
    "http://localhost:8086/api/v2/query?org=mcc" \
    --data-binary $'import "influxdata/influxdb/schema"\nschema.measurements(bucket: "metrics")' \
    2>/dev/null | influx_csv_values)

if [ -n "$MEASUREMENTS" ]; then
    echo "  Available measurements:"
    printf '%s\n' "$MEASUREMENTS" | head -n 10 | sed 's/^/    /'
else
    echo -e "  ${YELLOW}⚠${NC} Cannot query InfluxDB or no data yet"
fi
echo ""

# 7. Telegraf: container state plus the tail of its log
printf '%b\n' "${YELLOW}7️⃣  Telegraf Status:${NC}"
printf '%s\n' "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
if ! docker ps --format '{{.Names}}' | grep -q "mcc-telegraf"; then
    printf '%b\n' "  ${RED}✗${NC} Telegraf container NOT running"
else
    printf '%b\n' "  ${GREEN}✓${NC} Telegraf container running"
    printf '%s\n' "  Recent logs:"
    # stderr is merged into the pipe so everything `docker logs` emits is indented.
    docker logs mcc-telegraf --tail 5 2>&1 \
        | sed 's/^/    /'
fi
printf '\n'

# 8. Check BGP scripts
echo -e "${YELLOW}8️⃣  BGP Monitoring:${NC}"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

#######################################
# Tell whether a running container has exactly the given name.
# Arguments: $1 - container name
# Returns:   0 when `docker ps` lists that exact name, non-zero otherwise
#######################################
container_running() {
    # -F/-x: compare the whole line literally, so characters such as "." in a
    # name are not treated as regex wildcards; "--" protects a leading "-".
    docker ps --format '{{.Names}}' | grep -Fxq -- "$1"
}

# Container name comes from the Telegraf BGP script; "MCC" when it cannot be read.
MCC_CONTAINER=$(grep -oP 'MCC_CONTAINER_NAME="\K[^"]+' telegraf/scripts/bgp_status.sh 2>/dev/null || echo "MCC")
echo "  MCC Container Name: $MCC_CONTAINER"

if container_running "$MCC_CONTAINER"; then
    echo -e "  ${GREEN}✓${NC} MCC container '$MCC_CONTAINER' found"
    echo "  BGP Status:"
    # Quoted so a name containing spaces or glob characters stays one argument.
    docker exec "$MCC_CONTAINER" birdc show protocols 2>/dev/null | grep -i bgp | head -n 3 | sed 's/^/    /'
else
    echo -e "  ${YELLOW}⚠${NC} MCC container '$MCC_CONTAINER' not found"
fi
echo ""

# 9. Grafana datasource UIDs, read from the provisioning files
printf '%b\n' "${YELLOW}9️⃣  Grafana Datasources:${NC}"
printf '%s\n' "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
# A missing file simply yields an empty UID (grep errors are silenced).
influx_uid=$(grep -oP 'uid: \K.*' grafana/provisioning/datasources/influxdb.yml 2>/dev/null)
loki_uid=$(grep -oP 'uid: \K.*' grafana/provisioning/datasources/loki.yml 2>/dev/null)
printf '  InfluxDB UID: %s\n' "$influx_uid"
printf '  Loki UID: %s\n' "$loki_uid"
printf '\n'

# 10. Network connectivity
echo -e "${YELLOW}🔟  Container Network:${NC}"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

#######################################
# Check from inside a container whether an HTTP endpoint is reachable.
# Globals:   GREEN, RED, NC (read) - color escapes for the verdict
# Arguments: $1 - container in which wget is run
#            $2 - URL to fetch
# Outputs:   an indented "✓ Connected" or "✗ Cannot connect" line on stdout
#######################################
check_container_link() {
    local container=$1
    local url=$2
    # Only the exit status matters, so the response body is discarded instead
    # of being printed into the report. A real if/else is used because
    # `cmd && ok || fail` also runs `fail` whenever `ok` itself fails.
    if docker exec "$container" wget -q -O- --timeout=3 "$url" > /dev/null 2>&1; then
        echo -e "    ${GREEN}✓ Connected${NC}"
    else
        echo -e "    ${RED}✗ Cannot connect${NC}"
    fi
}

echo "  Promtail -> Loki:"
check_container_link mcc-promtail http://loki:3100/ready

echo "  Grafana -> Loki:"
check_container_link mcc-grafana http://loki:3100/ready

echo "  Grafana -> InfluxDB:"
check_container_link mcc-grafana http://influxdb:8086/health
echo ""

# Closing summary: where each service is reachable from the host, followed by
# a short checklist for the "Loki has data but Grafana shows none" case.
printf '%b\n' \
    "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" \
    "${BLUE}📊 Access Points${NC}" \
    "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"

# Quoted delimiter: the text below is emitted literally, with no expansion.
cat <<'EOF'
  • Grafana:    http://localhost:3000 (admin/admin)
  • Loki:       http://localhost:3100
  • InfluxDB:   http://localhost:8086
  • Prometheus: http://localhost:9090

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
✅ Diagnosis Complete
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

📊 If Loki has data but Grafana shows no data:
   1. Open http://localhost:3000
   2. Login: admin/admin
   3. Go to: Connections > Data sources > Loki
   4. Click 'Test' button at bottom
   5. Check if time range in dashboard is correct
   6. Try query: {job="nginx"} |= "HIT" | logfmt

EOF
