#!/bin/bash

#############################################
# LanCache Domain Collector Service
# Background service to passively collect DNS queries
# Lightweight - monitors Bind9 logs continuously
#############################################

# Configuration
# DOMAINS_DB - flat-file database of collected domains
# LOG_FILE   - service log written by log()
# LOCK_FILE  - holds the PID of the running instance
DOMAINS_DB="/var/lib/lancache/collected-domains.txt"
LOG_FILE="/var/log/lancache/domain-collector.log"
LOCK_FILE="/var/run/lancache-domain-collector.lock"

# Ensure the data and log directories exist before anything writes to them
mkdir -p /var/lib/lancache /var/log/lancache

# log MESSAGE
# Prefixes MESSAGE with a "[YYYY-MM-DD HH:MM:SS]" timestamp, prints the line
# to stdout and appends the same line to $LOG_FILE.
log() {
    local stamp
    stamp=$(date '+%Y-%m-%d %H:%M:%S')
    printf '[%s] %s\n' "$stamp" "$1" | tee -a "$LOG_FILE"
}

# Detect DNS service name (named or bind9)
# The first unit that systemd reports as active wins; "named" is tried first.
DNS_SERVICE=""
for candidate in named bind9; do
    if systemctl is-active --quiet "$candidate" 2>/dev/null; then
        DNS_SERVICE="$candidate"
        break
    fi
done

# Without an active DNS unit there is nothing to monitor, so abort.
if [[ -z "$DNS_SERVICE" ]]; then
    log "ERROR: Neither 'named' nor 'bind9' service is running!"
    exit 1
fi

# Single-instance guard
# An existing lock file stores the PID of the instance that created it. If
# that process is still alive we refuse to start; otherwise the lock is stale
# and gets removed.
if [[ -f "$LOCK_FILE" ]]; then
    PID=$(cat "$LOCK_FILE")
    if ! ps -p "$PID" > /dev/null 2>&1; then
        log "Removing stale lock file"
        rm -f "$LOCK_FILE"
    else
        log "ERROR: Service already running (PID: $PID)"
        exit 1
    fi
fi

# Claim the lock by recording our own PID
printf '%s\n' "$$" > "$LOCK_FILE"

# Shutdown handler
# Logs that the service is stopping, releases the lock file and ends the
# script with exit status 0.
cleanup() {
    log "Service stopping..."
    rm -f "$LOCK_FILE"
    exit 0
}

# Invoke the handler on termination (TERM) and interrupt (INT) signals
trap cleanup TERM INT

# Domains to ignore (common non-cacheable)
# Brand keywords: matched as a substring anywhere in the name, so related
# hosts such as "googleapis.com" or "amazonaws.com" are covered as well.
IGNORE_PATTERNS=(
    "google"
    "facebook"
    "youtube"
    "twitter"
    "instagram"
    "amazon"
    "cloudflare"
    "akamai"
)

# Local / reverse-lookup zones: matched only as whole trailing labels.
# A plain substring test on short names like "lan" or "local" would also
# drop cacheable hosts such as "lancache.steamcontent.com".
IGNORE_SUFFIXES=(
    "localhost"
    "local"
    "lan"
    "in-addr.arpa"
)

# Check if domain should be ignored
# Globals:   IGNORE_PATTERNS (read), IGNORE_SUFFIXES (read)
# Arguments: $1 - domain name (callers pass it lower-cased, no trailing dot)
# Returns:   0 if the domain must be skipped, 1 if it may be collected
should_ignore() {
    local domain="$1"
    local pattern suffix

    for pattern in "${IGNORE_PATTERNS[@]}"; do
        if [[ "$domain" == *"$pattern"* ]]; then
            return 0
        fi
    done

    for suffix in "${IGNORE_SUFFIXES[@]}"; do
        # Either the name is the zone itself or it ends in ".<zone>"
        if [[ "$domain" == "$suffix" || "$domain" == *".$suffix" ]]; then
            return 0
        fi
    done

    return 1
}

# Add domain to database (unique)
# Globals:   DOMAINS_DB (read and appended to); log() is called on insert
# Arguments: $1 - domain name
# Outputs:   appends "<domain>|<timestamp>|collected" to $DOMAINS_DB and logs
#            "COLLECTED: <domain>" when the domain was not stored yet
add_domain() {
    local domain="$1"
    local timestamp

    # Skip if already exists.
    # The first "|"-separated field is compared as an exact fixed string
    # (-x -F). Interpolating the domain into a regex would let its dots act
    # as wildcards, so "a.b.com" would be mistaken for a stored "a-b.com".
    # A missing database file simply yields no match.
    if cut -d'|' -f1 "$DOMAINS_DB" 2>/dev/null | grep -qxF -- "$domain"; then
        return
    fi

    # Only pay for the date fork when a row is actually written
    timestamp=$(date '+%Y-%m-%d %H:%M:%S')

    # Add with timestamp
    printf '%s|%s|collected\n' "$domain" "$timestamp" >> "$DOMAINS_DB"
    log "COLLECTED: $domain"
}

log "=== LanCache Domain Collector Service Started ==="
log "Monitoring $DNS_SERVICE DNS queries..."
log "Database: $DOMAINS_DB"

# Pattern for Bind9 query-log lines, e.g.
#   "client @0x... 10.0.0.5#12345 (cdn.example.com): query: cdn.example.com IN A + (10.0.0.1)"
# Group 1 captures the queried name. The trailing "( |$)" forces the record
# type to end right after A / AAAA; without it "IN ANY" or "IN AFSDB" would
# also match through their leading "A".
QUERY_REGEX='query: ([a-zA-Z0-9._-]+) IN (A|AAAA)( |$)'

# Monitor DNS logs using journalctl (efficient)
# Follows only entries of the detected DNS unit, starting from now, with the
# bare message text (-o cat). The loop runs in a pipeline subshell, so the
# variables it sets are not visible after "done".
journalctl -u "$DNS_SERVICE" -f --since now -o cat | while read -r line; do
    # Ignore everything that is not an A/AAAA query line
    [[ "$line" =~ $QUERY_REGEX ]] || continue
    domain="${BASH_REMATCH[1]}"

    # Clean domain: lower-case it and drop one trailing root dot.
    # Done with parameter expansion (Bash 4+) so no processes are forked
    # for every single DNS query.
    domain="${domain,,}"
    domain="${domain%.}"

    # Skip if empty (e.g. a query for the root ".") or localhost
    [[ -z "$domain" ]] && continue
    [[ "$domain" == "localhost" ]] && continue

    # Skip ignored patterns
    should_ignore "$domain" && continue

    # Add to database
    add_domain "$domain"
done

# Only reached when the journalctl stream ends; release the lock and exit
# through the regular shutdown path.
cleanup
