#!/bin/bash

# Configuration
# LOG_DIR and IPINFO_TOKEN may be overridden from the environment; when they
# are not set, the values below are identical to the previous hard-coded ones.
LOG_DIR="${LOG_DIR:-/www/wwwlogs}"
# NOTE(review): an API token is committed in this file. Prefer exporting
# IPINFO_TOKEN before running the script, and consider rotating this default.
IPINFO_TOKEN="${IPINFO_TOKEN:-b807873daea34f}"
DATASET_URL="https://ipinfo.io/data/free/country_asn.csv.gz?token=${IPINFO_TOKEN}"
# Both dataset paths are relative, so they resolve against the directory the
# script is started from.
DATASET_FILE="country_asn.csv.gz"
UNZIPPED_DATASET="country_asn.csv"

# Convert a dotted-quad IPv4 address to its unsigned 32-bit integer value.
# Arguments: $1 - IPv4 address such as 192.168.0.1
# Outputs:   the integer on stdout
# Returns:   0 on success; 1 with a message on stderr (and nothing on
#            stdout) when $1 is not four decimal octets in the range 0-255
ip2int() {
    local ip=$1
    local i1 i2 i3 i4 octet
    local ipv4_re='^[0-9]{1,3}(\.[0-9]{1,3}){3}$'

    # Reject IPv6 addresses, hostnames, "-" and similar up front; without this
    # the arithmetic below would evaluate arbitrary text as an expression.
    if [[ ! $ip =~ $ipv4_re ]]; then
        printf 'ip2int: not an IPv4 address: %s\n' "$ip" >&2
        return 1
    fi

    # IFS is changed for this read only; the octet variables are local so the
    # caller's scope is left untouched.
    IFS=. read -r i1 i2 i3 i4 <<< "$ip"

    # The 10# prefix forces base 10, so zero-padded octets such as 010 or 008
    # are not interpreted as octal.
    for octet in "$i1" "$i2" "$i3" "$i4"; do
        if (( 10#$octet > 255 )); then
            printf 'ip2int: octet out of range in: %s\n' "$ip" >&2
            return 1
        fi
    done

    echo $(( (10#$i1 << 24) + (10#$i2 << 16) + (10#$i3 << 8) + 10#$i4 ))
}

# Look up the AS name and country for an IPv4 address in the local dataset.
# Globals:   UNZIPPED_DATASET (read) - CSV file with a header row; columns 1
#            and 2 hold the first and last IP of a range, column 8 is
#            printed as the AS name and column 4 as the country.
# Arguments: $1 - IPv4 address
# Outputs:   "<as name>|<country>" on stdout, or "Unknown|Unknown" when the
#            address cannot be converted or no range contains it
# Returns:   0
get_ip_info() {
    local ip=$1
    local ip_int info

    # Assigned separately from the declaration so the exit status of the
    # substitution is not masked by the local builtin.
    # stderr is dropped on purpose: non-IPv4 client addresses are expected in
    # the logs and are reported to the caller as Unknown instead.
    ip_int=$(ip2int "$ip" 2>/dev/null)
    if [[ ! $ip_int =~ ^[0-9]+$ ]]; then
        # Nothing to look up; skip the scan of the whole dataset.
        echo "Unknown|Unknown"
        return
    fi

    info=$(awk -F',' -v ip_int="$ip_int" '
        # Dotted quad to number; parts is a function-local scratch array.
        function ip2int(ip,    parts) {
            split(ip, parts, ".")
            return (parts[1] * 2^24) + (parts[2] * 2^16) + (parts[3] * 2^8) + parts[4]
        }
        # Split one CSV record into out[1..n] and return n. Double-quoted
        # fields may contain commas, and "" inside quotes is a literal quote.
        function csv_split(line, out,    n, i, len, ch, quoted, field) {
            split("", out)
            n = 0; field = ""; quoted = 0; len = length(line)
            for (i = 1; i <= len; i++) {
                ch = substr(line, i, 1)
                if (quoted) {
                    if (ch != "\"") {
                        field = field ch
                    } else if (substr(line, i + 1, 1) == "\"") {
                        field = field "\""
                        i++
                    } else {
                        quoted = 0
                    }
                } else if (ch == "\"") {
                    quoted = 1
                } else if (ch == ",") {
                    out[++n] = field
                    field = ""
                } else {
                    field = field ch
                }
            }
            out[++n] = field
            return n
        }
        # Skip the header row and any IPv6 range; only dotted quads are handled.
        NR == 1 || index($1, ":") { next }
        {
            # The two IP columns never contain commas, so the plain comma
            # split is enough for the range test. The quote-aware splitter
            # runs only on the matching row.
            if (ip_int >= ip2int($1) && ip_int <= ip2int($2)) {
                csv_split($0, cols)
                print cols[8] "|" cols[4]
                exit
            }
        }
    ' "$UNZIPPED_DATASET")

    if [ -z "$info" ]; then
        echo "Unknown|Unknown"
    else
        echo "$info"
    fi
}

# Draw a rotating indicator on stdout for as long as a process is alive.
# Arguments: $1 - PID to watch
# Outputs:   a 6-character " [c]  " frame every 0.1 s, erased again with
#            backspaces; blanks four columns before returning
# Returns:   once the PID can no longer be signalled (the caller may also
#            simply kill the spinner itself)
spinner() {
    local pid=$1
    local delay=0.1
    local spinstr='|/-\'
    local temp

    # kill -0 delivers no signal; it only reports whether this exact PID can
    # be signalled. It is a builtin, so no processes are forked per frame,
    # and it cannot be fooled by another PID that merely contains the same
    # digits. It reports failure for processes owned by another user.
    while kill -0 "$pid" 2>/dev/null; do
        temp=${spinstr#?}
        # %c prints only the first character of the rotating string.
        printf " [%c]  " "$spinstr"
        # Rotate: move the first character to the end.
        spinstr=$temp${spinstr%"$temp"}
        sleep "$delay"
        printf "\b\b\b\b\b\b"
    done
    printf "    \b\b\b\b"
}

# Download and prepare dataset
#
# The compressed archive ($DATASET_FILE) is kept on disk next to the extracted
# CSV so that the next run can compare a fresh download against it and skip
# the extraction when nothing changed.
echo "Checking for dataset updates..."
new_archive="${DATASET_FILE}.new"
have_dataset=0
if [ -f "$UNZIPPED_DATASET" ]; then
    have_dataset=1
else
    echo "Downloading initial dataset..."
fi

# gzip -t checks that the download is a complete gzip stream, so a failed or
# truncated transfer never replaces a working dataset.
if wget -q -O "$new_archive" "$DATASET_URL" && gzip -t "$new_archive" 2>/dev/null; then
    if [ "$have_dataset" -eq 1 ] && cmp -s "$DATASET_FILE" "$new_archive"; then
        echo "Dataset is up to date."
        rm -f "$new_archive"
    else
        if [ "$have_dataset" -eq 1 ]; then
            echo "New dataset available. Updating..."
        fi
        # Extract to a scratch name first so an interrupted extraction cannot
        # leave a half-written CSV in place. gunzip -c leaves the archive
        # untouched, unlike plain gunzip, which deletes it.
        if gunzip -c "$new_archive" > "${UNZIPPED_DATASET}.tmp"; then
            mv -f "${UNZIPPED_DATASET}.tmp" "$UNZIPPED_DATASET"
            mv -f "$new_archive" "$DATASET_FILE"
        else
            rm -f "${UNZIPPED_DATASET}.tmp" "$new_archive"
            if [ "$have_dataset" -eq 1 ]; then
                echo "Warning: could not extract the new dataset; using the existing one." >&2
            else
                echo "Error: could not extract the dataset." >&2
                exit 1
            fi
        fi
    fi
else
    rm -f "$new_archive"
    if [ "$have_dataset" -eq 1 ]; then
        echo "Warning: dataset download failed; using the existing one." >&2
    else
        echo "Error: dataset download failed and no local copy exists." >&2
        exit 1
    fi
fi

echo "Analyzing Nginx logs for 444 status codes..."

# Scratch file holding "<count> <ip>" lines, most frequent client first.
# mktemp gives it an unpredictable name outside the working directory.
ip_list=$(mktemp "${TMPDIR:-/tmp}/nginx444_ips.XXXXXX") || {
    echo "Error: could not create a temporary file." >&2
    exit 1
}
SPIN_PID=""

# Remove the scratch file and stop a still-running spinner on every exit
# path, including Ctrl-C.
cleanup_analysis() {
    if [ -n "$SPIN_PID" ]; then
        kill "$SPIN_PID" 2>/dev/null
    fi
    rm -f -- "$ip_list"
}
trap cleanup_analysis EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

# Process all log files and sort results.
# Field 1 is taken as the client address and field 9 as the status code.
find "$LOG_DIR" -name "*.log" -type f -print0 \
    | xargs -0 awk '$9 == 444 {print $1}' \
    | sort | uniq -c | sort -rn > "$ip_list"

# Display header
printf "\n%-6s %-15s %-40s %s\n" "Count" "IP Address" "AS Name" "Country"
printf "%s\n" "$(printf '=%.0s' {1..80})"

# Process each IP
# The arithmetic expansion strips the padding some wc implementations add.
total_ips=$(( $(wc -l < "$ip_list") ))
current_ip=0

# Progress text and the spinner rely on \r and \b, which only make sense on
# a terminal; when stdout is a file or pipe only the result rows are written.
show_progress=0
if [ -t 1 ]; then
    show_progress=1
fi

while read -r count ip; do
    current_ip=$((current_ip + 1))

    if [ "$show_progress" -eq 1 ]; then
        printf "Processing IP %d of %d " "$current_ip" "$total_ips"
        # The spinner watches this script; it is stopped explicitly below.
        spinner $$ &
        SPIN_PID=$!
    fi

    # Fetch IP info
    ip_info=$(get_ip_info "$ip")

    # Stop spinner. Waiting for it reaps the job so bash does not print a
    # "Terminated" notice for every address.
    if [ -n "$SPIN_PID" ]; then
        kill "$SPIN_PID" 2>/dev/null
        wait "$SPIN_PID" 2>/dev/null
        SPIN_PID=""
    fi

    # Split "<as name>|<country>" without forking: first field, then the
    # second field of the remainder.
    as_name=${ip_info%%|*}
    country=${ip_info#*|}
    country=${country%%|*}

    if [ "$show_progress" -eq 1 ]; then
        # \r returns to column 0 so the result overwrites the progress text.
        printf "\r%-6s %-15s %-40s %s\n" "$count" "$ip" "${as_name:0:40}" "$country"
    else
        printf "%-6s %-15s %-40s %s\n" "$count" "$ip" "${as_name:0:40}" "$country"
    fi
done < "$ip_list"

# The scratch file is removed by the EXIT trap installed above.

echo "Analysis complete."