Fan-diagnostic/nano-diagnostics.sh

228 lines
9.2 KiB
Bash
Executable file

#!/bin/bash
# Linux Fan Diagnostic Script
# Monitors CPU usage, temperature, and processes at a fixed sampling interval.
# Usage: <this script> [duration_in_seconds]
# Default duration: 60 seconds (can be overridden by the first argument).
# Output: two timestamped log files created in the current directory.

# Validate a duration argument.
# Arguments: $1 - candidate duration
# Outputs:   the duration as a plain decimal integer (leading zeros removed)
# Returns:   0 when $1 is a non-negative whole number, 1 otherwise
parse_duration() {
  [[ $1 =~ ^[0-9]+$ ]] || return 1
  # 10# forces base 10 so that values such as "08" are not read as octal.
  printf '%d\n' "$((10#$1))"
}

# The duration is later used in shell arithmetic, so anything that is not a
# plain whole number is rejected here instead of being evaluated there.
if ! DURATION=$(parse_duration "${1:-60}"); then
  echo "Error: duration must be a whole number of seconds (got '${1:-}')" >&2
  echo "Usage: $0 [duration_in_seconds]" >&2
  exit 1
fi

INTERVAL_MS=200 # sampling interval in milliseconds
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
LOG_FILE="fan_diagnostic_${TIMESTAMP}.log"
SPIKE_LOG="fan_spikes_${TIMESTAMP}.log"

# Thresholds for spike detection
CPU_SPIKE_THRESHOLD=50    # CPU usage above 50%
TEMP_SPIKE_THRESHOLD=70   # Temperature above 70°C
TEMP_INCREASE_THRESHOLD=5 # Temperature increase of 5°C or more between samples

# Baseline values
PREV_TEMP=0
BASELINE_CPU=0
SAMPLE_COUNT=0
# Banner: shown on the terminal and appended to the main log in one pass.
tee -a "$LOG_FILE" << EOF
==================================
Linux Fan Diagnostic Script
Started: $(date)
Duration: ${DURATION}s | Interval: ${INTERVAL_MS}ms
Main log: $LOG_FILE
Spike log: $SPIKE_LOG
==================================

EOF

# Initialize spike log: created (or truncated) with the thresholds in force.
cat > "$SPIKE_LOG" << EOF
==================================
CPU & TEMPERATURE SPIKE LOG
Started: $(date)
CPU Spike Threshold: ${CPU_SPIKE_THRESHOLD}%
Temp Spike Threshold: ${TEMP_SPIKE_THRESHOLD}°C
Temp Increase Threshold: ${TEMP_INCREASE_THRESHOLD}°C
==================================

EOF
# System Information

# Extract the CPU model from `lscpu` output supplied on stdin.
# Only a row whose label is exactly "Model name" is accepted, so rows such as
# "BIOS Model name" are ignored; the first match wins.
# Outputs: the model string with surrounding whitespace removed, or nothing
#          when no such row exists.
parse_cpu_model() {
  sed -n 's/^[[:space:]]*Model name:[[:space:]]*\(.*[^[:space:]]\)[[:space:]]*$/\1/p' |
    head -n 1
}

{
  echo "=== SYSTEM INFORMATION ==="
  echo "Hostname: $(hostname)"
  echo "Kernel: $(uname -r)"
  # LC_ALL=C keeps the lscpu field labels in English so the match above works.
  echo "CPU Model: $(LC_ALL=C lscpu 2> /dev/null | parse_cpu_model)"
  echo "CPU Cores: $(nproc)"
  echo ""
} >> "$LOG_FILE"
# Software inventory: tools that could plausibly drive CPU load or fan activity.
# Everything printed inside the group is appended to the main log.
{
  echo "=== INSTALLED SOFTWARE ==="

  # Docker: version plus every container, running or stopped.
  if command -v docker > /dev/null 2>&1; then
    echo "Docker version: $(docker --version)"
    echo "Docker containers:"
    docker ps -a 2>&1
    echo ""
  fi

  # Snap: the full package list.
  if command -v snap > /dev/null 2>&1; then
    echo "Snap packages:"
    snap list 2>&1
    echo ""
  fi

  # APT: the tail of the install history, when the history file exists.
  echo "Recently installed packages (apt):"
  if [ -f /var/log/apt/history.log ]; then
    grep -A 2 "Install:" /var/log/apt/history.log | tail -20
  fi
  echo ""
} >> "$LOG_FILE"
# One-off snapshot of the `sensors` output, or an install hint when the
# command is not available.
{
  echo "=== TEMPERATURE SENSORS ==="
  if ! command -v sensors > /dev/null 2>&1; then
    echo "lm-sensors not installed. Install with: sudo apt install lm-sensors"
  else
    sensors 2>&1
  fi
  echo ""
} >> "$LOG_FILE"
# Monitoring loop
#
# For DURATION seconds, one sample is taken every INTERVAL_MS milliseconds and
# appended to LOG_FILE: CPU temperature, overall CPU usage, the five busiest
# processes, plus Docker container stats and fan readings when those tools are
# available. A sample that crosses a threshold is also written to SPIKE_LOG as
# a framed "SPIKE #n" record.
#
# CPU usage is the busy share of the /proc/stat counters between two
# consecutive samples, so each value describes exactly the window since the
# previous sample and does not depend on the locale or on tool output formats.
#
# Reads:  DURATION, INTERVAL_MS, LOG_FILE, SPIKE_LOG, CPU_SPIKE_THRESHOLD,
#         TEMP_SPIKE_THRESHOLD, TEMP_INCREASE_THRESHOLD
# Writes: SAMPLE, SPIKE_COUNT, PREV_TEMP, BASELINE_CPU

# Print a millisecond count as decimal seconds for sleep, e.g. 200 -> 0.200.
# Arguments: $1 - non-negative whole number of milliseconds
ms_to_seconds() {
  local ms=$((10#$1))
  printf '%d.%03d\n' "$((ms / 1000))" "$((ms % 1000))"
}

# Load the aggregate "cpu" counters from /proc/stat.
# Sets:    CPU_TICKS_TOTAL (all accounted time) and CPU_TICKS_IDLE (idle + iowait)
# Returns: non-zero when the counters cannot be read
read_cpu_ticks() {
  local label user nice system idle iowait irq softirq steal _rest
  [ -r /proc/stat ] || return 1
  read -r label user nice system idle iowait irq softirq steal _rest < /proc/stat || return 1
  [ "$label" = "cpu" ] || return 1
  # Fields after "idle" default to 0 in case the line is shorter than expected.
  CPU_TICKS_IDLE=$((idle + ${iowait:-0}))
  CPU_TICKS_TOTAL=$((user + nice + system + CPU_TICKS_IDLE + ${irq:-0} + ${softirq:-0} + ${steal:-0}))
}

# Print the busy percentage (one decimal place) between two counter readings.
# Arguments: $1 previous total, $2 previous idle, $3 current total, $4 current idle
# Outputs:   e.g. "75.0"; "0.0" when no time elapsed between the readings
cpu_usage_between() {
  local d_total=$(($3 - $1)) d_idle=$(($4 - $2)) tenths
  if ((d_total <= 0)); then
    printf '0.0\n'
    return 0
  fi
  # Clamp so that counter irregularities cannot yield values outside 0-100%.
  if ((d_idle < 0)); then d_idle=0; fi
  if ((d_idle > d_total)); then d_idle=$d_total; fi
  tenths=$(((d_total - d_idle) * 1000 / d_total))
  printf '%d.%d\n' "$((tenths / 10))" "$((tenths % 10))"
}

# Print why a sample counts as a spike; prints nothing for a normal sample.
# Each reason ends with "; ".
# Arguments: $1 CPU usage in percent (may be empty or non-numeric: check skipped)
#            $2 current temperature in °C (integer)
#            $3 previous temperature in °C (integer; 0 = no previous reading)
# Globals:   CPU_SPIKE_THRESHOLD, TEMP_SPIKE_THRESHOLD, TEMP_INCREASE_THRESHOLD
spike_reasons() {
  local cpu=$1 temp=$2 prev=$3 reasons="" cpu_int diff
  cpu_int=${cpu%%.*}
  if [[ $cpu_int =~ ^[0-9]+$ ]] && ((10#$cpu_int > CPU_SPIKE_THRESHOLD)); then
    reasons+="CPU usage high (${cpu}% > ${CPU_SPIKE_THRESHOLD}%); "
  fi
  if ((temp > TEMP_SPIKE_THRESHOLD)); then
    reasons+="High temperature (${temp}°C > ${TEMP_SPIKE_THRESHOLD}°C); "
  fi
  if ((prev > 0)); then
    diff=$((temp - prev))
    if ((diff >= TEMP_INCREASE_THRESHOLD)); then
      reasons+="Temperature jump (+${diff}°C); "
    fi
  fi
  printf '%s' "$reasons"
}

echo "=== MONITORING DATA ===" >> "$LOG_FILE"
echo "Starting monitoring for ${DURATION} seconds..." | tee -a "$LOG_FILE"

START_TIME=$(date +%s)
END_TIME=$((START_TIME + DURATION))
SAMPLE=0
SPIKE_COUNT=0

# Honour the configured interval; fall back to 200 ms when it is unset, not a
# whole number, or zero (which would turn the loop into a busy loop).
SAMPLE_INTERVAL_MS=${INTERVAL_MS:-200}
if ! [[ $SAMPLE_INTERVAL_MS =~ ^[0-9]+$ ]] || ((10#$SAMPLE_INTERVAL_MS == 0)); then
  SAMPLE_INTERVAL_MS=200
fi
SLEEP_SECS=$(ms_to_seconds "$SAMPLE_INTERVAL_MS")

# Tool availability does not change while the loop runs, so look it up once.
HAVE_DOCKER=0
if command -v docker &> /dev/null; then HAVE_DOCKER=1; fi
HAVE_SENSORS=0
if command -v sensors &> /dev/null; then HAVE_SENSORS=1; fi

# Prime the CPU counters so that sample #1 already has a reference point.
PREV_TICKS_TOTAL=""
PREV_TICKS_IDLE=""
if read_cpu_ticks; then
  PREV_TICKS_TOTAL=$CPU_TICKS_TOTAL
  PREV_TICKS_IDLE=$CPU_TICKS_IDLE
fi

while [ "$(date +%s)" -lt "$END_TIME" ]; do
  # Sleep first: every CPU reading then spans one full interval (plus the time
  # the previous sample took to collect).
  sleep "$SLEEP_SECS"

  SAMPLE=$((SAMPLE + 1))
  TIMESTAMP_SAMPLE=$(date +"%Y-%m-%d %H:%M:%S.%3N")
  echo "--- Sample #${SAMPLE} at ${TIMESTAMP_SAMPLE} ---" >> "$LOG_FILE"

  # Overall CPU usage since the previous reading
  CPU_USAGE=""
  CPU_USAGE_INT=""
  if read_cpu_ticks; then
    if [ -n "$PREV_TICKS_TOTAL" ]; then
      CPU_USAGE=$(cpu_usage_between "$PREV_TICKS_TOTAL" "$PREV_TICKS_IDLE" \
        "$CPU_TICKS_TOTAL" "$CPU_TICKS_IDLE")
      CPU_USAGE_INT=${CPU_USAGE%%.*}
    fi
    PREV_TICKS_TOTAL=$CPU_TICKS_TOTAL
    PREV_TICKS_IDLE=$CPU_TICKS_IDLE
  fi

  # CPU temperature: thermal_zone0 reports millidegrees Celsius; stays 0 when
  # the zone is missing or does not return a number.
  TEMP_C=0
  if [ -f /sys/class/thermal/thermal_zone0/temp ]; then
    TEMP=$(cat /sys/class/thermal/thermal_zone0/temp 2> /dev/null)
    if [[ $TEMP =~ ^-?[0-9]+$ ]]; then
      TEMP_C=$((TEMP / 1000))
      echo "CPU Temp: ${TEMP_C}°C" >> "$LOG_FILE"
    fi
  fi

  echo "CPU Usage: ${CPU_USAGE:-n/a}%" >> "$LOG_FILE"

  # Top 5 CPU consuming processes (the header line of ps is dropped)
  TOP_PROCESSES=$(ps aux --sort=-%cpu | head -6 | tail -5)
  echo "Top CPU processes:" >> "$LOG_FILE"
  echo "$TOP_PROCESSES" >> "$LOG_FILE"

  # Docker containers CPU usage (only when the daemon answers).
  # NOTE(review): `docker stats --no-stream` may take longer than the sampling
  # interval, which stretches the effective sample spacing — confirm on target.
  DOCKER_STATS=""
  if ((HAVE_DOCKER)) && docker ps -q &> /dev/null; then
    DOCKER_STATS=$(docker stats --no-stream --format "table {{.Name}}\t{{.CPUPerc}}\t{{.MemUsage}}" 2>&1)
    echo "Docker container stats:" >> "$LOG_FILE"
    echo "$DOCKER_STATS" >> "$LOG_FILE"
  fi

  # Fan speed (if available)
  FAN_INFO=""
  if ((HAVE_SENSORS)); then
    FAN_INFO=$(sensors | grep -i "fan")
    if [ -n "$FAN_INFO" ]; then
      echo "Fan: $FAN_INFO" >> "$LOG_FILE"
    fi
  fi
  echo "" >> "$LOG_FILE"

  # SPIKE DETECTION
  SPIKE_DETECTED=0
  SPIKE_REASONS=$(spike_reasons "$CPU_USAGE" "$TEMP_C" "${PREV_TEMP:-0}")
  if [ -n "$SPIKE_REASONS" ]; then
    SPIKE_DETECTED=1
    SPIKE_COUNT=$((SPIKE_COUNT + 1))
    {
      echo "╔═══════════════════════════════════════════════════════════════"
      echo "║ SPIKE #${SPIKE_COUNT} DETECTED at ${TIMESTAMP_SAMPLE}"
      echo "╠═══════════════════════════════════════════════════════════════"
      echo "║ Reason: ${SPIKE_REASONS}"
      echo "║ CPU Usage: ${CPU_USAGE:-n/a}%"
      echo "║ CPU Temp: ${TEMP_C}°C (Previous: ${PREV_TEMP:-0}°C)"
      if [ -n "$FAN_INFO" ]; then
        echo "║ Fan Status: ${FAN_INFO}"
      fi
      echo "╠═══════════════════════════════════════════════════════════════"
      echo "║ TOP CPU PROCESSES:"
      echo "╠═══════════════════════════════════════════════════════════════"
      printf '%s\n' "$TOP_PROCESSES"
      if [ -n "$DOCKER_STATS" ]; then
        echo "╠═══════════════════════════════════════════════════════════════"
        echo "║ DOCKER CONTAINERS:"
        echo "╠═══════════════════════════════════════════════════════════════"
        printf '%s\n' "$DOCKER_STATS"
      fi
      echo "╚═══════════════════════════════════════════════════════════════"
      echo ""
    } >> "$SPIKE_LOG"
    # Redrawn almost at once by the progress line below, which also carries
    # the running spike count.
    printf "\r⚠ SPIKE #%d detected! " "$SPIKE_COUNT"
  fi

  # Update baseline and previous values
  PREV_TEMP=$TEMP_C
  # BASELINE_CPU accumulates the first 10 readings and becomes their mean at
  # sample 10.
  if ((SAMPLE <= 10)) && [ -n "$CPU_USAGE_INT" ]; then
    BASELINE_CPU=$((BASELINE_CPU + CPU_USAGE_INT))
    if ((SAMPLE == 10)); then
      BASELINE_CPU=$((BASELINE_CPU / 10))
    fi
  fi

  # Progress indicator (DURATION is at least 1 whenever the loop body runs)
  CURRENT_TIME=$(date +%s)
  ELAPSED=$((CURRENT_TIME - START_TIME))
  PROGRESS=$((ELAPSED * 100 / DURATION))
  # A slow final sample can overrun the deadline; never show more than 100%.
  if ((PROGRESS > 100)); then PROGRESS=100; fi
  printf "\rProgress: %d%% (Sample #%d, %ds/%ds, Spikes: %d) " "$PROGRESS" "$SAMPLE" "$ELAPSED" "$DURATION" "$SPIKE_COUNT"
done
# Closing summary. Every line is shown on the terminal; which log files also
# receive it depends on the helper used.

# Print one line to the terminal and append it to the main log.
summary_main() {
  echo "$1" | tee -a "$LOG_FILE"
}

# Print one line to the terminal and append it to both the main and spike logs.
summary_both() {
  echo "$1" | tee -a "$LOG_FILE" "$SPIKE_LOG"
}

# The empty line also terminates the in-place progress display.
summary_main ""
summary_both "=================================="
summary_both "Monitoring completed: $(date)"
summary_main "Total samples: ${SAMPLE}"
summary_both "Total spikes detected: ${SPIKE_COUNT}"
summary_main "Main log: $LOG_FILE"
summary_main "Spike log: $SPIKE_LOG"
summary_both "=================================="

# Terminal-only footer with hints for inspecting the results.
printf '%s\n' \
  "" \
  "✓ Monitoring complete!" \
  " 📊 Full log: $LOG_FILE" \
  " ⚠️ Spike log: $SPIKE_LOG" \
  "" \
  "Quick analysis commands:" \
  " cat $SPIKE_LOG | less" \
  " grep 'SPIKE #' $SPIKE_LOG"