#!/bin/bash
#
# Linux Fan Diagnostic Script
#
# Monitors CPU usage, CPU temperature and the top CPU-consuming processes,
# pausing 200 ms between samples, and records threshold breaches in a
# separate spike log.
#
# Usage: ./fan_diagnostic.sh [duration_in_seconds]

#######################################
# Validates a duration argument and prints it normalized to base 10.
# Arguments: $1 - candidate duration in whole seconds
# Outputs:   the duration, without leading zeros, on stdout
# Returns:   0 when $1 is a non-negative integer, 1 otherwise
#######################################
parse_duration() {
  local raw=$1
  [[ "$raw" =~ ^[0-9]+$ ]] || return 1
  # Force base 10: a value such as "08" would otherwise be rejected as an
  # invalid octal number once it reaches $(( )) arithmetic.
  printf '%d\n' "$((10#$raw))"
}

# Default duration: 60 seconds (can be overridden by the first argument).
if ! DURATION=$(parse_duration "${1:-60}"); then
  printf 'Usage: %s [duration_in_seconds]\n' "$0" >&2
  printf 'Error: duration must be a non-negative whole number of seconds, got "%s"\n' "$1" >&2
  exit 2
fi
readonly DURATION

readonly INTERVAL_MS=200 # milliseconds
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
readonly TIMESTAMP
readonly LOG_FILE="fan_diagnostic_${TIMESTAMP}.log"
readonly SPIKE_LOG="fan_spikes_${TIMESTAMP}.log"

# Thresholds for spike detection
readonly CPU_SPIKE_THRESHOLD=50    # CPU usage above 50%
readonly TEMP_SPIKE_THRESHOLD=70   # Temperature above 70°C
readonly TEMP_INCREASE_THRESHOLD=5 # Temperature increase of 5°C or more

# Baseline values (mutable state updated while sampling)
PREV_TEMP=0
BASELINE_CPU=0
SAMPLE_COUNT=0

# Session banner: shown on the terminal and appended to the main log.
# A single tee handles the whole group instead of one tee per line.
{
  echo "=================================="
  echo "Linux Fan Diagnostic Script"
  echo "Started: $(date)"
  echo "Duration: ${DURATION}s | Interval: ${INTERVAL_MS}ms"
  echo "Main log: $LOG_FILE"
  echo "Spike log: $SPIKE_LOG"
  echo "=================================="
  echo ""
} | tee -a "$LOG_FILE"

# Initialize spike log: the single '>' redirection truncates any existing
# file of the same name and then writes the header listing the thresholds.
{
  echo "=================================="
  echo "CPU & TEMPERATURE SPIKE LOG"
  echo "Started: $(date)"
  echo "CPU Spike Threshold: ${CPU_SPIKE_THRESHOLD}%"
  echo "Temp Spike Threshold: ${TEMP_SPIKE_THRESHOLD}°C"
  echo "Temp Increase Threshold: ${TEMP_INCREASE_THRESHOLD}°C"
  echo "=================================="
  echo ""
} > "$SPIKE_LOG"

#######################################
# Extracts the CPU model from lscpu output.
# Inputs:  lscpu output on stdin
# Outputs: the value of the first "Model name:" field with runs of
#          whitespace collapsed; nothing when the field is absent
# Notes:   "BIOS Model name:" lines are ignored, the label may be indented,
#          and values that themselves contain ':' are kept intact.
#######################################
cpu_model_from_lscpu() {
  LC_ALL=C awk '
    /^[ \t]*Model name[ \t]*:/ {
      sub(/^[^:]*:[ \t]*/, "")
      gsub(/[ \t]+/, " ")
      sub(/ $/, "")
      print
      exit
    }'
}

# System Information
# lscpu runs under the C locale so the "Model name" label is not translated.
CPU_MODEL=$(LC_ALL=C lscpu 2> /dev/null | cpu_model_from_lscpu)
{
  echo "=== SYSTEM INFORMATION ==="
  echo "Hostname: $(hostname)"
  echo "Kernel: $(uname -r)"
  echo "CPU Model: ${CPU_MODEL:-unknown}"
  echo "CPU Cores: $(nproc)"
  echo ""
} >> "$LOG_FILE"

# Installed packages that might affect CPU/Fan.
# Everything in the group is appended to the main log; stderr is captured
# only for the commands whose errors the log is meant to record.
{
  echo "=== INSTALLED SOFTWARE ==="

  if command -v docker &> /dev/null; then
    echo "Docker version: $(docker --version)"
    echo "Docker containers:"
    docker ps -a 2>&1
    echo ""
  fi

  if command -v snap &> /dev/null; then
    echo "Snap packages:"
    snap list 2>&1
    echo ""
  fi

  echo "Recently installed packages (apt):"
  # Require readability, not mere existence, so grep cannot fail on
  # permissions.
  if [[ -r /var/log/apt/history.log ]]; then
    grep -A 2 "Install:" /var/log/apt/history.log | tail -20
  fi
  echo ""
} >> "$LOG_FILE"

# Temperature sensors available: dump the full `sensors` report once, or
# record how to install lm-sensors when the command is missing.
{
  echo "=== TEMPERATURE SENSORS ==="
  if command -v sensors &> /dev/null; then
    sensors 2>&1
  else
    echo "lm-sensors not installed. Install with: sudo apt install lm-sensors"
  fi
  echo ""
} >> "$LOG_FILE"

#######################################
# Extracts the overall CPU usage from `top -bn1` output.
# Inputs:  top batch-mode output on stdin
# Outputs: 100 minus the idle percentage found on the "Cpu(s)" summary
#          line; nothing at all when no idle figure can be located, so a
#          parse failure is never mistaken for 100% usage
#######################################
cpu_usage_from_top() {
  LC_ALL=C awk '
    /Cpu\(s\)/ && match($0, /[0-9]+(\.[0-9]+)?[ %]*id/) {
      idle = substr($0, RSTART, RLENGTH)
      sub(/[ %]*id$/, "", idle)
      print 100 - idle
      exit
    }'
}

#######################################
# Converts a millisecond count into a seconds string usable by sleep.
# Arguments: $1 - non-negative integer number of milliseconds
# Outputs:   seconds with three decimals, e.g. 200 -> 0.200
#######################################
ms_to_seconds() {
  local ms=$1
  printf '%d.%03d\n' "$((ms / 1000))" "$((ms % 1000))"
}

#######################################
# Appends one framed spike report to the spike log.
# Globals read: SPIKE_LOG, SPIKE_COUNT, TIMESTAMP_SAMPLE, SPIKE_REASONS,
#               CPU_USAGE, TEMP_C, PREV_TEMP, FAN_INFO, TOP_PROCESSES,
#               DOCKER_STATS
#######################################
log_spike() {
  local line
  {
    echo "╔═══════════════════════════════════════════════════════════════"
    echo "║ SPIKE #${SPIKE_COUNT} DETECTED at ${TIMESTAMP_SAMPLE}"
    echo "╠═══════════════════════════════════════════════════════════════"
    echo "║ Reason: ${SPIKE_REASONS}"
    echo "║ CPU Usage: ${CPU_USAGE}%"
    echo "║ CPU Temp: ${TEMP_C}°C (Previous: ${PREV_TEMP}°C)"

    if [[ -n "$FAN_INFO" ]]; then
      echo "║ Fan Status: ${FAN_INFO}"
    fi

    echo "╠═══════════════════════════════════════════════════════════════"
    echo "║ TOP CPU PROCESSES:"
    echo "╠═══════════════════════════════════════════════════════════════"
    while IFS= read -r line; do
      printf '║ %s\n' "$line"
    done <<< "$TOP_PROCESSES"

    if [[ -n "$DOCKER_STATS" ]]; then
      echo "╠═══════════════════════════════════════════════════════════════"
      echo "║ DOCKER CONTAINERS:"
      echo "╠═══════════════════════════════════════════════════════════════"
      while IFS= read -r line; do
        printf '║ %s\n' "$line"
      done <<< "$DOCKER_STATS"
    fi

    echo "╚═══════════════════════════════════════════════════════════════"
    echo ""
  } >> "$SPIKE_LOG"
}

# Monitoring loop
#
# Each pass records one sample in the main log, checks it against the spike
# thresholds and then pauses for INTERVAL_MS. The pause is added on top of
# the time the sampling commands themselves take, so samples are spaced
# further apart than INTERVAL_MS.
echo "=== MONITORING DATA ===" >> "$LOG_FILE"
echo "Starting monitoring for ${DURATION} seconds..." | tee -a "$LOG_FILE"

START_TIME=$(date +%s)
END_TIME=$((START_TIME + DURATION))
SAMPLE=0
SPIKE_COUNT=0

# Loop-invariant values, computed once instead of on every sample.
SLEEP_SECONDS=$(ms_to_seconds "${INTERVAL_MS:-200}")
THERMAL_ZONE_FILE=/sys/class/thermal/thermal_zone0/temp
HAVE_DOCKER=0
if command -v docker &> /dev/null; then HAVE_DOCKER=1; fi
HAVE_SENSORS=0
if command -v sensors &> /dev/null; then HAVE_SENSORS=1; fi

while [[ "$(date +%s)" -lt "$END_TIME" ]]; do
  SAMPLE=$((SAMPLE + 1))
  TIMESTAMP_SAMPLE=$(date +"%Y-%m-%d %H:%M:%S.%3N")

  echo "--- Sample #${SAMPLE} at ${TIMESTAMP_SAMPLE} ---" >> "$LOG_FILE"

  # CPU Temperature (the raw value is divided by 1000 to get °C).
  # TEMP_C stays 0 when the zone is missing, unreadable or not an integer.
  TEMP_C=0
  if [[ -r "$THERMAL_ZONE_FILE" ]]; then
    TEMP=$(cat "$THERMAL_ZONE_FILE" 2> /dev/null)
    if [[ "$TEMP" =~ ^-?[0-9]+$ ]]; then
      TEMP_C=$((TEMP / 1000))
      echo "CPU Temp: ${TEMP_C}°C" >> "$LOG_FILE"
    fi
  fi

  # Overall CPU usage; top runs under the C locale so decimals use '.'.
  CPU_USAGE=$(LC_ALL=C top -bn1 | cpu_usage_from_top)
  CPU_USAGE_INT=${CPU_USAGE%%.*}
  echo "CPU Usage: ${CPU_USAGE}%" >> "$LOG_FILE"

  # Top 5 CPU consuming processes (header row dropped)
  TOP_PROCESSES=$(ps aux --sort=-%cpu | head -6 | tail -5)
  {
    echo "Top CPU processes:"
    echo "$TOP_PROCESSES"
  } >> "$LOG_FILE"

  # Docker containers CPU usage (only while the daemon answers)
  DOCKER_STATS=""
  if ((HAVE_DOCKER)) && docker ps -q &> /dev/null; then
    DOCKER_STATS=$(docker stats --no-stream --format "table {{.Name}}\t{{.CPUPerc}}\t{{.MemUsage}}" 2>&1)
    {
      echo "Docker container stats:"
      echo "$DOCKER_STATS"
    } >> "$LOG_FILE"
  fi

  # Fan speed (if available)
  FAN_INFO=""
  if ((HAVE_SENSORS)); then
    FAN_INFO=$(sensors | grep -i "fan")
    if [[ -n "$FAN_INFO" ]]; then
      echo "Fan: $FAN_INFO" >> "$LOG_FILE"
    fi
  fi

  echo "" >> "$LOG_FILE"

  # SPIKE DETECTION
  SPIKE_DETECTED=0
  SPIKE_REASONS=""

  # Check CPU spike (skipped when the usage could not be parsed)
  if [[ "$CPU_USAGE_INT" =~ ^[0-9]+$ ]] && ((CPU_USAGE_INT > CPU_SPIKE_THRESHOLD)); then
    SPIKE_DETECTED=1
    SPIKE_REASONS="${SPIKE_REASONS}CPU usage high (${CPU_USAGE}% > ${CPU_SPIKE_THRESHOLD}%); "
  fi

  # Check temperature spike
  if ((TEMP_C > TEMP_SPIKE_THRESHOLD)); then
    SPIKE_DETECTED=1
    SPIKE_REASONS="${SPIKE_REASONS}High temperature (${TEMP_C}°C > ${TEMP_SPIKE_THRESHOLD}°C); "
  fi

  # Check temperature increase since the previous sample; a previous value
  # of 0 means "no reading yet" and is never compared against.
  if ((PREV_TEMP > 0)); then
    TEMP_DIFF=$((TEMP_C - PREV_TEMP))
    if ((TEMP_DIFF >= TEMP_INCREASE_THRESHOLD)); then
      SPIKE_DETECTED=1
      SPIKE_REASONS="${SPIKE_REASONS}Temperature jump (+${TEMP_DIFF}°C); "
    fi
  fi

  # Log spike if detected
  if ((SPIKE_DETECTED == 1)); then
    SPIKE_COUNT=$((SPIKE_COUNT + 1))
    log_spike
    # The progress line printed below starts with \r and overwrites this.
    printf "\r⚠ SPIKE #%d detected! " "$SPIKE_COUNT"
  fi

  # Update baseline and previous values: BASELINE_CPU accumulates the first
  # ten samples and is turned into their average on the tenth.
  PREV_TEMP=$TEMP_C
  if ((SAMPLE <= 10)) && [[ "$CPU_USAGE_INT" =~ ^[0-9]+$ ]]; then
    BASELINE_CPU=$((BASELINE_CPU + CPU_USAGE_INT))
    if ((SAMPLE == 10)); then
      BASELINE_CPU=$((BASELINE_CPU / 10))
    fi
  fi

  # Progress indicator (DURATION is > 0 whenever the loop body runs)
  CURRENT_TIME=$(date +%s)
  ELAPSED=$((CURRENT_TIME - START_TIME))
  PROGRESS=$((ELAPSED * 100 / DURATION))
  printf "\rProgress: %d%% (Sample #%d, %ds/%ds, Spikes: %d) " "$PROGRESS" "$SAMPLE" "$ELAPSED" "$DURATION" "$SPIKE_COUNT"

  sleep "$SLEEP_SECONDS"
done

# Final summary. The leading empty line also ends the \r-driven progress
# line on the terminal. Lines meant for both logs use a single tee with two
# targets; the rest go to the main log only.
echo "" | tee -a "$LOG_FILE"
echo "==================================" | tee -a "$LOG_FILE" "$SPIKE_LOG"
echo "Monitoring completed: $(date)" | tee -a "$LOG_FILE" "$SPIKE_LOG"
echo "Total samples: ${SAMPLE}" | tee -a "$LOG_FILE"
echo "Total spikes detected: ${SPIKE_COUNT}" | tee -a "$LOG_FILE" "$SPIKE_LOG"
echo "Main log: $LOG_FILE" | tee -a "$LOG_FILE"
echo "Spike log: $SPIKE_LOG" | tee -a "$LOG_FILE"
echo "==================================" | tee -a "$LOG_FILE" "$SPIKE_LOG"

# Terminal-only closing hints (not written to either log).
cat << EOF

✓ Monitoring complete!
 📊 Full log: $LOG_FILE
 ⚠️ Spike log: $SPIKE_LOG

Quick analysis commands:
 cat $SPIKE_LOG | less
 grep 'SPIKE #' $SPIKE_LOG
EOF