#!/bin/bash
#
# Linux Fan Diagnostic Script
#
# Samples overall CPU usage, the temperature reported by thermal_zone0, the
# top CPU-consuming processes, Docker container stats and fan readings at a
# fixed interval, and records every sample in a main log. Samples that cross
# one of the spike thresholds below are additionally written to a spike log.
#
# Usage: ./fan_diagnostic.sh [duration_in_seconds]
#   duration_in_seconds  non-negative integer, default 60
#
# Output files (created in the current directory):
#   fan_diagnostic_<timestamp>.log  every sample plus system information
#   fan_spikes_<timestamp>.log      only the samples flagged as spikes
#
# Exit status: 0 on completion, 1 if the log file cannot be written,
#              2 on an invalid duration argument.

set -u

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

DURATION=${1:-60}
if ! [[ $DURATION =~ ^[0-9]+$ ]]; then
  printf 'Usage: %s [duration_in_seconds]\n' "${0##*/}" >&2
  exit 2
fi
# Force base 10 so an argument such as "08" is not parsed as invalid octal.
DURATION=$((10#$DURATION))

readonly INTERVAL_MS=200 # milliseconds between samples
# Derive the sleep argument from INTERVAL_MS so the two cannot drift apart.
printf -v SLEEP_SECS '%d.%03d' "$((INTERVAL_MS / 1000))" "$((INTERVAL_MS % 1000))"
readonly SLEEP_SECS

TIMESTAMP=$(date +%Y%m%d_%H%M%S)
readonly LOG_FILE="fan_diagnostic_${TIMESTAMP}.log"
readonly SPIKE_LOG="fan_spikes_${TIMESTAMP}.log"

# NOTE(review): thermal_zone0 is labelled "CPU Temp" in the logs, but which
# sensor zone 0 maps to is platform dependent - confirm on the target machine.
readonly THERMAL_ZONE=/sys/class/thermal/thermal_zone0/temp

# Thresholds for spike detection
readonly CPU_SPIKE_THRESHOLD=50    # CPU usage above 50%
readonly TEMP_SPIKE_THRESHOLD=70   # Temperature above 70°C
readonly TEMP_INCREASE_THRESHOLD=5 # Temperature increase of 5°C or more

# The first BASELINE_WINDOW valid CPU readings are averaged into a baseline
# that is reported in the final summary.
readonly BASELINE_WINDOW=10

# Running state
PREV_TEMP=0
BASELINE_SUM=0
BASELINE_SAMPLES=0
SAMPLE=0
SPIKE_COUNT=0
CPU_TOTAL_TICKS=0
CPU_IDLE_TICKS=0
CPU_USAGE=""
CPU_USAGE_INT=""

# Tool availability does not change while the script runs, so probe once
# instead of on every sample.
HAVE_DOCKER=0
if command -v docker &> /dev/null; then
  HAVE_DOCKER=1
fi
HAVE_SENSORS=0
if command -v sensors &> /dev/null; then
  HAVE_SENSORS=1
fi

# Fail early with a clear message instead of a stream of tee errors.
{ : >> "$LOG_FILE"; } 2> /dev/null || die "Cannot write to $LOG_FILE in $PWD"

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

#######################################
# Read the aggregate "cpu" line of /proc/stat.
# Globals:   CPU_TOTAL_TICKS, CPU_IDLE_TICKS (written)
# Returns:   0 on success, 1 if /proc/stat is unreadable or malformed
#######################################
read_cpu_counters() {
  local label user nice system idle iowait irq softirq steal rest
  [[ -r /proc/stat ]] || return 1
  read -r label user nice system idle iowait irq softirq steal rest \
    < /proc/stat || return 1
  [[ $label == cpu ]] || return 1
  # Only the "idle" column counts as idle, so I/O wait is reported as usage,
  # matching the previous "100 - idle%" calculation. The guest columns (left
  # in $rest) are already accounted for in user/nice and are not added again.
  CPU_IDLE_TICKS=${idle:-0}
  CPU_TOTAL_TICKS=$((${user:-0} + ${nice:-0} + ${system:-0} + ${idle:-0} \
    + ${iowait:-0} + ${irq:-0} + ${softirq:-0} + ${steal:-0}))
}

#######################################
# Compute CPU usage since the previous call from /proc/stat deltas.
# Reading the kernel counters directly measures exactly the sampling window,
# does not depend on the locale's decimal separator, and avoids starting a
# process monitor on every sample.
# Globals:   CPU_TOTAL_TICKS, CPU_IDLE_TICKS (read and written)
#            CPU_USAGE     (written) percentage with one decimal, or ""
#            CPU_USAGE_INT (written) integer part of CPU_USAGE, or ""
# Returns:   0 on success, 1 if the counters could not be read
#######################################
sample_cpu_usage() {
  local prev_total=$CPU_TOTAL_TICKS prev_idle=$CPU_IDLE_TICKS
  local d_total d_idle tenths=0
  CPU_USAGE=""
  CPU_USAGE_INT=""
  read_cpu_counters || return 1
  d_total=$((CPU_TOTAL_TICKS - prev_total))
  d_idle=$((CPU_IDLE_TICKS - prev_idle))
  if ((d_total > 0)); then
    # Usage in tenths of a percent, rounded to nearest.
    tenths=$(((1000 * (d_total - d_idle) + d_total / 2) / d_total))
  fi
  # Clamp defensively in case the counters are inconsistent between reads.
  if ((tenths < 0)); then
    tenths=0
  elif ((tenths > 1000)); then
    tenths=1000
  fi
  CPU_USAGE="$((tenths / 10)).$((tenths % 10))"
  CPU_USAGE_INT=$((tenths / 10))
}

# Prefix every line of $1 with the spike-log box border.
box_lines() {
  sed 's/^/║ /' <<< "$1"
}

# ---------------------------------------------------------------------------
# Log headers
# ---------------------------------------------------------------------------

{
  echo "=================================="
  echo "Linux Fan Diagnostic Script"
  echo "Started: $(date)"
  echo "Duration: ${DURATION}s | Interval: ${INTERVAL_MS}ms"
  echo "Main log: $LOG_FILE"
  echo "Spike log: $SPIKE_LOG"
  echo "=================================="
  echo ""
} | tee -a "$LOG_FILE"

# Initialize spike log
{
  echo "=================================="
  echo "CPU & TEMPERATURE SPIKE LOG"
  echo "Started: $(date)"
  echo "CPU Spike Threshold: ${CPU_SPIKE_THRESHOLD}%"
  echo "Temp Spike Threshold: ${TEMP_SPIKE_THRESHOLD}°C"
  echo "Temp Increase Threshold: ${TEMP_INCREASE_THRESHOLD}°C"
  echo "=================================="
  echo ""
} > "$SPIKE_LOG"

# ---------------------------------------------------------------------------
# One-off system snapshot (main log only)
# ---------------------------------------------------------------------------

{
  # System Information
  echo "=== SYSTEM INFORMATION ==="
  echo "Hostname: $(hostname)"
  echo "Kernel: $(uname -r)"
  # Match only the line that starts with "Model name:" and stop at the first
  # hit, so related labels such as "BIOS Model name" are not picked up too.
  cpu_model=$(LC_ALL=C lscpu 2> /dev/null | awk '
    /^[[:space:]]*Model name:/ { sub(/^[^:]*:[[:space:]]*/, ""); print; exit }')
  echo "CPU Model: ${cpu_model}"
  echo "CPU Cores: $(nproc)"
  echo ""

  # Installed packages that might affect CPU/Fan
  echo "=== INSTALLED SOFTWARE ==="
  if ((HAVE_DOCKER)); then
    echo "Docker version: $(docker --version)"
    echo "Docker containers:"
    docker ps -a 2>&1
    echo ""
  fi

  if command -v snap &> /dev/null; then
    echo "Snap packages:"
    snap list 2>&1
    echo ""
  fi

  echo "Recently installed packages (apt):"
  if [[ -f /var/log/apt/history.log ]]; then
    grep -A 2 "Install:" /var/log/apt/history.log | tail -20
  fi
  echo ""

  # Temperature sensors available
  echo "=== TEMPERATURE SENSORS ==="
  if ((HAVE_SENSORS)); then
    sensors 2>&1
  else
    echo "lm-sensors not installed. Install with: sudo apt install lm-sensors"
  fi
  echo ""

  echo "=== MONITORING DATA ==="
} >> "$LOG_FILE"

# ---------------------------------------------------------------------------
# Monitoring loop
# ---------------------------------------------------------------------------

echo "Starting monitoring for ${DURATION} seconds..." | tee -a "$LOG_FILE"

# Prime the CPU counters and wait one interval so that the first sample is
# measured over a full window instead of a few milliseconds.
read_cpu_counters || true
sleep "$SLEEP_SECS"

# SECONDS is maintained by bash itself, so the loop condition and the
# progress display need no extra `date` processes.
SECONDS=0

while ((SECONDS < DURATION)); do
  SAMPLE=$((SAMPLE + 1))
  TIMESTAMP_SAMPLE=$(date +"%Y-%m-%d %H:%M:%S.%3N")

  # Overall CPU usage since the previous sample.
  sample_cpu_usage || true

  # Temperature in whole degrees Celsius; the sysfs value is in millidegrees.
  # TEMP_C stays 0 when the zone is missing or does not return an integer.
  TEMP_C=0
  TEMP_READ=0
  if [[ -f $THERMAL_ZONE ]] \
    && read -r raw_temp 2> /dev/null < "$THERMAL_ZONE" \
    && [[ $raw_temp =~ ^-?[0-9]+$ ]]; then
    TEMP_C=$((raw_temp / 1000))
    TEMP_READ=1
  fi

  # Top 5 CPU consuming processes (header line skipped).
  # NOTE: ps(1) documents %CPU as an average over the process lifetime, so a
  # short burst from a long-running process may rank lower than expected.
  TOP_PROCESSES=$(ps aux --sort=-%cpu | head -6 | tail -5)

  # Docker containers CPU usage (only if the daemon answers).
  # NOTE: each docker call is a separate client round-trip, so samples may be
  # spaced wider than INTERVAL_MS on hosts where docker is present.
  DOCKER_STATS=""
  DOCKER_SAMPLED=0
  if ((HAVE_DOCKER)) && docker ps -q &> /dev/null; then
    DOCKER_STATS=$(docker stats --no-stream \
      --format "table {{.Name}}\t{{.CPUPerc}}\t{{.MemUsage}}" 2>&1)
    DOCKER_SAMPLED=1
  fi

  # Fan speed (if available)
  FAN_INFO=""
  if ((HAVE_SENSORS)); then
    FAN_INFO=$(sensors | grep -i "fan")
  fi

  # Write the sample to the main log in one go.
  {
    echo "--- Sample #${SAMPLE} at ${TIMESTAMP_SAMPLE} ---"
    if ((TEMP_READ)); then
      echo "CPU Temp: ${TEMP_C}°C"
    fi
    echo "CPU Usage: ${CPU_USAGE:-n/a}%"
    echo "Top CPU processes:"
    echo "$TOP_PROCESSES"
    if ((DOCKER_SAMPLED)); then
      echo "Docker container stats:"
      echo "$DOCKER_STATS"
    fi
    if [[ -n $FAN_INFO ]]; then
      echo "Fan: $FAN_INFO"
    fi
    echo ""
  } >> "$LOG_FILE"

  # SPIKE DETECTION
  SPIKE_DETECTED=0
  SPIKE_REASONS=""

  # Check CPU spike (skipped when no CPU reading is available)
  if [[ -n $CPU_USAGE_INT ]] && ((CPU_USAGE_INT > CPU_SPIKE_THRESHOLD)); then
    SPIKE_DETECTED=1
    SPIKE_REASONS="${SPIKE_REASONS}CPU usage high (${CPU_USAGE}% > ${CPU_SPIKE_THRESHOLD}%); "
  fi

  # Check temperature spike
  if ((TEMP_C > TEMP_SPIKE_THRESHOLD)); then
    SPIKE_DETECTED=1
    SPIKE_REASONS="${SPIKE_REASONS}High temperature (${TEMP_C}°C > ${TEMP_SPIKE_THRESHOLD}°C); "
  fi

  # Check temperature increase; PREV_TEMP is 0 until a reading exists, which
  # keeps the very first sample from being reported as a jump.
  if ((PREV_TEMP > 0)); then
    TEMP_DIFF=$((TEMP_C - PREV_TEMP))
    if ((TEMP_DIFF >= TEMP_INCREASE_THRESHOLD)); then
      SPIKE_DETECTED=1
      SPIKE_REASONS="${SPIKE_REASONS}Temperature jump (+${TEMP_DIFF}°C); "
    fi
  fi

  # Log spike if detected
  if ((SPIKE_DETECTED)); then
    SPIKE_COUNT=$((SPIKE_COUNT + 1))
    {
      echo "╔═══════════════════════════════════════════════════════════════"
      echo "║ SPIKE #${SPIKE_COUNT} DETECTED at ${TIMESTAMP_SAMPLE}"
      echo "╠═══════════════════════════════════════════════════════════════"
      echo "║ Reason: ${SPIKE_REASONS}"
      echo "║ CPU Usage: ${CPU_USAGE}%"
      echo "║ CPU Temp: ${TEMP_C}°C (Previous: ${PREV_TEMP}°C)"
      if [[ -n $FAN_INFO ]]; then
        echo "║ Fan Status: ${FAN_INFO}"
      fi
      echo "╠═══════════════════════════════════════════════════════════════"
      echo "║ TOP CPU PROCESSES:"
      echo "╠═══════════════════════════════════════════════════════════════"
      box_lines "$TOP_PROCESSES"
      if [[ -n $DOCKER_STATS ]]; then
        echo "╠═══════════════════════════════════════════════════════════════"
        echo "║ DOCKER CONTAINERS:"
        echo "╠═══════════════════════════════════════════════════════════════"
        box_lines "$DOCKER_STATS"
      fi
      echo "╚═══════════════════════════════════════════════════════════════"
      echo ""
    } >> "$SPIKE_LOG"

    # End the notice with a newline: the progress line below starts with a
    # carriage return and would otherwise overwrite it immediately. The
    # padding wipes whatever is left of the previous progress line.
    printf '\r%-72s\n' "⚠ SPIKE #${SPIKE_COUNT} detected! ${SPIKE_REASONS}"
  fi

  # Update baseline and previous values
  PREV_TEMP=$TEMP_C
  if ((BASELINE_SAMPLES < BASELINE_WINDOW)) && [[ -n $CPU_USAGE_INT ]]; then
    BASELINE_SUM=$((BASELINE_SUM + CPU_USAGE_INT))
    BASELINE_SAMPLES=$((BASELINE_SAMPLES + 1))
  fi

  # Progress indicator. DURATION is > 0 whenever the loop body runs. A slow
  # sample can overshoot the deadline, so the percentage is capped at 100.
  ELAPSED=$SECONDS
  PROGRESS=$((ELAPSED * 100 / DURATION))
  if ((PROGRESS > 100)); then
    PROGRESS=100
  fi
  printf "\rProgress: %d%% (Sample #%d, %ds/%ds, Spikes: %d) " \
    "$PROGRESS" "$SAMPLE" "$ELAPSED" "$DURATION" "$SPIKE_COUNT"

  sleep "$SLEEP_SECS"
done

# ---------------------------------------------------------------------------
# Summary
# ---------------------------------------------------------------------------

echo "" | tee -a "$LOG_FILE"
echo "==================================" | tee -a "$LOG_FILE" | tee -a "$SPIKE_LOG"
echo "Monitoring completed: $(date)" | tee -a "$LOG_FILE" | tee -a "$SPIKE_LOG"
echo "Total samples: ${SAMPLE}" | tee -a "$LOG_FILE"
if ((BASELINE_SAMPLES > 0)); then
  # Average over the readings actually collected, so short runs and skipped
  # samples still produce a correct figure.
  BASELINE_CPU=$((BASELINE_SUM / BASELINE_SAMPLES))
  echo "Baseline CPU (first ${BASELINE_SAMPLES} samples): ${BASELINE_CPU}%" | tee -a "$LOG_FILE"
fi
echo "Total spikes detected: ${SPIKE_COUNT}" | tee -a "$LOG_FILE" | tee -a "$SPIKE_LOG"
echo "Main log: $LOG_FILE" | tee -a "$LOG_FILE"
echo "Spike log: $SPIKE_LOG" | tee -a "$LOG_FILE"
echo "==================================" | tee -a "$LOG_FILE" | tee -a "$SPIKE_LOG"

echo ""
echo "✓ Monitoring complete!"
echo " 📊 Full log: $LOG_FILE"
echo " ⚠️ Spike log: $SPIKE_LOG"
echo ""
echo "Quick analysis commands:"
echo " cat $SPIKE_LOG | less"
echo " grep 'SPIKE #' $SPIKE_LOG"