added spike detector
This commit is contained in:
parent
3f727ee1cd
commit
6eeed26f12
|
|
@ -7,16 +7,39 @@
|
||||||
# Default duration: 60 seconds (can be overridden by argument)
|
# Default duration: 60 seconds (can be overridden by argument)
|
||||||
DURATION=${1:-60}
|
DURATION=${1:-60}
|
||||||
INTERVAL_MS=200 # milliseconds
|
INTERVAL_MS=200 # milliseconds
|
||||||
LOG_FILE="fan_diagnostic_$(date +%Y%m%d_%H%M%S).log"
|
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
|
||||||
|
LOG_FILE="fan_diagnostic_${TIMESTAMP}.log"
|
||||||
|
SPIKE_LOG="fan_spikes_${TIMESTAMP}.log"
|
||||||
|
|
||||||
|
# Thresholds for spike detection.
# A sample is flagged as a spike as soon as any one of these limits is hit.
CPU_SPIKE_THRESHOLD=50     # percent: flagged when CPU usage is above this value
TEMP_SPIKE_THRESHOLD=70    # degrees C: flagged when the temperature is above this value
TEMP_INCREASE_THRESHOLD=5  # degrees C: flagged when the rise since the previous sample is at least this

# Baseline values
PREV_TEMP=0     # temperature of the previous sample; the jump check is skipped while this is 0
BASELINE_CPU=0  # CPU baseline, built up from the first samples of the run
SAMPLE_COUNT=0  # counter, starts at zero
|
||||||
|
|
||||||
echo "==================================" | tee -a "$LOG_FILE"
|
echo "==================================" | tee -a "$LOG_FILE"
|
||||||
echo "Linux Fan Diagnostic Script" | tee -a "$LOG_FILE"
|
echo "Linux Fan Diagnostic Script" | tee -a "$LOG_FILE"
|
||||||
echo "Started: $(date)" | tee -a "$LOG_FILE"
|
echo "Started: $(date)" | tee -a "$LOG_FILE"
|
||||||
echo "Duration: ${DURATION}s | Interval: ${INTERVAL_MS}ms" | tee -a "$LOG_FILE"
|
echo "Duration: ${DURATION}s | Interval: ${INTERVAL_MS}ms" | tee -a "$LOG_FILE"
|
||||||
echo "Log file: $LOG_FILE" | tee -a "$LOG_FILE"
|
echo "Main log: $LOG_FILE" | tee -a "$LOG_FILE"
|
||||||
|
echo "Spike log: $SPIKE_LOG" | tee -a "$LOG_FILE"
|
||||||
echo "==================================" | tee -a "$LOG_FILE"
|
echo "==================================" | tee -a "$LOG_FILE"
|
||||||
echo "" | tee -a "$LOG_FILE"
|
echo "" | tee -a "$LOG_FILE"
|
||||||
|
|
||||||
|
# Initialize spike log.
# Starts the file afresh (any previous content is truncated) and writes a
# header with the start time and the thresholds in effect. The grouped
# redirection opens the file once instead of once per line.
{
  echo "=================================="
  echo "CPU & TEMPERATURE SPIKE LOG"
  echo "Started: $(date)"
  echo "CPU Spike Threshold: ${CPU_SPIKE_THRESHOLD}%"
  echo "Temp Spike Threshold: ${TEMP_SPIKE_THRESHOLD}°C"
  echo "Temp Increase Threshold: ${TEMP_INCREASE_THRESHOLD}°C"
  echo "=================================="
  echo ""
} > "$SPIKE_LOG"
|
||||||
|
|
||||||
# System Information
|
# System Information
|
||||||
echo "=== SYSTEM INFORMATION ===" >> "$LOG_FILE"
|
echo "=== SYSTEM INFORMATION ===" >> "$LOG_FILE"
|
||||||
echo "Hostname: $(hostname)" >> "$LOG_FILE"
|
echo "Hostname: $(hostname)" >> "$LOG_FILE"
|
||||||
|
|
@ -62,14 +85,16 @@ echo "Starting monitoring for ${DURATION} seconds..." | tee -a "$LOG_FILE"
|
||||||
START_TIME=$(date +%s)
|
START_TIME=$(date +%s)
|
||||||
END_TIME=$((START_TIME + DURATION))
|
END_TIME=$((START_TIME + DURATION))
|
||||||
SAMPLE=0
|
SAMPLE=0
|
||||||
|
SPIKE_COUNT=0
|
||||||
|
|
||||||
while [ $(date +%s) -lt $END_TIME ]; do
|
while [ $(date +%s) -lt $END_TIME ]; do
|
||||||
SAMPLE=$((SAMPLE + 1))
|
SAMPLE=$((SAMPLE + 1))
|
||||||
TIMESTAMP=$(date +"%Y-%m-%d %H:%M:%S.%3N")
|
TIMESTAMP_SAMPLE=$(date +"%Y-%m-%d %H:%M:%S.%3N")
|
||||||
|
|
||||||
echo "--- Sample #${SAMPLE} at ${TIMESTAMP} ---" >> "$LOG_FILE"
|
echo "--- Sample #${SAMPLE} at ${TIMESTAMP_SAMPLE} ---" >> "$LOG_FILE"
|
||||||
|
|
||||||
# CPU Temperature
|
# CPU Temperature
|
||||||
|
TEMP_C=0
|
||||||
if [ -f /sys/class/thermal/thermal_zone0/temp ]; then
|
if [ -f /sys/class/thermal/thermal_zone0/temp ]; then
|
||||||
TEMP=$(cat /sys/class/thermal/thermal_zone0/temp)
|
TEMP=$(cat /sys/class/thermal/thermal_zone0/temp)
|
||||||
TEMP_C=$((TEMP / 1000))
|
TEMP_C=$((TEMP / 1000))
|
||||||
|
|
@ -78,44 +103,126 @@ while [ $(date +%s) -lt $END_TIME ]; do
|
||||||
|
|
||||||
# Overall CPU usage
|
# Overall CPU usage
|
||||||
CPU_USAGE=$(top -bn1 | grep "Cpu(s)" | sed "s/.*, *\([0-9.]*\)%* id.*/\1/" | awk '{print 100 - $1}')
|
CPU_USAGE=$(top -bn1 | grep "Cpu(s)" | sed "s/.*, *\([0-9.]*\)%* id.*/\1/" | awk '{print 100 - $1}')
|
||||||
|
CPU_USAGE_INT=$(echo "$CPU_USAGE" | cut -d'.' -f1)
|
||||||
echo "CPU Usage: ${CPU_USAGE}%" >> "$LOG_FILE"
|
echo "CPU Usage: ${CPU_USAGE}%" >> "$LOG_FILE"
|
||||||
|
|
||||||
# Top 5 CPU consuming processes
|
# Top 5 CPU consuming processes
|
||||||
|
TOP_PROCESSES=$(ps aux --sort=-%cpu | head -6 | tail -5)
|
||||||
echo "Top CPU processes:" >> "$LOG_FILE"
|
echo "Top CPU processes:" >> "$LOG_FILE"
|
||||||
ps aux --sort=-%cpu | head -6 | tail -5 >> "$LOG_FILE"
|
echo "$TOP_PROCESSES" >> "$LOG_FILE"
|
||||||
|
|
||||||
# Docker containers CPU usage (if docker is running)
|
# Docker containers CPU usage (if docker is running)
|
||||||
|
DOCKER_STATS=""
|
||||||
if command -v docker &> /dev/null && docker ps -q &> /dev/null; then
|
if command -v docker &> /dev/null && docker ps -q &> /dev/null; then
|
||||||
|
DOCKER_STATS=$(docker stats --no-stream --format "table {{.Name}}\t{{.CPUPerc}}\t{{.MemUsage}}" 2>&1)
|
||||||
echo "Docker container stats:" >> "$LOG_FILE"
|
echo "Docker container stats:" >> "$LOG_FILE"
|
||||||
docker stats --no-stream --format "table {{.Name}}\t{{.CPUPerc}}\t{{.MemUsage}}" >> "$LOG_FILE" 2>&1
|
echo "$DOCKER_STATS" >> "$LOG_FILE"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Fan speed (if available)
|
# Fan speed (if available)
|
||||||
|
FAN_INFO=""
|
||||||
if command -v sensors &> /dev/null; then
|
if command -v sensors &> /dev/null; then
|
||||||
FAN_SPEED=$(sensors | grep -i "fan" | head -1)
|
FAN_INFO=$(sensors | grep -i "fan")
|
||||||
if [ ! -z "$FAN_SPEED" ]; then
|
if [ ! -z "$FAN_INFO" ]; then
|
||||||
echo "Fan: $FAN_SPEED" >> "$LOG_FILE"
|
echo "Fan: $FAN_INFO" >> "$LOG_FILE"
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "" >> "$LOG_FILE"
|
echo "" >> "$LOG_FILE"
|
||||||
|
|
||||||
|
# SPIKE DETECTION
# Decides whether the current sample is a spike and, if so, appends a boxed
# report to the spike log. A sample is a spike when CPU usage is above
# CPU_SPIKE_THRESHOLD, the temperature is above TEMP_SPIKE_THRESHOLD, or the
# temperature rose by at least TEMP_INCREASE_THRESHOLD since the previous
# sample. Every rule that fires adds its own entry to SPIKE_REASONS.
SPIKE_DETECTED=0
SPIKE_REASONS=""

# The readings are parsed out of tool output, so confirm each one is an
# integer before it reaches a numeric test; a malformed value would otherwise
# make `[` print "integer expression expected" on every sample.
SPIKE_INT_RE='^-?[0-9]+$'

# Check CPU spike
if [[ "$CPU_USAGE_INT" =~ $SPIKE_INT_RE ]] \
  && [ "$CPU_USAGE_INT" -gt "$CPU_SPIKE_THRESHOLD" ]; then
  SPIKE_DETECTED=1
  SPIKE_REASONS="${SPIKE_REASONS}CPU usage high (${CPU_USAGE}% > ${CPU_SPIKE_THRESHOLD}%); "
fi

if [[ "$TEMP_C" =~ $SPIKE_INT_RE ]]; then
  # Check temperature spike
  if [ "$TEMP_C" -gt "$TEMP_SPIKE_THRESHOLD" ]; then
    SPIKE_DETECTED=1
    SPIKE_REASONS="${SPIKE_REASONS}High temperature (${TEMP_C}°C > ${TEMP_SPIKE_THRESHOLD}°C); "
  fi

  # Check temperature increase (skipped while there is no positive previous reading)
  if [[ "$PREV_TEMP" =~ $SPIKE_INT_RE ]] && [ "$PREV_TEMP" -gt 0 ]; then
    TEMP_DIFF=$((TEMP_C - PREV_TEMP))
    if [ "$TEMP_DIFF" -ge "$TEMP_INCREASE_THRESHOLD" ]; then
      SPIKE_DETECTED=1
      SPIKE_REASONS="${SPIKE_REASONS}Temperature jump (+${TEMP_DIFF}°C); "
    fi
  fi
fi

# Log spike if detected
if [ "$SPIKE_DETECTED" -eq 1 ]; then
  SPIKE_COUNT=$((SPIKE_COUNT + 1))

  # One grouped redirection: the spike log is opened once per report.
  {
    echo "╔═══════════════════════════════════════════════════════════════"
    echo "║ SPIKE #${SPIKE_COUNT} DETECTED at ${TIMESTAMP_SAMPLE}"
    echo "╠═══════════════════════════════════════════════════════════════"
    echo "║ Reason: ${SPIKE_REASONS}"
    echo "║ CPU Usage: ${CPU_USAGE}%"
    echo "║ CPU Temp: ${TEMP_C}°C (Previous: ${PREV_TEMP}°C)"

    if [ -n "$FAN_INFO" ]; then
      # FAN_INFO may hold several lines; prefix each so the box stays intact.
      while IFS= read -r line; do
        echo "║ Fan Status: ${line}"
      done <<< "$FAN_INFO"
    fi

    echo "╠═══════════════════════════════════════════════════════════════"
    echo "║ TOP CPU PROCESSES:"
    echo "╠═══════════════════════════════════════════════════════════════"
    while IFS= read -r line; do
      echo "║ $line"
    done <<< "$TOP_PROCESSES"

    if [ -n "$DOCKER_STATS" ]; then
      echo "╠═══════════════════════════════════════════════════════════════"
      echo "║ DOCKER CONTAINERS:"
      echo "╠═══════════════════════════════════════════════════════════════"
      while IFS= read -r line; do
        echo "║ $line"
      done <<< "$DOCKER_STATS"
    fi

    echo "╚═══════════════════════════════════════════════════════════════"
    echo ""
  } >> "$SPIKE_LOG"

  # Console notice; \r returns to the start of the current terminal line.
  printf "\r⚠ SPIKE #%d detected! " "$SPIKE_COUNT"
fi
|
||||||
|
|
||||||
|
# Update baseline and previous values
# PREV_TEMP feeds the next sample's temperature-jump check.
PREV_TEMP=$TEMP_C

# BASELINE_CPU is the average CPU usage over the usable readings among the
# first 10 samples. It is kept as a running average, so a missing or malformed
# reading is simply left out instead of skewing a fixed divide-by-10, and the
# value is meaningful even if sample 10 itself has no reading.
# SAMPLE_COUNT counts the readings that went into the average.
if [ "${SAMPLE:-0}" -le 10 ] && [[ "$CPU_USAGE_INT" =~ ^[0-9]+$ ]]; then
  SAMPLE_COUNT=$((${SAMPLE_COUNT:-0} + 1))
  # 10# forces base 10 so a reading such as "08" is not parsed as octal.
  BASELINE_CPU_SUM=$((${BASELINE_CPU_SUM:-0} + 10#$CPU_USAGE_INT))
  BASELINE_CPU=$((BASELINE_CPU_SUM / SAMPLE_COUNT))
fi
|
||||||
|
|
||||||
# Progress indicator
|
# Progress indicator
|
||||||
CURRENT_TIME=$(date +%s)
|
CURRENT_TIME=$(date +%s)
|
||||||
ELAPSED=$((CURRENT_TIME - START_TIME))
|
ELAPSED=$((CURRENT_TIME - START_TIME))
|
||||||
PROGRESS=$((ELAPSED * 100 / DURATION))
|
PROGRESS=$((ELAPSED * 100 / DURATION))
|
||||||
printf "\rProgress: %d%% (Sample #%d, %ds/%ds) " "$PROGRESS" "$SAMPLE" "$ELAPSED" "$DURATION"
|
printf "\rProgress: %d%% (Sample #%d, %ds/%ds, Spikes: %d) " "$PROGRESS" "$SAMPLE" "$ELAPSED" "$DURATION" "$SPIKE_COUNT"
|
||||||
|
|
||||||
sleep 0.2
|
sleep 0.2
|
||||||
done
|
done
|
||||||
|
|
||||||
echo "" | tee -a "$LOG_FILE"
|
echo "" | tee -a "$LOG_FILE"
|
||||||
echo "==================================" | tee -a "$LOG_FILE"
|
echo "==================================" | tee -a "$LOG_FILE" | tee -a "$SPIKE_LOG"
|
||||||
echo "Monitoring completed: $(date)" | tee -a "$LOG_FILE"
|
echo "Monitoring completed: $(date)" | tee -a "$LOG_FILE" | tee -a "$SPIKE_LOG"
|
||||||
echo "Total samples: ${SAMPLE}" | tee -a "$LOG_FILE"
|
echo "Total samples: ${SAMPLE}" | tee -a "$LOG_FILE"
|
||||||
echo "Log saved to: $LOG_FILE" | tee -a "$LOG_FILE"
|
echo "Total spikes detected: ${SPIKE_COUNT}" | tee -a "$LOG_FILE" | tee -a "$SPIKE_LOG"
|
||||||
echo "==================================" | tee -a "$LOG_FILE"
|
echo "Main log: $LOG_FILE" | tee -a "$LOG_FILE"
|
||||||
|
echo "Spike log: $SPIKE_LOG" | tee -a "$LOG_FILE"
|
||||||
|
echo "==================================" | tee -a "$LOG_FILE" | tee -a "$SPIKE_LOG"
|
||||||
echo ""
|
echo ""
|
||||||
echo "To analyze the log, send the file to me or run:"
|
echo "✓ Monitoring complete!"
|
||||||
echo " grep 'CPU Temp:' $LOG_FILE | sort -k3 -n | tail -10"
|
echo " 📊 Full log: $LOG_FILE"
|
||||||
echo " grep 'CPU Usage:' $LOG_FILE | sort -k3 -n | tail -10"
|
echo " ⚠️ Spike log: $SPIKE_LOG"
|
||||||
|
# Closing hints: a blank line, then ready-to-run commands for inspecting the
# spike log. The here-document expands $SPIKE_LOG and prints everything else
# literally.
cat <<EOF

Quick analysis commands:
 cat $SPIKE_LOG | less
 grep 'SPIKE #' $SPIKE_LOG
EOF
|
||||||
Loading…
Reference in a new issue