Alerts about your node failures
No Grafana or Prometheus required: a simple Bash script that sends Telegram alerts when your node misbehaves.
Install necessary dependencies:
# curl performs the node RPC and Telegram API requests; jq parses the JSON responses
sudo apt update && sudo apt install -y curl jq
Save the script to a file, for example:
# No sudo: the file lives in your own home directory, and creating it as root
# would leave it root-owned so you could not edit it later without sudo.
nano "$HOME/story_monitor.sh"
Paste the script content into the file and save.
Script (replace the TELEGRAM_TOKEN and TELEGRAM_CHAT_ID values with your own bot token and chat ID):
#!/bin/bash
# === CONFIGURATION ===

# Telegram bot credentials used by send_telegram.
TELEGRAM_CHAT_ID='YOUR_TELEGRAM_CHAT_ID'
TELEGRAM_TOKEN='YOUR_TELEGRAM_BOT_TOKEN'

# Pause between two monitoring passes, in seconds.
CHECK_INTERVAL=60

# State files: time of the last alert sent, and the block height seen on the
# previous pass.
ALERT_FILE='/tmp/story_last_alert'
LAST_BLOCK_FILE='/tmp/last_story_block_height'

# Alert thresholds: fewer peers than MIN_PEERS, or a usage percentage at or
# above the matching *_LIMIT, adds a line to the alert message.
MIN_PEERS=3
DISK_LIMIT=90
MEM_LIMIT=80
CPU_LIMIT=80
# === FUNCTIONS ===
#######################################
# Log a message locally and send it to the configured Telegram chat.
# Globals:   TELEGRAM_TOKEN (read), TELEGRAM_CHAT_ID (read)
# Arguments: $1 - message text; backslash escapes such as \n are expanded
# Outputs:   writes "<date>: <message>" to stdout
# Returns:   exit status of curl
#######################################
send_telegram() {
  local message
  # Expand escapes once so the log line and the Telegram message carry the
  # same text. Previously only the log got real newlines, while Telegram
  # received the literal characters "\n".
  message=$(printf '%b' "$1")
  printf '%s: %s\n' "$(date)" "$message"
  # --data-urlencode keeps characters like '%', '&' and newlines intact in the
  # form body; --max-time stops a stalled connection from blocking the caller.
  curl -s --max-time 10 -X POST \
    "https://api.telegram.org/bot${TELEGRAM_TOKEN}/sendMessage" \
    --data-urlencode "chat_id=${TELEGRAM_CHAT_ID}" \
    --data-urlencode "text=${message}" > /dev/null
}
#######################################
# Rate-limit alerts: succeed at most once per cooldown period.
# Globals:   ALERT_FILE (read/written) - stores the epoch time of the last
#            permitted alert; ALERT_COOLDOWN (read, optional) - cooldown in
#            seconds, defaults to 600
# Arguments: none
# Returns:   0 if an alert may be sent now (and records the current time),
#            1 if the previous alert is still within the cooldown
#######################################
can_alert() {
  local now last=0
  local -r cooldown="${ALERT_COOLDOWN:-600}"
  now=$(date +%s)
  if [[ -r "$ALERT_FILE" ]]; then
    # read fails on an empty file or a missing trailing newline; that is fine,
    # the value is validated below.
    IFS= read -r last < "$ALERT_FILE" || true
  fi
  # A missing, empty or corrupt state file (e.g. a truncated write on a full
  # disk) must not break the arithmetic below and silence alerts forever.
  [[ "$last" =~ ^[0-9]+$ ]] || last=0
  # 10# forces base 10 so a value with leading zeros is not parsed as octal.
  if (( now - 10#$last > cooldown )); then
    printf '%s\n' "$now" > "$ALERT_FILE"
    return 0
  fi
  return 1
}
# === MAIN LOOP ===
# Every CHECK_INTERVAL seconds: query the node RPC, sample host CPU/RAM/disk
# usage, collect every failed check into one message and send it through
# send_telegram, subject to the can_alert rate limit.

# The RPC port is "<STORY_PORT>657". STORY_PORT is not set anywhere in this
# script, so when the environment does not provide it (e.g. under systemd) fall
# back to prefix 26, i.e. the default CometBFT RPC port 26657, instead of
# querying the bogus port 657. Export STORY_PORT if your node uses another
# prefix.
RPC_URL="http://localhost:${STORY_PORT:-26}657"

while true; do
  # Fetch node status. --max-time keeps a hung RPC from blocking the monitor
  # forever; a timeout leaves $status empty and is reported as unreachable.
  status=$(curl -s --max-time 10 "$RPC_URL/status")
  if [[ -z "$status" ]]; then
    if can_alert; then
      send_telegram "🚨 Node unreachable! No response from your node"
    fi
    sleep "$CHECK_INTERVAL"
    continue
  fi
  catching_up=$(jq -r .result.sync_info.catching_up <<< "$status")
  height=$(jq -r .result.sync_info.latest_block_height <<< "$status")

  # Fetch peer information
  netinfo=$(curl -s --max-time 10 "$RPC_URL/net_info")
  peers=$(jq -r .result.n_peers <<< "$netinfo")

  # CPU usage = 100 - idle. The idle figure is located by pattern rather than
  # by field number: top prints "ni,100.0 id" without a space when the CPU is
  # fully idle, which shifts the fields and used to yield a false 100% load.
  # The optional '%' covers older top versions that print "98.0%id".
  # LC_ALL=C forces '.' as the decimal separator.
  cpu_load=$(LC_ALL=C top -bn1 | awk '
    /Cpu\(s\)/ && match($0, /[0-9.]+[ \t%]*id/) {
      idle = substr($0, RSTART, RLENGTH)
      sub(/[ \t%]*id$/, "", idle)
      print 100 - idle
      exit
    }')

  # RAM usage: used / total, rounded to a whole percent. LC_ALL=C keeps the
  # "Mem:" row label from being localized.
  mem_load=$(LC_ALL=C free | awk '/Mem:/ { printf "%.0f", $3 / $2 * 100 }')

  # Disk usage of the root filesystem. -P guarantees one line per filesystem,
  # so the percentage is always the second-to-last field of the last line.
  disk_usage=$(df -P / | awk 'END { print $(NF-1) }' | tr -d '%')

  ALERT_MSG=""

  # Block growth check: same height as on the previous pass means the chain
  # did not advance during the last interval.
  LAST_HEIGHT=$(cat "$LAST_BLOCK_FILE" 2>/dev/null || echo 0)
  if [[ "$height" == "$LAST_HEIGHT" ]]; then
    ALERT_MSG="$ALERT_MSG\n❗️Block height not increasing (current: $height)!"
  fi
  echo "$height" > "$LAST_BLOCK_FILE"

  # Synchronization status
  if [[ "$catching_up" == "true" ]]; then
    ALERT_MSG="$ALERT_MSG\n❗️Node is still synchronizing! Block: $height"
  fi

  # Peer count
  if [[ "$peers" -lt "$MIN_PEERS" ]]; then
    ALERT_MSG="$ALERT_MSG\n❗️Low peer count: $peers"
  fi

  # CPU usage: strip the fractional part, [[ -ge ]] only compares integers
  CPU_INT=${cpu_load%.*}
  if [[ "$CPU_INT" -ge "$CPU_LIMIT" ]]; then
    ALERT_MSG="$ALERT_MSG\n🔥 High CPU usage: $cpu_load%"
  fi

  # RAM usage
  if [[ "$mem_load" -ge "$MEM_LIMIT" ]]; then
    ALERT_MSG="$ALERT_MSG\n🔥 High memory usage: $mem_load%"
  fi

  # Disk usage
  if [[ "$disk_usage" -ge "$DISK_LIMIT" ]]; then
    ALERT_MSG="$ALERT_MSG\n💾 High disk usage: $disk_usage%"
  fi

  # Send one combined alert if any issue was detected
  if [[ -n "$ALERT_MSG" ]]; then
    if can_alert; then
      send_telegram "Story Node Alert: $ALERT_MSG"
    fi
  fi
  sleep "$CHECK_INTERVAL"
done
Make the script executable:
# Required because the systemd unit runs this file directly via ExecStart
sudo chmod +x $HOME/story_monitor.sh
Create a systemd service:
# Write the unit file. The here-document delimiter is unquoted on purpose:
# $USER, $HOME and STORY_PORT are expanded by your current shell when the file
# is created. systemd does not read your shell profile, so the port prefix is
# handed to the script explicitly via Environment= (falls back to 26, i.e. the
# default RPC port 26657, when STORY_PORT is not set in this shell).
sudo tee /etc/systemd/system/story-monitor.service > /dev/null <<EOF
[Unit]
Description=Story Node Monitor
After=network.target
[Service]
User=$USER
Environment="STORY_PORT=${STORY_PORT:-26}"
ExecStart=$HOME/story_monitor.sh
Restart=always
RestartSec=10
[Install]
WantedBy=multi-user.target
EOF
Enable and start the service:
# Make systemd pick up the newly written unit file
sudo systemctl daemon-reload
# Start the monitor automatically at boot
sudo systemctl enable story-monitor
# Start the monitor now
sudo systemctl start story-monitor
Last updated