Skip to main content

Discord Notifications

I figured some quick, nothing-fancy notifications from NGINX (my reverse proxy) about 4xx and 5xx spikes would be helpful, so I could be alerted more quickly to any issues with what I've got going on. I ended up with a Python script that posts to my Discord server, and here is what I did to get that rolling.

Discord

  1. Create a channel for your notifications
  2. Create a Webhook integration
  3. Give it a cool name and image (I used the Nginx logo)
  4. Copy the webhook URL and save the integration

NGINX LXC

  1. Ensure your LXC is up to date with apt update
  2. Install dependencies with apt install -y python3 python3-requests jq

The Script

nano /usr/local/bin/nginx-logwatch.py
#!/usr/bin/env python3
import os, re, time, json, requests, glob

# --- Configuration (all values come from the environment) -------------------

# Discord webhook to post to; when unset or empty, send() does nothing.
WEBHOOK_URL = os.getenv("WEBHOOK_URL")
# Directory and glob patterns used to locate the access and error logs.
LOG_DIR = os.getenv("LOG_DIR", "/var/log/nginx")
ACCESS_PATTERN = os.getenv("ACCESS_PATTERN", "*.access.log")
ERROR_PATTERN = os.getenv("ERROR_PATTERN", "*.error.log")
# JSON file holding per-log offsets, windows and alert flags between runs.
STATE_FILE = os.getenv("STATE_FILE", "/var/lib/nginx-logwatch/state.json")

# Sliding-window length in seconds and the per-window counts that trigger
# an alert for each category.
WINDOW_SEC = int(os.getenv("WINDOW_SEC", "300"))
ALERT_4XX = int(os.getenv("ALERT_4XX", "50"))
ALERT_5XX = int(os.getenv("ALERT_5XX", "10"))
ALERT_ERR = int(os.getenv("ALERT_ERR", "20"))
# Number of below-threshold runs needed before an active alert is cleared.
NORMAL_STREAK_FOR_CLEAR = int(os.getenv("NORMAL_STREAK_FOR_CLEAR", "2"))

# Host label used in messages; defaults to the machine's node name.
HOSTNAME = os.getenv("HOSTNAME_OVERRIDE", os.uname()[1])

# dirname() is "" for a bare filename such as STATE_FILE="state.json", and
# os.makedirs("") raises FileNotFoundError, so fall back to the current dir.
os.makedirs(os.path.dirname(STATE_FILE) or ".", exist_ok=True)

# Load the state persisted by the previous run. Start from an empty state when
# the file is missing, unreadable, corrupt, or not in the expected shape.
try:
    with open(STATE_FILE) as state_fh:
        state = json.load(state_fh)
except (OSError, ValueError):
    # OSError: first run or unreadable file. ValueError: invalid JSON or
    # undecodable bytes (JSONDecodeError/UnicodeDecodeError are subclasses).
    state = {"files": {}}

# Valid JSON of the wrong shape (e.g. "{}" or a list) would otherwise fail
# later with KeyError/TypeError when state["files"] is accessed.
if not isinstance(state, dict) or not isinstance(state.get("files"), dict):
    state = {"files": {}}

def save_state():
    """Write the module-level ``state`` to STATE_FILE as JSON.

    The JSON is first written to a sibling file named STATE_FILE + ".tmp",
    which is then moved over STATE_FILE with os.replace.
    """
    scratch_path = f"{STATE_FILE}.tmp"
    with open(scratch_path, "w") as out:
        json.dump(state, out)
    os.replace(scratch_path, STATE_FILE)

def send(msg):
    """Post *msg* as the message content to the Discord webhook.

    Does nothing when WEBHOOK_URL is unset or empty. Delivery is best-effort:
    any exception (connection error, timeout, non-2xx response) is printed
    and swallowed so a failed notification never aborts the run.
    """
    if not WEBHOOK_URL:
        return
    payload = {"content": msg}
    try:
        response = requests.post(WEBHOOK_URL, json=payload, timeout=10)
        response.raise_for_status()
        print(f"Discord send ok: {response.status_code}")
    except Exception as exc:
        print(f"Discord send failed: {exc}")

# Timestamp of this run. Every line read during the run is stamped with it
# (timestamps inside the log lines are not parsed), and the sliding windows
# are pruned relative to it.
now = int(time.time())


def _new_file_state(ino):
    """Return the initial state record for a log file with inode *ino*."""
    return {
        "ino": ino,
        "offset": 0,
        "win": {"4xx": [], "5xx": [], "err": []},
        "alert_active": {"4xx": False, "5xx": False, "err": False},
        "normal_streak": {"4xx": 0, "5xx": 0, "err": 0},
    }


def _classify(line, regex, bucket):
    """Return the window key ("4xx", "5xx" or "err") for *line*, or None."""
    m = regex.search(line)
    if not m:
        return None
    if bucket != "access":
        # Error-log patterns need not define a capture group, so group(1)
        # must only be read for access logs.
        return "err"
    code = int(m.group(1))
    if 400 <= code < 500:
        return "4xx"
    if 500 <= code < 600:
        return "5xx"
    return None


def process_log(path, regex, bucket):
    """Consume new lines from one log file and raise or clear alerts.

    Args:
        path: Log file to read.
        regex: Compiled pattern; lines it does not match are ignored. For the
            "access" bucket, group 1 must capture the HTTP status code.
        bucket: "access" counts 4xx/5xx responses by status code; any other
            value counts every matching line as "err".

    Reads and updates the module-level ``state`` entry for *path* (inode,
    byte offset, per-category windows and alert flags) and calls ``send``
    when a category crosses its threshold or returns to normal.

    NOTE(review): a file seen for the first time is read from offset 0, so
    all of its existing lines are stamped with the current run time.
    """
    # Binary mode keeps the stored offset a true byte position (comparable to
    # st_size) and lets undecodable bytes in request lines be tolerated.
    with open(path, "rb") as f:
        st = os.fstat(f.fileno())
        s = state["files"].setdefault(path, _new_file_state(st.st_ino))

        # New inode = rotated; offset past EOF = truncated in place. Either
        # way restart from the top. The windows and alert flags are kept:
        # they are time-based, and dropping them would lose the pending
        # "back to normal" message for an active alert.
        if s["ino"] != st.st_ino or s["offset"] > st.st_size:
            s["ino"] = st.st_ino
            s["offset"] = 0

        offset = s["offset"]
        f.seek(offset)
        for raw in f:
            if not raw.endswith(b"\n"):
                # Partial line still being written; pick it up next run.
                break
            offset += len(raw)
            key = _classify(raw.decode("utf-8", errors="replace"), regex, bucket)
            if key is not None:
                s["win"][key].append(now)
        s["offset"] = offset

    thresholds = {"4xx": ALERT_4XX, "5xx": ALERT_5XX, "err": ALERT_ERR}
    cutoff = now - WINDOW_SEC
    name = os.path.basename(path)

    for k, thresh in thresholds.items():
        # Drop events that have aged out of the sliding window.
        s["win"][k] = [t for t in s["win"][k] if t >= cutoff]
        count = len(s["win"][k])

        if not s["alert_active"][k] and count >= thresh:
            send(f"🚨 **{HOSTNAME} / {name}**: {k} spike (last {WINDOW_SEC}s count={count}).")
            s["alert_active"][k] = True
            s["normal_streak"][k] = 0
        elif s["alert_active"][k] and count < thresh:
            s["normal_streak"][k] += 1
            if s["normal_streak"][k] >= NORMAL_STREAK_FOR_CLEAR:
                send(f"✅ **{HOSTNAME} / {name}**: {k} back to normal (last {WINDOW_SEC}s count={count}).")
                s["alert_active"][k] = False
                s["normal_streak"][k] = 0
        elif s["alert_active"][k]:
            # Still over threshold: the normal runs must be consecutive.
            s["normal_streak"][k] = 0

# Access logs: capture the three-digit status that follows the quoted,
# three-token request line.
access_re = re.compile(r'"\S+ \S+ \S+" (\d{3})')
error_re = re.compile(r'.*')  # any line = "error"

# Access logs are processed first, then error logs; each glob is expanded
# only when its turn comes.
for log_glob, line_re, bucket_name in (
    (ACCESS_PATTERN, access_re, "access"),
    (ERROR_PATTERN, error_re, "error"),
):
    for log_path in glob.glob(os.path.join(LOG_DIR, log_glob)):
        process_log(log_path, line_re, bucket_name)

# Persist offsets, windows and alert flags for the next run.
save_state()

Don't forget to make this executable:

chmod +x /usr/local/bin/nginx-logwatch.py

Config File

nano /etc/default/nginx-logwatch
# Discord webhook to post alerts to; the script sends nothing when this is unset or empty.
WEBHOOK_URL="https://discord.com/api/webhooks/XXXXX/XXXXX"

# Directory and glob patterns used to find the access and error logs.
LOG_DIR="/var/log/nginx"
ACCESS_PATTERN="*.access.log"
ERROR_PATTERN="*.error.log"
# Where the script keeps per-file offsets, windows and alert flags between runs.
STATE_FILE="/var/lib/nginx-logwatch/state.json"

# Sliding-window length in seconds, followed by the per-window counts that trigger an alert.
WINDOW_SEC=300
ALERT_4XX=50
ALERT_5XX=10
ALERT_ERR=20
# Number of below-threshold runs required before a "back to normal" message is sent.
NORMAL_STREAK_FOR_CLEAR=2
# Optional: hostname shown in messages (the script falls back to the machine's node name).
#HOSTNAME_OVERRIDE="nginx-lxc-01"

Systemd Service

nano /etc/systemd/system/nginx-logwatch.service
[Unit]
Description=NGINX log spike watcher -> Discord
# Order after network-online.target, since the script posts to a Discord webhook.
Wants=network-online.target
After=network-online.target

[Service]
# oneshot: the script performs a single pass and exits; the timer unit starts it again.
Type=oneshot
# Supplies WEBHOOK_URL, the paths and the thresholds as environment variables.
EnvironmentFile=/etc/default/nginx-logwatch
ExecStart=/usr/local/bin/nginx-logwatch.py
User=root
Group=root

Systemd Timer

nano /etc/systemd/system/nginx-logwatch.timer
[Unit]
Description=Run nginx-logwatch every minute

[Timer]
# First run one minute after boot, then one minute after each activation of the service.
OnBootSec=1min
OnUnitActiveSec=1min
Unit=nginx-logwatch.service

[Install]
# Started with timers.target once enabled.
WantedBy=timers.target

Enable & Start

systemctl daemon-reload
systemctl enable --now nginx-logwatch.timer

Logging & Debugging

Last Run
journalctl -u nginx-logwatch.service -n 50 --no-pager -o cat
Timer Activity
journalctl -u nginx-logwatch.timer -n 20 --no-pager
Reset State
rm -f /var/lib/nginx-logwatch/state.json