Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

arbiter: Handle SIGCHLD like all other signals + misc signal handling improvements #3148

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
156 changes: 52 additions & 104 deletions gunicorn/arbiter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
import errno
import os
import random
import select
import signal
import sys
import time
import traceback
import queue

from gunicorn.errors import HaltServer, AppImportError
from gunicorn.pidfile import Pidfile
Expand Down Expand Up @@ -37,16 +37,11 @@ class Arbiter(object):

LISTENERS = []
WORKERS = {}
PIPE = []

# I love dynamic languages
SIG_QUEUE = []
SIGNALS = [getattr(signal, "SIG%s" % x)
for x in "HUP QUIT INT TERM TTIN TTOU USR1 USR2 WINCH".split()]
SIG_NAMES = dict(
(getattr(signal, name), name[3:].lower()) for name in dir(signal)
if name[:3] == "SIG" and name[3] != "_"
)
SIG_QUEUE = queue.SimpleQueue()
SIGNALS = [getattr(signal.Signals, "SIG%s" % x)
for x in "CHLD HUP QUIT INT TERM TTIN TTOU USR1 USR2 WINCH".split()]

def __init__(self, app):
os.environ["SERVER_SOFTWARE"] = SERVER_SOFTWARE
Expand Down Expand Up @@ -76,6 +71,11 @@ def __init__(self, app):
0: sys.executable
}

self.SIG_HANDLERS = dict(
(sig, getattr(self, "handle_%s" % sig.name[3:].lower()))
for sig in self.SIGNALS
)

# Getter for the worker count: returns the value stored in self._num_workers.
def _get_num_workers(self):
return self._num_workers

Expand Down Expand Up @@ -171,27 +171,19 @@ def init_signals(self):
Initialize master signal handling. All signals, including
SIGCHLD, are queued and handled by the main loop.
"""
# close old PIPE
for p in self.PIPE:
os.close(p)

# initialize the pipe
self.PIPE = pair = os.pipe()
for p in pair:
util.set_non_blocking(p)
util.close_on_exec(p)

self.log.close_on_exec()

# initialize all signals
for s in self.SIGNALS:
signal.signal(s, self.signal)
signal.signal(signal.SIGCHLD, self.handle_chld)

def signal(self, sig, frame):
if len(self.SIG_QUEUE) < 5:
self.SIG_QUEUE.append(sig)
self.wakeup()
""" Note: Signal handler! No logging allowed. """
self.SIG_QUEUE.put(sig)

# Some Unix systems require the SIGCHLD handler to be reinstalled after delivery; see the Python signal docs
if sig == signal.SIGCHLD:
signal.signal(sig, self.signal)

def run(self):
"Main master loop."
Expand All @@ -204,25 +196,16 @@ def run(self):
while True:
self.maybe_promote_master()

sig = self.SIG_QUEUE.pop(0) if self.SIG_QUEUE else None
if sig is None:
self.sleep()
self.murder_workers()
self.manage_workers()
continue

if sig not in self.SIG_NAMES:
self.log.info("Ignoring unknown signal: %s", sig)
continue
try:
sig = self.SIG_QUEUE.get(timeout=1)
if sig != signal.SIGCHLD:
self.log.info("Handling signal: %s", signal.Signals(sig).name)
self.SIG_HANDLERS[sig]()
except queue.Empty:
pass

signame = self.SIG_NAMES.get(sig)
handler = getattr(self, "handle_%s" % signame, None)
if not handler:
self.log.error("Unhandled signal: %s", signame)
continue
self.log.info("Handling signal: %s", signame)
handler()
self.wakeup()
self.murder_workers()
self.manage_workers()
except (StopIteration, KeyboardInterrupt):
self.halt()
except HaltServer as inst:
Expand All @@ -237,10 +220,9 @@ def run(self):
self.pidfile.unlink()
sys.exit(-1)

def handle_chld(self, sig, frame):
def handle_chld(self):
"SIGCHLD handling"
self.reap_workers()
self.wakeup()

def handle_hup(self):
"""\
Expand Down Expand Up @@ -327,16 +309,6 @@ def maybe_promote_master(self):
# reset proctitle
util._setproctitle("master [%s]" % self.proc_name)

def wakeup(self):
"""\
Wake up the arbiter by writing to the PIPE
"""
try:
# Write a single byte to the write end of the pipe (PIPE[1]).
os.write(self.PIPE[1], b'.')
except IOError as e:
# EAGAIN and EINTR are tolerated; any other errno is re-raised.
# NOTE(review): EAGAIN presumably means the non-blocking pipe is
# already full, i.e. a wakeup is still pending -- confirm.
if e.errno not in [errno.EAGAIN, errno.EINTR]:
raise

def halt(self, reason=None, exit_status=0):
""" halt arbiter """
self.stop()
Expand All @@ -351,25 +323,6 @@ def halt(self, reason=None, exit_status=0):
self.cfg.on_exit(self)
sys.exit(exit_status)

def sleep(self):
"""\
Sleep until PIPE is readable or we timeout.
A readable PIPE means a signal occurred.
"""
try:
# Wait at most one second for the read end of the pipe (PIPE[0])
# to become readable.
ready = select.select([self.PIPE[0]], [], [], 1.0)
if not ready[0]:
# Timed out with nothing to read.
return
# Drain the pipe one byte at a time. The loop stops when os.read
# returns an empty result or raises (handled below).
while os.read(self.PIPE[0], 1):
pass
except (select.error, OSError) as e:
# TODO: select.error is a subclass of OSError since Python 3.3.
# Fall back to e.args[0] when the exception has no errno attribute.
error_number = getattr(e, 'errno', e.args[0])
# EAGAIN and EINTR are tolerated; any other error is re-raised.
# NOTE(review): EAGAIN presumably means the non-blocking pipe has
# been fully drained -- confirm.
if error_number not in [errno.EAGAIN, errno.EINTR]:
raise
except KeyboardInterrupt:
# An interrupt while waiting exits the process.
sys.exit()

def stop(self, graceful=True):
"""\
Stop workers
Expand All @@ -394,6 +347,7 @@ def stop(self, graceful=True):
# wait until the graceful timeout
while self.WORKERS and time.time() < limit:
time.sleep(0.1)
self.reap_workers()

self.kill_workers(signal.SIGKILL)

Expand Down Expand Up @@ -518,44 +472,38 @@ def reap_workers(self):
break
if self.reexec_pid == wpid:
self.reexec_pid = 0
else:
# A worker was terminated. If the termination reason was
# that it could not boot, we'll shut it down to avoid
# infinite start/stop cycles.
exitcode = status >> 8
if exitcode != 0:
self.log.error('Worker (pid:%s) exited with code %s', wpid, exitcode)
continue

if os.WIFEXITED(status):
# A worker was normally terminated. If the termination
# reason was that it could not boot, we'll halt the server
# to avoid infinite start/stop cycles.
exitcode = os.WEXITSTATUS(status)
log = self.log.error if exitcode != 0 else self.log.debug
log('Worker (pid:%s) exited with code %s', wpid, exitcode)
if exitcode == self.WORKER_BOOT_ERROR:
reason = "Worker failed to boot."
raise HaltServer(reason, self.WORKER_BOOT_ERROR)
if exitcode == self.APP_LOAD_ERROR:
reason = "App failed to load."
raise HaltServer(reason, self.APP_LOAD_ERROR)

if exitcode > 0:
# If the exit code of the worker is greater than 0,
# let the user know.
self.log.error("Worker (pid:%s) exited with code %s.",
wpid, exitcode)
elif status > 0:
# If the exit code of the worker is 0 and the status
# is greater than 0, then it was most likely killed
# via a signal.
try:
sig_name = signal.Signals(status).name
except ValueError:
sig_name = "code {}".format(status)
msg = "Worker (pid:{}) was sent {}!".format(
wpid, sig_name)

# Additional hint for SIGKILL
if status == signal.SIGKILL:
msg += " Perhaps out of memory?"
self.log.error(msg)

worker = self.WORKERS.pop(wpid, None)
if not worker:
continue
elif os.WIFSIGNALED(status):
# A worker was terminated by a signal.
sig = os.WTERMSIG(status)
try:
sig_name = signal.Signals(sig).name
except ValueError:
sig_name = "signal {}".format(sig)
msg = "Worker (pid:{}) was terminated by {}!".format(
wpid, sig_name)

# Additional hint for SIGKILL
if sig == signal.SIGKILL:
msg += " Perhaps out of memory?"
self.log.error(msg)

worker = self.WORKERS.pop(wpid, None)
if worker:
worker.tmp.close()
self.cfg.child_exit(self, worker)
except OSError as e:
Expand Down