mirror of
https://github.com/torvalds/linux.git
synced 2024-11-21 19:46:16 +00:00
libceph: fix race between delayed_work() and ceph_monc_stop()
The way the delayed work is handled in ceph_monc_stop() is prone to
races with mon_fault() and possibly also finish_hunting(). Both of
these can requeue the delayed work which wouldn't be canceled by any of
the following code in case that happens after cancel_delayed_work_sync()
runs -- __close_session() doesn't mess with the delayed work in order
to avoid interfering with the hunting interval logic. This part was
missed in commit b5d91704f5
("libceph: behave in mon_fault() if
cur_mon < 0") and use-after-free can still ensue on monc and objects
that hang off of it, with monc->auth and monc->monmap being
particularly susceptible to quickly being reused.
To fix this:
- clear monc->cur_mon and monc->hunting as part of closing the session
in ceph_monc_stop()
- bail from delayed_work() if monc->cur_mon is cleared, similar to how
it's done in mon_fault() and finish_hunting() (based on monc->hunting)
- call cancel_delayed_work_sync() after the session is closed
Cc: stable@vger.kernel.org
Link: https://tracker.ceph.com/issues/66857
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Xiubo Li <xiubli@redhat.com>
This commit is contained in:
parent
256abd8e55
commit
69c7b2fe4c
@ -1085,13 +1085,19 @@ static void delayed_work(struct work_struct *work)
|
||||
struct ceph_mon_client *monc =
|
||||
container_of(work, struct ceph_mon_client, delayed_work.work);
|
||||
|
||||
dout("monc delayed_work\n");
|
||||
mutex_lock(&monc->mutex);
|
||||
dout("%s mon%d\n", __func__, monc->cur_mon);
|
||||
if (monc->cur_mon < 0) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (monc->hunting) {
|
||||
dout("%s continuing hunt\n", __func__);
|
||||
reopen_session(monc);
|
||||
} else {
|
||||
int is_auth = ceph_auth_is_authenticated(monc->auth);
|
||||
|
||||
dout("%s is_authed %d\n", __func__, is_auth);
|
||||
if (ceph_con_keepalive_expired(&monc->con,
|
||||
CEPH_MONC_PING_TIMEOUT)) {
|
||||
dout("monc keepalive timeout\n");
|
||||
@ -1116,6 +1122,8 @@ static void delayed_work(struct work_struct *work)
|
||||
}
|
||||
}
|
||||
__schedule_delayed(monc);
|
||||
|
||||
out:
|
||||
mutex_unlock(&monc->mutex);
|
||||
}
|
||||
|
||||
@ -1232,13 +1240,15 @@ EXPORT_SYMBOL(ceph_monc_init);
|
||||
void ceph_monc_stop(struct ceph_mon_client *monc)
|
||||
{
|
||||
dout("stop\n");
|
||||
cancel_delayed_work_sync(&monc->delayed_work);
|
||||
|
||||
mutex_lock(&monc->mutex);
|
||||
__close_session(monc);
|
||||
monc->hunting = false;
|
||||
monc->cur_mon = -1;
|
||||
mutex_unlock(&monc->mutex);
|
||||
|
||||
cancel_delayed_work_sync(&monc->delayed_work);
|
||||
|
||||
/*
|
||||
* flush msgr queue before we destroy ourselves to ensure that:
|
||||
* - any work that references our embedded con is finished.
|
||||
|
Loading…
Reference in New Issue
Block a user