getrusage: use sig->stats_lock rather than lock_task_sighand()
author: Oleg Nesterov <oleg@redhat.com>
Mon, 22 Jan 2024 15:50:53 +0000 (16:50 +0100)
committer: Andrew Morton <akpm@linux-foundation.org>
Thu, 8 Feb 2024 05:20:32 +0000 (21:20 -0800)
lock_task_sighand() can trigger a hard lockup. If NR_CPUS threads call
getrusage() at the same time and the process has NR_THREADS threads,
spin_lock_irq will spin with irqs disabled for O(NR_CPUS * NR_THREADS)
time.

Change getrusage() to use sig->stats_lock, which was specifically designed
for this type of use. This way it runs lockless in the likely case.

TODO:
- Change do_task_stat() to use sig->stats_lock too, then we can
  remove spin_lock_irq(siglock) in wait_task_zombie().

- Turn sig->stats_lock into seqcount_rwlock_t; this way the
  readers in the slow mode won't exclude each other. See
  https://lore.kernel.org/all/20230913154907.GA26210@redhat.com/

- stats_lock has to disable irqs because ->siglock can be taken
  in irq context; it would be very nice to change __exit_signal()
  to avoid the siglock->stats_lock dependency.

Link: https://lkml.kernel.org/r/20240122155053.GA26214@redhat.com
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reported-by: Dylan Hatch <dylanbhatch@google.com>
Tested-by: Dylan Hatch <dylanbhatch@google.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
kernel/sys.c

index 70ad06ad852e5940bc46fc25225e701366cd86c7..f8e543f1e38a06dc3a4aa2f777c7e88d444e5565 100644 (file)
@@ -1788,7 +1788,9 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
        unsigned long maxrss;
        struct mm_struct *mm;
        struct signal_struct *sig = p->signal;
+       unsigned int seq = 0;
 
+retry:
        memset(r, 0, sizeof(*r));
        utime = stime = 0;
        maxrss = 0;
@@ -1800,8 +1802,7 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
                goto out_thread;
        }
 
-       if (!lock_task_sighand(p, &flags))
-               return;
+       flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
 
        switch (who) {
        case RUSAGE_BOTH:
@@ -1829,14 +1830,23 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
                r->ru_oublock += sig->oublock;
                if (maxrss < sig->maxrss)
                        maxrss = sig->maxrss;
+
+               rcu_read_lock();
                __for_each_thread(sig, t)
                        accumulate_thread_rusage(t, r);
+               rcu_read_unlock();
+
                break;
 
        default:
                BUG();
        }
-       unlock_task_sighand(p, &flags);
+
+       if (need_seqretry(&sig->stats_lock, seq)) {
+               seq = 1;
+               goto retry;
+       }
+       done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
 
        if (who == RUSAGE_CHILDREN)
                goto out_children;