1/* $NetBSD: linux_futex.c,v 1.37 2017/04/10 15:04:32 dholland Exp $ */
2
3/*-
4 * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. All advertising materials mentioning features or use of this software
15 * must display the following acknowledgement:
16 * This product includes software developed by Emmanuel Dreyfus
17 * 4. The name of the author may not be used to endorse or promote
18 * products derived from this software without specific prior written
19 * permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE THE AUTHOR AND CONTRIBUTORS ``AS IS''
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
23 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 */
33
34#include <sys/cdefs.h>
35__KERNEL_RCSID(1, "$NetBSD: linux_futex.c,v 1.37 2017/04/10 15:04:32 dholland Exp $");
36
37#include <sys/param.h>
38#include <sys/time.h>
39#include <sys/systm.h>
40#include <sys/proc.h>
41#include <sys/lwp.h>
42#include <sys/queue.h>
43#include <sys/condvar.h>
44#include <sys/mutex.h>
45#include <sys/kmem.h>
46#include <sys/kernel.h>
47#include <sys/atomic.h>
48
49#include <compat/linux/common/linux_types.h>
50#include <compat/linux/common/linux_emuldata.h>
51#include <compat/linux/common/linux_exec.h>
52#include <compat/linux/common/linux_signal.h>
53#include <compat/linux/common/linux_futex.h>
54#include <compat/linux/common/linux_sched.h>
55#include <compat/linux/common/linux_machdep.h>
56#include <compat/linux/linux_syscallargs.h>
57
struct futex;

/*
 * One record per LWP sleeping on a futex; linked on the futex's
 * f_waiting_proc queue for as long as the LWP is enqueued there.
 */
struct waiting_proc {
	struct futex *wp_futex;		/* futex slept on (may change on requeue) */
	kcondvar_t wp_futex_cv;		/* the sleeper blocks on this CV */
	TAILQ_ENTRY(waiting_proc) wp_list;	/* linkage on wp_futex's queue */
	bool wp_onlist;			/* true while linked on wp_futex's queue */
};

/*
 * Kernel-side futex object: one per distinct user-space address,
 * reference counted, carrying the FIFO queue of waiting LWPs.
 */
struct futex {
	void *f_uaddr;			/* user-space address identifying it */
	int f_refcount;			/* freed by futex_put() when it hits 0 */
	uint32_t f_bitset;		/* wait bitset (not used for matching yet) */
	LIST_ENTRY(futex) f_list;	/* linkage on the global futex_list */
	TAILQ_HEAD(, waiting_proc) f_waiting_proc;	/* waiters, FIFO order */
};

/* All live futexes, and the mutex protecting them and their waiters. */
static LIST_HEAD(futex_list, futex) futex_list;
static kmutex_t futex_lock;

#define FUTEX_LOCK	mutex_enter(&futex_lock)
#define FUTEX_UNLOCK	mutex_exit(&futex_lock)
#define FUTEX_LOCKASSERT	KASSERT(mutex_owned(&futex_lock))

/* Coarse serialization of whole futex operations via the big kernel lock. */
#define FUTEX_SYSTEM_LOCK	KERNEL_LOCK(1, NULL)
#define FUTEX_SYSTEM_UNLOCK	KERNEL_UNLOCK_ONE(0)

#ifdef DEBUG_LINUX_FUTEX
int debug_futex = 1;
#define FUTEXPRINTF(a) do { if (debug_futex) printf a; } while (0)
#else
#define FUTEXPRINTF(a)
#endif
90
/*
 * Initialize the futex subsystem: set up the global futex lock.
 */
void
linux_futex_init(void)
{
	FUTEXPRINTF(("%s: initializing futex\n", __func__));
	mutex_init(&futex_lock, MUTEX_DEFAULT, IPL_NONE);
}
97
/*
 * Tear down the futex subsystem: destroy the global futex lock.
 */
void
linux_futex_fini(void)
{
	FUTEXPRINTF(("%s: destroying futex\n", __func__));
	mutex_destroy(&futex_lock);
}
104
/* Internal helpers, defined below. */
static struct waiting_proc *futex_wp_alloc(void);
static void futex_wp_free(struct waiting_proc *);
static struct futex *futex_get(void *, uint32_t);
static void futex_ref(struct futex *);
static void futex_put(struct futex *);
static int futex_sleep(struct futex **, lwp_t *, int, struct waiting_proc *);
static int futex_wake(struct futex *, int, struct futex *, int);
static int futex_atomic_op(lwp_t *, int, void *);
113
114int
115linux_sys_futex(struct lwp *l, const struct linux_sys_futex_args *uap,
116 register_t *retval)
117{
118 /* {
119 syscallarg(int *) uaddr;
120 syscallarg(int) op;
121 syscallarg(int) val;
122 syscallarg(const struct linux_timespec *) timeout;
123 syscallarg(int *) uaddr2;
124 syscallarg(int) val3;
125 } */
126 struct linux_timespec lts;
127 struct timespec ts = { 0, 0 };
128 int error;
129
130 if ((SCARG(uap, op) & LINUX_FUTEX_CMD_MASK) == LINUX_FUTEX_WAIT &&
131 SCARG(uap, timeout) != NULL) {
132 if ((error = copyin(SCARG(uap, timeout),
133 &lts, sizeof(lts))) != 0) {
134 return error;
135 }
136 linux_to_native_timespec(&ts, &lts);
137 }
138 return linux_do_futex(l, uap, &ts, retval);
139}
140
141/*
142 * Note: TS can't be const because ts2timo destroys it.
143 */
144int
145linux_do_futex(struct lwp *l, const struct linux_sys_futex_args *uap,
146 struct timespec *ts, register_t *retval)
147{
148 /* {
149 syscallarg(int *) uaddr;
150 syscallarg(int) op;
151 syscallarg(int) val;
152 syscallarg(const struct linux_timespec *) timeout;
153 syscallarg(int *) uaddr2;
154 syscallarg(int) val3;
155 } */
156 int val, val3;
157 int ret;
158 int error = 0;
159 struct futex *f;
160 struct futex *newf;
161 int tout;
162 struct futex *f2;
163 struct waiting_proc *wp;
164 int op_ret, cmd;
165 clockid_t clk;
166
167 cmd = SCARG(uap, op) & LINUX_FUTEX_CMD_MASK;
168 val3 = SCARG(uap, val3);
169
170 if (SCARG(uap, op) & LINUX_FUTEX_CLOCK_REALTIME) {
171 switch (cmd) {
172 case LINUX_FUTEX_WAIT_BITSET:
173 case LINUX_FUTEX_WAIT:
174 clk = CLOCK_REALTIME;
175 break;
176 default:
177 return ENOSYS;
178 }
179 } else
180 clk = CLOCK_MONOTONIC;
181
182 /*
183 * Our implementation provides only private futexes. Most of the apps
184 * should use private futexes but don't claim so. Therefore we treat
185 * all futexes as private by clearing the FUTEX_PRIVATE_FLAG. It works
186 * in most cases (ie. when futexes are not shared on file descriptor
187 * or between different processes).
188 *
189 * Note that we don't handle bitsets at all at the moment. We need
190 * to move from refcounting uaddr's to handling multiple futex entries
191 * pointing to the same uaddr, but having possibly different bitmask.
192 * Perhaps move to an implementation where each uaddr has a list of
193 * futexes.
194 */
195 switch (cmd) {
196 case LINUX_FUTEX_WAIT:
197 val3 = FUTEX_BITSET_MATCH_ANY;
198 /*FALLTHROUGH*/
199 case LINUX_FUTEX_WAIT_BITSET:
200 if ((error = ts2timo(clk, 0, ts, &tout, NULL)) != 0) {
201 if (error != ETIMEDOUT)
202 return error;
203 /*
204 * If the user process requests a non null
205 * timeout, make sure we do not turn it into
206 * an infinite timeout because tout is 0.
207 *
208 * We use a minimal timeout of 1/hz. Maybe it
209 * would make sense to just return ETIMEDOUT
210 * without sleeping.
211 */
212 if (SCARG(uap, timeout) != NULL)
213 tout = 1;
214 else
215 tout = 0;
216 }
217 FUTEX_SYSTEM_LOCK;
218 if ((error = copyin(SCARG(uap, uaddr),
219 &val, sizeof(val))) != 0) {
220 FUTEX_SYSTEM_UNLOCK;
221 return error;
222 }
223
224 if (val != SCARG(uap, val)) {
225 FUTEX_SYSTEM_UNLOCK;
226 return EWOULDBLOCK;
227 }
228
229 FUTEXPRINTF(("FUTEX_WAIT %d.%d: val = %d, uaddr = %p, "
230 "*uaddr = %d, timeout = %lld.%09ld\n",
231 l->l_proc->p_pid, l->l_lid, SCARG(uap, val),
232 SCARG(uap, uaddr), val, (long long)ts->tv_sec,
233 ts->tv_nsec));
234
235
236 wp = futex_wp_alloc();
237 FUTEX_LOCK;
238 f = futex_get(SCARG(uap, uaddr), val3);
239 ret = futex_sleep(&f, l, tout, wp);
240 futex_put(f);
241 FUTEX_UNLOCK;
242 futex_wp_free(wp);
243
244 FUTEXPRINTF(("FUTEX_WAIT %d.%d: uaddr = %p, "
245 "ret = %d\n", l->l_proc->p_pid, l->l_lid,
246 SCARG(uap, uaddr), ret));
247
248 FUTEX_SYSTEM_UNLOCK;
249 switch (ret) {
250 case EWOULDBLOCK: /* timeout */
251 return ETIMEDOUT;
252 break;
253 case EINTR: /* signal */
254 return EINTR;
255 break;
256 case 0: /* FUTEX_WAKE received */
257 FUTEXPRINTF(("FUTEX_WAIT %d.%d: uaddr = %p, got it\n",
258 l->l_proc->p_pid, l->l_lid, SCARG(uap, uaddr)));
259 return 0;
260 break;
261 default:
262 FUTEXPRINTF(("FUTEX_WAIT: unexpected ret = %d\n", ret));
263 break;
264 }
265
266 /* NOTREACHED */
267 break;
268
269 case LINUX_FUTEX_WAKE:
270 val = FUTEX_BITSET_MATCH_ANY;
271 /*FALLTHROUGH*/
272 case LINUX_FUTEX_WAKE_BITSET:
273 /*
274 * XXX: Linux is able cope with different addresses
275 * corresponding to the same mapped memory in the sleeping
276 * and the waker process(es).
277 */
278 FUTEXPRINTF(("FUTEX_WAKE %d.%d: uaddr = %p, val = %d\n",
279 l->l_proc->p_pid, l->l_lid,
280 SCARG(uap, uaddr), SCARG(uap, val)));
281
282 FUTEX_SYSTEM_LOCK;
283 FUTEX_LOCK;
284 f = futex_get(SCARG(uap, uaddr), val3);
285 *retval = futex_wake(f, SCARG(uap, val), NULL, 0);
286 futex_put(f);
287 FUTEX_UNLOCK;
288 FUTEX_SYSTEM_UNLOCK;
289
290 break;
291
292 case LINUX_FUTEX_CMP_REQUEUE:
293 FUTEX_SYSTEM_LOCK;
294
295 if ((error = copyin(SCARG(uap, uaddr),
296 &val, sizeof(val))) != 0) {
297 FUTEX_SYSTEM_UNLOCK;
298 return error;
299 }
300
301 if (val != val3) {
302 FUTEX_SYSTEM_UNLOCK;
303 return EAGAIN;
304 }
305
306 FUTEXPRINTF(("FUTEX_CMP_REQUEUE %d.%d: uaddr = %p, val = %d, "
307 "uaddr2 = %p, val2 = %d\n",
308 l->l_proc->p_pid, l->l_lid,
309 SCARG(uap, uaddr), SCARG(uap, val), SCARG(uap, uaddr2),
310 (int)(unsigned long)SCARG(uap, timeout)));
311
312 FUTEX_LOCK;
313 f = futex_get(SCARG(uap, uaddr), val3);
314 newf = futex_get(SCARG(uap, uaddr2), val3);
315 *retval = futex_wake(f, SCARG(uap, val), newf,
316 (int)(unsigned long)SCARG(uap, timeout));
317 futex_put(f);
318 futex_put(newf);
319 FUTEX_UNLOCK;
320
321 FUTEX_SYSTEM_UNLOCK;
322 break;
323
324 case LINUX_FUTEX_REQUEUE:
325 FUTEX_SYSTEM_LOCK;
326
327 FUTEXPRINTF(("FUTEX_REQUEUE %d.%d: uaddr = %p, val = %d, "
328 "uaddr2 = %p, val2 = %d\n",
329 l->l_proc->p_pid, l->l_lid,
330 SCARG(uap, uaddr), SCARG(uap, val), SCARG(uap, uaddr2),
331 (int)(unsigned long)SCARG(uap, timeout)));
332
333 FUTEX_LOCK;
334 f = futex_get(SCARG(uap, uaddr), val3);
335 newf = futex_get(SCARG(uap, uaddr2), val3);
336 *retval = futex_wake(f, SCARG(uap, val), newf,
337 (int)(unsigned long)SCARG(uap, timeout));
338 futex_put(f);
339 futex_put(newf);
340 FUTEX_UNLOCK;
341
342 FUTEX_SYSTEM_UNLOCK;
343 break;
344
345 case LINUX_FUTEX_FD:
346 FUTEXPRINTF(("%s: unimplemented op %d\n", __func__, cmd));
347 return ENOSYS;
348 case LINUX_FUTEX_WAKE_OP:
349 FUTEX_SYSTEM_LOCK;
350
351 FUTEXPRINTF(("FUTEX_WAKE_OP %d.%d: uaddr = %p, op = %d, "
352 "val = %d, uaddr2 = %p, val2 = %d\n",
353 l->l_proc->p_pid, l->l_lid,
354 SCARG(uap, uaddr), cmd, SCARG(uap, val),
355 SCARG(uap, uaddr2),
356 (int)(unsigned long)SCARG(uap, timeout)));
357
358 FUTEX_LOCK;
359 f = futex_get(SCARG(uap, uaddr), val3);
360 f2 = futex_get(SCARG(uap, uaddr2), val3);
361 FUTEX_UNLOCK;
362
363 /*
364 * This function returns positive number as results and
365 * negative as errors
366 */
367 op_ret = futex_atomic_op(l, val3, SCARG(uap, uaddr2));
368 FUTEX_LOCK;
369 if (op_ret < 0) {
370 futex_put(f);
371 futex_put(f2);
372 FUTEX_UNLOCK;
373 FUTEX_SYSTEM_UNLOCK;
374 return -op_ret;
375 }
376
377 ret = futex_wake(f, SCARG(uap, val), NULL, 0);
378 futex_put(f);
379 if (op_ret > 0) {
380 op_ret = 0;
381 /*
382 * Linux abuses the address of the timespec parameter
383 * as the number of retries
384 */
385 op_ret += futex_wake(f2,
386 (int)(unsigned long)SCARG(uap, timeout), NULL, 0);
387 ret += op_ret;
388 }
389 futex_put(f2);
390 FUTEX_UNLOCK;
391 FUTEX_SYSTEM_UNLOCK;
392 *retval = ret;
393 break;
394 default:
395 FUTEXPRINTF(("%s: unknown op %d\n", __func__, cmd));
396 return ENOSYS;
397 }
398 return 0;
399}
400
401static struct waiting_proc *
402futex_wp_alloc(void)
403{
404 struct waiting_proc *wp;
405
406 wp = kmem_zalloc(sizeof(*wp), KM_SLEEP);
407 cv_init(&wp->wp_futex_cv, "futex");
408 return wp;
409}
410
411static void
412futex_wp_free(struct waiting_proc *wp)
413{
414
415 cv_destroy(&wp->wp_futex_cv);
416 kmem_free(wp, sizeof(*wp));
417}
418
419static struct futex *
420futex_get(void *uaddr, uint32_t bitset)
421{
422 struct futex *f;
423
424 FUTEX_LOCKASSERT;
425
426 LIST_FOREACH(f, &futex_list, f_list) {
427 if (f->f_uaddr == uaddr) {
428 f->f_refcount++;
429 return f;
430 }
431 }
432
433 /* Not found, create it */
434 f = kmem_zalloc(sizeof(*f), KM_SLEEP);
435 f->f_uaddr = uaddr;
436 f->f_bitset = bitset;
437 f->f_refcount = 1;
438 TAILQ_INIT(&f->f_waiting_proc);
439 LIST_INSERT_HEAD(&futex_list, f, f_list);
440
441 return f;
442}
443
/*
 * Take an additional reference on a futex the caller already holds a
 * reference to.  futex_lock must be held.
 */
static void
futex_ref(struct futex *f)
{

	FUTEX_LOCKASSERT;

	f->f_refcount++;
}
452
453static void
454futex_put(struct futex *f)
455{
456
457 FUTEX_LOCKASSERT;
458
459 f->f_refcount--;
460 if (f->f_refcount == 0) {
461 KASSERT(TAILQ_EMPTY(&f->f_waiting_proc));
462 LIST_REMOVE(f, f_list);
463 kmem_free(f, sizeof(*f));
464 }
465}
466
/*
 * Block the calling LWP on futex *fp until woken by futex_wake() or
 * until the (interruptible) timeout expires.  On return, *fp names the
 * futex the waiter ended up on -- possibly a different one if it was
 * requeued while asleep -- and the caller must futex_put() that one.
 * Returns cv_timedwait_sig()'s result: 0 on wakeup, EWOULDBLOCK on
 * timeout, or an interrupted-by-signal error.
 * Called and returns with futex_lock held.
 */
static int
futex_sleep(struct futex **fp, lwp_t *l, int timeout, struct waiting_proc *wp)
{
	struct futex *f;
	int ret;

	FUTEX_LOCKASSERT;

	f = *fp;
	wp->wp_futex = f;
	TAILQ_INSERT_TAIL(&f->f_waiting_proc, wp, wp_list);
	wp->wp_onlist = true;
	/* Drops futex_lock while asleep; reacquires it before returning. */
	ret = cv_timedwait_sig(&wp->wp_futex_cv, &futex_lock, timeout);

	/*
	 * we may have been requeued to a different futex before we were
	 * woken up, so let the caller know which futex to put. if we were
	 * woken by futex_wake() then it took us off the waiting list,
	 * but if our sleep was interrupted or timed out then we might
	 * need to take ourselves off the waiting list.
	 */

	f = wp->wp_futex;
	if (wp->wp_onlist) {
		TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
	}
	*fp = f;
	return ret;
}
496
/*
 * Wake up to n LWPs sleeping on futex f; if newf is non-NULL, also
 * requeue up to n2 further waiters from f onto newf without waking
 * them.  Returns the total number of LWPs woken plus requeued.
 * futex_lock must be held.
 */
static int
futex_wake(struct futex *f, int n, struct futex *newf, int n2)
{
	struct waiting_proc *wp;
	int count = 0;

	FUTEX_LOCKASSERT;

	/*
	 * wake up up to n threads waiting on this futex.
	 */

	while (n--) {
		wp = TAILQ_FIRST(&f->f_waiting_proc);
		if (wp == NULL)
			return count;

		KASSERT(f == wp->wp_futex);
		TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
		/* Mark off-list so futex_sleep() won't unlink it again. */
		wp->wp_onlist = false;
		cv_signal(&wp->wp_futex_cv);
		count++;
	}
	if (newf == NULL)
		return count;

	/*
	 * then requeue up to n2 additional threads to newf
	 * (without waking them up).
	 */

	while (n2--) {
		wp = TAILQ_FIRST(&f->f_waiting_proc);
		if (wp == NULL)
			return count;

		KASSERT(f == wp->wp_futex);
		TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
		/* Transfer the waiter's reference from f to newf. */
		futex_put(f);

		wp->wp_futex = newf;
		futex_ref(newf);
		TAILQ_INSERT_TAIL(&newf->f_waiting_proc, wp, wp_list);
		count++;
	}
	return count;
}
544
545static int
546futex_atomic_op(lwp_t *l, int encoded_op, void *uaddr)
547{
548 const int op = (encoded_op >> 28) & 7;
549 const int cmp = (encoded_op >> 24) & 15;
550 const int cmparg = (encoded_op << 20) >> 20;
551 int oparg = (encoded_op << 8) >> 20;
552 int error, oldval, cval;
553
554 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
555 oparg = 1 << oparg;
556
557 /* XXX: linux verifies access here and returns EFAULT */
558
559 if (copyin(uaddr, &cval, sizeof(int)) != 0)
560 return -EFAULT;
561
562 for (;;) {
563 int nval;
564
565 switch (op) {
566 case FUTEX_OP_SET:
567 nval = oparg;
568 break;
569 case FUTEX_OP_ADD:
570 nval = cval + oparg;
571 break;
572 case FUTEX_OP_OR:
573 nval = cval | oparg;
574 break;
575 case FUTEX_OP_ANDN:
576 nval = cval & ~oparg;
577 break;
578 case FUTEX_OP_XOR:
579 nval = cval ^ oparg;
580 break;
581 default:
582 return -ENOSYS;
583 }
584
585 error = ucas_int(uaddr, cval, nval, &oldval);
586 if (error || oldval == cval) {
587 break;
588 }
589 cval = oldval;
590 }
591
592 if (error)
593 return -EFAULT;
594
595 switch (cmp) {
596 case FUTEX_OP_CMP_EQ:
597 return (oldval == cmparg);
598 case FUTEX_OP_CMP_NE:
599 return (oldval != cmparg);
600 case FUTEX_OP_CMP_LT:
601 return (oldval < cmparg);
602 case FUTEX_OP_CMP_GE:
603 return (oldval >= cmparg);
604 case FUTEX_OP_CMP_LE:
605 return (oldval <= cmparg);
606 case FUTEX_OP_CMP_GT:
607 return (oldval > cmparg);
608 default:
609 return -ENOSYS;
610 }
611}
612
613int
614linux_sys_set_robust_list(struct lwp *l,
615 const struct linux_sys_set_robust_list_args *uap, register_t *retval)
616{
617 /* {
618 syscallarg(struct linux_robust_list_head *) head;
619 syscallarg(size_t) len;
620 } */
621 struct linux_emuldata *led;
622
623 if (SCARG(uap, len) != sizeof(struct linux_robust_list_head))
624 return EINVAL;
625 led = l->l_emuldata;
626 led->led_robust_head = SCARG(uap, head);
627 *retval = 0;
628 return 0;
629}
630
/*
 * get_robust_list(2): copy out the robust-futex list head pointer and
 * its size for the calling LWP, or -- when pid is non-zero -- for the
 * LWP with that id within the current process.
 */
int
linux_sys_get_robust_list(struct lwp *l,
    const struct linux_sys_get_robust_list_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) pid;
		syscallarg(struct linux_robust_list_head **) head;
		syscallarg(size_t *) len;
	} */
	struct proc *p;
	struct linux_emuldata *led;
	struct linux_robust_list_head *head;
	size_t len;
	int error = 0;

	p = l->l_proc;
	if (!SCARG(uap, pid)) {
		/* pid == 0 means the calling LWP itself. */
		led = l->l_emuldata;
		head = led->led_robust_head;
	} else {
		/*
		 * NOTE(review): the pid argument is resolved as an LWP
		 * id within the current process only, never as another
		 * process id -- confirm this matches Linux expectations.
		 */
		mutex_enter(p->p_lock);
		l = lwp_find(p, SCARG(uap, pid));
		if (l != NULL) {
			led = l->l_emuldata;
			head = led->led_robust_head;
		}
		mutex_exit(p->p_lock);
		if (l == NULL) {
			return ESRCH;
		}
	}
#ifdef __arch64__
	/* 32-bit process on a 64-bit kernel: copy out 32-bit values. */
	if (p->p_flag & PK_32) {
		uint32_t u32;

		/* Size of the 32-bit robust_list_head: three 4-byte fields. */
		u32 = 12;
		error = copyout(&u32, SCARG(uap, len), sizeof(u32));
		if (error)
			return error;
		u32 = (uint32_t)(uintptr_t)head;
		return copyout(&u32, SCARG(uap, head), sizeof(u32));
	}
#endif

	len = sizeof(*head);
	error = copyout(&len, SCARG(uap, len), sizeof(len));
	if (error)
		return error;
	return copyout(&head, SCARG(uap, head), sizeof(head));
}
681
682static int
683handle_futex_death(void *uaddr, pid_t pid, int pi)
684{
685 int uval, nval, mval;
686 struct futex *f;
687
688retry:
689 if (copyin(uaddr, &uval, sizeof(uval)))
690 return EFAULT;
691
692 if ((uval & FUTEX_TID_MASK) == pid) {
693 mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
694 nval = atomic_cas_32(uaddr, uval, mval);
695
696 if (nval == -1)
697 return EFAULT;
698
699 if (nval != uval)
700 goto retry;
701
702 if (!pi && (uval & FUTEX_WAITERS)) {
703 FUTEX_LOCK;
704 f = futex_get(uaddr, FUTEX_BITSET_MATCH_ANY);
705 futex_wake(f, 1, NULL, 0);
706 FUTEX_UNLOCK;
707 }
708 }
709
710 return 0;
711}
712
/*
 * Fetch one robust-list pointer from user space ('head' is a user
 * address despite its kernel pointer type), splitting it into the
 * entry address (low bit cleared) and the PI flag carried in the low
 * bit.  Handles the 32-bit pointer layout for PK_32 processes on
 * 64-bit kernels.  Returns 0 on success or EFAULT.
 */
static int
fetch_robust_entry(struct lwp *l, struct linux_robust_list **entry,
    struct linux_robust_list **head, int *pi)
{
	unsigned long uentry;

#ifdef __arch64__
	if (l->l_proc->p_flag & PK_32) {
		uint32_t u32;

		if (copyin(head, &u32, sizeof(u32)))
			return EFAULT;
		uentry = (unsigned long)u32;
	} else
#endif
	if (copyin(head, &uentry, sizeof(uentry)))
		return EFAULT;

	/* The low bit of the user pointer flags a PI futex. */
	*entry = (void *)(uentry & ~1UL);
	*pi = uentry & 1;

	return 0;
}
736
/* This walks the list of robust futexes, releasing them. */
void
release_futexes(struct lwp *l)
{
	struct linux_robust_list_head head;
	struct linux_robust_list *entry, *next_entry = NULL, *pending;
	unsigned int limit = 2048, pi, next_pi, pip;
	struct linux_emuldata *led;
	unsigned long futex_offset;
	int rc;

	led = l->l_emuldata;
	if (led->led_robust_head == NULL)
		return;

	/* Fetch the list head from user space (32-bit layout if PK_32). */
#ifdef __arch64__
	if (l->l_proc->p_flag & PK_32) {
		uint32_t u32s[3];

		if (copyin(led->led_robust_head, u32s, sizeof(u32s)))
			return;

		head.list.next = (void *)(uintptr_t)u32s[0];
		head.futex_offset = (unsigned long)u32s[1];
		head.pending_list = (void *)(uintptr_t)u32s[2];
	} else
#endif
	if (copyin(led->led_robust_head, &head, sizeof(head)))
		return;

	if (fetch_robust_entry(l, &entry, &head.list.next, &pi))
		return;

	/*
	 * NOTE(review): this re-fetch reads from offset 0 of the user
	 * head (i.e. list.next, not futex_offset) and overwrites the
	 * value already taken from u32s[1] above -- looks wrong; confirm
	 * against the 32-bit linux_robust_list_head layout.
	 */
#ifdef __arch64__
	if (l->l_proc->p_flag & PK_32) {
		uint32_t u32;

		if (copyin(led->led_robust_head, &u32, sizeof(u32)))
			return;

		head.futex_offset = (unsigned long)u32;
		futex_offset = head.futex_offset;
	} else
#endif
	/*
	 * NOTE(review): 'head' is a kernel local here, so this copyin
	 * reads from a kernel address; reading the user-space head's
	 * futex_offset field was presumably intended -- confirm.
	 */
	if (copyin(&head.futex_offset, &futex_offset, sizeof(unsigned long)))
		return;

	if (fetch_robust_entry(l, &pending, &head.pending_list, &pip))
		return;

	/*
	 * Walk the user-space chain, releasing each futex owned by this
	 * LWP.  NOTE(review): 'entry' holds user addresses while
	 * &head.list is a kernel address, so this comparison can never
	 * match and termination relies on the 2048-entry limit -- confirm.
	 */
	while (entry != &head.list) {
		/* Fetch the next pointer before acting on this entry. */
		rc = fetch_robust_entry(l, &next_entry, &entry->next, &next_pi);

		/* The pending entry is handled separately below. */
		if (entry != pending)
			if (handle_futex_death((char *)entry + futex_offset,
			    l->l_lid, pi))
				return;

		if (rc)
			return;

		entry = next_entry;
		pi = next_pi;

		/* Bound the walk to guard against cyclic user lists. */
		if (!--limit)
			break;

		yield();	/* XXX why? */
	}

	if (pending)
		handle_futex_death((char *)pending + futex_offset,
		    l->l_lid, pip);
}
811