1 | /* $NetBSD: linux_futex.c,v 1.37 2017/04/10 15:04:32 dholland Exp $ */ |
2 | |
3 | /*- |
4 | * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved. |
5 | * |
6 | * Redistribution and use in source and binary forms, with or without |
7 | * modification, are permitted provided that the following conditions |
8 | * are met: |
9 | * 1. Redistributions of source code must retain the above copyright |
10 | * notice, this list of conditions and the following disclaimer. |
11 | * 2. Redistributions in binary form must reproduce the above copyright |
12 | * notice, this list of conditions and the following disclaimer in the |
13 | * documentation and/or other materials provided with the distribution. |
14 | * 3. All advertising materials mentioning features or use of this software |
15 | * must display the following acknowledgement: |
16 | * This product includes software developed by Emmanuel Dreyfus |
17 | * 4. The name of the author may not be used to endorse or promote |
18 | * products derived from this software without specific prior written |
19 | * permission. |
20 | * |
21 | * THIS SOFTWARE IS PROVIDED BY THE THE AUTHOR AND CONTRIBUTORS ``AS IS'' |
22 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, |
23 | * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
24 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS |
25 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
26 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
27 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
28 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
29 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
30 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
31 | * POSSIBILITY OF SUCH DAMAGE. |
32 | */ |
33 | |
34 | #include <sys/cdefs.h> |
35 | __KERNEL_RCSID(1, "$NetBSD: linux_futex.c,v 1.37 2017/04/10 15:04:32 dholland Exp $" ); |
36 | |
37 | #include <sys/param.h> |
38 | #include <sys/time.h> |
39 | #include <sys/systm.h> |
40 | #include <sys/proc.h> |
41 | #include <sys/lwp.h> |
42 | #include <sys/queue.h> |
43 | #include <sys/condvar.h> |
44 | #include <sys/mutex.h> |
45 | #include <sys/kmem.h> |
46 | #include <sys/kernel.h> |
47 | #include <sys/atomic.h> |
48 | |
49 | #include <compat/linux/common/linux_types.h> |
50 | #include <compat/linux/common/linux_emuldata.h> |
51 | #include <compat/linux/common/linux_exec.h> |
52 | #include <compat/linux/common/linux_signal.h> |
53 | #include <compat/linux/common/linux_futex.h> |
54 | #include <compat/linux/common/linux_sched.h> |
55 | #include <compat/linux/common/linux_machdep.h> |
56 | #include <compat/linux/linux_syscallargs.h> |
57 | |
58 | struct futex; |
59 | |
60 | struct waiting_proc { |
61 | struct futex *wp_futex; |
62 | kcondvar_t wp_futex_cv; |
63 | TAILQ_ENTRY(waiting_proc) wp_list; |
64 | bool wp_onlist; |
65 | }; |
66 | struct futex { |
67 | void *f_uaddr; |
68 | int f_refcount; |
69 | uint32_t f_bitset; |
70 | LIST_ENTRY(futex) f_list; |
71 | TAILQ_HEAD(, waiting_proc) f_waiting_proc; |
72 | }; |
73 | |
74 | static LIST_HEAD(futex_list, futex) futex_list; |
75 | static kmutex_t futex_lock; |
76 | |
77 | #define FUTEX_LOCK mutex_enter(&futex_lock) |
78 | #define FUTEX_UNLOCK mutex_exit(&futex_lock) |
79 | #define FUTEX_LOCKASSERT KASSERT(mutex_owned(&futex_lock)) |
80 | |
81 | #define FUTEX_SYSTEM_LOCK KERNEL_LOCK(1, NULL) |
82 | #define FUTEX_SYSTEM_UNLOCK KERNEL_UNLOCK_ONE(0) |
83 | |
/*
 * Debug tracing.  FUTEXPRINTF takes a fully parenthesised printf
 * argument list, e.g. FUTEXPRINTF(("x = %d\n", x)); it expands to
 * nothing unless DEBUG_LINUX_FUTEX is defined.
 */
#ifdef DEBUG_LINUX_FUTEX
int debug_futex = 1;
#define FUTEXPRINTF(a)	do { if (debug_futex) printf a; } while (0)
#else
#define FUTEXPRINTF(a)
#endif
90 | |
91 | void |
92 | linux_futex_init(void) |
93 | { |
94 | FUTEXPRINTF(("%s: initializing futex\n" , __func__)); |
95 | mutex_init(&futex_lock, MUTEX_DEFAULT, IPL_NONE); |
96 | } |
97 | |
98 | void |
99 | linux_futex_fini(void) |
100 | { |
101 | FUTEXPRINTF(("%s: destroying futex\n" , __func__)); |
102 | mutex_destroy(&futex_lock); |
103 | } |
104 | |
105 | static struct waiting_proc *futex_wp_alloc(void); |
106 | static void futex_wp_free(struct waiting_proc *); |
107 | static struct futex *futex_get(void *, uint32_t); |
108 | static void futex_ref(struct futex *); |
109 | static void futex_put(struct futex *); |
110 | static int futex_sleep(struct futex **, lwp_t *, int, struct waiting_proc *); |
111 | static int futex_wake(struct futex *, int, struct futex *, int); |
112 | static int futex_atomic_op(lwp_t *, int, void *); |
113 | |
114 | int |
115 | linux_sys_futex(struct lwp *l, const struct linux_sys_futex_args *uap, |
116 | register_t *retval) |
117 | { |
118 | /* { |
119 | syscallarg(int *) uaddr; |
120 | syscallarg(int) op; |
121 | syscallarg(int) val; |
122 | syscallarg(const struct linux_timespec *) timeout; |
123 | syscallarg(int *) uaddr2; |
124 | syscallarg(int) val3; |
125 | } */ |
126 | struct linux_timespec lts; |
127 | struct timespec ts = { 0, 0 }; |
128 | int error; |
129 | |
130 | if ((SCARG(uap, op) & LINUX_FUTEX_CMD_MASK) == LINUX_FUTEX_WAIT && |
131 | SCARG(uap, timeout) != NULL) { |
132 | if ((error = copyin(SCARG(uap, timeout), |
133 | <s, sizeof(lts))) != 0) { |
134 | return error; |
135 | } |
136 | linux_to_native_timespec(&ts, <s); |
137 | } |
138 | return linux_do_futex(l, uap, &ts, retval); |
139 | } |
140 | |
141 | /* |
142 | * Note: TS can't be const because ts2timo destroys it. |
143 | */ |
144 | int |
145 | linux_do_futex(struct lwp *l, const struct linux_sys_futex_args *uap, |
146 | struct timespec *ts, register_t *retval) |
147 | { |
148 | /* { |
149 | syscallarg(int *) uaddr; |
150 | syscallarg(int) op; |
151 | syscallarg(int) val; |
152 | syscallarg(const struct linux_timespec *) timeout; |
153 | syscallarg(int *) uaddr2; |
154 | syscallarg(int) val3; |
155 | } */ |
156 | int val, val3; |
157 | int ret; |
158 | int error = 0; |
159 | struct futex *f; |
160 | struct futex *newf; |
161 | int tout; |
162 | struct futex *f2; |
163 | struct waiting_proc *wp; |
164 | int op_ret, cmd; |
165 | clockid_t clk; |
166 | |
167 | cmd = SCARG(uap, op) & LINUX_FUTEX_CMD_MASK; |
168 | val3 = SCARG(uap, val3); |
169 | |
170 | if (SCARG(uap, op) & LINUX_FUTEX_CLOCK_REALTIME) { |
171 | switch (cmd) { |
172 | case LINUX_FUTEX_WAIT_BITSET: |
173 | case LINUX_FUTEX_WAIT: |
174 | clk = CLOCK_REALTIME; |
175 | break; |
176 | default: |
177 | return ENOSYS; |
178 | } |
179 | } else |
180 | clk = CLOCK_MONOTONIC; |
181 | |
182 | /* |
183 | * Our implementation provides only private futexes. Most of the apps |
184 | * should use private futexes but don't claim so. Therefore we treat |
185 | * all futexes as private by clearing the FUTEX_PRIVATE_FLAG. It works |
186 | * in most cases (ie. when futexes are not shared on file descriptor |
187 | * or between different processes). |
188 | * |
189 | * Note that we don't handle bitsets at all at the moment. We need |
190 | * to move from refcounting uaddr's to handling multiple futex entries |
191 | * pointing to the same uaddr, but having possibly different bitmask. |
192 | * Perhaps move to an implementation where each uaddr has a list of |
193 | * futexes. |
194 | */ |
195 | switch (cmd) { |
196 | case LINUX_FUTEX_WAIT: |
197 | val3 = FUTEX_BITSET_MATCH_ANY; |
198 | /*FALLTHROUGH*/ |
199 | case LINUX_FUTEX_WAIT_BITSET: |
200 | if ((error = ts2timo(clk, 0, ts, &tout, NULL)) != 0) { |
201 | if (error != ETIMEDOUT) |
202 | return error; |
203 | /* |
204 | * If the user process requests a non null |
205 | * timeout, make sure we do not turn it into |
206 | * an infinite timeout because tout is 0. |
207 | * |
208 | * We use a minimal timeout of 1/hz. Maybe it |
209 | * would make sense to just return ETIMEDOUT |
210 | * without sleeping. |
211 | */ |
212 | if (SCARG(uap, timeout) != NULL) |
213 | tout = 1; |
214 | else |
215 | tout = 0; |
216 | } |
217 | FUTEX_SYSTEM_LOCK; |
218 | if ((error = copyin(SCARG(uap, uaddr), |
219 | &val, sizeof(val))) != 0) { |
220 | FUTEX_SYSTEM_UNLOCK; |
221 | return error; |
222 | } |
223 | |
224 | if (val != SCARG(uap, val)) { |
225 | FUTEX_SYSTEM_UNLOCK; |
226 | return EWOULDBLOCK; |
227 | } |
228 | |
229 | FUTEXPRINTF(("FUTEX_WAIT %d.%d: val = %d, uaddr = %p, " |
230 | "*uaddr = %d, timeout = %lld.%09ld\n" , |
231 | l->l_proc->p_pid, l->l_lid, SCARG(uap, val), |
232 | SCARG(uap, uaddr), val, (long long)ts->tv_sec, |
233 | ts->tv_nsec)); |
234 | |
235 | |
236 | wp = futex_wp_alloc(); |
237 | FUTEX_LOCK; |
238 | f = futex_get(SCARG(uap, uaddr), val3); |
239 | ret = futex_sleep(&f, l, tout, wp); |
240 | futex_put(f); |
241 | FUTEX_UNLOCK; |
242 | futex_wp_free(wp); |
243 | |
244 | FUTEXPRINTF(("FUTEX_WAIT %d.%d: uaddr = %p, " |
245 | "ret = %d\n" , l->l_proc->p_pid, l->l_lid, |
246 | SCARG(uap, uaddr), ret)); |
247 | |
248 | FUTEX_SYSTEM_UNLOCK; |
249 | switch (ret) { |
250 | case EWOULDBLOCK: /* timeout */ |
251 | return ETIMEDOUT; |
252 | break; |
253 | case EINTR: /* signal */ |
254 | return EINTR; |
255 | break; |
256 | case 0: /* FUTEX_WAKE received */ |
257 | FUTEXPRINTF(("FUTEX_WAIT %d.%d: uaddr = %p, got it\n" , |
258 | l->l_proc->p_pid, l->l_lid, SCARG(uap, uaddr))); |
259 | return 0; |
260 | break; |
261 | default: |
262 | FUTEXPRINTF(("FUTEX_WAIT: unexpected ret = %d\n" , ret)); |
263 | break; |
264 | } |
265 | |
266 | /* NOTREACHED */ |
267 | break; |
268 | |
269 | case LINUX_FUTEX_WAKE: |
270 | val = FUTEX_BITSET_MATCH_ANY; |
271 | /*FALLTHROUGH*/ |
272 | case LINUX_FUTEX_WAKE_BITSET: |
273 | /* |
274 | * XXX: Linux is able cope with different addresses |
275 | * corresponding to the same mapped memory in the sleeping |
276 | * and the waker process(es). |
277 | */ |
278 | FUTEXPRINTF(("FUTEX_WAKE %d.%d: uaddr = %p, val = %d\n" , |
279 | l->l_proc->p_pid, l->l_lid, |
280 | SCARG(uap, uaddr), SCARG(uap, val))); |
281 | |
282 | FUTEX_SYSTEM_LOCK; |
283 | FUTEX_LOCK; |
284 | f = futex_get(SCARG(uap, uaddr), val3); |
285 | *retval = futex_wake(f, SCARG(uap, val), NULL, 0); |
286 | futex_put(f); |
287 | FUTEX_UNLOCK; |
288 | FUTEX_SYSTEM_UNLOCK; |
289 | |
290 | break; |
291 | |
292 | case LINUX_FUTEX_CMP_REQUEUE: |
293 | FUTEX_SYSTEM_LOCK; |
294 | |
295 | if ((error = copyin(SCARG(uap, uaddr), |
296 | &val, sizeof(val))) != 0) { |
297 | FUTEX_SYSTEM_UNLOCK; |
298 | return error; |
299 | } |
300 | |
301 | if (val != val3) { |
302 | FUTEX_SYSTEM_UNLOCK; |
303 | return EAGAIN; |
304 | } |
305 | |
306 | FUTEXPRINTF(("FUTEX_CMP_REQUEUE %d.%d: uaddr = %p, val = %d, " |
307 | "uaddr2 = %p, val2 = %d\n" , |
308 | l->l_proc->p_pid, l->l_lid, |
309 | SCARG(uap, uaddr), SCARG(uap, val), SCARG(uap, uaddr2), |
310 | (int)(unsigned long)SCARG(uap, timeout))); |
311 | |
312 | FUTEX_LOCK; |
313 | f = futex_get(SCARG(uap, uaddr), val3); |
314 | newf = futex_get(SCARG(uap, uaddr2), val3); |
315 | *retval = futex_wake(f, SCARG(uap, val), newf, |
316 | (int)(unsigned long)SCARG(uap, timeout)); |
317 | futex_put(f); |
318 | futex_put(newf); |
319 | FUTEX_UNLOCK; |
320 | |
321 | FUTEX_SYSTEM_UNLOCK; |
322 | break; |
323 | |
324 | case LINUX_FUTEX_REQUEUE: |
325 | FUTEX_SYSTEM_LOCK; |
326 | |
327 | FUTEXPRINTF(("FUTEX_REQUEUE %d.%d: uaddr = %p, val = %d, " |
328 | "uaddr2 = %p, val2 = %d\n" , |
329 | l->l_proc->p_pid, l->l_lid, |
330 | SCARG(uap, uaddr), SCARG(uap, val), SCARG(uap, uaddr2), |
331 | (int)(unsigned long)SCARG(uap, timeout))); |
332 | |
333 | FUTEX_LOCK; |
334 | f = futex_get(SCARG(uap, uaddr), val3); |
335 | newf = futex_get(SCARG(uap, uaddr2), val3); |
336 | *retval = futex_wake(f, SCARG(uap, val), newf, |
337 | (int)(unsigned long)SCARG(uap, timeout)); |
338 | futex_put(f); |
339 | futex_put(newf); |
340 | FUTEX_UNLOCK; |
341 | |
342 | FUTEX_SYSTEM_UNLOCK; |
343 | break; |
344 | |
345 | case LINUX_FUTEX_FD: |
346 | FUTEXPRINTF(("%s: unimplemented op %d\n" , __func__, cmd)); |
347 | return ENOSYS; |
348 | case LINUX_FUTEX_WAKE_OP: |
349 | FUTEX_SYSTEM_LOCK; |
350 | |
351 | FUTEXPRINTF(("FUTEX_WAKE_OP %d.%d: uaddr = %p, op = %d, " |
352 | "val = %d, uaddr2 = %p, val2 = %d\n" , |
353 | l->l_proc->p_pid, l->l_lid, |
354 | SCARG(uap, uaddr), cmd, SCARG(uap, val), |
355 | SCARG(uap, uaddr2), |
356 | (int)(unsigned long)SCARG(uap, timeout))); |
357 | |
358 | FUTEX_LOCK; |
359 | f = futex_get(SCARG(uap, uaddr), val3); |
360 | f2 = futex_get(SCARG(uap, uaddr2), val3); |
361 | FUTEX_UNLOCK; |
362 | |
363 | /* |
364 | * This function returns positive number as results and |
365 | * negative as errors |
366 | */ |
367 | op_ret = futex_atomic_op(l, val3, SCARG(uap, uaddr2)); |
368 | FUTEX_LOCK; |
369 | if (op_ret < 0) { |
370 | futex_put(f); |
371 | futex_put(f2); |
372 | FUTEX_UNLOCK; |
373 | FUTEX_SYSTEM_UNLOCK; |
374 | return -op_ret; |
375 | } |
376 | |
377 | ret = futex_wake(f, SCARG(uap, val), NULL, 0); |
378 | futex_put(f); |
379 | if (op_ret > 0) { |
380 | op_ret = 0; |
381 | /* |
382 | * Linux abuses the address of the timespec parameter |
383 | * as the number of retries |
384 | */ |
385 | op_ret += futex_wake(f2, |
386 | (int)(unsigned long)SCARG(uap, timeout), NULL, 0); |
387 | ret += op_ret; |
388 | } |
389 | futex_put(f2); |
390 | FUTEX_UNLOCK; |
391 | FUTEX_SYSTEM_UNLOCK; |
392 | *retval = ret; |
393 | break; |
394 | default: |
395 | FUTEXPRINTF(("%s: unknown op %d\n" , __func__, cmd)); |
396 | return ENOSYS; |
397 | } |
398 | return 0; |
399 | } |
400 | |
401 | static struct waiting_proc * |
402 | futex_wp_alloc(void) |
403 | { |
404 | struct waiting_proc *wp; |
405 | |
406 | wp = kmem_zalloc(sizeof(*wp), KM_SLEEP); |
407 | cv_init(&wp->wp_futex_cv, "futex" ); |
408 | return wp; |
409 | } |
410 | |
411 | static void |
412 | futex_wp_free(struct waiting_proc *wp) |
413 | { |
414 | |
415 | cv_destroy(&wp->wp_futex_cv); |
416 | kmem_free(wp, sizeof(*wp)); |
417 | } |
418 | |
419 | static struct futex * |
420 | futex_get(void *uaddr, uint32_t bitset) |
421 | { |
422 | struct futex *f; |
423 | |
424 | FUTEX_LOCKASSERT; |
425 | |
426 | LIST_FOREACH(f, &futex_list, f_list) { |
427 | if (f->f_uaddr == uaddr) { |
428 | f->f_refcount++; |
429 | return f; |
430 | } |
431 | } |
432 | |
433 | /* Not found, create it */ |
434 | f = kmem_zalloc(sizeof(*f), KM_SLEEP); |
435 | f->f_uaddr = uaddr; |
436 | f->f_bitset = bitset; |
437 | f->f_refcount = 1; |
438 | TAILQ_INIT(&f->f_waiting_proc); |
439 | LIST_INSERT_HEAD(&futex_list, f, f_list); |
440 | |
441 | return f; |
442 | } |
443 | |
444 | static void |
445 | futex_ref(struct futex *f) |
446 | { |
447 | |
448 | FUTEX_LOCKASSERT; |
449 | |
450 | f->f_refcount++; |
451 | } |
452 | |
453 | static void |
454 | futex_put(struct futex *f) |
455 | { |
456 | |
457 | FUTEX_LOCKASSERT; |
458 | |
459 | f->f_refcount--; |
460 | if (f->f_refcount == 0) { |
461 | KASSERT(TAILQ_EMPTY(&f->f_waiting_proc)); |
462 | LIST_REMOVE(f, f_list); |
463 | kmem_free(f, sizeof(*f)); |
464 | } |
465 | } |
466 | |
467 | static int |
468 | futex_sleep(struct futex **fp, lwp_t *l, int timeout, struct waiting_proc *wp) |
469 | { |
470 | struct futex *f; |
471 | int ret; |
472 | |
473 | FUTEX_LOCKASSERT; |
474 | |
475 | f = *fp; |
476 | wp->wp_futex = f; |
477 | TAILQ_INSERT_TAIL(&f->f_waiting_proc, wp, wp_list); |
478 | wp->wp_onlist = true; |
479 | ret = cv_timedwait_sig(&wp->wp_futex_cv, &futex_lock, timeout); |
480 | |
481 | /* |
482 | * we may have been requeued to a different futex before we were |
483 | * woken up, so let the caller know which futex to put. if we were |
484 | * woken by futex_wake() then it took us off the waiting list, |
485 | * but if our sleep was interrupted or timed out then we might |
486 | * need to take ourselves off the waiting list. |
487 | */ |
488 | |
489 | f = wp->wp_futex; |
490 | if (wp->wp_onlist) { |
491 | TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); |
492 | } |
493 | *fp = f; |
494 | return ret; |
495 | } |
496 | |
497 | static int |
498 | futex_wake(struct futex *f, int n, struct futex *newf, int n2) |
499 | { |
500 | struct waiting_proc *wp; |
501 | int count = 0; |
502 | |
503 | FUTEX_LOCKASSERT; |
504 | |
505 | /* |
506 | * wake up up to n threads waiting on this futex. |
507 | */ |
508 | |
509 | while (n--) { |
510 | wp = TAILQ_FIRST(&f->f_waiting_proc); |
511 | if (wp == NULL) |
512 | return count; |
513 | |
514 | KASSERT(f == wp->wp_futex); |
515 | TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); |
516 | wp->wp_onlist = false; |
517 | cv_signal(&wp->wp_futex_cv); |
518 | count++; |
519 | } |
520 | if (newf == NULL) |
521 | return count; |
522 | |
523 | /* |
524 | * then requeue up to n2 additional threads to newf |
525 | * (without waking them up). |
526 | */ |
527 | |
528 | while (n2--) { |
529 | wp = TAILQ_FIRST(&f->f_waiting_proc); |
530 | if (wp == NULL) |
531 | return count; |
532 | |
533 | KASSERT(f == wp->wp_futex); |
534 | TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); |
535 | futex_put(f); |
536 | |
537 | wp->wp_futex = newf; |
538 | futex_ref(newf); |
539 | TAILQ_INSERT_TAIL(&newf->f_waiting_proc, wp, wp_list); |
540 | count++; |
541 | } |
542 | return count; |
543 | } |
544 | |
545 | static int |
546 | futex_atomic_op(lwp_t *l, int encoded_op, void *uaddr) |
547 | { |
548 | const int op = (encoded_op >> 28) & 7; |
549 | const int cmp = (encoded_op >> 24) & 15; |
550 | const int cmparg = (encoded_op << 20) >> 20; |
551 | int oparg = (encoded_op << 8) >> 20; |
552 | int error, oldval, cval; |
553 | |
554 | if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) |
555 | oparg = 1 << oparg; |
556 | |
557 | /* XXX: linux verifies access here and returns EFAULT */ |
558 | |
559 | if (copyin(uaddr, &cval, sizeof(int)) != 0) |
560 | return -EFAULT; |
561 | |
562 | for (;;) { |
563 | int nval; |
564 | |
565 | switch (op) { |
566 | case FUTEX_OP_SET: |
567 | nval = oparg; |
568 | break; |
569 | case FUTEX_OP_ADD: |
570 | nval = cval + oparg; |
571 | break; |
572 | case FUTEX_OP_OR: |
573 | nval = cval | oparg; |
574 | break; |
575 | case FUTEX_OP_ANDN: |
576 | nval = cval & ~oparg; |
577 | break; |
578 | case FUTEX_OP_XOR: |
579 | nval = cval ^ oparg; |
580 | break; |
581 | default: |
582 | return -ENOSYS; |
583 | } |
584 | |
585 | error = ucas_int(uaddr, cval, nval, &oldval); |
586 | if (error || oldval == cval) { |
587 | break; |
588 | } |
589 | cval = oldval; |
590 | } |
591 | |
592 | if (error) |
593 | return -EFAULT; |
594 | |
595 | switch (cmp) { |
596 | case FUTEX_OP_CMP_EQ: |
597 | return (oldval == cmparg); |
598 | case FUTEX_OP_CMP_NE: |
599 | return (oldval != cmparg); |
600 | case FUTEX_OP_CMP_LT: |
601 | return (oldval < cmparg); |
602 | case FUTEX_OP_CMP_GE: |
603 | return (oldval >= cmparg); |
604 | case FUTEX_OP_CMP_LE: |
605 | return (oldval <= cmparg); |
606 | case FUTEX_OP_CMP_GT: |
607 | return (oldval > cmparg); |
608 | default: |
609 | return -ENOSYS; |
610 | } |
611 | } |
612 | |
613 | int |
614 | linux_sys_set_robust_list(struct lwp *l, |
615 | const struct linux_sys_set_robust_list_args *uap, register_t *retval) |
616 | { |
617 | /* { |
618 | syscallarg(struct linux_robust_list_head *) head; |
619 | syscallarg(size_t) len; |
620 | } */ |
621 | struct linux_emuldata *led; |
622 | |
623 | if (SCARG(uap, len) != sizeof(struct linux_robust_list_head)) |
624 | return EINVAL; |
625 | led = l->l_emuldata; |
626 | led->led_robust_head = SCARG(uap, head); |
627 | *retval = 0; |
628 | return 0; |
629 | } |
630 | |
631 | int |
632 | linux_sys_get_robust_list(struct lwp *l, |
633 | const struct linux_sys_get_robust_list_args *uap, register_t *retval) |
634 | { |
635 | /* { |
636 | syscallarg(int) pid; |
637 | syscallarg(struct linux_robust_list_head **) head; |
638 | syscallarg(size_t *) len; |
639 | } */ |
640 | struct proc *p; |
641 | struct linux_emuldata *led; |
642 | struct linux_robust_list_head *head; |
643 | size_t len; |
644 | int error = 0; |
645 | |
646 | p = l->l_proc; |
647 | if (!SCARG(uap, pid)) { |
648 | led = l->l_emuldata; |
649 | head = led->led_robust_head; |
650 | } else { |
651 | mutex_enter(p->p_lock); |
652 | l = lwp_find(p, SCARG(uap, pid)); |
653 | if (l != NULL) { |
654 | led = l->l_emuldata; |
655 | head = led->led_robust_head; |
656 | } |
657 | mutex_exit(p->p_lock); |
658 | if (l == NULL) { |
659 | return ESRCH; |
660 | } |
661 | } |
662 | #ifdef __arch64__ |
663 | if (p->p_flag & PK_32) { |
664 | uint32_t u32; |
665 | |
666 | u32 = 12; |
667 | error = copyout(&u32, SCARG(uap, len), sizeof(u32)); |
668 | if (error) |
669 | return error; |
670 | u32 = (uint32_t)(uintptr_t)head; |
671 | return copyout(&u32, SCARG(uap, head), sizeof(u32)); |
672 | } |
673 | #endif |
674 | |
675 | len = sizeof(*head); |
676 | error = copyout(&len, SCARG(uap, len), sizeof(len)); |
677 | if (error) |
678 | return error; |
679 | return copyout(&head, SCARG(uap, head), sizeof(head)); |
680 | } |
681 | |
682 | static int |
683 | handle_futex_death(void *uaddr, pid_t pid, int pi) |
684 | { |
685 | int uval, nval, mval; |
686 | struct futex *f; |
687 | |
688 | retry: |
689 | if (copyin(uaddr, &uval, sizeof(uval))) |
690 | return EFAULT; |
691 | |
692 | if ((uval & FUTEX_TID_MASK) == pid) { |
693 | mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; |
694 | nval = atomic_cas_32(uaddr, uval, mval); |
695 | |
696 | if (nval == -1) |
697 | return EFAULT; |
698 | |
699 | if (nval != uval) |
700 | goto retry; |
701 | |
702 | if (!pi && (uval & FUTEX_WAITERS)) { |
703 | FUTEX_LOCK; |
704 | f = futex_get(uaddr, FUTEX_BITSET_MATCH_ANY); |
705 | futex_wake(f, 1, NULL, 0); |
706 | FUTEX_UNLOCK; |
707 | } |
708 | } |
709 | |
710 | return 0; |
711 | } |
712 | |
/*
 * Read one robust-list link word from the user address head and split
 * it: the low bit goes to *pi, the remaining bits form the entry
 * pointer stored in *entry.  A 32-bit process (PK_32) has a 32-bit
 * link word.
 *
 * Returns 0 on success or EFAULT when the word cannot be copied in.
 */
static int
fetch_robust_entry(struct lwp *l, struct linux_robust_list **entry,
    struct linux_robust_list **head, int *pi)
{
	unsigned long raw;

#ifdef __arch64__
	if (l->l_proc->p_flag & PK_32) {
		uint32_t raw32;

		if (copyin(head, &raw32, sizeof(raw32)))
			return EFAULT;
		raw = (unsigned long)raw32;
	} else
#endif
	if (copyin(head, &raw, sizeof(raw)))
		return EFAULT;

	/* Bit 0 is a flag, everything else is the pointer. */
	*entry = (void *)(raw & ~1UL);
	*pi = raw & 1;

	return 0;
}
736 | |
737 | /* This walks the list of robust futexes, releasing them. */ |
738 | void |
739 | release_futexes(struct lwp *l) |
740 | { |
741 | struct linux_robust_list_head head; |
742 | struct linux_robust_list *entry, *next_entry = NULL, *pending; |
743 | unsigned int limit = 2048, pi, next_pi, pip; |
744 | struct linux_emuldata *led; |
745 | unsigned long futex_offset; |
746 | int rc; |
747 | |
748 | led = l->l_emuldata; |
749 | if (led->led_robust_head == NULL) |
750 | return; |
751 | |
752 | #ifdef __arch64__ |
753 | if (l->l_proc->p_flag & PK_32) { |
754 | uint32_t u32s[3]; |
755 | |
756 | if (copyin(led->led_robust_head, u32s, sizeof(u32s))) |
757 | return; |
758 | |
759 | head.list.next = (void *)(uintptr_t)u32s[0]; |
760 | head.futex_offset = (unsigned long)u32s[1]; |
761 | head.pending_list = (void *)(uintptr_t)u32s[2]; |
762 | } else |
763 | #endif |
764 | if (copyin(led->led_robust_head, &head, sizeof(head))) |
765 | return; |
766 | |
767 | if (fetch_robust_entry(l, &entry, &head.list.next, &pi)) |
768 | return; |
769 | |
770 | #ifdef __arch64__ |
771 | if (l->l_proc->p_flag & PK_32) { |
772 | uint32_t u32; |
773 | |
774 | if (copyin(led->led_robust_head, &u32, sizeof(u32))) |
775 | return; |
776 | |
777 | head.futex_offset = (unsigned long)u32; |
778 | futex_offset = head.futex_offset; |
779 | } else |
780 | #endif |
781 | if (copyin(&head.futex_offset, &futex_offset, sizeof(unsigned long))) |
782 | return; |
783 | |
784 | if (fetch_robust_entry(l, &pending, &head.pending_list, &pip)) |
785 | return; |
786 | |
787 | while (entry != &head.list) { |
788 | rc = fetch_robust_entry(l, &next_entry, &entry->next, &next_pi); |
789 | |
790 | if (entry != pending) |
791 | if (handle_futex_death((char *)entry + futex_offset, |
792 | l->l_lid, pi)) |
793 | return; |
794 | |
795 | if (rc) |
796 | return; |
797 | |
798 | entry = next_entry; |
799 | pi = next_pi; |
800 | |
801 | if (!--limit) |
802 | break; |
803 | |
804 | yield(); /* XXX why? */ |
805 | } |
806 | |
807 | if (pending) |
808 | handle_futex_death((char *)pending + futex_offset, |
809 | l->l_lid, pip); |
810 | } |
811 | |