/*	$NetBSD: kern_syscall.c,v 1.18 2019/05/06 08:05:03 kamil Exp $	*/

/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software developed for The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_syscall.c,v 1.18 2019/05/06 08:05:03 kamil Exp $");

#ifdef _KERNEL_OPT
#include "opt_modular.h"
#include "opt_syscall_debug.h"
#include "opt_ktrace.h"
#include "opt_ptrace.h"
#include "opt_dtrace.h"
#endif

/* XXX To get syscall prototypes. */
#define SYSVSHM
#define SYSVSEM
#define SYSVMSG

#include <sys/param.h>
#include <sys/module.h>
#include <sys/sched.h>
#include <sys/syscall.h>
#include <sys/syscallargs.h>
#include <sys/syscallvar.h>
#include <sys/systm.h>
#include <sys/xcall.h>
#include <sys/ktrace.h>
#include <sys/ptrace.h>

int
sys_nomodule(struct lwp *l, const void *v, register_t *retval)
{
#ifdef MODULAR

	const struct sysent *sy;
	const struct emul *em;
	const struct sc_autoload *auto_list;
	u_int code;

	/*
	 * Restart the syscall if we interrupted a module unload that
	 * failed.  Acquiring kernconfig_lock delays us until any unload
	 * has been completed or rolled back.
	 */
	kernconfig_lock();
	sy = l->l_sysent;
	if (sy->sy_call != sys_nomodule) {
		kernconfig_unlock();
		return ERESTART;
	}
	/*
	 * Try to autoload a module to satisfy the request.  If it
	 * works, retry the request.
	 */
	em = l->l_proc->p_emul;
	code = sy - em->e_sysent;

	if ((auto_list = em->e_sc_autoload) != NULL)
		for (; auto_list->al_code > 0; auto_list++) {
			if (auto_list->al_code != code) {
				continue;
			}
			if (module_autoload(auto_list->al_module,
			    MODULE_CLASS_ANY) != 0 ||
			    sy->sy_call == sys_nomodule) {
				break;
			}
			kernconfig_unlock();
			return ERESTART;
		}
	kernconfig_unlock();
#endif	/* MODULAR */

	return sys_nosys(l, v, retval);
}
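
/*
 * The table walked above comes from the emulation's e_sc_autoload hook:
 * a zero-terminated array of struct sc_autoload, each entry mapping a
 * syscall code to the name of the module providing it.  A minimal
 * sketch of such a table (entries illustrative; the native table is
 * generated from syscalls.master):
 *
 *	static struct sc_autoload example_autoload[] = {
 *		{ SYS_mq_open,	"mqueue" },
 *		{ 0, NULL }
 *	};
 */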

int
syscall_establish(const struct emul *em, const struct syscall_package *sp)
{
	struct sysent *sy;
	int i;

	KASSERT(kernconfig_is_held());

	if (em == NULL) {
		em = &emul_netbsd;
	}
	sy = em->e_sysent;

	/*
	 * Ensure that all preconditions are valid, since this is
	 * an all or nothing deal.  Once a system call is entered,
	 * it can become busy and we could be unable to remove it
	 * on error.
	 */
	for (i = 0; sp[i].sp_call != NULL; i++) {
		if (sp[i].sp_code >= SYS_NSYSENT)
			return EINVAL;
		if (sy[sp[i].sp_code].sy_call != sys_nomodule &&
		    sy[sp[i].sp_code].sy_call != sys_nosys) {
#ifdef DIAGNOSTIC
			printf("syscall %d is busy\n", sp[i].sp_code);
#endif
			return EBUSY;
		}
	}
	/* Everything looks good, patch them in. */
	for (i = 0; sp[i].sp_call != NULL; i++) {
		sy[sp[i].sp_code].sy_call = sp[i].sp_call;
	}

	return 0;
}
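
/*
 * Typical usage, sketched with a purely hypothetical "example" module
 * (SYS_example and sys_example are illustrative names, not real
 * symbols).  A module hands in a NULL-terminated package array, and
 * passing NULL for 'em' selects the native emul_netbsd:
 *
 *	static const struct syscall_package example_syscalls[] = {
 *		{ SYS_example, 0, (sy_call_t *)sys_example },
 *		{ 0, 0, NULL },
 *	};
 *
 * The module's modcmd handler then calls
 * syscall_establish(NULL, example_syscalls) on MODULE_CMD_INIT and
 * syscall_disestablish(NULL, example_syscalls) on MODULE_CMD_FINI.
 */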

int
syscall_disestablish(const struct emul *em, const struct syscall_package *sp)
{
	struct sysent *sy;
	const uint32_t *sb;
	uint64_t where;
	lwp_t *l;
	int i;

	KASSERT(kernconfig_is_held());

	if (em == NULL) {
		em = &emul_netbsd;
	}
	sy = em->e_sysent;
	sb = em->e_nomodbits;

	/*
	 * First, patch the system calls to sys_nomodule or sys_nosys
	 * to gate further activity.
	 */
	for (i = 0; sp[i].sp_call != NULL; i++) {
		KASSERT(sy[sp[i].sp_code].sy_call == sp[i].sp_call);
		sy[sp[i].sp_code].sy_call =
		    sb[sp[i].sp_code / 32] & (1 << (sp[i].sp_code % 32)) ?
		    sys_nomodule : sys_nosys;
	}

	/*
	 * Run a cross call to cycle through all CPUs.  This does two
	 * things: lock activity provides a barrier and makes our update
	 * of sy_call visible to all CPUs, and upon return we can be sure
	 * that we see pertinent values of l_sysent posted by remote CPUs.
	 */
	where = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL);
	xc_wait(where);

	/*
	 * Now it's safe to check l_sysent.  Run through all LWPs and see
	 * if anyone is still using the system call.
	 */
	for (i = 0; sp[i].sp_call != NULL; i++) {
		mutex_enter(proc_lock);
		LIST_FOREACH(l, &alllwp, l_list) {
			if (l->l_sysent == &sy[sp[i].sp_code]) {
				break;
			}
		}
		mutex_exit(proc_lock);
		if (l == NULL) {
			continue;
		}
		/*
		 * We lose: one or more calls are still in use.  Put back
		 * the old entrypoints and act like nothing happened.
		 * When we drop kernconfig_lock, any system calls held in
		 * sys_nomodule() will be restarted.
		 */
		for (i = 0; sp[i].sp_call != NULL; i++) {
			sy[sp[i].sp_code].sy_call = sp[i].sp_call;
		}
		return EBUSY;
	}

	return 0;
}

/*
 * Return true if system call tracing is enabled for the specified process.
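 *
 * Callers are expected to cache the result (for example in
 * p->p_trace_enabled, which the MD syscall dispatch code tests) rather
 * than re-evaluate this predicate on every system call.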
 */
bool
trace_is_enabled(struct proc *p)
{
#ifdef SYSCALL_DEBUG
	return (true);
#endif
#ifdef KTRACE
	if (ISSET(p->p_traceflag, (KTRFAC_SYSCALL | KTRFAC_SYSRET)))
		return (true);
#endif
#ifdef PTRACE
	if (ISSET(p->p_slflag, PSL_SYSCALL))
		return (true);
#endif

	return (false);
}

/*
 * Start trace of particular system call.  If process is being traced,
 * this routine is called by MD syscall dispatch code just before
 * a system call is actually executed.
 */
int
trace_enter(register_t code, const struct sysent *sy, const void *args)
{
	int error = 0;

#ifdef KDTRACE_HOOKS
	if (sy->sy_entry) {
		struct emul *e = curlwp->l_proc->p_emul;
		(*e->e_dtrace_syscall)(sy->sy_entry, code, sy, args, NULL, 0);
	}
#endif

#ifdef SYSCALL_DEBUG
	scdebug_call(code, args);
#endif /* SYSCALL_DEBUG */

	ktrsyscall(code, args, sy->sy_narg);

#ifdef PTRACE
	if ((curlwp->l_proc->p_slflag & (PSL_SYSCALL|PSL_TRACED)) ==
	    (PSL_SYSCALL|PSL_TRACED)) {
		proc_stoptrace(TRAP_SCE, code, args, NULL, 0);
		if (curlwp->l_proc->p_slflag & PSL_SYSCALLEMU) {
			/* tracer will emulate syscall for us */
			error = EJUSTRETURN;
		}
	}
#endif
	return error;
}

/*
 * End trace of particular system call.  If process is being traced,
 * this routine is called by MD syscall dispatch code just after
 * a system call finishes.
 * The MD caller guarantees the passed 'code' is within the supported
 * system call number range for the emulation the process runs under.
 */
void
trace_exit(register_t code, const struct sysent *sy, const void *args,
    register_t rval[], int error)
{
#if defined(PTRACE) || defined(KDTRACE_HOOKS)
	struct proc *p = curlwp->l_proc;
#endif

#ifdef KDTRACE_HOOKS
	if (sy->sy_return) {
		(*p->p_emul->e_dtrace_syscall)(sy->sy_return, code, sy, args,
		    rval, error);
	}
#endif

#ifdef SYSCALL_DEBUG
	scdebug_ret(code, error, rval);
#endif /* SYSCALL_DEBUG */

	ktrsysret(code, error, rval);

#ifdef PTRACE
	if ((p->p_slflag & (PSL_SYSCALL|PSL_TRACED|PSL_SYSCALLEMU)) ==
	    (PSL_SYSCALL|PSL_TRACED)) {
		proc_stoptrace(TRAP_SCX, code, args, rval, error);
	}
	CLR(p->p_slflag, PSL_SYSCALLEMU);
#endif
}
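
/*
 * For reference, the MD dispatch pattern these two hooks assume looks
 * roughly like the following (a condensed sketch, not the code of any
 * particular port; callp, args and rval are the dispatcher's locals).
 * Note that the system call must be skipped when trace_enter() fails,
 * e.g. when it returns EJUSTRETURN for PSL_SYSCALLEMU:
 *
 *	if (__predict_false(p->p_trace_enabled)) {
 *		error = trace_enter(code, callp, args);
 *		if (error == 0)
 *			error = sy_call(callp, l, args, rval);
 *		trace_exit(code, callp, args, rval, error);
 *	} else
 *		error = sy_call(callp, l, args, rval);
 */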