vfs_mount.c source code [netbsd/sys/kern/vfs_mount.c]

/*	$NetBSD: vfs_mount.c,v 1.70 2019/02/20 10:08:37 hannken Exp $	*/
2
/*-
4	* Copyright (c) 1997-2011 The NetBSD Foundation, Inc.
5	* All rights reserved.
6	*
7	* This code is derived from software contributed to The NetBSD Foundation
8	* by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9	* NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran.
10	*
11	* Redistribution and use in source and binary forms, with or without
12	* modification, are permitted provided that the following conditions
13	* are met:
14	* 1. Redistributions of source code must retain the above copyright
15	* notice, this list of conditions and the following disclaimer.
16	* 2. Redistributions in binary form must reproduce the above copyright
17	* notice, this list of conditions and the following disclaimer in the
18	* documentation and/or other materials provided with the distribution.
19	*
20	* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21	* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22	* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23	* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24	* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30	* POSSIBILITY OF SUCH DAMAGE.
31	*/
32
33	/*
34	* Copyright (c) 1989, 1993
35	* The Regents of the University of California. All rights reserved.
36	* (c) UNIX System Laboratories, Inc.
37	* All or some portions of this file are derived from material licensed
38	* to the University of California by American Telephone and Telegraph
39	* Co. or Unix System Laboratories, Inc. and are reproduced herein with
40	* the permission of UNIX System Laboratories, Inc.
41	*
42	* Redistribution and use in source and binary forms, with or without
43	* modification, are permitted provided that the following conditions
44	* are met:
45	* 1. Redistributions of source code must retain the above copyright
46	* notice, this list of conditions and the following disclaimer.
47	* 2. Redistributions in binary form must reproduce the above copyright
48	* notice, this list of conditions and the following disclaimer in the
49	* documentation and/or other materials provided with the distribution.
50	* 3. Neither the name of the University nor the names of its contributors
51	* may be used to endorse or promote products derived from this software
52	* without specific prior written permission.
53	*
54	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64	* SUCH DAMAGE.
65	*
66	* @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94
67	*/
68
69	#include <sys/cdefs.h>
70	__KERNEL_RCSID(`0`, "$NetBSD: vfs_mount.c,v 1.70 2019/02/20 10:08:37 hannken Exp $");
71
72	#include <sys/param.h>
73	#include <sys/kernel.h>
74
75	#include <sys/atomic.h>
76	#include <sys/buf.h>
77	#include <sys/conf.h>
78	#include <sys/fcntl.h>
79	#include <sys/filedesc.h>
80	#include <sys/device.h>
81	#include <sys/kauth.h>
82	#include <sys/kmem.h>
83	#include <sys/module.h>
84	#include <sys/mount.h>
85	#include <sys/fstrans.h>
86	#include <sys/namei.h>
87	#include <sys/extattr.h>
88	#include <sys/syscallargs.h>
89	#include <sys/sysctl.h>
90	#include <sys/systm.h>
91	#include <sys/vfs_syscalls.h>
92	#include <sys/vnode_impl.h>
93
94	#include <miscfs/genfs/genfs.h>
95	#include <miscfs/specfs/specdev.h>
96
97	enum mountlist_type {
98	ME_MOUNT,
99	ME_MARKER
100	};
101	struct mountlist_entry {
102	TAILQ_ENTRY(mountlist_entry) me_list; / Mount list. /
103	struct mount me_mount; /* Actual mount if ME_MOUNT,*
104	current mount else. /*
105	enum mountlist_type me_type; / Mount or marker. /
106	};
107	struct mount_iterator {
108	struct mountlist_entry mi_entry;
109	};
110
111	static struct vnode vfs_vnode_iterator_next1(struct* vnode_iterator *,
112	bool ()(void* , struct* vnode ), void* *, bool);
113
114	/ Root filesystem. /
115	vnode_t * rootvnode;
116
117	/ Mounted filesystem list. /
118	static TAILQ_HEAD(mountlist, mountlist_entry) mountlist;
119	static kmutex_t mountlist_lock;
120	int vnode_offset_next_by_lru / XXX: ugly hack for pstat.c /
121	= offsetof(vnode_impl_t, vi_lrulist.tqe_next);
122
123	kmutex_t mntvnode_lock;
124	kmutex_t vfs_list_lock;
125
126	static specificdata_domain_t mount_specificdata_domain;
127	static kmutex_t mntid_lock;
128
129	static kmutex_t mountgen_lock;
130	static uint64_t mountgen;
131
132	void
133	vfs_mount_sysinit(void)
134	{
135
136	TAILQ_INIT(&mountlist);
137	mutex_init(&mountlist_lock, MUTEX_DEFAULT, IPL_NONE);
138	mutex_init(&mntvnode_lock, MUTEX_DEFAULT, IPL_NONE);
139	mutex_init(&vfs_list_lock, MUTEX_DEFAULT, IPL_NONE);
140
141	mount_specificdata_domain = specificdata_domain_create();
142	mutex_init(&mntid_lock, MUTEX_DEFAULT, IPL_NONE);
143	mutex_init(&mountgen_lock, MUTEX_DEFAULT, IPL_NONE);
144	mountgen = `0`;
145	}
146
147	struct mount *
148	vfs_mountalloc(struct vfsops vfsops, vnode_t vp)
149	{
150	struct mount *mp;
151	int error __diagused;
152
153	mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
154	mp->mnt_op = vfsops;
155	mp->mnt_refcnt = `1`;
156	TAILQ_INIT(&mp->mnt_vnodelist);
157	mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE);
158	mutex_init(&mp->mnt_updating, MUTEX_DEFAULT, IPL_NONE);
159	mp->mnt_vnodecovered = vp;
160	mount_initspecific(mp);
161
162	error = fstrans_mount(mp);
163	KASSERT(error == `0`);
164
165	mutex_enter(&mountgen_lock);
166	mp->mnt_gen = mountgen++;
167	mutex_exit(&mountgen_lock);
168
169	return mp;
170	}
171
172	/*
173	* vfs_rootmountalloc: lookup a filesystem type, and if found allocate and
174	* initialize a mount structure for it.
175	*
176	* Devname is usually updated by mount(8) after booting.
177	*/
178	int
179	vfs_rootmountalloc(const char fstypename, const* char *devname,
180	struct mount **mpp)
181	{
182	struct vfsops *vfsp = NULL;
183	struct mount *mp;
184	int error __diagused;
185
186	mutex_enter(&vfs_list_lock);
187	LIST_FOREACH(vfsp, &vfs_list, vfs_list)
188	if (!strncmp(vfsp->vfs_name, fstypename,
189	sizeof(mp->mnt_stat.f_fstypename)))
190	break;
191	if (vfsp == NULL) {
192	mutex_exit(&vfs_list_lock);
193	return (ENODEV);
194	}
195	vfsp->vfs_refcount++;
196	mutex_exit(&vfs_list_lock);
197
198	if ((mp = vfs_mountalloc(vfsp, NULL)) == NULL)
199	return ENOMEM;
200	error = vfs_busy(mp);
201	KASSERT(error == `0`);
202	mp->mnt_flag = MNT_RDONLY;
203	(void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name,
204	sizeof(mp->mnt_stat.f_fstypename));
205	mp->mnt_stat.f_mntonname[`0`] = `'/'`;
206	mp->mnt_stat.f_mntonname[`1`] = `'\0'`;
207	mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - `1`] =
208	`'\0'`;
209	(void)copystr(devname, mp->mnt_stat.f_mntfromname,
210	sizeof(mp->mnt_stat.f_mntfromname) - `1`, `0`);
211	*mpp = mp;
212	return `0`;
213	}
214
215	/*
216	* vfs_getnewfsid: get a new unique fsid.
217	*/
218	void
219	vfs_getnewfsid(struct mount *mp)
220	{
221	static u_short xxxfs_mntid;
222	fsid_t tfsid;
223	int mtype;
224
225	mutex_enter(&mntid_lock);
226	mtype = makefstype(mp->mnt_op->vfs_name);
227	mp->mnt_stat.f_fsidx.__fsid_val[`0`] = makedev(mtype, `0`);
228	mp->mnt_stat.f_fsidx.__fsid_val[`1`] = mtype;
229	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[`0`];
230	if (xxxfs_mntid == `0`)
231	++xxxfs_mntid;
232	tfsid.__fsid_val[`0`] = makedev(mtype & `0xff`, xxxfs_mntid);
233	tfsid.__fsid_val[`1`] = mtype;
234	while (vfs_getvfs(&tfsid)) {
235	tfsid.__fsid_val[`0`]++;
236	xxxfs_mntid++;
237	}
238	mp->mnt_stat.f_fsidx.__fsid_val[`0`] = tfsid.__fsid_val[`0`];
239	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[`0`];
240	mutex_exit(&mntid_lock);
241	}
242
243	/*
244	* Lookup a mount point by filesystem identifier.
245	*
246	* XXX Needs to add a reference to the mount point.
247	*/
248	struct mount *
249	vfs_getvfs(fsid_t *fsid)
250	{
251	mount_iterator_t *iter;
252	struct mount *mp;
253
254	mountlist_iterator_init(&iter);
255	while ((mp = mountlist_iterator_next(iter)) != NULL) {
256	if (mp->mnt_stat.f_fsidx.__fsid_val[`0`] == fsid->__fsid_val[`0`] &&
257	mp->mnt_stat.f_fsidx.__fsid_val[`1`] == fsid->__fsid_val[`1`]) {
258	mountlist_iterator_destroy(iter);
259	return mp;
260	}
261	}
262	mountlist_iterator_destroy(iter);
263	return NULL;
264	}
265
266	/*
267	* Take a reference to a mount structure.
268	*/
269	void
270	vfs_ref(struct mount *mp)
271	{
272
273	KASSERT(mp->mnt_refcnt > `0` \|\| mutex_owned(&mountlist_lock));
274
275	atomic_inc_uint(&mp->mnt_refcnt);
276	}
277
278	/*
279	* Drop a reference to a mount structure, freeing if the last reference.
280	*/
281	void
282	vfs_rele(struct mount *mp)
283	{
284
285	if (__predict_true((int)atomic_dec_uint_nv(&mp->mnt_refcnt) > `0`)) {
286	return;
287	}
288
289	/*
290	* Nothing else has visibility of the mount: we can now
291	* free the data structures.
292	*/
293	KASSERT(mp->mnt_refcnt == `0`);
294	specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
295	mutex_destroy(&mp->mnt_updating);
296	mutex_destroy(&mp->mnt_renamelock);
297	if (mp->mnt_op != NULL) {
298	vfs_delref(mp->mnt_op);
299	}
300	fstrans_unmount(mp);
301	/*
302	* Final free of mp gets done from fstrans_mount_dtor().
303	*
304	* Prevents this memory to be reused as a mount before
305	* fstrans releases all references to it.
306	*/
307	}
308
309	/*
310	* Mark a mount point as busy, and gain a new reference to it. Used to
311	* prevent the file system from being unmounted during critical sections.
312	*
313	* vfs_busy can be called multiple times and by multiple threads
314	* and must be accompanied by the same number of vfs_unbusy calls.
315	*
316	* => The caller must hold a pre-existing reference to the mount.
317	* => Will fail if the file system is being unmounted, or is unmounted.
318	*/
319	static inline int
320	_vfs_busy(struct mount *mp, bool wait)
321	{
322
323	KASSERT(mp->mnt_refcnt > `0`);
324
325	if (wait) {
326	fstrans_start(mp);
327	} else {
328	if (fstrans_start_nowait(mp))
329	return EBUSY;
330	}
331	if (__predict_false((mp->mnt_iflag & IMNT_GONE) != `0`)) {
332	fstrans_done(mp);
333	return ENOENT;
334	}
335	vfs_ref(mp);
336	return `0`;
337	}
338
339	int
340	vfs_busy(struct mount *mp)
341	{
342
343	return _vfs_busy(mp, true);
344	}
345
346	int
347	vfs_trybusy(struct mount *mp)
348	{
349
350	return _vfs_busy(mp, false);
351	}
352
353	/*
354	* Unbusy a busy filesystem.
355	*
356	* Every successful vfs_busy() call must be undone by a vfs_unbusy() call.
357	*/
358	void
359	vfs_unbusy(struct mount *mp)
360	{
361
362	KASSERT(mp->mnt_refcnt > `0`);
363
364	fstrans_done(mp);
365	vfs_rele(mp);
366	}
367
368	struct vnode_iterator {
369	vnode_impl_t vi_vnode;
370	};
371
372	void
373	vfs_vnode_iterator_init(struct mount mp, struct* vnode_iterator **vnip)
374	{
375	vnode_t *vp;
376	vnode_impl_t *vip;
377
378	vp = vnalloc_marker(mp);
379	vip = VNODE_TO_VIMPL(vp);
380
381	mutex_enter(&mntvnode_lock);
382	TAILQ_INSERT_HEAD(&mp->mnt_vnodelist, vip, vi_mntvnodes);
383	vp->v_usecount = `1`;
384	mutex_exit(&mntvnode_lock);
385
386	vnip = (struct* vnode_iterator *)vip;
387	}
388
389	void
390	vfs_vnode_iterator_destroy(struct vnode_iterator *vni)
391	{
392	vnode_impl_t *mvip = &vni->vi_vnode;
393	vnode_t *mvp = VIMPL_TO_VNODE(mvip);
394
395	mutex_enter(&mntvnode_lock);
396	KASSERT(vnis_marker(mvp));
397	if (mvp->v_usecount != `0`) {
398	TAILQ_REMOVE(&mvp->v_mount->mnt_vnodelist, mvip, vi_mntvnodes);
399	mvp->v_usecount = `0`;
400	}
401	mutex_exit(&mntvnode_lock);
402	vnfree_marker(mvp);
403	}
404
405	static struct vnode *
406	vfs_vnode_iterator_next1(struct vnode_iterator *vni,
407	bool (f)(void* , struct* vnode ), void* *cl, bool do_wait)
408	{
409	vnode_impl_t *mvip = &vni->vi_vnode;
410	struct mount *mp = VIMPL_TO_VNODE(mvip)->v_mount;
411	vnode_t *vp;
412	vnode_impl_t *vip;
413	int error;
414
415	KASSERT(vnis_marker(VIMPL_TO_VNODE(mvip)));
416
417	do {
418	mutex_enter(&mntvnode_lock);
419	vip = TAILQ_NEXT(mvip, vi_mntvnodes);
420	TAILQ_REMOVE(&mp->mnt_vnodelist, mvip, vi_mntvnodes);
421	VIMPL_TO_VNODE(mvip)->v_usecount = `0`;
422	again:
423	vp = VIMPL_TO_VNODE(vip);
424	if (vp == NULL) {
425	mutex_exit(&mntvnode_lock);
426	return NULL;
427	}
428	mutex_enter(vp->v_interlock);
429	if (vnis_marker(vp) \|\|
430	vdead_check(vp, (do_wait ? `0` : VDEAD_NOWAIT)) \|\|
431	(f && !(*f)(cl, vp))) {
432	mutex_exit(vp->v_interlock);
433	vip = TAILQ_NEXT(vip, vi_mntvnodes);
434	goto again;
435	}
436
437	TAILQ_INSERT_AFTER(&mp->mnt_vnodelist, vip, mvip, vi_mntvnodes);
438	VIMPL_TO_VNODE(mvip)->v_usecount = `1`;
439	mutex_exit(&mntvnode_lock);
440	error = vcache_vget(vp);
441	KASSERT(error == `0` \|\| error == ENOENT);
442	} while (error != `0`);
443
444	return vp;
445	}
446
447	struct vnode *
448	vfs_vnode_iterator_next(struct vnode_iterator *vni,
449	bool (f)(void* , struct* vnode ), void* *cl)
450	{
451
452	return vfs_vnode_iterator_next1(vni, f, cl, false);
453	}
454
455	/*
456	* Move a vnode from one mount queue to another.
457	*/
458	void
459	vfs_insmntque(vnode_t vp, struct* mount *mp)
460	{
461	vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
462	struct mount *omp;
463
464	KASSERT(mp == NULL \|\| (mp->mnt_iflag & IMNT_UNMOUNT) == `0` \|\|
465	vp->v_tag == VT_VFS);
466
467	mutex_enter(&mntvnode_lock);
468	/*
469	* Delete from old mount point vnode list, if on one.
470	*/
471	if ((omp = vp->v_mount) != NULL)
472	TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vip, vi_mntvnodes);
473	/*
474	* Insert into list of vnodes for the new mount point, if
475	* available. The caller must take a reference on the mount
476	* structure and donate to the vnode.
477	*/
478	if ((vp->v_mount = mp) != NULL)
479	TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vip, vi_mntvnodes);
480	mutex_exit(&mntvnode_lock);
481
482	if (omp != NULL) {
483	/ Release reference to old mount. /
484	vfs_rele(omp);
485	}
486	}
487
/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If FORCECLOSE is not specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error).  If FORCECLOSE is specified, detach any active vnodes
 * that are found.
 *
 * If WRITECLOSE is set, only flush out regular file vnodes open for
 * writing.
 *
 * SKIPSYSTEM causes any vnodes marked VV_SYSTEM to be skipped.
 */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif
505
506	static vnode_t *
507	vflushnext(struct vnode_iterator marker, int* *when)
508	{
509	if (hardclock_ticks > *when) {
510	yield();
511	*when = hardclock_ticks + hz / `10`;
512	}
513	return vfs_vnode_iterator_next1(marker, NULL, NULL, true);
514	}
515
516	/*
517	* Flush one vnode. Referenced on entry, unreferenced on return.
518	*/
519	static int
520	vflush_one(vnode_t vp, vnode_t skipvp, int flags)
521	{
522	int error;
523	struct vattr vattr;
524
525	if (vp == skipvp \|\|
526	((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM))) {
527	vrele(vp);
528	return `0`;
529	}
530	/*
531	* If WRITECLOSE is set, only flush out regular file
532	* vnodes open for writing or open and unlinked.
533	*/
534	if ((flags & WRITECLOSE)) {
535	if (vp->v_type != VREG) {
536	vrele(vp);
537	return `0`;
538	}
539	error = vn_lock(vp, LK_EXCLUSIVE);
540	if (error) {
541	KASSERT(error == ENOENT);
542	vrele(vp);
543	return `0`;
544	}
545	error = VOP_FSYNC(vp, curlwp->l_cred, FSYNC_WAIT, `0`, `0`);
546	if (error == `0`)
547	error = VOP_GETATTR(vp, &vattr, curlwp->l_cred);
548	VOP_UNLOCK(vp);
549	if (error) {
550	vrele(vp);
551	return error;
552	}
553	if (vp->v_writecount == `0` && vattr.va_nlink > `0`) {
554	vrele(vp);
555	return `0`;
556	}
557	}
558	/*
559	* First try to recycle the vnode.
560	*/
561	if (vrecycle(vp))
562	return `0`;
563	/*
564	* If FORCECLOSE is set, forcibly close the vnode.
565	* For block or character devices, revert to an
566	* anonymous device. For all other files, just
567	* kill them.
568	*/
569	if (flags & FORCECLOSE) {
570	if (vp->v_usecount > `1` &&
571	(vp->v_type == VBLK \|\| vp->v_type == VCHR))
572	vcache_make_anon(vp);
573	else
574	vgone(vp);
575	return `0`;
576	}
577	vrele(vp);
578	return EBUSY;
579	}
580
581	int
582	vflush(struct mount mp, vnode_t skipvp, int flags)
583	{
584	vnode_t *vp;
585	struct vnode_iterator *marker;
586	int busy, error, when, retries = `2`;
587
588	do {
589	busy = error = when = `0`;
590
591	/*
592	* First, flush out any vnode references from the
593	* deferred vrele list.
594	*/
595	vrele_flush(mp);
596
597	vfs_vnode_iterator_init(mp, &marker);
598
599	while ((vp = vflushnext(marker, &when)) != NULL) {
600	error = vflush_one(vp, skipvp, flags);
601	if (error == EBUSY) {
602	error = `0`;
603	busy++;
604	#ifdef DEBUG
605	if (busyprt && retries == `0`)
606	vprint("vflush: busy vnode", vp);
607	#endif
608	} else if (error != `0`) {
609	break;
610	}
611	}
612
613	vfs_vnode_iterator_destroy(marker);
614	} while (error == `0` && busy > `0` && retries-- > `0`);
615
616	if (error)
617	return error;
618	if (busy)
619	return EBUSY;
620	return `0`;
621	}
622
623	/*
624	* Mount a file system.
625	*/
626
627	/*
628	* Scan all active processes to see if any of them have a current or root
629	* directory onto which the new filesystem has just been mounted. If so,
630	* replace them with the new mount point.
631	*/
632	static void
633	mount_checkdirs(vnode_t *olddp)
634	{
635	vnode_t newdp, rele1, *rele2;
636	struct cwdinfo *cwdi;
637	struct proc *p;
638	bool retry;
639
640	if (olddp->v_usecount == `1`) {
641	return;
642	}
643	if (VFS_ROOT(olddp->v_mountedhere, &newdp))
644	panic("mount: lost mount");
645
646	do {
647	retry = false;
648	mutex_enter(proc_lock);
649	PROCLIST_FOREACH(p, &allproc) {
650	if ((cwdi = p->p_cwdi) == NULL)
651	continue;
652	/*
653	* Cannot change to the old directory any more,
654	* so even if we see a stale value it is not a
655	* problem.
656	*/
657	if (cwdi->cwdi_cdir != olddp &&
658	cwdi->cwdi_rdir != olddp)
659	continue;
660	retry = true;
661	rele1 = NULL;
662	rele2 = NULL;
663	atomic_inc_uint(&cwdi->cwdi_refcnt);
664	mutex_exit(proc_lock);
665	rw_enter(&cwdi->cwdi_lock, RW_WRITER);
666	if (cwdi->cwdi_cdir == olddp) {
667	rele1 = cwdi->cwdi_cdir;
668	vref(newdp);
669	cwdi->cwdi_cdir = newdp;
670	}
671	if (cwdi->cwdi_rdir == olddp) {
672	rele2 = cwdi->cwdi_rdir;
673	vref(newdp);
674	cwdi->cwdi_rdir = newdp;
675	}
676	rw_exit(&cwdi->cwdi_lock);
677	cwdfree(cwdi);
678	if (rele1 != NULL)
679	vrele(rele1);
680	if (rele2 != NULL)
681	vrele(rele2);
682	mutex_enter(proc_lock);
683	break;
684	}
685	mutex_exit(proc_lock);
686	} while (retry);
687
688	if (rootvnode == olddp) {
689	vrele(rootvnode);
690	vref(newdp);
691	rootvnode = newdp;
692	}
693	vput(newdp);
694	}
695
696	/*
697	* Start extended attributes
698	*/
699	static int
700	start_extattr(struct mount *mp)
701	{
702	int error;
703
704	error = VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, NULL, `0`, NULL);
705	if (error)
706	printf("%s: failed to start extattr: error = %d\n",
707	mp->mnt_stat.f_mntonname, error);
708
709	return error;
710	}
711
712	int
713	mount_domount(struct lwp l, vnode_t vpp, struct* vfsops *vfsops,
714	const char path, int* flags, void data, size_t data_len)
715	{
716	vnode_t vp = vpp;
717	struct mount *mp;
718	struct pathbuf *pb;
719	struct nameidata nd;
720	int error;
721
722	error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
723	KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data);
724	if (error) {
725	vfs_delref(vfsops);
726	return error;
727	}
728
729	/ Cannot make a non-dir a mount-point (from here anyway). /
730	if (vp->v_type != VDIR) {
731	vfs_delref(vfsops);
732	return ENOTDIR;
733	}
734
735	if (flags & MNT_EXPORTED) {
736	vfs_delref(vfsops);
737	return EINVAL;
738	}
739
740	if ((mp = vfs_mountalloc(vfsops, vp)) == NULL) {
741	vfs_delref(vfsops);
742	return ENOMEM;
743	}
744
745	mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred);
746
747	/*
748	* The underlying file system may refuse the mount for
749	* various reasons. Allow the user to force it to happen.
750	*
751	* Set the mount level flags.
752	*/
753	mp->mnt_flag = flags & (MNT_BASIC_FLAGS \| MNT_FORCE \| MNT_IGNORE);
754
755	mutex_enter(&mp->mnt_updating);
756	error = VFS_MOUNT(mp, path, data, data_len);
757	mp->mnt_flag &= ~MNT_OP_FLAGS;
758
759	if (error != `0`)
760	goto err_unmounted;
761
762	/*
763	* Validate and prepare the mount point.
764	*/
765	error = pathbuf_copyin(path, &pb);
766	if (error != `0`) {
767	goto err_mounted;
768	}
769	NDINIT(&nd, LOOKUP, FOLLOW \| LOCKLEAF \| TRYEMULROOT, pb);
770	error = namei(&nd);
771	pathbuf_destroy(pb);
772	if (error != `0`) {
773	goto err_mounted;
774	}
775	if (nd.ni_vp != vp) {
776	vput(nd.ni_vp);
777	error = EINVAL;
778	goto err_mounted;
779	}
780	if (vp->v_mountedhere != NULL) {
781	vput(nd.ni_vp);
782	error = EBUSY;
783	goto err_mounted;
784	}
785	error = vinvalbuf(vp, V_SAVE, l->l_cred, l, `0`, `0`);
786	if (error != `0`) {
787	vput(nd.ni_vp);
788	goto err_mounted;
789	}
790
791	/*
792	* Put the new filesystem on the mount list after root.
793	*/
794	cache_purge(vp);
795	mp->mnt_iflag &= ~IMNT_WANTRDWR;
796
797	mountlist_append(mp);
798	if ((mp->mnt_flag & (MNT_RDONLY \| MNT_ASYNC)) == `0`)
799	vfs_syncer_add_to_worklist(mp);
800	vp->v_mountedhere = mp;
801	vput(nd.ni_vp);
802
803	mount_checkdirs(vp);
804	mutex_exit(&mp->mnt_updating);
805
806	/ Hold an additional reference to the mount across VFS_START(). /
807	vfs_ref(mp);
808	(void) VFS_STATVFS(mp, &mp->mnt_stat);
809	error = VFS_START(mp, `0`);
810	if (error) {
811	vrele(vp);
812	} else if (flags & MNT_EXTATTR) {
813	(void)start_extattr(mp);
814	}
815	/ Drop reference held for VFS_START(). /
816	vfs_rele(mp);
817	*vpp = NULL;
818	return error;
819
820	err_mounted:
821	if (VFS_UNMOUNT(mp, MNT_FORCE) != `0`)
822	panic("Unmounting fresh file system failed");
823
824	err_unmounted:
825	vp->v_mountedhere = NULL;
826	mutex_exit(&mp->mnt_updating);
827	vfs_rele(mp);
828
829	return error;
830	}
831
832	/*
833	* Do the actual file system unmount. File system is assumed to have
834	* been locked by the caller.
835	*
836	* => Caller hold reference to the mount, explicitly for dounmount().
837	*/
838	int
839	dounmount(struct mount mp, int* flags, struct lwp *l)
840	{
841	vnode_t *coveredvp;
842	int error, async, used_syncer, used_extattr;
843	const bool was_suspended = fstrans_is_owner(mp);
844
845	#if NVERIEXEC > 0
846	error = veriexec_unmountchk(mp);
847	if (error)
848	return (error);
849	#endif /* NVERIEXEC > 0 */
850
851	if (!was_suspended) {
852	error = vfs_suspend(mp, `0`);
853	if (error) {
854	return error;
855	}
856	}
857
858	KASSERT((mp->mnt_iflag & IMNT_GONE) == `0`);
859
860	used_syncer = (mp->mnt_iflag & IMNT_ONWORKLIST) != `0`;
861	used_extattr = mp->mnt_flag & MNT_EXTATTR;
862
863	mp->mnt_iflag \|= IMNT_UNMOUNT;
864	mutex_enter(&mp->mnt_updating);
865	async = mp->mnt_flag & MNT_ASYNC;
866	mp->mnt_flag &= ~MNT_ASYNC;
867	cache_purgevfs(mp); / remove cache entries for this file sys /
868	if (used_syncer)
869	vfs_syncer_remove_from_worklist(mp);
870	error = `0`;
871	if (((mp->mnt_flag & MNT_RDONLY) == `0`) && ((flags & MNT_FORCE) == `0`)) {
872	error = VFS_SYNC(mp, MNT_WAIT, l->l_cred);
873	}
874	if (error == `0` \|\| (flags & MNT_FORCE)) {
875	error = VFS_UNMOUNT(mp, flags);
876	}
877	if (error) {
878	mp->mnt_iflag &= ~IMNT_UNMOUNT;
879	if ((mp->mnt_flag & (MNT_RDONLY \| MNT_ASYNC)) == `0`)
880	vfs_syncer_add_to_worklist(mp);
881	mp->mnt_flag \|= async;
882	mutex_exit(&mp->mnt_updating);
883	if (!was_suspended)
884	vfs_resume(mp);
885	if (used_extattr) {
886	if (start_extattr(mp) != `0`)
887	mp->mnt_flag &= ~MNT_EXTATTR;
888	else
889	mp->mnt_flag \|= MNT_EXTATTR;
890	}
891	return (error);
892	}
893	mutex_exit(&mp->mnt_updating);
894
895	/*
896	* mark filesystem as gone to prevent further umounts
897	* after mnt_umounting lock is gone, this also prevents
898	* vfs_busy() from succeeding.
899	*/
900	mp->mnt_iflag \|= IMNT_GONE;
901	if (!was_suspended)
902	vfs_resume(mp);
903
904	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
905	vn_lock(coveredvp, LK_EXCLUSIVE \| LK_RETRY);
906	coveredvp->v_mountedhere = NULL;
907	VOP_UNLOCK(coveredvp);
908	}
909	mountlist_remove(mp);
910	if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL)
911	panic("unmount: dangling vnode");
912	vfs_hooks_unmount(mp);
913
914	vfs_rele(mp); / reference from mount() /
915	if (coveredvp != NULLVP) {
916	vrele(coveredvp);
917	}
918	return (`0`);
919	}
920
921	/*
922	* Unmount all file systems.
923	* We traverse the list in reverse order under the assumption that doing so
924	* will avoid needing to worry about dependencies.
925	*/
926	bool
927	vfs_unmountall(struct lwp *l)
928	{
929
930	printf("unmounting file systems...\n");
931	return vfs_unmountall1(l, true, true);
932	}
933
934	static void
935	vfs_unmount_print(struct mount mp, const* char *pfx)
936	{
937
938	aprint_verbose("%sunmounted %s on %s type %s\n", pfx,
939	mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname,
940	mp->mnt_stat.f_fstypename);
941	}
942
943	/*
944	* Return the mount with the highest generation less than "gen".
945	*/
946	static struct mount *
947	vfs_unmount_next(uint64_t gen)
948	{
949	mount_iterator_t *iter;
950	struct mount mp, nmp;
951
952	nmp = NULL;
953
954	mountlist_iterator_init(&iter);
955	while ((mp = mountlist_iterator_next(iter)) != NULL) {
956	if ((nmp == NULL \|\| mp->mnt_gen > nmp->mnt_gen) &&
957	mp->mnt_gen < gen) {
958	if (nmp != NULL)
959	vfs_rele(nmp);
960	nmp = mp;
961	vfs_ref(nmp);
962	}
963	}
964	mountlist_iterator_destroy(iter);
965
966	return nmp;
967	}
968
969	bool
970	vfs_unmount_forceone(struct lwp *l)
971	{
972	struct mount *mp;
973	int error;
974
975	mp = vfs_unmount_next(mountgen);
976	if (mp == NULL) {
977	return false;
978	}
979
980	#ifdef DEBUG
981	printf("forcefully unmounting %s (%s)...\n",
982	mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
983	#endif
984	if ((error = dounmount(mp, MNT_FORCE, l)) == `0`) {
985	vfs_unmount_print(mp, "forcefully ");
986	return true;
987	} else {
988	vfs_rele(mp);
989	}
990
991	#ifdef DEBUG
992	printf("forceful unmount of %s failed with error %d\n",
993	mp->mnt_stat.f_mntonname, error);
994	#endif
995
996	return false;
997	}
998
999	bool
1000	vfs_unmountall1(struct lwp *l, bool force, bool verbose)
1001	{
1002	struct mount *mp;
1003	bool any_error = false, progress = false;
1004	uint64_t gen;
1005	int error;
1006
1007	gen = mountgen;
1008	for (;;) {
1009	mp = vfs_unmount_next(gen);
1010	if (mp == NULL)
1011	break;
1012	gen = mp->mnt_gen;
1013
1014	#ifdef DEBUG
1015	printf("unmounting %p %s (%s)...\n",
1016	(void *)mp, mp->mnt_stat.f_mntonname,
1017	mp->mnt_stat.f_mntfromname);
1018	#endif
1019	if ((error = dounmount(mp, force ? MNT_FORCE : `0`, l)) == `0`) {
1020	vfs_unmount_print(mp, "");
1021	progress = true;
1022	} else {
1023	vfs_rele(mp);
1024	if (verbose) {
1025	printf("unmount of %s failed with error %d\n",
1026	mp->mnt_stat.f_mntonname, error);
1027	}
1028	any_error = true;
1029	}
1030	}
1031	if (verbose) {
1032	printf("unmounting done\n");
1033	}
1034	if (any_error && verbose) {
1035	printf("WARNING: some file systems would not unmount\n");
1036	}
1037	return progress;
1038	}
1039
1040	void
1041	vfs_sync_all(struct lwp *l)
1042	{
1043	printf("syncing disks... ");
1044
1045	/ remove user processes from run queue /
1046	suspendsched();
1047	(void)spl0();
1048
1049	/ avoid coming back this way again if we panic. /
1050	doing_shutdown = `1`;
1051
1052	do_sys_sync(l);
1053
1054	/ Wait for sync to finish. /
1055	if (buf_syncwait() != `0`) {
1056	#if defined(DDB) && defined(DEBUG_HALT_BUSY)
1057	Debugger();
1058	#endif
1059	printf("giving up\n");
1060	return;
1061	} else
1062	printf("done\n");
1063	}
1064
1065	/*
1066	* Sync and unmount file systems before shutting down.
1067	*/
1068	void
1069	vfs_shutdown(void)
1070	{
1071	lwp_t *l = curlwp;
1072
1073	vfs_sync_all(l);
1074
1075	/*
1076	* If we have paniced - do not make the situation potentially
1077	* worse by unmounting the file systems.
1078	*/
1079	if (panicstr != NULL) {
1080	return;
1081	}
1082
1083	/ Unmount file systems. /
1084	vfs_unmountall(l);
1085	}
1086
1087	/*
1088	* Print a list of supported file system types (used by vfs_mountroot)
1089	*/
1090	static void
1091	vfs_print_fstypes(void)
1092	{
1093	struct vfsops *v;
1094	int cnt = `0`;
1095
1096	mutex_enter(&vfs_list_lock);
1097	LIST_FOREACH(v, &vfs_list, vfs_list)
1098	++cnt;
1099	mutex_exit(&vfs_list_lock);
1100
1101	if (cnt == `0`) {
1102	printf("WARNING: No file system modules have been loaded.\n");
1103	return;
1104	}
1105
1106	printf("Supported file systems:");
1107	mutex_enter(&vfs_list_lock);
1108	LIST_FOREACH(v, &vfs_list, vfs_list) {
1109	printf(" %s", v->vfs_name);
1110	}
1111	mutex_exit(&vfs_list_lock);
1112	printf("\n");
1113	}
1114
1115	/*
1116	* Mount the root file system. If the operator didn't specify a
1117	* file system to use, try all possible file systems until one
1118	* succeeds.
1119	*/
1120	int
1121	vfs_mountroot(void)
1122	{
1123	struct vfsops *v;
1124	int error = ENODEV;
1125
1126	if (root_device == NULL)
1127	panic("vfs_mountroot: root device unknown");
1128
1129	switch (device_class(root_device)) {
1130	case DV_IFNET:
1131	if (rootdev != NODEV)
1132	panic("vfs_mountroot: rootdev set for DV_IFNET "
1133	"(0x%llx -> %llu,%llu)",
1134	(unsigned long long)rootdev,
1135	(unsigned long long)major(rootdev),
1136	(unsigned long long)minor(rootdev));
1137	break;
1138
1139	case DV_DISK:
1140	if (rootdev == NODEV)
1141	panic("vfs_mountroot: rootdev not set for DV_DISK");
1142	if (bdevvp(rootdev, &rootvp))
1143	panic("vfs_mountroot: can't get vnode for rootdev");
1144	error = VOP_OPEN(rootvp, FREAD, FSCRED);
1145	if (error) {
1146	printf("vfs_mountroot: can't open root device\n");
1147	return (error);
1148	}
1149	break;
1150
1151	case DV_VIRTUAL:
1152	break;
1153
1154	default:
1155	printf("%s: inappropriate for root file system\n",
1156	device_xname(root_device));
1157	return (ENODEV);
1158	}
1159
1160	/*
1161	* If user specified a root fs type, use it. Make sure the
1162	* specified type exists and has a mount_root()
1163	*/
1164	if (strcmp(rootfstype, ROOT_FSTYPE_ANY) != `0`) {
1165	v = vfs_getopsbyname(rootfstype);
1166	error = EFTYPE;
1167	if (v != NULL) {
1168	if (v->vfs_mountroot != NULL) {
1169	error = (v->vfs_mountroot)();
1170	}
1171	v->vfs_refcount--;
1172	}
1173	goto done;
1174	}
1175
1176	/*
1177	* Try each file system currently configured into the kernel.
1178	*/
1179	mutex_enter(&vfs_list_lock);
1180	LIST_FOREACH(v, &vfs_list, vfs_list) {
1181	if (v->vfs_mountroot == NULL)
1182	continue;
1183	#ifdef DEBUG
1184	aprint_normal("mountroot: trying %s...\n", v->vfs_name);
1185	#endif
1186	v->vfs_refcount++;
1187	mutex_exit(&vfs_list_lock);
1188	error = (*v->vfs_mountroot)();
1189	mutex_enter(&vfs_list_lock);
1190	v->vfs_refcount--;
1191	if (!error) {
1192	aprint_normal("root file system type: %s\n",
1193	v->vfs_name);
1194	break;
1195	}
1196	}
1197	mutex_exit(&vfs_list_lock);
1198
1199	if (v == NULL) {
1200	vfs_print_fstypes();
1201	printf("no file system for %s", device_xname(root_device));
1202	if (device_class(root_device) == DV_DISK)
1203	printf(" (dev 0x%llx)", (unsigned long long)rootdev);
1204	printf("\n");
1205	error = EFTYPE;
1206	}
1207
1208	done:
1209	if (error && device_class(root_device) == DV_DISK) {
1210	VOP_CLOSE(rootvp, FREAD, FSCRED);
1211	vrele(rootvp);
1212	}
1213	if (error == `0`) {
1214	mount_iterator_t *iter;
1215	struct mount *mp;
1216	extern struct cwdinfo cwdi0;
1217
1218	mountlist_iterator_init(&iter);
1219	mp = mountlist_iterator_next(iter);
1220	KASSERT(mp != NULL);
1221	mountlist_iterator_destroy(iter);
1222
1223	mp->mnt_flag \|= MNT_ROOTFS;
1224	mp->mnt_op->vfs_refcount++;
1225
1226	/*
1227	* Get the vnode for '/'. Set cwdi0.cwdi_cdir to
1228	* reference it.
1229	*/
1230	error = VFS_ROOT(mp, &rootvnode);
1231	if (error)
1232	panic("cannot find root vnode, error=%d", error);
1233	cwdi0.cwdi_cdir = rootvnode;
1234	vref(cwdi0.cwdi_cdir);
1235	VOP_UNLOCK(rootvnode);
1236	cwdi0.cwdi_rdir = NULL;
1237
1238	/*
1239	* Now that root is mounted, we can fixup initproc's CWD
1240	* info. All other processes are kthreads, which merely
1241	* share proc0's CWD info.
1242	*/
1243	initproc->p_cwdi->cwdi_cdir = rootvnode;
1244	vref(initproc->p_cwdi->cwdi_cdir);
1245	initproc->p_cwdi->cwdi_rdir = NULL;
1246	/*
1247	* Enable loading of modules from the filesystem
1248	*/
1249	module_load_vfs_init();
1250
1251	}
1252	return (error);
1253	}
1254
1255	/*
1256	* mount_specific_key_create --
1257	* Create a key for subsystem mount-specific data.
1258	*/
1259	int
1260	mount_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
1261	{
1262
1263	return specificdata_key_create(mount_specificdata_domain, keyp, dtor);
1264	}
1265
1266	/*
1267	* mount_specific_key_delete --
1268	* Delete a key for subsystem mount-specific data.
1269	*/
1270	void
1271	mount_specific_key_delete(specificdata_key_t key)
1272	{
1273
1274	specificdata_key_delete(mount_specificdata_domain, key);
1275	}
1276
1277	/*
1278	* mount_initspecific --
1279	* Initialize a mount's specificdata container.
1280	*/
1281	void
1282	mount_initspecific(struct mount *mp)
1283	{
1284	int error __diagused;
1285
1286	error = specificdata_init(mount_specificdata_domain,
1287	&mp->mnt_specdataref);
1288	KASSERT(error == `0`);
1289	}
1290
1291	/*
1292	* mount_finispecific --
1293	* Finalize a mount's specificdata container.
1294	*/
1295	void
1296	mount_finispecific(struct mount *mp)
1297	{
1298
1299	specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
1300	}
1301
1302	/*
1303	* mount_getspecific --
1304	* Return mount-specific data corresponding to the specified key.
1305	*/
1306	void *
1307	mount_getspecific(struct mount *mp, specificdata_key_t key)
1308	{
1309
1310	return specificdata_getspecific(mount_specificdata_domain,
1311	&mp->mnt_specdataref, key);
1312	}
1313
1314	/*
1315	* mount_setspecific --
1316	* Set mount-specific data corresponding to the specified key.
1317	*/
1318	void
1319	mount_setspecific(struct mount mp, specificdata_key_t key, void* *data)
1320	{
1321
1322	specificdata_setspecific(mount_specificdata_domain,
1323	&mp->mnt_specdataref, key, data);
1324	}
1325
1326	/*
1327	* Check to see if a filesystem is mounted on a block device.
1328	*/
1329	int
1330	vfs_mountedon(vnode_t *vp)
1331	{
1332	vnode_t *vq;
1333	int error = `0`;
1334
1335	if (vp->v_type != VBLK)
1336	return ENOTBLK;
1337	if (spec_node_getmountedfs(vp) != NULL)
1338	return EBUSY;
1339	if (spec_node_lookup_by_dev(vp->v_type, vp->v_rdev, &vq) == `0`) {
1340	if (spec_node_getmountedfs(vq) != NULL)
1341	error = EBUSY;
1342	vrele(vq);
1343	}
1344
1345	return error;
1346	}
1347
1348	/*
1349	* Check if a device pointed to by vp is mounted.
1350	*
1351	* Returns:
1352	* EINVAL if it's not a disk
1353	* EBUSY if it's a disk and mounted
1354	* 0 if it's a disk and not mounted
1355	*/
1356	int
1357	rawdev_mounted(vnode_t vp, vnode_t *bvpp)
1358	{
1359	vnode_t *bvp;
1360	dev_t dev;
1361	int d_type;
1362
1363	bvp = NULL;
1364	d_type = D_OTHER;
1365
1366	if (iskmemvp(vp))
1367	return EINVAL;
1368
1369	switch (vp->v_type) {
1370	case VCHR: {
1371	const struct cdevsw *cdev;
1372
1373	dev = vp->v_rdev;
1374	cdev = cdevsw_lookup(dev);
1375	if (cdev != NULL) {
1376	dev_t blkdev;
1377
1378	blkdev = devsw_chr2blk(dev);
1379	if (blkdev != NODEV) {
1380	if (vfinddev(blkdev, VBLK, &bvp) != `0`) {
1381	d_type = (cdev->d_flag & D_TYPEMASK);
1382	/ XXX: what if bvp disappears? /
1383	vrele(bvp);
1384	}
1385	}
1386	}
1387
1388	break;
1389	}
1390
1391	case VBLK: {
1392	const struct bdevsw *bdev;
1393
1394	dev = vp->v_rdev;
1395	bdev = bdevsw_lookup(dev);
1396	if (bdev != NULL)
1397	d_type = (bdev->d_flag & D_TYPEMASK);
1398
1399	bvp = vp;
1400
1401	break;
1402	}
1403
1404	default:
1405	break;
1406	}
1407
1408	if (d_type != D_DISK)
1409	return EINVAL;
1410
1411	if (bvpp != NULL)
1412	*bvpp = bvp;
1413
1414	/*
1415	* XXX: This is bogus. We should be failing the request
1416	* XXX: not only if this specific slice is mounted, but
1417	* XXX: if it's on a disk with any other mounted slice.
1418	*/
1419	if (vfs_mountedon(bvp))
1420	return EBUSY;
1421
1422	return `0`;
1423	}
1424
1425	/*
1426	* Make a 'unique' number from a mount type name.
1427	*/
long
makefstype(const char *type)
{
	/*
	 * Accumulate in an unsigned type: left-shifting a signed long
	 * that is negative or overflows is undefined behaviour, whereas
	 * unsigned arithmetic wraps.  The bit pattern produced is the
	 * same as the signed computation on two's-complement machines.
	 */
	unsigned long rv = 0;

	for (; *type != '\0'; type++) {
		rv <<= 2;
		/* A negative char converts to the sign-extended pattern. */
		rv ^= (unsigned long)*type;
	}
	return (long)rv;
}
1439
1440	static struct mountlist_entry *
1441	mountlist_alloc(enum mountlist_type type, struct mount *mp)
1442	{
1443	struct mountlist_entry *me;
1444
1445	me = kmem_zalloc(sizeof(*me), KM_SLEEP);
1446	me->me_mount = mp;
1447	me->me_type = type;
1448
1449	return me;
1450	}
1451
1452	static void
1453	mountlist_free(struct mountlist_entry *me)
1454	{
1455
1456	kmem_free(me, sizeof(*me));
1457	}
1458
1459	void
1460	mountlist_iterator_init(mount_iterator_t **mip)
1461	{
1462	struct mountlist_entry *me;
1463
1464	me = mountlist_alloc(ME_MARKER, NULL);
1465	mutex_enter(&mountlist_lock);
1466	TAILQ_INSERT_HEAD(&mountlist, me, me_list);
1467	mutex_exit(&mountlist_lock);
1468	mip = (mount_iterator_t )me;
1469	}
1470
1471	void
1472	mountlist_iterator_destroy(mount_iterator_t *mi)
1473	{
1474	struct mountlist_entry *marker = &mi->mi_entry;
1475
1476	if (marker->me_mount != NULL)
1477	vfs_unbusy(marker->me_mount);
1478
1479	mutex_enter(&mountlist_lock);
1480	TAILQ_REMOVE(&mountlist, marker, me_list);
1481	mutex_exit(&mountlist_lock);
1482
1483	mountlist_free(marker);
1484
1485	}
1486
1487	/*
1488	* Return the next mount or NULL for this iterator.
1489	* Mark it busy on success.
1490	*/
1491	static inline struct mount *
1492	_mountlist_iterator_next(mount_iterator_t *mi, bool wait)
1493	{
1494	struct mountlist_entry me, marker = &mi->mi_entry;
1495	struct mount *mp;
1496	int error;
1497
1498	if (marker->me_mount != NULL) {
1499	vfs_unbusy(marker->me_mount);
1500	marker->me_mount = NULL;
1501	}
1502
1503	mutex_enter(&mountlist_lock);
1504	for (;;) {
1505	KASSERT(marker->me_type == ME_MARKER);
1506
1507	me = TAILQ_NEXT(marker, me_list);
1508	if (me == NULL) {
1509	/ End of list: keep marker and return. /
1510	mutex_exit(&mountlist_lock);
1511	return NULL;
1512	}
1513	TAILQ_REMOVE(&mountlist, marker, me_list);
1514	TAILQ_INSERT_AFTER(&mountlist, me, marker, me_list);
1515
1516	/ Skip other markers. /
1517	if (me->me_type != ME_MOUNT)
1518	continue;
1519
1520	/ Take an initial reference for vfs_busy() below. /
1521	mp = me->me_mount;
1522	KASSERT(mp != NULL);
1523	vfs_ref(mp);
1524	mutex_exit(&mountlist_lock);
1525
1526	/ Try to mark this mount busy and return on success. /
1527	if (wait)
1528	error = vfs_busy(mp);
1529	else
1530	error = vfs_trybusy(mp);
1531	if (error == `0`) {
1532	vfs_rele(mp);
1533	marker->me_mount = mp;
1534	return mp;
1535	}
1536	vfs_rele(mp);
1537	mutex_enter(&mountlist_lock);
1538	}
1539	}
1540
1541	struct mount *
1542	mountlist_iterator_next(mount_iterator_t *mi)
1543	{
1544
1545	return _mountlist_iterator_next(mi, true);
1546	}
1547
1548	struct mount *
1549	mountlist_iterator_trynext(mount_iterator_t *mi)
1550	{
1551
1552	return _mountlist_iterator_next(mi, false);
1553	}
1554
1555	/*
1556	* Attach new mount to the end of the mount list.
1557	*/
1558	void
1559	mountlist_append(struct mount *mp)
1560	{
1561	struct mountlist_entry *me;
1562
1563	me = mountlist_alloc(ME_MOUNT, mp);
1564	mutex_enter(&mountlist_lock);
1565	TAILQ_INSERT_TAIL(&mountlist, me, me_list);
1566	mutex_exit(&mountlist_lock);
1567	}
1568
1569	/*
1570	* Remove mount from mount list.
1571	/void*
1572	mountlist_remove(struct mount *mp)
1573	{
1574	struct mountlist_entry *me;
1575
1576	mutex_enter(&mountlist_lock);
1577	TAILQ_FOREACH(me, &mountlist, me_list)
1578	if (me->me_type == ME_MOUNT && me->me_mount == mp)
1579	break;
1580	KASSERT(me != NULL);
1581	TAILQ_REMOVE(&mountlist, me, me_list);
1582	mutex_exit(&mountlist_lock);
1583	mountlist_free(me);
1584	}
1585
1586	/*
1587	* Unlocked variant to traverse the mountlist.
1588	* To be used from DDB only.
1589	*/
1590	struct mount *
1591	_mountlist_next(struct mount *mp)
1592	{
1593	struct mountlist_entry *me;
1594
1595	if (mp == NULL) {
1596	me = TAILQ_FIRST(&mountlist);
1597	} else {
1598	TAILQ_FOREACH(me, &mountlist, me_list)
1599	if (me->me_type == ME_MOUNT && me->me_mount == mp)
1600	break;
1601	if (me != NULL)
1602	me = TAILQ_NEXT(me, me_list);
1603	}
1604
1605	while (me != NULL && me->me_type != ME_MOUNT)
1606	me = TAILQ_NEXT(me, me_list);
1607
1608	return (me ? me->me_mount : NULL);
1609	}
1610

Browse the source code of netbsd/sys/kern/vfs_mount.c