1 | /* |
2 | * CDDL HEADER START |
3 | * |
4 | * The contents of this file are subject to the terms of the |
5 | * Common Development and Distribution License (the "License"). |
6 | * You may not use this file except in compliance with the License. |
7 | * |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
9 | * or http://www.opensolaris.org/os/licensing. |
10 | * See the License for the specific language governing permissions |
11 | * and limitations under the License. |
12 | * |
13 | * When distributing Covered Code, include this CDDL HEADER in each |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
15 | * If applicable, add the following below this CDDL HEADER, with the |
16 | * fields enclosed by brackets "[]" replaced with your own identifying |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] |
18 | * |
19 | * CDDL HEADER END |
20 | */ |
21 | /* |
22 | * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. |
23 | * Copyright (c) 2012, 2015 by Delphix. All rights reserved. |
24 | * Copyright (c) 2014 Integros [integros.com] |
25 | */ |
26 | |
27 | #ifndef _SYS_FS_ZFS_ZNODE_H |
28 | #define _SYS_FS_ZFS_ZNODE_H |
29 | |
30 | #ifdef _KERNEL |
31 | #include <sys/list.h> |
32 | #include <sys/dmu.h> |
33 | #include <sys/sa.h> |
34 | #include <sys/zfs_vfsops.h> |
35 | #include <sys/rrwlock.h> |
36 | #include <sys/zfs_sa.h> |
37 | #include <sys/zfs_stat.h> |
38 | #endif |
39 | #include <sys/zfs_acl.h> |
40 | #include <sys/zil.h> |
41 | |
42 | #ifdef __NetBSD__ |
43 | #include <miscfs/genfs/genfs_node.h> |
44 | #endif |
45 | |
46 | #ifdef __cplusplus |
47 | extern "C" { |
48 | #endif |
49 | |
50 | /* |
51 | * Additional file level attributes, that are stored |
52 | * in the upper half of zp_flags |
53 | */ |
54 | #define ZFS_READONLY 0x0000000100000000 |
55 | #define ZFS_HIDDEN 0x0000000200000000 |
56 | #define ZFS_SYSTEM 0x0000000400000000 |
57 | #define ZFS_ARCHIVE 0x0000000800000000 |
58 | #define ZFS_IMMUTABLE 0x0000001000000000 |
59 | #define ZFS_NOUNLINK 0x0000002000000000 |
60 | #define ZFS_APPENDONLY 0x0000004000000000 |
61 | #define ZFS_NODUMP 0x0000008000000000 |
62 | #define ZFS_OPAQUE 0x0000010000000000 |
63 | #define ZFS_AV_QUARANTINED 0x0000020000000000 |
64 | #define ZFS_AV_MODIFIED 0x0000040000000000 |
65 | #define ZFS_REPARSE 0x0000080000000000 |
66 | #define ZFS_OFFLINE 0x0000100000000000 |
67 | #define ZFS_SPARSE 0x0000200000000000 |
68 | |
/*
 * Set (value != 0) or clear (value == 0) the bit(s) `attr` in the flags
 * word `pflags`, then write the updated word out with sa_update() on the
 * znode's SA handle under transaction `tx`.  The sa_update() result is
 * VERIFY()ed to be 0.
 *
 * `pflags` must be a modifiable lvalue and is evaluated more than once.
 * `attr` and `zp` are parenthesized so that compound arguments such as
 * (A | B) are set or cleared as a whole.
 */
#define	ZFS_ATTR_SET(zp, attr, value, pflags, tx) \
{ \
	if (value) \
		(pflags) |= (attr); \
	else \
		(pflags) &= ~(attr); \
	VERIFY(0 == sa_update((zp)->z_sa_hdl, SA_ZPL_FLAGS((zp)->z_zfsvfs), \
	    &(pflags), sizeof (pflags), tx)); \
}
78 | |
79 | /* |
80 | * Define special zfs pflags |
81 | */ |
82 | #define ZFS_XATTR 0x1 /* is an extended attribute */ |
83 | #define ZFS_INHERIT_ACE 0x2 /* ace has inheritable ACEs */ |
84 | #define ZFS_ACL_TRIVIAL 0x4 /* files ACL is trivial */ |
85 | #define ZFS_ACL_OBJ_ACE 0x8 /* ACL has CMPLX Object ACE */ |
86 | #define ZFS_ACL_PROTECTED 0x10 /* ACL protected */ |
87 | #define ZFS_ACL_DEFAULTED 0x20 /* ACL should be defaulted */ |
88 | #define ZFS_ACL_AUTO_INHERIT 0x40 /* ACL should be inherited */ |
89 | #define ZFS_BONUS_SCANSTAMP 0x80 /* Scanstamp in bonus area */ |
90 | #define ZFS_NO_EXECS_DENIED 0x100 /* exec was given to everyone */ |
91 | |
/*
 * Look up a ZPL attribute's entry in the z_attr_table[] of `z`.
 * The argument and the whole expansion are parenthesized so that any
 * pointer expression (e.g. *pp or &obj) can be passed safely.
 */
#define	SA_ZPL_ATIME(z)		((z)->z_attr_table[ZPL_ATIME])
#define	SA_ZPL_MTIME(z)		((z)->z_attr_table[ZPL_MTIME])
#define	SA_ZPL_CTIME(z)		((z)->z_attr_table[ZPL_CTIME])
#define	SA_ZPL_CRTIME(z)	((z)->z_attr_table[ZPL_CRTIME])
#define	SA_ZPL_GEN(z)		((z)->z_attr_table[ZPL_GEN])
#define	SA_ZPL_DACL_ACES(z)	((z)->z_attr_table[ZPL_DACL_ACES])
#define	SA_ZPL_XATTR(z)		((z)->z_attr_table[ZPL_XATTR])
#define	SA_ZPL_SYMLINK(z)	((z)->z_attr_table[ZPL_SYMLINK])
#define	SA_ZPL_RDEV(z)		((z)->z_attr_table[ZPL_RDEV])
#define	SA_ZPL_SCANSTAMP(z)	((z)->z_attr_table[ZPL_SCANSTAMP])
#define	SA_ZPL_UID(z)		((z)->z_attr_table[ZPL_UID])
#define	SA_ZPL_GID(z)		((z)->z_attr_table[ZPL_GID])
#define	SA_ZPL_PARENT(z)	((z)->z_attr_table[ZPL_PARENT])
#define	SA_ZPL_LINKS(z)		((z)->z_attr_table[ZPL_LINKS])
#define	SA_ZPL_MODE(z)		((z)->z_attr_table[ZPL_MODE])
#define	SA_ZPL_DACL_COUNT(z)	((z)->z_attr_table[ZPL_DACL_COUNT])
#define	SA_ZPL_FLAGS(z)		((z)->z_attr_table[ZPL_FLAGS])
#define	SA_ZPL_SIZE(z)		((z)->z_attr_table[ZPL_SIZE])
#define	SA_ZPL_ZNODE_ACL(z)	((z)->z_attr_table[ZPL_ZNODE_ACL])
#define	SA_ZPL_PAD(z)		((z)->z_attr_table[ZPL_PAD])
112 | |
/*
 * Is ID ephemeral?  True when the ID is strictly greater than MAXUID.
 * The argument is parenthesized so that low-precedence expressions
 * (e.g. a conditional) are compared as a whole.
 */
#define	IS_EPHEMERAL(x)		((x) > MAXUID)
117 | |
/*
 * Should we use FUIDs / system attributes (SA)?
 *
 * Each test is true only when both the given ZPL `version` and the SPA
 * version of the pool backing objset `os` are at least the corresponding
 * ZPL_VERSION_* and SPA_VERSION_* value.  `version` is parenthesized so
 * that an expression argument is compared as a whole.
 */
#define	USE_FUIDS(version, os)	((version) >= ZPL_VERSION_FUID && \
    spa_version(dmu_objset_spa(os)) >= SPA_VERSION_FUID)
#define	USE_SA(version, os)	((version) >= ZPL_VERSION_SA && \
    spa_version(dmu_objset_spa(os)) >= SPA_VERSION_SA)
125 | |
126 | #define MASTER_NODE_OBJ 1 |
127 | |
128 | /* |
129 | * Special attributes for master node. |
130 | * "userquota@" and "groupquota@" are also valid (from |
131 | * zfs_userquota_prop_prefixes[]). |
132 | */ |
133 | #define ZFS_FSID "FSID" |
134 | #define ZFS_UNLINKED_SET "DELETE_QUEUE" |
135 | #define ZFS_ROOT_OBJ "ROOT" |
136 | #define ZPL_VERSION_STR "VERSION" |
137 | #define ZFS_FUID_TABLES "FUID" |
138 | #define ZFS_SHARES_DIR "SHARES" |
139 | #define ZFS_SA_ATTRS "SA_ATTRS" |
140 | |
141 | /* Path component length */ |
/*
 * The generic fs code uses MAXNAMELEN to represent
 * what the largest component length is.  Unfortunately,
 * this length includes the terminating NUL.  ZFS needs
 * to tell the users via pathconf() and statvfs() what the
 * true maximum length of a component is, excluding the NUL.
 */
149 | #define ZFS_MAXNAMELEN (MAXNAMELEN - 1) |
150 | |
151 | /* |
152 | * Convert mode bits (zp_mode) to BSD-style DT_* values for storing in |
153 | * the directory entries. |
154 | */ |
155 | #ifndef IFTODT |
156 | #define IFTODT(mode) (((mode) & S_IFMT) >> 12) |
157 | #endif |
158 | |
159 | /* |
160 | * The directory entry has the type (currently unused on Solaris) in the |
161 | * top 4 bits, and the object number in the low 48 bits. The "middle" |
162 | * 12 bits are unused. |
163 | */ |
164 | #define ZFS_DIRENT_TYPE(de) BF64_GET(de, 60, 4) |
165 | #define ZFS_DIRENT_OBJ(de) BF64_GET(de, 0, 48) |
166 | |
167 | /* |
168 | * Directory entry locks control access to directory entries. |
169 | * They are used to protect creates, deletes, and renames. |
170 | * Each directory znode has a mutex and a list of locked names. |
171 | */ |
172 | #ifdef _KERNEL |
/*
 * One locked directory entry name.  Entries are chained through dl_next
 * into a directory znode's z_dirlocks list, and dl_dzp points back at that
 * directory znode.
 */
typedef struct zfs_dirlock {
	char		*dl_name;	/* directory entry being locked */
	uint32_t	dl_sharecnt;	/* 0 if exclusive, > 0 if shared */
	uint8_t		dl_namelock;	/* 1 if z_name_lock is NOT held */
	uint16_t	dl_namesize;	/* set if dl_name was allocated */
	kcondvar_t	dl_cv;		/* wait for entry to be unlocked */
	struct znode	*dl_dzp;	/* directory znode */
	struct zfs_dirlock *dl_next;	/* next in z_dirlocks list */
} zfs_dirlock_t;
182 | |
/*
 * In-core znode.  Ties a ZFS object (z_id) to its vnode (z_vnode) and its
 * owning file system (z_zfsvfs).  Fields whose comments say "(cached)"
 * hold an in-core copy of the named value.
 *
 * NOTE(review): the cached fields presumably mirror the SA_ZPL_* attributes
 * reachable through z_sa_hdl -- confirm against the znode implementation.
 * NOTE(review): on NetBSD z_gnode is the first member, presumably because
 * genfs expects its node at the start of the per-vnode data -- confirm.
 */
typedef struct znode {
#ifdef __NetBSD__
	struct genfs_node z_gnode;
#endif
	struct zfsvfs	*z_zfsvfs;
	vnode_t		*z_vnode;
	uint64_t	z_id;		/* object ID for this znode */
#ifdef illumos
	/* Directory-entry locking state; only compiled in on illumos. */
	kmutex_t	z_lock;		/* znode modification lock */
	krwlock_t	z_parent_lock;	/* parent lock for directories */
	krwlock_t	z_name_lock;	/* "master" lock for dirent locks */
	zfs_dirlock_t	*z_dirlocks;	/* directory entry lock list */
#endif
	kmutex_t	z_range_lock;	/* protects changes to z_range_avl */
	avl_tree_t	z_range_avl;	/* avl tree of file range locks */
	uint8_t		z_unlinked;	/* file has been unlinked */
	uint8_t		z_atime_dirty;	/* atime needs to be synced */
	uint8_t		z_zn_prefetch;	/* Prefetch znodes? */
	uint8_t		z_moved;	/* Has this znode been moved? */
	uint_t		z_blksz;	/* block size in bytes */
	uint_t		z_seq;		/* modification sequence number */
	uint64_t	z_mapcnt;	/* number of pages mapped to file */
	uint64_t	z_gen;		/* generation (cached) */
	uint64_t	z_size;		/* file size (cached) */
	uint64_t	z_atime[2];	/* atime (cached) */
	uint64_t	z_links;	/* file links (cached) */
	uint64_t	z_pflags;	/* pflags (cached) */
	uint64_t	z_uid;		/* uid fuid (cached) */
	uint64_t	z_gid;		/* gid fuid (cached) */
	mode_t		z_mode;		/* mode (cached) */
	uint32_t	z_sync_cnt;	/* synchronous open count */
	kmutex_t	z_acl_lock;	/* acl data lock */
	zfs_acl_t	*z_acl_cached;	/* cached acl */
	list_node_t	z_link_node;	/* all znodes in fs link */
	sa_handle_t	*z_sa_hdl;	/* handle to sa data */
	boolean_t	z_is_sa;	/* are we native sa? */
#ifdef __NetBSD__
	struct lockf	*z_lockf;	/* head of byte-level lock list */
#endif
} znode_t;
223 | |
224 | |
225 | /* |
226 | * Range locking rules |
227 | * -------------------- |
228 | * 1. When truncating a file (zfs_create, zfs_setattr, zfs_space) the whole |
229 | * file range needs to be locked as RL_WRITER. Only then can the pages be |
230 | * freed etc and zp_size reset. zp_size must be set within range lock. |
231 | * 2. For writes and punching holes (zfs_write & zfs_space) just the range |
232 | * being written or freed needs to be locked as RL_WRITER. |
233 | * Multiple writes at the end of the file must coordinate zp_size updates |
234 | * to ensure data isn't lost. A compare and swap loop is currently used |
235 | * to ensure the file size is at least the offset last written. |
236 | * 3. For reads (zfs_read, zfs_get_data & zfs_putapage) just the range being |
237 | * read needs to be locked as RL_READER. A check against zp_size can then |
238 | * be made for reading beyond end of file. |
239 | */ |
240 | |
241 | /* |
242 | * Convert between znode pointers and vnode pointers |
243 | */ |
#ifdef DEBUG
/*
 * DEBUG builds: type-checked inline conversions that also assert the
 * znode and vnode agree about each other.
 */

/* Return zp's vnode (may be NULL). */
static __inline vnode_t *
ZTOV(znode_t *zp)
{
	vnode_t *vp = zp->z_vnode;

	/* A NULL vnode or NULL v_data is tolerated; a mismatch is not. */
	ASSERT(vp == NULL || vp->v_data == NULL || vp->v_data == zp);
	return (vp);
}

/* Return the znode stored in vp->v_data (may be NULL). */
static __inline znode_t *
VTOZ(vnode_t *vp)
{
	znode_t *zp = (znode_t *)vp->v_data;

	/* A NULL znode or NULL z_vnode is tolerated; a mismatch is not. */
	ASSERT(zp == NULL || zp->z_vnode == NULL || zp->z_vnode == vp);
	return (zp);
}
#else
/* Non-DEBUG builds: plain member access with no consistency checks. */
#define	ZTOV(ZP)	((ZP)->z_vnode)
#define	VTOZ(VP)	((znode_t *)(VP)->v_data)
#endif
265 | |
/*
 * Called on entry to each ZFS vnode and vfs operation.
 *
 * Calls rrm_enter_read() on zfsvfs->z_teardown_lock, tagged with FTAG.
 * If zfsvfs->z_unmounted is set, ZFS_EXIT() is invoked and the macro
 * executes `return (EIO)` in the *calling* function, so the enclosing
 * function must be able to return an error code.
 */
#define	ZFS_ENTER(zfsvfs) \
	{ \
		rrm_enter_read(&(zfsvfs)->z_teardown_lock, FTAG); \
		if ((zfsvfs)->z_unmounted) { \
			ZFS_EXIT(zfsvfs); \
			return (EIO); \
		} \
	}

/*
 * Must be called before exiting the vop.  Calls rrm_exit() on the same
 * z_teardown_lock, with the same FTAG tag, that ZFS_ENTER() used.
 */
#define	ZFS_EXIT(zfsvfs) rrm_exit(&(zfsvfs)->z_teardown_lock, FTAG)
278 | |
/*
 * Verifies the znode is valid.
 *
 * If zp has no SA handle (z_sa_hdl == NULL), ZFS_EXIT() is invoked on the
 * znode's zfsvfs and the macro executes `return (EIO)` in the *calling*
 * function.  The body is brace-wrapped (like ZFS_ENTER) so that a following
 * `else` can never bind to the macro's internal `if`.
 */
#define	ZFS_VERIFY_ZP(zp) \
	{ \
		if ((zp)->z_sa_hdl == NULL) { \
			ZFS_EXIT((zp)->z_zfsvfs); \
			return (EIO); \
		} \
	}
285 | |
/*
 * Macros for dealing with dmu_buf_hold
 *
 * ZFS_OBJ_HASH maps an object number to an index by masking with
 * (ZFS_OBJ_MTX_SZ - 1); ZFS_OBJ_MUTEX yields the address of the
 * corresponding entry in zfsvfs->z_hold_mtx[].  Distinct object numbers
 * can therefore select the same mutex.  The HOLD_ENTER / HOLD_TRYENTER /
 * HOLD_EXIT macros apply mutex_enter(), mutex_tryenter() and mutex_exit()
 * to that mutex.
 *
 * NOTE(review): the mask only covers every slot if ZFS_OBJ_MTX_SZ is a
 * power of two -- confirm where it is defined.
 */
#define	ZFS_OBJ_HASH(obj_num)	((obj_num) & (ZFS_OBJ_MTX_SZ - 1))
#define	ZFS_OBJ_MUTEX(zfsvfs, obj_num)	\
	(&(zfsvfs)->z_hold_mtx[ZFS_OBJ_HASH(obj_num)])
#define	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num) \
	mutex_enter(ZFS_OBJ_MUTEX((zfsvfs), (obj_num)))
#define	ZFS_OBJ_HOLD_TRYENTER(zfsvfs, obj_num) \
	mutex_tryenter(ZFS_OBJ_MUTEX((zfsvfs), (obj_num)))
#define	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num) \
	mutex_exit(ZFS_OBJ_MUTEX((zfsvfs), (obj_num)))
298 | |
/*
 * Encode ZFS stored time values from a struct timespec.
 *
 * tp:   pointer to an object with tv_sec and tv_nsec members.
 * stmp: two-element destination; [0] receives the seconds and [1] the
 *       nanoseconds, each converted to uint64_t.
 */
#define	ZFS_TIME_ENCODE(tp, stmp)		\
{						\
	(stmp)[0] = (uint64_t)(tp)->tv_sec;	\
	(stmp)[1] = (uint64_t)(tp)->tv_nsec;	\
}

/*
 * Decode ZFS stored time values to a struct timespec.
 *
 * tp:   pointer to the destination object; tv_sec is assigned from
 *       stmp[0] (cast to time_t) and tv_nsec from stmp[1] (cast to long).
 * stmp: two-element source in the layout written by ZFS_TIME_ENCODE.
 */
#define	ZFS_TIME_DECODE(tp, stmp)		\
{						\
	(tp)->tv_sec = (time_t)(stmp)[0];		\
	(tp)->tv_nsec = (long)(stmp)[1];		\
}
312 | |
313 | /* |
314 | * Timestamp defines |
315 | */ |
316 | #define ACCESSED (AT_ATIME) |
317 | #define STATE_CHANGED (AT_CTIME) |
318 | #define CONTENT_MODIFIED (AT_MTIME | AT_CTIME) |
319 | |
/*
 * Record an access on zp by calling zfs_tstamp_update_setup() with the
 * ACCESSED flag, but only when zfsvfs->z_atime is non-zero and the
 * VFS_RDONLY bit is clear in zfsvfs->z_vfs->vfs_flag.
 *
 * The body is brace-wrapped so that a following `else` can never bind to
 * the macro's internal `if`; call sites may still end with a semicolon.
 */
#define	ZFS_ACCESSTIME_STAMP(zfsvfs, zp) \
{ \
	if ((zfsvfs)->z_atime && \
	    !((zfsvfs)->z_vfs->vfs_flag & VFS_RDONLY)) \
		zfs_tstamp_update_setup(zp, ACCESSED, NULL, NULL, B_FALSE); \
}
323 | |
324 | extern int zfs_init_fs(zfsvfs_t *, znode_t **); |
325 | extern void zfs_set_dataprop(objset_t *); |
326 | extern void zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *, |
327 | dmu_tx_t *tx); |
328 | extern void zfs_tstamp_update_setup(znode_t *, uint_t, uint64_t [2], |
329 | uint64_t [2], boolean_t); |
330 | extern void zfs_grow_blocksize(znode_t *, uint64_t, dmu_tx_t *); |
331 | extern int zfs_freesp(znode_t *, uint64_t, uint64_t, int, boolean_t); |
332 | extern void zfs_znode_init(void); |
333 | extern void zfs_znode_fini(void); |
334 | #ifdef __NetBSD__ |
335 | extern int zfs_loadvnode(struct mount *, struct vnode *, |
336 | const void *, size_t, const void **); |
337 | extern int zfs_newvnode(struct mount *, struct vnode *, struct vnode *, |
338 | struct vattr *, kauth_cred_t, void *, size_t *, const void **); |
339 | extern int zfs_zget_cleaner(zfsvfs_t *, uint64_t, znode_t **); |
340 | #endif |
341 | extern int zfs_zget(zfsvfs_t *, uint64_t, znode_t **); |
342 | extern int zfs_rezget(znode_t *); |
343 | extern void zfs_zinactive(znode_t *); |
344 | extern void zfs_znode_delete(znode_t *, dmu_tx_t *); |
345 | extern void zfs_znode_free(znode_t *); |
/*
 * Declared with explicit (void) parameter lists so that these are true
 * prototypes in C: an empty "()" leaves the parameters unspecified and
 * lets calls with stray arguments compile unchecked.
 */
extern void zfs_remove_op_tables(void);
extern int zfs_create_op_tables(void);
348 | extern dev_t zfs_cmpldev(uint64_t); |
349 | extern int zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value); |
350 | extern int zfs_get_stats(objset_t *os, nvlist_t *nv); |
351 | extern void zfs_znode_dmu_fini(znode_t *); |
352 | |
353 | extern void zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, |
354 | znode_t *dzp, znode_t *zp, char *name, vsecattr_t *, zfs_fuid_info_t *, |
355 | vattr_t *vap); |
356 | extern int zfs_log_create_txtype(zil_create_t, vsecattr_t *vsecp, |
357 | vattr_t *vap); |
358 | extern void zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, |
359 | znode_t *dzp, char *name, uint64_t foid); |
360 | #define ZFS_NO_OBJECT 0 /* no object id */ |
361 | extern void zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, |
362 | znode_t *dzp, znode_t *zp, char *name); |
363 | extern void zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, |
364 | znode_t *dzp, znode_t *zp, char *name, char *link); |
365 | extern void zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, |
366 | znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp); |
367 | extern void zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, |
368 | znode_t *zp, offset_t off, ssize_t len, int ioflag); |
369 | extern void zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype, |
370 | znode_t *zp, uint64_t off, uint64_t len); |
371 | extern void zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype, |
372 | znode_t *zp, vattr_t *vap, uint_t mask_applied, zfs_fuid_info_t *fuidp); |
373 | #ifndef ZFS_NO_ACL |
374 | extern void zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp, |
375 | vsecattr_t *vsecp, zfs_fuid_info_t *fuidp); |
376 | #endif |
377 | extern void zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx); |
378 | extern void zfs_upgrade(zfsvfs_t *zfsvfs, dmu_tx_t *tx); |
379 | extern int zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx); |
380 | |
381 | extern caddr_t zfs_map_page(page_t *, enum seg_rw); |
382 | extern void zfs_unmap_page(page_t *, caddr_t); |
383 | |
384 | extern zil_get_data_t zfs_get_data; |
385 | extern zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE]; |
386 | extern int zfsfstype; |
387 | |
388 | extern int zfs_znode_parent_and_name(znode_t *zp, znode_t **dzpp, char *buf); |
389 | |
390 | #endif /* _KERNEL */ |
391 | |
392 | extern int zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len); |
393 | |
394 | #ifdef __cplusplus |
395 | } |
396 | #endif |
397 | |
398 | #endif /* _SYS_FS_ZFS_ZNODE_H */ |
399 | |