| 1 | /* |
| 2 | * CDDL HEADER START |
| 3 | * |
| 4 | * The contents of this file are subject to the terms of the |
| 5 | * Common Development and Distribution License (the "License"). |
| 6 | * You may not use this file except in compliance with the License. |
| 7 | * |
| 8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
| 9 | * or http://www.opensolaris.org/os/licensing. |
| 10 | * See the License for the specific language governing permissions |
| 11 | * and limitations under the License. |
| 12 | * |
| 13 | * When distributing Covered Code, include this CDDL HEADER in each |
| 14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
| 15 | * If applicable, add the following below this CDDL HEADER, with the |
| 16 | * fields enclosed by brackets "[]" replaced with your own identifying |
| 17 | * information: Portions Copyright [yyyy] [name of copyright owner] |
| 18 | * |
| 19 | * CDDL HEADER END |
| 20 | */ |
| 21 | /* |
| 22 | * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. |
| 23 | * Copyright (c) 2012, 2015 by Delphix. All rights reserved. |
| 24 | * Copyright (c) 2014 Integros [integros.com] |
| 25 | */ |
| 26 | |
| 27 | #ifndef _SYS_FS_ZFS_ZNODE_H |
| 28 | #define _SYS_FS_ZFS_ZNODE_H |
| 29 | |
| 30 | #ifdef _KERNEL |
| 31 | #include <sys/list.h> |
| 32 | #include <sys/dmu.h> |
| 33 | #include <sys/sa.h> |
| 34 | #include <sys/zfs_vfsops.h> |
| 35 | #include <sys/rrwlock.h> |
| 36 | #include <sys/zfs_sa.h> |
| 37 | #include <sys/zfs_stat.h> |
| 38 | #endif |
| 39 | #include <sys/zfs_acl.h> |
| 40 | #include <sys/zil.h> |
| 41 | |
| 42 | #ifdef __NetBSD__ |
| 43 | #include <miscfs/genfs/genfs_node.h> |
| 44 | #endif |
| 45 | |
| 46 | #ifdef __cplusplus |
| 47 | extern "C" { |
| 48 | #endif |
| 49 | |
/*
 * Additional file-level attributes that are stored in the upper half
 * of zp_flags.  Each attribute is a single bit; the values defined here
 * occupy bits 32 (ZFS_READONLY) through 45 (ZFS_SPARSE).
 */
#define	ZFS_READONLY		0x0000000100000000
#define	ZFS_HIDDEN		0x0000000200000000
#define	ZFS_SYSTEM		0x0000000400000000
#define	ZFS_ARCHIVE		0x0000000800000000
#define	ZFS_IMMUTABLE		0x0000001000000000
#define	ZFS_NOUNLINK		0x0000002000000000
#define	ZFS_APPENDONLY		0x0000004000000000
#define	ZFS_NODUMP		0x0000008000000000
#define	ZFS_OPAQUE		0x0000010000000000
#define	ZFS_AV_QUARANTINED	0x0000020000000000
#define	ZFS_AV_MODIFIED		0x0000040000000000
#define	ZFS_REPARSE		0x0000080000000000
#define	ZFS_OFFLINE		0x0000100000000000
#define	ZFS_SPARSE		0x0000200000000000
| 68 | |
/*
 * Set or clear the attribute bit(s) `attr` in `pflags` according to
 * `value`, then hand the updated flags to sa_update() for the znode's
 * SA handle.  The sa_update() call is VERIFY()ed to return 0.
 *
 *	zp	znode; its z_sa_hdl and z_zfsvfs are used for the update
 *	attr	bit mask to set or clear
 *	value	nonzero sets the bits, zero clears them
 *	pflags	modifiable lvalue holding the flags; updated in place, and
 *		its address and size are passed to sa_update()
 *	tx	passed through to sa_update()
 *
 * Every argument is parenthesized so that a compound expression (for
 * example a mask built with `|`) is applied as a unit; without that,
 * `~attr` would complement only the first operand.  The body is wrapped
 * in do/while (0) so the macro behaves as one statement, including in an
 * unbraced if/else; invoke it with a trailing semicolon.  `zp` and
 * `pflags` are expanded more than once and must not have side effects.
 */
#define	ZFS_ATTR_SET(zp, attr, value, pflags, tx) \
do { \
	if (value) \
		(pflags) |= (attr); \
	else \
		(pflags) &= ~(attr); \
	VERIFY(0 == sa_update((zp)->z_sa_hdl, \
	    SA_ZPL_FLAGS((zp)->z_zfsvfs), \
	    &(pflags), sizeof (pflags), tx)); \
} while (0)
| 78 | |
/*
 * Define special zfs pflags.  These are single-bit values in the low
 * part of the flags word (0x1 through 0x100).
 */
#define	ZFS_XATTR		0x1	/* is an extended attribute */
#define	ZFS_INHERIT_ACE		0x2	/* has inheritable ACEs */
#define	ZFS_ACL_TRIVIAL		0x4	/* file's ACL is trivial */
#define	ZFS_ACL_OBJ_ACE		0x8	/* ACL has CMPLX Object ACE */
#define	ZFS_ACL_PROTECTED	0x10	/* ACL protected */
#define	ZFS_ACL_DEFAULTED	0x20	/* ACL should be defaulted */
#define	ZFS_ACL_AUTO_INHERIT	0x40	/* ACL should be inherited */
#define	ZFS_BONUS_SCANSTAMP	0x80	/* Scanstamp in bonus area */
#define	ZFS_NO_EXECS_DENIED	0x100	/* exec was given to everyone */
| 91 | |
/*
 * Fetch the entry for one ZPL attribute from an attribute table.
 *
 * `z` is a pointer to a structure that has a z_attr_table member
 * indexable by the ZPL_* constants (ZFS_ATTR_SET passes zp->z_zfsvfs).
 * The argument and the whole expansion are parenthesized so that
 * expressions such as `&fs`, `p + 1` or a conditional can be passed
 * safely and the result can be used inside a larger expression.
 */
#define	SA_ZPL_ATIME(z)		((z)->z_attr_table[ZPL_ATIME])
#define	SA_ZPL_MTIME(z)		((z)->z_attr_table[ZPL_MTIME])
#define	SA_ZPL_CTIME(z)		((z)->z_attr_table[ZPL_CTIME])
#define	SA_ZPL_CRTIME(z)	((z)->z_attr_table[ZPL_CRTIME])
#define	SA_ZPL_GEN(z)		((z)->z_attr_table[ZPL_GEN])
#define	SA_ZPL_DACL_ACES(z)	((z)->z_attr_table[ZPL_DACL_ACES])
#define	SA_ZPL_XATTR(z)		((z)->z_attr_table[ZPL_XATTR])
#define	SA_ZPL_SYMLINK(z)	((z)->z_attr_table[ZPL_SYMLINK])
#define	SA_ZPL_RDEV(z)		((z)->z_attr_table[ZPL_RDEV])
#define	SA_ZPL_SCANSTAMP(z)	((z)->z_attr_table[ZPL_SCANSTAMP])
#define	SA_ZPL_UID(z)		((z)->z_attr_table[ZPL_UID])
#define	SA_ZPL_GID(z)		((z)->z_attr_table[ZPL_GID])
#define	SA_ZPL_PARENT(z)	((z)->z_attr_table[ZPL_PARENT])
#define	SA_ZPL_LINKS(z)		((z)->z_attr_table[ZPL_LINKS])
#define	SA_ZPL_MODE(z)		((z)->z_attr_table[ZPL_MODE])
#define	SA_ZPL_DACL_COUNT(z)	((z)->z_attr_table[ZPL_DACL_COUNT])
#define	SA_ZPL_FLAGS(z)		((z)->z_attr_table[ZPL_FLAGS])
#define	SA_ZPL_SIZE(z)		((z)->z_attr_table[ZPL_SIZE])
#define	SA_ZPL_ZNODE_ACL(z)	((z)->z_attr_table[ZPL_ZNODE_ACL])
#define	SA_ZPL_PAD(z)		((z)->z_attr_table[ZPL_PAD])
| 112 | |
/*
 * Is ID ephemeral?
 *
 * Evaluates to nonzero when the ID `x` is greater than MAXUID.  The
 * argument is parenthesized so that expressions with lower precedence
 * than `>` (bitwise OR, a conditional, ...) are compared as a whole.
 */
#define	IS_EPHEMERAL(x)		((x) > MAXUID)
| 117 | |
/*
 * Should we use FUIDs?  Should we use SA?
 *
 * Each test is true only when both the given ZPL `version` and the SPA
 * version of the pool behind objset `os` (spa_version(dmu_objset_spa(os)))
 * have reached the corresponding *_VERSION_FUID / *_VERSION_SA level.
 * `version` is parenthesized so that a compound expression is compared
 * as a whole rather than being split by operator precedence.
 */
#define	USE_FUIDS(version, os)	((version) >= ZPL_VERSION_FUID && \
	spa_version(dmu_objset_spa(os)) >= SPA_VERSION_FUID)
#define	USE_SA(version, os)	((version) >= ZPL_VERSION_SA && \
	spa_version(dmu_objset_spa(os)) >= SPA_VERSION_SA)
| 125 | |
/* presumably the object number of the master node -- TODO confirm */
#define	MASTER_NODE_OBJ	1

/*
 * Special attributes for master node.
 * "userquota@" and "groupquota@" are also valid (from
 * zfs_userquota_prop_prefixes[]).
 *
 * Each macro below is the string name of one such attribute.
 */
#define	ZFS_FSID		"FSID"
#define	ZFS_UNLINKED_SET	"DELETE_QUEUE"
#define	ZFS_ROOT_OBJ		"ROOT"
#define	ZPL_VERSION_STR		"VERSION"
#define	ZFS_FUID_TABLES		"FUID"
#define	ZFS_SHARES_DIR		"SHARES"
#define	ZFS_SA_ATTRS		"SA_ATTRS"
| 140 | |
/*
 * Path component length
 *
 * The generic fs code uses MAXNAMELEN to represent the largest
 * component length.  Unfortunately, that length includes the
 * terminating NUL byte.  ZFS needs to tell users via pathconf() and
 * statvfs() what the true maximum length of a component is, excluding
 * the NUL, hence the "- 1".
 */
#define	ZFS_MAXNAMELEN	(MAXNAMELEN - 1)
| 150 | |
/*
 * Convert mode bits (zp_mode) to BSD-style DT_* values for storing in
 * the directory entries.
 *
 * The file-type bits selected by S_IFMT are shifted down by 12.  The
 * definition is skipped when IFTODT has already been defined by an
 * earlier header.
 */
#ifndef IFTODT
#define	IFTODT(mode) (((mode) & S_IFMT) >> 12)
#endif
| 158 | |
/*
 * The directory entry has the type (currently unused on Solaris) in the
 * top 4 bits, and the object number in the low 48 bits.  The "middle"
 * 12 bits are unused.
 *
 * ZFS_DIRENT_TYPE extracts the 4-bit field starting at bit 60;
 * ZFS_DIRENT_OBJ extracts the 48-bit field starting at bit 0.
 */
#define	ZFS_DIRENT_TYPE(de) BF64_GET(de, 60, 4)
#define	ZFS_DIRENT_OBJ(de) BF64_GET(de, 0, 48)
| 166 | |
| 167 | /* |
| 168 | * Directory entry locks control access to directory entries. |
| 169 | * They are used to protect creates, deletes, and renames. |
| 170 | * Each directory znode has a mutex and a list of locked names. |
| 171 | */ |
| 172 | #ifdef _KERNEL |
/*
 * One locked directory entry name.  Each lock records the directory
 * znode it belongs to (dl_dzp) and is chained to the next lock through
 * dl_next.
 */
typedef struct zfs_dirlock {
	char		*dl_name;	/* directory entry being locked */
	uint32_t	dl_sharecnt;	/* 0 if exclusive, > 0 if shared */
	uint8_t		dl_namelock;	/* 1 if z_name_lock is NOT held */
	uint16_t	dl_namesize;	/* set if dl_name was allocated */
	kcondvar_t	dl_cv;		/* wait for entry to be unlocked */
	struct znode	*dl_dzp;	/* directory znode */
	struct zfs_dirlock *dl_next;	/* next in z_dirlocks list */
} zfs_dirlock_t;
| 182 | |
/*
 * In-core state for one ZFS object, linked to a vnode through z_vnode
 * (see ZTOV() and VTOZ()).
 *
 * Fields commented "(cached)" are presumably in-memory copies of the
 * corresponding SA attributes -- TODO confirm against the znode code.
 *
 * Platform-specific members:
 *   - __NetBSD__: z_gnode (the first member) and z_lockf.
 *     NOTE(review): z_gnode is placed first, presumably so that the
 *     znode can be used where a struct genfs_node is expected -- confirm.
 *   - illumos: z_lock, z_parent_lock, z_name_lock and z_dirlocks; they
 *     are compiled out when `illumos` is not defined.
 */
typedef struct znode {
#ifdef __NetBSD__
	struct genfs_node z_gnode;
#endif
	struct zfsvfs	*z_zfsvfs;
	vnode_t		*z_vnode;
	uint64_t	z_id;		/* object ID for this znode */
#ifdef illumos
	kmutex_t	z_lock;		/* znode modification lock */
	krwlock_t	z_parent_lock;	/* parent lock for directories */
	krwlock_t	z_name_lock;	/* "master" lock for dirent locks */
	zfs_dirlock_t	*z_dirlocks;	/* directory entry lock list */
#endif
	kmutex_t	z_range_lock;	/* protects changes to z_range_avl */
	avl_tree_t	z_range_avl;	/* avl tree of file range locks */
	uint8_t		z_unlinked;	/* file has been unlinked */
	uint8_t		z_atime_dirty;	/* atime needs to be synced */
	uint8_t		z_zn_prefetch;	/* Prefetch znodes? */
	uint8_t		z_moved;	/* Has this znode been moved? */
	uint_t		z_blksz;	/* block size in bytes */
	uint_t		z_seq;		/* modification sequence number */
	uint64_t	z_mapcnt;	/* number of pages mapped to file */
	uint64_t	z_gen;		/* generation (cached) */
	uint64_t	z_size;		/* file size (cached) */
	uint64_t	z_atime[2];	/* atime (cached) */
	uint64_t	z_links;	/* file links (cached) */
	uint64_t	z_pflags;	/* pflags (cached) */
	uint64_t	z_uid;		/* uid fuid (cached) */
	uint64_t	z_gid;		/* gid fuid (cached) */
	mode_t		z_mode;		/* mode (cached) */
	uint32_t	z_sync_cnt;	/* synchronous open count */
	kmutex_t	z_acl_lock;	/* acl data lock */
	zfs_acl_t	*z_acl_cached;	/* cached acl */
	list_node_t	z_link_node;	/* all znodes in fs link */
	sa_handle_t	*z_sa_hdl;	/* handle to sa data */
	boolean_t	z_is_sa;	/* are we native sa? */
#ifdef __NetBSD__
	struct lockf	*z_lockf;	/* head of byte-level lock list */
#endif
} znode_t;
| 223 | |
| 224 | |
| 225 | /* |
| 226 | * Range locking rules |
| 227 | * -------------------- |
| 228 | * 1. When truncating a file (zfs_create, zfs_setattr, zfs_space) the whole |
| 229 | * file range needs to be locked as RL_WRITER. Only then can the pages be |
| 230 | * freed etc and zp_size reset. zp_size must be set within range lock. |
| 231 | * 2. For writes and punching holes (zfs_write & zfs_space) just the range |
| 232 | * being written or freed needs to be locked as RL_WRITER. |
| 233 | * Multiple writes at the end of the file must coordinate zp_size updates |
| 234 | * to ensure data isn't lost. A compare and swap loop is currently used |
| 235 | * to ensure the file size is at least the offset last written. |
| 236 | * 3. For reads (zfs_read, zfs_get_data & zfs_putapage) just the range being |
| 237 | * read needs to be locked as RL_READER. A check against zp_size can then |
| 238 | * be made for reading beyond end of file. |
| 239 | */ |
| 240 | |
/*
 * Convert between znode pointers and vnode pointers
 *
 * ZTOV() yields zp->z_vnode and VTOZ() yields vp->v_data cast to
 * znode_t *.  When DEBUG is defined they are inline functions that
 * additionally assert the two objects agree: the result is NULL, or its
 * back-pointer is NULL, or its back-pointer refers to the argument.
 * Otherwise they are plain macros with no checking.
 */
#ifdef DEBUG
static __inline vnode_t *
ZTOV(znode_t *zp)
{
	vnode_t *vp = zp->z_vnode;

	/* A set v_data must point back at this znode. */
	ASSERT(vp == NULL || vp->v_data == NULL || vp->v_data == zp);
	return (vp);
}
static __inline znode_t *
VTOZ(vnode_t *vp)
{
	znode_t *zp = (znode_t *)vp->v_data;

	/* A set z_vnode must point back at this vnode. */
	ASSERT(zp == NULL || zp->z_vnode == NULL || zp->z_vnode == vp);
	return (zp);
}
#else
#define	ZTOV(ZP)	((ZP)->z_vnode)
#define	VTOZ(VP)	((znode_t *)(VP)->v_data)
#endif
| 265 | |
/*
 * Called on entry to each ZFS vnode and vfs operation.
 *
 * Takes the zfsvfs's z_teardown_lock for reading.  If the filesystem is
 * marked z_unmounted, the lock is dropped again with ZFS_EXIT() and the
 * *calling function* returns EIO, so this may only be used in functions
 * that return an int error code.
 *
 * The body is wrapped in do/while (0) so that the macro is a single
 * statement and stays correct inside an unbraced if/else; invoke it
 * with a trailing semicolon.
 */
#define	ZFS_ENTER(zfsvfs) \
do { \
	rrm_enter_read(&(zfsvfs)->z_teardown_lock, FTAG); \
	if ((zfsvfs)->z_unmounted) { \
		ZFS_EXIT(zfsvfs); \
		return (EIO); \
	} \
} while (0)

/* Must be called before exiting the vop; releases z_teardown_lock. */
#define	ZFS_EXIT(zfsvfs) rrm_exit(&(zfsvfs)->z_teardown_lock, FTAG)
| 278 | |
/*
 * Verifies the znode is valid.
 *
 * If the znode has no SA handle (z_sa_hdl == NULL), ZFS_EXIT() is
 * invoked on its zfsvfs and the *calling function* returns EIO.
 *
 * The body is wrapped in do/while (0) so that the macro is a single
 * statement (a bare `if` could capture a following `else`), and the
 * definition no longer ends in a line continuation that would pull the
 * next source line into the macro.  Invoke it with a trailing
 * semicolon.
 */
#define	ZFS_VERIFY_ZP(zp) \
do { \
	if ((zp)->z_sa_hdl == NULL) { \
		ZFS_EXIT((zp)->z_zfsvfs); \
		return (EIO); \
	} \
} while (0)

/*
 * Macros for dealing with dmu_buf_hold
 *
 * An object number selects one mutex from the zfsvfs's z_hold_mtx
 * array: ZFS_OBJ_HASH() masks the number with (ZFS_OBJ_MTX_SZ - 1) and
 * ZFS_OBJ_MUTEX() takes the address of that array slot.  Different
 * object numbers can therefore share a mutex.
 * NOTE(review): the mask only covers every slot if ZFS_OBJ_MTX_SZ is a
 * power of two -- confirm at its definition.
 *
 * ZFS_OBJ_HOLD_ENTER / _TRYENTER / _EXIT apply mutex_enter(),
 * mutex_tryenter() and mutex_exit() to the selected mutex.
 */
#define	ZFS_OBJ_HASH(obj_num)	((obj_num) & (ZFS_OBJ_MTX_SZ - 1))
#define	ZFS_OBJ_MUTEX(zfsvfs, obj_num)	\
	(&(zfsvfs)->z_hold_mtx[ZFS_OBJ_HASH(obj_num)])
#define	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num) \
	mutex_enter(ZFS_OBJ_MUTEX((zfsvfs), (obj_num)))
#define	ZFS_OBJ_HOLD_TRYENTER(zfsvfs, obj_num) \
	mutex_tryenter(ZFS_OBJ_MUTEX((zfsvfs), (obj_num)))
#define	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num) \
	mutex_exit(ZFS_OBJ_MUTEX((zfsvfs), (obj_num)))
| 298 | |
/*
 * Encode ZFS stored time values from a struct timespec.
 *
 *	tp	pointer to the source timespec
 *	stmp	two-element array: [0] receives tv_sec and [1] receives
 *		tv_nsec, each converted to uint64_t
 *
 * Wrapped in do/while (0) so the macro is a single statement; invoke it
 * with a trailing semicolon.
 */
#define	ZFS_TIME_ENCODE(tp, stmp) \
do { \
	(stmp)[0] = (uint64_t)(tp)->tv_sec; \
	(stmp)[1] = (uint64_t)(tp)->tv_nsec; \
} while (0)

/*
 * Decode ZFS stored time values to a struct timespec.
 *
 *	tp	pointer to the destination timespec
 *	stmp	two-element array: [0] is converted to time_t for tv_sec
 *		and [1] to long for tv_nsec
 *
 * Wrapped in do/while (0) so the macro is a single statement; invoke it
 * with a trailing semicolon.
 */
#define	ZFS_TIME_DECODE(tp, stmp) \
do { \
	(tp)->tv_sec = (time_t)(stmp)[0]; \
	(tp)->tv_nsec = (long)(stmp)[1]; \
} while (0)
| 312 | |
/*
 * Timestamp defines
 *
 * Masks of AT_* time bits.  ACCESSED is the atime bit, STATE_CHANGED is
 * the ctime bit, and CONTENT_MODIFIED combines the mtime and ctime bits.
 * ZFS_ACCESSTIME_STAMP() passes ACCESSED to zfs_tstamp_update_setup().
 */
#define	ACCESSED		(AT_ATIME)
#define	STATE_CHANGED		(AT_CTIME)
#define	CONTENT_MODIFIED	(AT_MTIME | AT_CTIME)
| 319 | |
/*
 * Request an access-time update for `zp`.
 *
 * zfs_tstamp_update_setup(zp, ACCESSED, NULL, NULL, B_FALSE) is called
 * only when the filesystem has z_atime set and its vfs is not flagged
 * VFS_RDONLY; otherwise nothing happens.
 *
 * The body is wrapped in do/while (0) and carries no trailing semicolon
 * of its own, so the macro is exactly one statement: the caller's `;`
 * no longer produces a stray empty statement, and the inner `if` cannot
 * capture a following `else`.  Invoke it with a trailing semicolon.
 */
#define	ZFS_ACCESSTIME_STAMP(zfsvfs, zp) \
do { \
	if ((zfsvfs)->z_atime && \
	    !((zfsvfs)->z_vfs->vfs_flag & VFS_RDONLY)) { \
		zfs_tstamp_update_setup(zp, ACCESSED, NULL, NULL, \
		    B_FALSE); \
	} \
} while (0)
| 323 | |
/*
 * Filesystem and znode entry points.  These are declarations only; the
 * definitions are outside this header.
 */
extern int zfs_init_fs(zfsvfs_t *, znode_t **);
extern void zfs_set_dataprop(objset_t *);
extern void zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *,
    dmu_tx_t *tx);
/* Called by ZFS_ACCESSTIME_STAMP() with (zp, ACCESSED, NULL, NULL, B_FALSE). */
extern void zfs_tstamp_update_setup(znode_t *, uint_t, uint64_t [2],
    uint64_t [2], boolean_t);
extern void zfs_grow_blocksize(znode_t *, uint64_t, dmu_tx_t *);
extern int zfs_freesp(znode_t *, uint64_t, uint64_t, int, boolean_t);
extern void zfs_znode_init(void);
extern void zfs_znode_fini(void);
#ifdef __NetBSD__
/* Declared only when building for NetBSD. */
extern int zfs_loadvnode(struct mount *, struct vnode *,
    const void *, size_t, const void **);
extern int zfs_newvnode(struct mount *, struct vnode *, struct vnode *,
    struct vattr *, kauth_cred_t, void *, size_t *, const void **);
extern int zfs_zget_cleaner(zfsvfs_t *, uint64_t, znode_t **);
#endif
extern int zfs_zget(zfsvfs_t *, uint64_t, znode_t **);
extern int zfs_rezget(znode_t *);
extern void zfs_zinactive(znode_t *);
extern void zfs_znode_delete(znode_t *, dmu_tx_t *);
extern void zfs_znode_free(znode_t *);
/*
 * Neither function takes arguments.  The parameter lists are written as
 * (void) so that these are true prototypes: in C before C23 an empty
 * list `()` leaves the parameters unspecified and disables argument
 * checking at call sites.  This matches how the declarations are
 * already interpreted when the header is compiled as C++.
 */
extern void zfs_remove_op_tables(void);
extern int zfs_create_op_tables(void);
extern dev_t zfs_cmpldev(uint64_t);
extern int zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value);
extern int zfs_get_stats(objset_t *os, nvlist_t *nv);
extern void zfs_znode_dmu_fini(znode_t *);

/*
 * zfs_log_* declarations.  Each takes the zilog and the transaction
 * along with operation-specific arguments.
 * NOTE(review): presumably these record the named operation in the
 * intent log -- confirm against their definitions.
 */
extern void zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
    znode_t *dzp, znode_t *zp, char *name, vsecattr_t *, zfs_fuid_info_t *,
    vattr_t *vap);
extern int zfs_log_create_txtype(zil_create_t, vsecattr_t *vsecp,
    vattr_t *vap);
extern void zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
    znode_t *dzp, char *name, uint64_t foid);
#define	ZFS_NO_OBJECT	0	/* no object id */
extern void zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
    znode_t *dzp, znode_t *zp, char *name);
extern void zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
    znode_t *dzp, znode_t *zp, char *name, char *link);
extern void zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
    znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp);
extern void zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
    znode_t *zp, offset_t off, ssize_t len, int ioflag);
extern void zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype,
    znode_t *zp, uint64_t off, uint64_t len);
extern void zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
    znode_t *zp, vattr_t *vap, uint_t mask_applied, zfs_fuid_info_t *fuidp);
#ifndef ZFS_NO_ACL
/* Omitted when ZFS_NO_ACL is defined. */
extern void zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp,
    vsecattr_t *vsecp, zfs_fuid_info_t *fuidp);
#endif
extern void zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx);
extern void zfs_upgrade(zfsvfs_t *zfsvfs, dmu_tx_t *tx);
extern int zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx);

extern caddr_t zfs_map_page(page_t *, enum seg_rw);
extern void zfs_unmap_page(page_t *, caddr_t);

/* Objects defined elsewhere; zfs_replay_vector has TX_MAX_TYPE entries. */
extern zil_get_data_t zfs_get_data;
extern zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE];
extern int zfsfstype;

extern int zfs_znode_parent_and_name(znode_t *zp, znode_t **dzpp, char *buf);
| 389 | |
| 390 | #endif /* _KERNEL */ |
| 391 | |
/*
 * Declared outside the _KERNEL conditional, so it is visible to
 * non-kernel builds that include this header as well.
 * NOTE(review): presumably writes the path of object `obj` in objset
 * `osp` into `buf` (capacity `len`) -- confirm against the definition.
 */
extern int zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len);
| 393 | |
| 394 | #ifdef __cplusplus |
| 395 | } |
| 396 | #endif |
| 397 | |
| 398 | #endif /* _SYS_FS_ZFS_ZNODE_H */ |
| 399 | |