| 1 | /* |
| 2 | * CDDL HEADER START |
| 3 | * |
| 4 | * The contents of this file are subject to the terms of the |
| 5 | * Common Development and Distribution License (the "License"). |
| 6 | * You may not use this file except in compliance with the License. |
| 7 | * |
| 8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
| 9 | * or http://www.opensolaris.org/os/licensing. |
| 10 | * See the License for the specific language governing permissions |
| 11 | * and limitations under the License. |
| 12 | * |
| 13 | * When distributing Covered Code, include this CDDL HEADER in each |
| 14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
| 15 | * If applicable, add the following below this CDDL HEADER, with the |
| 16 | * fields enclosed by brackets "[]" replaced with your own identifying |
| 17 | * information: Portions Copyright [yyyy] [name of copyright owner] |
| 18 | * |
| 19 | * CDDL HEADER END |
| 20 | */ |
| 21 | |
| 22 | /* |
| 23 | * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. |
| 24 | * Copyright 2011 Nexenta Systems, Inc. All rights reserved. |
| 25 | * Copyright (c) 2012, 2016 by Delphix. All rights reserved. |
| 26 | * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. |
| 27 | * Copyright 2016 Toomas Soome <tsoome@me.com> |
| 28 | */ |
| 29 | |
| 30 | #ifndef _ZIO_H |
| 31 | #define _ZIO_H |
| 32 | |
| 33 | #include <sys/zio_priority.h> |
| 34 | #include <sys/zfs_context.h> |
| 35 | #include <sys/spa.h> |
| 36 | #include <sys/txg.h> |
| 37 | #include <sys/avl.h> |
| 38 | #include <sys/kstat.h> |
| 39 | #include <sys/fs/zfs.h> |
| 40 | #include <sys/zio_impl.h> |
| 41 | |
| 42 | #ifdef __cplusplus |
| 43 | extern "C" { |
| 44 | #endif |
| 45 | |
/*
 * Embedded checksum
 *
 * A zio_eck_t is a magic number followed by a checksum.  Within this header
 * it is used as the tail of the gang block header (zio_gbh_phys_t).
 */
#define	ZEC_MAGIC	0x210da7ab10c7a11ULL

typedef struct zio_eck {
	uint64_t	zec_magic;	/* for validation, endianness */
	zio_cksum_t	zec_cksum;	/* 256-bit checksum */
} zio_eck_t;
| 55 | |
/*
 * Gang block headers are self-checksumming and contain an array
 * of block pointers.
 *
 * SPA_GBH_NBLKPTRS is the number of whole blkptr_t entries that fit in
 * SPA_GANGBLOCKSIZE bytes once the trailing zio_eck_t is subtracted.
 * SPA_GBH_FILLER is the number of 64-bit words left over after those
 * entries, so the three members of zio_gbh_phys_t are sized to fill the
 * gang block (assuming the compiler inserts no padding).
 */
#define	SPA_GANGBLOCKSIZE	SPA_MINBLOCKSIZE
#define	SPA_GBH_NBLKPTRS	((SPA_GANGBLOCKSIZE - \
	sizeof (zio_eck_t)) / sizeof (blkptr_t))
#define	SPA_GBH_FILLER		((SPA_GANGBLOCKSIZE - \
	sizeof (zio_eck_t) - \
	(SPA_GBH_NBLKPTRS * sizeof (blkptr_t))) /\
	sizeof (uint64_t))

typedef struct zio_gbh {
	blkptr_t		zg_blkptr[SPA_GBH_NBLKPTRS];	/* block pointers */
	uint64_t		zg_filler[SPA_GBH_FILLER];	/* unused remainder */
	zio_eck_t		zg_tail;	/* embedded checksum */
} zio_gbh_phys_t;
| 73 | |
/*
 * Checksum function identifiers.
 *
 * Only the first enumerator has an explicit value; all others are numbered
 * by position, so entries must never be removed or reordered.  The entry
 * between ZIO_CHECKSUM_LABEL and ZIO_CHECKSUM_ZILOG is the gang block header
 * checksum; without it every later identifier would shift down by one.
 *
 * ZIO_CHECKSUM_FUNCTIONS is the number of identifiers and must stay last.
 * Its value depends on which of the platform-conditional entries are
 * compiled in.
 */
enum zio_checksum {
	ZIO_CHECKSUM_INHERIT = 0,
	ZIO_CHECKSUM_ON,
	ZIO_CHECKSUM_OFF,
	ZIO_CHECKSUM_LABEL,
	ZIO_CHECKSUM_GANG_HEADER,
	ZIO_CHECKSUM_ZILOG,
	ZIO_CHECKSUM_FLETCHER_2,
	ZIO_CHECKSUM_FLETCHER_4,
	ZIO_CHECKSUM_SHA256,
	ZIO_CHECKSUM_ZILOG2,
	ZIO_CHECKSUM_NOPARITY,
#ifndef __NetBSD__
	ZIO_CHECKSUM_SHA512,
	ZIO_CHECKSUM_SKEIN,
#endif
#ifdef illumos
	ZIO_CHECKSUM_EDONR,
#endif
	ZIO_CHECKSUM_FUNCTIONS
};
| 95 | |
/*
 * The number of "legacy" checksum functions which can be set on individual
 * objects.
 */
#define	ZIO_CHECKSUM_LEGACY_FUNCTIONS	ZIO_CHECKSUM_ZILOG2

/*
 * ZIO_CHECKSUM_ON_VALUE is presumably the function that the "on" setting
 * resolves to; ZIO_CHECKSUM_DEFAULT is the default setting itself.
 */
#define	ZIO_CHECKSUM_ON_VALUE	ZIO_CHECKSUM_FLETCHER_4
#define	ZIO_CHECKSUM_DEFAULT	ZIO_CHECKSUM_ON

/*
 * ZIO_CHECKSUM_MASK covers the low 8 bits; ZIO_CHECKSUM_VERIFY is the single
 * bit immediately above that mask.
 */
#define	ZIO_CHECKSUM_MASK	0xffULL
#define	ZIO_CHECKSUM_VERIFY	(1 << 8)

/* Dedup-related settings: the checksum function and a minimum threshold. */
#define	ZIO_DEDUPCHECKSUM	ZIO_CHECKSUM_SHA256
#define	ZIO_DEDUPDITTO_MIN	100
| 110 | |
/*
 * Compression function identifiers.
 *
 * The numeric value of each identifier is spelled out so that it is visible
 * at a glance and cannot change by accident when the list is edited.
 * ZIO_COMPRESS_FUNCTIONS is the number of identifiers and must stay last.
 */
enum zio_compress {
	ZIO_COMPRESS_INHERIT	= 0,
	ZIO_COMPRESS_ON		= 1,
	ZIO_COMPRESS_OFF	= 2,
	ZIO_COMPRESS_LZJB	= 3,
	ZIO_COMPRESS_EMPTY	= 4,
	ZIO_COMPRESS_GZIP_1	= 5,
	ZIO_COMPRESS_GZIP_2	= 6,
	ZIO_COMPRESS_GZIP_3	= 7,
	ZIO_COMPRESS_GZIP_4	= 8,
	ZIO_COMPRESS_GZIP_5	= 9,
	ZIO_COMPRESS_GZIP_6	= 10,
	ZIO_COMPRESS_GZIP_7	= 11,
	ZIO_COMPRESS_GZIP_8	= 12,
	ZIO_COMPRESS_GZIP_9	= 13,
	ZIO_COMPRESS_ZLE	= 14,
	ZIO_COMPRESS_LZ4	= 15,
	ZIO_COMPRESS_FUNCTIONS	/* count; keep last */
};
| 130 | |
/*
 * The number of "legacy" compression functions which can be set on individual
 * objects.
 */
#define	ZIO_COMPRESS_LEGACY_FUNCTIONS	ZIO_COMPRESS_LZ4

/*
 * The meaning of "compress = on" selected by the compression features enabled
 * on a given pool.
 */
#define	ZIO_COMPRESS_LEGACY_ON_VALUE	ZIO_COMPRESS_LZJB
#define	ZIO_COMPRESS_LZ4_ON_VALUE	ZIO_COMPRESS_LZ4

/* The default compression setting is "off". */
#define	ZIO_COMPRESS_DEFAULT		ZIO_COMPRESS_OFF
| 145 | |
/*
 * Evaluates to non-zero when compress is one of LZJB, LZ4, ON or OFF.
 * The argument is expanded up to four times, so it must not have side
 * effects.
 */
#define	BOOTFS_COMPRESS_VALID(compress)			\
	((compress) == ZIO_COMPRESS_LZJB ||		\
	(compress) == ZIO_COMPRESS_LZ4 ||		\
	(compress) == ZIO_COMPRESS_ON ||		\
	(compress) == ZIO_COMPRESS_OFF)
| 151 | |
/* Failure mode selectors: wait, continue, or panic. */
#define	ZIO_FAILURE_MODE_WAIT		0
#define	ZIO_FAILURE_MODE_CONTINUE	1
#define	ZIO_FAILURE_MODE_PANIC		2
| 155 | |
/*
 * Per-zio flag bits; struct zio keeps them in io_flags and io_orig_flags.
 *
 * The flags are laid out in groups, ordered by how widely they are
 * inherited.  Each ZIO_FLAG_*_INHERIT mask is defined as "the first flag of
 * the following group, minus one", which yields a mask of every bit below
 * that flag.  This is why the flags marked "must be first for INHERIT" have
 * to remain the first entry of their group.
 */
enum zio_flag {
	/*
	 * Flags inherited by gang, ddt, and vdev children,
	 * and that must be equal for two zios to aggregate
	 */
	ZIO_FLAG_DONT_AGGREGATE	= 1 << 0,
	ZIO_FLAG_IO_REPAIR	= 1 << 1,
	ZIO_FLAG_SELF_HEAL	= 1 << 2,
	ZIO_FLAG_RESILVER	= 1 << 3,
	ZIO_FLAG_SCRUB		= 1 << 4,
	ZIO_FLAG_SCAN_THREAD	= 1 << 5,
	ZIO_FLAG_PHYSICAL	= 1 << 6,

#define	ZIO_FLAG_AGG_INHERIT	(ZIO_FLAG_CANFAIL - 1)

	/*
	 * Flags inherited by ddt, gang, and vdev children.
	 */
	ZIO_FLAG_CANFAIL	= 1 << 7,	/* must be first for INHERIT */
	ZIO_FLAG_SPECULATIVE	= 1 << 8,
	ZIO_FLAG_CONFIG_WRITER	= 1 << 9,
	ZIO_FLAG_DONT_RETRY	= 1 << 10,
	ZIO_FLAG_DONT_CACHE	= 1 << 11,
	ZIO_FLAG_NODATA		= 1 << 12,
	ZIO_FLAG_INDUCE_DAMAGE	= 1 << 13,
	ZIO_FLAG_IO_ALLOCATING	= 1 << 14,

#define	ZIO_FLAG_DDT_INHERIT	(ZIO_FLAG_IO_RETRY - 1)
#define	ZIO_FLAG_GANG_INHERIT	(ZIO_FLAG_IO_RETRY - 1)

	/*
	 * Flags inherited by vdev children.
	 */
	ZIO_FLAG_IO_RETRY	= 1 << 15,	/* must be first for INHERIT */
	ZIO_FLAG_PROBE		= 1 << 16,
	ZIO_FLAG_TRYHARD	= 1 << 17,
	ZIO_FLAG_OPTIONAL	= 1 << 18,

#define	ZIO_FLAG_VDEV_INHERIT	(ZIO_FLAG_DONT_QUEUE - 1)

	/*
	 * Flags not inherited by any children.
	 */
	ZIO_FLAG_DONT_QUEUE	= 1 << 19,	/* must be first for INHERIT */
	ZIO_FLAG_DONT_PROPAGATE	= 1 << 20,
	ZIO_FLAG_IO_BYPASS	= 1 << 21,
	ZIO_FLAG_IO_REWRITE	= 1 << 22,
	ZIO_FLAG_RAW		= 1 << 23,
	ZIO_FLAG_GANG_CHILD	= 1 << 24,
	ZIO_FLAG_DDT_CHILD	= 1 << 25,
	ZIO_FLAG_GODFATHER	= 1 << 26,
	ZIO_FLAG_NOPWRITE	= 1 << 27,
	ZIO_FLAG_REEXECUTED	= 1 << 28,
	ZIO_FLAG_DELEGATED	= 1 << 29,
};
| 211 | |
/*
 * Defined as 0, i.e. no flag bits set.  Presumably the readable counterpart
 * of ZIO_FLAG_CANFAIL at call sites -- confirm against callers.
 */
#define	ZIO_FLAG_MUSTSUCCEED		0

/*
 * Flags for a child of the given zio.  Each macro keeps the parent's flags
 * that fall inside the matching *_INHERIT mask and adds ZIO_FLAG_CANFAIL;
 * the DDT and gang variants also add their own child marker flag.
 */
#define	ZIO_DDT_CHILD_FLAGS(zio)				\
	(((zio)->io_flags & ZIO_FLAG_DDT_INHERIT) |		\
	ZIO_FLAG_DDT_CHILD | ZIO_FLAG_CANFAIL)

#define	ZIO_GANG_CHILD_FLAGS(zio)				\
	(((zio)->io_flags & ZIO_FLAG_GANG_INHERIT) |		\
	ZIO_FLAG_GANG_CHILD | ZIO_FLAG_CANFAIL)

#define	ZIO_VDEV_CHILD_FLAGS(zio)				\
	(((zio)->io_flags & ZIO_FLAG_VDEV_INHERIT) |		\
	ZIO_FLAG_CANFAIL)
| 225 | |
/*
 * Kinds of child zio.  The values are used as array indices (see
 * io_child_error[] and io_children[][] in struct zio), so they are written
 * out explicitly.  ZIO_CHILD_TYPES is the number of kinds and must stay last.
 */
enum zio_child {
	ZIO_CHILD_VDEV		= 0,
	ZIO_CHILD_GANG		= 1,
	ZIO_CHILD_DDT		= 2,
	ZIO_CHILD_LOGICAL	= 3,
	ZIO_CHILD_TYPES		/* count; keep last */
};
| 233 | |
/*
 * Kinds of wait.  The values are used as array indices (see io_state[] and
 * io_children[][] in struct zio), so they are written out explicitly.
 * ZIO_WAIT_TYPES is the number of kinds and must stay last.
 */
enum zio_wait_type {
	ZIO_WAIT_READY	= 0,
	ZIO_WAIT_DONE	= 1,
	ZIO_WAIT_TYPES	/* count; keep last */
};
| 239 | |
/*
 * We'll take the numbers 122 and 123 to indicate checksum errors and
 * fragmentation. Those don't collide with any errno values as they
 * are greater than ELAST.
 */
#define	ECKSUM	122	/* checksum error */
#define	EFRAGS	123	/* fragmentation */
| 247 | |
/*
 * Signature of a zio callback: it receives the zio and returns nothing.
 * struct zio stores callbacks of this type in io_ready, io_children_ready,
 * io_physdone and io_done.
 */
typedef void zio_done_func_t(zio_t *zio);

/* Globals defined outside this header. */
extern boolean_t zio_dva_throttle_enabled;
extern const char *zio_type_name[ZIO_TYPES];	/* ZIO_TYPES entries */
| 252 | |
| 253 | /* |
| 254 | * A bookmark is a four-tuple <objset, object, level, blkid> that uniquely |
| 255 | * identifies any block in the pool. By convention, the meta-objset (MOS) |
| 256 | * is objset 0, and the meta-dnode is object 0. This covers all blocks |
| 257 | * except root blocks and ZIL blocks, which are defined as follows: |
| 258 | * |
| 259 | * Root blocks (objset_phys_t) are object 0, level -1: <objset, 0, -1, 0>. |
| 260 | * ZIL blocks are bookmarked <objset, 0, -2, blkid == ZIL sequence number>. |
| 261 | * dmu_sync()ed ZIL data blocks are bookmarked <objset, object, -2, blkid>. |
| 262 | * dnode visit bookmarks are <objset, object id of dnode, -3, 0>. |
| 263 | * |
| 264 | * Note: this structure is called a bookmark because its original purpose |
| 265 | * was to remember where to resume a pool-wide traverse. |
| 266 | * |
| 267 | * Note: this structure is passed between userland and the kernel, and is |
| 268 | * stored on disk (by virtue of being incorporated into other on-disk |
| 269 | * structures, e.g. dsl_scan_phys_t). |
| 270 | */ |
/*
 * The <objset, object, level, blkid> four-tuple.  zb_level is signed because
 * root, ZIL and dnode-visit bookmarks use negative levels (see ZB_ROOT_LEVEL,
 * ZB_ZIL_LEVEL and ZB_DNODE_LEVEL).
 */
typedef struct zbookmark_phys {
	uint64_t	zb_objset;	/* objset */
	uint64_t	zb_object;	/* object */
	int64_t		zb_level;	/* level; negative for special cases */
	uint64_t	zb_blkid;	/* blkid */
} zbookmark_phys_t;
| 277 | |
/*
 * Fill in all four fields of the bookmark that zb points to.
 *
 * The body is wrapped in do { } while (0) so that "SET_BOOKMARK(...);"
 * expands to exactly one statement and can be used as the body of an
 * unbraced if that is followed by else.  Each value argument is
 * parenthesized so that it is assigned as a single expression.
 *
 * zb is expanded four times, so it must not have side effects.
 */
#define	SET_BOOKMARK(zb, objset, object, level, blkid)	\
do {							\
	(zb)->zb_objset = (objset);			\
	(zb)->zb_object = (object);			\
	(zb)->zb_level = (level);			\
	(zb)->zb_blkid = (blkid);			\
} while (0)
| 285 | |
/* All-ones 64-bit objset value. */
#define	ZB_DESTROYED_OBJSET	(-1ULL)

/* Field values of a root block bookmark: <objset, 0, -1, 0>. */
#define	ZB_ROOT_OBJECT		(0ULL)
#define	ZB_ROOT_LEVEL		(-1LL)
#define	ZB_ROOT_BLKID		(0ULL)

/* Field values of a ZIL block bookmark (level -2). */
#define	ZB_ZIL_OBJECT		(0ULL)
#define	ZB_ZIL_LEVEL		(-2LL)

/* Field values of a dnode visit bookmark (level -3, blkid 0). */
#define	ZB_DNODE_LEVEL		(-3LL)
#define	ZB_DNODE_BLKID		(0ULL)

/* True when every field of the bookmark is zero. */
#define	ZB_IS_ZERO(zb)						\
	((zb)->zb_objset == 0 && (zb)->zb_object == 0 &&	\
	(zb)->zb_level == 0 && (zb)->zb_blkid == 0)
/*
 * True when object, level and blkid all match the root block values.
 * zb_objset is not examined.
 */
#define	ZB_IS_ROOT(zb)				\
	((zb)->zb_object == ZB_ROOT_OBJECT &&	\
	(zb)->zb_level == ZB_ROOT_LEVEL &&	\
	(zb)->zb_blkid == ZB_ROOT_BLKID)
| 305 | |
/*
 * Properties attached to a zio (struct zio embeds one as io_prop, and
 * zio_write() takes a pointer to one).
 */
typedef struct zio_prop {
	enum zio_checksum	zp_checksum;	/* checksum function */
	enum zio_compress	zp_compress;	/* compression function */
	dmu_object_type_t	zp_type;	/* DMU object type */
	uint8_t			zp_level;
	uint8_t			zp_copies;
	boolean_t		zp_dedup;
	boolean_t		zp_dedup_verify;
	boolean_t		zp_nopwrite;
} zio_prop_t;
| 316 | |
typedef struct zio_cksum_report zio_cksum_report_t;

/* Callback types stored in a checksum report as zcr_finish and zcr_free. */
typedef void zio_cksum_finish_f(zio_cksum_report_t *rep,
    const void *good_data);
typedef void zio_cksum_free_f(void *cbdata, size_t size);

struct zio_bad_cksum; /* defined in zio_checksum.h */
struct dnode_phys;

/*
 * A checksum report.  zcr_next points at another report, so reports can be
 * kept on a singly linked list; struct zio holds such a pointer in
 * io_cksum_report.
 */
struct zio_cksum_report {
	struct zio_cksum_report *zcr_next;
	nvlist_t		*zcr_ereport;
	nvlist_t		*zcr_detector;
	void			*zcr_cbdata;
	size_t			zcr_cbinfo;	/* passed to zcr_free() */
	uint64_t		zcr_align;
	uint64_t		zcr_length;
	zio_cksum_finish_f	*zcr_finish;
	zio_cksum_free_f	*zcr_free;

	/* internal use only */
	struct zio_bad_cksum	*zcr_ckinfo;	/* information from failure */
};
| 340 | |
/*
 * Signature of a checksum-report hook: it receives the zio, the report and
 * an opaque argument.
 */
typedef void zio_vsd_cksum_report_f(zio_t *zio, zio_cksum_report_t *zcr,
    void *arg);

/* Declaration of the default hook; it is defined outside this header. */
zio_vsd_cksum_report_f	zio_vsd_default_cksum_report;

/*
 * Operations associated with a zio's io_vsd pointer (struct zio keeps a
 * pointer to one of these in io_vsd_ops).
 */
typedef struct zio_vsd_ops {
	zio_done_func_t		*vsd_free;
	zio_vsd_cksum_report_f	*vsd_cksum_report;
} zio_vsd_ops_t;
| 350 | |
/*
 * Node of a gang tree: a pointer to a gang block header plus one child
 * pointer per block pointer slot in that header.
 */
typedef struct zio_gang_node {
	zio_gbh_phys_t		*gn_gbh;
	struct zio_gang_node	*gn_child[SPA_GBH_NBLKPTRS];
} zio_gang_node_t;

/* Signature of a gang issue function; it returns a zio. */
typedef zio_t *zio_gang_issue_func_t(zio_t *zio, blkptr_t *bp,
    zio_gang_node_t *gn, void *data);

/* Signature of a transform callback, as passed to zio_push_transform(). */
typedef void zio_transform_func_t(zio_t *zio, void *data, uint64_t size);

/*
 * One entry of a zio's transform stack (io_transform_stack); entries are
 * linked through zt_next.
 */
typedef struct zio_transform {
	void			*zt_orig_data;
	uint64_t		zt_orig_size;
	uint64_t		zt_bufsize;
	zio_transform_func_t	*zt_transform;
	struct zio_transform	*zt_next;
} zio_transform_t;
| 368 | |
/* Signature of a pipeline stage function: takes the zio, returns an int. */
typedef int zio_pipe_stage_t(zio_t *zio);

/*
 * The io_reexecute flags are distinct from io_flags because the child must
 * be able to propagate them to the parent.  The normal io_flags are local
 * to the zio, not protected by any lock, and not modifiable by children;
 * the reexecute flags are protected by io_lock, modifiable by children,
 * and always propagated -- even when ZIO_FLAG_DONT_PROPAGATE is set.
 *
 * The two values are distinct bits, so both fit in the uint8_t io_reexecute
 * field of struct zio at the same time.
 */
#define	ZIO_REEXECUTE_NOW	0x01
#define	ZIO_REEXECUTE_SUSPEND	0x02
/* A list together with a 64-bit size (embedded in struct zio as io_alloc_list). */
typedef struct zio_alloc_list {
	list_t		zal_list;
	uint64_t	zal_size;
} zio_alloc_list_t;

/*
 * Connects one parent zio to one child zio.  The link carries two list
 * nodes, one named for the parent side and one for the child side, so a
 * single link can sit on two lists at once.
 */
typedef struct zio_link {
	zio_t		*zl_parent;
	zio_t		*zl_child;
	list_node_t	zl_parent_node;
	list_node_t	zl_child_node;
} zio_link_t;
| 392 | |
/*
 * Used for TRIM kstat.
 *
 * Each member is a named kstat counter; the ZIO_TRIM_STAT_* macros update
 * the ui64 value of a member selected by name.
 */
typedef struct zio_trim_stats {
	/*
	 * Number of bytes successfully TRIMmed.
	 */
	kstat_named_t bytes;

	/*
	 * Number of successful TRIM requests.
	 */
	kstat_named_t success;

	/*
	 * Number of TRIM requests that failed because TRIM is not
	 * supported.
	 */
	kstat_named_t unsupported;

	/*
	 * Number of TRIM requests that failed for other reasons.
	 */
	kstat_named_t failed;
} zio_trim_stats_t;

/* The single global instance; it is defined outside this header. */
extern zio_trim_stats_t zio_trim_stats;
| 420 | |
/*
 * ZIO_TRIM_STAT_INCR atomically adds val to the 64-bit counter of the named
 * member of zio_trim_stats; ZIO_TRIM_STAT_BUMP adds 1.
 *
 * Both macros expand to a single expression with no trailing semicolon; the
 * caller supplies it.  This keeps "if (x) ZIO_TRIM_STAT_BUMP(y); else ..."
 * well-formed and avoids generating an empty extra statement.
 */
#define	ZIO_TRIM_STAT_INCR(stat, val) \
	atomic_add_64(&zio_trim_stats.stat.value.ui64, (val))
#define	ZIO_TRIM_STAT_BUMP(stat) \
	ZIO_TRIM_STAT_INCR(stat, 1)
| 425 | |
/*
 * The zio structure: all state kept for one zio, grouped below by purpose.
 *
 * Several members are sized by enums declared earlier in this header:
 * io_state[] by ZIO_WAIT_TYPES, io_child_error[] by ZIO_CHILD_TYPES, and
 * io_children[][] by both.  io_reexecute holds ZIO_REEXECUTE_* bits and,
 * per the comment on those flags, is protected by io_lock.
 */
struct zio {
	/* Core information about this I/O */
	zbookmark_phys_t	io_bookmark;
	zio_prop_t	io_prop;
	zio_type_t	io_type;
	enum zio_child	io_child_type;
	int		io_cmd;
	zio_priority_t	io_priority;
	uint8_t		io_reexecute;
	uint8_t		io_state[ZIO_WAIT_TYPES];
	uint64_t	io_txg;
	spa_t		*io_spa;
	blkptr_t	*io_bp;
	blkptr_t	*io_bp_override;
	blkptr_t	io_bp_copy;
	list_t		io_parent_list;
	list_t		io_child_list;
	zio_t		*io_logical;
	zio_transform_t *io_transform_stack;

	/* Callback info */
	zio_done_func_t	*io_ready;
	zio_done_func_t	*io_children_ready;
	zio_done_func_t	*io_physdone;
	zio_done_func_t	*io_done;
	void		*io_private;
	int64_t		io_prev_space_delta;	/* DMU private */
	blkptr_t	io_bp_orig;

	/* Data represented by this I/O */
	void		*io_data;
	void		*io_orig_data;
	uint64_t	io_size;
	uint64_t	io_orig_size;

	/* Stuff for the vdev stack */
	vdev_t		*io_vd;
	void		*io_vsd;
	const zio_vsd_ops_t *io_vsd_ops;

	uint64_t	io_offset;
	hrtime_t	io_timestamp;
	hrtime_t	io_queued_timestamp;
	hrtime_t	io_target_timestamp;
	avl_node_t	io_queue_node;
	avl_node_t	io_offset_node;
	avl_node_t	io_alloc_node;
	zio_alloc_list_t io_alloc_list;

	/* Internal pipeline state */
	enum zio_flag	io_flags;
	enum zio_stage	io_stage;
	enum zio_stage	io_pipeline;
	enum zio_flag	io_orig_flags;
	enum zio_stage	io_orig_stage;
	enum zio_stage	io_orig_pipeline;
	enum zio_stage	io_pipeline_trace;
	int		io_error;
	int		io_child_error[ZIO_CHILD_TYPES];
	uint64_t	io_children[ZIO_CHILD_TYPES][ZIO_WAIT_TYPES];
	uint64_t	io_child_count;
	uint64_t	io_phys_children;
	uint64_t	io_parent_count;
	uint64_t	*io_stall;
	zio_t		*io_gang_leader;
	zio_gang_node_t	*io_gang_tree;
	void		*io_executor;
	void		*io_waiter;
	kmutex_t	io_lock;
	kcondvar_t	io_cv;

	/* FMA state */
	zio_cksum_report_t *io_cksum_report;
	uint64_t	io_ena;

	/* Taskq dispatching state */
	taskq_ent_t	io_tqent;

	/* Linkage presumably used by the TRIM code -- confirm against users */
	avl_node_t	io_trim_node;
	list_node_t	io_trim_link;
};
| 507 | |
/* Comparison callback over two untyped pointers. */
extern int zio_timestamp_compare(const void *, const void *);

/*
 * Functions that hand back a zio_t *.  Most take a parent zio (pio), the
 * pool (spa), a completion callback (done) with its private argument (priv)
 * and a set of zio_flag bits.
 */
extern zio_t *zio_null(zio_t *pio, spa_t *spa, vdev_t *vd,
    zio_done_func_t *done, void *priv, enum zio_flag flags);

extern zio_t *zio_root(spa_t *spa,
    zio_done_func_t *done, void *priv, enum zio_flag flags);

extern zio_t *zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, void *data,
    uint64_t size, zio_done_func_t *done, void *priv,
    zio_priority_t priority, enum zio_flag flags, const zbookmark_phys_t *zb);

/* zio_write additionally takes ready, children_ready and physdone callbacks. */
extern zio_t *zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
    void *data, uint64_t size, const zio_prop_t *zp,
    zio_done_func_t *ready, zio_done_func_t *children_ready,
    zio_done_func_t *physdone, zio_done_func_t *done,
    void *priv, zio_priority_t priority, enum zio_flag flags,
    const zbookmark_phys_t *zb);

/* NOTE(review): zb is non-const here, unlike zio_read() and zio_write(). */
extern zio_t *zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
    void *data, uint64_t size, zio_done_func_t *done, void *priv,
    zio_priority_t priority, enum zio_flag flags, zbookmark_phys_t *zb);

extern void zio_write_override(zio_t *zio, blkptr_t *bp, int copies,
    boolean_t nopwrite);

extern void zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp);

extern zio_t *zio_claim(zio_t *pio, spa_t *spa, uint64_t txg,
    const blkptr_t *bp,
    zio_done_func_t *done, void *priv, enum zio_flag flags);

extern zio_t *zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd,
    uint64_t offset, uint64_t size, zio_done_func_t *done, void *priv,
    zio_priority_t priority, enum zio_flag flags);

/*
 * The *_phys variants address a vdev by offset and size rather than by
 * block pointer.
 */
extern zio_t *zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
    uint64_t size, void *data, int checksum,
    zio_done_func_t *done, void *priv, zio_priority_t priority,
    enum zio_flag flags, boolean_t labels);

extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
    uint64_t size, void *data, int checksum,
    zio_done_func_t *done, void *priv, zio_priority_t priority,
    enum zio_flag flags, boolean_t labels);

extern zio_t *zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg,
    const blkptr_t *bp, uint64_t size, enum zio_flag flags);

/* ZIL block allocation and free, plus flush, trim and shrink. */
extern int zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp,
    blkptr_t *old_bp, uint64_t size, boolean_t *slog);
extern void zio_free_zil(spa_t *spa, uint64_t txg, blkptr_t *bp);
extern void zio_flush(zio_t *zio, vdev_t *vd);
extern zio_t *zio_trim(zio_t *zio, spa_t *spa, vdev_t *vd, uint64_t offset,
    uint64_t size);
extern void zio_shrink(zio_t *zio, uint64_t size);
| 564 | |
/* Operations on an existing zio.  Only zio_wait() returns a value (an int). */
extern int zio_wait(zio_t *zio);
extern void zio_nowait(zio_t *zio);
extern void zio_execute(zio_t *zio);
extern void zio_interrupt(zio_t *zio);
extern void zio_delay_init(zio_t *zio);
extern void zio_delay_interrupt(zio_t *zio);

/*
 * Parent/child relationships.  The walk functions take a zio_link_t **
 * in addition to the zio and return a zio.
 */
extern zio_t *zio_walk_parents(zio_t *cio, zio_link_t **);
extern zio_t *zio_walk_children(zio_t *pio, zio_link_t **);
extern zio_t *zio_unique_parent(zio_t *cio);
extern void zio_add_child(zio_t *pio, zio_t *cio);
| 576 | |
/*
 * Buffer allocation.  The free functions take the size as well as the
 * buffer pointer.
 */
extern void *zio_buf_alloc(size_t size);
extern void *zio_buf_alloc_nowait(size_t size);
extern void zio_buf_free(void *buf, size_t size);
extern void *zio_data_buf_alloc(size_t size);
extern void zio_data_buf_free(void *buf, size_t size);

/* Transform stack manipulation (see zio_transform_t). */
extern void zio_push_transform(zio_t *zio, void *data, uint64_t size,
    uint64_t bufsize, zio_transform_func_t *transform);
extern void zio_pop_transforms(zio_t *zio);

/* Takes an untyped pointer argument. */
extern void zio_resubmit_stage_async(void *);
| 588 | |
/*
 * vdev-level I/O.  zio_vdev_child_io() takes a zio and a block pointer;
 * zio_vdev_delegated_io() takes neither and is addressed by vdev and offset.
 */
extern zio_t *zio_vdev_child_io(zio_t *zio, blkptr_t *bp, vdev_t *vd,
    uint64_t offset, void *data, uint64_t size, int type,
    zio_priority_t priority, enum zio_flag flags,
    zio_done_func_t *done, void *priv);

extern zio_t *zio_vdev_delegated_io(vdev_t *vd, uint64_t offset,
    void *data, uint64_t size, int type, zio_priority_t priority,
    enum zio_flag flags, zio_done_func_t *done, void *priv);

extern void zio_vdev_io_bypass(zio_t *zio);
extern void zio_vdev_io_reissue(zio_t *zio);
extern void zio_vdev_io_redone(zio_t *zio);

extern void zio_checksum_verified(zio_t *zio);
/* Combines two error values into one. */
extern int zio_worst_error(int e1, int e2);

/*
 * Selection helpers: each takes a child and a parent setting and returns
 * one value of the same enum.
 */
extern enum zio_checksum zio_checksum_select(enum zio_checksum child,
    enum zio_checksum parent);
extern enum zio_checksum zio_checksum_dedup_select(spa_t *spa,
    enum zio_checksum child, enum zio_checksum parent);
extern enum zio_compress zio_compress_select(spa_t *spa,
    enum zio_compress child, enum zio_compress parent);

/* Pool-wide suspend and resume. */
extern void zio_suspend(spa_t *spa, zio_t *zio);
extern int zio_resume(spa_t *spa);
extern void zio_resume_wait(spa_t *spa);
| 615 | |
/*
 * Initial setup and teardown.
 */
extern void zio_init(void);
extern void zio_fini(void);

/*
 * Fault injection
 *
 * struct zinject_record is only forward-declared here; these prototypes use
 * it through pointers.  The zio_handle_*_injection() functions take an error
 * value and return an int.
 */
struct zinject_record;
extern uint32_t zio_injection_enabled;
extern int zio_inject_fault(char *name, int flags, int *id,
    struct zinject_record *record);
extern int zio_inject_list_next(int *id, char *name, size_t buflen,
    struct zinject_record *record);
extern int zio_clear_fault(int id);
extern void zio_handle_panic_injection(spa_t *spa, char *tag, uint64_t type);
extern int zio_handle_fault_injection(zio_t *zio, int error);
extern int zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error);
extern int zio_handle_label_injection(zio_t *zio, int error);
extern void zio_handle_ignored_writes(zio_t *zio);
extern hrtime_t zio_handle_io_delay(zio_t *zio);
| 638 | |
/*
 * Checksum ereport functions
 *
 * The start, finish, interim and free functions operate on a checksum
 * report (zio_cksum_report_t), or on the inputs from which one is built.
 */
extern void zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, struct zio *zio,
    uint64_t offset, uint64_t length, void *arg, struct zio_bad_cksum *info);
extern void zfs_ereport_finish_checksum(zio_cksum_report_t *report,
    const void *good_data, const void *bad_data, boolean_t drop_if_identical);

extern void zfs_ereport_send_interim_checksum(zio_cksum_report_t *report);
extern void zfs_ereport_free_checksum(zio_cksum_report_t *report);

/* If we have the good data in hand, this function can be used */
extern void zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd,
    struct zio *zio, uint64_t offset, uint64_t length,
    const void *good_data, const void *bad_data, struct zio_bad_cksum *info);

/* Called from spa_sync(), but primarily an injection handler */
extern void spa_handle_ignored_writes(spa_t *spa);

/*
 * zbookmark_phys functions
 *
 * zbookmark_compare() takes two bookmarks, each accompanied by its own
 * (dbss, ibs) pair, and returns an int.
 */
boolean_t zbookmark_subtree_completed(const struct dnode_phys *dnp,
    const zbookmark_phys_t *subtree_root, const zbookmark_phys_t *last_block);
int zbookmark_compare(uint16_t dbss1, uint8_t ibs1, uint16_t dbss2,
    uint8_t ibs2, const zbookmark_phys_t *zb1, const zbookmark_phys_t *zb2);
| 664 | #ifdef __cplusplus |
| 665 | } |
| 666 | #endif |
| 667 | |
| 668 | #endif /* _ZIO_H */ |
| 669 | |