1 | /* |
2 | * CDDL HEADER START |
3 | * |
4 | * The contents of this file are subject to the terms of the |
5 | * Common Development and Distribution License (the "License"). |
6 | * You may not use this file except in compliance with the License. |
7 | * |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
9 | * or http://www.opensolaris.org/os/licensing. |
10 | * See the License for the specific language governing permissions |
11 | * and limitations under the License. |
12 | * |
13 | * When distributing Covered Code, include this CDDL HEADER in each |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
15 | * If applicable, add the following below this CDDL HEADER, with the |
16 | * fields enclosed by brackets "[]" replaced with your own identifying |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] |
18 | * |
19 | * CDDL HEADER END |
20 | */ |
21 | |
22 | /* |
23 | * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. |
24 | * Copyright (c) 2012, 2015 by Delphix. All rights reserved. |
25 | * Copyright 2015 RackTop Systems. |
26 | * Copyright 2016 Nexenta Systems, Inc. |
27 | */ |
28 | |
29 | /* |
30 | * Pool import support functions. |
31 | * |
32 | * To import a pool, we rely on reading the configuration information from the |
33 | * ZFS label of each device. If we successfully read the label, then we |
34 | * organize the configuration information in the following hierarchy: |
35 | * |
36 | * pool guid -> toplevel vdev guid -> label txg |
37 | * |
38 | * Duplicate entries matching this same tuple will be discarded. Once we have |
39 | * examined every device, we pick the best label txg config for each toplevel |
40 | * vdev. We then arrange these toplevel vdevs into a complete pool config, and |
41 | * update any paths that have changed. Finally, we attempt to import the pool |
42 | * using our derived config, and record the results. |
43 | */ |
44 | |
45 | #include <ctype.h> |
46 | #include <devid.h> |
47 | #include <dirent.h> |
48 | #include <errno.h> |
49 | #include <libintl.h> |
50 | #include <stddef.h> |
51 | #include <stdlib.h> |
52 | #include <string.h> |
53 | #include <sys/stat.h> |
54 | #include <unistd.h> |
55 | #include <fcntl.h> |
56 | #include <thread_pool.h> |
57 | #ifdef __FreeBSD__ |
58 | #include <libgeom.h> |
59 | #endif |
60 | #ifdef __NetBSD__ |
61 | #include <util.h> |
62 | static int native_ioctl(int fd, unsigned long cmd, void *arg); |
63 | #endif |
64 | |
65 | #include <sys/vdev_impl.h> |
66 | |
67 | #include "libzfs.h" |
68 | #include "libzfs_impl.h" |
69 | |
70 | /* |
71 | * Intermediate structures used to gather configuration information. |
72 | */ |
/*
 * One label configuration recorded for a toplevel vdev.  add_config() keeps
 * at most one entry per distinct txg and links new entries at the head of
 * the owning vdev_entry_t's ve_configs list.
 */
typedef struct config_entry {
	uint64_t ce_txg;		/* ZPOOL_CONFIG_POOL_TXG of ce_config */
	nvlist_t *ce_config;		/* label config handed to add_config() */
	struct config_entry *ce_next;	/* next config for the same vdev */
} config_entry_t;
78 | |
/*
 * One toplevel vdev of a pool, identified by the ZPOOL_CONFIG_TOP_GUID found
 * in the labels, together with every distinct-txg config seen for it.
 */
typedef struct vdev_entry {
	uint64_t ve_guid;		/* toplevel vdev guid */
	config_entry_t *ve_configs;	/* configs seen, one per txg */
	struct vdev_entry *ve_next;	/* next toplevel vdev in the pool */
} vdev_entry_t;
84 | |
/*
 * One pool discovered while scanning labels, identified by its
 * ZPOOL_CONFIG_POOL_GUID, with the list of toplevel vdevs seen so far.
 */
typedef struct pool_entry {
	uint64_t pe_guid;		/* pool guid */
	vdev_entry_t *pe_vdevs;		/* toplevel vdevs found for the pool */
	struct pool_entry *pe_next;	/* next discovered pool */
} pool_entry_t;
90 | |
/*
 * A vdev guid -> device path mapping.  add_config() records one per label
 * it accepts; fix_paths() consults the list to rewrite stale paths.
 */
typedef struct name_entry {
	char *ne_name;			/* copy of the path (zfs_strdup) */
	uint64_t ne_guid;		/* ZPOOL_CONFIG_GUID of the device */
	struct name_entry *ne_next;	/* next mapping */
} name_entry_t;
96 | |
/*
 * Everything gathered during a scan: the pool -> vdev -> config hierarchy
 * and the flat list of guid -> path mappings.
 */
typedef struct pool_list {
	pool_entry_t *pools;		/* head of the discovered pools */
	name_entry_t *names;		/* head of the guid -> path mappings */
} pool_list_t;
101 | |
/*
 * Return the encoded device id string for the device at 'path', or NULL if
 * the device cannot be opened, has no devid or minor name, or the platform
 * was built without 'have_devid'.  A non-NULL result is released with
 * devid_str_free() by fix_paths().
 */
static char *
get_devid(const char *path)
{
#ifdef have_devid
	ddi_devid_t id;
	char *minor_name = NULL;
	char *encoded = NULL;
	int dev_fd;

	dev_fd = open(path, O_RDONLY);
	if (dev_fd < 0)
		return (NULL);

	/* No devid at all: nothing to encode, just release the descriptor. */
	if (devid_get(dev_fd, &id) != 0) {
		(void) close(dev_fd);
		return (NULL);
	}

	if (devid_get_minor_name(dev_fd, &minor_name) == 0)
		encoded = devid_str_encode(id, minor_name);
	if (minor_name != NULL)
		devid_str_free(minor_name);
	devid_free(id);
	(void) close(dev_fd);

	return (encoded);
#else
	return (NULL);
#endif
}
129 | |
130 | |
131 | /* |
132 | * Go through and fix up any path and/or devid information for the given vdev |
133 | * configuration. |
134 | */ |
135 | static int |
136 | fix_paths(nvlist_t *nv, name_entry_t *names) |
137 | { |
138 | nvlist_t **child; |
139 | uint_t c, children; |
140 | uint64_t guid; |
141 | name_entry_t *ne, *best; |
142 | char *path, *devid; |
143 | int matched; |
144 | |
145 | if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, |
146 | &child, &children) == 0) { |
147 | for (c = 0; c < children; c++) |
148 | if (fix_paths(child[c], names) != 0) |
149 | return (-1); |
150 | return (0); |
151 | } |
152 | |
153 | /* |
154 | * This is a leaf (file or disk) vdev. In either case, go through |
155 | * the name list and see if we find a matching guid. If so, replace |
156 | * the path and see if we can calculate a new devid. |
157 | * |
158 | * There may be multiple names associated with a particular guid, in |
159 | * which case we have overlapping slices or multiple paths to the same |
160 | * disk. If this is the case, then we want to pick the path that is |
161 | * the most similar to the original, where "most similar" is the number |
162 | * of matching characters starting from the end of the path. This will |
163 | * preserve slice numbers even if the disks have been reorganized, and |
164 | * will also catch preferred disk names if multiple paths exist. |
165 | */ |
166 | verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0); |
167 | if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0) |
168 | path = NULL; |
169 | |
170 | matched = 0; |
171 | best = NULL; |
172 | for (ne = names; ne != NULL; ne = ne->ne_next) { |
173 | if (ne->ne_guid == guid) { |
174 | const char *src, *dst; |
175 | int count; |
176 | |
177 | if (path == NULL) { |
178 | best = ne; |
179 | break; |
180 | } |
181 | |
182 | src = ne->ne_name + strlen(ne->ne_name) - 1; |
183 | dst = path + strlen(path) - 1; |
184 | for (count = 0; src >= ne->ne_name && dst >= path; |
185 | src--, dst--, count++) |
186 | if (*src != *dst) |
187 | break; |
188 | |
189 | /* |
190 | * At this point, 'count' is the number of characters |
191 | * matched from the end. |
192 | */ |
193 | if (count > matched || best == NULL) { |
194 | best = ne; |
195 | matched = count; |
196 | } |
197 | } |
198 | } |
199 | |
200 | if (best == NULL) |
201 | return (0); |
202 | |
203 | if (nvlist_add_string(nv, ZPOOL_CONFIG_PATH, best->ne_name) != 0) |
204 | return (-1); |
205 | |
206 | if ((devid = get_devid(best->ne_name)) == NULL) { |
207 | (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID); |
208 | } else { |
209 | if (nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, devid) != 0) { |
210 | devid_str_free(devid); |
211 | return (-1); |
212 | } |
213 | devid_str_free(devid); |
214 | } |
215 | |
216 | return (0); |
217 | } |
218 | |
219 | /* |
220 | * Add the given configuration to the list of known devices. |
221 | */ |
222 | static int |
223 | add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path, |
224 | nvlist_t *config) |
225 | { |
226 | uint64_t pool_guid, vdev_guid, top_guid, txg, state; |
227 | pool_entry_t *pe; |
228 | vdev_entry_t *ve; |
229 | config_entry_t *ce; |
230 | name_entry_t *ne; |
231 | |
232 | /* |
233 | * If this is a hot spare not currently in use or level 2 cache |
234 | * device, add it to the list of names to translate, but don't do |
235 | * anything else. |
236 | */ |
237 | if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, |
238 | &state) == 0 && |
239 | (state == POOL_STATE_SPARE || state == POOL_STATE_L2CACHE) && |
240 | nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) == 0) { |
241 | if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL) |
242 | return (-1); |
243 | |
244 | if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) { |
245 | free(ne); |
246 | return (-1); |
247 | } |
248 | ne->ne_guid = vdev_guid; |
249 | ne->ne_next = pl->names; |
250 | pl->names = ne; |
251 | return (0); |
252 | } |
253 | |
254 | /* |
255 | * If we have a valid config but cannot read any of these fields, then |
256 | * it means we have a half-initialized label. In vdev_label_init() |
257 | * we write a label with txg == 0 so that we can identify the device |
258 | * in case the user refers to the same disk later on. If we fail to |
259 | * create the pool, we'll be left with a label in this state |
260 | * which should not be considered part of a valid pool. |
261 | */ |
262 | if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, |
263 | &pool_guid) != 0 || |
264 | nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, |
265 | &vdev_guid) != 0 || |
266 | nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID, |
267 | &top_guid) != 0 || |
268 | nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, |
269 | &txg) != 0 || txg == 0) { |
270 | nvlist_free(config); |
271 | return (0); |
272 | } |
273 | |
274 | /* |
275 | * First, see if we know about this pool. If not, then add it to the |
276 | * list of known pools. |
277 | */ |
278 | for (pe = pl->pools; pe != NULL; pe = pe->pe_next) { |
279 | if (pe->pe_guid == pool_guid) |
280 | break; |
281 | } |
282 | |
283 | if (pe == NULL) { |
284 | if ((pe = zfs_alloc(hdl, sizeof (pool_entry_t))) == NULL) { |
285 | nvlist_free(config); |
286 | return (-1); |
287 | } |
288 | pe->pe_guid = pool_guid; |
289 | pe->pe_next = pl->pools; |
290 | pl->pools = pe; |
291 | } |
292 | |
293 | /* |
294 | * Second, see if we know about this toplevel vdev. Add it if its |
295 | * missing. |
296 | */ |
297 | for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) { |
298 | if (ve->ve_guid == top_guid) |
299 | break; |
300 | } |
301 | |
302 | if (ve == NULL) { |
303 | if ((ve = zfs_alloc(hdl, sizeof (vdev_entry_t))) == NULL) { |
304 | nvlist_free(config); |
305 | return (-1); |
306 | } |
307 | ve->ve_guid = top_guid; |
308 | ve->ve_next = pe->pe_vdevs; |
309 | pe->pe_vdevs = ve; |
310 | } |
311 | |
312 | /* |
313 | * Third, see if we have a config with a matching transaction group. If |
314 | * so, then we do nothing. Otherwise, add it to the list of known |
315 | * configs. |
316 | */ |
317 | for (ce = ve->ve_configs; ce != NULL; ce = ce->ce_next) { |
318 | if (ce->ce_txg == txg) |
319 | break; |
320 | } |
321 | |
322 | if (ce == NULL) { |
323 | if ((ce = zfs_alloc(hdl, sizeof (config_entry_t))) == NULL) { |
324 | nvlist_free(config); |
325 | return (-1); |
326 | } |
327 | ce->ce_txg = txg; |
328 | ce->ce_config = config; |
329 | ce->ce_next = ve->ve_configs; |
330 | ve->ve_configs = ce; |
331 | } else { |
332 | nvlist_free(config); |
333 | } |
334 | |
335 | /* |
336 | * At this point we've successfully added our config to the list of |
337 | * known configs. The last thing to do is add the vdev guid -> path |
338 | * mappings so that we can fix up the configuration as necessary before |
339 | * doing the import. |
340 | */ |
341 | if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL) |
342 | return (-1); |
343 | |
344 | if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) { |
345 | free(ne); |
346 | return (-1); |
347 | } |
348 | |
349 | ne->ne_guid = vdev_guid; |
350 | ne->ne_next = pl->names; |
351 | pl->names = ne; |
352 | |
353 | return (0); |
354 | } |
355 | |
356 | /* |
357 | * Returns true if the named pool matches the given GUID. |
358 | */ |
359 | static int |
360 | pool_active(libzfs_handle_t *hdl, const char *name, uint64_t guid, |
361 | boolean_t *isactive) |
362 | { |
363 | zpool_handle_t *zhp; |
364 | uint64_t theguid; |
365 | |
366 | if (zpool_open_silent(hdl, name, &zhp) != 0) |
367 | return (-1); |
368 | |
369 | if (zhp == NULL) { |
370 | *isactive = B_FALSE; |
371 | return (0); |
372 | } |
373 | |
374 | verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_POOL_GUID, |
375 | &theguid) == 0); |
376 | |
377 | zpool_close(zhp); |
378 | |
379 | *isactive = (theguid == guid); |
380 | return (0); |
381 | } |
382 | |
383 | static nvlist_t * |
384 | refresh_config(libzfs_handle_t *hdl, nvlist_t *config) |
385 | { |
386 | nvlist_t *nvl; |
387 | zfs_cmd_t zc = { 0 }; |
388 | int err; |
389 | |
390 | if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) |
391 | return (NULL); |
392 | |
393 | if (zcmd_alloc_dst_nvlist(hdl, &zc, |
394 | zc.zc_nvlist_conf_size * 2) != 0) { |
395 | zcmd_free_nvlists(&zc); |
396 | return (NULL); |
397 | } |
398 | |
399 | while ((err = ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_TRYIMPORT, |
400 | &zc)) != 0 && errno == ENOMEM) { |
401 | if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) { |
402 | zcmd_free_nvlists(&zc); |
403 | return (NULL); |
404 | } |
405 | } |
406 | |
407 | if (err) { |
408 | zcmd_free_nvlists(&zc); |
409 | return (NULL); |
410 | } |
411 | |
412 | if (zcmd_read_dst_nvlist(hdl, &zc, &nvl) != 0) { |
413 | zcmd_free_nvlists(&zc); |
414 | return (NULL); |
415 | } |
416 | |
417 | zcmd_free_nvlists(&zc); |
418 | return (nvl); |
419 | } |
420 | |
421 | /* |
422 | * Determine if the vdev id is a hole in the namespace. |
423 | */ |
424 | boolean_t |
425 | vdev_is_hole(uint64_t *hole_array, uint_t holes, uint_t id) |
426 | { |
427 | for (int c = 0; c < holes; c++) { |
428 | |
429 | /* Top-level is a hole */ |
430 | if (hole_array[c] == id) |
431 | return (B_TRUE); |
432 | } |
433 | return (B_FALSE); |
434 | } |
435 | |
436 | /* |
437 | * Convert our list of pools into the definitive set of configurations. We |
438 | * start by picking the best config for each toplevel vdev. Once that's done, |
439 | * we assemble the toplevel vdevs into a full config for the pool. We make a |
440 | * pass to fix up any incorrect paths, and then add it to the main list to |
441 | * return to the user. |
442 | */ |
443 | static nvlist_t * |
444 | get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok) |
445 | { |
446 | pool_entry_t *pe; |
447 | vdev_entry_t *ve; |
448 | config_entry_t *ce; |
449 | nvlist_t *ret = NULL, *config = NULL, *tmp = NULL, *nvtop, *nvroot; |
450 | nvlist_t **spares, **l2cache; |
451 | uint_t i, nspares, nl2cache; |
452 | boolean_t config_seen; |
453 | uint64_t best_txg; |
454 | char *name, *hostname = NULL; |
455 | uint64_t guid; |
456 | uint_t children = 0; |
457 | nvlist_t **child = NULL; |
458 | uint_t holes; |
459 | uint64_t *hole_array, max_id; |
460 | uint_t c; |
461 | boolean_t isactive; |
462 | uint64_t hostid; |
463 | nvlist_t *nvl; |
464 | boolean_t found_one = B_FALSE; |
465 | boolean_t valid_top_config = B_FALSE; |
466 | |
467 | if (nvlist_alloc(&ret, 0, 0) != 0) |
468 | goto nomem; |
469 | |
470 | for (pe = pl->pools; pe != NULL; pe = pe->pe_next) { |
471 | uint64_t id, max_txg = 0; |
472 | |
473 | if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0) |
474 | goto nomem; |
475 | config_seen = B_FALSE; |
476 | |
477 | /* |
478 | * Iterate over all toplevel vdevs. Grab the pool configuration |
479 | * from the first one we find, and then go through the rest and |
480 | * add them as necessary to the 'vdevs' member of the config. |
481 | */ |
482 | for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) { |
483 | |
484 | /* |
485 | * Determine the best configuration for this vdev by |
486 | * selecting the config with the latest transaction |
487 | * group. |
488 | */ |
489 | best_txg = 0; |
490 | for (ce = ve->ve_configs; ce != NULL; |
491 | ce = ce->ce_next) { |
492 | |
493 | if (ce->ce_txg > best_txg) { |
494 | tmp = ce->ce_config; |
495 | best_txg = ce->ce_txg; |
496 | } |
497 | } |
498 | |
499 | /* |
500 | * We rely on the fact that the max txg for the |
501 | * pool will contain the most up-to-date information |
502 | * about the valid top-levels in the vdev namespace. |
503 | */ |
504 | if (best_txg > max_txg) { |
505 | (void) nvlist_remove(config, |
506 | ZPOOL_CONFIG_VDEV_CHILDREN, |
507 | DATA_TYPE_UINT64); |
508 | (void) nvlist_remove(config, |
509 | ZPOOL_CONFIG_HOLE_ARRAY, |
510 | DATA_TYPE_UINT64_ARRAY); |
511 | |
512 | max_txg = best_txg; |
513 | hole_array = NULL; |
514 | holes = 0; |
515 | max_id = 0; |
516 | valid_top_config = B_FALSE; |
517 | |
518 | if (nvlist_lookup_uint64(tmp, |
519 | ZPOOL_CONFIG_VDEV_CHILDREN, &max_id) == 0) { |
520 | verify(nvlist_add_uint64(config, |
521 | ZPOOL_CONFIG_VDEV_CHILDREN, |
522 | max_id) == 0); |
523 | valid_top_config = B_TRUE; |
524 | } |
525 | |
526 | if (nvlist_lookup_uint64_array(tmp, |
527 | ZPOOL_CONFIG_HOLE_ARRAY, &hole_array, |
528 | &holes) == 0) { |
529 | verify(nvlist_add_uint64_array(config, |
530 | ZPOOL_CONFIG_HOLE_ARRAY, |
531 | hole_array, holes) == 0); |
532 | } |
533 | } |
534 | |
535 | if (!config_seen) { |
536 | /* |
537 | * Copy the relevant pieces of data to the pool |
538 | * configuration: |
539 | * |
540 | * version |
541 | * pool guid |
542 | * name |
543 | * comment (if available) |
544 | * pool state |
545 | * hostid (if available) |
546 | * hostname (if available) |
547 | */ |
548 | uint64_t state, version; |
549 | char * = NULL; |
550 | |
551 | version = fnvlist_lookup_uint64(tmp, |
552 | ZPOOL_CONFIG_VERSION); |
553 | fnvlist_add_uint64(config, |
554 | ZPOOL_CONFIG_VERSION, version); |
555 | guid = fnvlist_lookup_uint64(tmp, |
556 | ZPOOL_CONFIG_POOL_GUID); |
557 | fnvlist_add_uint64(config, |
558 | ZPOOL_CONFIG_POOL_GUID, guid); |
559 | name = fnvlist_lookup_string(tmp, |
560 | ZPOOL_CONFIG_POOL_NAME); |
561 | fnvlist_add_string(config, |
562 | ZPOOL_CONFIG_POOL_NAME, name); |
563 | |
564 | if (nvlist_lookup_string(tmp, |
565 | ZPOOL_CONFIG_COMMENT, &comment) == 0) |
566 | fnvlist_add_string(config, |
567 | ZPOOL_CONFIG_COMMENT, comment); |
568 | |
569 | state = fnvlist_lookup_uint64(tmp, |
570 | ZPOOL_CONFIG_POOL_STATE); |
571 | fnvlist_add_uint64(config, |
572 | ZPOOL_CONFIG_POOL_STATE, state); |
573 | |
574 | hostid = 0; |
575 | if (nvlist_lookup_uint64(tmp, |
576 | ZPOOL_CONFIG_HOSTID, &hostid) == 0) { |
577 | fnvlist_add_uint64(config, |
578 | ZPOOL_CONFIG_HOSTID, hostid); |
579 | hostname = fnvlist_lookup_string(tmp, |
580 | ZPOOL_CONFIG_HOSTNAME); |
581 | fnvlist_add_string(config, |
582 | ZPOOL_CONFIG_HOSTNAME, hostname); |
583 | } |
584 | |
585 | config_seen = B_TRUE; |
586 | } |
587 | |
588 | /* |
589 | * Add this top-level vdev to the child array. |
590 | */ |
591 | verify(nvlist_lookup_nvlist(tmp, |
592 | ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0); |
593 | verify(nvlist_lookup_uint64(nvtop, ZPOOL_CONFIG_ID, |
594 | &id) == 0); |
595 | |
596 | if (id >= children) { |
597 | nvlist_t **newchild; |
598 | |
599 | newchild = zfs_alloc(hdl, (id + 1) * |
600 | sizeof (nvlist_t *)); |
601 | if (newchild == NULL) |
602 | goto nomem; |
603 | |
604 | for (c = 0; c < children; c++) |
605 | newchild[c] = child[c]; |
606 | |
607 | free(child); |
608 | child = newchild; |
609 | children = id + 1; |
610 | } |
611 | if (nvlist_dup(nvtop, &child[id], 0) != 0) |
612 | goto nomem; |
613 | |
614 | } |
615 | |
616 | /* |
617 | * If we have information about all the top-levels then |
618 | * clean up the nvlist which we've constructed. This |
619 | * means removing any extraneous devices that are |
620 | * beyond the valid range or adding devices to the end |
621 | * of our array which appear to be missing. |
622 | */ |
623 | if (valid_top_config) { |
624 | if (max_id < children) { |
625 | for (c = max_id; c < children; c++) |
626 | nvlist_free(child[c]); |
627 | children = max_id; |
628 | } else if (max_id > children) { |
629 | nvlist_t **newchild; |
630 | |
631 | newchild = zfs_alloc(hdl, (max_id) * |
632 | sizeof (nvlist_t *)); |
633 | if (newchild == NULL) |
634 | goto nomem; |
635 | |
636 | for (c = 0; c < children; c++) |
637 | newchild[c] = child[c]; |
638 | |
639 | free(child); |
640 | child = newchild; |
641 | children = max_id; |
642 | } |
643 | } |
644 | |
645 | verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, |
646 | &guid) == 0); |
647 | |
648 | /* |
649 | * The vdev namespace may contain holes as a result of |
650 | * device removal. We must add them back into the vdev |
651 | * tree before we process any missing devices. |
652 | */ |
653 | if (holes > 0) { |
654 | ASSERT(valid_top_config); |
655 | |
656 | for (c = 0; c < children; c++) { |
657 | nvlist_t *holey; |
658 | |
659 | if (child[c] != NULL || |
660 | !vdev_is_hole(hole_array, holes, c)) |
661 | continue; |
662 | |
663 | if (nvlist_alloc(&holey, NV_UNIQUE_NAME, |
664 | 0) != 0) |
665 | goto nomem; |
666 | |
667 | /* |
668 | * Holes in the namespace are treated as |
669 | * "hole" top-level vdevs and have a |
670 | * special flag set on them. |
671 | */ |
672 | if (nvlist_add_string(holey, |
673 | ZPOOL_CONFIG_TYPE, |
674 | VDEV_TYPE_HOLE) != 0 || |
675 | nvlist_add_uint64(holey, |
676 | ZPOOL_CONFIG_ID, c) != 0 || |
677 | nvlist_add_uint64(holey, |
678 | ZPOOL_CONFIG_GUID, 0ULL) != 0) { |
679 | nvlist_free(holey); |
680 | goto nomem; |
681 | } |
682 | child[c] = holey; |
683 | } |
684 | } |
685 | |
686 | /* |
687 | * Look for any missing top-level vdevs. If this is the case, |
688 | * create a faked up 'missing' vdev as a placeholder. We cannot |
689 | * simply compress the child array, because the kernel performs |
690 | * certain checks to make sure the vdev IDs match their location |
691 | * in the configuration. |
692 | */ |
693 | for (c = 0; c < children; c++) { |
694 | if (child[c] == NULL) { |
695 | nvlist_t *missing; |
696 | if (nvlist_alloc(&missing, NV_UNIQUE_NAME, |
697 | 0) != 0) |
698 | goto nomem; |
699 | if (nvlist_add_string(missing, |
700 | ZPOOL_CONFIG_TYPE, |
701 | VDEV_TYPE_MISSING) != 0 || |
702 | nvlist_add_uint64(missing, |
703 | ZPOOL_CONFIG_ID, c) != 0 || |
704 | nvlist_add_uint64(missing, |
705 | ZPOOL_CONFIG_GUID, 0ULL) != 0) { |
706 | nvlist_free(missing); |
707 | goto nomem; |
708 | } |
709 | child[c] = missing; |
710 | } |
711 | } |
712 | |
713 | /* |
714 | * Put all of this pool's top-level vdevs into a root vdev. |
715 | */ |
716 | if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0) |
717 | goto nomem; |
718 | if (nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, |
719 | VDEV_TYPE_ROOT) != 0 || |
720 | nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) != 0 || |
721 | nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, guid) != 0 || |
722 | nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, |
723 | child, children) != 0) { |
724 | nvlist_free(nvroot); |
725 | goto nomem; |
726 | } |
727 | |
728 | for (c = 0; c < children; c++) |
729 | nvlist_free(child[c]); |
730 | free(child); |
731 | children = 0; |
732 | child = NULL; |
733 | |
734 | /* |
735 | * Go through and fix up any paths and/or devids based on our |
736 | * known list of vdev GUID -> path mappings. |
737 | */ |
738 | if (fix_paths(nvroot, pl->names) != 0) { |
739 | nvlist_free(nvroot); |
740 | goto nomem; |
741 | } |
742 | |
743 | /* |
744 | * Add the root vdev to this pool's configuration. |
745 | */ |
746 | if (nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, |
747 | nvroot) != 0) { |
748 | nvlist_free(nvroot); |
749 | goto nomem; |
750 | } |
751 | nvlist_free(nvroot); |
752 | |
753 | /* |
754 | * zdb uses this path to report on active pools that were |
755 | * imported or created using -R. |
756 | */ |
757 | if (active_ok) |
758 | goto add_pool; |
759 | |
760 | /* |
761 | * Determine if this pool is currently active, in which case we |
762 | * can't actually import it. |
763 | */ |
764 | verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, |
765 | &name) == 0); |
766 | verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, |
767 | &guid) == 0); |
768 | |
769 | if (pool_active(hdl, name, guid, &isactive) != 0) |
770 | goto error; |
771 | |
772 | if (isactive) { |
773 | nvlist_free(config); |
774 | config = NULL; |
775 | continue; |
776 | } |
777 | |
778 | if ((nvl = refresh_config(hdl, config)) == NULL) { |
779 | nvlist_free(config); |
780 | config = NULL; |
781 | continue; |
782 | } |
783 | |
784 | nvlist_free(config); |
785 | config = nvl; |
786 | |
787 | /* |
788 | * Go through and update the paths for spares, now that we have |
789 | * them. |
790 | */ |
791 | verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, |
792 | &nvroot) == 0); |
793 | if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, |
794 | &spares, &nspares) == 0) { |
795 | for (i = 0; i < nspares; i++) { |
796 | if (fix_paths(spares[i], pl->names) != 0) |
797 | goto nomem; |
798 | } |
799 | } |
800 | |
801 | /* |
802 | * Update the paths for l2cache devices. |
803 | */ |
804 | if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, |
805 | &l2cache, &nl2cache) == 0) { |
806 | for (i = 0; i < nl2cache; i++) { |
807 | if (fix_paths(l2cache[i], pl->names) != 0) |
808 | goto nomem; |
809 | } |
810 | } |
811 | |
812 | /* |
813 | * Restore the original information read from the actual label. |
814 | */ |
815 | (void) nvlist_remove(config, ZPOOL_CONFIG_HOSTID, |
816 | DATA_TYPE_UINT64); |
817 | (void) nvlist_remove(config, ZPOOL_CONFIG_HOSTNAME, |
818 | DATA_TYPE_STRING); |
819 | if (hostid != 0) { |
820 | verify(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID, |
821 | hostid) == 0); |
822 | verify(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME, |
823 | hostname) == 0); |
824 | } |
825 | |
826 | add_pool: |
827 | /* |
828 | * Add this pool to the list of configs. |
829 | */ |
830 | verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, |
831 | &name) == 0); |
832 | if (nvlist_add_nvlist(ret, name, config) != 0) |
833 | goto nomem; |
834 | |
835 | found_one = B_TRUE; |
836 | nvlist_free(config); |
837 | config = NULL; |
838 | } |
839 | |
840 | if (!found_one) { |
841 | nvlist_free(ret); |
842 | ret = NULL; |
843 | } |
844 | |
845 | return (ret); |
846 | |
847 | nomem: |
848 | (void) no_memory(hdl); |
849 | error: |
850 | nvlist_free(config); |
851 | nvlist_free(ret); |
852 | for (c = 0; c < children; c++) |
853 | nvlist_free(child[c]); |
854 | free(child); |
855 | |
856 | return (NULL); |
857 | } |
858 | |
859 | /* |
860 | * Return the offset of the given label. |
861 | */ |
862 | static uint64_t |
863 | label_offset(uint64_t size, int l) |
864 | { |
865 | ASSERT(P2PHASE_TYPED(size, sizeof (vdev_label_t), uint64_t) == 0); |
866 | return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ? |
867 | 0 : size - VDEV_LABELS * sizeof (vdev_label_t))); |
868 | } |
869 | |
870 | /* |
871 | * Given a file descriptor, read the label information and return an nvlist |
872 | * describing the configuration, if there is one. |
873 | */ |
874 | int |
875 | zpool_read_label(int fd, nvlist_t **config) |
876 | { |
877 | struct stat64 statbuf; |
878 | int l; |
879 | vdev_label_t *label; |
880 | uint64_t state, txg, size; |
881 | |
882 | *config = NULL; |
883 | |
884 | if (fstat64(fd, &statbuf) == -1) |
885 | return (0); |
886 | size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t); |
887 | |
888 | if ((label = malloc(sizeof (vdev_label_t))) == NULL) |
889 | return (-1); |
890 | |
891 | for (l = 0; l < VDEV_LABELS; l++) { |
892 | if (pread64(fd, label, sizeof (vdev_label_t), |
893 | label_offset(size, l)) != sizeof (vdev_label_t)) |
894 | continue; |
895 | |
896 | if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist, |
897 | sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0) |
898 | continue; |
899 | |
900 | if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE, |
901 | &state) != 0 || state > POOL_STATE_L2CACHE) { |
902 | nvlist_free(*config); |
903 | continue; |
904 | } |
905 | |
906 | if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE && |
907 | (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG, |
908 | &txg) != 0 || txg == 0)) { |
909 | nvlist_free(*config); |
910 | continue; |
911 | } |
912 | |
913 | free(label); |
914 | return (0); |
915 | } |
916 | |
917 | free(label); |
918 | *config = NULL; |
919 | return (0); |
920 | } |
921 | |
/*
 * One candidate device name to probe for a pool label.  zpool_open_func()
 * takes a pointer to one of these as its argument.
 */
typedef struct rdsk_node {
	char *rn_name;			/* name opened relative to rn_dfd */
	int rn_dfd;			/* directory fd passed to openat64() */
	libzfs_handle_t *rn_hdl;	/* handle used for error reporting */
	nvlist_t *rn_config;		/* label config, set on a good read */
	avl_tree_t *rn_avl;		/* tree searched for sibling slices */
	avl_node_t rn_node;		/* presumably linkage in rn_avl (TODO confirm) */
	boolean_t rn_nozpool;		/* set: skip, cannot hold a pool */
} rdsk_node_t;
931 | |
932 | static int |
933 | slice_cache_compare(const void *arg1, const void *arg2) |
934 | { |
935 | const char *nm1 = ((rdsk_node_t *)arg1)->rn_name; |
936 | const char *nm2 = ((rdsk_node_t *)arg2)->rn_name; |
937 | char *nm1slice, *nm2slice; |
938 | int rv; |
939 | |
940 | /* |
941 | * slices zero and two are the most likely to provide results, |
942 | * so put those first |
943 | */ |
944 | nm1slice = strstr(nm1, "s0" ); |
945 | nm2slice = strstr(nm2, "s0" ); |
946 | if (nm1slice && !nm2slice) { |
947 | return (-1); |
948 | } |
949 | if (!nm1slice && nm2slice) { |
950 | return (1); |
951 | } |
952 | nm1slice = strstr(nm1, "s2" ); |
953 | nm2slice = strstr(nm2, "s2" ); |
954 | if (nm1slice && !nm2slice) { |
955 | return (-1); |
956 | } |
957 | if (!nm1slice && nm2slice) { |
958 | return (1); |
959 | } |
960 | |
961 | rv = strcmp(nm1, nm2); |
962 | if (rv == 0) |
963 | return (0); |
964 | return (rv > 0 ? 1 : -1); |
965 | } |
966 | |
#ifdef illumos
/*
 * Build the node name "<diskname><partno>" and, if a slice of 'size' blocks
 * of 'blksz' bytes is smaller than SPA_MINDEVSIZE, mark the matching node in
 * tree 'r' (if any) as unable to hold a pool.
 */
static void
check_one_slice(avl_tree_t *r, char *diskname, uint_t partno,
    diskaddr_t size, uint_t blksz)
{
	char slice_name[MAXNAMELEN];
	rdsk_node_t key;
	rdsk_node_t *found;

	(void) snprintf(slice_name, sizeof (slice_name), "%s%u",
	    diskname, partno);
	key.rn_name = slice_name;

	/*
	 * protect against division by zero for disk labels that
	 * contain a bogus sector size
	 */
	if (blksz == 0)
		blksz = DEV_BSIZE;

	/* large enough to contain a zpool: leave the node alone */
	if (size >= (SPA_MINDEVSIZE / blksz))
		return;

	found = avl_find(r, &key, NULL);
	if (found != NULL)
		found->rn_nozpool = B_TRUE;
}
#endif /* illumos */
991 | |
992 | static void |
993 | nozpool_all_slices(avl_tree_t *r, const char *sname) |
994 | { |
995 | #ifdef illumos |
996 | char diskname[MAXNAMELEN]; |
997 | char *ptr; |
998 | int i; |
999 | |
1000 | (void) strncpy(diskname, sname, MAXNAMELEN); |
1001 | if (((ptr = strrchr(diskname, 's')) == NULL) && |
1002 | ((ptr = strrchr(diskname, 'p')) == NULL)) |
1003 | return; |
1004 | ptr[0] = 's'; |
1005 | ptr[1] = '\0'; |
1006 | for (i = 0; i < NDKMAP; i++) |
1007 | check_one_slice(r, diskname, i, 0, 1); |
1008 | ptr[0] = 'p'; |
1009 | for (i = 0; i <= FD_NUMPART; i++) |
1010 | check_one_slice(r, diskname, i, 0, 1); |
1011 | #endif /* illumos */ |
1012 | } |
1013 | |
#ifdef illumos
/*
 * Read the disk label through 'fd' and use the slice sizes it reports to
 * mark nodes in tree 'r' that are too small to hold a pool.  'sname' must
 * end in "s<digit>..." for anything to happen.  A VTOC label is tried
 * first, then an EFI label; if neither can be read, nothing is marked.
 */
static void
check_slices(avl_tree_t *r, int fd, const char *sname)
{
	struct extvtoc vtoc;
	struct dk_gpt *gpt;
	char diskname[MAXNAMELEN];
	char *ptr;
	int i;

	/*
	 * strlcpy() always NUL-terminates, so strrchr() and the ptr[1]
	 * accesses below stay inside 'diskname'.
	 */
	(void) strlcpy(diskname, sname, sizeof (diskname));
	/* <ctype.h> functions need a value representable as unsigned char */
	if ((ptr = strrchr(diskname, 's')) == NULL ||
	    !isdigit((unsigned char)ptr[1]))
		return;
	ptr[1] = '\0';

	if (read_extvtoc(fd, &vtoc) >= 0) {
		for (i = 0; i < NDKMAP; i++)
			check_one_slice(r, diskname, i,
			    vtoc.v_part[i].p_size, vtoc.v_sectorsz);
	} else if (efi_alloc_and_read(fd, &gpt) >= 0) {
		/*
		 * on x86 we'll still have leftover links that point
		 * to slices s[9-15], so use NDKMAP instead
		 */
		for (i = 0; i < NDKMAP; i++)
			check_one_slice(r, diskname, i,
			    gpt->efi_parts[i].p_size, gpt->efi_lbasize);
		/* nodes p[1-4] are never used with EFI labels */
		ptr[0] = 'p';
		for (i = 1; i <= FD_NUMPART; i++)
			check_one_slice(r, diskname, i, 0, 1);
		efi_free(gpt);
	}
}
#endif /* illumos */
1049 | |
1050 | static void |
1051 | zpool_open_func(void *arg) |
1052 | { |
1053 | rdsk_node_t *rn = arg; |
1054 | struct stat64 statbuf; |
1055 | nvlist_t *config; |
1056 | int fd; |
1057 | |
1058 | if (rn->rn_nozpool) |
1059 | return; |
1060 | if ((fd = openat64(rn->rn_dfd, rn->rn_name, O_RDONLY)) < 0) { |
1061 | /* symlink to a device that's no longer there */ |
1062 | if (errno == ENOENT) |
1063 | nozpool_all_slices(rn->rn_avl, rn->rn_name); |
1064 | return; |
1065 | } |
1066 | /* |
1067 | * Ignore failed stats. We only want regular |
1068 | * files, character devs and block devs. |
1069 | */ |
1070 | if (fstat64(fd, &statbuf) != 0 || |
1071 | (!S_ISREG(statbuf.st_mode) && |
1072 | !S_ISCHR(statbuf.st_mode) && |
1073 | !S_ISBLK(statbuf.st_mode))) { |
1074 | (void) close(fd); |
1075 | return; |
1076 | } |
1077 | /* this file is too small to hold a zpool */ |
1078 | #ifdef illumos |
1079 | if (S_ISREG(statbuf.st_mode) && |
1080 | statbuf.st_size < SPA_MINDEVSIZE) { |
1081 | (void) close(fd); |
1082 | return; |
1083 | } else if (!S_ISREG(statbuf.st_mode)) { |
1084 | /* |
1085 | * Try to read the disk label first so we don't have to |
1086 | * open a bunch of minor nodes that can't have a zpool. |
1087 | */ |
1088 | check_slices(rn->rn_avl, fd, rn->rn_name); |
1089 | } |
1090 | #endif /* illumos */ |
1091 | #ifdef __FreeBSD__ |
1092 | if (statbuf.st_size < SPA_MINDEVSIZE) { |
1093 | (void) close(fd); |
1094 | return; |
1095 | } |
1096 | #endif /* __FreeBSD__ */ |
1097 | #ifdef __NetBSD__ |
1098 | off_t size; |
1099 | |
1100 | if (native_ioctl(fd, DIOCGMEDIASIZE, &size) < 0 || |
1101 | size < SPA_MINDEVSIZE) { |
1102 | (void) close(fd); |
1103 | return; |
1104 | } |
1105 | #endif |
1106 | |
1107 | if ((zpool_read_label(fd, &config)) != 0) { |
1108 | (void) close(fd); |
1109 | (void) no_memory(rn->rn_hdl); |
1110 | return; |
1111 | } |
1112 | (void) close(fd); |
1113 | |
1114 | rn->rn_config = config; |
1115 | } |
1116 | |
1117 | /* |
1118 | * Given a file descriptor, clear (zero) the label information. |
1119 | */ |
1120 | int |
1121 | zpool_clear_label(int fd) |
1122 | { |
1123 | struct stat64 statbuf; |
1124 | int l; |
1125 | vdev_label_t *label; |
1126 | uint64_t size; |
1127 | |
1128 | if (fstat64(fd, &statbuf) == -1) |
1129 | return (0); |
1130 | size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t); |
1131 | |
1132 | if ((label = calloc(sizeof (vdev_label_t), 1)) == NULL) |
1133 | return (-1); |
1134 | |
1135 | for (l = 0; l < VDEV_LABELS; l++) { |
1136 | if (pwrite64(fd, label, sizeof (vdev_label_t), |
1137 | label_offset(size, l)) != sizeof (vdev_label_t)) { |
1138 | free(label); |
1139 | return (-1); |
1140 | } |
1141 | } |
1142 | |
1143 | free(label); |
1144 | return (0); |
1145 | } |
1146 | |
1147 | /* |
1148 | * Given a list of directories to search, find all pools stored on disk. This |
1149 | * includes partial pools which are not available to import. If no args are |
1150 | * given (argc is 0), then the default directory (/dev/dsk) is searched. |
1151 | * poolname or guid (but not both) are provided by the caller when trying |
1152 | * to import a specific pool. |
1153 | */ |
1154 | static nvlist_t * |
1155 | zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg) |
1156 | { |
1157 | int i, dirs = iarg->paths; |
1158 | struct dirent64 *dp; |
1159 | char path[MAXPATHLEN]; |
1160 | char *end, **dir = iarg->path; |
1161 | size_t pathleft; |
1162 | nvlist_t *ret = NULL; |
1163 | static char *default_dir = "/dev" ; |
1164 | pool_list_t pools = { 0 }; |
1165 | pool_entry_t *pe, *penext; |
1166 | vdev_entry_t *ve, *venext; |
1167 | config_entry_t *ce, *cenext; |
1168 | name_entry_t *ne, *nenext; |
1169 | avl_tree_t slice_cache; |
1170 | rdsk_node_t *slice; |
1171 | void *cookie; |
1172 | |
1173 | if (dirs == 0) { |
1174 | dirs = 1; |
1175 | dir = &default_dir; |
1176 | } |
1177 | |
1178 | /* |
1179 | * Go through and read the label configuration information from every |
1180 | * possible device, organizing the information according to pool GUID |
1181 | * and toplevel GUID. |
1182 | */ |
1183 | for (i = 0; i < dirs; i++) { |
1184 | tpool_t *t; |
1185 | char rdsk[MAXPATHLEN]; |
1186 | int dfd; |
1187 | boolean_t config_failed = B_FALSE; |
1188 | DIR *dirp; |
1189 | |
1190 | /* use realpath to normalize the path */ |
1191 | if (realpath(dir[i], path) == 0) { |
1192 | (void) zfs_error_fmt(hdl, EZFS_BADPATH, |
1193 | dgettext(TEXT_DOMAIN, "cannot open '%s'" ), dir[i]); |
1194 | goto error; |
1195 | } |
1196 | end = &path[strlen(path)]; |
1197 | *end++ = '/'; |
1198 | *end = 0; |
1199 | pathleft = &path[sizeof (path)] - end; |
1200 | |
1201 | #ifdef illumos |
1202 | /* |
1203 | * Using raw devices instead of block devices when we're |
1204 | * reading the labels skips a bunch of slow operations during |
1205 | * close(2) processing, so we replace /dev/dsk with /dev/rdsk. |
1206 | */ |
1207 | if (strcmp(path, ZFS_DISK_ROOTD) == 0) |
1208 | (void) strlcpy(rdsk, ZFS_RDISK_ROOTD, sizeof (rdsk)); |
1209 | else |
1210 | #endif |
1211 | (void) strlcpy(rdsk, path, sizeof (rdsk)); |
1212 | |
1213 | if ((dfd = open64(rdsk, O_RDONLY)) < 0 || |
1214 | (dirp = fdopendir(dfd)) == NULL) { |
1215 | if (dfd >= 0) |
1216 | (void) close(dfd); |
1217 | zfs_error_aux(hdl, strerror(errno)); |
1218 | (void) zfs_error_fmt(hdl, EZFS_BADPATH, |
1219 | dgettext(TEXT_DOMAIN, "cannot open '%s'" ), |
1220 | rdsk); |
1221 | goto error; |
1222 | } |
1223 | |
1224 | avl_create(&slice_cache, slice_cache_compare, |
1225 | sizeof (rdsk_node_t), offsetof(rdsk_node_t, rn_node)); |
1226 | |
1227 | #ifdef __FreeBSD__ |
1228 | if (strcmp(rdsk, "/dev/" ) == 0) { |
1229 | struct gmesh mesh; |
1230 | struct gclass *mp; |
1231 | struct ggeom *gp; |
1232 | struct gprovider *pp; |
1233 | |
1234 | errno = geom_gettree(&mesh); |
1235 | if (errno != 0) { |
1236 | zfs_error_aux(hdl, strerror(errno)); |
1237 | (void) zfs_error_fmt(hdl, EZFS_BADPATH, |
1238 | dgettext(TEXT_DOMAIN, "cannot get GEOM tree" )); |
1239 | goto error; |
1240 | } |
1241 | |
1242 | LIST_FOREACH(mp, &mesh.lg_class, lg_class) { |
1243 | LIST_FOREACH(gp, &mp->lg_geom, lg_geom) { |
1244 | LIST_FOREACH(pp, &gp->lg_provider, lg_provider) { |
1245 | slice = zfs_alloc(hdl, sizeof (rdsk_node_t)); |
1246 | slice->rn_name = zfs_strdup(hdl, pp->lg_name); |
1247 | slice->rn_avl = &slice_cache; |
1248 | slice->rn_dfd = dfd; |
1249 | slice->rn_hdl = hdl; |
1250 | slice->rn_nozpool = B_FALSE; |
1251 | avl_add(&slice_cache, slice); |
1252 | } |
1253 | } |
1254 | } |
1255 | |
1256 | geom_deletetree(&mesh); |
1257 | goto skipdir; |
1258 | } |
1259 | #endif |
1260 | #ifdef __NetBSD__ |
1261 | if (strcmp(rdsk, "/dev/" ) == 0) { |
1262 | static const char mib_name[] = "hw.disknames" ; |
1263 | size_t len; |
1264 | char *disknames, *last, *name; |
1265 | char part; |
1266 | |
1267 | part = getrawpartition(); |
1268 | if (sysctlbyname(mib_name, NULL, &len, NULL, 0) == -1) { |
1269 | zfs_error_aux(hdl, strerror(errno)); |
1270 | (void) zfs_error_fmt(hdl, EZFS_BADPATH, |
1271 | dgettext(TEXT_DOMAIN, "cannot get hw.disknames list" )); |
1272 | |
1273 | avl_destroy(&slice_cache); |
1274 | (void) closedir(dirp); |
1275 | goto error; |
1276 | } |
1277 | disknames = zfs_alloc(hdl, len + 2); |
1278 | (void)sysctlbyname(mib_name, disknames, &len, NULL, 0); |
1279 | |
1280 | |
1281 | for ((name = strtok_r(disknames, " " , &last)); name; |
1282 | (name = strtok_r(NULL, " " , &last))) { |
1283 | slice = zfs_alloc(hdl, sizeof (rdsk_node_t)); |
1284 | slice->rn_name = zfs_asprintf(hdl, "%s%c" , name, 'a' + part); |
1285 | slice->rn_avl = &slice_cache; |
1286 | slice->rn_dfd = dfd; |
1287 | slice->rn_hdl = hdl; |
1288 | slice->rn_nozpool = B_FALSE; |
1289 | avl_add(&slice_cache, slice); |
1290 | } |
1291 | free(disknames); |
1292 | |
1293 | goto skipdir; |
1294 | } |
1295 | #endif |
1296 | |
1297 | /* |
1298 | * This is not MT-safe, but we have no MT consumers of libzfs |
1299 | */ |
1300 | while ((dp = readdir64(dirp)) != NULL) { |
1301 | const char *name = dp->d_name; |
1302 | if (name[0] == '.' && |
1303 | (name[1] == 0 || (name[1] == '.' && name[2] == 0))) |
1304 | continue; |
1305 | |
1306 | slice = zfs_alloc(hdl, sizeof (rdsk_node_t)); |
1307 | slice->rn_name = zfs_strdup(hdl, name); |
1308 | slice->rn_avl = &slice_cache; |
1309 | slice->rn_dfd = dfd; |
1310 | slice->rn_hdl = hdl; |
1311 | slice->rn_nozpool = B_FALSE; |
1312 | avl_add(&slice_cache, slice); |
1313 | } |
1314 | skipdir: |
1315 | /* |
1316 | * create a thread pool to do all of this in parallel; |
1317 | * rn_nozpool is not protected, so this is racy in that |
1318 | * multiple tasks could decide that the same slice can |
1319 | * not hold a zpool, which is benign. Also choose |
1320 | * double the number of processors; we hold a lot of |
1321 | * locks in the kernel, so going beyond this doesn't |
1322 | * buy us much. |
1323 | */ |
1324 | t = tpool_create(1, 2 * sysconf(_SC_NPROCESSORS_ONLN), |
1325 | 0, NULL); |
1326 | for (slice = avl_first(&slice_cache); slice; |
1327 | (slice = avl_walk(&slice_cache, slice, |
1328 | AVL_AFTER))) |
1329 | (void) tpool_dispatch(t, zpool_open_func, slice); |
1330 | tpool_wait(t); |
1331 | tpool_destroy(t); |
1332 | |
1333 | cookie = NULL; |
1334 | while ((slice = avl_destroy_nodes(&slice_cache, |
1335 | &cookie)) != NULL) { |
1336 | if (slice->rn_config != NULL && !config_failed) { |
1337 | nvlist_t *config = slice->rn_config; |
1338 | boolean_t matched = B_TRUE; |
1339 | |
1340 | if (iarg->poolname != NULL) { |
1341 | char *pname; |
1342 | |
1343 | matched = nvlist_lookup_string(config, |
1344 | ZPOOL_CONFIG_POOL_NAME, |
1345 | &pname) == 0 && |
1346 | strcmp(iarg->poolname, pname) == 0; |
1347 | } else if (iarg->guid != 0) { |
1348 | uint64_t this_guid; |
1349 | |
1350 | matched = nvlist_lookup_uint64(config, |
1351 | ZPOOL_CONFIG_POOL_GUID, |
1352 | &this_guid) == 0 && |
1353 | iarg->guid == this_guid; |
1354 | } |
1355 | if (!matched) { |
1356 | nvlist_free(config); |
1357 | } else { |
1358 | /* |
1359 | * use the non-raw path for the config |
1360 | */ |
1361 | (void) strlcpy(end, slice->rn_name, |
1362 | pathleft); |
1363 | if (add_config(hdl, &pools, path, |
1364 | config) != 0) |
1365 | config_failed = B_TRUE; |
1366 | } |
1367 | } |
1368 | free(slice->rn_name); |
1369 | free(slice); |
1370 | } |
1371 | avl_destroy(&slice_cache); |
1372 | |
1373 | (void) closedir(dirp); |
1374 | |
1375 | if (config_failed) |
1376 | goto error; |
1377 | } |
1378 | |
1379 | ret = get_configs(hdl, &pools, iarg->can_be_active); |
1380 | |
1381 | error: |
1382 | for (pe = pools.pools; pe != NULL; pe = penext) { |
1383 | penext = pe->pe_next; |
1384 | for (ve = pe->pe_vdevs; ve != NULL; ve = venext) { |
1385 | venext = ve->ve_next; |
1386 | for (ce = ve->ve_configs; ce != NULL; ce = cenext) { |
1387 | cenext = ce->ce_next; |
1388 | nvlist_free(ce->ce_config); |
1389 | free(ce); |
1390 | } |
1391 | free(ve); |
1392 | } |
1393 | free(pe); |
1394 | } |
1395 | |
1396 | for (ne = pools.names; ne != NULL; ne = nenext) { |
1397 | nenext = ne->ne_next; |
1398 | free(ne->ne_name); |
1399 | free(ne); |
1400 | } |
1401 | |
1402 | return (ret); |
1403 | } |
1404 | |
1405 | nvlist_t * |
1406 | zpool_find_import(libzfs_handle_t *hdl, int argc, char **argv) |
1407 | { |
1408 | importargs_t iarg = { 0 }; |
1409 | |
1410 | iarg.paths = argc; |
1411 | iarg.path = argv; |
1412 | |
1413 | return (zpool_find_import_impl(hdl, &iarg)); |
1414 | } |
1415 | |
1416 | /* |
1417 | * Given a cache file, return the contents as a list of importable pools. |
1418 | * poolname or guid (but not both) are provided by the caller when trying |
1419 | * to import a specific pool. |
1420 | */ |
1421 | nvlist_t * |
1422 | zpool_find_import_cached(libzfs_handle_t *hdl, const char *cachefile, |
1423 | char *poolname, uint64_t guid) |
1424 | { |
1425 | char *buf; |
1426 | int fd; |
1427 | struct stat64 statbuf; |
1428 | nvlist_t *raw, *src, *dst; |
1429 | nvlist_t *pools; |
1430 | nvpair_t *elem; |
1431 | char *name; |
1432 | uint64_t this_guid; |
1433 | boolean_t active; |
1434 | |
1435 | verify(poolname == NULL || guid == 0); |
1436 | |
1437 | if ((fd = open(cachefile, O_RDONLY)) < 0) { |
1438 | zfs_error_aux(hdl, "%s" , strerror(errno)); |
1439 | (void) zfs_error(hdl, EZFS_BADCACHE, |
1440 | dgettext(TEXT_DOMAIN, "failed to open cache file" )); |
1441 | return (NULL); |
1442 | } |
1443 | |
1444 | if (fstat64(fd, &statbuf) != 0) { |
1445 | zfs_error_aux(hdl, "%s" , strerror(errno)); |
1446 | (void) close(fd); |
1447 | (void) zfs_error(hdl, EZFS_BADCACHE, |
1448 | dgettext(TEXT_DOMAIN, "failed to get size of cache file" )); |
1449 | return (NULL); |
1450 | } |
1451 | |
1452 | if ((buf = zfs_alloc(hdl, statbuf.st_size)) == NULL) { |
1453 | (void) close(fd); |
1454 | return (NULL); |
1455 | } |
1456 | |
1457 | if (read(fd, buf, statbuf.st_size) != statbuf.st_size) { |
1458 | (void) close(fd); |
1459 | free(buf); |
1460 | (void) zfs_error(hdl, EZFS_BADCACHE, |
1461 | dgettext(TEXT_DOMAIN, |
1462 | "failed to read cache file contents" )); |
1463 | return (NULL); |
1464 | } |
1465 | |
1466 | (void) close(fd); |
1467 | |
1468 | if (nvlist_unpack(buf, statbuf.st_size, &raw, 0) != 0) { |
1469 | free(buf); |
1470 | (void) zfs_error(hdl, EZFS_BADCACHE, |
1471 | dgettext(TEXT_DOMAIN, |
1472 | "invalid or corrupt cache file contents" )); |
1473 | return (NULL); |
1474 | } |
1475 | |
1476 | free(buf); |
1477 | |
1478 | /* |
1479 | * Go through and get the current state of the pools and refresh their |
1480 | * state. |
1481 | */ |
1482 | if (nvlist_alloc(&pools, 0, 0) != 0) { |
1483 | (void) no_memory(hdl); |
1484 | nvlist_free(raw); |
1485 | return (NULL); |
1486 | } |
1487 | |
1488 | elem = NULL; |
1489 | while ((elem = nvlist_next_nvpair(raw, elem)) != NULL) { |
1490 | src = fnvpair_value_nvlist(elem); |
1491 | |
1492 | name = fnvlist_lookup_string(src, ZPOOL_CONFIG_POOL_NAME); |
1493 | if (poolname != NULL && strcmp(poolname, name) != 0) |
1494 | continue; |
1495 | |
1496 | this_guid = fnvlist_lookup_uint64(src, ZPOOL_CONFIG_POOL_GUID); |
1497 | if (guid != 0 && guid != this_guid) |
1498 | continue; |
1499 | |
1500 | if (pool_active(hdl, name, this_guid, &active) != 0) { |
1501 | nvlist_free(raw); |
1502 | nvlist_free(pools); |
1503 | return (NULL); |
1504 | } |
1505 | |
1506 | if (active) |
1507 | continue; |
1508 | |
1509 | if ((dst = refresh_config(hdl, src)) == NULL) { |
1510 | nvlist_free(raw); |
1511 | nvlist_free(pools); |
1512 | return (NULL); |
1513 | } |
1514 | |
1515 | if (nvlist_add_nvlist(pools, nvpair_name(elem), dst) != 0) { |
1516 | (void) no_memory(hdl); |
1517 | nvlist_free(dst); |
1518 | nvlist_free(raw); |
1519 | nvlist_free(pools); |
1520 | return (NULL); |
1521 | } |
1522 | nvlist_free(dst); |
1523 | } |
1524 | |
1525 | nvlist_free(raw); |
1526 | return (pools); |
1527 | } |
1528 | |
1529 | static int |
1530 | name_or_guid_exists(zpool_handle_t *zhp, void *data) |
1531 | { |
1532 | importargs_t *import = data; |
1533 | int found = 0; |
1534 | |
1535 | if (import->poolname != NULL) { |
1536 | char *pool_name; |
1537 | |
1538 | verify(nvlist_lookup_string(zhp->zpool_config, |
1539 | ZPOOL_CONFIG_POOL_NAME, &pool_name) == 0); |
1540 | if (strcmp(pool_name, import->poolname) == 0) |
1541 | found = 1; |
1542 | } else { |
1543 | uint64_t pool_guid; |
1544 | |
1545 | verify(nvlist_lookup_uint64(zhp->zpool_config, |
1546 | ZPOOL_CONFIG_POOL_GUID, &pool_guid) == 0); |
1547 | if (pool_guid == import->guid) |
1548 | found = 1; |
1549 | } |
1550 | |
1551 | zpool_close(zhp); |
1552 | return (found); |
1553 | } |
1554 | |
1555 | nvlist_t * |
1556 | zpool_search_import(libzfs_handle_t *hdl, importargs_t *import) |
1557 | { |
1558 | verify(import->poolname == NULL || import->guid == 0); |
1559 | |
1560 | if (import->unique) |
1561 | import->exists = zpool_iter(hdl, name_or_guid_exists, import); |
1562 | |
1563 | if (import->cachefile != NULL) |
1564 | return (zpool_find_import_cached(hdl, import->cachefile, |
1565 | import->poolname, import->guid)); |
1566 | |
1567 | return (zpool_find_import_impl(hdl, import)); |
1568 | } |
1569 | |
1570 | boolean_t |
1571 | find_guid(nvlist_t *nv, uint64_t guid) |
1572 | { |
1573 | uint64_t tmp; |
1574 | nvlist_t **child; |
1575 | uint_t c, children; |
1576 | |
1577 | verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &tmp) == 0); |
1578 | if (tmp == guid) |
1579 | return (B_TRUE); |
1580 | |
1581 | if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, |
1582 | &child, &children) == 0) { |
1583 | for (c = 0; c < children; c++) |
1584 | if (find_guid(child[c], guid)) |
1585 | return (B_TRUE); |
1586 | } |
1587 | |
1588 | return (B_FALSE); |
1589 | } |
1590 | |
/*
 * Search state shared between zpool_in_use() and its find_aux() callback
 * while looking for an auxiliary (spare or l2cache) device.
 */
typedef struct aux_cbdata {
	/* name of the nvlist array in the vdev tree to search */
	const char *cb_type;
	/* guid of the device being looked for */
	uint64_t cb_guid;
	/* set by find_aux() to the pool that contains the device */
	zpool_handle_t *cb_zhp;
} aux_cbdata_t;
1596 | |
1597 | static int |
1598 | find_aux(zpool_handle_t *zhp, void *data) |
1599 | { |
1600 | aux_cbdata_t *cbp = data; |
1601 | nvlist_t **list; |
1602 | uint_t i, count; |
1603 | uint64_t guid; |
1604 | nvlist_t *nvroot; |
1605 | |
1606 | verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE, |
1607 | &nvroot) == 0); |
1608 | |
1609 | if (nvlist_lookup_nvlist_array(nvroot, cbp->cb_type, |
1610 | &list, &count) == 0) { |
1611 | for (i = 0; i < count; i++) { |
1612 | verify(nvlist_lookup_uint64(list[i], |
1613 | ZPOOL_CONFIG_GUID, &guid) == 0); |
1614 | if (guid == cbp->cb_guid) { |
1615 | cbp->cb_zhp = zhp; |
1616 | return (1); |
1617 | } |
1618 | } |
1619 | } |
1620 | |
1621 | zpool_close(zhp); |
1622 | return (0); |
1623 | } |
1624 | |
1625 | /* |
1626 | * Determines if the pool is in use. If so, it returns true and the state of |
1627 | * the pool as well as the name of the pool. Both strings are allocated and |
1628 | * must be freed by the caller. |
1629 | */ |
1630 | int |
1631 | zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr, |
1632 | boolean_t *inuse) |
1633 | { |
1634 | nvlist_t *config; |
1635 | char *name; |
1636 | boolean_t ret; |
1637 | uint64_t guid, vdev_guid; |
1638 | zpool_handle_t *zhp; |
1639 | nvlist_t *pool_config; |
1640 | uint64_t stateval, isspare; |
1641 | aux_cbdata_t cb = { 0 }; |
1642 | boolean_t isactive; |
1643 | |
1644 | *inuse = B_FALSE; |
1645 | |
1646 | if (zpool_read_label(fd, &config) != 0) { |
1647 | (void) no_memory(hdl); |
1648 | return (-1); |
1649 | } |
1650 | |
1651 | if (config == NULL) |
1652 | return (0); |
1653 | |
1654 | verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, |
1655 | &stateval) == 0); |
1656 | verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, |
1657 | &vdev_guid) == 0); |
1658 | |
1659 | if (stateval != POOL_STATE_SPARE && stateval != POOL_STATE_L2CACHE) { |
1660 | verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, |
1661 | &name) == 0); |
1662 | verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, |
1663 | &guid) == 0); |
1664 | } |
1665 | |
1666 | switch (stateval) { |
1667 | case POOL_STATE_EXPORTED: |
1668 | /* |
1669 | * A pool with an exported state may in fact be imported |
1670 | * read-only, so check the in-core state to see if it's |
1671 | * active and imported read-only. If it is, set |
1672 | * its state to active. |
1673 | */ |
1674 | if (pool_active(hdl, name, guid, &isactive) == 0 && isactive && |
1675 | (zhp = zpool_open_canfail(hdl, name)) != NULL) { |
1676 | if (zpool_get_prop_int(zhp, ZPOOL_PROP_READONLY, NULL)) |
1677 | stateval = POOL_STATE_ACTIVE; |
1678 | |
1679 | /* |
1680 | * All we needed the zpool handle for is the |
1681 | * readonly prop check. |
1682 | */ |
1683 | zpool_close(zhp); |
1684 | } |
1685 | |
1686 | ret = B_TRUE; |
1687 | break; |
1688 | |
1689 | case POOL_STATE_ACTIVE: |
1690 | /* |
1691 | * For an active pool, we have to determine if it's really part |
1692 | * of a currently active pool (in which case the pool will exist |
1693 | * and the guid will be the same), or whether it's part of an |
1694 | * active pool that was disconnected without being explicitly |
1695 | * exported. |
1696 | */ |
1697 | if (pool_active(hdl, name, guid, &isactive) != 0) { |
1698 | nvlist_free(config); |
1699 | return (-1); |
1700 | } |
1701 | |
1702 | if (isactive) { |
1703 | /* |
1704 | * Because the device may have been removed while |
1705 | * offlined, we only report it as active if the vdev is |
1706 | * still present in the config. Otherwise, pretend like |
1707 | * it's not in use. |
1708 | */ |
1709 | if ((zhp = zpool_open_canfail(hdl, name)) != NULL && |
1710 | (pool_config = zpool_get_config(zhp, NULL)) |
1711 | != NULL) { |
1712 | nvlist_t *nvroot; |
1713 | |
1714 | verify(nvlist_lookup_nvlist(pool_config, |
1715 | ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); |
1716 | ret = find_guid(nvroot, vdev_guid); |
1717 | } else { |
1718 | ret = B_FALSE; |
1719 | } |
1720 | |
1721 | /* |
1722 | * If this is an active spare within another pool, we |
1723 | * treat it like an unused hot spare. This allows the |
1724 | * user to create a pool with a hot spare that currently |
1725 | * in use within another pool. Since we return B_TRUE, |
1726 | * libdiskmgt will continue to prevent generic consumers |
1727 | * from using the device. |
1728 | */ |
1729 | if (ret && nvlist_lookup_uint64(config, |
1730 | ZPOOL_CONFIG_IS_SPARE, &isspare) == 0 && isspare) |
1731 | stateval = POOL_STATE_SPARE; |
1732 | |
1733 | if (zhp != NULL) |
1734 | zpool_close(zhp); |
1735 | } else { |
1736 | stateval = POOL_STATE_POTENTIALLY_ACTIVE; |
1737 | ret = B_TRUE; |
1738 | } |
1739 | break; |
1740 | |
1741 | case POOL_STATE_SPARE: |
1742 | /* |
1743 | * For a hot spare, it can be either definitively in use, or |
1744 | * potentially active. To determine if it's in use, we iterate |
1745 | * over all pools in the system and search for one with a spare |
1746 | * with a matching guid. |
1747 | * |
1748 | * Due to the shared nature of spares, we don't actually report |
1749 | * the potentially active case as in use. This means the user |
1750 | * can freely create pools on the hot spares of exported pools, |
1751 | * but to do otherwise makes the resulting code complicated, and |
1752 | * we end up having to deal with this case anyway. |
1753 | */ |
1754 | cb.cb_zhp = NULL; |
1755 | cb.cb_guid = vdev_guid; |
1756 | cb.cb_type = ZPOOL_CONFIG_SPARES; |
1757 | if (zpool_iter(hdl, find_aux, &cb) == 1) { |
1758 | name = (char *)zpool_get_name(cb.cb_zhp); |
1759 | ret = B_TRUE; |
1760 | } else { |
1761 | ret = B_FALSE; |
1762 | } |
1763 | break; |
1764 | |
1765 | case POOL_STATE_L2CACHE: |
1766 | |
1767 | /* |
1768 | * Check if any pool is currently using this l2cache device. |
1769 | */ |
1770 | cb.cb_zhp = NULL; |
1771 | cb.cb_guid = vdev_guid; |
1772 | cb.cb_type = ZPOOL_CONFIG_L2CACHE; |
1773 | if (zpool_iter(hdl, find_aux, &cb) == 1) { |
1774 | name = (char *)zpool_get_name(cb.cb_zhp); |
1775 | ret = B_TRUE; |
1776 | } else { |
1777 | ret = B_FALSE; |
1778 | } |
1779 | break; |
1780 | |
1781 | default: |
1782 | ret = B_FALSE; |
1783 | } |
1784 | |
1785 | |
1786 | if (ret) { |
1787 | if ((*namestr = zfs_strdup(hdl, name)) == NULL) { |
1788 | if (cb.cb_zhp) |
1789 | zpool_close(cb.cb_zhp); |
1790 | nvlist_free(config); |
1791 | return (-1); |
1792 | } |
1793 | *state = (pool_state_t)stateval; |
1794 | } |
1795 | |
1796 | if (cb.cb_zhp) |
1797 | zpool_close(cb.cb_zhp); |
1798 | |
1799 | nvlist_free(config); |
1800 | *inuse = ret; |
1801 | return (0); |
1802 | } |
1803 | |
1804 | #ifdef __NetBSD__ |
1805 | /* |
1806 | * This needs to be at the end of the file so that we can #undef ioctl |
1807 | * without affecting anything else. |
1808 | */ |
1809 | #undef ioctl |
1810 | |
/*
 * Forward to ioctl(2) unchanged and return its result.  It is defined after
 * the "#undef ioctl" so that the call is not subject to the ioctl macro.
 */
static int
native_ioctl(int fd, unsigned long cmd, void *arg)
{
	int rv;

	rv = ioctl(fd, cmd, arg);
	return (rv);
}
1817 | #endif |
1818 | |