1/* $NetBSD: pool.h,v 1.89 2019/05/09 08:16:15 skrll Exp $ */
2
3/*-
4 * Copyright (c) 1997, 1998, 1999, 2000, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
9 * Simulation Facility, NASA Ames Research Center.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33#ifndef _SYS_POOL_H_
34#define _SYS_POOL_H_
35
36#include <sys/stdbool.h>
37#include <sys/stdint.h>
38
/*
 * Per-pool statistics exported to userland via sysctl.  The fields
 * mirror the counters kept in struct pool and struct pool_cache below,
 * widened to uint64_t for a machine-independent exported layout.
 */
struct pool_sysctl {
	char pr_wchan[16];		/* pool name / sleep identifier */
	uint64_t pr_flags;		/* PR_* flag bits */
	uint64_t pr_size;		/* size of an item */
	uint64_t pr_pagesize;		/* size of the backing page */
	uint64_t pr_itemsperpage;	/* # of items that fit in a page */
	uint64_t pr_nitems;		/* # of available items in pool */
	uint64_t pr_nout;		/* # of items currently allocated */
	uint64_t pr_hardlimit;		/* hard limit on allocated items */
	uint64_t pr_npages;		/* # of pages allocated */
	uint64_t pr_minpages;		/* minimum # of pages to keep */
	uint64_t pr_maxpages;		/* maximum # of pages to keep */

	uint64_t pr_nget;		/* # of successful requests */
	uint64_t pr_nfail;		/* # of unsuccessful requests */
	uint64_t pr_nput;		/* # of releases */
	uint64_t pr_npagealloc;		/* # of pages allocated */
	uint64_t pr_npagefree;		/* # of pages released */
	uint64_t pr_hiwat;		/* max # of pages in pool */
	uint64_t pr_nidle;		/* # of idle pages */

	/*
	 * pool_cache layer statistics; presumably zero for pools
	 * without an attached cache -- TODO confirm against the
	 * sysctl fill routine.
	 */
	uint64_t pr_cache_meta_size;	/* size of cache group metadata */
	uint64_t pr_cache_nfull;	/* # of full cache groups */
	uint64_t pr_cache_npartial;	/* # of partial cache groups */
	uint64_t pr_cache_nempty;	/* # of empty cache groups */
	uint64_t pr_cache_ncontended;	/* # of contention events */
	uint64_t pr_cache_nmiss_global;	/* misses in the global layer */
	uint64_t pr_cache_nhit_global;	/* hits in the global layer */
	uint64_t pr_cache_nmiss_pcpu;	/* misses in the per-CPU layer */
	uint64_t pr_cache_nhit_pcpu;	/* hits in the per-CPU layer */
};
70
71#ifdef _KERNEL
72#define __POOL_EXPOSE
73#endif
74
75#ifdef __POOL_EXPOSE
76#include <sys/param.h>
77#include <sys/mutex.h>
78#include <sys/condvar.h>
79#include <sys/queue.h>
80#include <sys/time.h>
81#include <sys/tree.h>
82#include <sys/callback.h>
83
84#ifdef _KERNEL_OPT
85#include "opt_pool.h"
86#endif
87
88#define POOL_PADDR_INVALID ((paddr_t) -1)
89
90struct pool;
91
/*
 * Back-end page allocator for a pool: supplies the pages the pool
 * layer carves into items.  Allocators provide their own locking and
 * are called with the pool descriptor unlocked (see the pr_lock
 * comment in struct pool).
 */
struct pool_allocator {
	void		*(*pa_alloc)(struct pool *, int);	/* get a page */
	void		(*pa_free)(struct pool *, void *);	/* release a page */
	unsigned int	pa_pagesz;	/* size of pages handed out */

	/* The following fields are for internal use only. */
	kmutex_t	pa_lock;	/* NOTE(review): presumably protects pa_list/pa_refcnt */
	TAILQ_HEAD(, pool) pa_list;	/* list of pools using this allocator */
	uint32_t	pa_refcnt;	/* number of pools using this allocator */
	int		pa_pagemask;	/* presumably pa_pagesz - 1 -- TODO confirm */
	int		pa_pageshift;	/* presumably log2(pa_pagesz) -- TODO confirm */
};
104
/*
 * A list of page headers (used for the empty/full/partial page lists
 * in struct pool) and a splay tree of page headers (see pr_phtree).
 */
LIST_HEAD(pool_pagelist,pool_item_header);
SPLAY_HEAD(phtree, pool_item_header);

/*
 * Free-item quarantine (only present when built with the
 * POOL_QUARANTINE option; see pr_quar in struct pool): a fixed-size
 * ring that appears to hold recently freed items to delay their
 * reuse -- TODO confirm against subr_pool.c.
 */
#define POOL_QUARANTINE_DEPTH 128
typedef struct {
	size_t rotor;				/* next ring slot to recycle */
	intptr_t list[POOL_QUARANTINE_DEPTH];	/* quarantined item addresses */
} pool_quar_t;
113
/*
 * The pool descriptor: a pool of fixed-size items.  Pages obtained
 * from the back-end allocator (pr_alloc) are carved into items and
 * tracked on the empty/full/partial page lists below.
 */
struct pool {
	TAILQ_ENTRY(pool)
		pr_poollist;	/* entry on a global pool list -- TODO confirm */
	struct pool_pagelist
		pr_emptypages;	/* Empty pages */
	struct pool_pagelist
		pr_fullpages;	/* Full pages */
	struct pool_pagelist
		pr_partpages;	/* Partially-allocated pages */
	struct pool_item_header	*pr_curpage;	/* page currently allocated from -- TODO confirm */
	struct pool	*pr_phpool;	/* Pool item header pool */
	struct pool_cache *pr_cache;	/* Cache for this pool */
	unsigned int	pr_size;	/* Size of item */
	unsigned int	pr_align;	/* Requested alignment, must be 2^n */
	unsigned int	pr_itemoffset;	/* offset of the item space */
	unsigned int	pr_minitems;	/* minimum # of items to keep */
	unsigned int	pr_minpages;	/* same in page units */
	unsigned int	pr_maxpages;	/* maximum # of pages to keep */
	unsigned int	pr_npages;	/* # of pages allocated */
	unsigned int	pr_itemsperpage;/* # items that fit in a page */
	unsigned int	pr_poolid;	/* id of the pool */
	unsigned int	pr_nitems;	/* number of available items in pool */
	unsigned int	pr_nout;	/* # items currently allocated */
	unsigned int	pr_hardlimit;	/* hard limit to number of allocated
					   items */
	unsigned int	pr_refcnt;	/* ref count for pagedaemon, etc */
	struct pool_allocator *pr_alloc;/* back-end allocator */
	TAILQ_ENTRY(pool) pr_alloc_list;/* link on allocator's pool list */

	/* Drain hook, invoked to release resources under memory pressure. */
	void	(*pr_drain_hook)(void *, int);
	void	*pr_drain_hook_arg;	/* opaque argument for the hook */

	const char	*pr_wchan;	/* tsleep(9) identifier */
	unsigned int	pr_flags;	/* r/w flags */
	unsigned int	pr_roflags;	/* r/o flags */
#define PR_WAITOK	0x01	/* Note: matches KM_SLEEP */
#define PR_NOWAIT	0x02	/* Note: matches KM_NOSLEEP */
#define PR_WANTED	0x04	/* waiter(s) blocked (see pr_cv) -- NOTE(review): inferred from name */
#define PR_PHINPAGE	0x40	/* page header kept inside the page -- TODO confirm */
#define PR_LOGGING	0x80	/* NOTE(review): presumably enables operation logging */
#define PR_LIMITFAIL	0x100	/* even if waiting, fail if we hit limit */
#define PR_RECURSIVE	0x200	/* pool contains pools, for vmstat(8) */
#define PR_NOTOUCH	0x400	/* don't use free items to keep internal state*/
#define PR_NOALIGN	0x800	/* don't assume backend alignment */
#define PR_LARGECACHE	0x1000	/* use large cache groups */
#define PR_GROWING	0x2000	/* pool_grow in progress */
#define PR_GROWINGNOWAIT 0x4000	/* pool_grow in progress by PR_NOWAIT alloc */
#define PR_ZERO		0x8000	/* zero data before returning */
#define PR_USEBMAP	0x10000	/* use a bitmap to manage freed items */

	/*
	 * `pr_lock' protects the pool's data structures when removing
	 * items from or returning items to the pool, or when reading
	 * or updating read/write fields in the pool descriptor.
	 *
	 * We assume back-end page allocators provide their own locking
	 * scheme.  They will be called with the pool descriptor _unlocked_,
	 * since the page allocators may block.
	 */
	kmutex_t	pr_lock;
	kcondvar_t	pr_cv;		/* sleep point for item waiters */
	int		pr_ipl;		/* IPL at which the pool is used -- TODO confirm */

	struct phtree	pr_phtree;	/* splay tree of page headers */

	int		pr_maxcolor;	/* Cache colouring */
	int		pr_curcolor;	/* colour for the next page */
	int		pr_phoffset;	/* unused */

	/*
	 * Warning message to be issued, and a per-time-delta rate cap,
	 * if the hard limit is reached.
	 */
	const char	*pr_hardlimit_warning;
	struct timeval	pr_hardlimit_ratecap;
	struct timeval	pr_hardlimit_warning_last;

	/*
	 * Instrumentation
	 */
	unsigned long	pr_nget;	/* # of successful requests */
	unsigned long	pr_nfail;	/* # of unsuccessful requests */
	unsigned long	pr_nput;	/* # of releases */
	unsigned long	pr_npagealloc;	/* # of pages allocated */
	unsigned long	pr_npagefree;	/* # of pages released */
	unsigned int	pr_hiwat;	/* max # of pages in pool */
	unsigned long	pr_nidle;	/* # of idle pages */

	/*
	 * Diagnostic aides.
	 */
	void		*pr_freecheck;	/* double-free checking state -- TODO confirm */
	void		*pr_qcache;
	bool		pr_redzone;	/* red zone after items enabled -- TODO confirm */
	size_t		pr_reqsize;	/* caller-requested item size -- TODO confirm */
	size_t		pr_reqsize_with_redzone; /* pr_reqsize plus red zone */
#ifdef POOL_QUARANTINE
	pool_quar_t	pr_quar;	/* quarantine of recently freed items */
#endif
};
215
216/*
217 * Cache group sizes, assuming 4-byte paddr_t on !_LP64.
218 * All groups will be aligned to CACHE_LINE_SIZE.
219 */
220#ifdef _LP64
221#define PCG_NOBJECTS_NORMAL 15 /* 256 byte group */
222#define PCG_NOBJECTS_LARGE 63 /* 1024 byte group */
223#else
224#define PCG_NOBJECTS_NORMAL 14 /* 124 byte group */
225#define PCG_NOBJECTS_LARGE 62 /* 508 byte group */
226#endif
227
/*
 * One cached object: its virtual address plus its physical address
 * (POOL_PADDR_INVALID when unknown; see the pool_cache_put() macro).
 */
typedef struct pcgpair {
	void	*pcgo_va;		/* object virtual address */
	paddr_t	pcgo_pa;		/* object physical address */
} pcgpair_t;
232
233/* The pool cache group. */
/*
 * The pool cache group: a batch of cached objects handled as a unit.
 * Allocated with room for pcg_size entries; pcg_objects[1] is a
 * variable-length trailing array in the old pre-C99 style.
 */
typedef struct pool_cache_group {
	struct pool_cache_group	*pcg_next;	/* link to next group */
	u_int			pcg_avail;	/* # available objects */
	u_int			pcg_size;	/* max number objects */
	pcgpair_t 		pcg_objects[1];	/* the objects */
} pcg_t;
240
/*
 * Per-CPU layer of a pool cache: two groups of cached objects plus
 * hit/miss counters for this CPU.
 */
typedef struct pool_cache_cpu {
	uint64_t		cc_misses;	/* requests not satisfied locally */
	uint64_t		cc_hits;	/* requests satisfied locally */
	struct pool_cache_group	*cc_current;	/* group allocated from -- TODO confirm */
	struct pool_cache_group	*cc_previous;	/* fallback group -- TODO confirm */
	struct pool_cache	*cc_cache;	/* back-pointer to owning cache */
	int			cc_ipl;		/* IPL for this cache -- TODO confirm */
	int			cc_cpuindex;	/* index of the owning CPU */
#ifdef _KERNEL
	ipl_cookie_t		cc_iplcookie;	/* precomputed IPL cookie -- TODO confirm */
#endif
} pool_cache_cpu_t;
253
/*
 * A pool with a cache of constructed objects layered on top: a global
 * cache layer of object groups, backed by per-CPU state (pc_cpus)
 * for lock-free common-case allocation.
 */
struct pool_cache {
	/* Pool layer. */
	struct pool	pc_pool;	/* the underlying pool */

	/* Cache layer. */
	kmutex_t	pc_lock;	/* locks cache layer */
	TAILQ_ENTRY(pool_cache)
			pc_cachelist;	/* entry on global cache list */
	pcg_t		*pc_emptygroups;/* list of empty cache groups */
	pcg_t		*pc_fullgroups;	/* list of full cache groups */
	pcg_t		*pc_partgroups;	/* groups for reclamation */
	struct pool	*pc_pcgpool;	/* Pool of cache groups */
	int		pc_pcgsize;	/* Use large cache groups? */
	int		pc_ncpu;	/* number cpus set up */
	int		(*pc_ctor)(void *, void *, int);	/* object constructor */
	void		(*pc_dtor)(void *, void *);		/* object destructor */
	void		*pc_arg;	/* for ctor/dtor */
	uint64_t	pc_hits;	/* cache layer hits */
	uint64_t	pc_misses;	/* cache layer misses */
	uint64_t	pc_contended;	/* contention events on cache */
	unsigned int	pc_nempty;	/* empty groups in cache */
	unsigned int	pc_nfull;	/* full groups in cache */
	unsigned int	pc_npart;	/* partial groups in cache */
	unsigned int	pc_refcnt;	/* ref count for pagedaemon, etc */

	/* Diagnostic aides. */
	void		*pc_freecheck;
	bool		pc_redzone;	/* red zone enabled -- TODO confirm */
	size_t		pc_reqsize;	/* caller-requested object size -- TODO confirm */

	/* CPU layer. */
	pool_cache_cpu_t pc_cpu0 __aligned(CACHE_LINE_SIZE);	/* embedded state, presumably for the boot CPU -- TODO confirm */
	void		*pc_cpus[MAXCPUS] __aligned(CACHE_LINE_SIZE);	/* per-CPU state pointers */
};
288
289#endif /* __POOL_EXPOSE */
290
/*
 * Handle used by the pool_cache_* interfaces; the structure itself is
 * only visible under __POOL_EXPOSE (i.e. to the kernel).
 */
typedef struct pool_cache *pool_cache_t;
292
293#ifdef _KERNEL
294/*
295 * pool_allocator_kmem is the default that all pools get unless
296 * otherwise specified. pool_allocator_nointr is provided for
297 * pools that know they will never be accessed in interrupt
298 * context.
299 */
300extern struct pool_allocator pool_allocator_kmem;
301extern struct pool_allocator pool_allocator_nointr;
302extern struct pool_allocator pool_allocator_meta;
303
/* One-time initialization of the pool subsystem. */
void		pool_subsystem_init(void);

/* Create and destroy a pool. */
void		pool_init(struct pool *, size_t, u_int, u_int,
		    int, const char *, struct pool_allocator *, int);
void		pool_destroy(struct pool *);

/* Register a hook invoked when the pool is drained (memory pressure). */
void		pool_set_drain_hook(struct pool *,
		    void (*)(void *, int), void *);

/* Allocate and release individual items; reclaim idle pages. */
void		*pool_get(struct pool *, int);
void		pool_put(struct pool *, void *);
int		pool_reclaim(struct pool *);

/* Pre-allocation, watermarks, and resource limits. */
int		pool_prime(struct pool *, int);
void		pool_setlowat(struct pool *, int);
void		pool_sethiwat(struct pool *, int);
void		pool_sethardlimit(struct pool *, int, const char *, int);
bool		pool_drain(struct pool **);
int		pool_totalpages(void);
int		pool_totalpages_locked(void);

/*
 * Debugging and diagnostic aides.
 */
void		pool_printit(struct pool *, const char *,
		    void (*)(const char *, ...) __printflike(1, 2));
void		pool_printall(const char *, void (*)(const char *, ...)
		    __printflike(1, 2));
int		pool_chk(struct pool *, const char *);

/*
 * Pool cache routines.
 */
pool_cache_t	pool_cache_init(size_t, u_int, u_int, u_int, const char *,
		    struct pool_allocator *, int, int (*)(void *, void *, int),
		    void (*)(void *, void *), void *);
void		pool_cache_bootstrap(pool_cache_t, size_t, u_int, u_int, u_int,
		    const char *, struct pool_allocator *, int,
		    int (*)(void *, void *, int), void (*)(void *, void *),
		    void *);
void		pool_cache_destroy(pool_cache_t);
void		pool_cache_bootstrap_destroy(pool_cache_t);
void		*pool_cache_get_paddr(pool_cache_t, int, paddr_t *);
void		pool_cache_put_paddr(pool_cache_t, void *, paddr_t);
void		pool_cache_destruct_object(pool_cache_t, void *);
void		pool_cache_invalidate(pool_cache_t);
bool		pool_cache_reclaim(pool_cache_t);
void		pool_cache_set_drain_hook(pool_cache_t,
		    void (*)(void *, int), void *);
void		pool_cache_setlowat(pool_cache_t, int);
void		pool_cache_sethiwat(pool_cache_t, int);
void		pool_cache_sethardlimit(pool_cache_t, int, const char *, int);
void		pool_cache_cpu_init(struct cpu_info *);

/* Convenience wrappers when the physical address is not needed. */
#define		pool_cache_get(pc, f) pool_cache_get_paddr((pc), (f), NULL)
#define		pool_cache_put(pc, o) pool_cache_put_paddr((pc), (o), \
				    POOL_PADDR_INVALID)

/* Identify the pool (if any) an address belongs to, for ddb(4)-style output. */
void		pool_whatis(uintptr_t, void (*)(const char *, ...)
		    __printflike(1, 2));
364#endif /* _KERNEL */
365
366#endif /* _SYS_POOL_H_ */
367