/*	$NetBSD$	*/

/*-
 * Copyright (c)2004,2005,2006,2008,2009,2011,2012 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 */

__KERNEL_RCSID(0, "$NetBSD$");
#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/bufq_impl.h>
#include <sys/kmem.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/rbtree.h>
|
|
|
#undef PRIOCSCAN_USE_GLOBAL_POSITION |
|
|
/*
 * Cyclical scan (CSCAN)
 */
TAILQ_HEAD(bqhead, buf); |
|
|
struct cscan_key { |
|
daddr_t k_rawblkno; |
|
int k_cylinder; |
|
}; |
|
|
struct cscan_queue { |
struct cscan_queue { |
struct bqhead cq_head[2]; /* actual lists of buffers */ |
rb_tree_t cq_buffers; /* ordered list of buffers */ |
int cq_idx; /* current list index */ |
#if !defined(PRIOCSCAN_USE_GLOBAL_POSITION) |
int cq_lastcylinder; /* b_cylinder of the last request */ |
struct cscan_key cq_lastkey; /* key of last request */ |
daddr_t cq_lastrawblkno; /* b_rawblkno of the last request */ |
#endif /* !defined(PRIOCSCAN_USE_GLOBAL_POSITION) */ |
|
int cq_sortby; /* BUFQ_SORT_MASK */ |
|
rb_tree_ops_t cq_ops; |
}; |
}; |
|
|
static inline int cscan_empty(const struct cscan_queue *); |
static signed int |
static void cscan_put(struct cscan_queue *, struct buf *, int); |
buf_cmp(const struct buf *b1, const struct buf *b2, int sortby) |
static struct buf *cscan_get(struct cscan_queue *, int); |
{ |
static void cscan_init(struct cscan_queue *); |
|
|
|
static inline int |
if (buf_inorder(b2, b1, sortby)) { |
cscan_empty(const struct cscan_queue *q) |
return 1; /* b1 > b2 */ |
|
} |
|
if (buf_inorder(b1, b2, sortby)) { |
|
return -1; /* b1 < b2 */ |
|
} |
|
return 0; |
|
} |
|
|
|
/* return positive if n1 > n2 */ |
|
static signed int |
|
cscan_tree_compare_nodes(void *context, const void *n1, const void *n2) |
{ |
{ |
|
const struct cscan_queue * const q = context; |
|
const struct buf * const b1 = n1; |
|
const struct buf * const b2 = n2; |
|
const int sortby = q->cq_sortby; |
|
const int diff = buf_cmp(b1, b2, sortby); |
|
|
|
/* |
|
* XXX rawblkno/cylinder might not be unique. eg. unbuffered i/o |
|
*/ |
|
|
return TAILQ_EMPTY(&q->cq_head[0]) && TAILQ_EMPTY(&q->cq_head[1]); |
if (diff != 0) { |
|
return diff; |
|
} |
|
|
|
/* |
|
* XXX rawblkno/cylinder might not be unique. eg. unbuffered i/o |
|
*/ |
|
if (b1 > b2) { |
|
return 1; |
|
} |
|
if (b1 < b2) { |
|
return -1; |
|
} |
|
return 0; |
} |
} |
|
|
static void |
/* return positive if n1 > k2 */ |
cscan_put(struct cscan_queue *q, struct buf *bp, int sortby) |
static signed int |
|
cscan_tree_compare_key(void *context, const void *n1, const void *k2) |
{ |
{ |
struct buf tmp; |
const struct cscan_queue * const q = context; |
struct buf *it; |
const struct buf * const b1 = n1; |
struct bqhead *bqh; |
const struct cscan_key * const key = k2; |
int idx; |
const struct buf tmp = { |
|
.b_rawblkno = key->k_rawblkno, |
|
.b_cylinder = key->k_cylinder, |
|
}; |
|
const struct buf *b2 = &tmp; |
|
const int sortby = q->cq_sortby; |
|
|
|
return buf_cmp(b1, b2, sortby); |
|
} |
|
|
|
static void __unused |
|
cscan_dump(struct cscan_queue *cq) |
|
{ |
|
const int sortby = cq->cq_sortby; |
|
struct buf *bp; |
|
|
tmp.b_cylinder = q->cq_lastcylinder; |
RB_TREE_FOREACH(bp, &cq->cq_buffers) { |
tmp.b_rawblkno = q->cq_lastrawblkno; |
if (sortby == BUFQ_SORT_RAWBLOCK) { |
|
printf(" %jd", (intmax_t)bp->b_rawblkno); |
|
} else { |
|
printf(" %jd/%jd", |
|
(intmax_t)bp->b_cylinder, (intmax_t)bp->b_rawblkno); |
|
} |
|
} |
|
} |
|
|
if (buf_inorder(bp, &tmp, sortby)) |
static inline bool |
idx = 1 - q->cq_idx; |
cscan_empty(struct cscan_queue *q) |
else |
{ |
idx = q->cq_idx; |
|
|
|
bqh = &q->cq_head[idx]; |
/* XXX this might do more work than necessary */ |
|
return rb_tree_iterate(&q->cq_buffers, NULL, RB_DIR_LEFT) == NULL; |
|
} |
|
|
TAILQ_FOREACH(it, bqh, b_actq) |
static void |
if (buf_inorder(bp, it, sortby)) |
cscan_put(struct cscan_queue *q, struct buf *bp) |
break; |
{ |
|
struct buf *obp __diagused; |
|
|
if (it != NULL) |
obp = rb_tree_insert_node(&q->cq_buffers, bp); |
TAILQ_INSERT_BEFORE(it, bp, b_actq); |
KASSERT(obp == bp); /* see cscan_tree_compare_nodes */ |
else |
|
TAILQ_INSERT_TAIL(bqh, bp, b_actq); |
|
} |
} |
|
|
static struct buf * |
static struct buf * |
cscan_get(struct cscan_queue *q, int remove) |
cscan_get(struct cscan_queue *q, int remove, struct cscan_key *key) |
{ |
{ |
int idx = q->cq_idx; |
|
struct bqhead *bqh; |
|
struct buf *bp; |
struct buf *bp; |
|
|
bqh = &q->cq_head[idx]; |
bp = rb_tree_find_node_geq(&q->cq_buffers, key); |
bp = TAILQ_FIRST(bqh); |
KDASSERT(bp == NULL || cscan_tree_compare_key(q, bp, key) >= 0); |
|
|
if (bp == NULL) { |
if (bp == NULL) { |
/* switch queue */ |
bp = rb_tree_iterate(&q->cq_buffers, NULL, RB_DIR_LEFT); |
idx = 1 - idx; |
KDASSERT(cscan_tree_compare_key(q, bp, key) < 0); |
bqh = &q->cq_head[idx]; |
|
bp = TAILQ_FIRST(bqh); |
|
} |
} |
|
|
KDASSERT((bp != NULL && !cscan_empty(q)) || |
|
(bp == NULL && cscan_empty(q))); |
|
|
|
if (bp != NULL && remove) { |
if (bp != NULL && remove) { |
q->cq_idx = idx; |
#if defined(DEBUG) |
TAILQ_REMOVE(bqh, bp, b_actq); |
struct buf *nbp; |
|
#endif /* defined(DEBUG) */ |
|
|
q->cq_lastcylinder = bp->b_cylinder; |
rb_tree_remove_node(&q->cq_buffers, bp); |
q->cq_lastrawblkno = |
/* |
bp->b_rawblkno + (bp->b_bcount >> DEV_BSHIFT); |
* remember the head position. |
|
*/ |
|
key->k_cylinder = bp->b_cylinder; |
|
key->k_rawblkno = bp->b_rawblkno + (bp->b_bcount >> DEV_BSHIFT); |
|
#if defined(DEBUG) |
|
nbp = rb_tree_find_node_geq(&q->cq_buffers, key); |
|
if (nbp != NULL && cscan_tree_compare_nodes(q, nbp, bp) < 0) { |
|
panic("%s: wrong order %p < %p\n", __func__, |
|
nbp, bp); |
|
} |
|
#endif /* defined(DEBUG) */ |
} |
} |
|
return bp; |
return (bp); |
|
} |
} |
|
|
static void |
static void |
cscan_init(struct cscan_queue *q) |
cscan_init(struct cscan_queue *q, int sortby) |
{ |
{ |
|
static const rb_tree_ops_t cscan_tree_ops = { |
|
.rbto_compare_nodes = cscan_tree_compare_nodes, |
|
.rbto_compare_key = cscan_tree_compare_key, |
|
.rbto_node_offset = offsetof(struct buf, b_u.u_rbnode), |
|
.rbto_context = NULL, |
|
}; |
|
|
TAILQ_INIT(&q->cq_head[0]); |
q->cq_sortby = sortby; |
TAILQ_INIT(&q->cq_head[1]); |
/* XXX copy ops to workaround rbtree.h API limitation */ |
|
q->cq_ops = cscan_tree_ops; |
|
q->cq_ops.rbto_context = q; |
|
rb_tree_init(&q->cq_buffers, &q->cq_ops); |
} |
} |
|
|
|
|
/*
 * Per-priority CSCAN.
 */
|
|
struct priocscan_queue { |
struct priocscan_queue { |
struct cscan_queue q_queue; |
struct cscan_queue q_queue; |
int q_burst; |
unsigned int q_burst; |
}; |
}; |
|
|
struct bufq_priocscan { |
struct bufq_priocscan { |
struct priocscan_queue bq_queue[PRIOCSCAN_NQUEUE]; |
struct priocscan_queue bq_queue[PRIOCSCAN_NQUEUE]; |
|
|
#if 0 |
#if defined(PRIOCSCAN_USE_GLOBAL_POSITION) |
/* |
/* |
* XXX using "global" head position can reduce positioning time |
* XXX using "global" head position can reduce positioning time |
* when switching between queues. |
* when switching between queues. |
* although it might affect against fairness. |
* although it might affect against fairness. |
*/ |
*/ |
daddr_t bq_lastrawblkno; |
struct cscan_key bq_lastkey; |
int bq_lastcylinder; |
|
#endif |
#endif |
}; |
}; |
|
|
/*
 * how many requests to serve when having pending requests on other queues.
 *
 * XXX tune
 * be careful: while making these values larger likely
 * increases the total throughput, it can also increase latencies
 * for some workloads.
 */
const int priocscan_burst[] = { |
const int priocscan_burst[] = { |
64, 16, 4 |
64, 16, 4 |
/* NOTE(review): bufq_priocscan_selectqueue() is defined elsewhere (not in this chunk) */
|
static void |
static void |
bufq_priocscan_put(struct bufq_state *bufq, struct buf *bp) |
bufq_priocscan_put(struct bufq_state *bufq, struct buf *bp) |
{ |
{ |
struct bufq_priocscan *q = bufq->bq_private; |
struct bufq_priocscan *q = bufq_private(bufq); |
struct cscan_queue *cq; |
struct cscan_queue *cq; |
const int sortby = bufq->bq_flags & BUFQ_SORT_MASK; |
|
|
|
cq = bufq_priocscan_selectqueue(q, bp); |
cq = bufq_priocscan_selectqueue(q, bp); |
cscan_put(cq, bp, sortby); |
cscan_put(cq, bp); |
} |
} |
|
|
static struct buf * |
static struct buf * |
bufq_priocscan_get(struct bufq_state *bufq, int remove) |
bufq_priocscan_get(struct bufq_state *bufq, int remove) |
{ |
{ |
struct bufq_priocscan *q = bufq->bq_private; |
struct bufq_priocscan *q = bufq_private(bufq); |
struct priocscan_queue *pq, *npq; |
struct priocscan_queue *pq, *npq; |
struct priocscan_queue *first; /* first non-empty queue */ |
struct priocscan_queue *first; /* highest priority non-empty queue */ |
const struct priocscan_queue *epq; |
const struct priocscan_queue *epq; |
const struct cscan_queue *cq; |
|
struct buf *bp; |
struct buf *bp; |
boolean_t single; /* true if there's only one non-empty queue */ |
bool single; /* true if there's only one non-empty queue */ |
|
|
|
/* |
|
* find the highest priority non-empty queue. |
|
*/ |
pq = &q->bq_queue[0]; |
pq = &q->bq_queue[0]; |
epq = pq + PRIOCSCAN_NQUEUE; |
epq = pq + PRIOCSCAN_NQUEUE; |
for (; pq < epq; pq++) { |
for (; pq < epq; pq++) { |
cq = &pq->q_queue; |
if (!cscan_empty(&pq->q_queue)) { |
if (!cscan_empty(cq)) |
|
break; |
break; |
|
} |
} |
} |
if (pq == epq) { |
if (pq == epq) { |
/* there's no requests */ |
/* |
|
* all our queues are empty. there's nothing to serve. |
|
*/ |
return NULL; |
return NULL; |
} |
} |
|
|
first = pq; |
first = pq; |
single = TRUE; |
|
for (npq = first + 1; npq < epq; npq++) { |
/* |
cq = &npq->q_queue; |
* scan the rest of queues. |
if (!cscan_empty(cq)) { |
* |
single = FALSE; |
* if we have two or more non-empty queues, we serve the highest |
if (pq->q_burst > 0) |
* priority one with non-zero burst count. |
|
*/ |
|
single = true; |
|
for (npq = pq + 1; npq < epq; npq++) { |
|
if (!cscan_empty(&npq->q_queue)) { |
|
/* |
|
* we found another non-empty queue. |
|
* it means that a queue needs to consume its burst |
|
* count to be served. |
|
*/ |
|
single = false; |
|
|
|
/* |
|
* check if our current candidate queue has already |
|
* exhausted its burst count. |
|
*/ |
|
if (pq->q_burst > 0) { |
break; |
break; |
|
} |
pq = npq; |
pq = npq; |
} |
} |
} |
} |
if (single) { |
if (single) { |
/* |
/* |
* there's only a non-empty queue. just serve it. |
* there's only a non-empty queue. |
*/ |
* just serve it without consuming its burst count. |
pq = first; |
|
} else if (pq->q_burst > 0) { |
|
/* |
|
* XXX account only by number of requests. is it good enough? |
|
*/ |
*/ |
if (remove) { |
KASSERT(pq == first); |
pq->q_burst--; |
|
} |
|
} else { |
} else { |
/* |
/* |
* no queue was selected due to burst counts |
* there are two or more non-empty queues. |
*/ |
*/ |
int i; |
if (pq->q_burst == 0) { |
|
/* |
|
* no queues can be served because they have already |
|
* exhausted their burst count. |
|
*/ |
|
unsigned int i; |
#ifdef DEBUG |
#ifdef DEBUG |
for (i = 0; i < PRIOCSCAN_NQUEUE; i++) { |
|
pq = &q->bq_queue[i]; |
|
cq = &pq->q_queue; |
|
if (!cscan_empty(cq) && pq->q_burst) |
|
panic("%s: inconsist", __func__); |
|
} |
|
#endif /* DEBUG */ |
|
|
|
/* |
|
* reset burst counts |
|
*/ |
|
if (remove) { |
|
for (i = 0; i < PRIOCSCAN_NQUEUE; i++) { |
for (i = 0; i < PRIOCSCAN_NQUEUE; i++) { |
pq = &q->bq_queue[i]; |
pq = &q->bq_queue[i]; |
pq->q_burst = priocscan_burst[i]; |
if (!cscan_empty(&pq->q_queue) && pq->q_burst) { |
|
panic("%s: inconsist", __func__); |
|
} |
|
} |
|
#endif /* DEBUG */ |
|
/* |
|
* reset burst counts. |
|
*/ |
|
if (remove) { |
|
for (i = 0; i < PRIOCSCAN_NQUEUE; i++) { |
|
pq = &q->bq_queue[i]; |
|
pq->q_burst = priocscan_burst[i]; |
|
} |
} |
} |
} |
|
|
|
|
/* |
|
* serve the highest priority non-empty queue. |
|
*/ |
|
pq = first; |
|
} |
/* |
/* |
* serve first non-empty queue. |
* consume the burst count. |
|
* |
|
* XXX account only by number of requests. is it good enough? |
*/ |
*/ |
pq = first; |
if (remove) { |
|
KASSERT(pq->q_burst > 0); |
|
pq->q_burst--; |
|
} |
} |
} |
|
|
|
/* |
|
* finally, get a request from the selected queue. |
|
*/ |
KDASSERT(!cscan_empty(&pq->q_queue)); |
KDASSERT(!cscan_empty(&pq->q_queue)); |
bp = cscan_get(&pq->q_queue, remove); |
bp = cscan_get(&pq->q_queue, remove, |
|
#if defined(PRIOCSCAN_USE_GLOBAL_POSITION) |
|
&q->bq_lastkey |
|
#else /* defined(PRIOCSCAN_USE_GLOBAL_POSITION) */ |
|
&pq->q_queue.cq_lastkey |
|
#endif /* defined(PRIOCSCAN_USE_GLOBAL_POSITION) */ |
|
); |
KDASSERT(bp != NULL); |
KDASSERT(bp != NULL); |
KDASSERT(&pq->q_queue == bufq_priocscan_selectqueue(q, bp)); |
KDASSERT(&pq->q_queue == bufq_priocscan_selectqueue(q, bp)); |
|
|
return bp; |
return bp; |
} |
} |
|
|
|
static struct buf * |
|
bufq_priocscan_cancel(struct bufq_state *bufq, struct buf *bp) |
|
{ |
|
struct bufq_priocscan * const q = bufq_private(bufq); |
|
unsigned int i; |
|
|
|
for (i = 0; i < PRIOCSCAN_NQUEUE; i++) { |
|
struct cscan_queue * const cq = &q->bq_queue[i].q_queue; |
|
struct buf *it; |
|
|
|
/* |
|
* XXX probably could be faster but the cancel functionality |
|
* is not widely used anyway. |
|
*/ |
|
RB_TREE_FOREACH(it, &cq->cq_buffers) { |
|
if (it == bp) { |
|
rb_tree_remove_node(&cq->cq_buffers, bp); |
|
return bp; |
|
} |
|
} |
|
} |
|
return NULL; |
|
} |
|
|
|
static void |
|
bufq_priocscan_fini(struct bufq_state *bufq) |
|
{ |
|
|
|
KASSERT(bufq->bq_private != NULL); |
|
kmem_free(bufq->bq_private, sizeof(struct bufq_priocscan)); |
|
} |
|
|
static void |
static void |
bufq_priocscan_init(struct bufq_state *bufq) |
bufq_priocscan_init(struct bufq_state *bufq) |
{ |
{ |
struct bufq_priocscan *q; |
struct bufq_priocscan *q; |
int i; |
const int sortby = bufq->bq_flags & BUFQ_SORT_MASK; |
|
unsigned int i; |
|
|
bufq->bq_get = bufq_priocscan_get; |
bufq->bq_get = bufq_priocscan_get; |
bufq->bq_put = bufq_priocscan_put; |
bufq->bq_put = bufq_priocscan_put; |
bufq->bq_private = malloc(sizeof(struct bufq_priocscan), |
bufq->bq_cancel = bufq_priocscan_cancel; |
M_DEVBUF, M_ZERO); |
bufq->bq_fini = bufq_priocscan_fini; |
|
bufq->bq_private = kmem_zalloc(sizeof(struct bufq_priocscan), KM_SLEEP); |
|
|
q = bufq->bq_private; |
q = bufq->bq_private; |
for (i = 0; i < PRIOCSCAN_NQUEUE; i++) { |
for (i = 0; i < PRIOCSCAN_NQUEUE; i++) { |
struct cscan_queue *cq = &q->bq_queue[i].q_queue; |
struct cscan_queue *cq = &q->bq_queue[i].q_queue; |
|
|
cscan_init(cq); |
cscan_init(cq, sortby); |
|
} |
|
} |
|
|
|
MODULE(MODULE_CLASS_BUFQ, bufq_priocscan, NULL); |
|
|
|
static int |
|
bufq_priocscan_modcmd(modcmd_t cmd, void *opaque) |
|
{ |
|
|
|
switch (cmd) { |
|
case MODULE_CMD_INIT: |
|
return bufq_register(&bufq_strat_priocscan); |
|
case MODULE_CMD_FINI: |
|
return bufq_unregister(&bufq_strat_priocscan); |
|
default: |
|
return ENOTTY; |
} |
} |
} |
} |