blob: 3b1f89b6e7435576315c1b75e7754e3658080200 [file] [log] [blame]
/* Virtio ring implementation.
 *
 *  Copyright 2007 Rusty Russell IBM Corporation
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
19#include <linux/virtio.h>
20#include <linux/virtio_ring.h>
Rusty Russelle34f8722008-07-25 12:06:13 -050021#include <linux/virtio_config.h>
Rusty Russell0a8a69d2007-10-22 11:03:40 +100022#include <linux/device.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090023#include <linux/slab.h>
Paul Gortmakerb5a2c4f2011-07-03 16:20:30 -040024#include <linux/module.h>
Rusty Russelle93300b2012-01-12 15:44:43 +103025#include <linux/hrtimer.h>
Joel Stanley6abb2dd2014-02-13 15:03:46 +103026#include <linux/kmemleak.h>
Rusty Russell0a8a69d2007-10-22 11:03:40 +100027
#ifdef DEBUG
/*
 * For development, we want to crash whenever the ring is screwed: log the
 * queue name plus the caller's message, then BUG().
 */
#define BAD_RING(_vq, fmt, args...)				\
	do {							\
		dev_err(&(_vq)->vq.vdev->dev,			\
			"%s:"fmt, (_vq)->vq.name, ##args);	\
		BUG();						\
	} while (0)
/*
 * Caller is supposed to guarantee no reentry.  START_USE() panics if the
 * queue is already marked in use, otherwise it records the current
 * __LINE__ in ->in_use.
 */
#define START_USE(_vq)						\
	do {							\
		if ((_vq)->in_use)				\
			panic("%s:in_use = %i\n",		\
			      (_vq)->vq.name, (_vq)->in_use);	\
		(_vq)->in_use = __LINE__;			\
	} while (0)
/* END_USE() insists the queue was marked in use, then clears the mark. */
#define END_USE(_vq) \
	do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while (0)
#else
/*
 * Non-debug build: log the queue name plus the caller's message and mark
 * the queue broken instead of crashing.  The macro argument is fully
 * parenthesised so that any pointer-valued expression may be passed.
 */
#define BAD_RING(_vq, fmt, args...)				\
	do {							\
		dev_err(&(_vq)->vq.vdev->dev,			\
			"%s:"fmt, (_vq)->vq.name, ##args);	\
		(_vq)->broken = true;				\
	} while (0)
/* Reentrancy tracking compiles away entirely without DEBUG. */
#define START_USE(vq)
#define END_USE(vq)
#endif
56
57struct vring_virtqueue
58{
59 struct virtqueue vq;
60
61 /* Actual memory layout for this queue */
62 struct vring vring;
63
Rusty Russell7b21e342012-01-12 15:44:42 +103064 /* Can we use weak barriers? */
65 bool weak_barriers;
66
Rusty Russell0a8a69d2007-10-22 11:03:40 +100067 /* Other side has made a mess, don't try any more. */
68 bool broken;
69
Mark McLoughlin9fa29b9d2009-05-11 18:11:45 +010070 /* Host supports indirect buffers */
71 bool indirect;
72
Michael S. Tsirkina5c262c2011-05-20 02:10:44 +030073 /* Host publishes avail event idx */
74 bool event;
75
Rusty Russell0a8a69d2007-10-22 11:03:40 +100076 /* Head of free buffer list. */
77 unsigned int free_head;
78 /* Number we've added since last sync. */
79 unsigned int num_added;
80
81 /* Last used index we've seen. */
Anthony Liguori1bc49532007-11-07 15:49:24 -060082 u16 last_used_idx;
Rusty Russell0a8a69d2007-10-22 11:03:40 +100083
84 /* How to notify other side. FIXME: commonalize hcalls! */
Heinz Graalfs46f9c2b2013-10-29 09:38:50 +103085 bool (*notify)(struct virtqueue *vq);
Rusty Russell0a8a69d2007-10-22 11:03:40 +100086
87#ifdef DEBUG
88 /* They're supposed to lock for us. */
89 unsigned int in_use;
Rusty Russelle93300b2012-01-12 15:44:43 +103090
91 /* Figure out if their kicks are too delayed. */
92 bool last_add_time_valid;
93 ktime_t last_add_time;
Rusty Russell0a8a69d2007-10-22 11:03:40 +100094#endif
95
96 /* Tokens for callbacks. */
97 void *data[];
98};
99
100#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
101
Rusty Russellb25bd252014-09-11 10:17:38 +0930102static struct vring_desc *alloc_indirect(unsigned int total_sg, gfp_t gfp)
Mark McLoughlin9fa29b9d2009-05-11 18:11:45 +0100103{
104 struct vring_desc *desc;
Rusty Russellb25bd252014-09-11 10:17:38 +0930105 unsigned int i;
Mark McLoughlin9fa29b9d2009-05-11 18:11:45 +0100106
Will Deaconb92b1b82012-10-19 14:03:33 +0100107 /*
108 * We require lowmem mappings for the descriptors because
109 * otherwise virt_to_phys will give us bogus addresses in the
110 * virtqueue.
111 */
112 gfp &= ~(__GFP_HIGHMEM | __GFP_HIGH);
113
Rusty Russell13816c72013-03-20 15:37:09 +1030114 desc = kmalloc(total_sg * sizeof(struct vring_desc), gfp);
Mark McLoughlin9fa29b9d2009-05-11 18:11:45 +0100115 if (!desc)
Rusty Russellb25bd252014-09-11 10:17:38 +0930116 return NULL;
Mark McLoughlin9fa29b9d2009-05-11 18:11:45 +0100117
Rusty Russellb25bd252014-09-11 10:17:38 +0930118 for (i = 0; i < total_sg; i++)
119 desc[i].next = i+1;
120 return desc;
Mark McLoughlin9fa29b9d2009-05-11 18:11:45 +0100121}
122
Rusty Russell13816c72013-03-20 15:37:09 +1030123static inline int virtqueue_add(struct virtqueue *_vq,
124 struct scatterlist *sgs[],
Rusty Russelleeebf9b2014-09-11 10:17:37 +0930125 unsigned int total_sg,
Rusty Russell13816c72013-03-20 15:37:09 +1030126 unsigned int out_sgs,
127 unsigned int in_sgs,
128 void *data,
129 gfp_t gfp)
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000130{
131 struct vring_virtqueue *vq = to_vvq(_vq);
Rusty Russell13816c72013-03-20 15:37:09 +1030132 struct scatterlist *sg;
Rusty Russellb25bd252014-09-11 10:17:38 +0930133 struct vring_desc *desc;
134 unsigned int i, n, avail, descs_used, uninitialized_var(prev);
Michael S. Tsirkin1fe9b6f2010-07-26 16:55:30 +0930135 int head;
Rusty Russellb25bd252014-09-11 10:17:38 +0930136 bool indirect;
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000137
Mark McLoughlin9fa29b9d2009-05-11 18:11:45 +0100138 START_USE(vq);
139
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000140 BUG_ON(data == NULL);
Mark McLoughlin9fa29b9d2009-05-11 18:11:45 +0100141
Rusty Russell70670442014-03-13 11:23:40 +1030142 if (unlikely(vq->broken)) {
143 END_USE(vq);
144 return -EIO;
145 }
146
Rusty Russelle93300b2012-01-12 15:44:43 +1030147#ifdef DEBUG
148 {
149 ktime_t now = ktime_get();
150
151 /* No kick or get, with .1 second between? Warn. */
152 if (vq->last_add_time_valid)
153 WARN_ON(ktime_to_ms(ktime_sub(now, vq->last_add_time))
154 > 100);
155 vq->last_add_time = now;
156 vq->last_add_time_valid = true;
157 }
158#endif
159
Rusty Russell13816c72013-03-20 15:37:09 +1030160 BUG_ON(total_sg > vq->vring.num);
161 BUG_ON(total_sg == 0);
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000162
Rusty Russellb25bd252014-09-11 10:17:38 +0930163 head = vq->free_head;
164
165 /* If the host supports indirect descriptor tables, and we have multiple
166 * buffers, then go indirect. FIXME: tune this threshold */
167 if (vq->indirect && total_sg > 1 && vq->vq.num_free)
168 desc = alloc_indirect(total_sg, gfp);
169 else
170 desc = NULL;
171
172 if (desc) {
173 /* Use a single buffer which doesn't continue */
174 vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT;
175 vq->vring.desc[head].addr = virt_to_phys(desc);
176 /* avoid kmemleak false positive (hidden by virt_to_phys) */
177 kmemleak_ignore(desc);
178 vq->vring.desc[head].len = total_sg * sizeof(struct vring_desc);
179
180 /* Set up rest to use this indirect table. */
181 i = 0;
182 descs_used = 1;
183 indirect = true;
184 } else {
185 desc = vq->vring.desc;
186 i = head;
187 descs_used = total_sg;
188 indirect = false;
189 }
190
191 if (vq->vq.num_free < descs_used) {
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000192 pr_debug("Can't add buf len %i - avail = %i\n",
Rusty Russellb25bd252014-09-11 10:17:38 +0930193 descs_used, vq->vq.num_free);
Rusty Russell44653ea2008-07-25 12:06:04 -0500194 /* FIXME: for historical reasons, we force a notify here if
195 * there are outgoing parts to the buffer. Presumably the
196 * host should service the ring ASAP. */
Rusty Russell13816c72013-03-20 15:37:09 +1030197 if (out_sgs)
Rusty Russell44653ea2008-07-25 12:06:04 -0500198 vq->notify(&vq->vq);
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000199 END_USE(vq);
200 return -ENOSPC;
201 }
202
203 /* We're about to use some buffers from the free list. */
Rusty Russellb25bd252014-09-11 10:17:38 +0930204 vq->vq.num_free -= descs_used;
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000205
Rusty Russell13816c72013-03-20 15:37:09 +1030206 for (n = 0; n < out_sgs; n++) {
Rusty Russelleeebf9b2014-09-11 10:17:37 +0930207 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
Rusty Russellb25bd252014-09-11 10:17:38 +0930208 desc[i].flags = VRING_DESC_F_NEXT;
209 desc[i].addr = sg_phys(sg);
210 desc[i].len = sg->length;
Rusty Russell13816c72013-03-20 15:37:09 +1030211 prev = i;
Rusty Russellb25bd252014-09-11 10:17:38 +0930212 i = desc[i].next;
Rusty Russell13816c72013-03-20 15:37:09 +1030213 }
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000214 }
Rusty Russell13816c72013-03-20 15:37:09 +1030215 for (; n < (out_sgs + in_sgs); n++) {
Rusty Russelleeebf9b2014-09-11 10:17:37 +0930216 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
Rusty Russellb25bd252014-09-11 10:17:38 +0930217 desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
218 desc[i].addr = sg_phys(sg);
219 desc[i].len = sg->length;
Rusty Russell13816c72013-03-20 15:37:09 +1030220 prev = i;
Rusty Russellb25bd252014-09-11 10:17:38 +0930221 i = desc[i].next;
Rusty Russell13816c72013-03-20 15:37:09 +1030222 }
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000223 }
224 /* Last one doesn't continue. */
Rusty Russellb25bd252014-09-11 10:17:38 +0930225 desc[prev].flags &= ~VRING_DESC_F_NEXT;
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000226
227 /* Update free pointer */
Rusty Russellb25bd252014-09-11 10:17:38 +0930228 if (indirect)
229 vq->free_head = vq->vring.desc[head].next;
230 else
231 vq->free_head = i;
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000232
233 /* Set token. */
234 vq->data[head] = data;
235
236 /* Put entry in available array (but don't update avail->idx until they
Rusty Russell3b720b82012-01-12 15:44:43 +1030237 * do sync). */
Rusty Russellee7cd892012-01-12 15:44:43 +1030238 avail = (vq->vring.avail->idx & (vq->vring.num-1));
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000239 vq->vring.avail->ring[avail] = head;
240
Rusty Russellee7cd892012-01-12 15:44:43 +1030241 /* Descriptors and available array need to be set before we expose the
242 * new available array entries. */
Rusty Russella9a0fef2013-03-18 13:22:19 +1030243 virtio_wmb(vq->weak_barriers);
Rusty Russellee7cd892012-01-12 15:44:43 +1030244 vq->vring.avail->idx++;
245 vq->num_added++;
246
247 /* This is very unlikely, but theoretically possible. Kick
248 * just in case. */
249 if (unlikely(vq->num_added == (1 << 16) - 1))
250 virtqueue_kick(_vq);
251
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000252 pr_debug("Added buffer head %i to %p\n", head, vq);
253 END_USE(vq);
Rusty Russell3c1b27d2009-09-23 22:26:31 -0600254
Rusty Russell98e8c6b2012-10-16 23:56:15 +1030255 return 0;
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000256}
Rusty Russell13816c72013-03-20 15:37:09 +1030257
258/**
Rusty Russell13816c72013-03-20 15:37:09 +1030259 * virtqueue_add_sgs - expose buffers to other end
260 * @vq: the struct virtqueue we're talking about.
261 * @sgs: array of terminated scatterlists.
262 * @out_num: the number of scatterlists readable by other side
263 * @in_num: the number of scatterlists which are writable (after readable ones)
264 * @data: the token identifying the buffer.
265 * @gfp: how to do memory allocations (if necessary).
266 *
267 * Caller must ensure we don't call this with other virtqueue operations
268 * at the same time (except where noted).
269 *
Rusty Russell70670442014-03-13 11:23:40 +1030270 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
Rusty Russell13816c72013-03-20 15:37:09 +1030271 */
272int virtqueue_add_sgs(struct virtqueue *_vq,
273 struct scatterlist *sgs[],
274 unsigned int out_sgs,
275 unsigned int in_sgs,
276 void *data,
277 gfp_t gfp)
278{
Rusty Russelleeebf9b2014-09-11 10:17:37 +0930279 unsigned int i, total_sg = 0;
Rusty Russell13816c72013-03-20 15:37:09 +1030280
281 /* Count them first. */
Rusty Russelleeebf9b2014-09-11 10:17:37 +0930282 for (i = 0; i < out_sgs + in_sgs; i++) {
Rusty Russell13816c72013-03-20 15:37:09 +1030283 struct scatterlist *sg;
284 for (sg = sgs[i]; sg; sg = sg_next(sg))
Rusty Russelleeebf9b2014-09-11 10:17:37 +0930285 total_sg++;
Rusty Russell13816c72013-03-20 15:37:09 +1030286 }
Rusty Russelleeebf9b2014-09-11 10:17:37 +0930287 return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, data, gfp);
Rusty Russell13816c72013-03-20 15:37:09 +1030288}
289EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
290
291/**
Rusty Russell282edb32013-03-20 15:44:26 +1030292 * virtqueue_add_outbuf - expose output buffers to other end
293 * @vq: the struct virtqueue we're talking about.
Rusty Russelleeebf9b2014-09-11 10:17:37 +0930294 * @sg: scatterlist (must be well-formed and terminated!)
295 * @num: the number of entries in @sg readable by other side
Rusty Russell282edb32013-03-20 15:44:26 +1030296 * @data: the token identifying the buffer.
297 * @gfp: how to do memory allocations (if necessary).
298 *
299 * Caller must ensure we don't call this with other virtqueue operations
300 * at the same time (except where noted).
301 *
Rusty Russell70670442014-03-13 11:23:40 +1030302 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
Rusty Russell282edb32013-03-20 15:44:26 +1030303 */
304int virtqueue_add_outbuf(struct virtqueue *vq,
Rusty Russelleeebf9b2014-09-11 10:17:37 +0930305 struct scatterlist *sg, unsigned int num,
Rusty Russell282edb32013-03-20 15:44:26 +1030306 void *data,
307 gfp_t gfp)
308{
Rusty Russelleeebf9b2014-09-11 10:17:37 +0930309 return virtqueue_add(vq, &sg, num, 1, 0, data, gfp);
Rusty Russell282edb32013-03-20 15:44:26 +1030310}
311EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
312
313/**
314 * virtqueue_add_inbuf - expose input buffers to other end
315 * @vq: the struct virtqueue we're talking about.
Rusty Russelleeebf9b2014-09-11 10:17:37 +0930316 * @sg: scatterlist (must be well-formed and terminated!)
317 * @num: the number of entries in @sg writable by other side
Rusty Russell282edb32013-03-20 15:44:26 +1030318 * @data: the token identifying the buffer.
319 * @gfp: how to do memory allocations (if necessary).
320 *
321 * Caller must ensure we don't call this with other virtqueue operations
322 * at the same time (except where noted).
323 *
Rusty Russell70670442014-03-13 11:23:40 +1030324 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
Rusty Russell282edb32013-03-20 15:44:26 +1030325 */
326int virtqueue_add_inbuf(struct virtqueue *vq,
Rusty Russelleeebf9b2014-09-11 10:17:37 +0930327 struct scatterlist *sg, unsigned int num,
Rusty Russell282edb32013-03-20 15:44:26 +1030328 void *data,
329 gfp_t gfp)
330{
Rusty Russelleeebf9b2014-09-11 10:17:37 +0930331 return virtqueue_add(vq, &sg, num, 0, 1, data, gfp);
Rusty Russell282edb32013-03-20 15:44:26 +1030332}
333EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
334
335/**
Rusty Russell41f03772012-01-12 15:44:43 +1030336 * virtqueue_kick_prepare - first half of split virtqueue_kick call.
Rusty Russell5dfc1762012-01-12 15:44:42 +1030337 * @vq: the struct virtqueue
338 *
Rusty Russell41f03772012-01-12 15:44:43 +1030339 * Instead of virtqueue_kick(), you can do:
340 * if (virtqueue_kick_prepare(vq))
341 * virtqueue_notify(vq);
Rusty Russell5dfc1762012-01-12 15:44:42 +1030342 *
Rusty Russell41f03772012-01-12 15:44:43 +1030343 * This is sometimes useful because the virtqueue_kick_prepare() needs
344 * to be serialized, but the actual virtqueue_notify() call does not.
Rusty Russell5dfc1762012-01-12 15:44:42 +1030345 */
Rusty Russell41f03772012-01-12 15:44:43 +1030346bool virtqueue_kick_prepare(struct virtqueue *_vq)
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000347{
348 struct vring_virtqueue *vq = to_vvq(_vq);
Michael S. Tsirkina5c262c2011-05-20 02:10:44 +0300349 u16 new, old;
Rusty Russell41f03772012-01-12 15:44:43 +1030350 bool needs_kick;
351
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000352 START_USE(vq);
Jason Wanga72caae2012-01-20 16:17:08 +0800353 /* We need to expose available array entries before checking avail
354 * event. */
Rusty Russella9a0fef2013-03-18 13:22:19 +1030355 virtio_mb(vq->weak_barriers);
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000356
Rusty Russellee7cd892012-01-12 15:44:43 +1030357 old = vq->vring.avail->idx - vq->num_added;
358 new = vq->vring.avail->idx;
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000359 vq->num_added = 0;
360
Rusty Russelle93300b2012-01-12 15:44:43 +1030361#ifdef DEBUG
362 if (vq->last_add_time_valid) {
363 WARN_ON(ktime_to_ms(ktime_sub(ktime_get(),
364 vq->last_add_time)) > 100);
365 }
366 vq->last_add_time_valid = false;
367#endif
368
Rusty Russell41f03772012-01-12 15:44:43 +1030369 if (vq->event) {
370 needs_kick = vring_need_event(vring_avail_event(&vq->vring),
371 new, old);
372 } else {
373 needs_kick = !(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY);
374 }
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000375 END_USE(vq);
Rusty Russell41f03772012-01-12 15:44:43 +1030376 return needs_kick;
377}
378EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
379
380/**
381 * virtqueue_notify - second half of split virtqueue_kick call.
382 * @vq: the struct virtqueue
383 *
384 * This does not need to be serialized.
Heinz Graalfs5b1bf7c2013-10-29 09:39:48 +1030385 *
386 * Returns false if host notify failed or queue is broken, otherwise true.
Rusty Russell41f03772012-01-12 15:44:43 +1030387 */
Heinz Graalfs5b1bf7c2013-10-29 09:39:48 +1030388bool virtqueue_notify(struct virtqueue *_vq)
Rusty Russell41f03772012-01-12 15:44:43 +1030389{
390 struct vring_virtqueue *vq = to_vvq(_vq);
391
Heinz Graalfs5b1bf7c2013-10-29 09:39:48 +1030392 if (unlikely(vq->broken))
393 return false;
394
Rusty Russell41f03772012-01-12 15:44:43 +1030395 /* Prod other side to tell it about changes. */
Heinz Graalfs2342d6a2013-11-05 21:20:27 +1030396 if (!vq->notify(_vq)) {
Heinz Graalfs5b1bf7c2013-10-29 09:39:48 +1030397 vq->broken = true;
398 return false;
399 }
400 return true;
Rusty Russell41f03772012-01-12 15:44:43 +1030401}
402EXPORT_SYMBOL_GPL(virtqueue_notify);
403
404/**
405 * virtqueue_kick - update after add_buf
406 * @vq: the struct virtqueue
407 *
Rusty Russellb3087e42013-05-20 12:15:44 +0930408 * After one or more virtqueue_add_* calls, invoke this to kick
Rusty Russell41f03772012-01-12 15:44:43 +1030409 * the other side.
410 *
411 * Caller must ensure we don't call this with other virtqueue
412 * operations at the same time (except where noted).
Heinz Graalfs5b1bf7c2013-10-29 09:39:48 +1030413 *
414 * Returns false if kick failed, otherwise true.
Rusty Russell41f03772012-01-12 15:44:43 +1030415 */
Heinz Graalfs5b1bf7c2013-10-29 09:39:48 +1030416bool virtqueue_kick(struct virtqueue *vq)
Rusty Russell41f03772012-01-12 15:44:43 +1030417{
418 if (virtqueue_kick_prepare(vq))
Heinz Graalfs5b1bf7c2013-10-29 09:39:48 +1030419 return virtqueue_notify(vq);
420 return true;
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000421}
Michael S. Tsirkin7c5e9ed2010-04-12 16:19:07 +0300422EXPORT_SYMBOL_GPL(virtqueue_kick);
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000423
424static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
425{
426 unsigned int i;
427
428 /* Clear data ptr. */
429 vq->data[head] = NULL;
430
431 /* Put back on free list: find end */
432 i = head;
Mark McLoughlin9fa29b9d2009-05-11 18:11:45 +0100433
434 /* Free the indirect table */
435 if (vq->vring.desc[i].flags & VRING_DESC_F_INDIRECT)
436 kfree(phys_to_virt(vq->vring.desc[i].addr));
437
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000438 while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) {
439 i = vq->vring.desc[i].next;
Rusty Russell06ca2872012-10-16 23:56:14 +1030440 vq->vq.num_free++;
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000441 }
442
443 vq->vring.desc[i].next = vq->free_head;
444 vq->free_head = head;
445 /* Plus final descriptor */
Rusty Russell06ca2872012-10-16 23:56:14 +1030446 vq->vq.num_free++;
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000447}
448
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000449static inline bool more_used(const struct vring_virtqueue *vq)
450{
451 return vq->last_used_idx != vq->vring.used->idx;
452}
453
Rusty Russell5dfc1762012-01-12 15:44:42 +1030454/**
455 * virtqueue_get_buf - get the next used buffer
456 * @vq: the struct virtqueue we're talking about.
457 * @len: the length written into the buffer
458 *
459 * If the driver wrote data into the buffer, @len will be set to the
460 * amount written. This means you don't need to clear the buffer
461 * beforehand to ensure there's no data leakage in the case of short
462 * writes.
463 *
464 * Caller must ensure we don't call this with other virtqueue
465 * operations at the same time (except where noted).
466 *
467 * Returns NULL if there are no used buffers, or the "data" token
Rusty Russellb3087e42013-05-20 12:15:44 +0930468 * handed to virtqueue_add_*().
Rusty Russell5dfc1762012-01-12 15:44:42 +1030469 */
Michael S. Tsirkin7c5e9ed2010-04-12 16:19:07 +0300470void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000471{
472 struct vring_virtqueue *vq = to_vvq(_vq);
473 void *ret;
474 unsigned int i;
Rusty Russell3b720b82012-01-12 15:44:43 +1030475 u16 last_used;
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000476
477 START_USE(vq);
478
Rusty Russell5ef82752008-05-02 21:50:43 -0500479 if (unlikely(vq->broken)) {
480 END_USE(vq);
481 return NULL;
482 }
483
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000484 if (!more_used(vq)) {
485 pr_debug("No more buffers in queue\n");
486 END_USE(vq);
487 return NULL;
488 }
489
Michael S. Tsirkin2d61ba92009-10-25 15:28:53 +0200490 /* Only get used array entries after they have been exposed by host. */
Rusty Russella9a0fef2013-03-18 13:22:19 +1030491 virtio_rmb(vq->weak_barriers);
Michael S. Tsirkin2d61ba92009-10-25 15:28:53 +0200492
Rusty Russell3b720b82012-01-12 15:44:43 +1030493 last_used = (vq->last_used_idx & (vq->vring.num - 1));
494 i = vq->vring.used->ring[last_used].id;
495 *len = vq->vring.used->ring[last_used].len;
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000496
497 if (unlikely(i >= vq->vring.num)) {
498 BAD_RING(vq, "id %u out of range\n", i);
499 return NULL;
500 }
501 if (unlikely(!vq->data[i])) {
502 BAD_RING(vq, "id %u is not a head!\n", i);
503 return NULL;
504 }
505
506 /* detach_buf clears data, so grab it now. */
507 ret = vq->data[i];
508 detach_buf(vq, i);
509 vq->last_used_idx++;
Michael S. Tsirkina5c262c2011-05-20 02:10:44 +0300510 /* If we expect an interrupt for the next entry, tell host
511 * by writing event index and flush out the write before
512 * the read in the next get_buf call. */
513 if (!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
514 vring_used_event(&vq->vring) = vq->last_used_idx;
Rusty Russella9a0fef2013-03-18 13:22:19 +1030515 virtio_mb(vq->weak_barriers);
Michael S. Tsirkina5c262c2011-05-20 02:10:44 +0300516 }
517
Rusty Russelle93300b2012-01-12 15:44:43 +1030518#ifdef DEBUG
519 vq->last_add_time_valid = false;
520#endif
521
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000522 END_USE(vq);
523 return ret;
524}
Michael S. Tsirkin7c5e9ed2010-04-12 16:19:07 +0300525EXPORT_SYMBOL_GPL(virtqueue_get_buf);
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000526
Rusty Russell5dfc1762012-01-12 15:44:42 +1030527/**
528 * virtqueue_disable_cb - disable callbacks
529 * @vq: the struct virtqueue we're talking about.
530 *
531 * Note that this is not necessarily synchronous, hence unreliable and only
532 * useful as an optimization.
533 *
534 * Unlike other operations, this need not be serialized.
535 */
Michael S. Tsirkin7c5e9ed2010-04-12 16:19:07 +0300536void virtqueue_disable_cb(struct virtqueue *_vq)
Rusty Russell18445c42008-02-04 23:49:57 -0500537{
538 struct vring_virtqueue *vq = to_vvq(_vq);
539
Rusty Russell18445c42008-02-04 23:49:57 -0500540 vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
Rusty Russell18445c42008-02-04 23:49:57 -0500541}
Michael S. Tsirkin7c5e9ed2010-04-12 16:19:07 +0300542EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
Rusty Russell18445c42008-02-04 23:49:57 -0500543
Rusty Russell5dfc1762012-01-12 15:44:42 +1030544/**
Michael S. Tsirkincc229882013-07-09 13:19:18 +0300545 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
546 * @vq: the struct virtqueue we're talking about.
547 *
548 * This re-enables callbacks; it returns current queue state
549 * in an opaque unsigned value. This value should be later tested by
550 * virtqueue_poll, to detect a possible race between the driver checking for
551 * more work, and enabling callbacks.
552 *
553 * Caller must ensure we don't call this with other virtqueue
554 * operations at the same time (except where noted).
555 */
556unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
557{
558 struct vring_virtqueue *vq = to_vvq(_vq);
559 u16 last_used_idx;
560
561 START_USE(vq);
562
563 /* We optimistically turn back on interrupts, then check if there was
564 * more to do. */
565 /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
566 * either clear the flags bit or point the event index at the next
567 * entry. Always do both to keep code simple. */
568 vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
569 vring_used_event(&vq->vring) = last_used_idx = vq->last_used_idx;
570 END_USE(vq);
571 return last_used_idx;
572}
573EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
574
575/**
576 * virtqueue_poll - query pending used buffers
577 * @vq: the struct virtqueue we're talking about.
578 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
579 *
580 * Returns "true" if there are pending used buffers in the queue.
581 *
582 * This does not need to be serialized.
583 */
584bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx)
585{
586 struct vring_virtqueue *vq = to_vvq(_vq);
587
588 virtio_mb(vq->weak_barriers);
589 return (u16)last_used_idx != vq->vring.used->idx;
590}
591EXPORT_SYMBOL_GPL(virtqueue_poll);
592
593/**
Rusty Russell5dfc1762012-01-12 15:44:42 +1030594 * virtqueue_enable_cb - restart callbacks after disable_cb.
595 * @vq: the struct virtqueue we're talking about.
596 *
597 * This re-enables callbacks; it returns "false" if there are pending
598 * buffers in the queue, to detect a possible race between the driver
599 * checking for more work, and enabling callbacks.
600 *
601 * Caller must ensure we don't call this with other virtqueue
602 * operations at the same time (except where noted).
603 */
Michael S. Tsirkin7c5e9ed2010-04-12 16:19:07 +0300604bool virtqueue_enable_cb(struct virtqueue *_vq)
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000605{
Michael S. Tsirkincc229882013-07-09 13:19:18 +0300606 unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq);
607 return !virtqueue_poll(_vq, last_used_idx);
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000608}
Michael S. Tsirkin7c5e9ed2010-04-12 16:19:07 +0300609EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000610
Rusty Russell5dfc1762012-01-12 15:44:42 +1030611/**
612 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
613 * @vq: the struct virtqueue we're talking about.
614 *
615 * This re-enables callbacks but hints to the other side to delay
616 * interrupts until most of the available buffers have been processed;
617 * it returns "false" if there are many pending buffers in the queue,
618 * to detect a possible race between the driver checking for more work,
619 * and enabling callbacks.
620 *
621 * Caller must ensure we don't call this with other virtqueue
622 * operations at the same time (except where noted).
623 */
Michael S. Tsirkin7ab358c2011-05-20 02:11:14 +0300624bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
625{
626 struct vring_virtqueue *vq = to_vvq(_vq);
627 u16 bufs;
628
629 START_USE(vq);
630
631 /* We optimistically turn back on interrupts, then check if there was
632 * more to do. */
633 /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
634 * either clear the flags bit or point the event index at the next
635 * entry. Always do both to keep code simple. */
636 vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
637 /* TODO: tune this threshold */
638 bufs = (u16)(vq->vring.avail->idx - vq->last_used_idx) * 3 / 4;
639 vring_used_event(&vq->vring) = vq->last_used_idx + bufs;
Rusty Russella9a0fef2013-03-18 13:22:19 +1030640 virtio_mb(vq->weak_barriers);
Michael S. Tsirkin7ab358c2011-05-20 02:11:14 +0300641 if (unlikely((u16)(vq->vring.used->idx - vq->last_used_idx) > bufs)) {
642 END_USE(vq);
643 return false;
644 }
645
646 END_USE(vq);
647 return true;
648}
649EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
650
Rusty Russell5dfc1762012-01-12 15:44:42 +1030651/**
652 * virtqueue_detach_unused_buf - detach first unused buffer
653 * @vq: the struct virtqueue we're talking about.
654 *
Rusty Russellb3087e42013-05-20 12:15:44 +0930655 * Returns NULL or the "data" token handed to virtqueue_add_*().
Rusty Russell5dfc1762012-01-12 15:44:42 +1030656 * This is not valid on an active queue; it is useful only for device
657 * shutdown.
658 */
Michael S. Tsirkin7c5e9ed2010-04-12 16:19:07 +0300659void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
Shirley Mac021eac2010-01-18 19:15:23 +0530660{
661 struct vring_virtqueue *vq = to_vvq(_vq);
662 unsigned int i;
663 void *buf;
664
665 START_USE(vq);
666
667 for (i = 0; i < vq->vring.num; i++) {
668 if (!vq->data[i])
669 continue;
670 /* detach_buf clears data, so grab it now. */
671 buf = vq->data[i];
672 detach_buf(vq, i);
Amit Shahb3258ff2011-03-16 19:12:10 +0530673 vq->vring.avail->idx--;
Shirley Mac021eac2010-01-18 19:15:23 +0530674 END_USE(vq);
675 return buf;
676 }
677 /* That should have freed everything. */
Rusty Russell06ca2872012-10-16 23:56:14 +1030678 BUG_ON(vq->vq.num_free != vq->vring.num);
Shirley Mac021eac2010-01-18 19:15:23 +0530679
680 END_USE(vq);
681 return NULL;
682}
Michael S. Tsirkin7c5e9ed2010-04-12 16:19:07 +0300683EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
Shirley Mac021eac2010-01-18 19:15:23 +0530684
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000685irqreturn_t vring_interrupt(int irq, void *_vq)
686{
687 struct vring_virtqueue *vq = to_vvq(_vq);
688
689 if (!more_used(vq)) {
690 pr_debug("virtqueue interrupt with no work for %p\n", vq);
691 return IRQ_NONE;
692 }
693
694 if (unlikely(vq->broken))
695 return IRQ_HANDLED;
696
697 pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
Rusty Russell18445c42008-02-04 23:49:57 -0500698 if (vq->vq.callback)
699 vq->vq.callback(&vq->vq);
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000700
701 return IRQ_HANDLED;
702}
Rusty Russellc6fd4702008-02-04 23:50:05 -0500703EXPORT_SYMBOL_GPL(vring_interrupt);
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000704
Jason Wang17bb6d42012-08-28 13:54:13 +0200705struct virtqueue *vring_new_virtqueue(unsigned int index,
706 unsigned int num,
Rusty Russell87c7d572008-12-30 09:26:03 -0600707 unsigned int vring_align,
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000708 struct virtio_device *vdev,
Rusty Russell7b21e342012-01-12 15:44:42 +1030709 bool weak_barriers,
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000710 void *pages,
Heinz Graalfs46f9c2b2013-10-29 09:38:50 +1030711 bool (*notify)(struct virtqueue *),
Rusty Russell9499f5e2009-06-12 22:16:35 -0600712 void (*callback)(struct virtqueue *),
713 const char *name)
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000714{
715 struct vring_virtqueue *vq;
716 unsigned int i;
717
Rusty Russell42b36cc2007-11-12 13:39:18 +1100718 /* We assume num is a power of 2. */
719 if (num & (num - 1)) {
720 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
721 return NULL;
722 }
723
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000724 vq = kmalloc(sizeof(*vq) + sizeof(void *)*num, GFP_KERNEL);
725 if (!vq)
726 return NULL;
727
Rusty Russell87c7d572008-12-30 09:26:03 -0600728 vring_init(&vq->vring, num, pages, vring_align);
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000729 vq->vq.callback = callback;
730 vq->vq.vdev = vdev;
Rusty Russell9499f5e2009-06-12 22:16:35 -0600731 vq->vq.name = name;
Rusty Russell06ca2872012-10-16 23:56:14 +1030732 vq->vq.num_free = num;
733 vq->vq.index = index;
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000734 vq->notify = notify;
Rusty Russell7b21e342012-01-12 15:44:42 +1030735 vq->weak_barriers = weak_barriers;
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000736 vq->broken = false;
737 vq->last_used_idx = 0;
738 vq->num_added = 0;
Rusty Russell9499f5e2009-06-12 22:16:35 -0600739 list_add_tail(&vq->vq.list, &vdev->vqs);
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000740#ifdef DEBUG
741 vq->in_use = false;
Rusty Russelle93300b2012-01-12 15:44:43 +1030742 vq->last_add_time_valid = false;
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000743#endif
744
Mark McLoughlin9fa29b9d2009-05-11 18:11:45 +0100745 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC);
Michael S. Tsirkina5c262c2011-05-20 02:10:44 +0300746 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
Mark McLoughlin9fa29b9d2009-05-11 18:11:45 +0100747
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000748 /* No callback? Tell other side not to bother us. */
749 if (!callback)
750 vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
751
752 /* Put everything in free lists. */
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000753 vq->free_head = 0;
Amit Shah3b870622010-02-12 10:32:14 +0530754 for (i = 0; i < num-1; i++) {
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000755 vq->vring.desc[i].next = i+1;
Amit Shah3b870622010-02-12 10:32:14 +0530756 vq->data[i] = NULL;
757 }
758 vq->data[i] = NULL;
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000759
760 return &vq->vq;
761}
Rusty Russellc6fd4702008-02-04 23:50:05 -0500762EXPORT_SYMBOL_GPL(vring_new_virtqueue);
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000763
764void vring_del_virtqueue(struct virtqueue *vq)
765{
Rusty Russell9499f5e2009-06-12 22:16:35 -0600766 list_del(&vq->list);
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000767 kfree(to_vvq(vq));
768}
Rusty Russellc6fd4702008-02-04 23:50:05 -0500769EXPORT_SYMBOL_GPL(vring_del_virtqueue);
Rusty Russell0a8a69d2007-10-22 11:03:40 +1000770
Rusty Russelle34f8722008-07-25 12:06:13 -0500771/* Manipulates transport-specific feature bits. */
772void vring_transport_features(struct virtio_device *vdev)
773{
774 unsigned int i;
775
776 for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
777 switch (i) {
Mark McLoughlin9fa29b9d2009-05-11 18:11:45 +0100778 case VIRTIO_RING_F_INDIRECT_DESC:
779 break;
Michael S. Tsirkina5c262c2011-05-20 02:10:44 +0300780 case VIRTIO_RING_F_EVENT_IDX:
781 break;
Rusty Russelle34f8722008-07-25 12:06:13 -0500782 default:
783 /* We don't understand this bit. */
784 clear_bit(i, vdev->features);
785 }
786 }
787}
788EXPORT_SYMBOL_GPL(vring_transport_features);
789
Rusty Russell5dfc1762012-01-12 15:44:42 +1030790/**
791 * virtqueue_get_vring_size - return the size of the virtqueue's vring
792 * @vq: the struct virtqueue containing the vring of interest.
793 *
794 * Returns the size of the vring. This is mainly used for boasting to
795 * userspace. Unlike other operations, this need not be serialized.
796 */
Rick Jones8f9f4662011-10-19 08:10:59 +0000797unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
798{
799
800 struct vring_virtqueue *vq = to_vvq(_vq);
801
802 return vq->vring.num;
803}
804EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
805
Heinz Graalfsb3b32c92013-10-29 09:40:19 +1030806bool virtqueue_is_broken(struct virtqueue *_vq)
807{
808 struct vring_virtqueue *vq = to_vvq(_vq);
809
810 return vq->broken;
811}
812EXPORT_SYMBOL_GPL(virtqueue_is_broken);
813
Rusty Russelle2dcdfe2014-04-28 11:15:08 +0930814/*
815 * This should prevent the device from being used, allowing drivers to
816 * recover. You may need to grab appropriate locks to flush.
817 */
818void virtio_break_device(struct virtio_device *dev)
819{
820 struct virtqueue *_vq;
821
822 list_for_each_entry(_vq, &dev->vqs, list) {
823 struct vring_virtqueue *vq = to_vvq(_vq);
824 vq->broken = true;
825 }
826}
827EXPORT_SYMBOL_GPL(virtio_break_device);
828
Rusty Russellc6fd4702008-02-04 23:50:05 -0500829MODULE_LICENSE("GPL");