/*
 * Copyright (C) 2016 Red Hat, Inc.
 * Author: Michael S. Tsirkin <mst@redhat.com>
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 * Simple descriptor-based ring. virtio 0.9 compatible event index is used for
 * signalling, unconditionally.
 */
#define _GNU_SOURCE
#include "main.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/* Next - Where next entry will be written.
 * Prev - "Next" value when event triggered previously.
 * Event - Peer requested event after writing this entry.
 */
static inline bool need_event(unsigned short event,
                              unsigned short next,
                              unsigned short prev)
{
        return (unsigned short)(next - event - 1) < (unsigned short)(next - prev);
}
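
/* Worked example: with prev = 2, next = 5 and event = 3, the entries
 * written since the last notification are 2, 3 and 4, which include the
 * requested event index, so (unsigned short)(5 - 3 - 1) = 1 is less than
 * (unsigned short)(5 - 2) = 3 and need_event() returns true.  With
 * event = 7 instead, the first subtraction wraps to 65533, the comparison
 * fails, and the notification is suppressed.  The unsigned short
 * arithmetic keeps the test correct across 16-bit index wraparound.
 */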

/* Design:
 * Guest adds descriptors with unique index values and DESC_HW in flags.
 * Host overwrites used descriptors with correct len, index, and DESC_HW clear.
 * Flags are always set last.
 */
#define DESC_HW 0x1

struct desc {
        unsigned short flags;
        unsigned short index;
        unsigned len;
        unsigned long long addr;
};
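
/* On the common ABIs this test builds for, the layout above packs to
 * 16 bytes (2 + 2 + 4 + 8), so a 64-byte cache line holds four
 * descriptors; this is an observation about typical targets, not a
 * guarantee made by the code.
 */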

/* how much padding is needed to avoid false cache sharing */
#define HOST_GUEST_PADDING 0x80
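
/* 0x80 (128) bytes presumably covers 64-byte x86 cache lines together
 * with the adjacent-line prefetcher, as well as CPUs with 128-byte
 * lines; the exact rationale is not stated in the original comment.
 */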

/* Mostly read */
struct event {
        unsigned short kick_index;
        unsigned char reserved0[HOST_GUEST_PADDING - 2];
        unsigned short call_index;
        unsigned char reserved1[HOST_GUEST_PADDING - 2];
};

struct data {
        void *buf; /* descriptor is writeable, we can't get buf from there */
        void *data;
} *data;

struct desc *ring;
struct event *event;

struct guest {
        unsigned avail_idx;
        unsigned last_used_idx;
        unsigned num_free;
        unsigned kicked_avail_idx;
        unsigned char reserved[HOST_GUEST_PADDING - 12];
} guest;

struct host {
        /* we do not need to track last avail index
         * unless we have more than one in flight.
         */
        unsigned used_idx;
        unsigned called_used_idx;
        unsigned char reserved[HOST_GUEST_PADDING - 4];
} host;

/* implemented by ring */
void alloc_ring(void)
{
        int ret;
        int i;

        ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring);
        if (ret) {
                /* posix_memalign() reports failure via its return value,
                 * not errno, so don't rely on perror() here.
                 */
                fprintf(stderr, "Unable to allocate ring buffer: %s\n",
                        strerror(ret));
                exit(3);
        }
        event = malloc(sizeof *event);
        if (!event) {
                perror("Unable to allocate event buffer");
                exit(3);
        }
        memset(event, 0, sizeof *event);
        guest.avail_idx = 0;
        guest.kicked_avail_idx = -1;
        guest.last_used_idx = 0;
        host.used_idx = 0;
        host.called_used_idx = -1;
        for (i = 0; i < ring_size; ++i) {
                struct desc desc = {
                        .index = i,
                };
                ring[i] = desc;
        }
        guest.num_free = ring_size;
        data = malloc(ring_size * sizeof *data);
        if (!data) {
                perror("Unable to allocate data buffer");
                exit(3);
        }
        memset(data, 0, ring_size * sizeof *data);
}

/* guest side */
int add_inbuf(unsigned len, void *buf, void *datap)
{
        unsigned head, index;

        if (!guest.num_free)
                return -1;

        guest.num_free--;
        head = (ring_size - 1) & (guest.avail_idx++);

        /* Start with a write. On MESI architectures this helps
         * avoid a shared state with the consumer that is polling this descriptor.
         */
        ring[head].addr = (unsigned long)(void *)buf;
        ring[head].len = len;
        /* The read below might bypass the write above. That is OK because it's
         * just an optimization. If this happens, we will get the cache line in a
         * shared state which is unfortunate, but probably not worth adding an
         * explicit full barrier to avoid.
         */
        barrier();
        index = ring[head].index;
        data[index].buf = buf;
        data[index].data = datap;
        /* Barrier A (for pairing) */
        smp_release();
        ring[head].flags = DESC_HW;

        return 0;
}

void *get_buf(unsigned *lenp, void **bufp)
{
        unsigned head = (ring_size - 1) & guest.last_used_idx;
        unsigned index;
        void *datap;

        if (ring[head].flags & DESC_HW)
                return NULL;
        /* Barrier B (for pairing) */
        smp_acquire();
        *lenp = ring[head].len;
        index = ring[head].index & (ring_size - 1);
        datap = data[index].data;
        *bufp = data[index].buf;
        data[index].buf = NULL;
        data[index].data = NULL;
        guest.num_free++;
        guest.last_used_idx++;
        return datap;
}

bool used_empty(void)
{
        unsigned head = (ring_size - 1) & guest.last_used_idx;

        return (ring[head].flags & DESC_HW);
}

void disable_call(void)
{
        /* Doing nothing to disable calls might cause
         * extra interrupts, but reduces the number of cache misses.
         */
}

bool enable_call(void)
{
        event->call_index = guest.last_used_idx;
        /* Flush call index write */
        /* Barrier D (for pairing) */
        smp_mb();
        return used_empty();
}

void kick_available(void)
{
        /* Flush in previous flags write */
        /* Barrier C (for pairing) */
        smp_mb();
        if (!need_event(event->kick_index,
                        guest.avail_idx,
                        guest.kicked_avail_idx))
                return;

        guest.kicked_avail_idx = guest.avail_idx;
        kick();
}
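
/* Sketch (not part of the original file): how a caller such as the
 * ringtest harness might drive the guest side above.  The loop below is
 * a hypothetical illustration, kept under #if 0 so it does not affect
 * the build.
 */
#if 0
static void example_guest_cycle(void *buf, unsigned len)
{
        unsigned got_len;
        void *got_buf;

        /* Queue buffers until the ring fills up, then notify the host. */
        while (add_inbuf(len, buf, buf) == 0)
                ;
        kick_available();

        /* Re-arm completion notifications; if completions raced in
         * before we could sleep, drain them right away.
         */
        if (!enable_call()) {
                while (get_buf(&got_len, &got_buf))
                        ;
        }
}
#endif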

/* host side */
void disable_kick(void)
{
        /* Doing nothing to disable kicks might cause
         * extra interrupts, but reduces the number of cache misses.
         */
}

bool enable_kick(void)
{
        event->kick_index = host.used_idx;
        /* Barrier C (for pairing) */
        smp_mb();
        return avail_empty();
}

bool avail_empty(void)
{
        unsigned head = (ring_size - 1) & host.used_idx;

        return !(ring[head].flags & DESC_HW);
}

bool use_buf(unsigned *lenp, void **bufp)
{
        unsigned head = (ring_size - 1) & host.used_idx;

        if (!(ring[head].flags & DESC_HW))
                return false;

        /* make sure the length read below is not speculated */
        /* Barrier A (for pairing) */
        smp_acquire();

        /* simple in-order completion: we don't need
         * to touch the index at all. This also means we
         * can just modify the descriptor in-place.
         */
        ring[head].len--;
        /* Make sure len is valid before flags.
         * Note: an alternative is to write len and flags in one access -
         * possible on 64 bit architectures, but wmb is free on Intel anyway,
         * so I have no way to test whether it's a gain.
         */
        /* Barrier B (for pairing) */
        smp_release();
        ring[head].flags = 0;
        host.used_idx++;
        return true;
}

void call_used(void)
{
        /* Flush in previous flags write */
        /* Barrier D (for pairing) */
        smp_mb();
        if (!need_event(event->call_index,
                        host.used_idx,
                        host.called_used_idx))
                return;

        host.called_used_idx = host.used_idx;
        call();
}
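
/* Sketch (not part of the original file): a hypothetical host-side
 * polling loop over the entry points above, mirroring the guest-side
 * sketch earlier; kept under #if 0 so it does not affect the build.
 */
#if 0
static void example_host_cycle(void)
{
        unsigned len;
        void *buf;

        /* Consume every available descriptor, then signal the guest. */
        while (use_buf(&len, &buf))
                ;
        call_used();

        /* Re-arm kick notifications; if new buffers raced in, keep going. */
        if (!enable_kick()) {
                while (use_buf(&len, &buf))
                        ;
        }
}
#endif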