/*
 * Copyright (C) 2017 Google, Inc.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 */
14
15#ifndef _UAPI_LINUX_VSOC_SHM_H
16#define _UAPI_LINUX_VSOC_SHM_H
17
18#include <linux/types.h>
19
/**
 * A permission is a token that permits a receiver to read and/or write an area
 * of memory within a Vsoc region.
 *
 * An fd_scoped permission grants both read and write access, and can be
 * attached to a file description (see open(2)).
 * Ownership of the area can then be shared by passing a file descriptor
 * among processes.
 *
 * begin_offset and end_offset define the area of memory that is controlled by
 * the permission. owner_offset points to a word, also in shared memory, that
 * controls ownership of the area.
 *
 * Ownership of the region expires when the associated file description is
 * released.
 *
 * At most one permission can be attached to each file description.
 *
 * This is useful when implementing HALs like gralloc that scope and pass
 * ownership of shared resources via file descriptors.
 *
 * The caller is responsible for doing any fencing.
 *
 * The calling process will normally identify a currently free area of
 * memory. It will construct a proposed fd_scoped_permission_arg structure:
 *
 *   begin_offset and end_offset describe the area being claimed
 *
 *   owner_offset points to the location in shared memory that indicates the
 *   owner of the area.
 *
 *   owned_value is the value that will be stored in owner_offset iff the
 *   permission can be granted. It must be different than VSOC_REGION_FREE.
 *
 * Two fd_scoped_permission structures are compatible if they vary only by
 * their owned_value fields.
 *
 * The driver ensures that, for any group of simultaneous callers proposing
 * compatible fd_scoped_permissions, it will accept exactly one of the
 * proposals. The other callers will get a failure with errno of EAGAIN.
 *
 * A process receiving a file descriptor can identify the region being
 * granted using the VSOC_GET_FD_SCOPED_PERMISSION ioctl.
 */
struct fd_scoped_permission {
	/* Start of the area controlled by the permission */
	__u32 begin_offset;
	/* End of the area controlled by the permission */
	__u32 end_offset;
	/* Location of the shared memory word that records the area's owner */
	__u32 owner_offset;
	/* Value stored at owner_offset iff the permission is granted */
	__u32 owned_value;
};
70
/*
 * This value represents a free area of memory. The driver expects to see this
 * value at owner_offset when creating a permission, otherwise it will not
 * create it, and will write this value back once the permission is no longer
 * needed.
 */
#define VSOC_REGION_FREE ((__u32)0)
77
78/**
79 * ioctl argument for VSOC_CREATE_FD_SCOPE_PERMISSION
80 */
81struct fd_scoped_permission_arg {
82 struct fd_scoped_permission perm;
83 __s32 managed_region_fd;
84};
85
/*
 * NOTE(review): presumably the value of an unused signal table entry (the
 * signal table protocol treats 0 as an empty slot) - confirm against the
 * driver.
 */
#define VSOC_NODE_FREE ((__u32)0)
87
/*
 * Describes a signal table in shared memory. Each non-zero entry in the
 * table indicates that the receiver should signal the futex at the given
 * offset. Offsets are relative to the region, not the shared memory window.
 *
 * interrupt_signalled_offset is used to reliably signal interrupts across the
 * vmm boundary. There are two roles: transmitter and receiver. For example,
 * in the host_to_guest_signal_table the host is the transmitter and the
 * guest is the receiver. The protocol is as follows:
 *
 * 1. The transmitter should convert the offset of the futex to an offset
 *    in the signal table [0, (1 << num_nodes_lg2))
 *    The transmitter can choose any appropriate hashing algorithm, including
 *    hash = futex_offset & ((1 << num_nodes_lg2) - 1)
 *
 * 2. The transmitter should atomically compare and swap futex_offset with 0
 *    at hash. There are 3 possible outcomes
 *    a. The swap fails because the futex_offset is already in the table.
 *       The transmitter should stop.
 *    b. Some other offset is in the table. This is a hash collision. The
 *       transmitter should move to another table slot and try again. One
 *       possible algorithm:
 *       hash = (hash + 1) & ((1 << num_nodes_lg2) - 1)
 *    c. The swap worked. Continue below.
 *
 * 3. The transmitter atomically swaps 1 with the value at the
 *    interrupt_signalled_offset. There are two outcomes:
 *    a. The prior value was 1. In this case an interrupt has already been
 *       posted. The transmitter is done.
 *    b. The prior value was 0, indicating that the receiver may be sleeping.
 *       The transmitter will issue an interrupt.
 *
 * 4. On waking the receiver immediately exchanges a 0 with the
 *    interrupt_signalled_offset. If it receives a 0 then this is a spurious
 *    interrupt. That may occasionally happen in the current protocol, but
 *    should be rare.
 *
 * 5. The receiver scans the signal table by atomically exchanging 0 at each
 *    location. If a non-zero offset is returned from the exchange the
 *    receiver wakes all sleepers at the given offset:
 *      futex((int*)(region_base + old_value), FUTEX_WAKE, INT_MAX);
 *
 * 6. The receiver thread then does a conditional wait, waking immediately
 *    if the value at interrupt_signalled_offset is non-zero. This catches
 *    cases where additional signals were posted while the table was being
 *    scanned. On the guest the wait is handled via the
 *    VSOC_WAIT_FOR_INCOMING_INTERRUPT ioctl.
 */
struct vsoc_signal_table_layout {
	/* log_2(Number of signal table entries) */
	__u32 num_nodes_lg2;
	/*
	 * Offset to the first signal table entry relative to the start of the
	 * region
	 */
	__u32 futex_uaddr_table_offset;
	/*
	 * Offset to an atomic_t / atomic uint32_t. A non-zero value indicates
	 * that one or more offsets are currently posted in the table.
	 */
	__u32 interrupt_signalled_offset;
};
151
/*
 * Value of managed_by in struct vsoc_device_region for a region that is not
 * subdivided: any process that can open the device node gains complete access.
 */
#define VSOC_REGION_WHOLE ((__s32)0)
/*
 * Size in bytes of the device_name array in struct vsoc_device_region,
 * including the terminating '\0'.
 */
#define VSOC_DEVICE_NAME_SZ 16
154
155/**
156 * Each HAL would (usually) talk to a single device region
157 * Mulitple entities care about these regions:
158 * - The ivshmem_server will populate the regions in shared memory
159 * - The guest kernel will read the region, create minor device nodes, and
160 * allow interested parties to register for FUTEX_WAKE events in the region
161 * - HALs will access via the minor device nodes published by the guest kernel
162 * - Host side processes will access the region via the ivshmem_server:
163 * 1. Pass name to ivshmem_server at a UNIX socket
164 * 2. ivshmemserver will reply with 2 fds:
165 * - host->guest doorbell fd
166 * - guest->host doorbell fd
167 * - fd for the shared memory region
168 * - region offset
169 * 3. Start a futex receiver thread on the doorbell fd pointed at the
170 * signal_nodes
171 */
172struct vsoc_device_region {
173 __u16 current_version;
174 __u16 min_compatible_version;
175 __u32 region_begin_offset;
176 __u32 region_end_offset;
177 __u32 offset_of_region_data;
178 struct vsoc_signal_table_layout guest_to_host_signal_table;
179 struct vsoc_signal_table_layout host_to_guest_signal_table;
180 /* Name of the device. Must always be terminated with a '\0', so
181 * the longest supported device name is 15 characters.
182 */
183 char device_name[VSOC_DEVICE_NAME_SZ];
184 /* There are two ways that permissions to access regions are handled:
185 * - When subdivided_by is VSOC_REGION_WHOLE, any process that can
186 * open the device node for the region gains complete access to it.
187 * - When subdivided is set processes that open the region cannot
188 * access it. Access to a sub-region must be established by invoking
189 * the VSOC_CREATE_FD_SCOPE_PERMISSION ioctl on the region
190 * referenced in subdivided_by, providing a fileinstance
191 * (represented by a fd) opened on this region.
192 */
193 __u32 managed_by;
194};
195
/*
 * The vsoc layout descriptor.
 * The first 4K should be reserved for the shm header and region descriptors.
 * The regions should be page aligned.
 */
struct vsoc_shm_layout_descriptor {
	/* Layout version; see CURRENT_VSOC_LAYOUT_{MAJOR,MINOR}_VERSION */
	__u16 major_version;
	__u16 minor_version;

	/* Size of the shm. This may be redundant but is nice to have */
	__u32 size;

	/* Number of shared memory regions */
	__u32 region_count;

	/* The offset to the start of the region descriptors */
	__u32 vsoc_region_desc_offset;
};
215
/*
 * This specifies the current version that should be stored in
 * vsoc_shm_layout_descriptor.major_version and
 * vsoc_shm_layout_descriptor.minor_version.
 * It should be updated only if the vsoc_device_region and
 * vsoc_shm_layout_descriptor structures have changed.
 * Versioning within each region is transferred
 * via the min_compatible_version and current_version fields in
 * vsoc_device_region. The driver does not consult these fields: they are left
 * for the HALs and host processes and will change independently of the layout
 * version.
 */
#define CURRENT_VSOC_LAYOUT_MAJOR_VERSION 2
#define CURRENT_VSOC_LAYOUT_MINOR_VERSION 0
230
/*
 * NOTE(review): the comment on struct fd_scoped_permission_arg describes that
 * structure as the argument of VSOC_CREATE_FD_SCOPED_PERMISSION, but the size
 * encoded here is that of struct fd_scoped_permission. The encoding is left
 * untouched because changing it would change the ioctl number - confirm
 * against the driver.
 */
#define VSOC_CREATE_FD_SCOPED_PERMISSION \
	_IOW(0xF5, 0, struct fd_scoped_permission)
#define VSOC_GET_FD_SCOPED_PERMISSION _IOR(0xF5, 1, struct fd_scoped_permission)

/*
 * This is used to signal the host to scan the guest_to_host_signal_table
 * for new futexes to wake. This sends an interrupt if one is not already
 * in flight.
 */
#define VSOC_MAYBE_SEND_INTERRUPT_TO_HOST _IO(0xF5, 2)

/*
 * When this returns the guest will scan host_to_guest_signal_table to
 * check for new futexes to wake.
 */
/* TODO(ghartman): Consider moving this to the bottom half */
#define VSOC_WAIT_FOR_INCOMING_INTERRUPT _IO(0xF5, 3)

/*
 * Guest HALs will use this to retrieve the region description after
 * opening their device node.
 */
#define VSOC_DESCRIBE_REGION _IOR(0xF5, 4, struct vsoc_device_region)

/*
 * Wake any threads that may be waiting for a host interrupt on this region.
 * This is mostly used during shutdown.
 */
#define VSOC_SELF_INTERRUPT _IO(0xF5, 5)

/*
 * This is used to signal the host to scan the guest_to_host_signal_table
 * for new futexes to wake. This sends an interrupt unconditionally.
 */
#define VSOC_SEND_INTERRUPT_TO_HOST _IO(0xF5, 6)
266
/*
 * NOTE(review): presumably the values accepted in the wait_type field of
 * struct vsoc_cond_wait - confirm against the driver.
 */
enum wait_types {
	VSOC_WAIT_UNDEFINED = 0,
	VSOC_WAIT_IF_EQUAL = 1,
	VSOC_WAIT_IF_EQUAL_TIMEOUT = 2
};
272
/*
 * Wait for a condition to be true
 *
 * Note, this is sized and aligned so the 32 bit and 64 bit layouts are
 * identical.
 */
struct vsoc_cond_wait {
	/* Input: Offset of the 32 bit word to check */
	__u32 offset;
	/* Input: Value that will be compared with the word at offset */
	__u32 value;
	/* Input: Monotonic time to wake at in seconds */
	__u64 wake_time_sec;
	/* Input: Monotonic time to wake at in nanoseconds */
	__u32 wake_time_nsec;
	/* Input: Type of wait */
	__u32 wait_type;
	/* Output: Number of times the thread woke before returning. */
	__u32 wakes;
	/* Padding: keeps the structure 8-byte aligned with a length that is a
	 * multiple of 8 bytes, for 32/64 bit compatibility.
	 */
	__u32 reserved_1;
};
297
/*
 * Takes a struct vsoc_cond_wait. Encoded as read/write because the structure
 * carries both input and output fields.
 */
#define VSOC_COND_WAIT _IOWR(0xF5, 7, struct vsoc_cond_wait)

/* Wake any local threads waiting at the offset given in arg */
#define VSOC_COND_WAKE _IO(0xF5, 8)
302
303#endif /* _UAPI_LINUX_VSOC_SHM_H */