| /* |
| * Intel MIC Platform Software Stack (MPSS) |
| * |
| * This file is provided under a dual BSD/GPLv2 license. When using or |
| * redistributing this file, you may do so under either license. |
| * |
| * GPL LICENSE SUMMARY |
| * |
| * Copyright(c) 2014 Intel Corporation. |
| * |
| * This program is free software; you can redistribute it and/or modify |
| * it under the terms of version 2 of the GNU General Public License as |
| * published by the Free Software Foundation. |
| * |
| * This program is distributed in the hope that it will be useful, but |
| * WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * General Public License for more details. |
| * |
| * BSD LICENSE |
| * |
| * Copyright(c) 2014 Intel Corporation. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * |
| * * Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * * Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in |
| * the documentation and/or other materials provided with the |
| * distribution. |
| * * Neither the name of Intel Corporation nor the names of its |
| * contributors may be used to endorse or promote products derived |
| * from this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| * |
| * Intel SCIF driver. |
| * |
| */ |
| #ifndef __SCIF_H__ |
| #define __SCIF_H__ |
| |
| #include <linux/types.h> |
| #include <linux/poll.h> |
| #include <linux/scif_ioctl.h> |
| |
| #define SCIF_ACCEPT_SYNC 1 |
| #define SCIF_SEND_BLOCK 1 |
| #define SCIF_RECV_BLOCK 1 |
| |
| enum { |
| SCIF_PROT_READ = (1 << 0), |
| SCIF_PROT_WRITE = (1 << 1) |
| }; |
| |
| enum { |
| SCIF_MAP_FIXED = 0x10, |
| SCIF_MAP_KERNEL = 0x20, |
| }; |
| |
| enum { |
| SCIF_FENCE_INIT_SELF = (1 << 0), |
| SCIF_FENCE_INIT_PEER = (1 << 1), |
| SCIF_SIGNAL_LOCAL = (1 << 4), |
| SCIF_SIGNAL_REMOTE = (1 << 5) |
| }; |
| |
| enum { |
| SCIF_RMA_USECPU = (1 << 0), |
| SCIF_RMA_USECACHE = (1 << 1), |
| SCIF_RMA_SYNC = (1 << 2), |
| SCIF_RMA_ORDERED = (1 << 3) |
| }; |
| |
| /* End of SCIF Admin Reserved Ports */ |
| #define SCIF_ADMIN_PORT_END 1024 |
| |
| /* End of SCIF Reserved Ports */ |
| #define SCIF_PORT_RSVD 1088 |
| |
| typedef struct scif_endpt *scif_epd_t; |
| |
| #define SCIF_OPEN_FAILED ((scif_epd_t)-1) |
| #define SCIF_REGISTER_FAILED ((off_t)-1) |
| #define SCIF_MMAP_FAILED ((void *)-1) |
| |
| /** |
| * scif_open() - Create an endpoint |
| * |
| * Return: |
| * Upon successful completion, scif_open() returns an endpoint descriptor to |
| * be used in subsequent SCIF functions calls to refer to that endpoint; |
| * otherwise in user mode SCIF_OPEN_FAILED (that is ((scif_epd_t)-1)) is |
| * returned and errno is set to indicate the error; in kernel mode a NULL |
| * scif_epd_t is returned. |
| * |
| * Errors: |
| * ENOMEM - Insufficient kernel memory was available |
| */ |
| scif_epd_t scif_open(void); |
| |
| /** |
| * scif_bind() - Bind an endpoint to a port |
| * @epd: endpoint descriptor |
| * @pn: port number |
| * |
| * scif_bind() binds endpoint epd to port pn, where pn is a port number on the |
| * local node. If pn is zero, a port number greater than or equal to |
| * SCIF_PORT_RSVD is assigned and returned. Each endpoint may be bound to |
| * exactly one local port. Ports less than 1024 when requested can only be bound |
| * by system (or root) processes or by processes executed by privileged users. |
| * |
| * Return: |
| * Upon successful completion, scif_bind() returns the port number to which epd |
| * is bound; otherwise in user mode -1 is returned and errno is set to |
| * indicate the error; in kernel mode the negative of one of the following |
| * errors is returned. |
| * |
| * Errors: |
| * EBADF, ENOTTY - epd is not a valid endpoint descriptor |
| * EINVAL - the endpoint or the port is already bound |
| * EISCONN - The endpoint is already connected |
| * ENOSPC - No port number available for assignment |
| * EACCES - The port requested is protected and the user is not the superuser |
| */ |
| int scif_bind(scif_epd_t epd, u16 pn); |
| |
| /** |
| * scif_listen() - Listen for connections on an endpoint |
| * @epd: endpoint descriptor |
| * @backlog: maximum pending connection requests |
| * |
| * scif_listen() marks the endpoint epd as a listening endpoint - that is, as |
| * an endpoint that will be used to accept incoming connection requests. Once |
| * so marked, the endpoint is said to be in the listening state and may not be |
| * used as the endpoint of a connection. |
| * |
| * The endpoint, epd, must have been bound to a port. |
| * |
| * The backlog argument defines the maximum length to which the queue of |
| * pending connections for epd may grow. If a connection request arrives when |
| * the queue is full, the client may receive an error with an indication that |
| * the connection was refused. |
| * |
| * Return: |
| * Upon successful completion, scif_listen() returns 0; otherwise in user mode |
| * -1 is returned and errno is set to indicate the error; in kernel mode the |
| * negative of one of the following errors is returned. |
| * |
| * Errors: |
| * EBADF, ENOTTY - epd is not a valid endpoint descriptor |
| * EINVAL - the endpoint is not bound to a port |
| * EISCONN - The endpoint is already connected or listening |
| */ |
| int scif_listen(scif_epd_t epd, int backlog); |
| |
| /** |
| * scif_connect() - Initiate a connection on a port |
| * @epd: endpoint descriptor |
| * @dst: global id of port to which to connect |
| * |
| * The scif_connect() function requests the connection of endpoint epd to remote |
| * port dst. If the connection is successful, a peer endpoint, bound to dst, is |
| * created on node dst.node. On successful return, the connection is complete. |
| * |
| * If the endpoint epd has not already been bound to a port, scif_connect() |
| * will bind it to an unused local port. |
| * |
| * A connection is terminated when an endpoint of the connection is closed, |
| * either explicitly by scif_close(), or when a process that owns one of the |
| * endpoints of the connection is terminated. |
| * |
| * In user space, scif_connect() supports an asynchronous connection mode |
| * if the application has set the O_NONBLOCK flag on the endpoint via the |
| * fcntl() system call. Setting this flag will result in the calling process |
| * not to wait during scif_connect(). |
| * |
| * Return: |
| * Upon successful completion, scif_connect() returns the port ID to which the |
| * endpoint, epd, is bound; otherwise in user mode -1 is returned and errno is |
| * set to indicate the error; in kernel mode the negative of one of the |
| * following errors is returned. |
| * |
| * Errors: |
| * EBADF, ENOTTY - epd is not a valid endpoint descriptor |
| * ECONNREFUSED - The destination was not listening for connections or refused |
| * the connection request |
| * EINVAL - dst.port is not a valid port ID |
| * EISCONN - The endpoint is already connected |
| * ENOMEM - No buffer space is available |
| * ENODEV - The destination node does not exist, or the node is lost or existed, |
| * but is not currently in the network since it may have crashed |
| * ENOSPC - No port number available for assignment |
| * EOPNOTSUPP - The endpoint is listening and cannot be connected |
| */ |
| int scif_connect(scif_epd_t epd, struct scif_port_id *dst); |
| |
| /** |
| * scif_accept() - Accept a connection on an endpoint |
| * @epd: endpoint descriptor |
| * @peer: global id of port to which connected |
| * @newepd: new connected endpoint descriptor |
| * @flags: flags |
| * |
| * The scif_accept() call extracts the first connection request from the queue |
| * of pending connections for the port on which epd is listening. scif_accept() |
| * creates a new endpoint, bound to the same port as epd, and allocates a new |
| * SCIF endpoint descriptor, returned in newepd, for the endpoint. The new |
| * endpoint is connected to the endpoint through which the connection was |
| * requested. epd is unaffected by this call, and remains in the listening |
| * state. |
| * |
| * On successful return, peer holds the global port identifier (node id and |
| * local port number) of the port which requested the connection. |
| * |
| * A connection is terminated when an endpoint of the connection is closed, |
| * either explicitly by scif_close(), or when a process that owns one of the |
| * endpoints of the connection is terminated. |
| * |
| * The number of connections that can (subsequently) be accepted on epd is only |
| * limited by system resources (memory). |
| * |
| * The flags argument is formed by OR'ing together zero or more of the |
| * following values. |
| * SCIF_ACCEPT_SYNC - block until a connection request is presented. If |
| * SCIF_ACCEPT_SYNC is not in flags, and no pending |
| * connections are present on the queue, scif_accept() |
| * fails with an EAGAIN error |
| * |
| * In user mode, the select() and poll() functions can be used to determine |
| * when there is a connection request. In kernel mode, the scif_poll() |
| * function may be used for this purpose. A readable event will be delivered |
| * when a connection is requested. |
| * |
| * Return: |
| * Upon successful completion, scif_accept() returns 0; otherwise in user mode |
| * -1 is returned and errno is set to indicate the error; in kernel mode the |
| * negative of one of the following errors is returned. |
| * |
| * Errors: |
| * EAGAIN - SCIF_ACCEPT_SYNC is not set and no connections are present to be |
| * accepted or SCIF_ACCEPT_SYNC is not set and remote node failed to complete |
| * its connection request |
| * EBADF, ENOTTY - epd is not a valid endpoint descriptor |
| * EINTR - Interrupted function |
| * EINVAL - epd is not a listening endpoint, or flags is invalid, or peer is |
| * NULL, or newepd is NULL |
| * ENODEV - The requesting node is lost or existed, but is not currently in the |
| * network since it may have crashed |
| * ENOMEM - Not enough space |
| * ENOENT - Secondary part of epd registration failed |
| */ |
| int scif_accept(scif_epd_t epd, struct scif_port_id *peer, scif_epd_t |
| *newepd, int flags); |
| |
| /** |
| * scif_close() - Close an endpoint |
| * @epd: endpoint descriptor |
| * |
| * scif_close() closes an endpoint and performs necessary teardown of |
| * facilities associated with that endpoint. |
| * |
| * If epd is a listening endpoint then it will no longer accept connection |
| * requests on the port to which it is bound. Any pending connection requests |
| * are rejected. |
| * |
| * If epd is a connected endpoint, then its peer endpoint is also closed. RMAs |
| * which are in-process through epd or its peer endpoint will complete before |
| * scif_close() returns. Registered windows of the local and peer endpoints are |
| * released as if scif_unregister() was called against each window. |
| * |
| * Closing a SCIF endpoint does not affect local registered memory mapped by |
| * a SCIF endpoint on a remote node. The local memory remains mapped by the peer |
| * SCIF endpoint explicitly removed by calling munmap(..) by the peer. |
| * |
| * If the peer endpoint's receive queue is not empty at the time that epd is |
| * closed, then the peer endpoint can be passed as the endpoint parameter to |
| * scif_recv() until the receive queue is empty. |
| * |
| * epd is freed and may no longer be accessed. |
| * |
| * Return: |
| * Upon successful completion, scif_close() returns 0; otherwise in user mode |
| * -1 is returned and errno is set to indicate the error; in kernel mode the |
| * negative of one of the following errors is returned. |
| * |
| * Errors: |
| * EBADF, ENOTTY - epd is not a valid endpoint descriptor |
| */ |
| int scif_close(scif_epd_t epd); |
| |
| /** |
| * scif_send() - Send a message |
| * @epd: endpoint descriptor |
| * @msg: message buffer address |
| * @len: message length |
| * @flags: blocking mode flags |
| * |
| * scif_send() sends data to the peer of endpoint epd. Up to len bytes of data |
| * are copied from memory starting at address msg. On successful execution the |
| * return value of scif_send() is the number of bytes that were sent, and is |
| * zero if no bytes were sent because len was zero. scif_send() may be called |
| * only when the endpoint is in a connected state. |
| * |
| * If a scif_send() call is non-blocking, then it sends only those bytes which |
| * can be sent without waiting, up to a maximum of len bytes. |
| * |
| * If a scif_send() call is blocking, then it normally returns after sending |
| * all len bytes. If a blocking call is interrupted or the connection is |
| * reset, the call is considered successful if some bytes were sent or len is |
| * zero, otherwise the call is considered unsuccessful. |
| * |
| * In user mode, the select() and poll() functions can be used to determine |
| * when the send queue is not full. In kernel mode, the scif_poll() function |
| * may be used for this purpose. |
| * |
| * It is recommended that scif_send()/scif_recv() only be used for short |
| * control-type message communication between SCIF endpoints. The SCIF RMA |
| * APIs are expected to provide better performance for transfer sizes of |
| * 1024 bytes or longer for the current MIC hardware and software |
| * implementation. |
| * |
| * scif_send() will block until the entire message is sent if SCIF_SEND_BLOCK |
| * is passed as the flags argument. |
| * |
| * Return: |
| * Upon successful completion, scif_send() returns the number of bytes sent; |
| * otherwise in user mode -1 is returned and errno is set to indicate the |
| * error; in kernel mode the negative of one of the following errors is |
| * returned. |
| * |
| * Errors: |
| * EBADF, ENOTTY - epd is not a valid endpoint descriptor |
| * ECONNRESET - Connection reset by peer |
| * EFAULT - An invalid address was specified for a parameter |
| * EINVAL - flags is invalid, or len is negative |
| * ENODEV - The remote node is lost or existed, but is not currently in the |
| * network since it may have crashed |
| * ENOMEM - Not enough space |
| * ENOTCONN - The endpoint is not connected |
| */ |
| int scif_send(scif_epd_t epd, void *msg, int len, int flags); |
| |
| /** |
| * scif_recv() - Receive a message |
| * @epd: endpoint descriptor |
| * @msg: message buffer address |
| * @len: message buffer length |
| * @flags: blocking mode flags |
| * |
| * scif_recv() receives data from the peer of endpoint epd. Up to len bytes of |
| * data are copied to memory starting at address msg. On successful execution |
| * the return value of scif_recv() is the number of bytes that were received, |
| * and is zero if no bytes were received because len was zero. scif_recv() may |
| * be called only when the endpoint is in a connected state. |
| * |
| * If a scif_recv() call is non-blocking, then it receives only those bytes |
| * which can be received without waiting, up to a maximum of len bytes. |
| * |
| * If a scif_recv() call is blocking, then it normally returns after receiving |
| * all len bytes. If the blocking call was interrupted due to a disconnection, |
| * subsequent calls to scif_recv() will copy all bytes received upto the point |
| * of disconnection. |
| * |
| * In user mode, the select() and poll() functions can be used to determine |
| * when data is available to be received. In kernel mode, the scif_poll() |
| * function may be used for this purpose. |
| * |
| * It is recommended that scif_send()/scif_recv() only be used for short |
| * control-type message communication between SCIF endpoints. The SCIF RMA |
| * APIs are expected to provide better performance for transfer sizes of |
| * 1024 bytes or longer for the current MIC hardware and software |
| * implementation. |
| * |
| * scif_recv() will block until the entire message is received if |
| * SCIF_RECV_BLOCK is passed as the flags argument. |
| * |
| * Return: |
| * Upon successful completion, scif_recv() returns the number of bytes |
| * received; otherwise in user mode -1 is returned and errno is set to |
| * indicate the error; in kernel mode the negative of one of the following |
| * errors is returned. |
| * |
| * Errors: |
| * EAGAIN - The destination node is returning from a low power state |
| * EBADF, ENOTTY - epd is not a valid endpoint descriptor |
| * ECONNRESET - Connection reset by peer |
| * EFAULT - An invalid address was specified for a parameter |
| * EINVAL - flags is invalid, or len is negative |
| * ENODEV - The remote node is lost or existed, but is not currently in the |
| * network since it may have crashed |
| * ENOMEM - Not enough space |
| * ENOTCONN - The endpoint is not connected |
| */ |
| int scif_recv(scif_epd_t epd, void *msg, int len, int flags); |
| |
| /** |
| * scif_register() - Mark a memory region for remote access. |
| * @epd: endpoint descriptor |
| * @addr: starting virtual address |
| * @len: length of range |
| * @offset: offset of window |
| * @prot_flags: read/write protection flags |
| * @map_flags: mapping flags |
| * |
| * The scif_register() function opens a window, a range of whole pages of the |
| * registered address space of the endpoint epd, starting at offset po and |
| * continuing for len bytes. The value of po, further described below, is a |
| * function of the parameters offset and len, and the value of map_flags. Each |
| * page of the window represents the physical memory page which backs the |
| * corresponding page of the range of virtual address pages starting at addr |
| * and continuing for len bytes. addr and len are constrained to be multiples |
| * of the page size. A successful scif_register() call returns po. |
| * |
| * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset |
| * exactly, and offset is constrained to be a multiple of the page size. The |
| * mapping established by scif_register() will not replace any existing |
| * registration; an error is returned if any page within the range [offset, |
| * offset + len - 1] intersects an existing window. |
| * |
| * When SCIF_MAP_FIXED is not set, the implementation uses offset in an |
| * implementation-defined manner to arrive at po. The po value so chosen will |
| * be an area of the registered address space that the implementation deems |
| * suitable for a mapping of len bytes. An offset value of 0 is interpreted as |
| * granting the implementation complete freedom in selecting po, subject to |
| * constraints described below. A non-zero value of offset is taken to be a |
| * suggestion of an offset near which the mapping should be placed. When the |
| * implementation selects a value for po, it does not replace any extant |
| * window. In all cases, po will be a multiple of the page size. |
| * |
| * The physical pages which are so represented by a window are available for |
| * access in calls to mmap(), scif_readfrom(), scif_writeto(), |
| * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the |
| * physical pages represented by the window will not be reused by the memory |
| * subsystem for any other purpose. Note that the same physical page may be |
| * represented by multiple windows. |
| * |
| * Subsequent operations which change the memory pages to which virtual |
| * addresses are mapped (such as mmap(), munmap()) have no effect on |
| * existing window. |
| * |
| * If the process will fork(), it is recommended that the registered |
| * virtual address range be marked with MADV_DONTFORK. Doing so will prevent |
| * problems due to copy-on-write semantics. |
| * |
| * The prot_flags argument is formed by OR'ing together one or more of the |
| * following values. |
| * SCIF_PROT_READ - allow read operations from the window |
| * SCIF_PROT_WRITE - allow write operations to the window |
| * |
| * The map_flags argument can be set to SCIF_MAP_FIXED which interprets a |
| * fixed offset. |
| * |
| * Return: |
| * Upon successful completion, scif_register() returns the offset at which the |
| * mapping was placed (po); otherwise in user mode SCIF_REGISTER_FAILED (that |
| * is (off_t *)-1) is returned and errno is set to indicate the error; in |
| * kernel mode the negative of one of the following errors is returned. |
| * |
| * Errors: |
| * EADDRINUSE - SCIF_MAP_FIXED is set in map_flags, and pages in the range |
| * [offset, offset + len -1] are already registered |
| * EAGAIN - The mapping could not be performed due to lack of resources |
| * EBADF, ENOTTY - epd is not a valid endpoint descriptor |
| * ECONNRESET - Connection reset by peer |
| * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid |
| * EINVAL - map_flags is invalid, or prot_flags is invalid, or SCIF_MAP_FIXED is |
| * set in flags, and offset is not a multiple of the page size, or addr is not a |
| * multiple of the page size, or len is not a multiple of the page size, or is |
| * 0, or offset is negative |
| * ENODEV - The remote node is lost or existed, but is not currently in the |
| * network since it may have crashed |
| * ENOMEM - Not enough space |
| * ENOTCONN -The endpoint is not connected |
| */ |
| off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset, |
| int prot_flags, int map_flags); |
| |
| /** |
| * scif_unregister() - Mark a memory region for remote access. |
| * @epd: endpoint descriptor |
| * @offset: start of range to unregister |
| * @len: length of range to unregister |
| * |
| * The scif_unregister() function closes those previously registered windows |
| * which are entirely within the range [offset, offset + len - 1]. It is an |
| * error to specify a range which intersects only a subrange of a window. |
| * |
| * On a successful return, pages within the window may no longer be specified |
| * in calls to mmap(), scif_readfrom(), scif_writeto(), scif_vreadfrom(), |
| * scif_vwriteto(), scif_get_pages, and scif_fence_signal(). The window, |
| * however, continues to exist until all previous references against it are |
| * removed. A window is referenced if there is a mapping to it created by |
| * mmap(), or if scif_get_pages() was called against the window |
| * (and the pages have not been returned via scif_put_pages()). A window is |
| * also referenced while an RMA, in which some range of the window is a source |
| * or destination, is in progress. Finally a window is referenced while some |
| * offset in that window was specified to scif_fence_signal(), and the RMAs |
| * marked by that call to scif_fence_signal() have not completed. While a |
| * window is in this state, its registered address space pages are not |
| * available for use in a new registered window. |
| * |
| * When all such references to the window have been removed, its references to |
| * all the physical pages which it represents are removed. Similarly, the |
| * registered address space pages of the window become available for |
| * registration in a new window. |
| * |
| * Return: |
| * Upon successful completion, scif_unregister() returns 0; otherwise in user |
| * mode -1 is returned and errno is set to indicate the error; in kernel mode |
| * the negative of one of the following errors is returned. In the event of an |
| * error, no windows are unregistered. |
| * |
| * Errors: |
| * EBADF, ENOTTY - epd is not a valid endpoint descriptor |
| * ECONNRESET - Connection reset by peer |
| * EINVAL - the range [offset, offset + len - 1] intersects a subrange of a |
| * window, or offset is negative |
| * ENODEV - The remote node is lost or existed, but is not currently in the |
| * network since it may have crashed |
| * ENOTCONN - The endpoint is not connected |
| * ENXIO - Offsets in the range [offset, offset + len - 1] are invalid for the |
| * registered address space of epd |
| */ |
| int scif_unregister(scif_epd_t epd, off_t offset, size_t len); |
| |
| /** |
| * scif_readfrom() - Copy from a remote address space |
| * @epd: endpoint descriptor |
| * @loffset: offset in local registered address space to |
| * which to copy |
| * @len: length of range to copy |
| * @roffset: offset in remote registered address space |
| * from which to copy |
| * @rma_flags: transfer mode flags |
| * |
| * scif_readfrom() copies len bytes from the remote registered address space of |
| * the peer of endpoint epd, starting at the offset roffset to the local |
| * registered address space of epd, starting at the offset loffset. |
| * |
| * Each of the specified ranges [loffset, loffset + len - 1] and [roffset, |
| * roffset + len - 1] must be within some registered window or windows of the |
| * local and remote nodes. A range may intersect multiple registered windows, |
| * but only if those windows are contiguous in the registered address space. |
| * |
| * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using |
| * programmed read/writes. Otherwise the data is copied using DMA. If rma_- |
| * flags includes SCIF_RMA_SYNC, then scif_readfrom() will return after the |
| * transfer is complete. Otherwise, the transfer may be performed asynchron- |
| * ously. The order in which any two asynchronous RMA operations complete |
| * is non-deterministic. The synchronization functions, scif_fence_mark()/ |
| * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to |
| * the completion of asynchronous RMA operations on the same endpoint. |
| * |
| * The DMA transfer of individual bytes is not guaranteed to complete in |
| * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last |
| * cacheline or partial cacheline of the source range will become visible on |
| * the destination node after all other transferred data in the source |
| * range has become visible on the destination node. |
| * |
| * The optimal DMA performance will likely be realized if both |
| * loffset and roffset are cacheline aligned (are a multiple of 64). Lower |
| * performance will likely be realized if loffset and roffset are not |
| * cacheline aligned but are separated by some multiple of 64. The lowest level |
| * of performance is likely if loffset and roffset are not separated by a |
| * multiple of 64. |
| * |
| * The rma_flags argument is formed by ORing together zero or more of the |
| * following values. |
| * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA |
| * engine. |
| * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the |
| * transfer has completed. Passing this flag results in the |
| * current implementation busy waiting and consuming CPU cycles |
| * while the DMA transfer is in progress for best performance by |
| * avoiding the interrupt latency. |
| * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of |
| * the source range becomes visible on the destination node |
| * after all other transferred data in the source range has |
| * become visible on the destination |
| * |
| * Return: |
| * Upon successful completion, scif_readfrom() returns 0; otherwise in user |
| * mode -1 is returned and errno is set to indicate the error; in kernel mode |
| * the negative of one of the following errors is returned. |
| * |
| * Errors: |
| * EACCESS - Attempt to write to a read-only range |
| * EBADF, ENOTTY - epd is not a valid endpoint descriptor |
| * ECONNRESET - Connection reset by peer |
| * EINVAL - rma_flags is invalid |
| * ENODEV - The remote node is lost or existed, but is not currently in the |
| * network since it may have crashed |
| * ENOTCONN - The endpoint is not connected |
| * ENXIO - The range [loffset, loffset + len - 1] is invalid for the registered |
| * address space of epd, or, The range [roffset, roffset + len - 1] is invalid |
| * for the registered address space of the peer of epd, or loffset or roffset |
| * is negative |
| */ |
| int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len, off_t |
| roffset, int rma_flags); |
| |
| /** |
| * scif_writeto() - Copy to a remote address space |
| * @epd: endpoint descriptor |
| * @loffset: offset in local registered address space |
| * from which to copy |
| * @len: length of range to copy |
| * @roffset: offset in remote registered address space to |
| * which to copy |
| * @rma_flags: transfer mode flags |
| * |
| * scif_writeto() copies len bytes from the local registered address space of |
| * epd, starting at the offset loffset to the remote registered address space |
| * of the peer of endpoint epd, starting at the offset roffset. |
| * |
| * Each of the specified ranges [loffset, loffset + len - 1] and [roffset, |
| * roffset + len - 1] must be within some registered window or windows of the |
| * local and remote nodes. A range may intersect multiple registered windows, |
| * but only if those windows are contiguous in the registered address space. |
| * |
| * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using |
| * programmed read/writes. Otherwise the data is copied using DMA. If rma_- |
| * flags includes SCIF_RMA_SYNC, then scif_writeto() will return after the |
| * transfer is complete. Otherwise, the transfer may be performed asynchron- |
| * ously. The order in which any two asynchronous RMA operations complete |
| * is non-deterministic. The synchronization functions, scif_fence_mark()/ |
| * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to |
| * the completion of asynchronous RMA operations on the same endpoint. |
| * |
| * The DMA transfer of individual bytes is not guaranteed to complete in |
| * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last |
| * cacheline or partial cacheline of the source range will become visible on |
| * the destination node after all other transferred data in the source |
| * range has become visible on the destination node. |
| * |
| * The optimal DMA performance will likely be realized if both |
| * loffset and roffset are cacheline aligned (are a multiple of 64). Lower |
| * performance will likely be realized if loffset and roffset are not cacheline |
| * aligned but are separated by some multiple of 64. The lowest level of |
| * performance is likely if loffset and roffset are not separated by a multiple |
| * of 64. |
| * |
| * The rma_flags argument is formed by ORing together zero or more of the |
| * following values. |
| * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA |
| * engine. |
| * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the |
| * transfer has completed. Passing this flag results in the |
| * current implementation busy waiting and consuming CPU cycles |
| * while the DMA transfer is in progress for best performance by |
| * avoiding the interrupt latency. |
| * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of |
| * the source range becomes visible on the destination node |
| * after all other transferred data in the source range has |
| * become visible on the destination |
| * |
| * Return: |
| * Upon successful completion, scif_readfrom() returns 0; otherwise in user |
| * mode -1 is returned and errno is set to indicate the error; in kernel mode |
| * the negative of one of the following errors is returned. |
| * |
| * Errors: |
| * EACCESS - Attempt to write to a read-only range |
| * EBADF, ENOTTY - epd is not a valid endpoint descriptor |
| * ECONNRESET - Connection reset by peer |
| * EINVAL - rma_flags is invalid |
| * ENODEV - The remote node is lost or existed, but is not currently in the |
| * network since it may have crashed |
| * ENOTCONN - The endpoint is not connected |
| * ENXIO - The range [loffset, loffset + len - 1] is invalid for the registered |
| * address space of epd, or, The range [roffset , roffset + len -1] is invalid |
| * for the registered address space of the peer of epd, or loffset or roffset |
| * is negative |
| */ |
| int scif_writeto(scif_epd_t epd, off_t loffset, size_t len, off_t |
| roffset, int rma_flags); |
| |
| /** |
| * scif_vreadfrom() - Copy from a remote address space |
| * @epd: endpoint descriptor |
| * @addr: address to which to copy |
| * @len: length of range to copy |
| * @roffset: offset in remote registered address space |
| * from which to copy |
| * @rma_flags: transfer mode flags |
| * |
| * scif_vreadfrom() copies len bytes from the remote registered address |
| * space of the peer of endpoint epd, starting at the offset roffset, to local |
| * memory, starting at addr. |
| * |
| * The specified range [roffset, roffset + len - 1] must be within some |
| * registered window or windows of the remote nodes. The range may |
| * intersect multiple registered windows, but only if those windows are |
| * contiguous in the registered address space. |
| * |
| * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using |
| * programmed read/writes. Otherwise the data is copied using DMA. If rma_- |
| * flags includes SCIF_RMA_SYNC, then scif_vreadfrom() will return after the |
| * transfer is complete. Otherwise, the transfer may be performed asynchron- |
| * ously. The order in which any two asynchronous RMA operations complete |
| * is non-deterministic. The synchronization functions, scif_fence_mark()/ |
| * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to |
| * the completion of asynchronous RMA operations on the same endpoint. |
| * |
| * The DMA transfer of individual bytes is not guaranteed to complete in |
| * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last |
| * cacheline or partial cacheline of the source range will become visible on |
| * the destination node after all other transferred data in the source |
| * range has become visible on the destination node. |
| * |
| * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back |
| * the specified local memory range may be remain in a pinned state even after |
| * the specified transfer completes. This may reduce overhead if some or all of |
| * the same virtual address range is referenced in a subsequent call of |
| * scif_vreadfrom() or scif_vwriteto(). |
| * |
| * The optimal DMA performance will likely be realized if both |
| * addr and roffset are cacheline aligned (are a multiple of 64). Lower |
| * performance will likely be realized if addr and roffset are not |
| * cacheline aligned but are separated by some multiple of 64. The lowest level |
| * of performance is likely if addr and roffset are not separated by a |
| * multiple of 64. |
| * |
| * The rma_flags argument is formed by ORing together zero or more of the |
| * following values. |
| * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA |
| * engine. |
| * SCIF_RMA_USECACHE - enable registration caching |
| * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the |
| * transfer has completed. Passing this flag results in the |
| * current implementation busy waiting and consuming CPU cycles |
| * while the DMA transfer is in progress for best performance by |
| * avoiding the interrupt latency. |
| * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of |
| * the source range becomes visible on the destination node |
| * after all other transferred data in the source range has |
| * become visible on the destination |
| * |
| * Return: |
| * Upon successful completion, scif_vreadfrom() returns 0; otherwise in user |
| * mode -1 is returned and errno is set to indicate the error; in kernel mode |
| * the negative of one of the following errors is returned. |
| * |
| * Errors: |
| * EACCESS - Attempt to write to a read-only range |
| * EBADF, ENOTTY - epd is not a valid endpoint descriptor |
| * ECONNRESET - Connection reset by peer |
| * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid |
| * EINVAL - rma_flags is invalid |
| * ENODEV - The remote node is lost or existed, but is not currently in the |
| * network since it may have crashed |
| * ENOTCONN - The endpoint is not connected |
| * ENXIO - Offsets in the range [roffset, roffset + len - 1] are invalid for the |
| * registered address space of epd |
| */ |
| int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len, off_t roffset, |
| int rma_flags); |
| |
| /** |
| * scif_vwriteto() - Copy to a remote address space |
| * @epd: endpoint descriptor |
| * @addr: address from which to copy |
| * @len: length of range to copy |
| * @roffset: offset in remote registered address space to |
| * which to copy |
| * @rma_flags: transfer mode flags |
| * |
| * scif_vwriteto() copies len bytes from the local memory, starting at addr, to |
| * the remote registered address space of the peer of endpoint epd, starting at |
| * the offset roffset. |
| * |
| * The specified range [roffset, roffset + len - 1] must be within some |
| * registered window or windows of the remote nodes. The range may intersect |
| * multiple registered windows, but only if those windows are contiguous in the |
| * registered address space. |
| * |
| * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using |
| * programmed read/writes. Otherwise the data is copied using DMA. If rma_- |
| * flags includes SCIF_RMA_SYNC, then scif_vwriteto() will return after the |
| * transfer is complete. Otherwise, the transfer may be performed asynchron- |
| * ously. The order in which any two asynchronous RMA operations complete |
| * is non-deterministic. The synchronization functions, scif_fence_mark()/ |
| * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to |
| * the completion of asynchronous RMA operations on the same endpoint. |
| * |
| * The DMA transfer of individual bytes is not guaranteed to complete in |
| * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last |
| * cacheline or partial cacheline of the source range will become visible on |
| * the destination node after all other transferred data in the source |
| * range has become visible on the destination node. |
| * |
| * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back |
| * the specified local memory range may be remain in a pinned state even after |
| * the specified transfer completes. This may reduce overhead if some or all of |
| * the same virtual address range is referenced in a subsequent call of |
| * scif_vreadfrom() or scif_vwriteto(). |
| * |
| * The optimal DMA performance will likely be realized if both |
| * addr and offset are cacheline aligned (are a multiple of 64). Lower |
| * performance will likely be realized if addr and offset are not cacheline |
| * aligned but are separated by some multiple of 64. The lowest level of |
| * performance is likely if addr and offset are not separated by a multiple of |
| * 64. |
| * |
| * The rma_flags argument is formed by ORing together zero or more of the |
| * following values. |
| * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA |
| * engine. |
| * SCIF_RMA_USECACHE - allow registration caching |
| * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the |
| * transfer has completed. Passing this flag results in the |
| * current implementation busy waiting and consuming CPU cycles |
| * while the DMA transfer is in progress for best performance by |
| * avoiding the interrupt latency. |
| * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of |
| * the source range becomes visible on the destination node |
| * after all other transferred data in the source range has |
| * become visible on the destination |
| * |
| * Return: |
| * Upon successful completion, scif_vwriteto() returns 0; otherwise in user |
| * mode -1 is returned and errno is set to indicate the error; in kernel mode |
| * the negative of one of the following errors is returned. |
| * |
| * Errors: |
| * EACCESS - Attempt to write to a read-only range |
| * EBADF, ENOTTY - epd is not a valid endpoint descriptor |
| * ECONNRESET - Connection reset by peer |
| * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid |
| * EINVAL - rma_flags is invalid |
| * ENODEV - The remote node is lost or existed, but is not currently in the |
| * network since it may have crashed |
| * ENOTCONN - The endpoint is not connected |
| * ENXIO - Offsets in the range [roffset, roffset + len - 1] are invalid for the |
| * registered address space of epd |
| */ |
| int scif_vwriteto(scif_epd_t epd, void *addr, size_t len, off_t roffset, |
| int rma_flags); |
| |
| /** |
| * scif_fence_mark() - Mark previously issued RMAs |
| * @epd: endpoint descriptor |
| * @flags: control flags |
| * @mark: marked value returned as output. |
| * |
| * scif_fence_mark() returns after marking the current set of all uncompleted |
| * RMAs initiated through the endpoint epd or the current set of all |
| * uncompleted RMAs initiated through the peer of endpoint epd. The RMAs are |
| * marked with a value returned at mark. The application may subsequently call |
| * scif_fence_wait(), passing the value returned at mark, to await completion |
| * of all RMAs so marked. |
| * |
| * The flags argument has exactly one of the following values. |
| * SCIF_FENCE_INIT_SELF - RMA operations initiated through endpoint |
| * epd are marked |
| * SCIF_FENCE_INIT_PEER - RMA operations initiated through the peer |
| * of endpoint epd are marked |
| * |
| * Return: |
| * Upon successful completion, scif_fence_mark() returns 0; otherwise in user |
| * mode -1 is returned and errno is set to indicate the error; in kernel mode |
| * the negative of one of the following errors is returned. |
| * |
| * Errors: |
| * EBADF, ENOTTY - epd is not a valid endpoint descriptor |
| * ECONNRESET - Connection reset by peer |
| * EINVAL - flags is invalid |
| * ENODEV - The remote node is lost or existed, but is not currently in the |
| * network since it may have crashed |
| * ENOTCONN - The endpoint is not connected |
| * ENOMEM - Insufficient kernel memory was available |
| */ |
| int scif_fence_mark(scif_epd_t epd, int flags, int *mark); |
| |
| /** |
| * scif_fence_wait() - Wait for completion of marked RMAs |
| * @epd: endpoint descriptor |
| * @mark: mark request |
| * |
| * scif_fence_wait() returns after all RMAs marked with mark have completed. |
| * The value passed in mark must have been obtained in a previous call to |
| * scif_fence_mark(). |
| * |
| * Return: |
| * Upon successful completion, scif_fence_wait() returns 0; otherwise in user |
| * mode -1 is returned and errno is set to indicate the error; in kernel mode |
| * the negative of one of the following errors is returned. |
| * |
| * Errors: |
| * EBADF, ENOTTY - epd is not a valid endpoint descriptor |
| * ECONNRESET - Connection reset by peer |
| * ENODEV - The remote node is lost or existed, but is not currently in the |
| * network since it may have crashed |
| * ENOTCONN - The endpoint is not connected |
| * ENOMEM - Insufficient kernel memory was available |
| */ |
| int scif_fence_wait(scif_epd_t epd, int mark); |
| |
| /** |
| * scif_fence_signal() - Request a memory update on completion of RMAs |
| * @epd: endpoint descriptor |
| * @loff: local offset |
| * @lval: local value to write to loffset |
| * @roff: remote offset |
| * @rval: remote value to write to roffset |
| * @flags: flags |
| * |
| * scif_fence_signal() returns after marking the current set of all uncompleted |
| * RMAs initiated through the endpoint epd or marking the current set of all |
| * uncompleted RMAs initiated through the peer of endpoint epd. |
| * |
| * If flags includes SCIF_SIGNAL_LOCAL, then on completion of the RMAs in the |
| * marked set, lval is written to memory at the address corresponding to offset |
| * loff in the local registered address space of epd. loff must be within a |
| * registered window. If flags includes SCIF_SIGNAL_REMOTE, then on completion |
| * of the RMAs in the marked set, rval is written to memory at the address |
| * corresponding to offset roff in the remote registered address space of epd. |
| * roff must be within a remote registered window of the peer of epd. Note |
| * that any specified offset must be DWORD (4 byte / 32 bit) aligned. |
| * |
| * The flags argument is formed by OR'ing together the following. |
| * Exactly one of the following values. |
| * SCIF_FENCE_INIT_SELF - RMA operations initiated through endpoint |
| * epd are marked |
| * SCIF_FENCE_INIT_PEER - RMA operations initiated through the peer |
| * of endpoint epd are marked |
| * One or more of the following values. |
| * SCIF_SIGNAL_LOCAL - On completion of the marked set of RMAs, write lval to |
| * memory at the address corresponding to offset loff in the local |
| * registered address space of epd. |
| * SCIF_SIGNAL_REMOTE - On completion of the marked set of RMAs, write rval to |
| * memory at the address corresponding to offset roff in the remote |
| * registered address space of epd. |
| * |
| * Return: |
| * Upon successful completion, scif_fence_signal() returns 0; otherwise in |
| * user mode -1 is returned and errno is set to indicate the error; in kernel |
| * mode the negative of one of the following errors is returned. |
| * |
| * Errors: |
| * EBADF, ENOTTY - epd is not a valid endpoint descriptor |
| * ECONNRESET - Connection reset by peer |
| * EINVAL - flags is invalid, or loff or roff are not DWORD aligned |
| * ENODEV - The remote node is lost or existed, but is not currently in the |
| * network since it may have crashed |
| * ENOTCONN - The endpoint is not connected |
| * ENXIO - loff is invalid for the registered address of epd, or roff is invalid |
| * for the registered address space, of the peer of epd |
| */ |
| int scif_fence_signal(scif_epd_t epd, off_t loff, u64 lval, off_t roff, |
| u64 rval, int flags); |
| |
| /** |
| * scif_get_node_ids() - Return information about online nodes |
| * @nodes: array in which to return online node IDs |
| * @len: number of entries in the nodes array |
| * @self: address to place the node ID of the local node |
| * |
| * scif_get_node_ids() fills in the nodes array with up to len node IDs of the |
| * nodes in the SCIF network. If there is not enough space in nodes, as |
| * indicated by the len parameter, only len node IDs are returned in nodes. The |
| * return value of scif_get_node_ids() is the total number of nodes currently in |
| * the SCIF network. By checking the return value against the len parameter, |
| * the user may determine if enough space for nodes was allocated. |
| * |
| * The node ID of the local node is returned at self. |
| * |
| * Return: |
| * Upon successful completion, scif_get_node_ids() returns the actual number of |
| * online nodes in the SCIF network including 'self'; otherwise in user mode |
| * -1 is returned and errno is set to indicate the error; in kernel mode no |
| * errors are returned. |
| * |
| * Errors: |
| * EFAULT - Bad address |
| */ |
| int scif_get_node_ids(u16 *nodes, int len, u16 *self); |
| |
| #endif /* __SCIF_H__ */ |