Chris Metcalf | e5a0693 | 2010-11-01 17:00:37 -0400 | [diff] [blame] | 1 | /* |
| 2 | * Copyright 2010 Tilera Corporation. All Rights Reserved. |
| 3 | * |
| 4 | * This program is free software; you can redistribute it and/or |
| 5 | * modify it under the terms of the GNU General Public License |
| 6 | * as published by the Free Software Foundation, version 2. |
| 7 | * |
| 8 | * This program is distributed in the hope that it will be useful, but |
| 9 | * WITHOUT ANY WARRANTY; without even the implied warranty of |
| 10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or |
| 11 | * NON INFRINGEMENT. See the GNU General Public License for |
| 12 | * more details. |
| 13 | */ |
| 14 | |
| 15 | /** |
| 16 | * @file drivers/xgbe/impl.h |
| 17 | * Implementation details for the NetIO library. |
| 18 | */ |
| 19 | |
| 20 | #ifndef __DRV_XGBE_IMPL_H__ |
| 21 | #define __DRV_XGBE_IMPL_H__ |
| 22 | |
| 23 | #include <hv/netio_errors.h> |
| 24 | #include <hv/netio_intf.h> |
| 25 | #include <hv/drv_xgbe_intf.h> |
| 26 | |
| 27 | |
/** How many notification groups we have (log2). */
#define LOG2_NUM_GROUPS (12)
/** How many notification groups we have. */
#define NUM_GROUPS (1 << LOG2_NUM_GROUPS)

/** Number of output requests we'll buffer per tile. */
#define EPP_REQS_PER_TILE (32)

/** Words used in an eDMA command without checksum acceleration. */
#define EDMA_WDS_NO_CSUM 8
/** Words used in an eDMA command with checksum acceleration. */
#define EDMA_WDS_CSUM 10
/** Total available words in the eDMA command FIFO. */
#define EDMA_WDS_TOTAL 128
| 42 | |
| 43 | |
/*
 * FIXME: These definitions are internal and should have underscores!
 * NOTE: The actual numeric values here are intentional and allow us to
 * optimize the concept "if small ... else if large ... else ...", by
 * checking for the low bit being set, and then for non-zero.
 * These are used as array indices, so they must have the values (0, 1, 2)
 * in some order.
 */
#define SIZE_SMALL (1)       /**< Small packet queue (low bit set). */
#define SIZE_LARGE (2)       /**< Large packet queue (non-zero, low bit clear). */
#define SIZE_JUMBO (0)       /**< Jumbo packet queue (zero). */

/** The number of "SIZE_xxx" values. */
#define NETIO_NUM_SIZES 3
| 58 | |
| 59 | |
/*
 * Default numbers of packets for IPP drivers.  These values are chosen
 * such that CIPP1 will not overflow its L2 cache.
 */

/** The default number of small packets. */
#define NETIO_DEFAULT_SMALL_PACKETS 2750
/** The default number of large packets. */
#define NETIO_DEFAULT_LARGE_PACKETS 2500
/** The default number of jumbo packets. */
#define NETIO_DEFAULT_JUMBO_PACKETS 250


/** Log2 of the size of a memory arena. */
#define NETIO_ARENA_SHIFT      24      /* 16 MB */
/** Size of a memory arena. */
#define NETIO_ARENA_SIZE       (1 << NETIO_ARENA_SHIFT)
| 77 | |
| 78 | |
/** A queue of packets.
 *
 * This structure partially defines a queue of packets waiting to be
 * processed.  The queue as a whole is written to by an interrupt handler and
 * read by non-interrupt code; this data structure is what's touched by the
 * interrupt handler.  The other part of the queue state, the read offset, is
 * kept in user space, not in hypervisor space, so it is in a separate data
 * structure.
 *
 * The read offset (__packet_receive_read in the user part of the queue
 * structure) points to the next packet to be read.  When the read offset is
 * equal to the write offset, the queue is empty; therefore the queue must
 * contain one more slot than the required maximum queue size.
 *
 * Here's an example of all 3 state variables and what they mean.  All
 * pointers move left to right.
 *
 * @code
 *   I   I   V   V   V   V   I   I   I   I
 *   0   1   2   3   4   5   6   7   8   9  10
 *           ^               ^       ^   ^
 *           |               |       |
 *           |               |       __last_packet_plus_one
 *           |               __buffer_write
 *           __packet_receive_read
 * @endcode
 *
 * This queue has 10 slots, and thus can hold 9 packets
 * (__last_packet_plus_one = 10).  The read pointer is at 2, and the write
 * pointer is at 6; thus, there are valid, unread packets in slots 2, 3, 4,
 * and 5.  The remaining slots are invalid (do not contain a packet).
 */
typedef struct {
  /** Byte offset of the next notify packet to be written: zero for the first
   *  packet on the queue, sizeof (netio_pkt_t) for the second packet on the
   *  queue, etc. */
  volatile uint32_t __packet_write;

  /** Offset of the packet after the last valid packet (i.e., when any
   *  pointer is incremented to this value, it wraps back to zero). */
  uint32_t __last_packet_plus_one;
}
__netio_packet_queue_t;
| 122 | |
| 123 | |
/** A queue of buffers.
 *
 * This structure partially defines a queue of empty buffers which have been
 * obtained via requests to the IPP.  (The elements of the queue are packet
 * handles, which are transformed into a full netio_pkt_t when the buffer is
 * retrieved.)  The queue as a whole is written to by an interrupt handler
 * and read by non-interrupt code; this data structure is what's touched by
 * the interrupt handler.  The other parts of the queue state, the read
 * offset and requested write offset, are kept in user space, not in
 * hypervisor space, so they are in a separate data structure.
 *
 * The read offset (__buffer_read in the user part of the queue structure)
 * points to the next buffer to be read.  When the read offset is equal to
 * the write offset, the queue is empty; therefore the queue must contain
 * one more slot than the required maximum queue size.
 *
 * The requested write offset (__buffer_requested_write in the user part of
 * the queue structure) points to the slot which will hold the next buffer we
 * request from the IPP, once we get around to sending such a request.  When
 * the requested write offset is equal to the write offset, no requests for
 * new buffers are outstanding; when the requested write offset is one
 * greater than the read offset, no more requests may be sent.
 *
 * Note that, unlike the packet_queue, the buffer_queue places incoming
 * buffers at decreasing addresses.  This makes the check for "is it time to
 * wrap the buffer pointer" cheaper in the assembly code which receives new
 * buffers, and means that the value which defines the queue size,
 * __last_buffer, is different than in the packet queue.  Also, the offset
 * used in the packet_queue is already scaled by the size of a packet; here
 * we use unscaled slot indices for the offsets.  (These differences are
 * historical, and in the future it's possible that the packet_queue will
 * look more like this queue.)
 *
 * Here's an example of all 4 state variables and what they mean.  Remember:
 * all pointers move right to left.
 *
 * @code
 *   V   V   V   I   I   R   R   V   V   V
 *   0   1   2   3   4   5   6   7   8   9
 *           ^       ^       ^           ^
 *           |       |       |           |
 *           |       |       |           __last_buffer
 *           |       |       __buffer_write
 *           |       __buffer_requested_write
 *           __buffer_read
 * @endcode
 *
 * This queue has 10 slots, and thus can hold 9 buffers (__last_buffer = 9).
 * The read pointer is at 2, and the write pointer is at 6; thus, there are
 * valid, unread buffers in slots 2, 1, 0, 9, 8, and 7.  The requested write
 * pointer is at 4; thus, requests have been made to the IPP for buffers
 * which will be placed in slots 6 and 5 when they arrive.  Finally, the
 * remaining slots are invalid (do not contain a buffer).
 */
typedef struct
{
  /** Ordinal number of the next buffer to be written: 0 for the first slot
   *  in the queue, 1 for the second slot in the queue, etc. */
  volatile uint32_t __buffer_write;

  /** Ordinal number of the last buffer (i.e., when any pointer is
   *  decremented below zero, it is reloaded with this value). */
  uint32_t __last_buffer;
}
__netio_buffer_queue_t;
| 189 | |
| 190 | |
| 191 | /** |
| 192 | * An object for providing Ethernet packets to a process. |
| 193 | */ |
| 194 | typedef struct __netio_queue_impl_t |
| 195 | { |
| 196 | /** The queue of packets waiting to be received. */ |
| 197 | __netio_packet_queue_t __packet_receive_queue; |
| 198 | /** The intr bit mask that IDs this device. */ |
| 199 | unsigned int __intr_id; |
| 200 | /** Offset to queues of empty buffers, one per size. */ |
| 201 | uint32_t __buffer_queue[NETIO_NUM_SIZES]; |
| 202 | /** The address of the first EPP tile, or -1 if no EPP. */ |
| 203 | /* ISSUE: Actually this is always "0" or "~0". */ |
| 204 | uint32_t __epp_location; |
| 205 | /** The queue ID that this queue represents. */ |
| 206 | unsigned int __queue_id; |
| 207 | /** Number of acknowledgements received. */ |
| 208 | volatile uint32_t __acks_received; |
| 209 | /** Last completion number received for packet_sendv. */ |
| 210 | volatile uint32_t __last_completion_rcv; |
| 211 | /** Number of packets allowed to be outstanding. */ |
| 212 | uint32_t __max_outstanding; |
| 213 | /** First VA available for packets. */ |
| 214 | void* __va_0; |
| 215 | /** First VA in second range available for packets. */ |
| 216 | void* __va_1; |
| 217 | /** Padding to align the "__packets" field to the size of a netio_pkt_t. */ |
| 218 | uint32_t __padding[3]; |
| 219 | /** The packets themselves. */ |
| 220 | netio_pkt_t __packets[0]; |
| 221 | } |
| 222 | netio_queue_impl_t; |
| 223 | |
| 224 | |
/**
 * An object for managing the user end of a NetIO queue.
 *
 * Holds the read-side cursors and send bookkeeping that belong to the
 * process, as opposed to the hypervisor-written state in
 * netio_queue_impl_t.
 */
typedef struct __netio_queue_user_impl_t
{
  /** The next incoming packet to be read. */
  uint32_t __packet_receive_read;
  /** The next empty buffers to be read, one index per size. */
  uint8_t __buffer_read[NETIO_NUM_SIZES];
  /** Where the empty buffer we next request from the IPP will go, one index
   *  per size. */
  uint8_t __buffer_requested_write[NETIO_NUM_SIZES];
  /** PCIe interface flag. */
  uint8_t __pcie;
  /* NOTE(review): the seven uint8_t bytes above likely get one byte of
   * implicit padding here so the next uint32_t is 4-byte aligned —
   * harmless, but confirm if this layout is shared across an ABI boundary. */
  /** Number of packets left to be received before we send a credit
   *  update. */
  uint32_t __receive_credit_remaining;
  /** Value placed in __receive_credit_remaining when it reaches zero. */
  uint32_t __receive_credit_interval;
  /** First fast I/O routine index. */
  uint32_t __fastio_index;
  /** Number of acknowledgements expected. */
  uint32_t __acks_outstanding;
  /** Last completion number requested. */
  uint32_t __last_completion_req;
  /** File descriptor for driver. */
  int __fd;
}
netio_queue_user_impl_t;
| 253 | |
| 254 | |
#define NETIO_GROUP_CHUNK_SIZE   64   /**< Max # groups in one IPP request */
#define NETIO_BUCKET_CHUNK_SIZE  64   /**< Max # buckets in one IPP request */
| 257 | |
| 258 | |
/** Internal structure used to convey packet send information to the
 * hypervisor.  FIXME: Actually, it's not used for that anymore, but
 * netio_packet_send() still uses it internally.
 */
typedef struct
{
  uint16_t flags;              /**< Packet flags (__NETIO_SEND_FLG_xxx) */
  uint16_t transfer_size;      /**< Size of packet */
  uint32_t va;                 /**< VA of start of packet */
  __netio_pkt_handle_t handle; /**< Packet handle */
  uint32_t csum0;              /**< First checksum word */
  uint32_t csum1;              /**< Second checksum word */
}
__netio_send_cmd_t;
| 273 | |
| 274 | |
/** Flags used in two contexts:
 *  - As the "flags" member in the __netio_send_cmd_t, above; used only
 *    for netio_pkt_send_{prepare,commit}.
 *  - As part of the flags passed to the various send packet fast I/O calls.
 */

/** Need acknowledgement on this packet.  Note that some code in the
 *  normal send_pkt fast I/O handler assumes that this is equal to 1. */
#define __NETIO_SEND_FLG_ACK 0x1

/** Do checksum on this packet.  (Only used with the __netio_send_cmd_t;
 *  normal packet sends use a special fast I/O index to denote checksumming,
 *  and multi-segment sends test the checksum descriptor.) */
#define __NETIO_SEND_FLG_CSUM 0x2

/** Get a completion on this packet.  Only used with multi-segment sends. */
#define __NETIO_SEND_FLG_COMPLETION 0x4

/** Position of the number-of-extra-segments value in the flags word.
 *  Only used with multi-segment sends. */
#define __NETIO_SEND_FLG_XSEG_SHIFT 3

/** Width of the number-of-extra-segments value in the flags word. */
#define __NETIO_SEND_FLG_XSEG_WIDTH 2
| 299 | |
| 300 | #endif /* __DRV_XGBE_IMPL_H__ */ |