Or Gerlitz | 49cd538 | 2006-05-11 10:00:21 +0300 | [diff] [blame] | 1 | /* |
| 2 | * iSER transport for the Open iSCSI Initiator & iSER transport internals |
| 3 | * |
| 4 | * Copyright (C) 2004 Dmitry Yusupov |
| 5 | * Copyright (C) 2004 Alex Aizman |
| 6 | * Copyright (C) 2005 Mike Christie |
| 7 | * based on code maintained by open-iscsi@googlegroups.com |
| 8 | * |
| 9 | * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved. |
| 10 | * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. |
| 11 | * |
| 12 | * This software is available to you under a choice of one of two |
| 13 | * licenses. You may choose to be licensed under the terms of the GNU |
| 14 | * General Public License (GPL) Version 2, available from the file |
| 15 | * COPYING in the main directory of this source tree, or the |
| 16 | * OpenIB.org BSD license below: |
| 17 | * |
| 18 | * Redistribution and use in source and binary forms, with or |
| 19 | * without modification, are permitted provided that the following |
| 20 | * conditions are met: |
| 21 | * |
| 22 | * - Redistributions of source code must retain the above |
| 23 | * copyright notice, this list of conditions and the following |
| 24 | * disclaimer. |
| 25 | * |
| 26 | * - Redistributions in binary form must reproduce the above |
| 27 | * copyright notice, this list of conditions and the following |
| 28 | * disclaimer in the documentation and/or other materials |
| 29 | * provided with the distribution. |
| 30 | * |
| 31 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| 32 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| 33 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| 34 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
| 35 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
| 36 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
| 37 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 38 | * SOFTWARE. |
| 39 | * |
| 40 | * $Id: iscsi_iser.h 7051 2006-05-10 12:29:11Z ogerlitz $ |
| 41 | */ |
| 42 | #ifndef __ISCSI_ISER_H__ |
| 43 | #define __ISCSI_ISER_H__ |
| 44 | |
| 45 | #include <linux/types.h> |
| 46 | #include <linux/net.h> |
| 47 | #include <scsi/libiscsi.h> |
| 48 | #include <scsi/scsi_transport_iscsi.h> |
| 49 | |
| 50 | #include <linux/wait.h> |
| 51 | #include <linux/sched.h> |
| 52 | #include <linux/list.h> |
| 53 | #include <linux/slab.h> |
| 54 | #include <linux/dma-mapping.h> |
| 55 | #include <linux/mutex.h> |
| 56 | #include <linux/mempool.h> |
| 57 | #include <linux/uio.h> |
| 58 | |
| 59 | #include <linux/socket.h> |
| 60 | #include <linux/in.h> |
| 61 | #include <linux/in6.h> |
| 62 | |
| 63 | #include <rdma/ib_verbs.h> |
| 64 | #include <rdma/ib_fmr_pool.h> |
| 65 | #include <rdma/rdma_cm.h> |
| 66 | |
/* Driver identification strings; PFX is the prefix pasted into log messages. */
#define DRV_NAME	"iser"
#define PFX		DRV_NAME ": "
#define DRV_VER		"0.1"
#define DRV_DATE	"May 7th, 2006"
| 71 | |
/*
 * Debug trace: prints a KERN_DEBUG message prefixed with PFX and the name of
 * the calling function, but only while iser_debug_level is greater than zero.
 */
#define iser_dbg(fmt, ...)						\
	do {								\
		if (iser_debug_level > 0)				\
			printk(KERN_DEBUG PFX "%s:" fmt,		\
			       __func__, ##__VA_ARGS__);		\
	} while (0)
| 78 | |
/*
 * Error report: unconditionally prints a KERN_ERR message prefixed with PFX
 * and the name of the calling function.
 */
#define iser_err(fmt, ...)						\
	do {								\
		printk(KERN_ERR PFX "%s:" fmt,				\
		       __func__, ##__VA_ARGS__);			\
	} while (0)
| 84 | |
/*
 * 4K page geometry. SIZE_4K is built from an unsigned long long constant so
 * that MASK_4K is a full 64-bit mask even where unsigned long is 32 bits wide.
 * With 1UL, ANDing MASK_4K into a 64-bit value (e.g. a u64 address) on such a
 * build would zero-extend the mask and clear the upper 32 bits of the value.
 */
#define SHIFT_4K	12
#define SIZE_4K		(1ULL << SHIFT_4K)
#define MASK_4K		(~(SIZE_4K-1))
| 88 | |
/* support up to 512KB in one RDMA, expressed as a count of 4K units */
#define ISCSI_ISER_SG_TABLESIZE		((512 * 1024) >> SHIFT_4K)
#define ISCSI_ISER_MAX_LUN		256
#define ISCSI_ISER_MAX_CMD_LEN		16
| 93 | |
/* QP settings */

/* Maximal bounds on received asynchronous PDUs: NOOP_IN(2), ASYNC_EVENT(2) */
#define ISER_MAX_RX_MISC_PDUS		4

/* Bound on misc sent PDUs: NOOP_OUT(2), TEXT(1), SCSI_TMFUNC(2), LOGOUT(1) */
#define ISER_MAX_TX_MISC_PDUS		6

/* ISCSI_XMIT_CMDS_MAX plus both misc PDU bounds */
#define ISER_QP_MAX_RECV_DTOS						\
	(ISCSI_XMIT_CMDS_MAX + ISER_MAX_RX_MISC_PDUS + ISER_MAX_TX_MISC_PDUS)

/*
 * The max TX (send) WR supported by the iSER QP is defined by
 *	max_send_wr = T * (1 + D) + C
 * where D is how many inflight dataouts we expect to have at max for a SCSI
 * command. The tx posting & completion handling code supports an -EAGAIN
 * scheme where tx is suspended till the QP has room for more send WR.
 * D=8 comes from 64K/8K.
 */
#define ISER_INFLIGHT_DATAOUTS		8

#define ISER_QP_MAX_REQ_DTOS						\
	(ISCSI_XMIT_CMDS_MAX * (1 + ISER_INFLIGHT_DATAOUTS) +		\
	 ISER_MAX_TX_MISC_PDUS +					\
	 ISER_MAX_RX_MISC_PDUS)
| 117 | |
/*
 * Single-bit constants.
 * NOTE(review): presumably values for iser_hdr.flags, with WSV/RSV marking
 * the write/read stag fields as valid - confirm against the iSER spec.
 */
#define ISER_VER	0x10
#define ISER_WSV	0x08
#define ISER_RSV	0x04
| 121 | |
| 122 | struct iser_hdr { |
| 123 | u8 flags; |
| 124 | u8 rsvd[3]; |
| 125 | __be32 write_stag; /* write rkey */ |
| 126 | __be64 write_va; |
| 127 | __be32 read_stag; /* read rkey */ |
| 128 | __be64 read_va; |
| 129 | } __attribute__((packed)); |
| 130 | |
| 131 | |
/* Length, in bytes, of an object name string buffer */
#define ISER_OBJECT_NAME_SIZE	64
| 134 | |
/* States of the RDMA connection; ISER_CONN_STATES_NUM counts them. */
enum iser_ib_conn_state {
	ISER_CONN_INIT = 0,	/* descriptor allocd, no conn */
	ISER_CONN_PENDING,	/* in the process of being established */
	ISER_CONN_UP,		/* up and running */
	ISER_CONN_TERMINATING,	/* in the process of being terminated */
	ISER_CONN_DOWN,		/* shut down */
	ISER_CONN_STATES_NUM
};
| 143 | |
/* Task status values, numbered from zero in declaration order. */
enum iser_task_status {
	ISER_TASK_STATUS_INIT		= 0,
	ISER_TASK_STATUS_STARTED	= 1,
	ISER_TASK_STATUS_COMPLETED	= 2
};
| 149 | |
/* Data transfer direction; ISER_DIRS_NUM is the number of directions. */
enum iser_data_dir {
	ISER_DIR_IN	= 0,	/* to initiator */
	ISER_DIR_OUT	= 1,	/* from initiator */
	ISER_DIRS_NUM
};
| 155 | |
| 156 | struct iser_data_buf { |
| 157 | void *buf; /* pointer to the sg list */ |
| 158 | unsigned int size; /* num entries of this sg */ |
| 159 | unsigned long data_len; /* total data len */ |
| 160 | unsigned int dma_nents; /* returned by dma_map_sg */ |
| 161 | char *copy_buf; /* allocated copy buf for SGs unaligned * |
| 162 | * for rdma which are copied */ |
| 163 | struct scatterlist sg_single; /* SG-ified clone of a non SG SC or * |
| 164 | * unaligned SG */ |
| 165 | }; |
| 166 | |
/*
 * Forward declarations: these structures are referenced by pointer below
 * before their full definitions appear.
 */
struct iser_device;
struct iscsi_iser_conn;
struct iscsi_iser_cmd_task;
| 171 | |
| 172 | struct iser_mem_reg { |
| 173 | u32 lkey; |
| 174 | u32 rkey; |
| 175 | u64 va; |
| 176 | u64 len; |
| 177 | void *mem_h; |
Erez Zilber | d811102 | 2006-09-11 12:26:33 +0300 | [diff] [blame] | 178 | int is_fmr; |
Or Gerlitz | 49cd538 | 2006-05-11 10:00:21 +0300 | [diff] [blame] | 179 | }; |
| 180 | |
| 181 | struct iser_regd_buf { |
| 182 | struct iser_mem_reg reg; /* memory registration info */ |
| 183 | void *virt_addr; |
| 184 | struct iser_device *device; /* device->device for dma_unmap */ |
Ralph Campbell | 5180311 | 2006-12-12 14:31:00 -0800 | [diff] [blame^] | 185 | u64 dma_addr; /* if non zero, addr for dma_unmap */ |
Or Gerlitz | 49cd538 | 2006-05-11 10:00:21 +0300 | [diff] [blame] | 186 | enum dma_data_direction direction; /* direction for dma_unmap */ |
| 187 | unsigned int data_size; |
| 188 | atomic_t ref_count; /* refcount, freed when dec to 0 */ |
| 189 | }; |
| 190 | |
/* Capacity of the per-DTO vectors below */
#define MAX_REGD_BUF_VECTOR_LEN	2

/*
 * A DTO carries up to MAX_REGD_BUF_VECTOR_LEN registered buffers; the three
 * arrays are parallel, one slot per buffer.
 */
struct iser_dto {
	struct iscsi_iser_cmd_task	*ctask;
	struct iser_conn		*ib_conn;
	int				notify_enable;

	/* vector of registered buffers */
	unsigned int			regd_vector_len;
	struct iser_regd_buf		*regd[MAX_REGD_BUF_VECTOR_LEN];

	/* offset into the registered buffer may be specified */
	unsigned int			offset[MAX_REGD_BUF_VECTOR_LEN];

	/* a smaller size may be specified, if 0, then full size is used */
	unsigned int			used_sz[MAX_REGD_BUF_VECTOR_LEN];
};
| 208 | |
/* Kind of descriptor: one receive type and three transmit types. */
enum iser_desc_type {
	ISCSI_RX = 0,
	ISCSI_TX_CONTROL,
	ISCSI_TX_SCSI_COMMAND,
	ISCSI_TX_DATAOUT
};
| 215 | |
| 216 | struct iser_desc { |
| 217 | struct iser_hdr iser_header; |
| 218 | struct iscsi_hdr iscsi_header; |
| 219 | struct iser_regd_buf hdr_regd_buf; |
| 220 | void *data; /* used by RX & TX_CONTROL */ |
| 221 | struct iser_regd_buf data_regd_buf; /* used by RX & TX_CONTROL */ |
| 222 | enum iser_desc_type type; |
| 223 | struct iser_dto dto; |
| 224 | }; |
| 225 | |
| 226 | struct iser_device { |
| 227 | struct ib_device *ib_device; |
| 228 | struct ib_pd *pd; |
| 229 | struct ib_cq *cq; |
| 230 | struct ib_mr *mr; |
| 231 | struct tasklet_struct cq_tasklet; |
| 232 | struct list_head ig_list; /* entry in ig devices list */ |
| 233 | int refcount; |
| 234 | }; |
| 235 | |
| 236 | struct iser_conn { |
| 237 | struct iscsi_iser_conn *iser_conn; /* iser conn for upcalls */ |
| 238 | enum iser_ib_conn_state state; /* rdma connection state */ |
| 239 | spinlock_t lock; /* used for state changes */ |
| 240 | struct iser_device *device; /* device context */ |
| 241 | struct rdma_cm_id *cma_id; /* CMA ID */ |
| 242 | struct ib_qp *qp; /* QP */ |
| 243 | struct ib_fmr_pool *fmr_pool; /* pool of IB FMRs */ |
| 244 | int disc_evt_flag; /* disconn event delivered */ |
| 245 | wait_queue_head_t wait; /* waitq for conn/disconn */ |
| 246 | atomic_t post_recv_buf_count; /* posted rx count */ |
| 247 | atomic_t post_send_buf_count; /* posted tx count */ |
| 248 | struct work_struct comperror_work; /* conn term sleepable ctx*/ |
| 249 | char name[ISER_OBJECT_NAME_SIZE]; |
| 250 | struct iser_page_vec *page_vec; /* represents SG to fmr maps* |
| 251 | * maps serialized as tx is*/ |
| 252 | struct list_head conn_list; /* entry in ig conn list */ |
| 253 | }; |
| 254 | |
| 255 | struct iscsi_iser_conn { |
| 256 | struct iscsi_conn *iscsi_conn;/* ptr to iscsi conn */ |
| 257 | struct iser_conn *ib_conn; /* iSER IB conn */ |
| 258 | |
| 259 | rwlock_t lock; |
| 260 | }; |
| 261 | |
| 262 | struct iscsi_iser_cmd_task { |
| 263 | struct iser_desc desc; |
| 264 | struct iscsi_iser_conn *iser_conn; |
Or Gerlitz | 49cd538 | 2006-05-11 10:00:21 +0300 | [diff] [blame] | 265 | enum iser_task_status status; |
| 266 | int command_sent; /* set if command sent */ |
| 267 | int dir[ISER_DIRS_NUM]; /* set if dir use*/ |
| 268 | struct iser_regd_buf rdma_regd[ISER_DIRS_NUM];/* regd rdma buf */ |
| 269 | struct iser_data_buf data[ISER_DIRS_NUM]; /* orig. data des*/ |
| 270 | struct iser_data_buf data_copy[ISER_DIRS_NUM];/* contig. copy */ |
| 271 | }; |
| 272 | |
| 273 | struct iser_page_vec { |
| 274 | u64 *pages; |
| 275 | int length; |
| 276 | int offset; |
| 277 | int data_size; |
| 278 | }; |
| 279 | |
| 280 | struct iser_global { |
| 281 | struct mutex device_list_mutex;/* */ |
| 282 | struct list_head device_list; /* all iSER devices */ |
| 283 | struct mutex connlist_mutex; |
| 284 | struct list_head connlist; /* all iSER IB connections */ |
| 285 | |
Roland Dreier | e54f818 | 2006-11-29 15:33:07 -0800 | [diff] [blame] | 286 | struct kmem_cache *desc_cache; |
Or Gerlitz | 49cd538 | 2006-05-11 10:00:21 +0300 | [diff] [blame] | 287 | }; |
| 288 | |
/* Objects defined elsewhere in the driver */
extern struct iser_global	ig;
extern int			iser_debug_level;	/* tested by iser_dbg() */
| 291 | |
/* allocate connection resources needed for rdma functionality */
int iser_conn_set_full_featured_mode(struct iscsi_conn *conn);

/* The following entry points are handed the libiscsi connection. */
int iser_send_control(struct iscsi_conn *conn, struct iscsi_mgmt_task *mtask);

int iser_send_command(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask);

int iser_send_data_out(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
		       struct iscsi_data *hdr);

void iscsi_iser_recv(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
		     char *rx_data, int rx_data_len);
| 309 | |
/* Operations on the IB connection object (struct iser_conn) */
int iser_conn_init(struct iser_conn **ib_conn);
void iser_conn_terminate(struct iser_conn *ib_conn);
void iser_conn_release(struct iser_conn *ib_conn);

/* Operations taking a descriptor */
void iser_rcv_completion(struct iser_desc *desc, unsigned long dto_xfer_len);
void iser_snd_completion(struct iser_desc *desc);

/* Operations on a command task, a DTO and a registered buffer */
void iser_ctask_rdma_init(struct iscsi_iser_cmd_task *ctask);
void iser_ctask_rdma_finalize(struct iscsi_iser_cmd_task *ctask);

void iser_dto_buffs_release(struct iser_dto *dto);

int iser_regd_buff_release(struct iser_regd_buf *regd_buf);
| 328 | |
/* Operations on registered buffers and on the data of a command task */
void iser_reg_single(struct iser_device *device, struct iser_regd_buf *regd_buf,
		     enum dma_data_direction direction);

int iser_start_rdma_unaligned_sg(struct iscsi_iser_cmd_task *ctask,
				 enum iser_data_dir cmd_dir);

void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_cmd_task *ctask,
				     enum iser_data_dir cmd_dir);

int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *ctask,
		      enum iser_data_dir cmd_dir);

/* Addresses are passed as struct sockaddr_in */
int iser_connect(struct iser_conn *ib_conn, struct sockaddr_in *src_addr,
		 struct sockaddr_in *dst_addr, int non_blocking);

int iser_reg_page_vec(struct iser_conn *ib_conn, struct iser_page_vec *page_vec,
		      struct iser_mem_reg *mem_reg);

void iser_unreg_mem(struct iser_mem_reg *mem_reg);

int iser_post_recv(struct iser_desc *rx_desc);
int iser_post_send(struct iser_desc *tx_desc);

int iser_conn_state_comp(struct iser_conn *ib_conn,
			 enum iser_ib_conn_state comp);
Erez Zilber | 74a2078 | 2006-09-27 16:43:06 +0300 | [diff] [blame] | 358 | |
/* DMA map/unmap pair operating on a command task */
int iser_dma_map_task_data(struct iscsi_iser_cmd_task *iser_ctask,
			   struct iser_data_buf *data,
			   enum iser_data_dir iser_dir,
			   enum dma_data_direction dma_dir);

void iser_dma_unmap_task_data(struct iscsi_iser_cmd_task *iser_ctask);
Or Gerlitz | 49cd538 | 2006-05-11 10:00:21 +0300 | [diff] [blame] | 365 | #endif |