IB/core: Add support for masked atomic operations

 - Add new IB_WR_MASKED_ATOMIC_CMP_AND_SWP and IB_WR_MASKED_ATOMIC_FETCH_AND_ADD
   send opcodes that can be used to post "masked atomic compare and
   swap" and "masked atomic fetch and add" work request respectively.
 - Add masked_atomic_cap capability.
 - Add mask fields to atomic struct of ib_send_wr
 - Add new opcodes to ib_wc_opcode

The new operations are described more precisely below:

* Masked Compare and Swap (MskCmpSwap)

The MskCmpSwap atomic operation is an extension to the CmpSwap
operation defined in the IB spec.  MskCmpSwap allows the user to
select a portion of the 64 bit target data for the “compare” check as
well as to restrict the swap to a (possibly different) portion.  The
pseudo code below describes the operation:

| atomic_response = *va
| if (!((compare_add ^ *va) & compare_add_mask)) then
|     *va = (*va & ~(swap_mask)) | (swap & swap_mask)
|
| return atomic_response

The additional operands are carried in the Extended Transport Header.
Atomic response generation and packet format for MskCmpSwap is as for
standard IB Atomic operations.

* Masked Fetch and Add (MFetchAdd)

The MFetchAdd Atomic operation extends the functionality of the
standard IB FetchAdd by allowing the user to split the target into
multiple fields of selectable length. The atomic add is done
independently on each one of this fields. A bit set in the
field_boundary parameter specifies the field boundaries. The pseudo
code below describes the operation:

| bit_adder(ci, b1, b2, *co)
| {
|	value = ci + b1 + b2
|	*co = !!(value & 2)
|
|	return value & 1
| }
|
| #define MASK_IS_SET(mask, attr)      (!!((mask)&(attr)))
| bit_position = 1
| carry = 0
| atomic_response = 0
|
| for i = 0 to 63
| {
|         if ( i != 0 )
|                 bit_position =  bit_position << 1
|
|         bit_add_res = bit_adder(carry, MASK_IS_SET(*va, bit_position),
|                                 MASK_IS_SET(compare_add, bit_position), &new_carry)
|         if (bit_add_res)
|                 atomic_response |= bit_position
|
|         carry = ((new_carry) && (!MASK_IS_SET(compare_add_mask, bit_position)))
| }
|
| return atomic_response

Signed-off-by: Vladimir Sokolovsky <vlad@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index a585e0f..310d314 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -136,6 +136,7 @@
 	int			max_qp_init_rd_atom;
 	int			max_ee_init_rd_atom;
 	enum ib_atomic_cap	atomic_cap;
+	enum ib_atomic_cap	masked_atomic_cap;
 	int			max_ee;
 	int			max_rdd;
 	int			max_mw;
@@ -467,6 +468,8 @@
 	IB_WC_LSO,
 	IB_WC_LOCAL_INV,
 	IB_WC_FAST_REG_MR,
+	IB_WC_MASKED_COMP_SWAP,
+	IB_WC_MASKED_FETCH_ADD,
 /*
  * Set value of IB_WC_RECV so consumers can test if a completion is a
  * receive by testing (opcode & IB_WC_RECV).
@@ -689,6 +692,8 @@
 	IB_WR_RDMA_READ_WITH_INV,
 	IB_WR_LOCAL_INV,
 	IB_WR_FAST_REG_MR,
+	IB_WR_MASKED_ATOMIC_CMP_AND_SWP,
+	IB_WR_MASKED_ATOMIC_FETCH_AND_ADD,
 };
 
 enum ib_send_flags {
@@ -731,6 +736,8 @@
 			u64	remote_addr;
 			u64	compare_add;
 			u64	swap;
+			u64	compare_add_mask;
+			u64	swap_mask;
 			u32	rkey;
 		} atomic;
 		struct {