drbd: introduce WRITE_SAME support

We will support WRITE_SAME, if
 * all peers support WRITE_SAME (both in kernel and DRBD version),
 * all peer devices support WRITE_SAME
 * logical_block_size is identical on all peers.

We may at some point introduce a fallback on the receiving side
for devices/kernels that do not support WRITE_SAME,
by open-coding a submit loop. But not yet.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
diff --git a/drivers/block/drbd/drbd_protocol.h b/drivers/block/drbd/drbd_protocol.h
index 95ca458..4d29680 100644
--- a/drivers/block/drbd/drbd_protocol.h
+++ b/drivers/block/drbd/drbd_protocol.h
@@ -64,6 +64,11 @@
 	P_RS_THIN_REQ         = 0x32, /* Request a block for resync or reply P_RS_DEALLOCATED */
 	P_RS_DEALLOCATED      = 0x33, /* Contains only zeros on sync source node */
 
+	/* REQ_WRITE_SAME.
+	 * On a receiving side without REQ_WRITE_SAME,
+	 * we may fall back to an opencoded loop instead. */
+	P_WSAME               = 0x34,
+
 	P_MAY_IGNORE	      = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */
 	P_MAX_OPT_CMD	      = 0x101,
 
@@ -110,8 +115,11 @@
 	u32	  pad;
 } __packed;
 
-/* these defines must not be changed without changing the protocol version */
-#define DP_HARDBARRIER	      1 /* depricated */
+/* These defines must not be changed without changing the protocol version.
+ * New defines may only be introduced together with protocol version bump or
+ * new protocol feature flags.
+ */
+#define DP_HARDBARRIER	      1 /* no longer used */
 #define DP_RW_SYNC	      2 /* equals REQ_SYNC    */
 #define DP_MAY_SET_IN_SYNC    4
 #define DP_UNPLUG             8 /* not used anymore   */
@@ -120,6 +128,7 @@
 #define DP_DISCARD           64 /* equals REQ_DISCARD */
 #define DP_SEND_RECEIVE_ACK 128 /* This is a proto B write request */
 #define DP_SEND_WRITE_ACK   256 /* This is a proto C write request */
+#define DP_WSAME            512 /* equiv. REQ_WRITE_SAME */
 
 struct p_data {
 	u64	    sector;    /* 64 bits sector number */
@@ -133,6 +142,11 @@
 	u32	    size;	/* == bio->bi_size */
 } __packed;
 
+struct p_wsame {
+	struct p_data p_data;
+	u32           size;     /* == bio->bi_size */
+} __packed;
+
 /*
  * commands which share a struct:
  *  p_block_ack:
@@ -164,8 +178,23 @@
  *   ReportParams
  */
 
-#define FF_TRIM      1
-#define FF_THIN_RESYNC 2
+/* supports TRIM/DISCARD on the "wire" protocol */
+#define DRBD_FF_TRIM 1
+
+/* Detect all-zeros during resync, and rather TRIM/UNMAP/DISCARD those blocks
+ * instead of fully allocate a supposedly thin volume on initial resync */
+#define DRBD_FF_THIN_RESYNC 2
+
+/* supports REQ_WRITE_SAME on the "wire" protocol.
+ * Note: this flag is overloaded,
+ * its presence also
+ *   - indicates support for 128 MiB "batch bios",
+ *     max discard size of 128 MiB
+ *     instead of 4M before that.
+ *   - indicates that we exchange additional settings in p_sizes
+ *     drbd_send_sizes()/receive_sizes()
+ */
+#define DRBD_FF_WSAME 4
 
 struct p_connection_features {
 	u32 protocol_min;
@@ -240,6 +269,40 @@
 	u64	    uuid;
 } __packed;
 
+/* optional queue_limits if (agreed_features & DRBD_FF_WSAME)
+ * see also struct queue_limits, as of late 2015 */
+struct o_qlim {
+	/* we don't need it yet, but we may as well communicate it now */
+	u32 physical_block_size;
+
+	/* so the original in struct queue_limits is unsigned short,
+	 * but I'd have to put in padding anyways. */
+	u32 logical_block_size;
+
+	/* One incoming bio becomes one DRBD request,
+	 * which may be translated to several bio on the receiving side.
+	 * We don't need to communicate chunk/boundary/segment ... limits.
+	 */
+
+	/* various IO hints may be useful with "diskless client" setups */
+	u32 alignment_offset;
+	u32 io_min;
+	u32 io_opt;
+
+	/* We may need to communicate integrity stuff at some point,
+	 * but let's not get ahead of ourselves. */
+
+	/* Backend discard capabilities.
+	 * Receiving side uses "blkdev_issue_discard()", no need to communicate
+	 * more specifics.  If the backend cannot do discards, the DRBD peer
+	 * may fall back to blkdev_issue_zeroout().
+	 */
+	u8 discard_enabled;
+	u8 discard_zeroes_data;
+	u8 write_same_capable;
+	u8 _pad;
+} __packed;
+
 struct p_sizes {
 	u64	    d_size;  /* size of disk */
 	u64	    u_size;  /* user requested size */
@@ -247,6 +310,9 @@
 	u32	    max_bio_size;  /* Maximal size of a BIO */
 	u16	    queue_order_type;  /* not yet implemented in DRBD*/
 	u16	    dds_flags; /* use enum dds_flags here. */
+
+	/* optional queue_limits if (agreed_features & DRBD_FF_WSAME) */
+	struct o_qlim qlim[0];
 } __packed;
 
 struct p_state {