Merge branch 'work.iov_iter' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull iov_iter updates from Al Viro:

 - bio_{map,copy}_user_iov() series; those are cleanups - fixes from the
   same pile went into mainline (and stable) in late September.

 - fs/iomap.c iov_iter-related fixes

 - new primitive - iov_iter_for_each_range(), which applies a function
   to kernel-mapped segments of an iov_iter.

   Usable for kvec- and bvec-backed iov_iter; for bvec ones it does
   kmap()/kunmap() around the callback. _Not_ usable for iovec- or
   pipe-backed iov_iter; the latter would not be hard to support if the
   need ever appears, while the former is excluded by design.

   Another related primitive will have to wait for the next cycle - it
   passes page + offset + size instead of pointer + size, and that one
   will be usable for everything _except_ kvec. Unfortunately, that one
   didn't get exposure in -next yet, so...

 - a bit more lustre iov_iter work, including a use case for
   iov_iter_for_each_range() (checksum calculation; a usage sketch
   follows this list)

 - vhost/scsi leak fix in failure exit

 - misc cleanups and detritectomy...
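
As promised above, a usage sketch for iov_iter_for_each_range() - not
part of this pull; crc_cb() and checksum_iter() are made-up names, and
the pattern is borrowed from the lustre checksum conversion in the diff
below (assumes <linux/uio.h> and <linux/crc32.h>):

	/* callback: each segment arrives as a kernel-mapped kvec */
	static int crc_cb(struct kvec *v, void *context)
	{
		u32 *csum = context;

		*csum = crc32_le(*csum, v->iov_base, v->iov_len);
		return 0;	/* the callback's return value propagates to the caller */
	}

	/*
	 * checksum the first 'nob' bytes of a kvec- or bvec-backed iter;
	 * bvec segments get kmap()/kunmap() around the callback, and the
	 * iterator itself is not advanced by the walk
	 */
	static u32 checksum_iter(struct iov_iter *iter, size_t nob)
	{
		u32 csum = ~0;

		iov_iter_for_each_range(iter, nob, crc_cb, &csum);
		return csum;
	}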

* 'work.iov_iter' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (21 commits)
  iomap_dio_actor(): fix iov_iter bugs
  switch ksocknal_lib_recv_...() to use of iov_iter_for_each_range()
  lustre: switch struct ksock_conn to iov_iter
  vhost/scsi: switch to iov_iter_get_pages()
  fix a page leak in vhost_scsi_iov_to_sgl() error recovery
  new primitive: iov_iter_for_each_range()
  lnet_return_rx_credits_locked: don't abuse list_entry
  xen: don't open-code iov_iter_kvec()
  orangefs: remove detritus from struct orangefs_kiocb_s
  kill iov_shorten()
  bio_alloc_map_data(): do bmd->iter setup right there
  bio_copy_user_iov(): saner bio size calculation
  bio_map_user_iov(): get rid of copying iov_iter
  bio_copy_from_iter(): get rid of copying iov_iter
  move more stuff down into bio_copy_user_iov()
  blk_rq_map_user_iov(): move iov_iter_advance() down
  bio_map_user_iov(): get rid of the iov_for_each()
  bio_map_user_iov(): move alignment check into the main loop
  don't rely upon subsequent bio_add_pc_page() calls failing
  ... and with iov_iter_get_pages_alloc() it becomes even simpler
  ...
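
For the "xen: don't open-code iov_iter_kvec()" entry above, the shape of
the conversion in miniature - a sketch rather than the actual patch, with
buf and len as illustrative stand-ins for the pvcalls ring buffers:

	struct msghdr msg;
	struct kvec vec = { .iov_base = buf, .iov_len = len };

	memset(&msg, 0, sizeof(msg));

	/* before: filling the iov_iter fields by hand */
	msg.msg_iter.type = ITER_KVEC | READ;
	msg.msg_iter.kvec = &vec;
	msg.msg_iter.nr_segs = 1;
	msg.msg_iter.count = len;

	/* after: one helper call initializes the iterator consistently */
	iov_iter_kvec(&msg.msg_iter, ITER_KVEC | READ, &vec, 1, len);
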
diff --git a/block/bio.c b/block/bio.c
index 459cc85..228229f 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1062,14 +1062,21 @@
 	struct iovec iov[];
 };
 
-static struct bio_map_data *bio_alloc_map_data(unsigned int iov_count,
+static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data,
 					       gfp_t gfp_mask)
 {
-	if (iov_count > UIO_MAXIOV)
+	struct bio_map_data *bmd;
+	if (data->nr_segs > UIO_MAXIOV)
 		return NULL;
 
-	return kmalloc(sizeof(struct bio_map_data) +
-		       sizeof(struct iovec) * iov_count, gfp_mask);
+	bmd = kmalloc(sizeof(struct bio_map_data) +
+		       sizeof(struct iovec) * data->nr_segs, gfp_mask);
+	if (!bmd)
+		return NULL;
+	memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs);
+	bmd->iter = *data;
+	bmd->iter.iov = bmd->iov;
+	return bmd;
 }
 
 /**
@@ -1080,7 +1087,7 @@
  * Copy all pages from iov_iter to bio.
  * Returns 0 on success, or error on failure.
  */
-static int bio_copy_from_iter(struct bio *bio, struct iov_iter iter)
+static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter)
 {
 	int i;
 	struct bio_vec *bvec;
@@ -1091,9 +1098,9 @@
 		ret = copy_page_from_iter(bvec->bv_page,
 					  bvec->bv_offset,
 					  bvec->bv_len,
-					  &iter);
+					  iter);
 
-		if (!iov_iter_count(&iter))
+		if (!iov_iter_count(iter))
 			break;
 
 		if (ret < bvec->bv_len)
@@ -1187,40 +1194,18 @@
  */
 struct bio *bio_copy_user_iov(struct request_queue *q,
 			      struct rq_map_data *map_data,
-			      const struct iov_iter *iter,
+			      struct iov_iter *iter,
 			      gfp_t gfp_mask)
 {
 	struct bio_map_data *bmd;
 	struct page *page;
 	struct bio *bio;
-	int i, ret;
-	int nr_pages = 0;
+	int i = 0, ret;
+	int nr_pages;
 	unsigned int len = iter->count;
 	unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0;
 
-	for (i = 0; i < iter->nr_segs; i++) {
-		unsigned long uaddr;
-		unsigned long end;
-		unsigned long start;
-
-		uaddr = (unsigned long) iter->iov[i].iov_base;
-		end = (uaddr + iter->iov[i].iov_len + PAGE_SIZE - 1)
-			>> PAGE_SHIFT;
-		start = uaddr >> PAGE_SHIFT;
-
-		/*
-		 * Overflow, abort
-		 */
-		if (end < start)
-			return ERR_PTR(-EINVAL);
-
-		nr_pages += end - start;
-	}
-
-	if (offset)
-		nr_pages++;
-
-	bmd = bio_alloc_map_data(iter->nr_segs, gfp_mask);
+	bmd = bio_alloc_map_data(iter, gfp_mask);
 	if (!bmd)
 		return ERR_PTR(-ENOMEM);
 
@@ -1230,9 +1215,10 @@
 	 * shortlived one.
 	 */
 	bmd->is_our_pages = map_data ? 0 : 1;
-	memcpy(bmd->iov, iter->iov, sizeof(struct iovec) * iter->nr_segs);
-	bmd->iter = *iter;
-	bmd->iter.iov = bmd->iov;
+
+	nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
+	if (nr_pages > BIO_MAX_PAGES)
+		nr_pages = BIO_MAX_PAGES;
 
 	ret = -ENOMEM;
 	bio = bio_kmalloc(gfp_mask, nr_pages);
@@ -1281,17 +1267,24 @@
 	if (ret)
 		goto cleanup;
 
+	if (map_data)
+		map_data->offset += bio->bi_iter.bi_size;
+
 	/*
 	 * success
 	 */
 	if (((iter->type & WRITE) && (!map_data || !map_data->null_mapped)) ||
 	    (map_data && map_data->from_user)) {
-		ret = bio_copy_from_iter(bio, *iter);
+		ret = bio_copy_from_iter(bio, iter);
 		if (ret)
 			goto cleanup;
+	} else {
+		iov_iter_advance(iter, bio->bi_iter.bi_size);
 	}
 
 	bio->bi_private = bmd;
+	if (map_data && map_data->null_mapped)
+		bio_set_flag(bio, BIO_NULL_MAPPED);
 	return bio;
 cleanup:
 	if (!map_data)
@@ -1312,111 +1305,74 @@
  *	device. Returns an error pointer in case of error.
  */
 struct bio *bio_map_user_iov(struct request_queue *q,
-			     const struct iov_iter *iter,
+			     struct iov_iter *iter,
 			     gfp_t gfp_mask)
 {
 	int j;
-	int nr_pages = 0;
-	struct page **pages;
 	struct bio *bio;
-	int cur_page = 0;
-	int ret, offset;
-	struct iov_iter i;
-	struct iovec iov;
+	int ret;
 	struct bio_vec *bvec;
 
-	iov_for_each(iov, i, *iter) {
-		unsigned long uaddr = (unsigned long) iov.iov_base;
-		unsigned long len = iov.iov_len;
-		unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-		unsigned long start = uaddr >> PAGE_SHIFT;
-
-		/*
-		 * Overflow, abort
-		 */
-		if (end < start)
-			return ERR_PTR(-EINVAL);
-
-		nr_pages += end - start;
-		/*
-		 * buffer must be aligned to at least logical block size for now
-		 */
-		if (uaddr & queue_dma_alignment(q))
-			return ERR_PTR(-EINVAL);
-	}
-
-	if (!nr_pages)
+	if (!iov_iter_count(iter))
 		return ERR_PTR(-EINVAL);
 
-	bio = bio_kmalloc(gfp_mask, nr_pages);
+	bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES));
 	if (!bio)
 		return ERR_PTR(-ENOMEM);
 
-	ret = -ENOMEM;
-	pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask);
-	if (!pages)
-		goto out;
+	while (iov_iter_count(iter)) {
+		struct page **pages;
+		ssize_t bytes;
+		size_t offs, added = 0;
+		int npages;
 
-	iov_for_each(iov, i, *iter) {
-		unsigned long uaddr = (unsigned long) iov.iov_base;
-		unsigned long len = iov.iov_len;
-		unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-		unsigned long start = uaddr >> PAGE_SHIFT;
-		const int local_nr_pages = end - start;
-		const int page_limit = cur_page + local_nr_pages;
-
-		ret = get_user_pages_fast(uaddr, local_nr_pages,
-				(iter->type & WRITE) != WRITE,
-				&pages[cur_page]);
-		if (unlikely(ret < local_nr_pages)) {
-			for (j = cur_page; j < page_limit; j++) {
-				if (!pages[j])
-					break;
-				put_page(pages[j]);
-			}
-			ret = -EFAULT;
+		bytes = iov_iter_get_pages_alloc(iter, &pages, LONG_MAX, &offs);
+		if (unlikely(bytes <= 0)) {
+			ret = bytes ? bytes : -EFAULT;
 			goto out_unmap;
 		}
 
-		offset = offset_in_page(uaddr);
-		for (j = cur_page; j < page_limit; j++) {
-			unsigned int bytes = PAGE_SIZE - offset;
-			unsigned short prev_bi_vcnt = bio->bi_vcnt;
+		npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);
 
-			if (len <= 0)
-				break;
-			
-			if (bytes > len)
-				bytes = len;
+		if (unlikely(offs & queue_dma_alignment(q))) {
+			ret = -EINVAL;
+			j = 0;
+		} else {
+			for (j = 0; j < npages; j++) {
+				struct page *page = pages[j];
+				unsigned int n = PAGE_SIZE - offs;
+				unsigned short prev_bi_vcnt = bio->bi_vcnt;
 
-			/*
-			 * sorry...
-			 */
-			if (bio_add_pc_page(q, bio, pages[j], bytes, offset) <
-					    bytes)
-				break;
+				if (n > bytes)
+					n = bytes;
 
-			/*
-			 * check if vector was merged with previous
-			 * drop page reference if needed
-			 */
-			if (bio->bi_vcnt == prev_bi_vcnt)
-				put_page(pages[j]);
+				if (!bio_add_pc_page(q, bio, page, n, offs))
+					break;
 
-			len -= bytes;
-			offset = 0;
+				/*
+				 * check if vector was merged with previous
+				 * drop page reference if needed
+				 */
+				if (bio->bi_vcnt == prev_bi_vcnt)
+					put_page(page);
+
+				added += n;
+				bytes -= n;
+				offs = 0;
+			}
+			iov_iter_advance(iter, added);
 		}
-
-		cur_page = j;
 		/*
 		 * release the pages we didn't map into the bio, if any
 		 */
-		while (j < page_limit)
+		while (j < npages)
 			put_page(pages[j++]);
+		kvfree(pages);
+		/* couldn't stuff something into bio? */
+		if (bytes)
+			break;
 	}
 
-	kfree(pages);
-
 	bio_set_flag(bio, BIO_USER_MAPPED);
 
 	/*
@@ -1432,8 +1388,6 @@
 	bio_for_each_segment_all(bvec, bio, j) {
 		put_page(bvec->bv_page);
 	}
- out:
-	kfree(pages);
 	bio_put(bio);
 	return ERR_PTR(ret);
 }
diff --git a/block/blk-map.c b/block/blk-map.c
index d5251ed..b21f8e8 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -67,13 +67,6 @@
 	bio->bi_opf &= ~REQ_OP_MASK;
 	bio->bi_opf |= req_op(rq);
 
-	if (map_data && map_data->null_mapped)
-		bio_set_flag(bio, BIO_NULL_MAPPED);
-
-	iov_iter_advance(iter, bio->bi_iter.bi_size);
-	if (map_data)
-		map_data->offset += bio->bi_iter.bi_size;
-
 	orig_bio = bio;
 
 	/*
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
index f8ea523..986c2a4 100644
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
@@ -1683,10 +1683,10 @@
 	case SOCKNAL_RX_LNET_PAYLOAD:
 		last_rcv = conn->ksnc_rx_deadline -
 			   cfs_time_seconds(*ksocknal_tunables.ksnd_timeout);
-		CERROR("Completing partial receive from %s[%d], ip %pI4h:%d, with error, wanted: %d, left: %d, last alive is %ld secs ago\n",
+		CERROR("Completing partial receive from %s[%d], ip %pI4h:%d, with error, wanted: %zd, left: %d, last alive is %ld secs ago\n",
 		       libcfs_id2str(conn->ksnc_peer->ksnp_id), conn->ksnc_type,
 		       &conn->ksnc_ipaddr, conn->ksnc_port,
-		       conn->ksnc_rx_nob_wanted, conn->ksnc_rx_nob_left,
+		       iov_iter_count(&conn->ksnc_rx_to), conn->ksnc_rx_nob_left,
 		       cfs_duration_sec(cfs_time_sub(cfs_time_current(),
 						     last_rcv)));
 		lnet_finalize(conn->ksnc_peer->ksnp_ni,
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h
index 35a7b39..d50ebdf 100644
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h
@@ -358,11 +358,7 @@
 	__u8               ksnc_rx_scheduled; /* being progressed */
 	__u8               ksnc_rx_state;     /* what is being read */
 	int                ksnc_rx_nob_left;  /* # bytes to next hdr/body */
-	int                ksnc_rx_nob_wanted;/* bytes actually wanted */
-	int                ksnc_rx_niov;      /* # iovec frags */
-	struct kvec        *ksnc_rx_iov;      /* the iovec frags */
-	int                ksnc_rx_nkiov;     /* # page frags */
-	struct bio_vec		*ksnc_rx_kiov;	/* the page frags */
+	struct iov_iter    ksnc_rx_to;		/* copy destination */
 	union ksock_rxiovspace ksnc_rx_iov_space; /* space for frag descriptors */
 	__u32              ksnc_rx_csum;      /* partial checksum for incoming
 					       * data
@@ -701,8 +697,7 @@
 int ksocknal_lib_send_iov(struct ksock_conn *conn, struct ksock_tx *tx);
 int ksocknal_lib_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx);
 void ksocknal_lib_eager_ack(struct ksock_conn *conn);
-int ksocknal_lib_recv_iov(struct ksock_conn *conn);
-int ksocknal_lib_recv_kiov(struct ksock_conn *conn);
+int ksocknal_lib_recv(struct ksock_conn *conn);
 int ksocknal_lib_get_conn_tunables(struct ksock_conn *conn, int *txmem,
 				   int *rxmem, int *nagle);
 
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c
index a5f2ecb..27c56d5 100644
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c
@@ -250,19 +250,16 @@
 }
 
 static int
-ksocknal_recv_iov(struct ksock_conn *conn)
+ksocknal_recv_iter(struct ksock_conn *conn)
 {
-	struct kvec *iov = conn->ksnc_rx_iov;
 	int nob;
 	int rc;
 
-	LASSERT(conn->ksnc_rx_niov > 0);
-
 	/*
-	 * Never touch conn->ksnc_rx_iov or change connection
-	 * status inside ksocknal_lib_recv_iov
+	 * Never touch conn->ksnc_rx_to or change connection
+	 * status inside ksocknal_lib_recv
 	 */
-	rc = ksocknal_lib_recv_iov(conn);
+	rc = ksocknal_lib_recv(conn);
 
 	if (rc <= 0)
 		return rc;
@@ -276,69 +273,11 @@
 	mb();		       /* order with setting rx_started */
 	conn->ksnc_rx_started = 1;
 
-	conn->ksnc_rx_nob_wanted -= nob;
 	conn->ksnc_rx_nob_left -= nob;
 
-	do {
-		LASSERT(conn->ksnc_rx_niov > 0);
-
-		if (nob < (int)iov->iov_len) {
-			iov->iov_len -= nob;
-			iov->iov_base += nob;
-			return -EAGAIN;
-		}
-
-		nob -= iov->iov_len;
-		conn->ksnc_rx_iov = ++iov;
-		conn->ksnc_rx_niov--;
-	} while (nob);
-
-	return rc;
-}
-
-static int
-ksocknal_recv_kiov(struct ksock_conn *conn)
-{
-	struct bio_vec *kiov = conn->ksnc_rx_kiov;
-	int nob;
-	int rc;
-
-	LASSERT(conn->ksnc_rx_nkiov > 0);
-
-	/*
-	 * Never touch conn->ksnc_rx_kiov or change connection
-	 * status inside ksocknal_lib_recv_iov
-	 */
-	rc = ksocknal_lib_recv_kiov(conn);
-
-	if (rc <= 0)
-		return rc;
-
-	/* received something... */
-	nob = rc;
-
-	conn->ksnc_peer->ksnp_last_alive = cfs_time_current();
-	conn->ksnc_rx_deadline =
-		cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
-	mb();		       /* order with setting rx_started */
-	conn->ksnc_rx_started = 1;
-
-	conn->ksnc_rx_nob_wanted -= nob;
-	conn->ksnc_rx_nob_left -= nob;
-
-	do {
-		LASSERT(conn->ksnc_rx_nkiov > 0);
-
-		if (nob < (int)kiov->bv_len) {
-			kiov->bv_offset += nob;
-			kiov->bv_len -= nob;
-			return -EAGAIN;
-		}
-
-		nob -= kiov->bv_len;
-		conn->ksnc_rx_kiov = ++kiov;
-		conn->ksnc_rx_nkiov--;
-	} while (nob);
+	iov_iter_advance(&conn->ksnc_rx_to, nob);
+	if (iov_iter_count(&conn->ksnc_rx_to))
+		return -EAGAIN;
 
 	return 1;
 }
@@ -348,7 +287,7 @@
 {
 	/*
 	 * Return 1 on success, 0 on EOF, < 0 on error.
-	 * Caller checks ksnc_rx_nob_wanted to determine
+	 * Caller checks ksnc_rx_to to determine
 	 * progress/completion.
 	 */
 	int rc;
@@ -365,11 +304,7 @@
 	}
 
 	for (;;) {
-		if (conn->ksnc_rx_niov)
-			rc = ksocknal_recv_iov(conn);
-		else
-			rc = ksocknal_recv_kiov(conn);
-
+		rc = ksocknal_recv_iter(conn);
 		if (rc <= 0) {
 			/* error/EOF or partial receive */
 			if (rc == -EAGAIN) {
@@ -383,7 +318,7 @@
 
 		/* Completed a fragment */
 
-		if (!conn->ksnc_rx_nob_wanted) {
+		if (!iov_iter_count(&conn->ksnc_rx_to)) {
 			rc = 1;
 			break;
 		}
@@ -1051,6 +986,7 @@
 ksocknal_new_packet(struct ksock_conn *conn, int nob_to_skip)
 {
 	static char ksocknal_slop_buffer[4096];
+	struct kvec *kvec = (struct kvec *)&conn->ksnc_rx_iov_space;
 
 	int nob;
 	unsigned int niov;
@@ -1071,32 +1007,26 @@
 		case  KSOCK_PROTO_V2:
 		case  KSOCK_PROTO_V3:
 			conn->ksnc_rx_state = SOCKNAL_RX_KSM_HEADER;
-			conn->ksnc_rx_iov = (struct kvec *)&conn->ksnc_rx_iov_space;
-			conn->ksnc_rx_iov[0].iov_base = &conn->ksnc_msg;
-
-			conn->ksnc_rx_nob_wanted = offsetof(struct ksock_msg, ksm_u);
+			kvec->iov_base = &conn->ksnc_msg;
+			kvec->iov_len = offsetof(struct ksock_msg, ksm_u);
 			conn->ksnc_rx_nob_left = offsetof(struct ksock_msg, ksm_u);
-			conn->ksnc_rx_iov[0].iov_len = offsetof(struct ksock_msg, ksm_u);
+			iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec,
+					1, offsetof(struct ksock_msg, ksm_u));
 			break;
 
 		case KSOCK_PROTO_V1:
 			/* Receiving bare struct lnet_hdr */
 			conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER;
-			conn->ksnc_rx_nob_wanted = sizeof(struct lnet_hdr);
+			kvec->iov_base = &conn->ksnc_msg.ksm_u.lnetmsg;
+			kvec->iov_len = sizeof(struct lnet_hdr);
 			conn->ksnc_rx_nob_left = sizeof(struct lnet_hdr);
-
-			conn->ksnc_rx_iov = (struct kvec *)&conn->ksnc_rx_iov_space;
-			conn->ksnc_rx_iov[0].iov_base = &conn->ksnc_msg.ksm_u.lnetmsg;
-			conn->ksnc_rx_iov[0].iov_len = sizeof(struct lnet_hdr);
+			iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec,
+					1, sizeof(struct lnet_hdr));
 			break;
 
 		default:
 			LBUG();
 		}
-		conn->ksnc_rx_niov = 1;
-
-		conn->ksnc_rx_kiov = NULL;
-		conn->ksnc_rx_nkiov = 0;
 		conn->ksnc_rx_csum = ~0;
 		return 1;
 	}
@@ -1107,15 +1037,14 @@
 	 */
 	conn->ksnc_rx_state = SOCKNAL_RX_SLOP;
 	conn->ksnc_rx_nob_left = nob_to_skip;
-	conn->ksnc_rx_iov = (struct kvec *)&conn->ksnc_rx_iov_space;
 	skipped = 0;
 	niov = 0;
 
 	do {
 		nob = min_t(int, nob_to_skip, sizeof(ksocknal_slop_buffer));
 
-		conn->ksnc_rx_iov[niov].iov_base = ksocknal_slop_buffer;
-		conn->ksnc_rx_iov[niov].iov_len  = nob;
+		kvec[niov].iov_base = ksocknal_slop_buffer;
+		kvec[niov].iov_len  = nob;
 		niov++;
 		skipped += nob;
 		nob_to_skip -= nob;
@@ -1123,16 +1052,14 @@
 	} while (nob_to_skip &&    /* mustn't overflow conn's rx iov */
 		 niov < sizeof(conn->ksnc_rx_iov_space) / sizeof(struct iovec));
 
-	conn->ksnc_rx_niov = niov;
-	conn->ksnc_rx_kiov = NULL;
-	conn->ksnc_rx_nkiov = 0;
-	conn->ksnc_rx_nob_wanted = skipped;
+	iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec, niov, skipped);
 	return 0;
 }
 
 static int
 ksocknal_process_receive(struct ksock_conn *conn)
 {
+	struct kvec *kvec = (struct kvec *)&conn->ksnc_rx_iov_space;
 	struct lnet_hdr *lhdr;
 	struct lnet_process_id *id;
 	int rc;
@@ -1146,7 +1073,7 @@
 		conn->ksnc_rx_state == SOCKNAL_RX_LNET_HEADER ||
 		conn->ksnc_rx_state == SOCKNAL_RX_SLOP);
  again:
-	if (conn->ksnc_rx_nob_wanted) {
+	if (iov_iter_count(&conn->ksnc_rx_to)) {
 		rc = ksocknal_receive(conn);
 
 		if (rc <= 0) {
@@ -1171,7 +1098,7 @@
 			return (!rc ? -ESHUTDOWN : rc);
 		}
 
-		if (conn->ksnc_rx_nob_wanted) {
+		if (iov_iter_count(&conn->ksnc_rx_to)) {
 			/* short read */
 			return -EAGAIN;
 		}
@@ -1234,16 +1161,13 @@
 		}
 
 		conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER;
-		conn->ksnc_rx_nob_wanted = sizeof(struct ksock_lnet_msg);
 		conn->ksnc_rx_nob_left = sizeof(struct ksock_lnet_msg);
 
-		conn->ksnc_rx_iov = (struct kvec *)&conn->ksnc_rx_iov_space;
-		conn->ksnc_rx_iov[0].iov_base = &conn->ksnc_msg.ksm_u.lnetmsg;
-		conn->ksnc_rx_iov[0].iov_len = sizeof(struct ksock_lnet_msg);
+		kvec->iov_base = &conn->ksnc_msg.ksm_u.lnetmsg;
+		kvec->iov_len = sizeof(struct ksock_lnet_msg);
 
-		conn->ksnc_rx_niov = 1;
-		conn->ksnc_rx_kiov = NULL;
-		conn->ksnc_rx_nkiov = 0;
+		iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec,
+				1, sizeof(struct ksock_lnet_msg));
 
 		goto again;     /* read lnet header now */
 
@@ -1345,26 +1269,9 @@
 	LASSERT(to->nr_segs <= LNET_MAX_IOV);
 
 	conn->ksnc_cookie = msg;
-	conn->ksnc_rx_nob_wanted = iov_iter_count(to);
 	conn->ksnc_rx_nob_left = rlen;
 
-	if (to->type & ITER_KVEC) {
-		conn->ksnc_rx_nkiov = 0;
-		conn->ksnc_rx_kiov = NULL;
-		conn->ksnc_rx_iov = conn->ksnc_rx_iov_space.iov;
-		conn->ksnc_rx_niov =
-			lnet_extract_iov(LNET_MAX_IOV, conn->ksnc_rx_iov,
-					 to->nr_segs, to->kvec,
-					 to->iov_offset, iov_iter_count(to));
-	} else {
-		conn->ksnc_rx_niov = 0;
-		conn->ksnc_rx_iov = NULL;
-		conn->ksnc_rx_kiov = conn->ksnc_rx_iov_space.kiov;
-		conn->ksnc_rx_nkiov =
-			lnet_extract_kiov(LNET_MAX_IOV, conn->ksnc_rx_kiov,
-					 to->nr_segs, to->bvec,
-					 to->iov_offset, iov_iter_count(to));
-	}
+	conn->ksnc_rx_to = *to;
 
 	LASSERT(conn->ksnc_rx_scheduled);
 
@@ -2329,12 +2236,12 @@
 				     conn->ksnc_rx_deadline)) {
 			/* Timed out incomplete incoming message */
 			ksocknal_conn_addref(conn);
-			CNETERR("Timeout receiving from %s (%pI4h:%d), state %d wanted %d left %d\n",
+			CNETERR("Timeout receiving from %s (%pI4h:%d), state %d wanted %zd left %d\n",
 				libcfs_id2str(peer->ksnp_id),
 				&conn->ksnc_ipaddr,
 				conn->ksnc_port,
 				conn->ksnc_rx_state,
-				conn->ksnc_rx_nob_wanted,
+				iov_iter_count(&conn->ksnc_rx_to),
 				conn->ksnc_rx_nob_left);
 			return conn;
 		}
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c
index 970140f..cb28dd2 100644
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c
@@ -162,94 +162,39 @@
 			  sizeof(opt));
 }
 
-int
-ksocknal_lib_recv_iov(struct ksock_conn *conn)
+static int lustre_csum(struct kvec *v, void *context)
 {
-	unsigned int niov = conn->ksnc_rx_niov;
-	struct kvec *iov = conn->ksnc_rx_iov;
-	struct msghdr msg = {
-		.msg_flags = 0
-	};
-	int nob;
-	int i;
-	int rc;
-	int fragnob;
-	int sum;
-	__u32 saved_csum;
-
-	LASSERT(niov > 0);
-
-	for (nob = i = 0; i < niov; i++)
-		nob += iov[i].iov_len;
-
-	LASSERT(nob <= conn->ksnc_rx_nob_wanted);
-
-	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, iov, niov, nob);
-	rc = sock_recvmsg(conn->ksnc_sock, &msg, MSG_DONTWAIT);
-
-	saved_csum = 0;
-	if (conn->ksnc_proto == &ksocknal_protocol_v2x) {
-		saved_csum = conn->ksnc_msg.ksm_csum;
-		conn->ksnc_msg.ksm_csum = 0;
-	}
-
-	if (saved_csum) {
-		/* accumulate checksum */
-		for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) {
-			LASSERT(i < niov);
-
-			fragnob = iov[i].iov_len;
-			if (fragnob > sum)
-				fragnob = sum;
-
-			conn->ksnc_rx_csum = crc32_le(conn->ksnc_rx_csum,
-						      iov[i].iov_base,
-						      fragnob);
-		}
-		conn->ksnc_msg.ksm_csum = saved_csum;
-	}
-
-	return rc;
+	struct ksock_conn *conn = context;
+	conn->ksnc_rx_csum = crc32_le(conn->ksnc_rx_csum,
+				      v->iov_base, v->iov_len);
+	return 0;
 }
 
 int
-ksocknal_lib_recv_kiov(struct ksock_conn *conn)
+ksocknal_lib_recv(struct ksock_conn *conn)
 {
-	unsigned int niov = conn->ksnc_rx_nkiov;
-	struct bio_vec *kiov = conn->ksnc_rx_kiov;
-	struct msghdr msg = {
-		.msg_flags = 0
-	};
-	int nob;
-	int i;
+	struct msghdr msg = { .msg_iter = conn->ksnc_rx_to };
+	__u32 saved_csum;
 	int rc;
-	void *base;
-	int sum;
-	int fragnob;
 
-	for (nob = i = 0; i < niov; i++)
-		nob += kiov[i].bv_len;
-
-	LASSERT(nob <= conn->ksnc_rx_nob_wanted);
-
-	iov_iter_bvec(&msg.msg_iter, READ | ITER_BVEC, kiov, niov, nob);
 	rc = sock_recvmsg(conn->ksnc_sock, &msg, MSG_DONTWAIT);
+	if (rc <= 0)
+		return rc;
 
-	if (conn->ksnc_msg.ksm_csum) {
-		for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) {
-			LASSERT(i < niov);
+	saved_csum = conn->ksnc_msg.ksm_csum;
+	if (!saved_csum)
+		return rc;
 
-			base = kmap(kiov[i].bv_page) + kiov[i].bv_offset;
-			fragnob = kiov[i].bv_len;
-			if (fragnob > sum)
-				fragnob = sum;
+	/* header is included only in V2 - V3 checksums only the bulk data */
+	if (!(conn->ksnc_rx_to.type & ITER_BVEC) &&
+	     conn->ksnc_proto != &ksocknal_protocol_v2x)
+		return rc;
+
+	/* accumulate checksum */
+	conn->ksnc_msg.ksm_csum = 0;
+	iov_iter_for_each_range(&conn->ksnc_rx_to, rc, lustre_csum, conn);
+	conn->ksnc_msg.ksm_csum = saved_csum;
 
-			conn->ksnc_rx_csum = crc32_le(conn->ksnc_rx_csum,
-						      base, fragnob);
-
-			kunmap(kiov[i].bv_page);
-		}
-	}
 	return rc;
 }
 
diff --git a/drivers/staging/lustre/lnet/lnet/lib-move.c b/drivers/staging/lustre/lnet/lnet/lib-move.c
index 27848cd..68d16ff 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-move.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-move.c
@@ -890,7 +890,7 @@
 		 */
 		LASSERT(msg->msg_kiov);
 
-		rb = list_entry(msg->msg_kiov, struct lnet_rtrbuf, rb_kiov[0]);
+		rb = container_of(msg->msg_kiov, struct lnet_rtrbuf, rb_kiov[0]);
 		rbp = rb->rb_pool;
 
 		msg->msg_kiov = NULL;
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 1bc9be8..71517b3 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -210,12 +210,6 @@
 static DEFINE_MUTEX(vhost_scsi_mutex);
 static LIST_HEAD(vhost_scsi_list);
 
-static int iov_num_pages(void __user *iov_base, size_t iov_len)
-{
-	return (PAGE_ALIGN((unsigned long)iov_base + iov_len) -
-	       ((unsigned long)iov_base & PAGE_MASK)) >> PAGE_SHIFT;
-}
-
 static void vhost_scsi_done_inflight(struct kref *kref)
 {
 	struct vhost_scsi_inflight *inflight;
@@ -618,48 +612,31 @@
  */
 static int
 vhost_scsi_map_to_sgl(struct vhost_scsi_cmd *cmd,
-		      void __user *ptr,
-		      size_t len,
+		      struct iov_iter *iter,
 		      struct scatterlist *sgl,
 		      bool write)
 {
-	unsigned int npages = 0, offset, nbytes;
-	unsigned int pages_nr = iov_num_pages(ptr, len);
-	struct scatterlist *sg = sgl;
 	struct page **pages = cmd->tvc_upages;
-	int ret, i;
+	struct scatterlist *sg = sgl;
+	ssize_t bytes;
+	size_t offset;
+	unsigned int npages = 0;
 
-	if (pages_nr > VHOST_SCSI_PREALLOC_UPAGES) {
-		pr_err("vhost_scsi_map_to_sgl() pages_nr: %u greater than"
-		       " preallocated VHOST_SCSI_PREALLOC_UPAGES: %u\n",
-			pages_nr, VHOST_SCSI_PREALLOC_UPAGES);
-		return -ENOBUFS;
-	}
-
-	ret = get_user_pages_fast((unsigned long)ptr, pages_nr, write, pages);
+	bytes = iov_iter_get_pages(iter, pages, LONG_MAX,
+				VHOST_SCSI_PREALLOC_UPAGES, &offset);
 	/* No pages were pinned */
-	if (ret < 0)
-		goto out;
-	/* Less pages pinned than wanted */
-	if (ret != pages_nr) {
-		for (i = 0; i < ret; i++)
-			put_page(pages[i]);
-		ret = -EFAULT;
-		goto out;
-	}
+	if (bytes <= 0)
+		return bytes < 0 ? bytes : -EFAULT;
 
-	while (len > 0) {
-		offset = (uintptr_t)ptr & ~PAGE_MASK;
-		nbytes = min_t(unsigned int, PAGE_SIZE - offset, len);
-		sg_set_page(sg, pages[npages], nbytes, offset);
-		ptr += nbytes;
-		len -= nbytes;
-		sg++;
-		npages++;
-	}
+	iov_iter_advance(iter, bytes);
 
-out:
-	return ret;
+	while (bytes) {
+		unsigned n = min_t(unsigned, PAGE_SIZE - offset, bytes);
+		sg_set_page(sg++, pages[npages++], n, offset);
+		bytes -= n;
+		offset = 0;
+	}
+	return npages;
 }
 
 static int
@@ -687,24 +664,20 @@
 		      struct iov_iter *iter,
 		      struct scatterlist *sg, int sg_count)
 {
-	size_t off = iter->iov_offset;
-	int i, ret;
+	struct scatterlist *p = sg;
+	int ret;
 
-	for (i = 0; i < iter->nr_segs; i++) {
-		void __user *base = iter->iov[i].iov_base + off;
-		size_t len = iter->iov[i].iov_len - off;
-
-		ret = vhost_scsi_map_to_sgl(cmd, base, len, sg, write);
+	while (iov_iter_count(iter)) {
+		ret = vhost_scsi_map_to_sgl(cmd, iter, sg, write);
 		if (ret < 0) {
-			for (i = 0; i < sg_count; i++) {
-				struct page *page = sg_page(&sg[i]);
+			while (p < sg) {
+				struct page *page = sg_page(p++);
 				if (page)
 					put_page(page);
 			}
 			return ret;
 		}
 		sg += ret;
-		off = 0;
 	}
 	return 0;
 }
diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c
index 02cd33c..c7822d8 100644
--- a/drivers/xen/pvcalls-back.c
+++ b/drivers/xen/pvcalls-back.c
@@ -134,20 +134,16 @@
 	masked_cons = pvcalls_mask(cons, array_size);
 
 	memset(&msg, 0, sizeof(msg));
-	msg.msg_iter.type = ITER_KVEC|WRITE;
-	msg.msg_iter.count = wanted;
 	if (masked_prod < masked_cons) {
 		vec[0].iov_base = data->in + masked_prod;
 		vec[0].iov_len = wanted;
-		msg.msg_iter.kvec = vec;
-		msg.msg_iter.nr_segs = 1;
+		iov_iter_kvec(&msg.msg_iter, ITER_KVEC|WRITE, vec, 1, wanted);
 	} else {
 		vec[0].iov_base = data->in + masked_prod;
 		vec[0].iov_len = array_size - masked_prod;
 		vec[1].iov_base = data->in;
 		vec[1].iov_len = wanted - vec[0].iov_len;
-		msg.msg_iter.kvec = vec;
-		msg.msg_iter.nr_segs = 2;
+		iov_iter_kvec(&msg.msg_iter, ITER_KVEC|WRITE, vec, 2, wanted);
 	}
 
 	atomic_set(&map->read, 0);
@@ -196,20 +192,16 @@
 
 	memset(&msg, 0, sizeof(msg));
 	msg.msg_flags |= MSG_DONTWAIT;
-	msg.msg_iter.type = ITER_KVEC|READ;
-	msg.msg_iter.count = size;
 	if (pvcalls_mask(prod, array_size) > pvcalls_mask(cons, array_size)) {
 		vec[0].iov_base = data->out + pvcalls_mask(cons, array_size);
 		vec[0].iov_len = size;
-		msg.msg_iter.kvec = vec;
-		msg.msg_iter.nr_segs = 1;
+		iov_iter_kvec(&msg.msg_iter, ITER_KVEC|READ, vec, 1, size);
 	} else {
 		vec[0].iov_base = data->out + pvcalls_mask(cons, array_size);
 		vec[0].iov_len = array_size - pvcalls_mask(cons, array_size);
 		vec[1].iov_base = data->out;
 		vec[1].iov_len = size - vec[0].iov_len;
-		msg.msg_iter.kvec = vec;
-		msg.msg_iter.nr_segs = 2;
+		iov_iter_kvec(&msg.msg_iter, ITER_KVEC|READ, vec, 2, size);
 	}
 
 	atomic_set(&map->write, 0);
diff --git a/fs/iomap.c b/fs/iomap.c
index b9f7480..47d29ccf 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -856,6 +856,7 @@
 	struct bio *bio;
 	bool need_zeroout = false;
 	int nr_pages, ret;
+	size_t copied = 0;
 
 	if ((pos | length | align) & ((1 << blkbits) - 1))
 		return -EINVAL;
@@ -867,7 +868,7 @@
 		/*FALLTHRU*/
 	case IOMAP_UNWRITTEN:
 		if (!(dio->flags & IOMAP_DIO_WRITE)) {
-			iov_iter_zero(length, dio->submit.iter);
+			length = iov_iter_zero(length, dio->submit.iter);
 			dio->size += length;
 			return length;
 		}
@@ -904,8 +905,11 @@
 	}
 
 	do {
-		if (dio->error)
+		size_t n;
+		if (dio->error) {
+			iov_iter_revert(dio->submit.iter, copied);
 			return 0;
+		}
 
 		bio = bio_alloc(GFP_KERNEL, nr_pages);
 		bio_set_dev(bio, iomap->bdev);
@@ -918,20 +922,24 @@
 		ret = bio_iov_iter_get_pages(bio, &iter);
 		if (unlikely(ret)) {
 			bio_put(bio);
-			return ret;
+			return copied ? copied : ret;
 		}
 
+		n = bio->bi_iter.bi_size;
 		if (dio->flags & IOMAP_DIO_WRITE) {
 			bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC | REQ_IDLE);
-			task_io_account_write(bio->bi_iter.bi_size);
+			task_io_account_write(n);
 		} else {
 			bio_set_op_attrs(bio, REQ_OP_READ, 0);
 			if (dio->flags & IOMAP_DIO_DIRTY)
 				bio_set_pages_dirty(bio);
 		}
 
-		dio->size += bio->bi_iter.bi_size;
-		pos += bio->bi_iter.bi_size;
+		iov_iter_advance(dio->submit.iter, n);
+
+		dio->size += n;
+		pos += n;
+		copied += n;
 
 		nr_pages = iov_iter_npages(&iter, BIO_MAX_PAGES);
 
@@ -947,9 +955,7 @@
 		if (pad)
 			iomap_dio_zero(dio, iomap, pos, fs_block_size - pad);
 	}
-
-	iov_iter_advance(dio->submit.iter, length);
-	return length;
+	return copied;
 }
 
 ssize_t
diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h
index 004af34..f44d5eb 100644
--- a/fs/orangefs/orangefs-kernel.h
+++ b/fs/orangefs/orangefs-kernel.h
@@ -275,12 +275,6 @@
 	/* orangefs kernel operation type */
 	struct orangefs_kernel_op_s *op;
 
-	/* The user space buffers from/to which I/O is being staged */
-	struct iovec *iov;
-
-	/* number of elements in the iovector */
-	unsigned long nr_segs;
-
 	/* set to indicate the type of the operation */
 	int rw;
 
diff --git a/fs/read_write.c b/fs/read_write.c
index 0046d72..f8547b8 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -635,27 +635,6 @@
 	return ret;
 }
 
-/*
- * Reduce an iovec's length in-place.  Return the resulting number of segments
- */
-unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
-{
-	unsigned long seg = 0;
-	size_t len = 0;
-
-	while (seg < nr_segs) {
-		seg++;
-		if (len + iov->iov_len >= to) {
-			iov->iov_len = to - len;
-			break;
-		}
-		len += iov->iov_len;
-		iov++;
-	}
-	return seg;
-}
-EXPORT_SYMBOL(iov_shorten);
-
 static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
 		loff_t *ppos, int type, rwf_t flags)
 {
diff --git a/include/linux/bio.h b/include/linux/bio.h
index d4eec19..82f0c8fd 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -450,7 +450,7 @@
 int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter);
 struct rq_map_data;
 extern struct bio *bio_map_user_iov(struct request_queue *,
-				    const struct iov_iter *, gfp_t);
+				    struct iov_iter *, gfp_t);
 extern void bio_unmap_user(struct bio *);
 extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int,
 				gfp_t);
@@ -482,7 +482,7 @@
 
 extern struct bio *bio_copy_user_iov(struct request_queue *,
 				     struct rq_map_data *,
-				     const struct iov_iter *,
+				     struct iov_iter *,
 				     gfp_t);
 extern int bio_uncopy_user(struct bio *);
 void zero_fill_bio(struct bio *bio);
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 8a642cd..e67e12a 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -80,8 +80,6 @@
 	     ((iov = iov_iter_iovec(&(iter))), 1);		\
 	     iov_iter_advance(&(iter), (iov).iov_len))
 
-unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to);
-
 size_t iov_iter_copy_from_user_atomic(struct page *page,
 		struct iov_iter *i, unsigned long offset, size_t bytes);
 void iov_iter_advance(struct iov_iter *i, size_t bytes);
@@ -246,4 +244,8 @@
 int import_single_range(int type, void __user *buf, size_t len,
 		 struct iovec *iov, struct iov_iter *i);
 
+int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
+			    int (*f)(struct kvec *vec, void *context),
+			    void *context);
+
 #endif
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 1c1c06d..9702126 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1446,3 +1446,25 @@
 	return 0;
 }
 EXPORT_SYMBOL(import_single_range);
+
+int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
+			    int (*f)(struct kvec *vec, void *context),
+			    void *context)
+{
+	struct kvec w;
+	int err = -EINVAL;
+	if (!bytes)
+		return 0;
+
+	iterate_all_kinds(i, bytes, v, -EINVAL, ({
+		w.iov_base = kmap(v.bv_page) + v.bv_offset;
+		w.iov_len = v.bv_len;
+		err = f(&w, context);
+		kunmap(v.bv_page);
+		err;}), ({
+		w = v;
+		err = f(&w, context);})
+	)
+	return err;
+}
+EXPORT_SYMBOL(iov_iter_for_each_range);