blob: 25cbcc9aa87ebc12d62d751f93db8d8baf11f43b [file] [log] [blame]
subrata_modakabe017e2009-01-27 13:50:05 +00001/******************************************************************************/
2/* Copyright (c) Tim LaBerge <tim.laberge@quantum.com>, 2009 */
3/* */
4/* This program is free software; you can redistribute it and/or modify */
5/* it under the terms of the GNU General Public License as published by */
6/* the Free Software Foundation; either version 2 of the License, or */
7/* (at your option) any later version. */
8/* */
9/* This program is distributed in the hope that it will be useful, */
10/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
11/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See */
12/* the GNU General Public License for more details. */
13/* */
14/* You should have received a copy of the GNU General Public License */
Xiaoguang Wang52de6e22014-05-08 20:28:31 +080015/* along with this program; if not, write to the Free Software Foundation, */
16/* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
subrata_modakabe017e2009-01-27 13:50:05 +000017/* */
18/******************************************************************************/
19
20/******************************************************************************/
21/* */
22/* File: dma_thread_diotest7.c */
23/* */
24/* Description: The man page for open(2) states the following: */
25/* O_DIRECT (Since Linux 2.6.10). Try to minimize cache effects of the I/O */
26/* to and from this file. In general this will degrade performance, but it */
27/* is useful in special situations, such as when applications do their own */
28/* caching. File I/O is done directly to/from user space buffers. The I/O is*/
29/* synchronous, that is, at the completion of a read(2) or write(2), data is*/
30/* guaranteed to have been transferred. Under Linux 2.4 transfer sizes, and */
31/* the alignment of user buffer and file offset must all be multiples of */
32/* the logical block size of the file system. Under Linux 2.6 alignment to */
33/* 512-byte boundaries suffices. */
34/* However, it appears that data corruption may occur when a multithreaded */
35/* process reads into a non-page size aligned user buffer. A test program */
36/* which reliably reproduces the problem on ext3 and xfs is attached. The */
37/* program creates, patterns, reads, and verifies a series of files. In the */
38/* read phase, a file is opened with O_DIRECT n times, where n is the */
Garrett Cooper2c282152010-12-16 00:55:50 -080039/* number of cpu's. A single buffer large enough to contain the file is */
subrata_modakabe017e2009-01-27 13:50:05 +000040/* allocated and patterned with data not found in any of the files. The */
41/* alignment of the buffer is controlled by a command line option. Each file*/
42/* is read in parallel by n threads, where n is the number of cpu's. Thread */
43/* 0 reads the first page of data from the file into the first page of the */
44/* buffer, thread 1 reads the second page of data in to the second page of */
45/* the buffer, and so on. Thread n - 1 reads the remainder of the file into*/
46/* the remainder of the buffer. */
47/* After a thread reads data into the buffer, it immediately verifies that */
48/* the contents of the buffer are correct. If the buffer contains corrupt */
49/* data, the thread dumps the data surrounding the corruption and calls */
50/* abort(). Otherwise, the thread exits. */
51/* Crucially, before the reader threads are dispatched, another thread is */
52/* started which calls fork()/msleep() in a loop until all reads are compl- */
53/* eted. The child created by fork() does nothing but call exit(0). A comm- */
54/* and line option controls whether the buffer is aligned. In the case wh- */
55/* ere the buffer is aligned on a page boundary, all is well. In the case */
56/* where the buffer is aligned on a page + 512 byte offset, corruption is */
57/* seen frequently. */
58/* I believe that what is happening is that in the direct IO path, because */
59/* the user's buffer is not aligned, some user pages are being mapped twice.*/
60/* When a fork() happens in between the calls to map the page, the page will*/
61/* be marked as COW. When the second map happens (via get_user_pages()), a */
62/* new physical page will be allocated and copied. Thus, there is a race */
63/* between the completion of the first read from disk (and write to the user*/
64/* page) and get_user_pages() mapping the page for the second time. If the */
65/* write does not complete before the page is copied, the user will see */
66/* stale data in the first 512 bytes of this page of their buffer. Indeed, */
67/* this is corruption most frequently seen. (It's also possible for the race*/
68/* to be lost the other way, so that the last 3584 bytes of the page are */
69/* stale.) */
70/* The attached program (which is a heavily modified version of a program */
71/* provided by a customer seeing this problem) reliably reproduces the pro- */
72/* blem on any multicore linux machine on both ext3 and xfs, although any */
73/* filesystem using the generic blockdev_direct_IO() routine is probably */
74/* vulnerable. I've seen a few threads that mention the potential for this */
75/* kind of problem, but no definitive solution or workaround (other than */
76/* "Don't do that"). */
77/* http://marc.info/?l=linux-mm&m=122668235304637&w=2 */
78/* */
79/* Total Tests: 1 */
80/* */
81/* Test Name: dma_thread_diotest7 */
82/* */
83/* Author: Tim LaBerge <tim.laberge@quantum.com> */
84/* */
85/* History: Reported - Jan 07 2009 - Li Zefan <lizf@cn.fujitsu.com> */
86/* Ported - Jan 23 2009 - Subrata <subrata@linux.vnet.ibm.com> */
87/* */
88/******************************************************************************/
89
90#define _GNU_SOURCE 1
91
92#include <stdio.h>
Xiaoguang Wang52de6e22014-05-08 20:28:31 +080093#include <stdint.h>
subrata_modakabe017e2009-01-27 13:50:05 +000094#include <stdlib.h>
95#include <fcntl.h>
96#include <unistd.h>
97#include <memory.h>
98#include <pthread.h>
99#include <getopt.h>
100#include <errno.h>
101#include <sys/types.h>
102#include <sys/wait.h>
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800103#include <sys/mount.h>
subrata_modakabe017e2009-01-27 13:50:05 +0000104
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800105#include "test.h"
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800106#include "safe_macros.h"
107#include "tst_fs_type.h"
subrata_modakabe017e2009-01-27 13:50:05 +0000108
/* Size in bytes of each pattern file, and of one parallel read round. */
#define FILESIZE	(12 * 1024 * 1024)
#define READSIZE	(1024 * 1024)

/*
 * Test files are created below MNT_POINT; FILE_BASEPATH is a printf-style
 * template that takes the file index.
 */
#define MNT_POINT	"mntpoint"
#define FILE_BASEPATH	MNT_POINT "/_dma_thread_test_%.04d.tmp"

/* Mode used when creating MNT_POINT: rwx for owner, r-x for group/other. */
#define DIR_MODE	(S_IRUSR | S_IWUSR | S_IXUSR | \
			 S_IRGRP | S_IXGRP | \
			 S_IROTH | S_IXOTH)

/* Number of pattern files, and the accepted range for the -w option. */
#define FILECOUNT	100
#define MIN_WORKERS	2
#define MAX_WORKERS	256

/* Byte the read buffer is pre-filled with before every read round. */
#define PATTERN		(0xfa)

/* Evaluated at run time on every use. */
#define PAGE_SIZE	getpagesize()
subrata_modakabe017e2009-01-27 13:50:05 +0000121
Cyril Hrubis06b527d2014-08-11 17:48:19 +0200122char *TCID = "dma_thread_diotest";
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800123int TST_TOTAL = 1;
subrata_modakabe017e2009-01-27 13:50:05 +0000124
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800125static void setup(void);
126static void dma_thread_diotest_verify(void);
127static void cleanup(void);
128static void help(void);
subrata_modakabe017e2009-01-27 13:50:05 +0000129
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800130static unsigned char *buffer;
subrata_modakabe017e2009-01-27 13:50:05 +0000131
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800132static char *align_str;
133static int align;
134static char *workers_str;
135static int workers;
136static char *device;
137static int mount_flag;
138static option_t options[] = {
139 {"a:", NULL, &align_str},
140 {"w:", NULL, &workers_str},
141 {NULL, NULL, NULL}
142};
143
144static volatile int done;
145static volatile int tst_result;
subrata_modakabe017e2009-01-27 13:50:05 +0000146
/* Per-thread work description handed to worker_thread(). */
typedef struct {
	pthread_t tid;		/* thread handle filled in by pthread_create() */
	int worker_number;	/* index of this worker, used in messages */
	int fd;			/* descriptor this worker reads from */
	int offset;		/* file offset the read starts at */
	int length;		/* number of bytes to read and verify */
	int pattern;		/* byte value every read byte must equal */
	unsigned char *buffer;	/* destination of the read */
} worker_t;

/* Array of `workers` entries, allocated in setup(). */
static worker_t *worker;
subrata_modakabe017e2009-01-27 13:50:05 +0000157
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800158static void *worker_thread(void *arg)
subrata_modakabe017e2009-01-27 13:50:05 +0000159{
Wanlong Gao354ebb42012-12-07 10:10:04 +0800160 int i, k;
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800161 int nread;
162 worker_t *worker = (worker_t *)arg;
Wanlong Gao354ebb42012-12-07 10:10:04 +0800163 int offset = worker->offset;
164 int fd = worker->fd;
165 unsigned char *buffer = worker->buffer;
166 int pattern = worker->pattern;
167 int length = worker->length;
Garrett Cooper2c282152010-12-16 00:55:50 -0800168
Wanlong Gao354ebb42012-12-07 10:10:04 +0800169 if (lseek(fd, offset, SEEK_SET) < 0) {
170 fprintf(stderr, "Failed to lseek to %d on fd %d: %s.\n",
subrata_modakabe017e2009-01-27 13:50:05 +0000171 offset, fd, strerror(errno));
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800172 return (void *) 1;
subrata_modakabe017e2009-01-27 13:50:05 +0000173 }
subrata_modakabe017e2009-01-27 13:50:05 +0000174
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800175 nread = read(fd, buffer, length);
176 if (nread == -1 || nread != length) {
177 fprintf(stderr, "read failed in worker thread%d: %s",
178 worker->worker_number, strerror(errno));
179 return (void *) 1;
Wanlong Gao354ebb42012-12-07 10:10:04 +0800180 }
181
182 /* Corruption check */
183 for (i = 0; i < length; i++) {
184 if (buffer[i] != pattern) {
185 printf("Bad data at 0x%.06x: %p, \n", i, buffer + i);
186 printf("Data dump starting at 0x%.06x:\n", i - 8);
187 printf("Expect 0x%x followed by 0x%x:\n",
188 pattern, PATTERN);
189
190 for (k = 0; k < 16; k++) {
191 printf("%02x ", buffer[i - 8 + k]);
192 if (k == 7) {
193 printf("\n");
194 }
195 }
196
197 printf("\n");
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800198 tst_result = 1;
199 return NULL;
Wanlong Gao354ebb42012-12-07 10:10:04 +0800200 }
201 }
202
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800203 return NULL;
subrata_modakabe017e2009-01-27 13:50:05 +0000204}
205
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800206static void *fork_thread(void *arg)
subrata_modakabe017e2009-01-27 13:50:05 +0000207{
Wanlong Gao354ebb42012-12-07 10:10:04 +0800208 pid_t pid;
subrata_modakabe017e2009-01-27 13:50:05 +0000209
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800210 (void) arg;
211
Wanlong Gao354ebb42012-12-07 10:10:04 +0800212 while (!done) {
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800213 pid = tst_fork();
Wanlong Gao354ebb42012-12-07 10:10:04 +0800214 if (pid == 0) {
215 exit(0);
216 } else if (pid < 0) {
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800217 fprintf(stderr, "Failed to fork child: %s.\n",
218 strerror(errno));
219 return (void *) 1;
Wanlong Gao354ebb42012-12-07 10:10:04 +0800220 }
221 waitpid(pid, NULL, 0);
222 usleep(100);
subrata_modak4bb656a2009-02-26 12:02:09 +0000223 }
subrata_modakabe017e2009-01-27 13:50:05 +0000224
Wanlong Gao354ebb42012-12-07 10:10:04 +0800225 return NULL;
subrata_modakabe017e2009-01-27 13:50:05 +0000226}
227
228int main(int argc, char *argv[])
229{
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800230 int i, lc;
231 const char *msg;
subrata_modakabe017e2009-01-27 13:50:05 +0000232
Wanlong Gao354ebb42012-12-07 10:10:04 +0800233 workers = sysconf(_SC_NPROCESSORS_ONLN);
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800234 msg = parse_opts(argc, argv, options, help);
235 if (msg != NULL)
236 tst_brkm(TBROK, NULL, "OPTION PARSING ERROR - %s", msg);
subrata_modakabe017e2009-01-27 13:50:05 +0000237
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800238 setup();
subrata_modakabe017e2009-01-27 13:50:05 +0000239
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800240 for (lc = 0; TEST_LOOPING(lc); lc++) {
241 tst_count = 0;
subrata_modakabe017e2009-01-27 13:50:05 +0000242
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800243 for (i = 0; i < TST_TOTAL; i++)
244 dma_thread_diotest_verify();
Wanlong Gao354ebb42012-12-07 10:10:04 +0800245 }
246
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800247 cleanup();
248 tst_exit();
249}
subrata_modakabe017e2009-01-27 13:50:05 +0000250
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800251static void dma_thread_diotest_verify(void)
252{
253 int n, j, offset, rc;
254 void *retval;
255 char filename[PATH_MAX];
256 pthread_t fork_tid;
Wanlong Gao354ebb42012-12-07 10:10:04 +0800257
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800258 tst_result = 0;
Wanlong Gao354ebb42012-12-07 10:10:04 +0800259
subrata_modakabe017e2009-01-27 13:50:05 +0000260 for (n = 1; n <= FILECOUNT; n++) {
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800261 snprintf(filename, sizeof(filename), FILE_BASEPATH, n);
Wanlong Gao354ebb42012-12-07 10:10:04 +0800262 for (j = 0; j < workers; j++) {
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800263 worker[j].fd = SAFE_OPEN(cleanup, filename,
264 O_RDONLY | O_DIRECT);
Wanlong Gao354ebb42012-12-07 10:10:04 +0800265 worker[j].pattern = n;
subrata_modakabe017e2009-01-27 13:50:05 +0000266 }
subrata_modakabe017e2009-01-27 13:50:05 +0000267
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800268 tst_resm(TINFO, "Reading file %d.", n);
Wanlong Gao354ebb42012-12-07 10:10:04 +0800269
270 for (offset = 0; offset < FILESIZE; offset += READSIZE) {
271 memset(buffer, PATTERN, READSIZE + align);
272 for (j = 0; j < workers; j++) {
273 worker[j].offset = offset + j * PAGE_SIZE;
274 worker[j].buffer =
275 buffer + align + j * PAGE_SIZE;
276 worker[j].length = PAGE_SIZE;
277 }
278 /* The final worker reads whatever is left over. */
279 worker[workers - 1].length =
280 READSIZE - PAGE_SIZE * (workers - 1);
281
282 done = 0;
283
284 rc = pthread_create(&fork_tid, NULL, fork_thread, NULL);
285 if (rc != 0) {
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800286 tst_brkm(TBROK, cleanup, "pthread_create "
287 "failed: %s", strerror(rc));
Wanlong Gao354ebb42012-12-07 10:10:04 +0800288 }
289
290 for (j = 0; j < workers; j++) {
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800291 rc = pthread_create(&worker[j].tid, NULL,
Wanlong Gao354ebb42012-12-07 10:10:04 +0800292 worker_thread, worker + j);
293 if (rc != 0) {
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800294 tst_brkm(TBROK, cleanup, "Can't create"
295 "worker thread %d: %s",
296 j, strerror(rc));
Wanlong Gao354ebb42012-12-07 10:10:04 +0800297 }
298 }
299
300 for (j = 0; j < workers; j++) {
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800301 rc = pthread_join(worker[j].tid, &retval);
Wanlong Gao354ebb42012-12-07 10:10:04 +0800302 if (rc != 0) {
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800303 tst_brkm(TBROK, cleanup, "Failed to "
304 "join worker thread %d: %s.",
305 j, strerror(rc));
306 }
307 if ((intptr_t)retval != 0) {
308 tst_brkm(TBROK, cleanup, "there is"
309 "some errors in worker[%d],"
310 "return value: %ld",
311 j, (intptr_t)retval);
Wanlong Gao354ebb42012-12-07 10:10:04 +0800312 }
313 }
314
315 /* Let the fork thread know it's ok to exit */
316 done = 1;
317
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800318 rc = pthread_join(fork_tid, &retval);
Wanlong Gao354ebb42012-12-07 10:10:04 +0800319 if (rc != 0) {
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800320 tst_brkm(TBROK, cleanup,
321 "Failed to join fork thread: %s.",
322 strerror(rc));
323 }
324 if ((intptr_t)retval != 0) {
325 tst_brkm(TBROK, cleanup,
326 "fork() failed in fork thread:"
327 "return value: %ld", (intptr_t)retval);
Wanlong Gao354ebb42012-12-07 10:10:04 +0800328 }
subrata_modakabe017e2009-01-27 13:50:05 +0000329 }
subrata_modakabe017e2009-01-27 13:50:05 +0000330
Wanlong Gao354ebb42012-12-07 10:10:04 +0800331 /* Close the fd's for the next file. */
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800332 for (j = 0; j < workers; j++)
333 SAFE_CLOSE(cleanup, worker[j].fd);
334 if (tst_result)
335 break;
336 }
337
338 if (tst_result)
339 tst_resm(TFAIL, "data corruption is detected");
340 else
341 tst_resm(TPASS, "data corruption is not detected");
342}
343
344static void setup(void)
345{
346 char filename[PATH_MAX];
347 int n, j, fd, directflag = 1;
348 long type;
349
350 if (align_str) {
351 align = atoi(align_str);
352 if (align < 0 || align > PAGE_SIZE)
353 tst_brkm(TCONF, NULL, "Bad alignment %d.", align);
354 }
355 tst_resm(TINFO, "using alignment %d", align);
356
357 if (workers_str) {
358 workers = atoi(workers_str);
359 if (workers < MIN_WORKERS || workers > MAX_WORKERS) {
360 tst_brkm(TCONF, NULL, "Worker count %d not between "
361 "%d and %d, inclusive",
362 workers, MIN_WORKERS, MAX_WORKERS);
363 }
364 }
365 tst_resm(TINFO, "using %d workers.", workers);
366
367 tst_sig(FORK, DEF_HANDLER, NULL);
368 tst_require_root(NULL);
369
370 TEST_PAUSE;
371
372 tst_tmpdir();
373
374 /*
375 * Some file systems may not implement the O_DIRECT flag and open() will
376 * fail with EINVAL if it is used. So add this check for current
377 * filesystem current directory is in, if not supported, we choose to
378 * have this test in LTP_BIG_DEV and mkfs it as ext3.
379 */
380 fd = open("testfile", O_CREAT | O_DIRECT, 0644);
381 if (fd < 0 && errno == EINVAL) {
382 type = tst_fs_type(NULL, ".");
383 tst_resm(TINFO, "O_DIRECT flag is not supported on %s "
384 "filesystem", tst_fs_type_name(type));
385 directflag = 0;
386 } else if (fd > 0) {
387 SAFE_CLOSE(NULL, fd);
388 }
389
390 SAFE_MKDIR(cleanup, MNT_POINT, DIR_MODE);
391
392 /*
393 * verify whether the current directory has enough free space,
394 * if it is not satisfied, we will use the LTP_BIG_DEV, which
395 * will be exported by runltp with "-z" option.
396 */
397 if (!directflag || !tst_fs_has_free(NULL, ".", 1300, TST_MB)) {
398 device = getenv("LTP_BIG_DEV");
399 if (device == NULL) {
400 tst_brkm(TCONF, NULL,
401 "you must specify a big blockdevice(>1.3G)");
402 } else {
403 tst_mkfs(NULL, device, "ext3", NULL);
404 }
405
406 if (mount(device, MNT_POINT, "ext3", 0, NULL) < 0) {
407 tst_brkm(TBROK | TERRNO, NULL,
408 "mount device:%s failed", device);
409 }
410 mount_flag = 1;
411 }
412
413 worker = SAFE_MALLOC(cleanup, workers * sizeof(worker_t));
414
415 for (j = 0; j < workers; j++)
416 worker[j].worker_number = j;
417
418 for (n = 1; n <= FILECOUNT; n++) {
419 snprintf(filename, sizeof(filename), FILE_BASEPATH, n);
420
421 if (tst_fill_file(filename, n, FILESIZE, 1)) {
422 tst_brkm(TBROK, cleanup, "failed to create file: %s",
423 filename);
Wanlong Gao354ebb42012-12-07 10:10:04 +0800424 }
subrata_modakabe017e2009-01-27 13:50:05 +0000425 }
426
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800427 if (posix_memalign((void **)&buffer, PAGE_SIZE, READSIZE + align) != 0)
428 tst_brkm(TBROK, cleanup, "call posix_memalign failed");
429}
430
431static void cleanup(void)
432{
Xiaoguang Wang52de6e22014-05-08 20:28:31 +0800433 free(buffer);
434
435 if (mount_flag && umount(MNT_POINT) < 0)
436 tst_resm(TWARN | TERRNO, "umount device:%s failed", device);
437
438 free(worker);
439
440 tst_rmdir();
441}
442
/*
 * Print usage for the test-specific options; passed to parse_opts() by
 * main().
 */
static void help(void)
{
	printf("-a align read buffer to offset <alignment>.\n");
	/* The default is the core count, not 2; 2 is only the lower bound. */
	printf("-w number of worker threads, 2 to 256,"
	       " defaults to number of cores.\n");
}