| /******************************************************************************/ |
| /* */ |
| /* Copyright (s) Ying Han <yinghan@google.com>, 2009 */ |
| /* */ |
| /* This program is free software; you can redistribute it and/or modify */ |
| /* it under the terms of the GNU General Public License as published by */ |
| /* the Free Software Foundation; either version 2 of the License, or */ |
| /* (at your option) any later version. */ |
| /* */ |
| /* This program is distributed in the hope that it will be useful, */ |
| /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ |
| /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See */ |
| /* the GNU General Public License for more details. */ |
| /* */ |
| /* You should have received a copy of the GNU General Public License */ |
| /* along with this program; if not, write to the Free Software */ |
| /* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ |
| /* */ |
| /******************************************************************************/ |
| /* |
| ftruncate-mmap: pages are lost after writing to mmaped file, |
| |
| We triggered the failure during an internal experiment with an |
| ftruncate/mmap/write/read sequence, and we found that some pages are |
| "lost" after writing to the mmaped file. In the following test case |
| this shows up as a non-zero bad page count (count > 0). |
| |
| First we deployed the test case onto a group of machines and saw about |
| a >20% failure rate on average. Then I did a couple of experiments to |
| try to reproduce it on a single machine. What I found is that: |
| 1. after adding an fsync after writing the file, I cannot reproduce |
| this issue. |
| 2. after adding memory pressure (mmap/mlock) while running the test in |
| an infinite loop, the failure is reproduced quickly. ( background |
| flushing ? ) |
| |
| The "bad pages" count differs each time, from one digit to 4 or 5 |
| digits for a 128M ftruncated file. I also found that the bad page |
| numbers are contiguous within each segment, and that the total set of |
| bad pages contains several segments, e.g. "1-4, 9-20, 48-50" |
| ( batch flushing ? ) |
| |
| (The failure was reproduced on 2.6.29-rc8 and also happened on a |
| 2.6.18 kernel. Here is the simple test case to reproduce it with |
| memory pressure.) |
| */ |
| |
| #include <sys/mman.h> |
| #include <sys/types.h> |
| #include <fcntl.h> |
| #include <unistd.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <signal.h> |
| |
| #include "test.h" |
| #include "usctest.h" |
| |
/* Declared here only; the definition lives outside this file. */
extern int tst_count;

/* Test identification: program identifier and number of tests in this file. */
char *TCID = "mmap-corruption01";
int TST_TOTAL = 1;

/* Size in bytes of the file that is truncated and mapped: 128 MiB. */
long kMemSize = 128 * 1024 * 1024;
/* Stride in bytes at which bad pages are counted: 4 KiB. */
int kPageSize = 4 * 1024;

/* Option summary printed as part of the usage message. */
char *usage = "-h hours -m minutes -s secs\n";
| |
| int anyfail() |
| { |
| tst_resm(TFAIL, "Test failed\n"); |
| tst_rmdir(); |
| tst_exit(); |
| } |
| |
| int main(int argc, char **argv) |
| { |
| char *progname; |
| int status; |
| int count = 0; |
| int i, c; |
| char *fname = "test.mmap-corruption"; |
| char *mem; |
| unsigned long alarmtime = 0; |
| struct sigaction sa; |
| void finish(int sig); |
| |
| progname = *argv; |
| while ((c = getopt(argc, argv, ":h:m:s:")) != -1) { |
| switch (c) { |
| case 'h': |
| alarmtime += atoi(optarg) * 60 * 60; |
| break; |
| case 'm': |
| alarmtime += atoi(optarg) * 60; |
| break; |
| case 's': |
| alarmtime += atoi(optarg); |
| break; |
| default: |
| (void)fprintf(stderr, "usage: %s %s\n", progname, |
| usage); |
| anyfail(); |
| } |
| } |
| |
| /* |
| * Plan for death by signal. User may have specified |
| * a time limit, in which case set an alarm and catch SIGALRM. |
| * Also catch and cleanup with SIGINT, SIGQUIT, and SIGTERM. |
| */ |
| sa.sa_handler = finish; |
| sa.sa_flags = 0; |
| if (sigemptyset(&sa.sa_mask)) { |
| perror("sigempty error"); |
| exit(1); |
| } |
| |
| if (sigaction(SIGINT, &sa, 0) == -1) { |
| perror("sigaction error SIGINT"); |
| exit(1); |
| } |
| if (alarmtime) { |
| if (sigaction(SIGALRM, &sa, 0) == -1) { |
| perror("sigaction error"); |
| exit(1); |
| } |
| (void)alarm(alarmtime); |
| printf("mmap-corruption will run for=> %ld, seconds\n", |
| alarmtime); |
| } else { //Run for 5 secs only |
| if (sigaction(SIGALRM, &sa, 0) == -1) { |
| perror("sigaction error"); |
| exit(1); |
| } |
| (void)alarm(5); |
| printf("mmap-corruption will run for=> 5, seconds\n"); |
| } |
| /* If we get a SIGQUIT or SIGTERM, clean up and exit immediately. */ |
| sa.sa_handler = finish; |
| if (sigaction(SIGQUIT, &sa, 0) == -1) { |
| perror("sigaction error SIGQUIT"); |
| exit(1); |
| } |
| if (sigaction(SIGTERM, &sa, 0) == -1) { |
| perror("sigaction error SIGTERM"); |
| exit(1); |
| } |
| |
| tst_tmpdir(); |
| while (1) { |
| unlink(fname); |
| int fd = open(fname, O_CREAT | O_EXCL | O_RDWR, 0600); |
| status = ftruncate(fd, kMemSize); |
| |
| mem = |
| mmap(0, kMemSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, |
| 0); |
| // Fill the memory with 1s. |
| memset(mem, 1, kMemSize); |
| |
| for (i = 0; i < kMemSize; i++) { |
| int byte_good = mem[i] != 0; |
| if (!byte_good && ((i % kPageSize) == 0)) { |
| //printf("%d ", i / kPageSize); |
| count++; |
| } |
| } |
| munmap(mem, kMemSize); |
| close(fd); |
| unlink(fname); |
| if (count > 0) { |
| printf("Running %d bad page\n", count); |
| return 1; |
| } |
| count = 0; |
| } |
| return 0; |
| } |
| |
| void finish(int sig) |
| { |
| printf("mmap-corruption PASSED\n"); |
| exit(0); |
| } |