blob: 1fa9fa157c125652fea7cd314534992c4875a73b [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * linux/drivers/char/mem.c
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 *
6 * Added devfs support.
7 * Jan-11-1998, C. Scott Ananian <cananian@alumni.princeton.edu>
8 * Shared /dev/zero mmaping support, Feb 2000, Kanoj Sarcar <kanoj@sgi.com>
9 */
10
11#include <linux/config.h>
12#include <linux/mm.h>
13#include <linux/miscdevice.h>
14#include <linux/slab.h>
15#include <linux/vmalloc.h>
16#include <linux/mman.h>
17#include <linux/random.h>
18#include <linux/init.h>
19#include <linux/raw.h>
20#include <linux/tty.h>
21#include <linux/capability.h>
22#include <linux/smp_lock.h>
23#include <linux/devfs_fs_kernel.h>
24#include <linux/ptrace.h>
25#include <linux/device.h>
Vivek Goyal50b1fdb2005-06-25 14:58:23 -070026#include <linux/highmem.h>
27#include <linux/crash_dump.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/backing-dev.h>
Vivek Goyal315c2152005-06-25 14:58:24 -070029#include <linux/bootmem.h>
Jens Axboe1ebd32f2006-04-26 14:40:08 +020030#include <linux/pipe_fs_i.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070031
32#include <asm/uaccess.h>
33#include <asm/io.h>
34
35#ifdef CONFIG_IA64
36# include <linux/efi.h>
37#endif
38
Linus Torvalds1da177e2005-04-16 15:20:36 -070039/*
40 * Architectures vary in how they handle caching for addresses
41 * outside of main memory.
42 *
43 */
44static inline int uncached_access(struct file *file, unsigned long addr)
45{
46#if defined(__i386__)
47 /*
48 * On the PPro and successors, the MTRRs are used to set
49 * memory types for physical addresses outside main memory,
50 * so blindly setting PCD or PWT on those pages is wrong.
51 * For Pentiums and earlier, the surround logic should disable
52 * caching for the high addresses through the KEN pin, but
53 * we maintain the tradition of paranoia in this code.
54 */
55 if (file->f_flags & O_SYNC)
56 return 1;
57 return !( test_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability) ||
58 test_bit(X86_FEATURE_K6_MTRR, boot_cpu_data.x86_capability) ||
59 test_bit(X86_FEATURE_CYRIX_ARR, boot_cpu_data.x86_capability) ||
60 test_bit(X86_FEATURE_CENTAUR_MCR, boot_cpu_data.x86_capability) )
61 && addr >= __pa(high_memory);
62#elif defined(__x86_64__)
63 /*
64 * This is broken because it can generate memory type aliases,
65 * which can cause cache corruptions
66 * But it is only available for root and we have to be bug-to-bug
67 * compatible with i386.
68 */
69 if (file->f_flags & O_SYNC)
70 return 1;
71 /* same behaviour as i386. PAT always set to cached and MTRRs control the
72 caching behaviour.
73 Hopefully a full PAT implementation will fix that soon. */
74 return 0;
75#elif defined(CONFIG_IA64)
76 /*
77 * On ia64, we ignore O_SYNC because we cannot tolerate memory attribute aliases.
78 */
79 return !(efi_mem_attributes(addr) & EFI_MEMORY_WB);
80#else
81 /*
82 * Accessing memory above the top the kernel knows about or through a file pointer
83 * that was marked O_SYNC will be done non-cached.
84 */
85 if (file->f_flags & O_SYNC)
86 return 1;
87 return addr >= __pa(high_memory);
88#endif
89}
90
91#ifndef ARCH_HAS_VALID_PHYS_ADDR_RANGE
Bjorn Helgaas136939a2006-03-26 01:37:05 -080092static inline int valid_phys_addr_range(unsigned long addr, size_t count)
Linus Torvalds1da177e2005-04-16 15:20:36 -070093{
Bjorn Helgaas136939a2006-03-26 01:37:05 -080094 if (addr + count > __pa(high_memory))
Linus Torvalds1da177e2005-04-16 15:20:36 -070095 return 0;
96
Linus Torvalds1da177e2005-04-16 15:20:36 -070097 return 1;
98}
Bjorn Helgaas80851ef2006-01-08 01:04:13 -080099
Bjorn Helgaas136939a2006-03-26 01:37:05 -0800100static inline int valid_mmap_phys_addr_range(unsigned long addr, size_t size)
Bjorn Helgaas80851ef2006-01-08 01:04:13 -0800101{
102 return 1;
103}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700104#endif
105
106/*
107 * This funcion reads the *physical* memory. The f_pos points directly to the
108 * memory location.
109 */
110static ssize_t read_mem(struct file * file, char __user * buf,
111 size_t count, loff_t *ppos)
112{
113 unsigned long p = *ppos;
114 ssize_t read, sz;
115 char *ptr;
116
Bjorn Helgaas136939a2006-03-26 01:37:05 -0800117 if (!valid_phys_addr_range(p, count))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700118 return -EFAULT;
119 read = 0;
120#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
121 /* we don't have page 0 mapped on sparc and m68k.. */
122 if (p < PAGE_SIZE) {
123 sz = PAGE_SIZE - p;
124 if (sz > count)
125 sz = count;
126 if (sz > 0) {
127 if (clear_user(buf, sz))
128 return -EFAULT;
129 buf += sz;
130 p += sz;
131 count -= sz;
132 read += sz;
133 }
134 }
135#endif
136
137 while (count > 0) {
138 /*
139 * Handle first page in case it's not aligned
140 */
141 if (-p & (PAGE_SIZE - 1))
142 sz = -p & (PAGE_SIZE - 1);
143 else
144 sz = PAGE_SIZE;
145
146 sz = min_t(unsigned long, sz, count);
147
148 /*
149 * On ia64 if a page has been mapped somewhere as
150 * uncached, then it must also be accessed uncached
151 * by the kernel or data corruption may occur
152 */
153 ptr = xlate_dev_mem_ptr(p);
154
155 if (copy_to_user(buf, ptr, sz))
156 return -EFAULT;
157 buf += sz;
158 p += sz;
159 count -= sz;
160 read += sz;
161 }
162
163 *ppos += read;
164 return read;
165}
166
167static ssize_t write_mem(struct file * file, const char __user * buf,
168 size_t count, loff_t *ppos)
169{
170 unsigned long p = *ppos;
171 ssize_t written, sz;
172 unsigned long copied;
173 void *ptr;
174
Bjorn Helgaas136939a2006-03-26 01:37:05 -0800175 if (!valid_phys_addr_range(p, count))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700176 return -EFAULT;
177
178 written = 0;
179
180#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
181 /* we don't have page 0 mapped on sparc and m68k.. */
182 if (p < PAGE_SIZE) {
183 unsigned long sz = PAGE_SIZE - p;
184 if (sz > count)
185 sz = count;
186 /* Hmm. Do something? */
187 buf += sz;
188 p += sz;
189 count -= sz;
190 written += sz;
191 }
192#endif
193
194 while (count > 0) {
195 /*
196 * Handle first page in case it's not aligned
197 */
198 if (-p & (PAGE_SIZE - 1))
199 sz = -p & (PAGE_SIZE - 1);
200 else
201 sz = PAGE_SIZE;
202
203 sz = min_t(unsigned long, sz, count);
204
205 /*
206 * On ia64 if a page has been mapped somewhere as
207 * uncached, then it must also be accessed uncached
208 * by the kernel or data corruption may occur
209 */
210 ptr = xlate_dev_mem_ptr(p);
211
212 copied = copy_from_user(ptr, buf, sz);
213 if (copied) {
Jan Beulichc654d602006-03-25 03:07:31 -0800214 written += sz - copied;
215 if (written)
216 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700217 return -EFAULT;
218 }
219 buf += sz;
220 p += sz;
221 count -= sz;
222 written += sz;
223 }
224
225 *ppos += written;
226 return written;
227}
228
Bjorn Helgaas44ac8412006-01-08 01:04:10 -0800229#ifndef __HAVE_PHYS_MEM_ACCESS_PROT
230static pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
231 unsigned long size, pgprot_t vma_prot)
232{
233#ifdef pgprot_noncached
234 unsigned long offset = pfn << PAGE_SHIFT;
235
236 if (uncached_access(file, offset))
237 return pgprot_noncached(vma_prot);
238#endif
239 return vma_prot;
240}
241#endif
242
Linus Torvalds1da177e2005-04-16 15:20:36 -0700243static int mmap_mem(struct file * file, struct vm_area_struct * vma)
244{
Bjorn Helgaas80851ef2006-01-08 01:04:13 -0800245 size_t size = vma->vm_end - vma->vm_start;
246
Bjorn Helgaas136939a2006-03-26 01:37:05 -0800247 if (!valid_mmap_phys_addr_range(vma->vm_pgoff << PAGE_SHIFT, size))
Bjorn Helgaas80851ef2006-01-08 01:04:13 -0800248 return -EINVAL;
249
Roland Dreier8b150472005-10-28 17:46:18 -0700250 vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
Bjorn Helgaas80851ef2006-01-08 01:04:13 -0800251 size,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700252 vma->vm_page_prot);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700253
254 /* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */
255 if (remap_pfn_range(vma,
256 vma->vm_start,
257 vma->vm_pgoff,
Bjorn Helgaas80851ef2006-01-08 01:04:13 -0800258 size,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259 vma->vm_page_prot))
260 return -EAGAIN;
261 return 0;
262}
263
264static int mmap_kmem(struct file * file, struct vm_area_struct * vma)
265{
Linus Torvalds4bb82552005-08-13 14:22:59 -0700266 unsigned long pfn;
267
268 /* Turn a kernel-virtual address into a physical page frame */
269 pfn = __pa((u64)vma->vm_pgoff << PAGE_SHIFT) >> PAGE_SHIFT;
270
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271 /*
272 * RED-PEN: on some architectures there is more mapped memory
273 * than available in mem_map which pfn_valid checks
274 * for. Perhaps should add a new macro here.
275 *
276 * RED-PEN: vmalloc is not supported right now.
277 */
Linus Torvalds4bb82552005-08-13 14:22:59 -0700278 if (!pfn_valid(pfn))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700279 return -EIO;
Linus Torvalds4bb82552005-08-13 14:22:59 -0700280
281 vma->vm_pgoff = pfn;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700282 return mmap_mem(file, vma);
283}
284
#ifdef CONFIG_CRASH_DUMP
/*
 * Read memory corresponding to the old kernel.
 *
 * Copies page by page through copy_oldmem_page(), advancing *@ppos as it
 * goes. Returns the number of bytes read (possibly short once the page
 * frame passes saved_max_pfn) or the negative error from
 * copy_oldmem_page().
 */
static ssize_t read_oldmem(struct file *file, char __user *buf,
			   size_t count, loff_t *ppos)
{
	unsigned long pfn, offset;
	size_t done = 0;
	size_t chunk;
	int rc = 0;

	while (count) {
		pfn = *ppos / PAGE_SIZE;
		if (pfn > saved_max_pfn)
			return done;

		/* Limit each copy to the remainder of the current page. */
		offset = (unsigned long)(*ppos % PAGE_SIZE);
		chunk = (count > PAGE_SIZE - offset) ? PAGE_SIZE - offset : count;

		rc = copy_oldmem_page(pfn, buf, chunk, offset, 1);
		if (rc < 0)
			return rc;

		buf += chunk;
		*ppos += chunk;
		done += chunk;
		count -= chunk;
	}
	return done;
}
#endif
318
Linus Torvalds1da177e2005-04-16 15:20:36 -0700319extern long vread(char *buf, char *addr, unsigned long count);
320extern long vwrite(char *buf, char *addr, unsigned long count);
321
322/*
323 * This function reads the *virtual* memory as seen by the kernel.
324 */
325static ssize_t read_kmem(struct file *file, char __user *buf,
326 size_t count, loff_t *ppos)
327{
328 unsigned long p = *ppos;
329 ssize_t low_count, read, sz;
330 char * kbuf; /* k-addr because vread() takes vmlist_lock rwlock */
331
332 read = 0;
333 if (p < (unsigned long) high_memory) {
334 low_count = count;
335 if (count > (unsigned long) high_memory - p)
336 low_count = (unsigned long) high_memory - p;
337
338#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
339 /* we don't have page 0 mapped on sparc and m68k.. */
340 if (p < PAGE_SIZE && low_count > 0) {
341 size_t tmp = PAGE_SIZE - p;
342 if (tmp > low_count) tmp = low_count;
343 if (clear_user(buf, tmp))
344 return -EFAULT;
345 buf += tmp;
346 p += tmp;
347 read += tmp;
348 low_count -= tmp;
349 count -= tmp;
350 }
351#endif
352 while (low_count > 0) {
353 /*
354 * Handle first page in case it's not aligned
355 */
356 if (-p & (PAGE_SIZE - 1))
357 sz = -p & (PAGE_SIZE - 1);
358 else
359 sz = PAGE_SIZE;
360
361 sz = min_t(unsigned long, sz, low_count);
362
363 /*
364 * On ia64 if a page has been mapped somewhere as
365 * uncached, then it must also be accessed uncached
366 * by the kernel or data corruption may occur
367 */
368 kbuf = xlate_dev_kmem_ptr((char *)p);
369
370 if (copy_to_user(buf, kbuf, sz))
371 return -EFAULT;
372 buf += sz;
373 p += sz;
374 read += sz;
375 low_count -= sz;
376 count -= sz;
377 }
378 }
379
380 if (count > 0) {
381 kbuf = (char *)__get_free_page(GFP_KERNEL);
382 if (!kbuf)
383 return -ENOMEM;
384 while (count > 0) {
385 int len = count;
386
387 if (len > PAGE_SIZE)
388 len = PAGE_SIZE;
389 len = vread(kbuf, (char *)p, len);
390 if (!len)
391 break;
392 if (copy_to_user(buf, kbuf, len)) {
393 free_page((unsigned long)kbuf);
394 return -EFAULT;
395 }
396 count -= len;
397 buf += len;
398 read += len;
399 p += len;
400 }
401 free_page((unsigned long)kbuf);
402 }
403 *ppos = p;
404 return read;
405}
406
407
408static inline ssize_t
409do_write_kmem(void *p, unsigned long realp, const char __user * buf,
410 size_t count, loff_t *ppos)
411{
412 ssize_t written, sz;
413 unsigned long copied;
414
415 written = 0;
416#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
417 /* we don't have page 0 mapped on sparc and m68k.. */
418 if (realp < PAGE_SIZE) {
419 unsigned long sz = PAGE_SIZE - realp;
420 if (sz > count)
421 sz = count;
422 /* Hmm. Do something? */
423 buf += sz;
424 p += sz;
425 realp += sz;
426 count -= sz;
427 written += sz;
428 }
429#endif
430
431 while (count > 0) {
432 char *ptr;
433 /*
434 * Handle first page in case it's not aligned
435 */
436 if (-realp & (PAGE_SIZE - 1))
437 sz = -realp & (PAGE_SIZE - 1);
438 else
439 sz = PAGE_SIZE;
440
441 sz = min_t(unsigned long, sz, count);
442
443 /*
444 * On ia64 if a page has been mapped somewhere as
445 * uncached, then it must also be accessed uncached
446 * by the kernel or data corruption may occur
447 */
448 ptr = xlate_dev_kmem_ptr(p);
449
450 copied = copy_from_user(ptr, buf, sz);
451 if (copied) {
Jan Beulichc654d602006-03-25 03:07:31 -0800452 written += sz - copied;
453 if (written)
454 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700455 return -EFAULT;
456 }
457 buf += sz;
458 p += sz;
459 realp += sz;
460 count -= sz;
461 written += sz;
462 }
463
464 *ppos += written;
465 return written;
466}
467
468
469/*
470 * This function writes to the *virtual* memory as seen by the kernel.
471 */
472static ssize_t write_kmem(struct file * file, const char __user * buf,
473 size_t count, loff_t *ppos)
474{
475 unsigned long p = *ppos;
476 ssize_t wrote = 0;
477 ssize_t virtr = 0;
478 ssize_t written;
479 char * kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */
480
481 if (p < (unsigned long) high_memory) {
482
483 wrote = count;
484 if (count > (unsigned long) high_memory - p)
485 wrote = (unsigned long) high_memory - p;
486
487 written = do_write_kmem((void*)p, p, buf, wrote, ppos);
488 if (written != wrote)
489 return written;
490 wrote = written;
491 p += wrote;
492 buf += wrote;
493 count -= wrote;
494 }
495
496 if (count > 0) {
497 kbuf = (char *)__get_free_page(GFP_KERNEL);
498 if (!kbuf)
499 return wrote ? wrote : -ENOMEM;
500 while (count > 0) {
501 int len = count;
502
503 if (len > PAGE_SIZE)
504 len = PAGE_SIZE;
505 if (len) {
506 written = copy_from_user(kbuf, buf, len);
507 if (written) {
Jan Beulichc654d602006-03-25 03:07:31 -0800508 if (wrote + virtr)
509 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700510 free_page((unsigned long)kbuf);
Jan Beulichc654d602006-03-25 03:07:31 -0800511 return -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700512 }
513 }
514 len = vwrite(kbuf, (char *)p, len);
515 count -= len;
516 buf += len;
517 virtr += len;
518 p += len;
519 }
520 free_page((unsigned long)kbuf);
521 }
522
523 *ppos = p;
524 return virtr + wrote;
525}
526
Stephen Rothwellee2cdec2006-01-12 13:54:20 +1100527#if defined(CONFIG_ISA) || !defined(__mc68000__)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700528static ssize_t read_port(struct file * file, char __user * buf,
529 size_t count, loff_t *ppos)
530{
531 unsigned long i = *ppos;
532 char __user *tmp = buf;
533
534 if (!access_ok(VERIFY_WRITE, buf, count))
535 return -EFAULT;
536 while (count-- > 0 && i < 65536) {
537 if (__put_user(inb(i),tmp) < 0)
538 return -EFAULT;
539 i++;
540 tmp++;
541 }
542 *ppos = i;
543 return tmp-buf;
544}
545
546static ssize_t write_port(struct file * file, const char __user * buf,
547 size_t count, loff_t *ppos)
548{
549 unsigned long i = *ppos;
550 const char __user * tmp = buf;
551
552 if (!access_ok(VERIFY_READ,buf,count))
553 return -EFAULT;
554 while (count-- > 0 && i < 65536) {
555 char c;
Jan Beulichc654d602006-03-25 03:07:31 -0800556 if (__get_user(c, tmp)) {
557 if (tmp > buf)
558 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700559 return -EFAULT;
Jan Beulichc654d602006-03-25 03:07:31 -0800560 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700561 outb(c,i);
562 i++;
563 tmp++;
564 }
565 *ppos = i;
566 return tmp-buf;
567}
568#endif
569
570static ssize_t read_null(struct file * file, char __user * buf,
571 size_t count, loff_t *ppos)
572{
573 return 0;
574}
575
576static ssize_t write_null(struct file * file, const char __user * buf,
577 size_t count, loff_t *ppos)
578{
579 return count;
580}
581
Jens Axboe1ebd32f2006-04-26 14:40:08 +0200582static int pipe_to_null(struct pipe_inode_info *info, struct pipe_buffer *buf,
583 struct splice_desc *sd)
584{
585 return sd->len;
586}
587
588static ssize_t splice_write_null(struct pipe_inode_info *pipe,struct file *out,
589 loff_t *ppos, size_t len, unsigned int flags)
590{
591 return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_null);
592}
593
Linus Torvalds1da177e2005-04-16 15:20:36 -0700594#ifdef CONFIG_MMU
595/*
596 * For fun, we are using the MMU for this.
597 */
598static inline size_t read_zero_pagealigned(char __user * buf, size_t size)
599{
600 struct mm_struct *mm;
601 struct vm_area_struct * vma;
602 unsigned long addr=(unsigned long)buf;
603
604 mm = current->mm;
605 /* Oops, this was forgotten before. -ben */
606 down_read(&mm->mmap_sem);
607
608 /* For private mappings, just map in zero pages. */
609 for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
610 unsigned long count;
611
612 if (vma->vm_start > addr || (vma->vm_flags & VM_WRITE) == 0)
613 goto out_up;
Linus Torvalds6aab3412005-11-28 14:34:23 -0800614 if (vma->vm_flags & (VM_SHARED | VM_HUGETLB))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700615 break;
616 count = vma->vm_end - addr;
617 if (count > size)
618 count = size;
619
620 zap_page_range(vma, addr, count, NULL);
621 zeromap_page_range(vma, addr, count, PAGE_COPY);
622
623 size -= count;
624 buf += count;
625 addr += count;
626 if (size == 0)
627 goto out_up;
628 }
629
630 up_read(&mm->mmap_sem);
631
632 /* The shared case is hard. Let's do the conventional zeroing. */
633 do {
634 unsigned long unwritten = clear_user(buf, PAGE_SIZE);
635 if (unwritten)
636 return size + unwritten - PAGE_SIZE;
637 cond_resched();
638 buf += PAGE_SIZE;
639 size -= PAGE_SIZE;
640 } while (size);
641
642 return size;
643out_up:
644 up_read(&mm->mmap_sem);
645 return size;
646}
647
648static ssize_t read_zero(struct file * file, char __user * buf,
649 size_t count, loff_t *ppos)
650{
651 unsigned long left, unwritten, written = 0;
652
653 if (!count)
654 return 0;
655
656 if (!access_ok(VERIFY_WRITE, buf, count))
657 return -EFAULT;
658
659 left = count;
660
661 /* do we want to be clever? Arbitrary cut-off */
662 if (count >= PAGE_SIZE*4) {
663 unsigned long partial;
664
665 /* How much left of the page? */
666 partial = (PAGE_SIZE-1) & -(unsigned long) buf;
667 unwritten = clear_user(buf, partial);
668 written = partial - unwritten;
669 if (unwritten)
670 goto out;
671 left -= partial;
672 buf += partial;
673 unwritten = read_zero_pagealigned(buf, left & PAGE_MASK);
674 written += (left & PAGE_MASK) - unwritten;
675 if (unwritten)
676 goto out;
677 buf += left & PAGE_MASK;
678 left &= ~PAGE_MASK;
679 }
680 unwritten = clear_user(buf, left);
681 written += left - unwritten;
682out:
683 return written ? written : -EFAULT;
684}
685
686static int mmap_zero(struct file * file, struct vm_area_struct * vma)
687{
688 if (vma->vm_flags & VM_SHARED)
689 return shmem_zero_setup(vma);
690 if (zeromap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, vma->vm_page_prot))
691 return -EAGAIN;
692 return 0;
693}
694#else /* CONFIG_MMU */
695static ssize_t read_zero(struct file * file, char * buf,
696 size_t count, loff_t *ppos)
697{
698 size_t todo = count;
699
700 while (todo) {
701 size_t chunk = todo;
702
703 if (chunk > 4096)
704 chunk = 4096; /* Just for latency reasons */
705 if (clear_user(buf, chunk))
706 return -EFAULT;
707 buf += chunk;
708 todo -= chunk;
709 cond_resched();
710 }
711 return count;
712}
713
714static int mmap_zero(struct file * file, struct vm_area_struct * vma)
715{
716 return -ENOSYS;
717}
718#endif /* CONFIG_MMU */
719
720static ssize_t write_full(struct file * file, const char __user * buf,
721 size_t count, loff_t *ppos)
722{
723 return -ENOSPC;
724}
725
726/*
727 * Special lseek() function for /dev/null and /dev/zero. Most notably, you
728 * can fopen() both devices with "a" now. This was previously impossible.
729 * -- SRB.
730 */
731
732static loff_t null_lseek(struct file * file, loff_t offset, int orig)
733{
734 return file->f_pos = 0;
735}
736
737/*
738 * The memory devices use the full 32/64 bits of the offset, and so we cannot
739 * check against negative addresses: they are ok. The return value is weird,
740 * though, in that case (0).
741 *
742 * also note that seeking relative to the "end of file" isn't supported:
743 * it has no meaning, so it returns -EINVAL.
744 */
745static loff_t memory_lseek(struct file * file, loff_t offset, int orig)
746{
747 loff_t ret;
748
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800749 mutex_lock(&file->f_dentry->d_inode->i_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700750 switch (orig) {
751 case 0:
752 file->f_pos = offset;
753 ret = file->f_pos;
754 force_successful_syscall_return();
755 break;
756 case 1:
757 file->f_pos += offset;
758 ret = file->f_pos;
759 force_successful_syscall_return();
760 break;
761 default:
762 ret = -EINVAL;
763 }
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800764 mutex_unlock(&file->f_dentry->d_inode->i_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700765 return ret;
766}
767
768static int open_port(struct inode * inode, struct file * filp)
769{
770 return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
771}
772
/*
 * Several devices share a handler; these aliases let each fops table
 * below name its handlers after its own device.
 */
#define zero_lseek	null_lseek
#define full_lseek	null_lseek
#define write_zero	write_null
#define read_full	read_zero
#define open_mem	open_port
#define open_kmem	open_mem
#define open_oldmem	open_mem
Linus Torvalds1da177e2005-04-16 15:20:36 -0700780
781static struct file_operations mem_fops = {
782 .llseek = memory_lseek,
783 .read = read_mem,
784 .write = write_mem,
785 .mmap = mmap_mem,
786 .open = open_mem,
787};
788
789static struct file_operations kmem_fops = {
790 .llseek = memory_lseek,
791 .read = read_kmem,
792 .write = write_kmem,
793 .mmap = mmap_kmem,
794 .open = open_kmem,
795};
796
797static struct file_operations null_fops = {
798 .llseek = null_lseek,
799 .read = read_null,
800 .write = write_null,
Jens Axboe1ebd32f2006-04-26 14:40:08 +0200801 .splice_write = splice_write_null,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700802};
803
Stephen Rothwellee2cdec2006-01-12 13:54:20 +1100804#if defined(CONFIG_ISA) || !defined(__mc68000__)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700805static struct file_operations port_fops = {
806 .llseek = memory_lseek,
807 .read = read_port,
808 .write = write_port,
809 .open = open_port,
810};
811#endif
812
813static struct file_operations zero_fops = {
814 .llseek = zero_lseek,
815 .read = read_zero,
816 .write = write_zero,
817 .mmap = mmap_zero,
818};
819
820static struct backing_dev_info zero_bdi = {
821 .capabilities = BDI_CAP_MAP_COPY,
822};
823
824static struct file_operations full_fops = {
825 .llseek = full_lseek,
826 .read = read_full,
827 .write = write_full,
828};
829
Vivek Goyal50b1fdb2005-06-25 14:58:23 -0700830#ifdef CONFIG_CRASH_DUMP
831static struct file_operations oldmem_fops = {
832 .read = read_oldmem,
833 .open = open_oldmem,
834};
835#endif
836
Linus Torvalds1da177e2005-04-16 15:20:36 -0700837static ssize_t kmsg_write(struct file * file, const char __user * buf,
838 size_t count, loff_t *ppos)
839{
840 char *tmp;
Guillaume Chazaraincd140a52006-01-08 01:02:43 -0800841 ssize_t ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700842
843 tmp = kmalloc(count + 1, GFP_KERNEL);
844 if (tmp == NULL)
845 return -ENOMEM;
846 ret = -EFAULT;
847 if (!copy_from_user(tmp, buf, count)) {
848 tmp[count] = 0;
849 ret = printk("%s", tmp);
Guillaume Chazaraincd140a52006-01-08 01:02:43 -0800850 if (ret > count)
851 /* printk can add a prefix */
852 ret = count;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700853 }
854 kfree(tmp);
855 return ret;
856}
857
858static struct file_operations kmsg_fops = {
859 .write = kmsg_write,
860};
861
862static int memory_open(struct inode * inode, struct file * filp)
863{
864 switch (iminor(inode)) {
865 case 1:
866 filp->f_op = &mem_fops;
867 break;
868 case 2:
869 filp->f_op = &kmem_fops;
870 break;
871 case 3:
872 filp->f_op = &null_fops;
873 break;
Stephen Rothwellee2cdec2006-01-12 13:54:20 +1100874#if defined(CONFIG_ISA) || !defined(__mc68000__)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700875 case 4:
876 filp->f_op = &port_fops;
877 break;
878#endif
879 case 5:
880 filp->f_mapping->backing_dev_info = &zero_bdi;
881 filp->f_op = &zero_fops;
882 break;
883 case 7:
884 filp->f_op = &full_fops;
885 break;
886 case 8:
887 filp->f_op = &random_fops;
888 break;
889 case 9:
890 filp->f_op = &urandom_fops;
891 break;
892 case 11:
893 filp->f_op = &kmsg_fops;
894 break;
Vivek Goyal50b1fdb2005-06-25 14:58:23 -0700895#ifdef CONFIG_CRASH_DUMP
896 case 12:
897 filp->f_op = &oldmem_fops;
898 break;
899#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700900 default:
901 return -ENXIO;
902 }
903 if (filp->f_op && filp->f_op->open)
904 return filp->f_op->open(inode,filp);
905 return 0;
906}
907
908static struct file_operations memory_fops = {
909 .open = memory_open, /* just a selector for the real open */
910};
911
912static const struct {
913 unsigned int minor;
914 char *name;
915 umode_t mode;
Arjan van de Ven99ac48f2006-03-28 01:56:41 -0800916 const struct file_operations *fops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700917} devlist[] = { /* list of minor devices */
918 {1, "mem", S_IRUSR | S_IWUSR | S_IRGRP, &mem_fops},
919 {2, "kmem", S_IRUSR | S_IWUSR | S_IRGRP, &kmem_fops},
920 {3, "null", S_IRUGO | S_IWUGO, &null_fops},
Stephen Rothwellee2cdec2006-01-12 13:54:20 +1100921#if defined(CONFIG_ISA) || !defined(__mc68000__)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700922 {4, "port", S_IRUSR | S_IWUSR | S_IRGRP, &port_fops},
923#endif
924 {5, "zero", S_IRUGO | S_IWUGO, &zero_fops},
925 {7, "full", S_IRUGO | S_IWUGO, &full_fops},
926 {8, "random", S_IRUGO | S_IWUSR, &random_fops},
927 {9, "urandom", S_IRUGO | S_IWUSR, &urandom_fops},
928 {11,"kmsg", S_IRUGO | S_IWUSR, &kmsg_fops},
Vivek Goyal50b1fdb2005-06-25 14:58:23 -0700929#ifdef CONFIG_CRASH_DUMP
930 {12,"oldmem", S_IRUSR | S_IWUSR | S_IRGRP, &oldmem_fops},
931#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700932};
933
/* Device class "mem", created in chr_dev_init(). */
static struct class *mem_class;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700935
936static int __init chr_dev_init(void)
937{
938 int i;
939
940 if (register_chrdev(MEM_MAJOR,"mem",&memory_fops))
941 printk("unable to get major %d for memory devs\n", MEM_MAJOR);
942
gregkh@suse.deca8eca62005-03-23 09:53:09 -0800943 mem_class = class_create(THIS_MODULE, "mem");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700944 for (i = 0; i < ARRAY_SIZE(devlist); i++) {
Greg Kroah-Hartman53f46542005-10-27 22:25:43 -0700945 class_device_create(mem_class, NULL,
946 MKDEV(MEM_MAJOR, devlist[i].minor),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700947 NULL, devlist[i].name);
948 devfs_mk_cdev(MKDEV(MEM_MAJOR, devlist[i].minor),
949 S_IFCHR | devlist[i].mode, devlist[i].name);
950 }
951
952 return 0;
953}
954
955fs_initcall(chr_dev_init);