blob: 7d97032c65081c837f8ccd314323c3d8b52e3990 [file] [log] [blame]
Rebecca Schultzb6aba852008-07-24 11:22:53 -07001/* pmem.c
2 *
3 * Copyright (C) 2007 Google, Inc.
4 *
5 * This software is licensed under the terms of the GNU General Public
6 * License version 2, as published by the Free Software Foundation, and
7 * may be copied, distributed, and modified under those terms.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 */
15
16#include <linux/miscdevice.h>
17#include <linux/platform_device.h>
18#include <linux/fs.h>
19#include <linux/file.h>
20#include <linux/mm.h>
21#include <linux/list.h>
22#include <linux/mutex.h>
23#include <linux/debugfs.h>
24#include <linux/mempolicy.h>
25#include <linux/sched.h>
26#include <asm/io.h>
27#include <asm/uaccess.h>
28#include <asm/cacheflush.h>
29#include "android_pmem.h"
30
/* size of the static pmem[] device table */
#define PMEM_MAX_DEVICES 10
/* largest order pmem_allocate() will accept */
#define PMEM_MAX_ORDER 128
/* granularity of one bitmap entry */
#define PMEM_MIN_ALLOC PAGE_SIZE

/* enables the extra sanity checks and messages guarded by #if PMEM_DEBUG */
#define PMEM_DEBUG 1

/*
 * Bits stored in pmem_data.flags.  Every value is fully parenthesized so
 * that the macro expands to a single operand no matter which operators
 * surround it at the point of use.
 */

/* indicates that a reference to this file has been taken via get_pmem_file,
 * the file should not be released until put_pmem_file is called */
#define PMEM_FLAGS_BUSY (0x1)
/* indicates that this is a suballocation of a larger master range */
#define PMEM_FLAGS_CONNECTED (0x1 << 1)
/* indicates this is a master and not a sub allocation and that it is mmaped */
#define PMEM_FLAGS_MASTERMAP (0x1 << 2)
/* submap and unsubmap flags indicate:
 * 00: subregion has never been mmaped
 * 10: subregion has been mmaped, reference to the mm was taken
 * 11: subregion has been released, reference to the mm still held
 * 01: subregion has been released, reference to the mm has been released
 */
#define PMEM_FLAGS_SUBMAP (0x1 << 3)
#define PMEM_FLAGS_UNSUBMAP (0x1 << 4)
52
53
54struct pmem_data {
55 /* in alloc mode: an index into the bitmap
56 * in no_alloc mode: the size of the allocation */
57 int index;
58 /* see flags above for descriptions */
59 unsigned int flags;
60 /* protects this data field, if the mm_mmap sem will be held at the
61 * same time as this sem, the mm sem must be taken first (as this is
62 * the order for vma_open and vma_close ops */
63 struct rw_semaphore sem;
64 /* info about the mmaping process */
65 struct vm_area_struct *vma;
66 /* task struct of the mapping process */
67 struct task_struct *task;
68 /* process id of teh mapping process */
69 pid_t pid;
70 /* file descriptor of the master */
71 int master_fd;
72 /* file struct of the master */
73 struct file *master_file;
74 /* a list of currently available regions if this is a suballocation */
75 struct list_head region_list;
76 /* a linked list of data so we can access them for debugging */
77 struct list_head list;
78#if PMEM_DEBUG
79 int ref;
80#endif
81};
82
/* One entry of the allocator bitmap, packed into a single byte of bits. */
struct pmem_bits {
	/* 1 when the block starting at this entry is in use, 0 when free */
	unsigned allocated : 1;
	/* log2 of the block size, counted in PMEM_MIN_ALLOC units */
	unsigned order : 7;
};
87
88struct pmem_region_node {
89 struct pmem_region region;
90 struct list_head list;
91};
92
/*
 * Verbose tracing.  Set PMEM_DEBUG_MSGS to 1 to have DLOG() print the
 * message prefixed with file, function and line; with 0 it expands to an
 * empty statement and its arguments are never evaluated.
 */
#define PMEM_DEBUG_MSGS 0
#if PMEM_DEBUG_MSGS
#define DLOG(fmt, ...) \
	do { \
		printk(KERN_INFO "[%s:%s:%d] "fmt, __FILE__, __func__, \
		       __LINE__, ##__VA_ARGS__); \
	} while (0)
#else
#define DLOG(...) do {} while (0)
#endif
102
103struct pmem_info {
104 struct miscdevice dev;
105 /* physical start address of the remaped pmem space */
106 unsigned long base;
107 /* vitual start address of the remaped pmem space */
108 unsigned char __iomem *vbase;
109 /* total size of the pmem space */
110 unsigned long size;
111 /* number of entries in the pmem space */
112 unsigned long num_entries;
113 /* pfn of the garbage page in memory */
114 unsigned long garbage_pfn;
115 /* index of the garbage page in the pmem space */
116 int garbage_index;
117 /* the bitmap for the region indicating which entries are allocated
118 * and which are free */
119 struct pmem_bits *bitmap;
120 /* indicates the region should not be managed with an allocator */
121 unsigned no_allocator;
122 /* indicates maps of this region should be cached, if a mix of
123 * cached and uncached is desired, set this and open the device with
124 * O_SYNC to get an uncached region */
125 unsigned cached;
126 unsigned buffered;
127 /* in no_allocator mode the first mapper gets the whole space and sets
128 * this flag */
129 unsigned allocated;
130 /* for debugging, creates a list of pmem file structs, the
131 * data_list_lock should be taken before pmem_data->sem if both are
132 * needed */
133 struct mutex data_list_lock;
134 struct list_head data_list;
135 /* pmem_sem protects the bitmap array
136 * a write lock should be held when modifying entries in bitmap
137 * a read lock should be held when reading data from bits or
138 * dereferencing a pointer into bitmap
139 *
140 * pmem_data->sem protects the pmem data of a particular file
141 * Many of the function that require the pmem_data->sem have a non-
142 * locking version for when the caller is already holding that sem.
143 *
144 * IF YOU TAKE BOTH LOCKS TAKE THEM IN THIS ORDER:
145 * down(pmem_data->sem) => down(bitmap_sem)
146 */
147 struct rw_semaphore bitmap_sem;
148
149 long (*ioctl)(struct file *, unsigned int, unsigned long);
150 int (*release)(struct inode *, struct file *);
151};
152
153static struct pmem_info pmem[PMEM_MAX_DEVICES];
154static int id_count;
155
/*
 * Accessors for the per-device allocator bitmap.  `id` selects the entry
 * of pmem[], `index` the bitmap entry.  All arguments and results are
 * parenthesized so expressions may be passed safely.  PMEM_ORDER() stays
 * an lvalue because callers increment and assign through it.
 */
#define PMEM_IS_FREE(id, index) (!(pmem[(id)].bitmap[(index)].allocated))
#define PMEM_ORDER(id, index) (pmem[(id)].bitmap[(index)].order)
#define PMEM_BUDDY_INDEX(id, index) \
	((index) ^ (1 << PMEM_ORDER(id, index)))
#define PMEM_NEXT_INDEX(id, index) \
	((index) + (1 << PMEM_ORDER(id, index)))
#define PMEM_OFFSET(index) ((index) * PMEM_MIN_ALLOC)
#define PMEM_START_ADDR(id, index) (PMEM_OFFSET(index) + pmem[(id)].base)
#define PMEM_LEN(id, index) ((1 << PMEM_ORDER(id, index)) * PMEM_MIN_ALLOC)
#define PMEM_END_ADDR(id, index) (PMEM_START_ADDR(id, index) + \
	PMEM_LEN(id, index))
/* PMEM_OFFSET() takes only the index; passing `id` as well did not expand */
#define PMEM_START_VADDR(id, index) (PMEM_OFFSET(index) + pmem[(id)].vbase)
#define PMEM_END_VADDR(id, index) (PMEM_START_VADDR(id, index) + \
	PMEM_LEN(id, index))
/* NOTE(review): PMEM_FLAGS_REVOKED is not defined in the visible part of
 * this file, so this macro only compiles if it is provided elsewhere */
#define PMEM_REVOKED(data) ((data)->flags & (PMEM_FLAGS_REVOKED))
#define PMEM_IS_PAGE_ALIGNED(addr) (!((addr) & (~PAGE_MASK)))
/* true while the file is sub-mapped and that mapping is not yet closed */
#define PMEM_IS_SUBMAP(data) (((data)->flags & (PMEM_FLAGS_SUBMAP)) && \
	(!((data)->flags & (PMEM_FLAGS_UNSUBMAP))))
172
173static int pmem_release(struct inode *, struct file *);
174static int pmem_mmap(struct file *, struct vm_area_struct *);
175static int pmem_open(struct inode *, struct file *);
176static long pmem_ioctl(struct file *, unsigned int, unsigned long);
177
178struct file_operations pmem_fops = {
179 .release = pmem_release,
180 .mmap = pmem_mmap,
181 .open = pmem_open,
182 .unlocked_ioctl = pmem_ioctl,
183};
184
185static int get_id(struct file *file)
186{
187 return MINOR(file->f_dentry->d_inode->i_rdev);
188}
189
190int is_pmem_file(struct file *file)
191{
192 int id;
193
194 if (unlikely(!file || !file->f_dentry || !file->f_dentry->d_inode))
195 return 0;
196 id = get_id(file);
197 if (unlikely(id >= PMEM_MAX_DEVICES))
198 return 0;
199 if (unlikely(file->f_dentry->d_inode->i_rdev !=
200 MKDEV(MISC_MAJOR, pmem[id].dev.minor)))
201 return 0;
202 return 1;
203}
204
205static int has_allocation(struct file *file)
206{
207 struct pmem_data *data;
208 /* check is_pmem_file first if not accessed via pmem_file_ops */
209
210 if (unlikely(!file->private_data))
211 return 0;
212 data = (struct pmem_data *)file->private_data;
213 if (unlikely(data->index < 0))
214 return 0;
215 return 1;
216}
217
218static int is_master_owner(struct file *file)
219{
220 struct file *master_file;
221 struct pmem_data *data;
222 int put_needed, ret = 0;
223
224 if (!is_pmem_file(file) || !has_allocation(file))
225 return 0;
226 data = (struct pmem_data *)file->private_data;
227 if (PMEM_FLAGS_MASTERMAP & data->flags)
228 return 1;
229 master_file = fget_light(data->master_fd, &put_needed);
230 if (master_file && data->master_file == master_file)
231 ret = 1;
232 fput_light(master_file, put_needed);
233 return ret;
234}
235
236static int pmem_free(int id, int index)
237{
238 /* caller should hold the write lock on pmem_sem! */
239 int buddy, curr = index;
240 DLOG("index %d\n", index);
241
242 if (pmem[id].no_allocator) {
243 pmem[id].allocated = 0;
244 return 0;
245 }
246 /* clean up the bitmap, merging any buddies */
247 pmem[id].bitmap[curr].allocated = 0;
248 /* find a slots buddy Buddy# = Slot# ^ (1 << order)
249 * if the buddy is also free merge them
250 * repeat until the buddy is not free or end of the bitmap is reached
251 */
252 do {
253 buddy = PMEM_BUDDY_INDEX(id, curr);
254 if (PMEM_IS_FREE(id, buddy) &&
255 PMEM_ORDER(id, buddy) == PMEM_ORDER(id, curr)) {
256 PMEM_ORDER(id, buddy)++;
257 PMEM_ORDER(id, curr)++;
258 curr = min(buddy, curr);
259 } else {
260 break;
261 }
262 } while (curr < pmem[id].num_entries);
263
264 return 0;
265}
266
267static void pmem_revoke(struct file *file, struct pmem_data *data);
268
269static int pmem_release(struct inode *inode, struct file *file)
270{
271 struct pmem_data *data = (struct pmem_data *)file->private_data;
272 struct pmem_region_node *region_node;
273 struct list_head *elt, *elt2;
274 int id = get_id(file), ret = 0;
275
276
277 mutex_lock(&pmem[id].data_list_lock);
278 /* if this file is a master, revoke all the memory in the connected
279 * files */
280 if (PMEM_FLAGS_MASTERMAP & data->flags) {
281 struct pmem_data *sub_data;
282 list_for_each(elt, &pmem[id].data_list) {
283 sub_data = list_entry(elt, struct pmem_data, list);
284 down_read(&sub_data->sem);
285 if (PMEM_IS_SUBMAP(sub_data) &&
286 file == sub_data->master_file) {
287 up_read(&sub_data->sem);
288 pmem_revoke(file, sub_data);
289 } else
290 up_read(&sub_data->sem);
291 }
292 }
293 list_del(&data->list);
294 mutex_unlock(&pmem[id].data_list_lock);
295
296
297 down_write(&data->sem);
298
299 /* if its not a conencted file and it has an allocation, free it */
300 if (!(PMEM_FLAGS_CONNECTED & data->flags) && has_allocation(file)) {
301 down_write(&pmem[id].bitmap_sem);
302 ret = pmem_free(id, data->index);
303 up_write(&pmem[id].bitmap_sem);
304 }
305
306 /* if this file is a submap (mapped, connected file), downref the
307 * task struct */
308 if (PMEM_FLAGS_SUBMAP & data->flags)
309 if (data->task) {
310 put_task_struct(data->task);
311 data->task = NULL;
312 }
313
314 file->private_data = NULL;
315
316 list_for_each_safe(elt, elt2, &data->region_list) {
317 region_node = list_entry(elt, struct pmem_region_node, list);
318 list_del(elt);
319 kfree(region_node);
320 }
321 BUG_ON(!list_empty(&data->region_list));
322
323 up_write(&data->sem);
324 kfree(data);
325 if (pmem[id].release)
326 ret = pmem[id].release(inode, file);
327
328 return ret;
329}
330
331static int pmem_open(struct inode *inode, struct file *file)
332{
333 struct pmem_data *data;
334 int id = get_id(file);
335 int ret = 0;
336
337 DLOG("current %u file %p(%d)\n", current->pid, file, file_count(file));
338 /* setup file->private_data to indicate its unmapped */
339 /* you can only open a pmem device one time */
340 if (file->private_data != NULL)
341 return -1;
342 data = kmalloc(sizeof(struct pmem_data), GFP_KERNEL);
343 if (!data) {
344 printk("pmem: unable to allocate memory for pmem metadata.");
345 return -1;
346 }
347 data->flags = 0;
348 data->index = -1;
349 data->task = NULL;
350 data->vma = NULL;
351 data->pid = 0;
352 data->master_file = NULL;
353#if PMEM_DEBUG
354 data->ref = 0;
355#endif
356 INIT_LIST_HEAD(&data->region_list);
357 init_rwsem(&data->sem);
358
359 file->private_data = data;
360 INIT_LIST_HEAD(&data->list);
361
362 mutex_lock(&pmem[id].data_list_lock);
363 list_add(&data->list, &pmem[id].data_list);
364 mutex_unlock(&pmem[id].data_list_lock);
365 return ret;
366}
367
368static unsigned long pmem_order(unsigned long len)
369{
370 int i;
371
372 len = (len + PMEM_MIN_ALLOC - 1)/PMEM_MIN_ALLOC;
373 len--;
374 for (i = 0; i < sizeof(len)*8; i++)
375 if (len >> i == 0)
376 break;
377 return i;
378}
379
380static int pmem_allocate(int id, unsigned long len)
381{
382 /* caller should hold the write lock on pmem_sem! */
383 /* return the corresponding pdata[] entry */
384 int curr = 0;
385 int end = pmem[id].num_entries;
386 int best_fit = -1;
387 unsigned long order = pmem_order(len);
388
389 if (pmem[id].no_allocator) {
390 DLOG("no allocator");
391 if ((len > pmem[id].size) || pmem[id].allocated)
392 return -1;
393 pmem[id].allocated = 1;
394 return len;
395 }
396
397 if (order > PMEM_MAX_ORDER)
398 return -1;
399 DLOG("order %lx\n", order);
400
401 /* look through the bitmap:
402 * if you find a free slot of the correct order use it
403 * otherwise, use the best fit (smallest with size > order) slot
404 */
405 while (curr < end) {
406 if (PMEM_IS_FREE(id, curr)) {
407 if (PMEM_ORDER(id, curr) == (unsigned char)order) {
408 /* set the not free bit and clear others */
409 best_fit = curr;
410 break;
411 }
412 if (PMEM_ORDER(id, curr) > (unsigned char)order &&
413 (best_fit < 0 ||
414 PMEM_ORDER(id, curr) < PMEM_ORDER(id, best_fit)))
415 best_fit = curr;
416 }
417 curr = PMEM_NEXT_INDEX(id, curr);
418 }
419
420 /* if best_fit < 0, there are no suitable slots,
421 * return an error
422 */
423 if (best_fit < 0) {
424 printk("pmem: no space left to allocate!\n");
425 return -1;
426 }
427
428 /* now partition the best fit:
429 * split the slot into 2 buddies of order - 1
430 * repeat until the slot is of the correct order
431 */
432 while (PMEM_ORDER(id, best_fit) > (unsigned char)order) {
433 int buddy;
434 PMEM_ORDER(id, best_fit) -= 1;
435 buddy = PMEM_BUDDY_INDEX(id, best_fit);
436 PMEM_ORDER(id, buddy) = PMEM_ORDER(id, best_fit);
437 }
438 pmem[id].bitmap[best_fit].allocated = 1;
439 return best_fit;
440}
441
442static pgprot_t pmem_access_prot(struct file *file, pgprot_t vma_prot)
443{
444 int id = get_id(file);
445#ifdef pgprot_noncached
446 if (pmem[id].cached == 0 || file->f_flags & O_SYNC)
447 return pgprot_noncached(vma_prot);
448#endif
449#ifdef pgprot_ext_buffered
450 else if (pmem[id].buffered)
451 return pgprot_ext_buffered(vma_prot);
452#endif
453 return vma_prot;
454}
455
456static unsigned long pmem_start_addr(int id, struct pmem_data *data)
457{
458 if (pmem[id].no_allocator)
459 return PMEM_START_ADDR(id, 0);
460 else
461 return PMEM_START_ADDR(id, data->index);
462
463}
464
465static void *pmem_start_vaddr(int id, struct pmem_data *data)
466{
467 return pmem_start_addr(id, data) - pmem[id].base + pmem[id].vbase;
468}
469
470static unsigned long pmem_len(int id, struct pmem_data *data)
471{
472 if (pmem[id].no_allocator)
473 return data->index;
474 else
475 return PMEM_LEN(id, data->index);
476}
477
478static int pmem_map_garbage(int id, struct vm_area_struct *vma,
479 struct pmem_data *data, unsigned long offset,
480 unsigned long len)
481{
482 int i, garbage_pages = len >> PAGE_SHIFT;
483
484 vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP | VM_SHARED | VM_WRITE;
485 for (i = 0; i < garbage_pages; i++) {
486 if (vm_insert_pfn(vma, vma->vm_start + offset + (i * PAGE_SIZE),
487 pmem[id].garbage_pfn))
488 return -EAGAIN;
489 }
490 return 0;
491}
492
493static int pmem_unmap_pfn_range(int id, struct vm_area_struct *vma,
494 struct pmem_data *data, unsigned long offset,
495 unsigned long len)
496{
497 int garbage_pages;
498 DLOG("unmap offset %lx len %lx\n", offset, len);
499
500 BUG_ON(!PMEM_IS_PAGE_ALIGNED(len));
501
502 garbage_pages = len >> PAGE_SHIFT;
503 zap_page_range(vma, vma->vm_start + offset, len, NULL);
504 pmem_map_garbage(id, vma, data, offset, len);
505 return 0;
506}
507
508static int pmem_map_pfn_range(int id, struct vm_area_struct *vma,
509 struct pmem_data *data, unsigned long offset,
510 unsigned long len)
511{
512 DLOG("map offset %lx len %lx\n", offset, len);
513 BUG_ON(!PMEM_IS_PAGE_ALIGNED(vma->vm_start));
514 BUG_ON(!PMEM_IS_PAGE_ALIGNED(vma->vm_end));
515 BUG_ON(!PMEM_IS_PAGE_ALIGNED(len));
516 BUG_ON(!PMEM_IS_PAGE_ALIGNED(offset));
517
518 if (io_remap_pfn_range(vma, vma->vm_start + offset,
519 (pmem_start_addr(id, data) + offset) >> PAGE_SHIFT,
520 len, vma->vm_page_prot)) {
521 return -EAGAIN;
522 }
523 return 0;
524}
525
526static int pmem_remap_pfn_range(int id, struct vm_area_struct *vma,
527 struct pmem_data *data, unsigned long offset,
528 unsigned long len)
529{
530 /* hold the mm semp for the vma you are modifying when you call this */
531 BUG_ON(!vma);
532 zap_page_range(vma, vma->vm_start + offset, len, NULL);
533 return pmem_map_pfn_range(id, vma, data, offset, len);
534}
535
536static void pmem_vma_open(struct vm_area_struct *vma)
537{
538 struct file *file = vma->vm_file;
539 struct pmem_data *data = file->private_data;
540 int id = get_id(file);
541 /* this should never be called as we don't support copying pmem
542 * ranges via fork */
543 BUG_ON(!has_allocation(file));
544 down_write(&data->sem);
545 /* remap the garbage pages, forkers don't get access to the data */
546 pmem_unmap_pfn_range(id, vma, data, 0, vma->vm_start - vma->vm_end);
547 up_write(&data->sem);
548}
549
550static void pmem_vma_close(struct vm_area_struct *vma)
551{
552 struct file *file = vma->vm_file;
553 struct pmem_data *data = file->private_data;
554
555 DLOG("current %u ppid %u file %p count %d\n", current->pid,
556 current->parent->pid, file, file_count(file));
557 if (unlikely(!is_pmem_file(file) || !has_allocation(file))) {
558 printk(KERN_WARNING "pmem: something is very wrong, you are "
559 "closing a vm backing an allocation that doesn't "
560 "exist!\n");
561 return;
562 }
563 down_write(&data->sem);
564 if (data->vma == vma) {
565 data->vma = NULL;
566 if ((data->flags & PMEM_FLAGS_CONNECTED) &&
567 (data->flags & PMEM_FLAGS_SUBMAP))
568 data->flags |= PMEM_FLAGS_UNSUBMAP;
569 }
570 /* the kernel is going to free this vma now anyway */
571 up_write(&data->sem);
572}
573
574static struct vm_operations_struct vm_ops = {
575 .open = pmem_vma_open,
576 .close = pmem_vma_close,
577};
578
579static int pmem_mmap(struct file *file, struct vm_area_struct *vma)
580{
581 struct pmem_data *data;
582 int index;
583 unsigned long vma_size = vma->vm_end - vma->vm_start;
584 int ret = 0, id = get_id(file);
585
586 if (vma->vm_pgoff || !PMEM_IS_PAGE_ALIGNED(vma_size)) {
587#if PMEM_DEBUG
588 printk(KERN_ERR "pmem: mmaps must be at offset zero, aligned"
589 " and a multiple of pages_size.\n");
590#endif
591 return -EINVAL;
592 }
593
594 data = (struct pmem_data *)file->private_data;
595 down_write(&data->sem);
596 /* check this file isn't already mmaped, for submaps check this file
597 * has never been mmaped */
598 if ((data->flags & PMEM_FLAGS_SUBMAP) ||
599 (data->flags & PMEM_FLAGS_UNSUBMAP)) {
600#if PMEM_DEBUG
601 printk(KERN_ERR "pmem: you can only mmap a pmem file once, "
602 "this file is already mmaped. %x\n", data->flags);
603#endif
604 ret = -EINVAL;
605 goto error;
606 }
607 /* if file->private_data == unalloced, alloc*/
608 if (data && data->index == -1) {
609 down_write(&pmem[id].bitmap_sem);
610 index = pmem_allocate(id, vma->vm_end - vma->vm_start);
611 up_write(&pmem[id].bitmap_sem);
612 data->index = index;
613 }
614 /* either no space was available or an error occured */
615 if (!has_allocation(file)) {
616 ret = -EINVAL;
617 printk("pmem: could not find allocation for map.\n");
618 goto error;
619 }
620
621 if (pmem_len(id, data) < vma_size) {
622#if PMEM_DEBUG
623 printk(KERN_WARNING "pmem: mmap size [%lu] does not match"
624 "size of backing region [%lu].\n", vma_size,
625 pmem_len(id, data));
626#endif
627 ret = -EINVAL;
628 goto error;
629 }
630
631 vma->vm_pgoff = pmem_start_addr(id, data) >> PAGE_SHIFT;
632 vma->vm_page_prot = pmem_access_prot(file, vma->vm_page_prot);
633
634 if (data->flags & PMEM_FLAGS_CONNECTED) {
635 struct pmem_region_node *region_node;
636 struct list_head *elt;
637 if (pmem_map_garbage(id, vma, data, 0, vma_size)) {
638 printk("pmem: mmap failed in kernel!\n");
639 ret = -EAGAIN;
640 goto error;
641 }
642 list_for_each(elt, &data->region_list) {
643 region_node = list_entry(elt, struct pmem_region_node,
644 list);
645 DLOG("remapping file: %p %lx %lx\n", file,
646 region_node->region.offset,
647 region_node->region.len);
648 if (pmem_remap_pfn_range(id, vma, data,
649 region_node->region.offset,
650 region_node->region.len)) {
651 ret = -EAGAIN;
652 goto error;
653 }
654 }
655 data->flags |= PMEM_FLAGS_SUBMAP;
656 get_task_struct(current->group_leader);
657 data->task = current->group_leader;
658 data->vma = vma;
659#if PMEM_DEBUG
660 data->pid = current->pid;
661#endif
662 DLOG("submmapped file %p vma %p pid %u\n", file, vma,
663 current->pid);
664 } else {
665 if (pmem_map_pfn_range(id, vma, data, 0, vma_size)) {
666 printk(KERN_INFO "pmem: mmap failed in kernel!\n");
667 ret = -EAGAIN;
668 goto error;
669 }
670 data->flags |= PMEM_FLAGS_MASTERMAP;
671 data->pid = current->pid;
672 }
673 vma->vm_ops = &vm_ops;
674error:
675 up_write(&data->sem);
676 return ret;
677}
678
679/* the following are the api for accessing pmem regions by other drivers
680 * from inside the kernel */
681int get_pmem_user_addr(struct file *file, unsigned long *start,
682 unsigned long *len)
683{
684 struct pmem_data *data;
685 if (!is_pmem_file(file) || !has_allocation(file)) {
686#if PMEM_DEBUG
687 printk(KERN_INFO "pmem: requested pmem data from invalid"
688 "file.\n");
689#endif
690 return -1;
691 }
692 data = (struct pmem_data *)file->private_data;
693 down_read(&data->sem);
694 if (data->vma) {
695 *start = data->vma->vm_start;
696 *len = data->vma->vm_end - data->vma->vm_start;
697 } else {
698 *start = 0;
699 *len = 0;
700 }
701 up_read(&data->sem);
702 return 0;
703}
704
705int get_pmem_addr(struct file *file, unsigned long *start,
706 unsigned long *vstart, unsigned long *len)
707{
708 struct pmem_data *data;
709 int id;
710
711 if (!is_pmem_file(file) || !has_allocation(file)) {
712 return -1;
713 }
714
715 data = (struct pmem_data *)file->private_data;
716 if (data->index == -1) {
717#if PMEM_DEBUG
718 printk(KERN_INFO "pmem: requested pmem data from file with no "
719 "allocation.\n");
720 return -1;
721#endif
722 }
723 id = get_id(file);
724
725 down_read(&data->sem);
726 *start = pmem_start_addr(id, data);
727 *len = pmem_len(id, data);
728 *vstart = (unsigned long)pmem_start_vaddr(id, data);
729 up_read(&data->sem);
730#if PMEM_DEBUG
731 down_write(&data->sem);
732 data->ref++;
733 up_write(&data->sem);
734#endif
735 return 0;
736}
737
738int get_pmem_file(int fd, unsigned long *start, unsigned long *vstart,
739 unsigned long *len, struct file **filp)
740{
741 struct file *file;
742
743 file = fget(fd);
744 if (unlikely(file == NULL)) {
745 printk(KERN_INFO "pmem: requested data from file descriptor "
746 "that doesn't exist.");
747 return -1;
748 }
749
750 if (get_pmem_addr(file, start, vstart, len))
751 goto end;
752
753 if (filp)
754 *filp = file;
755 return 0;
756end:
757 fput(file);
758 return -1;
759}
760
761void put_pmem_file(struct file *file)
762{
763 struct pmem_data *data;
764 int id;
765
766 if (!is_pmem_file(file))
767 return;
768 id = get_id(file);
769 data = (struct pmem_data *)file->private_data;
770#if PMEM_DEBUG
771 down_write(&data->sem);
772 if (data->ref == 0) {
773 printk("pmem: pmem_put > pmem_get %s (pid %d)\n",
774 pmem[id].dev.name, data->pid);
775 BUG();
776 }
777 data->ref--;
778 up_write(&data->sem);
779#endif
780 fput(file);
781}
782
783void flush_pmem_file(struct file *file, unsigned long offset, unsigned long len)
784{
785 struct pmem_data *data;
786 int id;
787 void *vaddr;
788 struct pmem_region_node *region_node;
789 struct list_head *elt;
790 void *flush_start, *flush_end;
791
792 if (!is_pmem_file(file) || !has_allocation(file)) {
793 return;
794 }
795
796 id = get_id(file);
797 data = (struct pmem_data *)file->private_data;
798 if (!pmem[id].cached || file->f_flags & O_SYNC)
799 return;
800
801 down_read(&data->sem);
802 vaddr = pmem_start_vaddr(id, data);
803 /* if this isn't a submmapped file, flush the whole thing */
804 if (unlikely(!(data->flags & PMEM_FLAGS_CONNECTED))) {
805 dmac_flush_range(vaddr, vaddr + pmem_len(id, data));
806 goto end;
807 }
808 /* otherwise, flush the region of the file we are drawing */
809 list_for_each(elt, &data->region_list) {
810 region_node = list_entry(elt, struct pmem_region_node, list);
811 if ((offset >= region_node->region.offset) &&
812 ((offset + len) <= (region_node->region.offset +
813 region_node->region.len))) {
814 flush_start = vaddr + region_node->region.offset;
815 flush_end = flush_start + region_node->region.len;
816 dmac_flush_range(flush_start, flush_end);
817 break;
818 }
819 }
820end:
821 up_read(&data->sem);
822}
823
824static int pmem_connect(unsigned long connect, struct file *file)
825{
826 struct pmem_data *data = (struct pmem_data *)file->private_data;
827 struct pmem_data *src_data;
828 struct file *src_file;
829 int ret = 0, put_needed;
830
831 down_write(&data->sem);
832 /* retrieve the src file and check it is a pmem file with an alloc */
833 src_file = fget_light(connect, &put_needed);
834 DLOG("connect %p to %p\n", file, src_file);
835 if (!src_file) {
836 printk("pmem: src file not found!\n");
837 ret = -EINVAL;
838 goto err_no_file;
839 }
840 if (unlikely(!is_pmem_file(src_file) || !has_allocation(src_file))) {
841 printk(KERN_INFO "pmem: src file is not a pmem file or has no "
842 "alloc!\n");
843 ret = -EINVAL;
844 goto err_bad_file;
845 }
846 src_data = (struct pmem_data *)src_file->private_data;
847
848 if (has_allocation(file) && (data->index != src_data->index)) {
849 printk("pmem: file is already mapped but doesn't match this"
850 " src_file!\n");
851 ret = -EINVAL;
852 goto err_bad_file;
853 }
854 data->index = src_data->index;
855 data->flags |= PMEM_FLAGS_CONNECTED;
856 data->master_fd = connect;
857 data->master_file = src_file;
858
859err_bad_file:
860 fput_light(src_file, put_needed);
861err_no_file:
862 up_write(&data->sem);
863 return ret;
864}
865
866static void pmem_unlock_data_and_mm(struct pmem_data *data,
867 struct mm_struct *mm)
868{
869 up_write(&data->sem);
870 if (mm != NULL) {
871 up_write(&mm->mmap_sem);
872 mmput(mm);
873 }
874}
875
876static int pmem_lock_data_and_mm(struct file *file, struct pmem_data *data,
877 struct mm_struct **locked_mm)
878{
879 int ret = 0;
880 struct mm_struct *mm = NULL;
881 *locked_mm = NULL;
882lock_mm:
883 down_read(&data->sem);
884 if (PMEM_IS_SUBMAP(data)) {
885 mm = get_task_mm(data->task);
886 if (!mm) {
887#if PMEM_DEBUG
888 printk("pmem: can't remap task is gone!\n");
889#endif
890 up_read(&data->sem);
891 return -1;
892 }
893 }
894 up_read(&data->sem);
895
896 if (mm)
897 down_write(&mm->mmap_sem);
898
899 down_write(&data->sem);
900 /* check that the file didn't get mmaped before we could take the
901 * data sem, this should be safe b/c you can only submap each file
902 * once */
903 if (PMEM_IS_SUBMAP(data) && !mm) {
904 pmem_unlock_data_and_mm(data, mm);
905 up_write(&data->sem);
906 goto lock_mm;
907 }
908 /* now check that vma.mm is still there, it could have been
909 * deleted by vma_close before we could get the data->sem */
910 if ((data->flags & PMEM_FLAGS_UNSUBMAP) && (mm != NULL)) {
911 /* might as well release this */
912 if (data->flags & PMEM_FLAGS_SUBMAP) {
913 put_task_struct(data->task);
914 data->task = NULL;
915 /* lower the submap flag to show the mm is gone */
916 data->flags &= ~(PMEM_FLAGS_SUBMAP);
917 }
918 pmem_unlock_data_and_mm(data, mm);
919 return -1;
920 }
921 *locked_mm = mm;
922 return ret;
923}
924
925int pmem_remap(struct pmem_region *region, struct file *file,
926 unsigned operation)
927{
928 int ret;
929 struct pmem_region_node *region_node;
930 struct mm_struct *mm = NULL;
931 struct list_head *elt, *elt2;
932 int id = get_id(file);
933 struct pmem_data *data = (struct pmem_data *)file->private_data;
934
935 /* pmem region must be aligned on a page boundry */
936 if (unlikely(!PMEM_IS_PAGE_ALIGNED(region->offset) ||
937 !PMEM_IS_PAGE_ALIGNED(region->len))) {
938#if PMEM_DEBUG
939 printk("pmem: request for unaligned pmem suballocation "
940 "%lx %lx\n", region->offset, region->len);
941#endif
942 return -EINVAL;
943 }
944
945 /* if userspace requests a region of len 0, there's nothing to do */
946 if (region->len == 0)
947 return 0;
948
949 /* lock the mm and data */
950 ret = pmem_lock_data_and_mm(file, data, &mm);
951 if (ret)
952 return 0;
953
954 /* only the owner of the master file can remap the client fds
955 * that back in it */
956 if (!is_master_owner(file)) {
957#if PMEM_DEBUG
958 printk("pmem: remap requested from non-master process\n");
959#endif
960 ret = -EINVAL;
961 goto err;
962 }
963
964 /* check that the requested range is within the src allocation */
965 if (unlikely((region->offset > pmem_len(id, data)) ||
966 (region->len > pmem_len(id, data)) ||
967 (region->offset + region->len > pmem_len(id, data)))) {
968#if PMEM_DEBUG
969 printk(KERN_INFO "pmem: suballoc doesn't fit in src_file!\n");
970#endif
971 ret = -EINVAL;
972 goto err;
973 }
974
975 if (operation == PMEM_MAP) {
976 region_node = kmalloc(sizeof(struct pmem_region_node),
977 GFP_KERNEL);
978 if (!region_node) {
979 ret = -ENOMEM;
980#if PMEM_DEBUG
981 printk(KERN_INFO "No space to allocate metadata!");
982#endif
983 goto err;
984 }
985 region_node->region = *region;
986 list_add(&region_node->list, &data->region_list);
987 } else if (operation == PMEM_UNMAP) {
988 int found = 0;
989 list_for_each_safe(elt, elt2, &data->region_list) {
990 region_node = list_entry(elt, struct pmem_region_node,
991 list);
992 if (region->len == 0 ||
993 (region_node->region.offset == region->offset &&
994 region_node->region.len == region->len)) {
995 list_del(elt);
996 kfree(region_node);
997 found = 1;
998 }
999 }
1000 if (!found) {
1001#if PMEM_DEBUG
1002 printk("pmem: Unmap region does not map any mapped "
1003 "region!");
1004#endif
1005 ret = -EINVAL;
1006 goto err;
1007 }
1008 }
1009
1010 if (data->vma && PMEM_IS_SUBMAP(data)) {
1011 if (operation == PMEM_MAP)
1012 ret = pmem_remap_pfn_range(id, data->vma, data,
1013 region->offset, region->len);
1014 else if (operation == PMEM_UNMAP)
1015 ret = pmem_unmap_pfn_range(id, data->vma, data,
1016 region->offset, region->len);
1017 }
1018
1019err:
1020 pmem_unlock_data_and_mm(data, mm);
1021 return ret;
1022}
1023
1024static void pmem_revoke(struct file *file, struct pmem_data *data)
1025{
1026 struct pmem_region_node *region_node;
1027 struct list_head *elt, *elt2;
1028 struct mm_struct *mm = NULL;
1029 int id = get_id(file);
1030 int ret = 0;
1031
1032 data->master_file = NULL;
1033 ret = pmem_lock_data_and_mm(file, data, &mm);
1034 /* if lock_data_and_mm fails either the task that mapped the fd, or
1035 * the vma that mapped it have already gone away, nothing more
1036 * needs to be done */
1037 if (ret)
1038 return;
1039 /* unmap everything */
1040 /* delete the regions and region list nothing is mapped any more */
1041 if (data->vma)
1042 list_for_each_safe(elt, elt2, &data->region_list) {
1043 region_node = list_entry(elt, struct pmem_region_node,
1044 list);
1045 pmem_unmap_pfn_range(id, data->vma, data,
1046 region_node->region.offset,
1047 region_node->region.len);
1048 list_del(elt);
1049 kfree(region_node);
1050 }
1051 /* delete the master file */
1052 pmem_unlock_data_and_mm(data, mm);
1053}
1054
1055static void pmem_get_size(struct pmem_region *region, struct file *file)
1056{
1057 struct pmem_data *data = (struct pmem_data *)file->private_data;
1058 int id = get_id(file);
1059
1060 if (!has_allocation(file)) {
1061 region->offset = 0;
1062 region->len = 0;
1063 return;
1064 } else {
1065 region->offset = pmem_start_addr(id, data);
1066 region->len = pmem_len(id, data);
1067 }
1068 DLOG("offset %lx len %lx\n", region->offset, region->len);
1069}
1070
1071
1072static long pmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1073{
1074 struct pmem_data *data;
1075 int id = get_id(file);
1076
1077 switch (cmd) {
1078 case PMEM_GET_PHYS:
1079 {
1080 struct pmem_region region;
1081 DLOG("get_phys\n");
1082 if (!has_allocation(file)) {
1083 region.offset = 0;
1084 region.len = 0;
1085 } else {
1086 data = (struct pmem_data *)file->private_data;
1087 region.offset = pmem_start_addr(id, data);
1088 region.len = pmem_len(id, data);
1089 }
1090 printk(KERN_INFO "pmem: request for physical address of pmem region "
1091 "from process %d.\n", current->pid);
1092 if (copy_to_user((void __user *)arg, &region,
1093 sizeof(struct pmem_region)))
1094 return -EFAULT;
1095 break;
1096 }
1097 case PMEM_MAP:
1098 {
1099 struct pmem_region region;
1100 if (copy_from_user(&region, (void __user *)arg,
1101 sizeof(struct pmem_region)))
1102 return -EFAULT;
1103 data = (struct pmem_data *)file->private_data;
1104 return pmem_remap(&region, file, PMEM_MAP);
1105 }
1106 break;
1107 case PMEM_UNMAP:
1108 {
1109 struct pmem_region region;
1110 if (copy_from_user(&region, (void __user *)arg,
1111 sizeof(struct pmem_region)))
1112 return -EFAULT;
1113 data = (struct pmem_data *)file->private_data;
1114 return pmem_remap(&region, file, PMEM_UNMAP);
1115 break;
1116 }
1117 case PMEM_GET_SIZE:
1118 {
1119 struct pmem_region region;
1120 DLOG("get_size\n");
1121 pmem_get_size(&region, file);
1122 if (copy_to_user((void __user *)arg, &region,
1123 sizeof(struct pmem_region)))
1124 return -EFAULT;
1125 break;
1126 }
1127 case PMEM_GET_TOTAL_SIZE:
1128 {
1129 struct pmem_region region;
1130 DLOG("get total size\n");
1131 region.offset = 0;
1132 get_id(file);
1133 region.len = pmem[id].size;
1134 if (copy_to_user((void __user *)arg, &region,
1135 sizeof(struct pmem_region)))
1136 return -EFAULT;
1137 break;
1138 }
1139 case PMEM_ALLOCATE:
1140 {
1141 if (has_allocation(file))
1142 return -EINVAL;
1143 data = (struct pmem_data *)file->private_data;
1144 data->index = pmem_allocate(id, arg);
1145 break;
1146 }
1147 case PMEM_CONNECT:
1148 DLOG("connect\n");
1149 return pmem_connect(arg, file);
1150 break;
1151 case PMEM_CACHE_FLUSH:
1152 {
1153 struct pmem_region region;
1154 DLOG("flush\n");
1155 if (copy_from_user(&region, (void __user *)arg,
1156 sizeof(struct pmem_region)))
1157 return -EFAULT;
1158 flush_pmem_file(file, region.offset, region.len);
1159 break;
1160 }
1161 default:
1162 if (pmem[id].ioctl)
1163 return pmem[id].ioctl(file, cmd, arg);
1164 return -EINVAL;
1165 }
1166 return 0;
1167}
1168
1169#if PMEM_DEBUG
1170static ssize_t debug_open(struct inode *inode, struct file *file)
1171{
1172 file->private_data = inode->i_private;
1173 return 0;
1174}
1175
1176static ssize_t debug_read(struct file *file, char __user *buf, size_t count,
1177 loff_t *ppos)
1178{
1179 struct list_head *elt, *elt2;
1180 struct pmem_data *data;
1181 struct pmem_region_node *region_node;
1182 int id = (int)file->private_data;
1183 const int debug_bufmax = 4096;
1184 static char buffer[4096];
1185 int n = 0;
1186
1187 DLOG("debug open\n");
1188 n = scnprintf(buffer, debug_bufmax,
1189 "pid #: mapped regions (offset, len) (offset,len)...\n");
1190
1191 mutex_lock(&pmem[id].data_list_lock);
1192 list_for_each(elt, &pmem[id].data_list) {
1193 data = list_entry(elt, struct pmem_data, list);
1194 down_read(&data->sem);
1195 n += scnprintf(buffer + n, debug_bufmax - n, "pid %u:",
1196 data->pid);
1197 list_for_each(elt2, &data->region_list) {
1198 region_node = list_entry(elt2, struct pmem_region_node,
1199 list);
1200 n += scnprintf(buffer + n, debug_bufmax - n,
1201 "(%lx,%lx) ",
1202 region_node->region.offset,
1203 region_node->region.len);
1204 }
1205 n += scnprintf(buffer + n, debug_bufmax - n, "\n");
1206 up_read(&data->sem);
1207 }
1208 mutex_unlock(&pmem[id].data_list_lock);
1209
1210 n++;
1211 buffer[n] = 0;
1212 return simple_read_from_buffer(buf, count, ppos, buffer, n);
1213}
1214
1215static struct file_operations debug_fops = {
1216 .read = debug_read,
1217 .open = debug_open,
1218};
1219#endif
1220
#if 0
/*
 * Compiled out. pmem_setup() fills in and registers a miscdevice per id
 * (pmem[id].dev), so this single static device is not used.
 */
static struct miscdevice pmem_dev = {
	.fops = &pmem_fops,
	.name = "pmem",
};
#endif
1227
1228int pmem_setup(struct android_pmem_platform_data *pdata,
1229 long (*ioctl)(struct file *, unsigned int, unsigned long),
1230 int (*release)(struct inode *, struct file *))
1231{
1232 int err = 0;
1233 int i, index = 0;
1234 int id = id_count;
1235 id_count++;
1236
1237 pmem[id].no_allocator = pdata->no_allocator;
1238 pmem[id].cached = pdata->cached;
1239 pmem[id].buffered = pdata->buffered;
1240 pmem[id].base = pdata->start;
1241 pmem[id].size = pdata->size;
1242 pmem[id].ioctl = ioctl;
1243 pmem[id].release = release;
1244 init_rwsem(&pmem[id].bitmap_sem);
1245 mutex_init(&pmem[id].data_list_lock);
1246 INIT_LIST_HEAD(&pmem[id].data_list);
1247 pmem[id].dev.name = pdata->name;
1248 pmem[id].dev.minor = id;
1249 pmem[id].dev.fops = &pmem_fops;
1250 printk(KERN_INFO "%s: %d init\n", pdata->name, pdata->cached);
1251
1252 err = misc_register(&pmem[id].dev);
1253 if (err) {
1254 printk(KERN_ALERT "Unable to register pmem driver!\n");
1255 goto err_cant_register_device;
1256 }
1257 pmem[id].num_entries = pmem[id].size / PMEM_MIN_ALLOC;
1258
1259 pmem[id].bitmap = kmalloc(pmem[id].num_entries *
1260 sizeof(struct pmem_bits), GFP_KERNEL);
1261 if (!pmem[id].bitmap)
1262 goto err_no_mem_for_metadata;
1263
1264 memset(pmem[id].bitmap, 0, sizeof(struct pmem_bits) *
1265 pmem[id].num_entries);
1266
1267 for (i = sizeof(pmem[id].num_entries) * 8 - 1; i >= 0; i--) {
1268 if ((pmem[id].num_entries) & 1<<i) {
1269 PMEM_ORDER(id, index) = i;
1270 index = PMEM_NEXT_INDEX(id, index);
1271 }
1272 }
1273
1274 if (pmem[id].cached)
1275 pmem[id].vbase = ioremap_cached(pmem[id].base,
1276 pmem[id].size);
1277#ifdef ioremap_ext_buffered
1278 else if (pmem[id].buffered)
1279 pmem[id].vbase = ioremap_ext_buffered(pmem[id].base,
1280 pmem[id].size);
1281#endif
1282 else
1283 pmem[id].vbase = ioremap(pmem[id].base, pmem[id].size);
1284
1285 if (pmem[id].vbase == 0)
1286 goto error_cant_remap;
1287
1288 pmem[id].garbage_pfn = page_to_pfn(alloc_page(GFP_KERNEL));
1289 if (pmem[id].no_allocator)
1290 pmem[id].allocated = 0;
1291
1292#if PMEM_DEBUG
1293 debugfs_create_file(pdata->name, S_IFREG | S_IRUGO, NULL, (void *)id,
1294 &debug_fops);
1295#endif
1296 return 0;
1297error_cant_remap:
1298 kfree(pmem[id].bitmap);
1299err_no_mem_for_metadata:
1300 misc_deregister(&pmem[id].dev);
1301err_cant_register_device:
1302 return -1;
1303}
1304
1305static int pmem_probe(struct platform_device *pdev)
1306{
1307 struct android_pmem_platform_data *pdata;
1308
1309 if (!pdev || !pdev->dev.platform_data) {
1310 printk(KERN_ALERT "Unable to probe pmem!\n");
1311 return -1;
1312 }
1313 pdata = pdev->dev.platform_data;
1314 return pmem_setup(pdata, NULL, NULL);
1315}
1316
1317
1318static int pmem_remove(struct platform_device *pdev)
1319{
1320 int id = pdev->id;
1321 __free_page(pfn_to_page(pmem[id].garbage_pfn));
1322 misc_deregister(&pmem[id].dev);
1323 return 0;
1324}
1325
1326static struct platform_driver pmem_driver = {
1327 .probe = pmem_probe,
1328 .remove = pmem_remove,
1329 .driver = { .name = "android_pmem" }
1330};
1331
1332
1333static int __init pmem_init(void)
1334{
1335 return platform_driver_register(&pmem_driver);
1336}
1337
1338static void __exit pmem_exit(void)
1339{
1340 platform_driver_unregister(&pmem_driver);
1341}
1342
/* Hook pmem_init/pmem_exit up as the module's load and unload handlers. */
module_init(pmem_init);
module_exit(pmem_exit);
1345