| Rusty Russell | d7e28ff | 2007-07-19 01:49:23 -0700 | [diff] [blame] | 1 | #ifndef _LGUEST_H | 
|  | 2 | #define _LGUEST_H | 
|  | 3 |  | 
|  | 4 | #include <asm/desc.h> | 
|  | 5 |  | 
|  | 6 | #define GDT_ENTRY_LGUEST_CS	10 | 
|  | 7 | #define GDT_ENTRY_LGUEST_DS	11 | 
|  | 8 | #define LGUEST_CS		(GDT_ENTRY_LGUEST_CS * 8) | 
|  | 9 | #define LGUEST_DS		(GDT_ENTRY_LGUEST_DS * 8) | 
|  | 10 |  | 
|  | 11 | #ifndef __ASSEMBLY__ | 
|  | 12 | #include <linux/types.h> | 
|  | 13 | #include <linux/init.h> | 
|  | 14 | #include <linux/stringify.h> | 
|  | 15 | #include <linux/binfmts.h> | 
|  | 16 | #include <linux/futex.h> | 
|  | 17 | #include <linux/lguest.h> | 
|  | 18 | #include <linux/lguest_launcher.h> | 
|  | 19 | #include <linux/wait.h> | 
|  | 20 | #include <linux/err.h> | 
|  | 21 | #include <asm/semaphore.h> | 
|  | 22 | #include "irq_vectors.h" | 
|  | 23 |  | 
|  | 24 | #define GUEST_PL 1 | 
|  | 25 |  | 
/*
 * Saved register block.  The leading members are stored by hand; the last
 * five are pushed by the trap.  Member order is the memory layout, so it is
 * kept exactly as it was.
 */
struct lguest_regs {
	/* Manually saved part. */
	unsigned long ebx;
	unsigned long ecx;
	unsigned long edx;
	unsigned long esi;
	unsigned long edi;
	unsigned long ebp;
	unsigned long gs;
	unsigned long eax;
	unsigned long fs;
	unsigned long ds;
	unsigned long es;
	unsigned long trapnum;
	unsigned long errcode;

	/* Trap pushed part */
	unsigned long eip;
	unsigned long cs;
	unsigned long eflags;
	unsigned long esp;
	unsigned long ss;
};
|  | 42 |  | 
/* Implemented outside this header. */
int init_pagetables(struct page **switcher_page, unsigned int pages);
void free_pagetables(void);
|  | 45 |  | 
/* Whole-4G segment descriptors, usable for CS and DS. */
#define FULL_EXEC_SEGMENT \
	((struct desc_struct){0x0000ffff, 0x00cf9b00})
#define FULL_SEGMENT \
	((struct desc_struct){0x0000ffff, 0x00cf9300})
|  | 49 |  | 
|  | 50 | struct lguest_dma_info | 
|  | 51 | { | 
|  | 52 | struct list_head list; | 
|  | 53 | union futex_key key; | 
|  | 54 | unsigned long dmas; | 
|  | 55 | u16 next_dma; | 
|  | 56 | u16 num_dmas; | 
|  | 57 | u16 guestid; | 
|  | 58 | u8 interrupt; 	/* 0 when not registered */ | 
|  | 59 | }; | 
|  | 60 |  | 
| Rusty Russell | bff672e | 2007-07-26 10:41:04 -0700 | [diff] [blame] | 61 | /*H:310 The page-table code owes a great debt of gratitude to Andi Kleen.  He | 
|  | 62 | * reviewed the original code which used "u32" for all page table entries, and | 
|  | 63 | * insisted that it would be far clearer with explicit typing.  I thought it | 
|  | 64 | * was overkill, but he was right: it is much clearer than it was before. | 
|  | 65 | * | 
|  | 66 | * We have separate types for the Guest's ptes & pgds and the shadow ptes & | 
|  | 67 | * pgds.  There's already a Linux type for these (pte_t and pgd_t) but they | 
|  | 68 | * change depending on kernel config options (PAE). */ | 
|  | 69 |  | 
|  | 70 | /* Each entry is identical: lower 12 bits of flags and upper 20 bits for the | 
|  | 71 | * "page frame number" (0 == first physical page, etc).  They are different | 
|  | 72 | * types so the compiler will warn us if we mix them improperly. */ | 
/* Shadow page-directory entry. */
typedef union {
	struct {
		unsigned flags:12;
		unsigned pfn:20;
	};
	struct {
		unsigned long val;
	} raw;
} spgd_t;

/* Shadow page-table entry. */
typedef union {
	struct {
		unsigned flags:12;
		unsigned pfn:20;
	};
	struct {
		unsigned long val;
	} raw;
} spte_t;

/* Guest page-directory entry. */
typedef union {
	struct {
		unsigned flags:12;
		unsigned pfn:20;
	};
	struct {
		unsigned long val;
	} raw;
} gpgd_t;

/* Guest page-table entry. */
typedef union {
	struct {
		unsigned flags:12;
		unsigned pfn:20;
	};
	struct {
		unsigned long val;
	} raw;
} gpte_t;
| Rusty Russell | bff672e | 2007-07-26 10:41:04 -0700 | [diff] [blame] | 89 |  | 
|  | 90 | /* We have two convenient macros to convert a "raw" value as handed to us by | 
|  | 91 | * the Guest into the correct Guest PGD or PTE type. */ | 
#define mkgpgd(_val)	((gpgd_t){ .raw.val = (_val) })
#define mkgpte(_val)	((gpte_t){ .raw.val = (_val) })
| Rusty Russell | bff672e | 2007-07-26 10:41:04 -0700 | [diff] [blame] | 94 | /*:*/ | 
| Rusty Russell | d7e28ff | 2007-07-19 01:49:23 -0700 | [diff] [blame] | 95 |  | 
|  | 96 | struct pgdir | 
|  | 97 | { | 
|  | 98 | unsigned long cr3; | 
|  | 99 | spgd_t *pgdir; | 
|  | 100 | }; | 
|  | 101 |  | 
|  | 102 | /* This is a guest-specific page (mapped ro) into the guest. */ | 
|  | 103 | struct lguest_ro_state | 
|  | 104 | { | 
|  | 105 | /* Host information we need to restore when we switch back. */ | 
|  | 106 | u32 host_cr3; | 
|  | 107 | struct Xgt_desc_struct host_idt_desc; | 
|  | 108 | struct Xgt_desc_struct host_gdt_desc; | 
|  | 109 | u32 host_sp; | 
|  | 110 |  | 
|  | 111 | /* Fields which are used when guest is running. */ | 
|  | 112 | struct Xgt_desc_struct guest_idt_desc; | 
|  | 113 | struct Xgt_desc_struct guest_gdt_desc; | 
|  | 114 | struct i386_hw_tss guest_tss; | 
|  | 115 | struct desc_struct guest_idt[IDT_ENTRIES]; | 
|  | 116 | struct desc_struct guest_gdt[GDT_ENTRIES]; | 
|  | 117 | }; | 
|  | 118 |  | 
|  | 119 | /* We have two pages shared with guests, per cpu.  */ | 
|  | 120 | struct lguest_pages | 
|  | 121 | { | 
|  | 122 | /* This is the stack page mapped rw in guest */ | 
|  | 123 | char spare[PAGE_SIZE - sizeof(struct lguest_regs)]; | 
|  | 124 | struct lguest_regs regs; | 
|  | 125 |  | 
|  | 126 | /* This is the host state & guest descriptor page, ro in guest */ | 
|  | 127 | struct lguest_ro_state state; | 
|  | 128 | } __attribute__((aligned(PAGE_SIZE))); | 
|  | 129 |  | 
/* Bits for the "changed" bitmap in struct lguest. */
#define CHANGED_IDT		1
#define CHANGED_GDT		2
/* Actually a subset of CHANGED_GDT */
#define CHANGED_GDT_TLS		4
#define CHANGED_ALL		(CHANGED_IDT | CHANGED_GDT)
|  | 134 |  | 
|  | 135 | /* The private info the thread maintains about the guest. */ | 
|  | 136 | struct lguest | 
|  | 137 | { | 
|  | 138 | /* At end of a page shared mapped over lguest_pages in guest.  */ | 
|  | 139 | unsigned long regs_page; | 
|  | 140 | struct lguest_regs *regs; | 
|  | 141 | struct lguest_data __user *lguest_data; | 
|  | 142 | struct task_struct *tsk; | 
|  | 143 | struct mm_struct *mm; 	/* == tsk->mm, but that becomes NULL on exit */ | 
|  | 144 | u16 guestid; | 
|  | 145 | u32 pfn_limit; | 
|  | 146 | u32 page_offset; | 
|  | 147 | u32 cr2; | 
|  | 148 | int halted; | 
|  | 149 | int ts; | 
|  | 150 | u32 next_hcall; | 
|  | 151 | u32 esp1; | 
|  | 152 | u8 ss1; | 
|  | 153 |  | 
|  | 154 | /* Do we need to stop what we're doing and return to userspace? */ | 
|  | 155 | int break_out; | 
|  | 156 | wait_queue_head_t break_wq; | 
|  | 157 |  | 
|  | 158 | /* Bitmap of what has changed: see CHANGED_* above. */ | 
|  | 159 | int changed; | 
|  | 160 | struct lguest_pages *last_pages; | 
|  | 161 |  | 
|  | 162 | /* We keep a small number of these. */ | 
|  | 163 | u32 pgdidx; | 
|  | 164 | struct pgdir pgdirs[4]; | 
|  | 165 |  | 
|  | 166 | /* Cached wakeup: we hold a reference to this task. */ | 
|  | 167 | struct task_struct *wake; | 
|  | 168 |  | 
|  | 169 | unsigned long noirq_start, noirq_end; | 
|  | 170 | int dma_is_pending; | 
|  | 171 | unsigned long pending_dma; /* struct lguest_dma */ | 
|  | 172 | unsigned long pending_key; /* address they're sending to */ | 
|  | 173 |  | 
|  | 174 | unsigned int stack_pages; | 
|  | 175 | u32 tsc_khz; | 
|  | 176 |  | 
|  | 177 | struct lguest_dma_info dma[LGUEST_MAX_DMA]; | 
|  | 178 |  | 
|  | 179 | /* Dead? */ | 
|  | 180 | const char *dead; | 
|  | 181 |  | 
|  | 182 | /* The GDT entries copied into lguest_ro_state when running. */ | 
|  | 183 | struct desc_struct gdt[GDT_ENTRIES]; | 
|  | 184 |  | 
|  | 185 | /* The IDT entries: some copied into lguest_ro_state when running. */ | 
|  | 186 | struct desc_struct idt[FIRST_EXTERNAL_VECTOR+LGUEST_IRQS]; | 
|  | 187 | struct desc_struct syscall_idt; | 
|  | 188 |  | 
|  | 189 | /* Virtual clock device */ | 
|  | 190 | struct hrtimer hrt; | 
|  | 191 |  | 
|  | 192 | /* Pending virtual interrupts */ | 
|  | 193 | DECLARE_BITMAP(irqs_pending, LGUEST_IRQS); | 
|  | 194 | }; | 
|  | 195 |  | 
|  | 196 | extern struct lguest lguests[]; | 
|  | 197 | extern struct mutex lguest_lock; | 
|  | 198 |  | 
|  | 199 | /* core.c: */ | 
|  | 200 | u32 lgread_u32(struct lguest *lg, unsigned long addr); | 
|  | 201 | void lgwrite_u32(struct lguest *lg, unsigned long addr, u32 val); | 
|  | 202 | void lgread(struct lguest *lg, void *buf, unsigned long addr, unsigned len); | 
|  | 203 | void lgwrite(struct lguest *lg, unsigned long, const void *buf, unsigned len); | 
|  | 204 | int find_free_guest(void); | 
|  | 205 | int lguest_address_ok(const struct lguest *lg, | 
|  | 206 | unsigned long addr, unsigned long len); | 
|  | 207 | int run_guest(struct lguest *lg, unsigned long __user *user); | 
|  | 208 |  | 
|  | 209 |  | 
|  | 210 | /* interrupts_and_traps.c: */ | 
|  | 211 | void maybe_do_interrupt(struct lguest *lg); | 
|  | 212 | int deliver_trap(struct lguest *lg, unsigned int num); | 
|  | 213 | void load_guest_idt_entry(struct lguest *lg, unsigned int i, u32 low, u32 hi); | 
|  | 214 | void guest_set_stack(struct lguest *lg, u32 seg, u32 esp, unsigned int pages); | 
|  | 215 | void pin_stack_pages(struct lguest *lg); | 
|  | 216 | void setup_default_idt_entries(struct lguest_ro_state *state, | 
|  | 217 | const unsigned long *def); | 
|  | 218 | void copy_traps(const struct lguest *lg, struct desc_struct *idt, | 
|  | 219 | const unsigned long *def); | 
|  | 220 | void guest_set_clockevent(struct lguest *lg, unsigned long delta); | 
|  | 221 | void init_clockdev(struct lguest *lg); | 
|  | 222 |  | 
|  | 223 | /* segments.c: */ | 
|  | 224 | void setup_default_gdt_entries(struct lguest_ro_state *state); | 
|  | 225 | void setup_guest_gdt(struct lguest *lg); | 
|  | 226 | void load_guest_gdt(struct lguest *lg, unsigned long table, u32 num); | 
|  | 227 | void guest_load_tls(struct lguest *lg, unsigned long tls_array); | 
|  | 228 | void copy_gdt(const struct lguest *lg, struct desc_struct *gdt); | 
|  | 229 | void copy_gdt_tls(const struct lguest *lg, struct desc_struct *gdt); | 
|  | 230 |  | 
|  | 231 | /* page_tables.c: */ | 
|  | 232 | int init_guest_pagetable(struct lguest *lg, unsigned long pgtable); | 
|  | 233 | void free_guest_pagetable(struct lguest *lg); | 
|  | 234 | void guest_new_pagetable(struct lguest *lg, unsigned long pgtable); | 
|  | 235 | void guest_set_pmd(struct lguest *lg, unsigned long cr3, u32 i); | 
|  | 236 | void guest_pagetable_clear_all(struct lguest *lg); | 
|  | 237 | void guest_pagetable_flush_user(struct lguest *lg); | 
|  | 238 | void guest_set_pte(struct lguest *lg, unsigned long cr3, | 
|  | 239 | unsigned long vaddr, gpte_t val); | 
|  | 240 | void map_switcher_in_guest(struct lguest *lg, struct lguest_pages *pages); | 
|  | 241 | int demand_page(struct lguest *info, unsigned long cr2, int errcode); | 
|  | 242 | void pin_page(struct lguest *lg, unsigned long vaddr); | 
|  | 243 |  | 
/* lguest_user.c: */
void lguest_device_remove(void);
int lguest_device_init(void);
|  | 247 |  | 
|  | 248 | /* io.c: */ | 
|  | 249 | void lguest_io_init(void); | 
|  | 250 | int bind_dma(struct lguest *lg, | 
|  | 251 | unsigned long key, unsigned long udma, u16 numdmas, u8 interrupt); | 
|  | 252 | void send_dma(struct lguest *info, unsigned long key, unsigned long udma); | 
|  | 253 | void release_all_dma(struct lguest *lg); | 
|  | 254 | unsigned long get_dma_buffer(struct lguest *lg, unsigned long key, | 
|  | 255 | unsigned long *interrupt); | 
|  | 256 |  | 
/* hypercalls.c: */
void write_timestamp(struct lguest *lg);
void do_hypercalls(struct lguest *lg);
| Rusty Russell | d7e28ff | 2007-07-19 01:49:23 -0700 | [diff] [blame] | 260 |  | 
| Rusty Russell | dde7978 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 261 | /*L:035 | 
|  | 262 | * Let's step aside for the moment, to study one important routine that's used | 
|  | 263 | * widely in the Host code. | 
|  | 264 | * | 
|  | 265 | * There are many cases where the Guest does something invalid, like pass crap | 
|  | 266 | * to a hypercall.  Since only the Guest kernel can make hypercalls, it's quite | 
|  | 267 | * acceptable to simply terminate the Guest and give the Launcher a nicely | 
|  | 268 | * formatted reason.  It's also simpler for the Guest itself, which doesn't | 
|  | 269 | * need to check most hypercalls for "success"; if you're still running, it | 
|  | 270 | * succeeded. | 
|  | 271 | * | 
|  | 272 | * Once this is called, the Guest will never run again, so most Host code can | 
|  | 273 | * call this then continue as if nothing had happened.  This means many | 
|  | 274 | * functions don't have to explicitly return an error code, which keeps the | 
|  | 275 | * code simple. | 
|  | 276 | * | 
|  | 277 | * It also means that this can be called more than once: only the first one is | 
|  | 278 | * remembered.  The only trick is that we still need to kill the Guest even if | 
|  | 279 | * we can't allocate memory to store the reason.  Linux has a neat way of | 
|  | 280 | * packing error codes into invalid pointers, so we use that here. | 
|  | 281 | * | 
|  | 282 | * Like any macro which uses an "if", it is safely wrapped in a run-once "do { | 
|  | 283 | * } while(0)". | 
|  | 284 | */ | 
/*
 * kill_guest(lg, fmt, ...): record a printf-style reason in lg->dead.
 *
 * Only the first call stores a reason; later calls leave ->dead untouched.
 * If kasprintf() returns NULL, ERR_PTR(-ENOMEM) is stored instead so that
 * ->dead is still non-NULL.
 *
 * "lg" is captured in a local first, so the argument is evaluated exactly
 * once even when it is an expression with side effects.
 */
#define kill_guest(lg, ...)						\
do {									\
	struct lguest *__kg_lg = (lg);					\
	if (!__kg_lg->dead) {						\
		__kg_lg->dead = kasprintf(GFP_ATOMIC, __VA_ARGS__);	\
		if (!__kg_lg->dead)					\
			__kg_lg->dead = ERR_PTR(-ENOMEM);		\
	}								\
} while (0)
| Rusty Russell | dde7978 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 293 | /* (End of aside) :*/ | 
| Rusty Russell | d7e28ff | 2007-07-19 01:49:23 -0700 | [diff] [blame] | 294 |  | 
|  | 295 | static inline unsigned long guest_pa(struct lguest *lg, unsigned long vaddr) | 
|  | 296 | { | 
|  | 297 | return vaddr - lg->page_offset; | 
|  | 298 | } | 
|  | 299 | #endif	/* __ASSEMBLY__ */ | 
|  | 300 | #endif	/* _LGUEST_H */ |