From f56a384e98aa81065038c4e16f39ed989ccae687 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 26 Jul 2007 10:41:05 -0700 Subject: lguest: documentation VII: FIXMEs Documentation: The FIXMEs Signed-off-by: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/lguest/interrupts_and_traps.c | 14 ++++++++++++++ drivers/lguest/io.c | 10 ++++++++++ drivers/lguest/lguest.c | 8 ++++++++ drivers/lguest/lguest_asm.S | 14 ++++++++++++++ drivers/lguest/page_tables.c | 5 +++++ drivers/lguest/segments.c | 4 ++++ 6 files changed, 55 insertions(+) (limited to 'drivers/lguest') diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c index 3d9830322646..bd0091bf79ec 100644 --- a/drivers/lguest/interrupts_and_traps.c +++ b/drivers/lguest/interrupts_and_traps.c @@ -231,6 +231,20 @@ static int direct_trap(const struct lguest *lg, * go direct, of course 8) */ return idt_type(trap->a, trap->b) == 0xF; } +/*:*/ + +/*M:005 The Guest has the ability to turn its interrupt gates into trap gates, + * if it is careful. The Host will let trap gates go directly to the + * Guest, but the Guest needs the interrupts atomically disabled for an + * interrupt gate. It can do this by pointing the trap gate at instructions + * within noirq_start and noirq_end, where it can safely disable interrupts. */ + +/*M:006 The Guests do not use the sysenter (fast system call) instruction, + * because it's hardcoded to enter privilege level 0 and so can't go direct. + * It's about twice as fast as the older "int 0x80" system call, so it might + * still be worthwhile to handle it in the Switcher and lcall down to the + * Guest. The sysenter semantics are hairy tho: search for that keyword in + * entry.S :*/ /*H:260 When we make traps go directly into the Guest, we need to make sure * the kernel stack is valid (ie. mapped in the page tables). 
Otherwise, the diff --git a/drivers/lguest/io.c b/drivers/lguest/io.c index da288128e44f..ea68613b43f6 100644 --- a/drivers/lguest/io.c +++ b/drivers/lguest/io.c @@ -553,6 +553,16 @@ void release_all_dma(struct lguest *lg) up_read(&lg->mm->mmap_sem); } +/*M:007 We only return a single DMA buffer to the Launcher, but it would be + * more efficient to return a pointer to the entire array of DMA buffers, which + * it can cache and choose one whenever it wants. + * + * Currently the Launcher uses a write to /dev/lguest, and the return value is + * the address of the DMA structure with the interrupt number placed in + * dma->used_len. If we wanted to return the entire array, we need to return + * the address, array size and interrupt number: this seems to require an + * ioctl(). :*/ + /*L:320 This routine looks for a DMA buffer registered by the Guest on the * given key (using the BIND_DMA hypercall). */ unsigned long get_dma_buffer(struct lguest *lg, diff --git a/drivers/lguest/lguest.c b/drivers/lguest/lguest.c index 7e7e9fb3aefd..6dfe568523a2 100644 --- a/drivers/lguest/lguest.c +++ b/drivers/lguest/lguest.c @@ -250,6 +250,14 @@ static void irq_enable(void) { lguest_data.irq_enabled = X86_EFLAGS_IF; } +/*:*/ +/*M:003 Note that we don't check for outstanding interrupts when we re-enable + * them (or when we unmask an interrupt). This seems to work for the moment, + * since interrupts are rare and we'll just get the interrupt on the next timer + * tick, but when we turn on CONFIG_NO_HZ, we should revisit this. One way + * would be to put the "irq_enabled" field in a page by itself, and have the + * Host write-protect it when an interrupt comes in when irqs are disabled. + * There will then be a page fault as soon as interrupts are re-enabled. :*/ /*G:034 * The Interrupt Descriptor Table (IDT). 
diff --git a/drivers/lguest/lguest_asm.S b/drivers/lguest/lguest_asm.S index 3126ae923cc0..f182c6a36209 100644 --- a/drivers/lguest/lguest_asm.S +++ b/drivers/lguest/lguest_asm.S @@ -39,6 +39,20 @@ LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax) .global lguest_noirq_start .global lguest_noirq_end +/*M:004 When the Host reflects a trap or injects an interrupt into the Guest, + * it sets the eflags interrupt bit on the stack based on + * lguest_data.irq_enabled, so the Guest iret logic does the right thing when + * restoring it. However, when the Host sets the Guest up for direct traps, + * such as system calls, the processor is the one to push eflags onto the + * stack, and the interrupt bit will be 1 (in reality, interrupts are always + * enabled in the Guest). + * + * This turns out to be harmless: the only trap which should happen under Linux + * with interrupts disabled is Page Fault (due to our lazy mapping of vmalloc + * regions), which has to be reflected through the Host anyway. If another + * trap *does* go off when interrupts are disabled, the Guest will panic, and + * we'll never get to this iret! :*/ + /*G:045 There is one final paravirt_op that the Guest implements, and glancing * at it you can see why I left it to last. It's *cool*! It's in *assembler*! * diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index cd047e81cd63..b7a924ace684 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c @@ -15,6 +15,11 @@ #include #include "lg.h" +/*M:008 We hold references to pages, which prevents them from being swapped. + * It'd be nice to have a callback in the "struct mm_struct" when Linux wants + * to swap out. If we had this, and a shrinker callback to trim PTE pages, we + * could probably consider launching Guests as non-root. 
:*/ + /*H:300 * The Page Table Code * diff --git a/drivers/lguest/segments.c b/drivers/lguest/segments.c index 4d4e5a4586f9..f675a41a80da 100644 --- a/drivers/lguest/segments.c +++ b/drivers/lguest/segments.c @@ -94,6 +94,10 @@ static void check_segment_use(struct lguest *lg, unsigned int desc) || lg->regs->ss / 8 == desc) kill_guest(lg, "Removed live GDT entry %u", desc); } +/*:*/ +/*M:009 We wouldn't need to check for removal of in-use segments if we handled + * faults in the Switcher. However, it's probably not a worthwhile + * optimization. :*/ /*H:610 Once the GDT has been changed, we look through the changed entries and * see if they're OK. If not, we'll call kill_guest() and the Guest will never -- cgit v1.2.3-55-g7522