diff --git a/Cargo.toml b/Cargo.toml
index da0706ea..c0613da1 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -15,13 +15,17 @@ panic = "abort"
 lto = true
 
 [features]
-default = ["log-serial", "log-panic"]
+default = ["log-serial", "log-panic", "ram32", "rom"]
 # Have the log! macro write to serial output. Disabling this significantly
 # reduces code size, but makes debugging essentially impossible
 log-serial = []
 # Log panics to serial output. Disabling this (without disabling log-serial)
 # gets you most of the code size reduction, without losing _all_ debugging.
 log-panic = ["log-serial"]
+# Support launching the firmware from 32-bit unpaged protected mode.
+ram32 = []
+# Support building the firmware as a BIOS ROM (i.e. starting in real mode).
+rom = ["ram32"]
 
 [dependencies]
 cpuio = "*"
diff --git a/layout.ld b/layout.ld
index 43da9791..be09730f 100644
--- a/layout.ld
+++ b/layout.ld
@@ -13,7 +13,7 @@ ram_max = 2M;
 /* Our stack grows down from ram_max. TODO: Add a guard for stack overflows. */
 stack_size = 64K;
 
-/* Pagetable locations loaded by Firecracker/cloud-hypervisor */
+/* Pagetable locations loaded by crosvm/Firecracker/cloud-hypervisor */
 pml4t = 0x9000;
 pml3t = 0xa000;
 
@@ -28,10 +28,20 @@ SECTIONS
 	.text   : {
 		*(.text .text.*)
 		*(.ram64)
+		*(.ram32)
 	} :text
 
 	firmware_ram_size = . - ram_min;
 
+	/* The ROM code must be at the end of the file (for the reset vector), */
+	/* and the filesize must be a multiple of 64K to boot on QEMU. */
+	. = ALIGN(. + SIZEOF(.rom), 64K) - SIZEOF(.rom);
+	/* Avoid explicitly setting a PHDR, allowing the linker to remove the .rom */
+	/* section if it's not emitted (i.e. the "rom" cargo feature is off). */
+	.rom : { KEEP(*(.rom)) }
+
+	firmware_rom_size = . - ram_min;
+
 	/* Memory for identity mapping, keep synced with ADDRESS_SPACE_GIB */
 	address_space_gib = 4;
 	. = ALIGN(4K);
diff --git a/src/asm/ram32.s b/src/asm/ram32.s
new file mode 100644
index 00000000..ff2cb097
--- /dev/null
+++ b/src/asm/ram32.s
@@ -0,0 +1,60 @@
+.section .ram32, "ax"
+.code32
+
+ram32_start:
+    # Indicate (via serial) that we are executing out of RAM
+    movw $0x3f8, %dx
+    movb $'R', %al
+    outb %al, %dx
+
+setup_page_tables:
+    # First PML2 entry identity maps [0, 2 MiB)
+    movl $0b10000011, (pml2t) # huge (bit 7), writable (bit 1), present (bit 0)
+    # First PML3 entry points to PML2 table
+    movl $pml2t, %eax
+    orb  $0b00000011, %al # writable (bit 1), present (bit 0)
+    movl %eax, (pml3t)
+    # First PML4 entry points to PML3 table
+    movl $pml3t, %eax
+    orb  $0b00000011, %al # writable (bit 1), present (bit 0)
+    movl %eax, (pml4t)
+
+enable_paging:
+    # Load page table root into CR3
+    movl $pml4t, %eax
+    movl %eax, %cr3
+
+    # Set CR4.PAE (Physical Address Extension)
+    movl %cr4, %eax
+    orb  $0b00100000, %al # Set bit 5
+    movl %eax, %cr4
+    # Set EFER.LME (Long Mode Enable)
+    movl $0xC0000080, %ecx
+    rdmsr
+    orb  $0b00000001, %ah # Set bit 8
+    wrmsr
+    # Set CR0.PG (Paging)
+    movl %cr0, %eax
+    orl  $(1 << 31), %eax
+    movl %eax, %cr0
+
+    # Indicate (via serial) that we have enabled paging
+    movw $0x3f8, %dx
+    movb $'P', %al
+    outb %al, %dx
+
+jump_to_64bit:
+    # We are now in 32-bit compatibility mode. To enter 64-bit mode, we need to
+    # load a 64-bit code segment into our GDT.
+    lgdtl gdt64_ptr
+    # Set CS to a 64-bit segment and jump to 64-bit code.
+    ljmpl $(code64_desc - gdt64_start), $ram64_start
+
+gdt64_ptr:
+    .short gdt64_end - gdt64_start - 1 # GDT length is actually (length - 1)
+    .long gdt64_start
+gdt64_start:
+    .quad 0                                     # First descriptor is null
+code64_desc:
+    .quad (1<<43) | (1<<44) | (1<<47) | (1<<53) # Only these bits do anything
+gdt64_end:
diff --git a/src/asm/ram64.s b/src/asm/ram64.s
index 4f9ca392..d67d7aae 100644
--- a/src/asm/ram64.s
+++ b/src/asm/ram64.s
@@ -26,4 +26,4 @@ ram64_start:
 
 halt_loop:
     hlt
-    jmp halt_loop
\ No newline at end of file
+    jmp halt_loop
diff --git a/src/asm/rom.s b/src/asm/rom.s
new file mode 100644
index 00000000..6517fa2d
--- /dev/null
+++ b/src/asm/rom.s
@@ -0,0 +1,91 @@
+.section .rom, "ax"
+
+# This ROM will be mapped right at the end of the 32-bit address space, but the
+# linker assumes all code executes in RAM, and gives symbols addresses in that
+# range. To get around this, we manually compute ROM addresses.
+gdt32_addr32      = (1 << 32) - (rom_end - gdt32_start)
+rom32_addr32      = (1 << 32) - (rom_end - rom32_start)
+gdt32_ptr_addr16  = (1 << 16) - (rom_end - gdt32_ptr)
+
+gdt32_ptr:
+    .short gdt32_end - gdt32_start - 1 # GDT length is actually (length - 1)
+    .long gdt32_addr32
+# Note: Our GDT descriptors must be marked "accessed", or the processor will
+#       hang when it attempts to update them (as the gdt32 is in ROM).
+gdt32_start:
+    .quad 0          # First descriptor is always unused
+code32_desc: # base = 0x00000000, limit = 0xfffff x 4K
+    .short 0xffff    # limit[0..16] = 0xffff
+    .short 0x0000    # base [0..16] = 0x0000
+    .byte 0x00       # base[16..24] = 0x00
+    .byte 0b10011011 # present, DPL = 0, non-system, code seg, non-conforming, readable, accessed
+    .byte 0b11001111 # 4K gran, 32-bit, limit[16..20] = 0b1111 = 0xf
+    .byte 0x00       # base[24..32] = 0x00
+data32_desc: # base = 0x00000000, limit = 0xfffff x 4K
+    .short 0xffff    # limit[0..16] = 0xffff
+    .short 0x0000    # base [0..16] = 0x0000
+    .byte 0x00       # base[16..24] = 0x00
+    .byte 0b10010011 # present, DPL = 0, non-system, data seg, grows up, writable, accessed
+    .byte 0b11001111 # 4K gran, 32-bit, limit[16..20] = 0b1111 = 0xf
+    .byte 0x00       # base[24..32] = 0x00
+gdt32_end:
+
+.code32
+rom32_start:
+    # Now that we are in 32-bit mode, setup all the data segments to be 32-bit.
+    movw $(data32_desc - gdt32_start), %ax
+    movw %ax, %ds
+    movw %ax, %es
+    movw %ax, %ss
+    movw %ax, %fs
+    movw %ax, %gs
+
+    # The rest of the firmware assumes it executes from RAM in a region just
+    # above ram_min, so we copy all of that code into RAM and jump to it.
+    movl $ram_min, %edi
+    # Ideally we would define:
+    #   rom_min = (1 << 32) - firmware_rom_size
+    # above, and just do
+    #   movl $rom_min, %esi
+    # However, firmware_rom_size is not known until link time, so the assembler
+    # can't handle such code. Thus, the firmware has to do the address math.
+    xorl %esi, %esi
+    # For 32-bit registers: 0 - offset = (1 << 32) - offset
+    subl $firmware_rom_size, %esi
+    movl $firmware_ram_size, %ecx
+
+    # This code is essentially: memcpy(ram_min, rom_min, firmware_ram_size)
+    cld
+    rep movsb (%esi), (%edi)
+
+    # Jumping all that way from ROM (~4 GiB) to RAM (~1 MiB) is too far for a
+    # relative jump, so we use an absolute jump.
+    movl $ram32_start, %eax
+    jmpl *%eax
+
+.code16
+rom16_protected:
+    # We are now in 16-bit protected mode. To enter 32-bit protected mode, we
+    # need to load 32-bit code/data segments into our GDT. The gdt32 in ROM is
+    # at too high of an address (4 GiB - offset) for the data segment to reach.
+    # So, we load gdt32 via the 16-bit code segment, using a 16-bit address.
+    movw  $gdt32_ptr_addr16, %bx
+    lgdtl %cs:(%bx)
+
+    # Set CS to a 32-bit segment and jump to 32-bit code.
+    ljmpl $(code32_desc - gdt32_start), $rom32_addr32
+
+.align 16
+reset_vector: # 0xffff_fff0
+    # This code must be 16 bytes or less, so be careful when adding anything.
+    cli
+
+    # Set CR0.PE (Protected Mode Enable)
+    movl %cr0, %eax
+    orb  $0b00000001, %al # Set bit 0
+    movl %eax, %cr0
+
+    jmp rom16_protected
+
+.align 16
+rom_end: # 0x1_0000_0000
diff --git a/src/main.rs b/src/main.rs
index 47b1ca80..3342fae2 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -38,6 +38,10 @@ mod pci;
 mod pe;
 mod virtio;
 
+#[cfg(all(not(test), feature = "rom"))]
+global_asm!(include_str!("asm/rom.s"));
+#[cfg(all(not(test), feature = "ram32"))]
+global_asm!(include_str!("asm/ram32.s"));
 #[cfg(not(test))]
 global_asm!(include_str!("asm/ram64.s"));