Post by Peter Lei:
Some recent UEFI implementations have begun to leave the CPU with page
write protection enabled in CR0.
With r330539 which enables kernel page protections, interesting things
happen during boot (aka panic) when protection is already enabled,
including a write protection fault from an explicit .text fixup write
from xsave->xsaveopt by fpuinit().
- QEMU with recent OVMF EDK2 builds: this is certainly due to UEFI
enabling paging and page protections.
- VMware Fusion 10.1.x on Mac: no specific insight into what's going
on inside the implementation, but CR0_WP is definitely left enabled
before the kernel is booted.
I have patched my kernel build to explicitly clear CR0_WP (e.g. in
initializecpu) prior to creating the page tables to get around this, but
someone might have a cleaner/better solution...

Try the patch below.
diff --git a/sys/amd64/amd64/db_interface.c b/sys/amd64/amd64/db_interface.c
index 9dfd44cf82c..1ecec02835c 100644
--- a/sys/amd64/amd64/db_interface.c
+++ b/sys/amd64/amd64/db_interface.c
@@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$");
#include <sys/pcpu.h>
#include <machine/cpufunc.h>
+#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <ddb/ddb.h>
@@ -75,19 +76,19 @@ db_write_bytes(vm_offset_t addr, size_t size, char *data)
jmp_buf jb;
void *prev_jb;
char *dst;
- u_long cr0save;
+ bool old_wp;
int ret;
- cr0save = rcr0();
+ old_wp = false;
prev_jb = kdb_jmpbuf(jb);
ret = setjmp(jb);
if (ret == 0) {
- load_cr0(cr0save & ~CR0_WP);
+ old_wp = disable_wp();
dst = (char *)addr;
while (size-- > 0)
*dst++ = *data++;
}
- load_cr0(cr0save);
+ restore_wp(old_wp);
(void)kdb_jmpbuf(prev_jb);
return (ret);
}
diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c
index 72b10396341..39367fa6ffb 100644
--- a/sys/amd64/amd64/fpu.c
+++ b/sys/amd64/amd64/fpu.c
@@ -205,6 +205,7 @@ fpuinit_bsp1(void)
{
u_int cp[4];
uint64_t xsave_mask_user;
+ bool old_wp;
if ((cpu_feature2 & CPUID2_XSAVE) != 0) {
use_xsave = 1;
@@ -233,8 +234,14 @@ fpuinit_bsp1(void)
* Patch the XSAVE instruction in the cpu_switch code
* to XSAVEOPT. We assume that XSAVE encoding used
* REX byte, and set the bit 4 of the r/m byte.
+ *
+ * It seems that some BIOSes give control to the OS
+ * with CR0.WP already set, making the kernel text
+ * read-only before cpu_startup().
*/
+ old_wp = disable_wp();
ctx_switch_xsave[3] |= 0x10;
+ restore_wp(old_wp);
}
}
diff --git a/sys/amd64/amd64/gdb_machdep.c b/sys/amd64/amd64/gdb_machdep.c
index 68eb6002593..f7ca3c07ea3 100644
--- a/sys/amd64/amd64/gdb_machdep.c
+++ b/sys/amd64/amd64/gdb_machdep.c
@@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$");
#include <machine/cpufunc.h>
#include <machine/frame.h>
#include <machine/gdb_machdep.h>
+#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/psl.h>
#include <machine/reg.h>
@@ -127,17 +128,14 @@ gdb_cpu_signal(int type, int code)
void *
gdb_begin_write(void)
{
- u_long cr0save;
- cr0save = rcr0();
- load_cr0(cr0save & ~CR0_WP);
- return ((void *)cr0save);
+ return (disable_wp() ? &gdb_begin_write : NULL);
}
void
gdb_end_write(void *arg)
{
- load_cr0((u_long)arg);
+ restore_wp(arg != NULL);
}
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index e340c6cd14d..fcc45eca57d 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -2597,6 +2597,31 @@ clear_pcb_flags(struct pcb *pcb, const u_int flags)
: "cc", "memory");
}
+/*
+ * Enable and restore kernel text write permissions.
+ * Callers must ensure that disable_wp()/restore_wp() are executed
+ * without rescheduling on the same core.
+ */
+bool
+disable_wp(void)
+{
+ u_int cr0;
+
+ cr0 = rcr0();
+ if ((cr0 & CR0_WP) == 0)
+ return (false);
+ load_cr0(cr0 & ~CR0_WP);
+ return (true);
+}
+
+void
+restore_wp(bool old_wp)
+{
+
+ if (old_wp)
+ load_cr0(rcr0() | CR0_WP);
+}
+
#ifdef KDB
/*
diff --git a/sys/amd64/include/md_var.h b/sys/amd64/include/md_var.h
index 63dabaf4047..abcc273b6c6 100644
--- a/sys/amd64/include/md_var.h
+++ b/sys/amd64/include/md_var.h
@@ -53,6 +53,8 @@ void amd64_conf_fast_syscall(void);
void amd64_db_resume_dbreg(void);
void amd64_lower_shared_page(struct sysentvec *);
void amd64_syscall(struct thread *td, int traced);
+bool disable_wp(void);
+void restore_wp(bool old_wp);
void doreti_iret(void) __asm(__STRING(doreti_iret));
void doreti_iret_fault(void) __asm(__STRING(doreti_iret_fault));
void ld_ds(void) __asm(__STRING(ld_ds));