Added basic support for CPU-local data
This is a fairly naive implementation, where we allocate a per-CPU
region from the CPU_LOCAL address space for each CPU (currently just
the first CPU, as we don't have multi-CPU support yet), then store a
pointer to the beginning of that region in the GS base. To access the
per-CPU data, we read the address back out of the GS base and
dereference it.

This isn't as efficient as the approach being discussed in
rust-osdev/x86_64#257, but it works well enough for now.

Signed-off-by: SlyMarbo <[email protected]>
SlyMarbo committed Dec 2, 2021
1 parent 9a02520 commit be0b250
Showing 6 changed files with 172 additions and 51 deletions.
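
In outline, the mechanism described above works as follows (a minimal sketch using the x86_64 crate's GsBase wrapper; PerCpu, install, and per_cpu are illustrative names rather than the kernel's actual API):

use x86_64::registers::model_specific::GsBase;
use x86_64::VirtAddr;

// Illustrative stand-in for the kernel's per-CPU data (CPU ID,
// idle thread, current thread).
struct PerCpu {
    id: u64,
}

// Once, during CPU bring-up: point the GS base at this CPU's data.
fn install(per_cpu: &'static mut PerCpu) {
    GsBase::write(VirtAddr::from_ptr(per_cpu as *const PerCpu));
}

// On each access: read the pointer back out of the GS base and
// dereference it. Only sound after install() has run on this CPU.
unsafe fn per_cpu() -> &'static mut PerCpu {
    &mut *GsBase::read().as_mut_ptr::<PerCpu>()
}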
6 changes: 5 additions & 1 deletion kernel/src/gdt.rs
@@ -7,7 +7,7 @@
// handler.

use lazy_static::lazy_static;
-use x86_64::instructions::segmentation::{Segment, CS};
+use x86_64::instructions::segmentation::{Segment, CS, GS};
use x86_64::instructions::tables::load_tss;
use x86_64::structures::gdt::{Descriptor, GlobalDescriptorTable, SegmentSelector};
use x86_64::structures::tss::TaskStateSegment;
@@ -20,6 +20,7 @@ pub fn init() {
    GDT.0.load();
    unsafe {
        CS::set_reg(GDT.1.code_selector);
+        GS::set_reg(GDT.1.cpu_selector);
        load_tss(GDT.1.tss_selector);
    }
}
@@ -29,11 +30,13 @@ lazy_static! {
        let mut gdt = GlobalDescriptorTable::new();
        let code_selector = gdt.add_entry(Descriptor::kernel_code_segment());
        let tss_selector = gdt.add_entry(Descriptor::tss_segment(&TSS));
+        let cpu_selector = gdt.add_entry(Descriptor::kernel_data_segment());
        (
            gdt,
            Selectors {
                code_selector,
                tss_selector,
+                cpu_selector,
            },
        )
    };
@@ -42,6 +45,7 @@ lazy_static! {
struct Selectors {
    code_selector: SegmentSelector,
    tss_selector: SegmentSelector,
+    cpu_selector: SegmentSelector,
}

// Set up the task state segment with a safe
4 changes: 3 additions & 1 deletion kernel/src/lib.rs
@@ -31,7 +31,8 @@

extern crate alloc;

-use crate::multitasking::thread;
+use crate::memory::KERNEL_STACK_0;
+use crate::multitasking::{cpu_local, thread};
use bootloader::BootInfo;
use core::panic::PanicInfo;
use lazy_static::lazy_static;
@@ -63,6 +64,7 @@ pub fn init(boot_info: &'static BootInfo) {

    // Set up the heap allocator.
    unsafe { memory::init(boot_info) };
+    cpu_local::init(cpu_local::CpuId::new(), &KERNEL_STACK_0);
    thread::init();
}
2 changes: 1 addition & 1 deletion kernel/src/main.rs
@@ -72,7 +72,7 @@ fn kmain() {

    // We're now executing as the idle
    // thread.
-    thread::idle_thread();
+    thread::idle_loop();
}

fn debug_threading() -> ! {
154 changes: 154 additions & 0 deletions kernel/src/multitasking/cpu_local.rs
@@ -0,0 +1,154 @@
//! cpu_local provides functionality to access a different copy of the same
//! structure on each CPU. This is used to track data like the CPU ID and
//! the currently-executing thread.

// For now, we take a simple but slightly inefficient approach: we
// allocate a copy of the CpuData struct in the CPU-local address space
// and store a pointer to it in the GS base. To access the data, we use
// a wrapper function that retrieves the pointer from the GS base, casts
// it to the right type, and then accesses the data as usual.
//
// This is less efficient than using offsets from the GS base directly
// in assembly, as described here[1], but it's much simpler to implement.
// If rust-osdev/x86_64#257 is merged, that will probably be used to
// replace this module.
//
// [1]: https://github.com/rust-osdev/x86_64/pull/257#issuecomment-849514649

use crate::memory::{kernel_pml4, pmm, VirtAddrRange, CPU_LOCAL};
use crate::multitasking::thread::Thread;
use alloc::sync::Arc;
use core::mem::size_of;
use core::sync::atomic::{AtomicU64, Ordering};
use x86_64::registers::model_specific::GsBase;
use x86_64::structures::paging::{
    FrameAllocator, Mapper, Page, PageSize, PageTableFlags, Size4KiB,
};

/// init prepares the current CPU's local
/// data using the given CPU ID and stack
/// space.
///
pub fn init(cpu_id: CpuId, stack_space: &VirtAddrRange) {
    if cpu_id.0 != 0 {
        unimplemented!("additional CPUs not implemented: no idle stack space");
    }

    // Work out where we will store our CpuData.
    // We align up to the page size to make paging
    // easier.
    let size = align_up(size_of::<CpuData>(), Size4KiB::SIZE as usize) as u64;
    let start = CPU_LOCAL.start() + cpu_id.as_u64() * size;
    let end = start + size;

    // The page addresses should already be aligned,
    // so we shouldn't get any panics here.
    let start_page = Page::from_start_address(start).expect("bad start address");
    let end_page = Page::from_start_address(end).expect("bad end address");

    // Map our per-CPU address space.
    let mut mapper = unsafe { kernel_pml4() };
    let mut frame_allocator = pmm::ALLOCATOR.lock();
    for page in Page::range(start_page, end_page) {
        let frame = frame_allocator
            .allocate_frame()
            .expect("failed to allocate for per-CPU data");

        let flags =
            PageTableFlags::PRESENT | PageTableFlags::WRITABLE | PageTableFlags::NO_EXECUTE;
        unsafe {
            mapper
                .map_to(page, frame, flags, &mut *frame_allocator)
                .expect("failed to map per-CPU data")
                .flush()
        };
    }

    // Store the pointer to the CpuData in the GS base.
    GsBase::write(start);

    // Create our idle thread.
    let idle = Thread::new_idle_thread(stack_space);

    // Initialise the CpuData from a pointer to the
    // start of the address space.
    let cpu_data = start.as_mut_ptr() as *mut CpuData;
    unsafe {
        cpu_data.write(CpuData {
            id: cpu_id,
            idle_thread: idle.clone(),
            current_thread: idle,
        });
    }
}

// Helper functions to expose the CPU data.

/// cpu_data is our helper function to get
/// the pointer to the CPU data from the
/// GS register.
///
unsafe fn cpu_data() -> &'static mut CpuData {
    let ptr = GsBase::read();

    &mut *(ptr.as_mut_ptr() as *mut CpuData)
}

/// cpu_id returns this CPU's unique ID.
///
pub fn cpu_id() -> CpuId {
    unsafe { cpu_data() }.id
}

/// idle_thread returns this CPU's idle thread.
///
pub fn idle_thread() -> Arc<Thread> {
    unsafe { cpu_data() }.idle_thread.clone()
}

/// current_thread returns the currently executing thread.
///
pub fn current_thread() -> Arc<Thread> {
    unsafe { cpu_data() }.current_thread.clone()
}

/// set_current_thread overwrites the currently executing
/// thread.
///
pub fn set_current_thread(thread: Arc<Thread>) {
    unsafe { cpu_data() }.current_thread = thread;
}

/// align_up aligns the given address upwards to alignment align.
///
/// Requires that align is a power of two.
///
fn align_up(addr: usize, align: usize) -> usize {
    (addr + align - 1) & !(align - 1)
}

/// CpuId uniquely identifies a CPU core.
///
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct CpuId(u64);

impl CpuId {
    /// new allocates and returns the next available
    /// CpuId.
    ///
    pub fn new() -> Self {
        static NEXT_CPU_ID: AtomicU64 = AtomicU64::new(0);
        CpuId(NEXT_CPU_ID.fetch_add(1, Ordering::Relaxed))
    }

    pub const fn as_u64(&self) -> u64 {
        self.0
    }
}

// CpuData contains the data specific to an individual CPU core.
//
struct CpuData {
    id: CpuId,
    idle_thread: Arc<Thread>,
    current_thread: Arc<Thread>,
}
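
For reference, the rest of the kernel consumes this module through its small public API, roughly like so (a hypothetical call site; thread/mod.rs below uses these same functions):

use crate::multitasking::cpu_local;

// Hypothetical call site: each call reads the CpuData pointer
// back out of the GS base.
fn example() {
    let id = cpu_local::cpu_id();              // this CPU's CpuId
    let current = cpu_local::current_thread(); // Arc<Thread>
    let idle = cpu_local::idle_thread();       // scheduler fallback

    // The scheduler records each newly chosen thread here.
    cpu_local::set_current_thread(idle);
    let _ = (id, current);
}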
1 change: 1 addition & 0 deletions kernel/src/multitasking/mod.rs
@@ -1,5 +1,6 @@
//! multitasking contains the functionality used to implement cooperative and
//! preemptive multitasking.

+pub mod cpu_local;
pub mod task;
pub mod thread;
56 changes: 8 additions & 48 deletions kernel/src/multitasking/thread/mod.rs
@@ -1,9 +1,9 @@
//! thread implements preemptive multitasking, using threads of execution.

-use crate::memory::{new_kernel_stack, StackBounds, KERNEL_STACK_0};
+use crate::memory::{new_kernel_stack, StackBounds, VirtAddrRange};
+use crate::multitasking::cpu_local::{current_thread, idle_thread, set_current_thread};
use crate::multitasking::thread::scheduler::Scheduler;
use crate::println;
-use crate::utils::lazy::Lazy;
use crate::utils::once::Once;
use alloc::collections::BTreeMap;
use alloc::sync::Arc;
@@ -15,17 +15,6 @@ use crossbeam::atomic::AtomicCell;
mod scheduler;
mod switch;

-/// IDLE_THREAD is the idle kernel thread, which we
-/// only ever execute if no other threads are runnable.
-///
-/// Note that this is not thread-safe, as we haven't
-/// set up thread-local storage yet. However, as we
-/// haven't set up multiple CPUs yet either, it's
-/// safe in practice. We still have to use unsafe
-/// to access it, but for now that will have to do.
-///
-pub static mut IDLE_THREAD: Lazy<Arc<Thread>> = Lazy::new();

type ThreadTable = BTreeMap<ThreadId, Arc<Thread>>;

/// THREADS stores all living threads, referencing them by
@@ -39,23 +28,6 @@ static THREADS: spin::Mutex<ThreadTable> = spin::Mutex::new(BTreeMap::new());
///
static SCHEDULER: Once<spin::Mutex<Scheduler>> = Once::new();

-/// CURRENT is the currently executing thread, which
-/// is initialised in init().
-///
-/// Note that this is not thread-safe, as we haven't
-/// set up thread-local storage yet. However, as we
-/// haven't set up multiple CPUs yet either, it's
-/// safe in practice. We still have to use unsafe
-/// to access it, but for now that will have to do.
-///
-static mut CURRENT: Lazy<Arc<Thread>> = Lazy::new();
-
-/// current_thread returns the currently executing thread.
-///
-pub fn current_thread() -> &'static Arc<Thread> {
-    unsafe { CURRENT.get() }
-}

/// DEAD_STACKS is a free list of kernel stacks that
/// have been released by kernel threads that have
/// exited.
@@ -72,11 +44,11 @@ pub fn current_thread() -> &'static Arc<Thread> {
///
static DEAD_STACKS: spin::Mutex<Vec<StackBounds>> = spin::Mutex::new(Vec::new());

-/// idle_thread implements the idle thread. We
+/// idle_loop implements the idle thread. We
/// fall back to this if the kernel has no other
/// work left to do.
///
-pub fn idle_thread() -> ! {
+pub fn idle_loop() -> ! {
    println!("Kernel entering the idle thread.");
    loop {
        println!("Shutting down.");
@@ -101,18 +73,6 @@ const DEFAULT_RFLAGS: u64 = 0x2;
/// init prepares the thread scheduler.
///
pub fn init() {
-    // Set up the idle thread so that when
-    // kmain hands over to the scheduler,
-    // the idle thread inherits the kernel's
-    // initial stack and has something to
-    // fall back to if no other threads
-    // are runnable.
-    let idle = Thread::new_idle_thread();
-    unsafe {
-        CURRENT.set(idle.clone());
-        IDLE_THREAD.set(idle);
-    }
-
    SCHEDULER.init(|| spin::Mutex::new(Scheduler::new()));
}

@@ -135,7 +95,7 @@ pub fn switch() {

        match scheduler.next() {
            Some(thread) => THREADS.lock().get(&thread).unwrap().clone(),
-            None => unsafe { IDLE_THREAD.get().clone() },
+            None => idle_thread(),
        }
    };

@@ -153,7 +113,7 @@ pub fn switch() {
    let new_stack_pointer = next.stack_pointer.get();

    // Switch into the next thread.
-    unsafe { CURRENT.set(next.clone()) };
+    set_current_thread(next.clone());
    unsafe { switch::switch_stack(current_stack_pointer, new_stack_pointer) };
}

@@ -257,7 +217,7 @@ impl Thread {
    /// new_idle_thread creates a new kernel thread, to
    /// which we fall back if no other threads are runnable.
    ///
-    fn new_idle_thread() -> Arc<Thread> {
+    pub fn new_idle_thread(stack_space: &VirtAddrRange) -> Arc<Thread> {
        // The idle thread always has thread id 0, which
        // is otherwise invalid.
        let id = ThreadId(0);
@@ -273,7 +233,7 @@ impl Thread {
        // We inherit the kernel's initial stack, although
        // we never access any data previously stored
        // on the stack.
-        let stack_bounds = Some(StackBounds::from(&KERNEL_STACK_0));
+        let stack_bounds = Some(StackBounds::from(stack_space));

        let thread = Arc::new(Thread {
            id,
