feat: added SIMD

This commit is contained in:
Jordan ⌨️ 2023-12-15 22:31:51 +01:00
parent 0e61118fdf
commit 6a90e556c4
8 changed files with 335 additions and 13 deletions

View file

@ -1,5 +1,21 @@
#include "asm.h" #include "asm.h"
/**
 * Write one byte to an x86 I/O port using the OUTB instruction.
 *
 * @param port  I/O port number to write to.
 * @param value Byte to send to the port.
 */
void out8(uint16_t port, uint8_t value)
{
    asm volatile("outb %[data], %[port]"
                 : /* no outputs */
                 : [data] "a"(value), [port] "Nd"(port));
}
/**
 * Read one byte from an x86 I/O port using the INB instruction.
 *
 * @param port I/O port number to read from.
 * @return The byte delivered by the port.
 */
uint8_t in8(uint16_t port)
{
    uint8_t data;

    asm volatile("inb %[port], %[data]"
                 : [data] "=a"(data)
                 : [port] "Nd"(port));

    return data;
}
void hal_disable_interrupts(void) void hal_disable_interrupts(void)
{ {
asm volatile("cli"); asm volatile("cli");
@ -36,4 +52,45 @@ uint64_t asm_read_msr(uint64_t msr)
return ((uint64_t)high << 32) | low; return ((uint64_t)high << 32) | low;
} }
/**
 * Write an extended control register with XSETBV.
 *
 * @param xcr   Index of the extended control register (passed in ECX).
 * @param value 64-bit value to store; split into EDX:EAX for the instruction.
 */
void asm_write_xcr(uint32_t xcr, uint64_t value)
{
    const uint32_t low_half = (uint32_t)value;
    const uint32_t high_half = (uint32_t)(value >> 32);

    asm volatile("xsetbv"
                 : /* no outputs */
                 : "c"(xcr), "a"(low_half), "d"(high_half)
                 : "memory");
}
/**
 * Read an extended control register with XGETBV.
 *
 * @param xcr Index of the extended control register (passed in ECX).
 * @return The 64-bit register value assembled from EDX:EAX.
 */
uint64_t asm_read_xcr(uint32_t xcr)
{
    uint32_t low_half;
    uint32_t high_half;

    asm volatile("xgetbv"
                 : "=a"(low_half), "=d"(high_half)
                 : "c"(xcr)
                 : "memory");

    return ((uint64_t)high_half << 32) | low_half;
}
/**
 * Store processor extended state into `region` with XSAVE.
 *
 * EAX and EDX are both loaded with all-ones, so no state component is
 * filtered out by the instruction mask.
 *
 * @param region Destination save area.
 */
void asm_xsave(uint8_t *region)
{
    const uintptr_t every_component = ~(uintptr_t)0;

    asm volatile("xsave %0"
                 : /* no outputs; the write is covered by the memory clobber */
                 : "m"(*region), "a"(every_component), "d"(every_component)
                 : "memory");
}
/**
 * Reload processor extended state from `region` with XRSTOR.
 *
 * EAX and EDX are both loaded with all-ones, so no state component is
 * filtered out by the instruction mask.
 *
 * @param region Save area previously filled by asm_xsave().
 */
void asm_xrstor(uint8_t *region)
{
    const uintptr_t every_component = ~(uintptr_t)0;

    asm volatile("xrstor %0"
                 : /* no outputs */
                 : "m"(*region), "a"(every_component), "d"(every_component)
                 : "memory");
}
/**
 * Store the x87/SSE state into `region` with FXSAVE.
 *
 * @param region Destination save area (the caller in this project sizes the
 *               FXSAVE image at 512 bytes).
 */
void asm_fxsave(void *region)
{
    /*
     * The instruction writes through the pointer held in a register, which
     * the compiler cannot see from the operand list. The "memory" clobber
     * tells it that memory is modified, matching asm_xsave().
     */
    asm volatile("fxsave (%0)"
                 : /* no outputs */
                 : "a"(region)
                 : "memory");
}
/**
 * Reload the x87/SSE state from `region` with FXRSTOR.
 *
 * @param region Save area previously filled by asm_fxsave().
 */
void asm_fxrstor(void *region)
{
    /*
     * The instruction reads through the pointer held in a register. Without
     * the "memory" clobber the compiler may delay or drop earlier stores to
     * the save area (for example a memcpy that prepared it), so the clobber
     * forces them to be completed first, matching asm_xrstor().
     */
    asm volatile("fxrstor (%0)"
                 : /* no outputs */
                 : "a"(region)
                 : "memory");
}

View file

@ -2,6 +2,10 @@
#include <stdint.h> #include <stdint.h>
void out8(uint16_t port, uint8_t value);
uint8_t in8(uint16_t port);
#define asm_read_cr(n, reg) asm volatile("mov %%cr" #n ", %0" \ #define asm_read_cr(n, reg) asm volatile("mov %%cr" #n ", %0" \
: "=r"(reg)) : "=r"(reg))
@ -31,3 +35,15 @@ enum msr_star_reg
void asm_write_msr(uint64_t msr, uint64_t value); void asm_write_msr(uint64_t msr, uint64_t value);
uint64_t asm_read_msr(uint64_t msr); uint64_t asm_read_msr(uint64_t msr);
void asm_write_xcr(uint32_t xcr, uint64_t value);
uint64_t asm_read_xcr(uint32_t xcr);
void asm_xsave(uint8_t *region);
void asm_xrstor(uint8_t *region);
void asm_fxrstor(void *region);
void asm_fxsave(void *region);

View file

@ -0,0 +1,84 @@
#pragma once
#include <stdint.h>
/**
 * Bit masks for the CR0 control register.
 *
 * Every mask is shifted from a 64-bit unsigned one. A plain `1 << 31` shifts
 * a signed int into its sign bit, which does not yield 0x80000000 once the
 * value is converted to the uint64_t underlying type.
 *
 * NOTE(review): the tag is spelled `CRO` (letter O), not `CR0`; it is kept
 * unchanged here so existing references to the tag keep compiling.
 */
enum CRO : uint64_t
{
    CR0_PROTECTED_MODE = UINT64_C(1) << 0,
    CR0_MONITOR_COPROCESSOR = UINT64_C(1) << 1,
    CR0_EMULATION = UINT64_C(1) << 2,
    CR0_TASK_SWITCHED = UINT64_C(1) << 3,
    CR0_EXTENSION_TYPE = UINT64_C(1) << 4,
    CR0_NUMERIC_ERROR = UINT64_C(1) << 5,
    CR0_WRITE_PROTECT = UINT64_C(1) << 16,
    CR0_ALIGNMENT_MASK = UINT64_C(1) << 18,
    CR0_NOT_WRITE_THROUGH = UINT64_C(1) << 29,
    CR0_CACHE_DISABLE = UINT64_C(1) << 30,
    CR0_PAGING = UINT64_C(1) << 31,
};
/**
 * Bit masks for the CR4 control register, one enumerator per bit.
 */
enum CR4 : uint64_t
{
    CR4_VIRTUAL_MODE = UINT64_C(1) << 0,
    CR4_PROTECTED_MODE_VIRTUAL_INTERRUPTS = UINT64_C(1) << 1,
    CR4_TIME_STAMP_DISABLE = UINT64_C(1) << 2,
    CR4_DEBUGGING_EXTENSIONS = UINT64_C(1) << 3,
    CR4_PAGE_SIZE_EXTENSIONS = UINT64_C(1) << 4,
    CR4_PHYSICAL_ADDRESS_EXTENSION = UINT64_C(1) << 5,
    CR4_MACHINE_CHECK_ENABLE = UINT64_C(1) << 6,
    CR4_PAGE_GLOBAL_ENABLE = UINT64_C(1) << 7,
    CR4_PERFORMANCE_MONITORING_COUNTER_ENABLE = UINT64_C(1) << 8,
    CR4_OSFXSR = UINT64_C(1) << 9,
    CR4_OSXMMEXCPT_ENABLE = UINT64_C(1) << 10,
    CR4_USER_MODE_INSTRUCTION_PREVENTION = UINT64_C(1) << 11,
    /* bit 12 has no enumerator */
    CR4_VIRTUAL_MACHINE_EXTENSIONS = UINT64_C(1) << 13,
    CR4_SAFER_MODE_EXTENSIONS = UINT64_C(1) << 14,
    /* bit 15 has no enumerator */
    CR4_FGSBASE_ENABLED = UINT64_C(1) << 16,
    CR4_PCIDE_ENABLE = UINT64_C(1) << 17,
    CR4_OSXSAVE_ENABLE = UINT64_C(1) << 18,
    /* bit 19 has no enumerator */
    CR4_SMEP_ENABLE = UINT64_C(1) << 20,
    CR4_SMAP_ENABLE = UINT64_C(1) << 21,
    CR4_PROTECTION_KEY_ENABLE = UINT64_C(1) << 22,
    CR4_CONTROL_FLOW_ENFORCEMENT = UINT64_C(1) << 23,
    CR4_PROTECTION_KEY_SUPERVISOR_PAGE = UINT64_C(1) << 24,
};
/**
 * Bit masks for extended control register 0 (XCR0), one enumerator per
 * state component bit.
 */
enum XCR0 : uint64_t
{
    XCR0_x87 = UINT64_C(1) << 0,
    XCR0_SSE = UINT64_C(1) << 1,
    XCR0_AVX = UINT64_C(1) << 2,
    XCR0_BNDREGS = UINT64_C(1) << 3,
    XCR0_BNDCSR = UINT64_C(1) << 4,
    XCR0_OPMASK = UINT64_C(1) << 5,
    XCR0_ZMM_Hi256 = UINT64_C(1) << 6,
    XCR0_Hi16_ZMM = UINT64_C(1) << 7,
    /* bit 8 has no enumerator */
    XCR0_PKRU = UINT64_C(1) << 9,
};
/**
 * Bit masks for the RFLAGS register, one enumerator per bit.
 *
 * NOTE(review): RFLAGS_IO_PRIVILEGE_LEVEL covers bit 12 only and bit 13 has
 * no enumerator; confirm against callers whether a two-bit mask is expected.
 */
enum RFlags : uint64_t
{
    RFLAGS_CARRY = UINT64_C(1) << 0,
    RFLAGS_RESERVED1 = UINT64_C(1) << 1,
    RFLAGS_PARITY = UINT64_C(1) << 2,
    RFLAGS_RESERVED2 = UINT64_C(1) << 3,
    RFLAGS_AUXILIARY_CARRY = UINT64_C(1) << 4,
    RFLAGS_RESERVED3 = UINT64_C(1) << 5,
    RFLAGS_ZERO = UINT64_C(1) << 6,
    RFLAGS_SIGN = UINT64_C(1) << 7,
    RFLAGS_TRAP = UINT64_C(1) << 8,
    RFLAGS_INTERRUPT_ENABLE = UINT64_C(1) << 9,
    RFLAGS_DIRECTION = UINT64_C(1) << 10,
    RFLAGS_OVERFLOW = UINT64_C(1) << 11,
    RFLAGS_IO_PRIVILEGE_LEVEL = UINT64_C(1) << 12,
    RFLAGS_NESTED_TASK = UINT64_C(1) << 14,
    RFLAGS_RESERVED4 = UINT64_C(1) << 15,
    RFLAGS_RESUME = UINT64_C(1) << 16,
    RFLAGS_VIRTUAL_8086_MODE = UINT64_C(1) << 17,
    RFLAGS_ALIGNMENT_CHECK = UINT64_C(1) << 18,
    RFLAGS_VIRTUAL_INTERRUPT = UINT64_C(1) << 19,
    RFLAGS_VIRTUAL_INTERRUPT_PENDING = UINT64_C(1) << 20,
    RFLAGS_ID = UINT64_C(1) << 21,
    RFLAGS_RESERVED5 = UINT64_C(1) << 22,
};

View file

@ -1,6 +1,6 @@
#include "cpuid.h" #include "cpuid.h"
CpuidResult cpuid(uint32_t leaf, uint32_t subleaf) static CpuidResult cpuid(uint32_t leaf, uint32_t subleaf)
{ {
uint32_t cpuid_max; uint32_t cpuid_max;
CpuidResult result; CpuidResult result;
@ -36,3 +36,44 @@ bool cpuid_has_1gb_pages(void)
return result.edx & CPUID_EXFEATURE_PDPE1GB; return result.edx & CPUID_EXFEATURE_PDPE1GB;
} }
/**
 * Report whether the CPUID_XSAVE_SUPPORT bit is set in ECX of the
 * CPUID_FEATURE_IDENTIFIER leaf.
 *
 * @return true when the query succeeded and the bit is set, false otherwise.
 */
bool cpuid_has_xsave(void)
{
    const CpuidResult features = cpuid(CPUID_FEATURE_IDENTIFIER, 0);

    return features.success && (features.ecx & CPUID_XSAVE_SUPPORT) != 0;
}
/**
 * Report whether the CPUID_AVX_SUPPORT bit is set in ECX of the
 * CPUID_FEATURE_IDENTIFIER leaf.
 *
 * @return true when the query succeeded and the bit is set, false otherwise.
 */
bool cpuid_has_avx(void)
{
    const CpuidResult features = cpuid(CPUID_FEATURE_IDENTIFIER, 0);

    return features.success && (features.ecx & CPUID_AVX_SUPPORT) != 0;
}
/**
 * Report whether the CPUID_AVX512_SUPPORT bit is set in EBX of the
 * CPUID_EXTENDED_FEATURE_IDENTIFIER leaf (sub-leaf 0).
 *
 * @return true when the query succeeded and the bit is set, false otherwise.
 */
bool cpuid_has_avx512(void)
{
    const CpuidResult extended = cpuid(CPUID_EXTENDED_FEATURE_IDENTIFIER, 0);

    return extended.success && (extended.ebx & CPUID_AVX512_SUPPORT) != 0;
}
size_t cpuid_xsave_size(void)
{
return cpuid(CPUID_PROC_EXTENDED_STATE_ENUMERATION, 0).ecx;
}

View file

@ -1,14 +1,16 @@
#pragma once #pragma once
#include <stddef.h>
#include <stdint.h> #include <stdint.h>
#define CPUID_EXTENDED_LEAF (0x80000001) #define CPUID_EXTENDED_LEAF (0x80000001)
#define CPUID_EXFEATURE_PDPE1GB (1 << 26) #define CPUID_EXFEATURE_PDPE1GB (1 << 26)
#define CPUID_SSE_SUPPORT (1 << 25)
#define CPUID_SSE2_SUPPORT (1 << 26)
#define CPUID_XSAVE_SUPPORT (1 << 26) #define CPUID_XSAVE_SUPPORT (1 << 26)
#define CPUID_AVX_SUPPORT (1 << 28)
#define CPUID_AVX512_SUPPORT (1 << 16)
#define CPUID_FEATURE_IDENTIFIER (0x1) #define CPUID_FEATURE_IDENTIFIER (0x1)
#define CPUID_EXTENDED_FEATURE_IDENTIFIER (0x7)
#define CPUID_PROC_EXTENDED_STATE_ENUMERATION (0xD)
typedef struct typedef struct
{ {
uint32_t eax; uint32_t eax;
@ -19,9 +21,8 @@ typedef struct
bool success; bool success;
} CpuidResult; } CpuidResult;
CpuidResult cpuid(uint32_t leaf, uint32_t subleaf);
bool cpuid_has_1gb_pages(void); bool cpuid_has_1gb_pages(void);
bool cpuid_has_sse(void);
bool cpuid_has_sse2(void);
bool cpuid_has_xsave(void); bool cpuid_has_xsave(void);
bool cpuid_has_avx(void);
bool cpuid_has_avx512(void);
size_t cpuid_xsave_size(void);

View file

@ -8,6 +8,7 @@
#include "paging.h" #include "paging.h"
#include "hpet.h" #include "hpet.h"
#include "apic.h" #include "apic.h"
#include "simd.h"
Stream hal_dbg_stream(void) Stream hal_dbg_stream(void)
{ {
@ -24,6 +25,7 @@ Res hal_setup(void)
acpi_init(); acpi_init();
try$(hpet_init()); try$(hpet_init());
try$(apic_init()); try$(apic_init());
simd_init();
return ok$(); return ok$();
} }

View file

@ -0,0 +1,108 @@
#include <dbg/log.h>
#include <stdint.h>
#include <string.h>
#include "asm.h"
#include "cpu.h"
#include "cpuid.h"
#include "simd.h"
#include "../../core/pmm.h"
static _Alignas(PMM_PAGE_SIZE) uint8_t simd_initial_context[PMM_PAGE_SIZE] = {};
/**
 * Enable FPU/SSE (and, when available, XSAVE/AVX/AVX-512) on the current CPU
 * and capture the resulting state into simd_initial_context.
 *
 * Order matters: CR4 is written (including the OSXSAVE bit, when XSAVE is
 * supported) before XCR0 is programmed, and the template context is saved
 * only after FNINIT.
 */
void simd_init(void)
{
    uint64_t cr0_value;
    uint64_t cr4_value;
    uint64_t xcr0_mask = XCR0_x87 | XCR0_SSE;

    /* CR0: clear emulation, set monitor-coprocessor and numeric-error. */
    asm_read_cr(0, cr0_value);
    cr0_value = (cr0_value & ~CR0_EMULATION) | CR0_MONITOR_COPROCESSOR | CR0_NUMERIC_ERROR;
    asm_write_cr(0, cr0_value);

    /* CR4: FXSAVE/SSE exception support, plus OSXSAVE when XSAVE exists. */
    asm_read_cr(4, cr4_value);
    cr4_value |= CR4_OSFXSR | CR4_OSXMMEXCPT_ENABLE;

    const bool use_xsave = cpuid_has_xsave();
    if (use_xsave)
    {
        log$("XSAVE is supported");
        cr4_value |= CR4_OSXSAVE_ENABLE;
    }
    asm_write_cr(4, cr4_value);

    /* Collect the optional state components to enable in XCR0. */
    if (cpuid_has_avx())
    {
        log$("AVX is supported");
        xcr0_mask |= XCR0_AVX;
    }

    if (cpuid_has_avx512())
    {
        log$("AVX512 is supported");
        xcr0_mask |= XCR0_OPMASK | XCR0_ZMM_Hi256 | XCR0_Hi16_ZMM;
    }

    /* XCR0 is only written when XSAVE is supported. */
    if (use_xsave)
    {
        asm_write_xcr(0, xcr0_mask);
    }

    asm volatile("fninit");
    simd_context_save(simd_initial_context);

    log$("SIMD initialized");
}
/**
 * Save the current SIMD/FPU state into `ptr`.
 *
 * Uses XSAVE when the CPU reports XSAVE support and FXSAVE otherwise.
 *
 * @param ptr Destination save area; simd_context_size() gives its size.
 */
void simd_context_save(void *ptr)
{
    if (!cpuid_has_xsave())
    {
        asm_fxsave(ptr);
        return;
    }

    asm_xsave(ptr);
}
/**
 * Size in bytes of a SIMD context save area on this CPU.
 *
 * @return The CPUID-reported XSAVE size when XSAVE is supported, otherwise
 *         512 (the size used for the FXSAVE path).
 */
size_t simd_context_size(void)
{
    const size_t fxsave_area_size = 512;

    return cpuid_has_xsave() ? cpuid_xsave_size() : fxsave_area_size;
}
/**
 * Restore the SIMD/FPU state from `ptr`.
 *
 * Uses XRSTOR when the CPU reports XSAVE support and FXRSTOR otherwise.
 *
 * @param ptr Save area previously filled by simd_context_save() or
 *            simd_context_init().
 */
void simd_context_load(void *ptr)
{
    if (!cpuid_has_xsave())
    {
        asm_fxrstor(ptr);
        return;
    }

    asm_xrstor(ptr);
}
/**
 * Initialise a SIMD context from the template captured by simd_init().
 *
 * @param ptr Destination save area of at least simd_context_size() bytes.
 */
void simd_context_init(void *ptr)
{
    const size_t context_size = simd_context_size();
    size_t template_bytes = context_size;

    /*
     * simd_context_size() comes from CPUID on the XSAVE path and is not tied
     * to the size of the template buffer. Never read past the template;
     * any remainder of the destination is zero-filled instead.
     */
    if (template_bytes > sizeof(simd_initial_context))
    {
        template_bytes = sizeof(simd_initial_context);
    }

    memcpy(ptr, simd_initial_context, template_bytes);

    if (context_size > template_bytes)
    {
        memset((uint8_t *)ptr + template_bytes, 0, context_size - template_bytes);
    }
}

View file

@ -0,0 +1,13 @@
#pragma once
#include <stddef.h>
void simd_init(void);
void simd_context_save(void *ptr);
size_t simd_context_size(void);
void simd_context_init(void *ptr);
void simd_context_load(void *ptr);