From 4a2aefc117144a87d5fc515ddcb60c7d13dcaf1e Mon Sep 17 00:00:00 2001 From: Anton Kling Date: Sun, 30 Jun 2024 19:38:31 +0200 Subject: AHCI: Optimization to writes Now it avoids waiting for completion if possible and can instead batch commands. --- kernel/drivers/ahci.c | 65 +++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 47 insertions(+), 18 deletions(-) (limited to 'kernel/drivers') diff --git a/kernel/drivers/ahci.c b/kernel/drivers/ahci.c index bef11ae..530295d 100644 --- a/kernel/drivers/ahci.c +++ b/kernel/drivers/ahci.c @@ -303,20 +303,33 @@ u8 get_free_command_slot(volatile struct HBA_PORT *port, u8 *err) { return 0; } +void ahci_wait_for_completion(volatile struct HBA_PORT *port) { + u32 num_slots = ((hba->cap >> 8) & 0x1F); + u32 slots = (port->ci); + u32 num_free = 0; + for (; num_free < 10;) { + num_free = 0; + for (u8 i = 0; i < num_slots + 1; i++) { + if (!((slots >> i) & 1)) { + num_free++; + } + } + } +} + // is_write: Determins whether a read or write command will be used. u8 ahci_perform_command(volatile struct HBA_PORT *port, u32 startl, u32 starth, - u32 count, u16 *buffer, u8 is_write) { + u32 count, u16 *buffer, u8 is_write, u8 async) { // TODO: The number of PRDT tables are hardcoded at a seemingly // very low number. It can be up to 65,535. Should it maybe be // changed? assert(count <= num_prdt); port->is = -1; // Clear pending interrupts u8 err; - u32 command_slot = get_free_command_slot(port, &err); - if (err) { - klog(LOG_WARN, "AHCI No command slot found"); - return 0; - } + u32 command_slot; + do { + command_slot = get_free_command_slot(port, &err); + } while (err); struct HBA_CMD_HEADER *cmdheader = (struct HBA_CMD_HEADER *)physical_to_virtual((void *)port->clb); assert(0x20 == sizeof(struct HBA_CMD_HEADER)); @@ -381,6 +394,10 @@ u8 ahci_perform_command(volatile struct HBA_PORT *port, u32 startl, u32 starth, port->ci = 1 << command_slot; // Issue command + if (async) { + return 1; + } + // Wait for completion for (;;) { // In some longer duration reads, it may be helpful to spin on the DPS bit @@ -404,13 +421,13 @@ u8 ahci_perform_command(volatile struct HBA_PORT *port, u32 startl, u32 starth, } u8 ahci_raw_write(volatile struct HBA_PORT *port, u32 startl, u32 starth, - u32 count, u16 *inbuffer) { - return ahci_perform_command(port, startl, starth, count, inbuffer, 1); + u32 count, u16 *inbuffer, u8 async) { + return ahci_perform_command(port, startl, starth, count, inbuffer, 1, async); } u8 ahci_raw_read(volatile struct HBA_PORT *port, u32 startl, u32 starth, - u32 count, u16 *outbuffer) { - return ahci_perform_command(port, startl, starth, count, outbuffer, 0); + u32 count, u16 *outbuffer, u8 async) { + return ahci_perform_command(port, startl, starth, count, outbuffer, 0, async); } int ahci_write(u8 *buffer, u64 offset, u64 len, vfs_fd_t *fd) { @@ -428,13 +445,13 @@ int ahci_write(u8 *buffer, u64 offset, u64 len, vfs_fd_t *fd) { if (offset > 0) { u8 tmp_buffer[512]; - ahci_raw_read(&hba->ports[port], lba, 0, 1, (u16 *)tmp_buffer); + ahci_raw_read(&hba->ports[port], lba, 0, 1, (u16 *)tmp_buffer, 0); int left = 512 - offset; int write = min(left, len); memcpy(tmp_buffer + offset, buffer, write); - ahci_raw_write(&hba->ports[port], lba, 0, 1, (u16 *)tmp_buffer); + ahci_raw_write(&hba->ports[port], lba, 0, 1, (u16 *)tmp_buffer, 0); offset = 0; len -= write; @@ -443,17 +460,28 @@ int ahci_write(u8 *buffer, u64 offset, u64 len, vfs_fd_t *fd) { } for (; sector_count >= num_prdt; lba++) { - ahci_raw_write(&hba->ports[port], lba, 0, num_prdt, (u16 *)buffer); + ahci_raw_write(&hba->ports[port], lba, 0, num_prdt, (u16 *)buffer, 1); buffer += num_prdt * 512; len -= num_prdt * 512; sector_count -= num_prdt; } + if (sector_count >= num_prdt) { + ahci_wait_for_completion(&hba->ports[port]); + } + + if (sector_count > 0 && (0 == len % SECTOR_SIZE)) { + ahci_raw_write(&hba->ports[port], lba, 0, sector_count, (u16 *)buffer, 0); + return rc; + } + if (sector_count > 0 && len > 0) { - u8 tmp_buffer[512 * num_prdt]; - ahci_raw_read(&hba->ports[port], lba, 0, sector_count, (u16 *)tmp_buffer); + u8 tmp_buffer[512 * sector_count]; + ahci_raw_read(&hba->ports[port], lba, 0, sector_count, (u16 *)tmp_buffer, + 0); memcpy(tmp_buffer + offset, buffer, len); - ahci_raw_write(&hba->ports[port], lba, 0, sector_count, (u16 *)tmp_buffer); + ahci_raw_write(&hba->ports[port], lba, 0, sector_count, (u16 *)tmp_buffer, + 0); } return rc; } @@ -472,7 +500,7 @@ int ahci_read(u8 *buffer, u64 offset, u64 len, vfs_fd_t *fd) { } u8 tmp_buffer[512 * num_prdt]; for (; sector_count >= num_prdt; lba++) { - ahci_raw_read(&hba->ports[port], lba, 0, num_prdt, (u16 *)tmp_buffer); + ahci_raw_read(&hba->ports[port], lba, 0, num_prdt, (u16 *)tmp_buffer, 0); memcpy(buffer, tmp_buffer + offset, 512 * num_prdt); offset = 0; buffer += num_prdt * 512; @@ -481,7 +509,8 @@ int ahci_read(u8 *buffer, u64 offset, u64 len, vfs_fd_t *fd) { } if (sector_count > 0) { - ahci_raw_read(&hba->ports[port], lba, 0, sector_count, (u16 *)tmp_buffer); + ahci_raw_read(&hba->ports[port], lba, 0, sector_count, (u16 *)tmp_buffer, + 0); memcpy(buffer, tmp_buffer + offset, len); } return rc; -- cgit v1.2.3