
Memory allocation and deallocation are done on the heap:

  • void* ptr = malloc(size)
  • free(ptr)

Stack allocation is contiguous because it needs no management.
The heap is allocated according to the requested size, so its blocks are not contiguous.

int* a1 = malloc(1000 * sizeof(int));
int* a2 = malloc(100 * sizeof(int));

int b[1000];
int b2[100];

// the stack lays b and b2 out back to back (exact layout is compiler-dependent),
// so the int just below b2 is b[999]
*(int *)((uint64_t)&b2 - sizeof(int)) = b[999]; // true: the two arrays are adjacent

After malloc, remember to free the memory to prevent leaks.

Concretely:

  • malloc: find a block the request fits into, mark it as allocated, and return its low address
  • free: clear the allocated mark and link the block back with the neighboring unallocated space; coalescing adjacent bytes makes the allocator more efficient
  • With 8-byte alignment the size is a multiple of 8, so its lowest three bits are always 0; the lowest bit can therefore hold the allocated flag without costing any space (see the sketch below)
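
As a minimal sketch of that header encoding (the helper names pack_header, header_blocksize and header_allocated are illustrative, not any real allocator's API):

#include <assert.h>
#include <stdint.h>

// Pack an 8-byte-aligned block size and the allocated flag into one 32-bit header word.
static uint32_t pack_header(uint32_t blocksize, uint32_t allocated) {
    assert((blocksize & 0x7) == 0);        // multiple of 8: the low 3 bits are free
    return blocksize | (allocated & 0x1);  // reuse bit 0 as the allocated flag
}

static uint32_t header_blocksize(uint32_t header) { return header & ~(uint32_t)0x7; }
static uint32_t header_allocated(uint32_t header) { return header & 0x1; }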

A similar trick (exploiting byte alignment) is used to save memory in red-black tree nodes:

struct RBTreeNode {
    // no `bool color` member: the padding it forces (byte alignment) would waste memory
    struct RBTreeNode* prev;
    struct RBTreeNode* left;
    struct RBTreeNode* right;
    int64_t value;
}; // 32 bytes

// pointers are 8-byte aligned, so the lowest bit of prev is free to store the color
bool color = ((uintptr_t)p->prev) & 0x1;
struct RBTreeNode* pre = (struct RBTreeNode*)((uintptr_t)p->prev & ~(uintptr_t)0x1);
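
For the write side, a hedged sketch building on the struct above (set_parent_and_color, get_parent and get_color are made-up helper names):

#include <stdbool.h>
#include <stdint.h>

// Store the color in the lowest bit of the 8-byte-aligned parent pointer.
static void set_parent_and_color(struct RBTreeNode* p,
                                 struct RBTreeNode* parent, bool red) {
    p->prev = (struct RBTreeNode*)(((uintptr_t)parent & ~(uintptr_t)0x1)
                                   | (red ? 0x1 : 0x0));
}

static struct RBTreeNode* get_parent(struct RBTreeNode* p) {
    return (struct RBTreeNode*)((uintptr_t)p->prev & ~(uintptr_t)0x1);
}

static bool get_color(struct RBTreeNode* p) {
    return ((uintptr_t)p->prev & 0x1) != 0;
}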

Here 4 bytes are reserved to record the block size; when the block is freed, the allocated bit is cleared and the size stays unchanged.
Block allocation example 1:

int* a = malloc(sizeof(int));
*a = 0xabcd; // payload = 0x0000abcd

The actual block is 8 bytes (4-byte header + 4-byte payload; note the byte alignment),
so the header stores 8 | 1 = 0x00000009.

Layout, from high address to low address:

0x00 0x00 0xab 0xcd   // payload: 0x0000abcd
0x00 0x00 0x00 0x09   // header : size = 8, allocated = 1

Payload utilization: 4 B / 8 B = 50%

Block allocation example 2:

char* str = malloc(6);
strcpy(str, "hello"); // copies 6 bytes, including the trailing '\0'

size = 4 B (header) + 6 B (payload) + 6 B (padding) = 16 B, so the header stores 16 | 1 = 0x11.

Layout, from high address to low address:

0x00 0x00 0x00 0x00 0x00 0x00   // padding up to 8-byte alignment
'\0' 'o'  'l'  'l'  'e'  'h'    // payload: "hello"
0x00 0x00 0x00 0x11             // header : size = 16, allocated = 1

In this way, using the size field as the offset to the next header, the heap forms a singly linked list:

void* p = malloc(s);

uint64_t header_addr = (uint64_t)p - 4;
uint32_t header_value = *((uint32_t*)header_addr); // read the 32-bit header just below the payload

uint32_t block_size = header_value & 0xFFFFFFFE;
int allocated = header_value & 0x1;

uint64_t next_header_addr = header_addr + block_size;
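
Repeating that step visits every block. A minimal sketch of such a traversal, assuming heap_start_address and heap_end_address bound the managed region (both names are assumptions, not defined above):

#include <stdint.h>
#include <stdio.h>

// Walk the implicit list from the first header to the end of the heap.
void walk_heap(uint64_t heap_start_address, uint64_t heap_end_address) {
    uint64_t header_addr = heap_start_address;
    while (header_addr < heap_end_address) {
        uint32_t header_value = *((uint32_t*)header_addr);
        uint32_t block_size   = header_value & 0xFFFFFFFE;
        int      allocated    = header_value & 0x1;
        printf("block @ %#llx  size = %u  allocated = %d\n",
               (unsigned long long)header_addr, block_size, allocated);
        header_addr += block_size; // the size field is the link to the next header
    }
}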

Sketches of malloc and free:

// first-fit search over the implicit list (sketch; no out-of-memory handling)
void* malloc(uint32_t size) {

    uint64_t p = heap_start_address;

    while (1) {

        uint32_t p_header_value = *((uint32_t*)p);

        uint32_t p_block_size = p_header_value & 0xFFFFFFFE;
        uint32_t p_allocated  = p_header_value & 0x1;

        if (p_allocated == 0 &&
            p_block_size - sizeof(uint32_t) >= size)
        {
            // split this free block
            uint32_t request_block_size = size + sizeof(uint32_t);

            if ((request_block_size & 0x7) != 0) {
                // data alignment: round up to the next multiple of 8
                request_block_size = (request_block_size & 0xFFFFFFF8) + 0x8;
            }

            // write this block's header: size | allocated
            *(uint32_t*)p = request_block_size | 0x1;

            // write the header of the remaining free block after the split
            uint64_t q = p + request_block_size; // next_header_addr
            *(uint32_t*)q = p_block_size - request_block_size;

            return (void*)(p + sizeof(uint32_t)); // skip the header, return the payload
        }
        else
        {
            // go to the next header address
            p = p + p_block_size;
        }
    }
}
// free with forward coalescing only (sketch;
// get_allocation / get_block_size / set_free are assumed helpers)
void free(void* p) {

    uint64_t payload_addr = (uint64_t)p;

    uint64_t header_addr = payload_addr - 4;

    uint32_t header_value = *((uint32_t*)header_addr);

    uint32_t allocated = header_value & 0x1;

    if (allocated == 0) {
        printf("free twice");
        return;
    }

    uint32_t block_size = header_value & 0xFFFFFFFE;

    // header of the next block
    uint64_t q = header_addr + block_size;

    uint32_t q_allocated  = get_allocation(q);
    uint32_t q_block_size = get_block_size(q);

    if (q_allocated == 1) {
        // next block is still in use: just clear our own allocated bit
        set_free(header_addr);
        return;
    }
    else {
        // next block is free too: merge it into this block
        block_size = block_size + q_block_size;
        *(uint32_t*)header_addr = block_size | 0x0;
    }
}

This is only a singly linked list: when freeing, a block cannot be coalesced with the one before it, so space is not fully reused.

To solve this, each block also gets a footer; like the header, it stores the size and the allocated bit, so the previous block can be reached in one step (see the sketch below).
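
A hedged sketch of that backward lookup, reusing header_addr from the free sketch above (the freed block's header) and the same 4-byte boundary tags:

// The 4 bytes just below our header are the previous block's footer,
// which stores that block's size and allocated bit.
uint64_t prev_footer_addr  = header_addr - 4;
uint32_t prev_footer_value = *((uint32_t*)prev_footer_addr);

uint32_t prev_block_size = prev_footer_value & 0xFFFFFFFE;
int      prev_allocated  = prev_footer_value & 0x1;

// Jump straight back to the previous block's header for coalescing.
uint64_t prev_header_addr = header_addr - prev_block_size;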

Implicit free list

An address-based (incomplete) implementation of the implicit list follows:

//
// Created by 29400 on 2024/5/29.
//

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

void heap_init();
uint64_t mem_alloc(uint32_t size);
void mem_free(uint64_t vaddr);
void syscall_brk(uint64_t new_end_vaddr) {
// stub: a real allocator would ask the OS to extend the heap here
}

uint64_t heap_start_vaddr;
uint64_t heap_end_vaddr;

#define HEAP_MAX_SIZE (4096 * 8)
uint8_t heap[HEAP_MAX_SIZE];

typedef struct {
// placeholder; not used by this incomplete implementation
}block_t;

/** Round up to the next multiple of n.
* if (x== k * n)
* return x.
* else, x = k * n + m and m < n
* return x = (k + 1) * n.
*/
static uint64_t round_up(uint64_t x, uint64_t n) {
return n * ((x + n - 1) / n);
}


// applicable to both header && footer
static uint32_t get_blocksize(uint64_t vaddr) {

if(vaddr==0) {
// NULL can be considered as size 0
return 0;
}

assert(heap_start_vaddr <= vaddr
&& vaddr <= heap_end_vaddr - 4);
assert((vaddr & 0x3) == 0x0); // 4 bytes alignment

uint32_t value = *(uint32_t*)&heap[vaddr];
return value & 0xFFFFFFF8;
}

// applicable to both header && footer
static uint32_t get_allocated(uint64_t vaddr) {

if(vaddr==0) {
// NULL can be considered as allocated
return 1;
}

assert(heap_start_vaddr <= vaddr
&& vaddr <= heap_end_vaddr);
assert((vaddr & 0x3) == 0x0); // 4 bytes alignment

uint32_t value = *(uint32_t*)&heap[vaddr];
return value & 0x1;
}

static void set_blocksize(uint64_t header_vaddr, uint32_t blocksize) {

if(header_vaddr==0) {
return;
}

assert(heap_start_vaddr <= header_vaddr
&& header_vaddr <= heap_end_vaddr);
assert((header_vaddr & 0x3) == 0x0); // 4 bytes alignment
assert((blocksize & 0x7) == 0x0); // 8 bytes alignment

*(uint32_t*)&heap[header_vaddr] &= 0x00000007;
*(uint32_t*)&heap[header_vaddr] |= blocksize;
}

static void set_allocated(uint64_t header_vaddr, uint32_t allocated) {

if(header_vaddr==0) {
return;
}

assert(heap_start_vaddr <= header_vaddr
&& header_vaddr <= heap_end_vaddr);
assert((header_vaddr & 0x3) == 0x0); // 4 bytes alignment

*(uint32_t*)&heap[header_vaddr] &= 0xFFFFFFFE; // clear only the allocated bit, keep the size
*(uint32_t*)&heap[header_vaddr] |= (allocated & 0x1);
}

/**
* @param vaddr can be :
* 1. starting address of the block (8 * n + 4)
* 2. starting address of the payload (8 * m)
*/
static uint64_t get_payload_addr(uint64_t vaddr) {
return round_up(vaddr,8);
}

/**
* @param vaddr can be :
* 1. starting address of the block (8 * n + 4)
* 2. starting address of the payload (8 * m)
*/
static uint64_t get_header_addr(uint64_t vaddr) {

uint64_t payload_vaddr = get_payload_addr(vaddr);

// NULL block does not have header
return payload_vaddr == 0 ? 0 : payload_vaddr - 4;
}

/**
* @param vaddr can be :
* 1. starting address of the block (8 * n + 4)
* 2. starting address of the payload (8 * m)
*/
static uint64_t get_footer_addr(uint64_t vaddr) {

uint64_t next_header = get_header_addr(vaddr);

// last block does not have footer
return next_header == 0 ? 0 : next_header - 4;
}

static int is_lastblock(uint64_t vaddr) {
assert(heap_start_vaddr <= vaddr
&& vaddr <= heap_end_vaddr);
assert((vaddr & 0x3) == 0x0); // 4 bytes alignment

uint64_t header_vaddr = get_header_addr(vaddr);
uint64_t block_size = get_blocksize(header_vaddr);

if(header_vaddr + block_size == heap_end_vaddr + 1 + 4) {
return 1;
}

return 0;

}

/**
* @param vaddr can be :
* 1. starting address of the block (8 * n + 4)
* 2. starting address of the payload (8 * m)
*/
static uint64_t get_nextheader (uint64_t vaddr) {

if(vaddr==0||is_lastblock(vaddr)) {
return 0;
}

uint64_t header_vaddr = get_header_addr(vaddr);
uint32_t block_size = get_blocksize(header_vaddr);

uint64_t next_header_vaddr = header_vaddr + block_size;

assert(0 <= next_header_vaddr &&
next_header_vaddr <= heap_end_vaddr);

return next_header_vaddr;
}

/**
* @param vaddr can be :
* 1. starting address of the block (8 * n + 4)
* 2. starting address of the payload (8 * m)
*/
static uint64_t get_prevheader (uint64_t vaddr) {

if(vaddr == 0) {
return 0;
}

uint64_t header_vaddr = get_header_addr(vaddr);

if(header_vaddr==heap_start_vaddr) {
return 0;
}
// assert(header_vaddr >= 20);

uint32_t prev_footer_vaddr = header_vaddr - 4;
uint32_t block_size = get_blocksize(prev_footer_vaddr);
uint32_t prev_header_vaddr = header_vaddr - block_size;
assert(0 <= prev_header_vaddr &&
prev_header_vaddr <= heap_end_vaddr - 12);
return prev_header_vaddr;
}

/** rules :
* 1. block[0] ==> A/F.
* 2. block[last] ==> A/F.
* 3. block[i] == A ==> block[i-1] == A/F && block[i+1] == A/F.
* 4. block[i] == F ==> block[i-1] == A && block[i+1] == A.
*
* these 4 rules ensure that
* adjacent free blocks are always merged together,
* hence external fragmentation is minimized.
*/
int heap_check() {

return 0;
}

void heap_init() {

// heap_start_vaddr is the starting address of the first block
// the payload of the first block is 8B aligned ([8])
// so the header address of the first block is [8] - 4 = [4]
heap_start_vaddr = 4;

set_blocksize(heap_start_vaddr,4096 - 8);
set_allocated(heap_start_vaddr,0);

// the last block is without footer
heap_end_vaddr = 4096 - 1;
}

/**
* @param block_vaddr the virtual address of header
* @param requested_blocksize the whole size of block
* @return the virtual address of the allocated payload; returns 0 when the current heap has no big enough free block
*/
static uint64_t try_alloc(uint64_t block_vaddr, uint32_t requested_blocksize) {

uint64_t b = block_vaddr;

uint32_t b_blocksize = get_blocksize(b);
uint32_t b_allocated = get_allocated(b);

if(b_allocated == 0 && b_blocksize >= requested_blocksize) {
//allocated this block
if(b_blocksize > requested_blocksize) {
//split the block 'b'
set_allocated(b, 1);
set_blocksize(b, requested_blocksize);

//set up the remaining split-off free block
/**
* in the extreme situation, next block size == 8
* which makes the whole next block to be :
* [0x0000'0008, 0x0000'0008]
*/
uint64_t next_header_vaddr = b + requested_blocksize;
set_allocated(next_header_vaddr, 0);
set_blocksize(next_header_vaddr, b_blocksize - requested_blocksize);

return get_payload_addr(b);
}
else {
set_blocksize(b, requested_blocksize);
set_allocated(b, 1);
return get_payload_addr(b);
}
}
return 0;
}

/**
* @param size requested payload size
* @return the virtual address of payload
*/
uint64_t mem_alloc(uint32_t size){

assert(0 < size && size < 4096 - 8);
uint64_t payload_vaddr = 0;

uint32_t requested_blocksize = round_up(size,8) + 8;

uint64_t last_block = 0;
uint64_t b = heap_start_vaddr;

while(b <= heap_end_vaddr) {

payload_vaddr = try_alloc(b,requested_blocksize);

if(payload_vaddr != 0) {
return payload_vaddr;
}
else {
// go to next block
if(is_lastblock(b)) {
last_block = b;
}

b = get_nextheader(b);
}
}

// when there is no big enough free block in the current heap,
// request a new free physical & virtual page from the OS
if(heap_end_vaddr + 1 + 4096 <= HEAP_MAX_SIZE) {

// we can allocate a page for the request
uint64_t old_heap_end = heap_end_vaddr;

// brk system call
syscall_brk(heap_end_vaddr + 1);
heap_end_vaddr += 4096;

uint32_t last_allocated = get_allocated(last_block);
uint32_t last_blocksize = get_blocksize(last_block);

if(last_allocated == 1) {

// add a footer for the last block
set_allocated(old_heap_end, 1);
set_blocksize(old_heap_end, last_blocksize);

set_allocated(old_heap_end + 4, 0);
set_blocksize(old_heap_end + 4, 4096);

last_block = old_heap_end + 4;
}
else {
set_blocksize(last_block,last_blocksize + 4096);
}
// try to allocate from the (possibly merged) last block
payload_vaddr = try_alloc(last_block,requested_blocksize);

if(payload_vaddr != 0) {
return payload_vaddr;
}

}

// <==> return NULL;
return 0;
}

void mem_free(uint64_t payload_vaddr) {
assert(heap_start_vaddr <= payload_vaddr
&& payload_vaddr <= heap_end_vaddr);
assert((payload_vaddr & 0x7) == 0x0);

uint64_t req = get_header_addr(payload_vaddr);
uint64_t req_footer = get_footer_addr(req);

uint32_t req_allocated = get_allocated(req);
uint32_t req_blocksize = get_blocksize(req);
assert(req_allocated==1);

// block starting address of next && prev
uint64_t next = get_nextheader(req); // 0 if req is the last block
uint64_t next_footer = get_footer_addr(next);

uint64_t prev = get_prevheader(req); // 0 if req is the first block

uint32_t next_allocated = get_allocated(next); // treated as 1 when next == 0
uint32_t prev_allocated = get_allocated(prev); // treated as 1 when prev == 0

uint32_t next_blocksize = get_blocksize(next); // 0 when next == 0
uint32_t prev_blocksize = get_blocksize(prev); // 0 when prev == 0

if(next_allocated == 1 && prev_allocated == 1) {
set_allocated(req,0);
set_allocated(req_footer,0);
}
else if(next_allocated == 0 && prev_allocated == 1) {
set_allocated(req, 0);
set_blocksize(req, req_blocksize + next_blocksize);

set_allocated(next_footer,0);
set_blocksize(next_footer,req_blocksize + next_blocksize);

}
else if(next_allocated == 1 && prev_allocated ==0) {
set_allocated(prev, 0 );
set_blocksize(prev, req_blocksize + prev_blocksize);

set_allocated(req_footer, 0);
set_blocksize(req_footer,req_blocksize + prev_blocksize);
}
else {
set_allocated(prev, 0 );
set_blocksize(prev, req_blocksize + prev_blocksize + next_blocksize);

set_allocated(next_footer, 0);
set_blocksize(next_footer, req_blocksize + prev_blocksize + next_blocksize);
}
}
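
A minimal usage sketch of the routines above (purely illustrative; heap_check is still a stub and the page-extension path is unfinished):

int main() {
    heap_init();

    // allocate two payloads from the managed heap[] array
    uint64_t p1 = mem_alloc(24);
    uint64_t p2 = mem_alloc(100);
    printf("p1 = %llu, p2 = %llu\n",
           (unsigned long long)p1, (unsigned long long)p2);

    // release them again; adjacent free blocks are coalesced in mem_free
    mem_free(p2);
    mem_free(p1);

    return 0;
}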

Explicit free list

It can achieve $T_{search}(\mathrm{Explicit}) \le \frac{1}{2} T_{search}(\mathrm{Implicit})$: allocated and free blocks interleave, so at most about half of all blocks are free (the "sugar-water" / mediant inequality), and the explicit list only has to visit the free ones.
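
A hedged sketch of what the explicit list stores: free blocks reuse their otherwise idle payload for two list pointers, so allocated blocks pay nothing extra (free_block_t and the field names are illustrative; in a real heap these fields live at computed offsets rather than in a C struct):

// Only free blocks are linked together; the two pointers live inside the
// payload area that a free block is not using anyway.
typedef struct free_block {
    uint32_t header;              // size | allocated, exactly as before
    struct free_block* prev_free; // previous free block (not necessarily adjacent in memory)
    struct free_block* next_free; // next free block
    // ... remaining payload bytes and the footer
} free_block_t;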
