Memory
TLB
Sv39 three-level walk (informally):
pte2 = root_table[VPN[2]]      where root_table = satp.PPN << 12
pte1 = table(pte2)[VPN[1]]     where table(pte) = (pte >> 10) << 12
pte0 = table(pte1)[VPN[0]]
physical address = ((pte0 << 2) & 0xFFFFFFFFFFF000) | virtual address[11:0]
VPN[2] = virtual address[38:30]
VPN[1] = virtual address[29:21]
VPN[0] = virtual address[20:12]
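A minimal C sketch of this walk, assuming a hypothetical helper phys_read64() that reads a 64-bit word from a physical address; it ignores the V/R/W/X permission bits and assumes 4 KB leaf pages (no superpages):
#include <stdint.h>

/* Hypothetical helper: read a 64-bit PTE at a physical address. */
extern uint64_t phys_read64(uint64_t paddr);

/* Sv39: translate a virtual address using the root PPN from satp.
 * Validity/permission checks are omitted for brevity. */
uint64_t sv39_translate(uint64_t satp_ppn, uint64_t va)
{
    uint64_t table = satp_ppn << 12;              /* root page-table base     */
    for (int level = 2; level >= 0; level--) {
        uint64_t vpn = (va >> (12 + 9 * level)) & 0x1FF;
        uint64_t pte = phys_read64(table + vpn * 8);
        table = (pte >> 10) << 12;                /* next-level table / frame */
    }
    return table | (va & 0xFFF);                  /* frame base | page offset */
}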
Brk
brk and sbrk are the two system calls the kernel exposes to user space for requesting heap space, but in practice the malloc interface that wraps them is normally used.
Interface:
#include <unistd.h>
int brk(void *addr);
void *sbrk(intptr_t increment);
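A small usage sketch (error handling mostly elided): sbrk(0) returns the current program break, and a positive increment grows the heap.
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    void *before = sbrk(0);          /* current program break */
    char *p = sbrk(4096);            /* grow the heap by one page */
    if (p == (void *)-1) return 1;   /* sbrk failed */
    p[0] = 'x';                      /* the new memory is usable */
    printf("break moved from %p to %p\n", before, sbrk(0));
    return 0;
}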
Mmap
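The notes only name mmap here; as a reminder, a minimal anonymous-mapping sketch looks like this (MAP_ANONYMOUS | MAP_PRIVATE asks the kernel directly for zero-filled, page-aligned memory):
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
    size_t len = 4096;
    /* One page of private, zero-filled memory, not backed by any file. */
    char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (p == MAP_FAILED) return 1;
    p[0] = 'x';
    printf("mapped page at %p\n", p);
    munmap(p, len);
    return 0;
}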
Partition
segmentation and paging are both realizations of this idea; x86_64 uses segmentation, while what RISC-V uses (and has dedicated hardware for) is paging.
MMU: va -> pa
For Memory Allocation Strategies, the options are:
- Fixed partitions: suffer from internal fragmentation
- Variable partitions: suffer from external fragmentation
- first fit: allocate the first block that is large enough (see the sketch after this list)
- best fit: allocate the smallest block that can still hold the process
- worst fit: allocate from the largest hole
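A minimal first-fit sketch over a singly linked free list (the types and fields are illustrative; splitting and coalescing are omitted):
#include <stddef.h>

/* Hypothetical free-list node: one per free block. */
struct free_block {
    size_t size;
    struct free_block *next;
};

/* First fit: return the first free block that is large enough. */
struct free_block *first_fit(struct free_block *head, size_t need)
{
    for (struct free_block *b = head; b != NULL; b = b->next)
        if (b->size >= need)
            return b;        /* caller would split / unlink this block */
    return NULL;             /* no block is large enough */
}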
Segmentation
A segmented address space contains many sections; sections with the same permissions can be placed in the same segment.
In the concrete implementation, a segment table is used to realize it (the table itself is stored in memory); a sketch of such an entry follows this paragraph.
Segmentation is a kind of variable partition.
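A hedged sketch of what a segment-table entry and lookup might look like (the field names are illustrative, not taken from any particular architecture):
#include <stdbool.h>
#include <stdint.h>

/* Illustrative segment-table entry: each segment has a base and a limit. */
struct segment_entry {
    uint64_t base;    /* physical start address of the segment */
    uint64_t limit;   /* length of the segment in bytes        */
};

/* Translate (segment number, offset) -> physical address.
 * Returns false on a limit violation (which would raise a fault). */
bool seg_translate(const struct segment_entry *table, unsigned seg,
                   uint64_t offset, uint64_t *pa)
{
    if (offset >= table[seg].limit)
        return false;
    *pa = table[seg].base + offset;
    return true;
}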
Paging
The most important concept is the page table structure that maintains the mapping.
The table stores frame numbers, while the page number is implied by the index, which saves half of the space.
A register points to the page table base; the table itself is stored in memory.
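With a single-level table, translation is then just an array index (a minimal sketch, assuming 4 KB pages):
#include <stdint.h>

/* page_table[page_number] holds the frame number; the page number itself
 * is implicit in the index, so it never has to be stored. */
uint64_t translate(const uint64_t *page_table, uint64_t va)
{
    uint64_t page_number = va >> 12;          /* 4 KB pages */
    uint64_t offset      = va & 0xFFF;
    return (page_table[page_number] << 12) | offset;
}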
TLB (translation look-aside buffer):
if (the page number is in the TLB)
    hit;
else
    miss;
    walk page table;
Every time the process is switched, this cache has to be flushed, which costs performance.
A simple TLB implementation:
Associative memory: supports comparing all entries in parallel
(Diagram: the CPU splits the virtual address into page number P and offset D; P is compared against all TLB entries in parallel. On a hit, the frame number F is concatenated with D to form the physical address; on a miss, the page table in memory is consulted.)
On a multi-core CPU, all threads of a process share one page table.
For processes running the same program, the text segment occupies the same frames in memory, while each data segment lives in its own frames; they only need to be mapped in the respective virtual address spaces.
With a single-level page table (32-bit addresses, 4 KB pages), each page table has 1M ($2^{20}$) entries; each entry is typically 4 bytes, so each page table is 4 MB.
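The arithmetic, assuming 32-bit virtual addresses and 4 KB pages: $2^{32} / 2^{12} = 2^{20}$ entries, and $2^{20} \times 4\,\text{B} = 4\,\text{MB}$ per page table.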
- hierarchical page table: on 32-bit, with a two-level page table, a process that only uses a couple of entries needs just one inner page table, instead of the 1K inner tables a fully populated mapping would require (a worked size comparison follows this list)
- hashed page table
- inverted page table: similar to a fixed partition
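A worked size comparison under the same 32-bit / 4 KB / 4-byte-entry assumptions: the outer table has $2^{10}$ entries and each inner table covers $2^{10}$ pages (4 MB of virtual space), so a process touching only a few pages needs $2^{10} \times 4\,\text{B} + 2^{10} \times 4\,\text{B} = 8\,\text{KB}$ of page tables instead of the 4 MB a flat table always costs.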
When allocating memory, lazy allocation is used: brk only grows the heap and does not actually allocate frames, which saves work both at allocation time and at release time.
The kernel itself has no lazy allocation, because it cannot handle a page fault for its own memory.
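The user-space side of this lazy allocation can be made visible on Linux: /proc/self/statm reports the resident set in pages (second field), and it only grows once the allocated pages are actually touched. A small sketch; the exact numbers will vary:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Read the resident-set size (second field of /proc/self/statm), in pages. */
static long resident_pages(void)
{
    long size = 0, resident = 0;
    FILE *f = fopen("/proc/self/statm", "r");
    if (!f) return -1;
    fscanf(f, "%ld %ld", &size, &resident);
    fclose(f);
    return resident;
}

int main(void)
{
    printf("RSS before malloc: %ld pages\n", resident_pages());
    char *p = malloc(64 * 1024 * 1024);          /* 64 MB, not yet faulted in */
    if (!p) return 1;
    printf("RSS after  malloc: %ld pages\n", resident_pages());
    memset(p, 1, 64 * 1024 * 1024);              /* touch every page */
    printf("RSS after  touch : %ld pages\n", resident_pages());
    return 0;
}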
mm_struct
struct mm_struct {
struct {
struct maple_tree mm_mt;
#ifdef CONFIG_MMU
unsigned long (*get_unmapped_area) (struct file *filp,
unsigned long addr, unsigned long len,
unsigned long pgoff, unsigned long flags);
#endif
unsigned long mmap_base; /* base of mmap area */
unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */
#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
/* Base addresses for compatible mmap() */
unsigned long mmap_compat_base;
unsigned long mmap_compat_legacy_base;
#endif
unsigned long task_size; /* size of task vm space */
pgd_t * pgd;
#ifdef CONFIG_MEMBARRIER
/**
* @membarrier_state: Flags controlling membarrier behavior.
*
* This field is close to @pgd to hopefully fit in the same
* cache-line, which needs to be touched by switch_mm().
*/
atomic_t membarrier_state;
#endif
/**
* @mm_users: The number of users including userspace.
*
* Use mmget()/mmget_not_zero()/mmput() to modify. When this
* drops to 0 (i.e. when the task exits and there are no other
* temporary reference holders), we also release a reference on
* @mm_count (which may then free the &struct mm_struct if
* @mm_count also drops to 0).
*/
atomic_t mm_users;
/**
* @mm_count: The number of references to &struct mm_struct
* (@mm_users count as 1).
*
* Use mmgrab()/mmdrop() to modify. When this drops to 0, the
* &struct mm_struct is freed.
*/
atomic_t mm_count;
#ifdef CONFIG_MMU
atomic_long_t pgtables_bytes; /* PTE page table pages */
#endif
int map_count; /* number of VMAs */
spinlock_t page_table_lock; /* Protects page tables and some
* counters
*/
/*
* With some kernel config, the current mmap_lock's offset
* inside 'mm_struct' is at 0x120, which is very optimal, as
* its two hot fields 'count' and 'owner' sit in 2 different
* cachelines, and when mmap_lock is highly contended, both
* of the 2 fields will be accessed frequently, current layout
* will help to reduce cache bouncing.
*
* So please be careful with adding new fields before
* mmap_lock, which can easily push the 2 fields into one
* cacheline.
*/
struct rw_semaphore mmap_lock;
struct list_head mmlist; /* List of maybe swapped mm's. These
* are globally strung together off
* init_mm.mmlist, and are protected
* by mmlist_lock
*/
unsigned long hiwater_rss; /* High-watermark of RSS usage */
unsigned long hiwater_vm; /* High-water virtual memory usage */
unsigned long total_vm; /* Total pages mapped */
unsigned long locked_vm; /* Pages that have PG_mlocked set */
atomic64_t pinned_vm; /* Refcount permanently increased */
unsigned long data_vm; /* VM_WRITE & ~VM_SHARED & ~VM_STACK */
unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
unsigned long stack_vm; /* VM_STACK */
unsigned long def_flags;
/**
* @write_protect_seq: Locked when any thread is write
* protecting pages mapped by this mm to enforce a later COW,
* for instance during page table copying for fork().
*/
seqcount_t write_protect_seq;
spinlock_t arg_lock; /* protect the below fields */
unsigned long start_code, end_code, start_data, end_data;
unsigned long start_brk, brk, start_stack;
unsigned long arg_start, arg_end, env_start, env_end;
unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */
/*
* Special counters, in some configurations protected by the
* page_table_lock, in other configurations by being atomic.
*/
struct mm_rss_stat rss_stat;
struct linux_binfmt *binfmt;
/* Architecture-specific MM context */
mm_context_t context;
unsigned long flags; /* Must use atomic bitops to access */
#ifdef CONFIG_AIO
spinlock_t ioctx_lock;
struct kioctx_table __rcu *ioctx_table;
#endif
#ifdef CONFIG_MEMCG
/*
* "owner" points to a task that is regarded as the canonical
* user/owner of this mm. All of the following must be true in
* order for it to be changed:
*
* current == mm->owner
* current->mm != mm
* new_owner->mm == mm
* new_owner->alloc_lock is held
*/
struct task_struct __rcu *owner;
#endif
struct user_namespace *user_ns;
/* store ref to file /proc/<pid>/exe symlink points to */
struct file __rcu *exe_file;
#ifdef CONFIG_MMU_NOTIFIER
struct mmu_notifier_subscriptions *notifier_subscriptions;
#endif
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
pgtable_t pmd_huge_pte; /* protected by page_table_lock */
#endif
#ifdef CONFIG_NUMA_BALANCING
/*
* numa_next_scan is the next time that PTEs will be remapped
* PROT_NONE to trigger NUMA hinting faults; such faults gather
* statistics and migrate pages to new nodes if necessary.
*/
unsigned long numa_next_scan;
/* Restart point for scanning and remapping PTEs. */
unsigned long numa_scan_offset;
/* numa_scan_seq prevents two threads remapping PTEs. */
int numa_scan_seq;
#endif
/*
* An operation with batched TLB flushing is going on. Anything
* that can move process memory needs to flush the TLB when
* moving a PROT_NONE mapped page.
*/
atomic_t tlb_flush_pending;
#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
/* See flush_tlb_batched_pending() */
atomic_t tlb_flush_batched;
#endif
struct uprobes_state uprobes_state;
#ifdef CONFIG_PREEMPT_RT
struct rcu_head delayed_drop;
#endif
#ifdef CONFIG_HUGETLB_PAGE
atomic_long_t hugetlb_usage;
#endif
struct work_struct async_put_work;
#ifdef CONFIG_IOMMU_SVA
u32 pasid;
#endif
#ifdef CONFIG_KSM
/*
* Represent how many pages of this process are involved in KSM
* merging.
*/
unsigned long ksm_merging_pages;
/*
* Represent how many pages are checked for ksm merging
* including merged and not merged.
*/
unsigned long ksm_rmap_items;
#endif
#ifdef CONFIG_LRU_GEN
struct {
/* this mm_struct is on lru_gen_mm_list */
struct list_head list;
/*
* Set when switching to this mm_struct, as a hint of
* whether it has been used since the last time per-node
* page table walkers cleared the corresponding bits.
*/
unsigned long bitmap;
#ifdef CONFIG_MEMCG
/* points to the memcg of "owner" above */
struct mem_cgroup *memcg;
#endif
} lru_gen;
#endif /* CONFIG_LRU_GEN */
} __randomize_layout;
/*
* The mm_cpumask needs to be at the end of mm_struct, because it
* is dynamically sized based on nr_cpu_ids.
*/
unsigned long cpu_bitmap[];
};
vma
struct vm_area_struct {
/* The first cache line has the info for VMA tree walking. */
unsigned long vm_start; /* Our start address within vm_mm. */
unsigned long vm_end; /* The first byte after our end address
within vm_mm. */
struct mm_struct *vm_mm; /* The address space we belong to. */
/*
* Access permissions of this VMA.
* See vmf_insert_mixed_prot() for discussion.
*/
pgprot_t vm_page_prot;
unsigned long vm_flags; /* Flags, see mm.h. */
/*
* For areas with an address space and backing store,
* linkage into the address_space->i_mmap interval tree.
*
* For private anonymous mappings, a pointer to a null terminated string
* containing the name given to the vma, or NULL if unnamed.
*/
union {
struct {
struct rb_node rb;
unsigned long rb_subtree_last;
} shared;
/*
* Serialized by mmap_sem. Never use directly because it is
* valid only when vm_file is NULL. Use anon_vma_name instead.
*/
struct anon_vma_name *anon_name;
};
/*
* A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
* list, after a COW of one of the file pages. A MAP_SHARED vma
* can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack
* or brk vma (with NULL file) can only be in an anon_vma list.
*/
struct list_head anon_vma_chain; /* Serialized by mmap_lock &
* page_table_lock */
struct anon_vma *anon_vma; /* Serialized by page_table_lock */
/* Function pointers to deal with this struct. */
const struct vm_operations_struct *vm_ops;
/* Information about our backing store: */
unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE
units */
struct file * vm_file; /* File we map to (can be NULL). */
void * vm_private_data; /* was vm_pte (shared mem) */
#ifdef CONFIG_SWAP
atomic_long_t swap_readahead_info;
#endif
#ifndef CONFIG_MMU
struct vm_region *vm_region; /* NOMMU mapping region */
#endif
#ifdef CONFIG_NUMA
struct mempolicy *vm_policy; /* NUMA policy for the VMA */
#endif
struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
} __randomize_layout;
While a user-space process executes, page faults inevitably occur; inside these exceptions, pages are mapped one after another to keep the program running.
When running code, new frames must be allocated and the corresponding memory mappings must be set up.
**Demand Paging**: a page is only brought into memory when it actually needs to be used.
In general, when a virtual address is accessed (the page table is walked), two kinds of faults can occur:
- the reference is valid but the page is not in memory: bring it into memory
- the page-table entry's valid bit is 0: abort the operation
The operating system behaves like a background process that only steps in when a system call or an exception occurs.
When no file has to be read, no disk access is needed; it is enough to hand out a free frame.
Trap to the operating system
=> save the user registers and state
=> determine that the interrupt is a page fault
=> check that the page reference is legal and determine its location
=> issue a read from disk into a free frame
=> while waiting, allocate the CPU to another process
=> receive the interrupt from the disk
=> save registers and state of the currently running process
=> determine that the interrupt came from the disk
=> correct the page table and other tables to show that the page is now in memory
=> wait for the CPU to be allocated to this process again
=> restore the user registers, state, and the new page table, then resume the interrupted instruction
Overview:
partition directly on physical memory (segmentation and paging)
=> swapping
=> MMU + TLB
Virtual memory: decouples logical addresses from physical addresses.
A page fault is the mechanism by which demand paging is implemented.
MMU:
if (page_table[v_addr][valid] == v){
    fetch_the_memory();
}
else if (page_table[v_addr][valid] == i){
    raise(page_fault);
}
The kernel does not use lazy allocation, so page faults do not occur for kernel memory.
In actual Linux, the data structure used to manage VMAs is a red-black tree (newer kernels use a maple tree, the mm_mt field in the struct above), and lookups in that tree are done by the operating system, so every user space process has its own such tree.
In the most extreme case, right after a thread starts executing, only the PCB has been copied into memory; none of the other contents have been loaded into memory, nor do the corresponding mappings exist yet.
When page faults occur, a linked list is needed to maintain the free frames.
When a page is anonymous, it has to be written to the swap area on eviction; otherwise the page would be lost forever.
When swap has to be used, the different algorithms have to be weighed:
- FIFO: suffers from Belady's Anomaly: when the number of frames increases, the number of page faults can actually increase
- optimal: replace the frame that will not be used for the longest time in the future
- LRU: replace the frame that has not been used for the longest time
- LFU: replace the least frequently used frame
- MFU: replace the most frequently used frame
Example:
# visiting reference string (assume 3 frames):
7,0,1,2,0,3,0,4,2,3,0,3,2,1,2,0,1,7,0,1
FIFO (frame contents after each page fault):
7 7 7 2 2 2 4 4 4 0 0 0 7 7 7
  0 0 0 3 3 3 2 2 2 1 1 1 0 0
    1 1 1 0 0 0 3 3 3 2 2 2 1
15 page faults
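A tiny C simulation of FIFO replacement with 3 frames that reproduces the 15 faults above (a sketch for this reference string only):
#include <stdio.h>

#define NFRAMES 3

int main(void)
{
    int refs[] = {7,0,1,2,0,3,0,4,2,3,0,3,2,1,2,0,1,7,0,1};
    int n = sizeof(refs) / sizeof(refs[0]);
    int frames[NFRAMES];
    int used = 0, next = 0, faults = 0;

    for (int i = 0; i < n; i++) {
        int hit = 0;
        for (int j = 0; j < used; j++)
            if (frames[j] == refs[i]) { hit = 1; break; }
        if (hit) continue;
        faults++;
        if (used < NFRAMES) {
            frames[used++] = refs[i];          /* fill an empty frame   */
        } else {
            frames[next] = refs[i];            /* evict the oldest page */
            next = (next + 1) % NFRAMES;
        }
    }
    printf("FIFO page faults: %d\n", faults);   /* prints 15 */
    return 0;
}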
Optimal (best in theory; frame contents after each page fault):
7 7 7 0 0 2 2 2 0
  0 0 1 2 3 3 0 1
    1 2 3 4 0 1 7
9 page faults
LRU
For the actual implementation:
- counter based: each page keeps aging bits that are shifted periodically (sketch below)
uint8_t lru_bits = 0;
while (1) {
    if (clk == 100ms) {                   /* every 100 ms timer tick */
        lru_bits = lru_bits >> 1;         /* age the page */
        if (page_used) lru_bits |= 0x80;  /* set the top bit if referenced */
    }
}
# so 11000100 is newer than 01110111
# when the MMU accesses the page table, it also updates these 8 bits
- stack based: keep a stack of page numbers and move a page to the top whenever it is referenced, so the bottom entry is always the least recently used page (see the sketch after this block)
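A hedged sketch of the stack-based variant, using a small array as the "stack" (most recently used at index 0); this is illustrative, not how a real MMU or OS stores it:
#include <stdio.h>
#include <string.h>

#define NFRAMES 3

static int stack[NFRAMES];   /* stack[0] = most recently used page */
static int depth = 0;

/* On every reference, move the page to the top of the stack.
 * If the page is new and the stack is full, the bottom entry
 * (the least recently used page) is the one to evict. */
int lru_reference(int page)
{
    int victim = -1, pos = -1;
    for (int i = 0; i < depth; i++)
        if (stack[i] == page) { pos = i; break; }

    if (pos < 0) {                        /* page not resident */
        if (depth == NFRAMES) {
            victim = stack[NFRAMES - 1];  /* evict the LRU page */
            depth--;
        }
        pos = depth++;
    }
    /* shift entries down and put the page on top */
    memmove(&stack[1], &stack[0], pos * sizeof(int));
    stack[0] = page;
    return victim;                        /* -1 if nothing was evicted */
}

int main(void)
{
    int refs[] = {7, 0, 1, 2, 0, 3};
    for (int i = 0; i < 6; i++) {
        int v = lru_reference(refs[i]);
        if (v >= 0) printf("ref %d evicts %d\n", refs[i], v);
    }
    return 0;
}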
Applying LRU to the reference string above (contents after each reference):
7 7 7 0 1 0 2 3 0 4 2 2 2 2 0 3 2 0 1 7
  0 0 1 0 2 3 0 4 2 3 0 3 0 3 2 0 1 7 0
    1 2 2 3 0 4 2 3 0 3 0 3 2 0 1 7 0 1
12 page faults
Clock Page-Replacement Algorithm
# give each page a second chance:
for page in pages:            # circular scan
    if page.reference_bit == 0:
        replace(page)
        break
    else:                     # reference_bit == 1
        page.reference_bit = 0
# enhanced version, using (reference bit, modify bit) pairs:
# (0,0) neither referenced nor modified -- best page to replace
# (0,1) not referenced but modified -- must be written out first
# (1,0) referenced but not modified
# (1,1) referenced and modified
When a page fault occurs, the context switch proceeds as follows:
process A ---\
^ \
| \--- page fault ---\
| \
| \---> do_fault(A) ---> read_disk() ---\
| ^ | | \
| | | A in wait queue \--- switch_to(B) ---\
| | | | \
|---------------------------------------|---| | \---> process B
| done ^ |
| | | |
| A in ready queue clock interrupt |
| |-------------------------------------------| |
|---- context switch_to (A) ------------------------------------------|
Page faults bring a large overhead; a simple test comparing sequential access with page-strided access:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

char *buf;

int main(){
    buf = (char*)malloc(sizeof(char) * 0x400000);
    clock_t st, en;

    /* sequential access: walk the 4 MB buffer byte by byte */
    st = clock();
    for (int o = 0; o < 1000; o++)
        for (int i = 0; i < 0x400000; i++) buf[i] = 2;
    en = clock();
    printf("time by row is :%lf\n", ((double)(en - st)) / CLOCKS_PER_SEC);

    /* strided access: jump a page (0x1000 bytes) on every write */
    st = clock();
    for (int o = 0; o < 1000; o++)
        for (int i = 0; i < 0x1000; i++)
            for (int j = 0; j < 0x400; j++) buf[j * 0x1000 + i] = 1;
    en = clock();
    printf("time by line is :%lf\n", ((double)(en - st)) / CLOCKS_PER_SEC);
}
Run result:
$ .\a.exe
time by row is :0.006000
time by line is :0.059000
The example above is also a simple example of Thrashing: it shows up when frequent page faults keep triggering page replacement.
WSS: Working-Set Size
PFF: Page-fault frequency rate (a control-loop sketch follows this list)
- when the fault rate is too low, the process holds too many frames, so some are released
- when the fault rate is too high, the process is given more frames
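A hedged sketch of that control loop; the thresholds and function name are made up for illustration:
/* Illustrative PFF controller (all names hypothetical): keep a process's
 * page-fault rate between two thresholds by growing or shrinking the
 * number of frames allocated to it. */
#define PFF_UPPER 0.10   /* faults per reference: too many -> add a frame  */
#define PFF_LOWER 0.01   /* faults per reference: too few  -> take a frame */

int adjust_frames(double fault_rate, int frames)
{
    if (fault_rate > PFF_UPPER)
        return frames + 1;   /* process is faulting too often: give it a frame */
    if (fault_rate < PFF_LOWER && frames > 1)
        return frames - 1;   /* process has more frames than it needs */
    return frames;
}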
Handling a page fault in Linux (riscv):
asmlinkage void do_page_fault(struct pt_regs *regs)
{
struct task_struct *tsk;
struct vm_area_struct *vma;
struct mm_struct *mm;
unsigned long addr, cause;
unsigned int flags = FAULT_FLAG_DEFAULT;
int code = SEGV_MAPERR;
vm_fault_t fault;
cause = regs->cause;
addr = regs->badaddr;
tsk = current;
mm = tsk->mm;
/*
* Fault-in kernel-space virtual memory on-demand.
* The 'reference' page table is init_mm.pgd.
*
* NOTE! We MUST NOT take any locks for this case. We may
* be in an interrupt or a critical region, and should
* only copy the information from the master page table,
* nothing more.
*/
if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END))) {
vmalloc_fault(regs, code, addr);
return;
}
/* Enable interrupts if they were enabled in the parent context. */
if (likely(regs->status & SR_PIE))
local_irq_enable();
/*
* If we're in an interrupt, have no user context, or are running
* in an atomic region, then we must not take the fault.
*/
if (unlikely(faulthandler_disabled() || !mm)) {
no_context(regs, addr);
return;
}
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
if (cause == EXC_STORE_PAGE_FAULT)
flags |= FAULT_FLAG_WRITE;
else if (cause == EXC_INST_PAGE_FAULT)
flags |= FAULT_FLAG_INSTRUCTION;
retry:
mmap_read_lock(mm);
vma = find_vma(mm, addr);
if (unlikely(!vma)) {
bad_area(regs, mm, code, addr);
return;
}
if (likely(vma->vm_start <= addr))
goto good_area;
if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
bad_area(regs, mm, code, addr);
return;
}
if (unlikely(expand_stack(vma, addr))) {
bad_area(regs, mm, code, addr);
return;
}
/*
* Ok, we have a good vm_area for this memory access, so
* we can handle it.
*/
good_area:
code = SEGV_ACCERR;
if (unlikely(access_error(cause, vma))) {
bad_area(regs, mm, code, addr);
return;
}
/*
* If for any reason at all we could not handle the fault,
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
fault = handle_mm_fault(vma, addr, flags, regs);
/*
* If we need to retry but a fatal signal is pending, handle the
* signal first. We do not need to release the mmap_lock because it
* would already be released in __lock_page_or_retry in mm/filemap.c.
*/
if (fault_signal_pending(fault, regs))
return;
if (unlikely((fault & VM_FAULT_RETRY) && (flags & FAULT_FLAG_ALLOW_RETRY))) {
flags |= FAULT_FLAG_TRIED;
/*
* No need to mmap_read_unlock(mm) as we would
* have already released it in __lock_page_or_retry
* in mm/filemap.c.
*/
goto retry;
}
mmap_read_unlock(mm);
if (unlikely(fault & VM_FAULT_ERROR)) {
mm_fault_error(regs, addr, fault);
return;
}
return;
}
When a page fault occurs, the handler first calls find_vma to look up the VMA for the faulting address and determine whether the address lies in a good area or a bad area.
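Condensed, the good_area/bad_area decision above boils down to the following check; a sketch using the kernel identifiers from the listing, not standalone code:
/* Simplified version of the good_area / bad_area decision:
 * the fault address must fall inside a VMA, or just below a VMA that
 * is allowed to grow downwards (the stack). */
int addr_is_good(struct mm_struct *mm, unsigned long addr)
{
    struct vm_area_struct *vma = find_vma(mm, addr); /* first vma with vm_end > addr */

    if (!vma)
        return 0;                                    /* bad area: above every vma   */
    if (vma->vm_start <= addr)
        return 1;                                    /* good area: inside the vma   */
    if (!(vma->vm_flags & VM_GROWSDOWN))
        return 0;                                    /* gap below a non-stack vma   */
    return expand_stack(vma, addr) == 0;             /* try to grow the stack vma   */
}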
Buddy System: maintains memory in blocks of size $2^n$ (see the sketch below)
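A minimal sketch of the key trick: for a block of size $2^n$ at offset off inside the managed region, its buddy sits at off ^ (1 << n), so splitting and merging are just an XOR (illustrative, not a full allocator):
#include <stdio.h>

/* Offset of the buddy of the block at 'off' when blocks are 2^order bytes. */
unsigned long buddy_of(unsigned long off, unsigned order)
{
    return off ^ (1UL << order);
}

int main(void)
{
    /* The 4 KB block at offset 0x3000 pairs with the one at 0x2000;
     * together they merge into an 8 KB block at 0x2000. */
    printf("buddy of 0x3000 (order 12) = 0x%lx\n", buddy_of(0x3000, 12));
    return 0;
}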
slab Allocator:
- task_struct pool
- no fragmentation: a 12K slab (3 pages) can store 4 3K objects
prepaging: map the pages in before any page fault happens
page size: i.e., the large pages seen before
TLB Reach: TLB reach = (TLB size) * (page size)
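For example, assuming a 64-entry TLB and 4 KB pages: $\text{TLB reach} = 64 \times 4\,\text{KB} = 256\,\text{KB}$.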
Windows XP: working set