转自:
1. 内存分配函数
相关代码如下:
#define alloc_pages(gfp_mask, order) alloc_pages_node(numa_node_id(), gfp_mask, order)
#define alloc_page_vma(gfp_mask, vma, addr) alloc_pages(gfp_mask, 0)
#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
#define __get_free_page(gfp_mask) __get_free_pages((gfp_mask),0)
#define __get_dma_pages(gfp_mask, order) __get_free_pages((gfp_mask) | GFP_DMA,(order))
#define pfn_to_page(pfn) (mem_map + ((pfn) - PHYS_PFN_OFFSET))
#define page_to_pfn(page) ((unsigned long)((page) - mem_map) + PHYS_PFN_OFFSET)
#define pfn_valid(pfn) ((pfn) >= PHYS_PFN_OFFSET && (pfn) < (PHYS_PFN_OFFSET + max_mapnr))
#define phys_to_page(phys) (pfn_to_page(phys >> PAGE_SHIFT))
#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT)
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
1)__get_free_pages实现代码如下,它返回页的虚拟地址:
- unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order) /* returns the allocated block's virtual address as an integer, or 0 on failure */
- {
- struct page *page;
- /*
- * __get_free_pages() returns a 32-bit address, which cannot represent
- * a highmem page
- */
- VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0); /* checks that __GFP_HIGHMEM is not set in gfp_mask */
- page = alloc_pages(gfp_mask, order); /* gfp_mask and order are passed through unchanged */
- if (!page)
- return 0; /* alloc_pages() returned no page */
- return (unsigned long) page_address(page); /* struct page -> virtual address, cast to an integer */
- }
- /**
- * page_address - get the mapped virtual address of a page
- * @page: &struct page to get the virtual address of
- *
- * Returns the page's virtual address.
- *
- * Pages for which PageHighMem() is false are resolved directly through
- * lowmem_page_address(). For any other page, the list hanging off the slot
- * returned by page_slot() is searched while holding that slot's lock, and
- * NULL is returned when no entry for @page is found.
- */
- void *page_address(struct page *page)
- {
- unsigned long flags;
- void *ret;
- struct page_address_slot *pas;
- if (!PageHighMem(page))
- return lowmem_page_address(page);
- pas = page_slot(page);
- ret = NULL; /* result when no matching entry is found below */
- spin_lock_irqsave(&pas->lock, flags);
- if (!list_empty(&pas->lh)) {
- struct page_address_map *pam;
- list_for_each_entry(pam, &pas->lh, list) {
- if (pam->page == page) {
- ret = pam->virtual;
- goto done; /* first match wins; stop scanning */
- }
- }
- }
- done:
- spin_unlock_irqrestore(&pas->lock, flags); /* every path that took the lock releases it here */
- return ret;
- }
- static __always_inline void *lowmem_page_address(struct page *page) /* used by page_address() for pages where PageHighMem() is false */
- {
- return __va(PFN_PHYS(page_to_pfn(page))); /* chain: page_to_pfn() -> PFN_PHYS() -> __va(); presumably pfn -> physical -> virtual address */
- }
2)alloc_pages_node
- static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask, /* allocate using the zonelist chosen for node nid */
- unsigned int order)
- {
- /* Unknown node is current node: a negative nid is replaced by numa_node_id() */
- if (nid < 0)
- nid = numa_node_id();
- return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask)); /* zonelist is selected from nid and gfp_mask */
- }
参数 nid 是要分配内存的 NUMA 节点 ID(nid < 0 时使用当前节点),参数 gfp_mask 是 GFP_ 分配标志,参数 order 表示分配内存的大小(2^order 个页面)。返回值是指向所分配的第一个页(一次可能分配多个页)的 page 结构的指针,失败时返回 NULL。
- static inline struct page * /* thin wrapper around __alloc_pages_nodemask() */
- __alloc_pages(gfp_t gfp_mask, unsigned int order,
- struct zonelist *zonelist)
- {
- return __alloc_pages_nodemask(gfp_mask, order, zonelist, NULL); /* NULL: no explicit nodemask is supplied */
- }
- /*
- * This is the 'heart' of the zoned buddy allocator.
- *
- * Tries get_page_from_freelist() first and falls back to
- * __alloc_pages_slowpath() when that yields no page. Returns NULL early
- * when should_fail_alloc_page() reports failure, when the zonelist's first
- * zone reference is empty, or when first_zones_zonelist() finds no
- * preferred zone; otherwise returns whatever the fast or slow path produced
- * (which may still be NULL).
- */
- struct page *
- __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
- struct zonelist *zonelist, nodemask_t *nodemask)
- {
- enum zone_type high_zoneidx = gfp_zone(gfp_mask);
- struct zone *preferred_zone;
- struct page *page;
- int migratetype = allocflags_to_migratetype(gfp_mask); /* derived before gfp_mask is masked on the next line */
- gfp_mask &= gfp_allowed_mask; /* keep only the flags present in gfp_allowed_mask */
- lockdep_trace_alloc(gfp_mask);
- might_sleep_if(gfp_mask & __GFP_WAIT);
- if (should_fail_alloc_page(gfp_mask, order))
- return NULL;
- /*
- * Check the zones suitable for the gfp_mask contain at least one
- * valid zone. It's possible to have an empty zonelist as a result
- * of GFP_THISNODE and a memoryless node
- */
- if (unlikely(!zonelist->_zonerefs->zone))
- return NULL;
- get_mems_allowed(); /* paired with put_mems_allowed() on every later return */
- /* The preferred zone is used for statistics later */
- first_zones_zonelist(zonelist, high_zoneidx,
- nodemask ? : &cpuset_current_mems_allowed, /* GNU "?:" form: nodemask if non-NULL, else &cpuset_current_mems_allowed */
- &preferred_zone);
- if (!preferred_zone) {
- put_mems_allowed(); /* balance get_mems_allowed() before bailing out */
- return NULL;
- }
- /* First allocation attempt */
- page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
- zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET,
- preferred_zone, migratetype);
- if (unlikely(!page)) /* first attempt produced nothing: fall back to the slow path */
- page = __alloc_pages_slowpath(gfp_mask, order,
- zonelist, high_zoneidx, nodemask,
- preferred_zone, migratetype);
- put_mems_allowed();
- trace_mm_page_alloc(page, order, gfp_mask, migratetype);
- return page;
- }
其接下来的主要调用流程如下:
get_page_from_freelist->
buffered_rmqueue
3) buffered_rmqueue
从区域zone中获取一块大小为2^order的物理内存块,返回该内存块的首个页框的描述符page。
- static inline /* take a block of 2^order pages from zone; returns its first struct page, or NULL on failure */
- struct page *buffered_rmqueue(struct zone *preferred_zone,
- struct zone *zone, int order, gfp_t gfp_flags,
- int migratetype)
- {
- unsigned long flags;
- struct page *page;
- int cold = !!(gfp_flags & __GFP_COLD); // 1 when __GFP_COLD is set, otherwise 0
- again:
- if (likely(order == 0)) { // single page (2^0): served from the current CPU's per-cpu page lists
- struct per_cpu_pages *pcp;
- struct list_head *list;
- local_irq_save(flags);
- pcp = &this_cpu_ptr(zone->pageset)->pcp; // per-cpu page descriptor of this zone for the current CPU
- list = &pcp->lists[migratetype];
- if (list_empty(list)) { // the per-cpu list for this migratetype is empty
- pcp->count += rmqueue_bulk(zone, 0,// refill the list with up to pcp->batch order-0 pages from the buddy allocator
- pcp->batch, list,
- migratetype, cold);
- if (unlikely(list_empty(list)))
- goto failed;
- }
- // locate the struct page through its lru link: list tail when cold, list head otherwise
- if (cold)
- page = list_entry(list->prev, struct page, lru);
- else
- page = list_entry(list->next, struct page, lru);
- // the page is being handed out, so unlink it from the per-cpu list
- list_del(&page->lru);
- pcp->count--; // keep the per-cpu page count in step with the list
- } else { // order != 0: take a 2^order block under zone->lock
- if (unlikely(gfp_flags & __GFP_NOFAIL)) {
- /*
- * __GFP_NOFAIL is not to be used in new code.
- *
- * All __GFP_NOFAIL callers should be fixed so that they
- * properly detect and handle allocation failures.
- *
- * We most definitely don't want callers attempting to
- * allocate greater than order-1 page units with
- * __GFP_NOFAIL.
- */
- WARN_ON_ONCE(order > 1);
- }
- spin_lock_irqsave(&zone->lock, flags);
- page = __rmqueue(zone, order, migratetype); // take the block via __rmqueue()
- spin_unlock(&zone->lock); // only the lock is dropped here; flags are restored by local_irq_restore() below
- if (!page)
- goto failed;
- __mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order)); // account for the 2^order pages removed; the order-0 path has this done inside rmqueue_bulk()
- }
- __count_zone_vm_events(PGALLOC, zone, 1 << order);
- zone_statistics(preferred_zone, zone, gfp_flags);
- local_irq_restore(flags);
- VM_BUG_ON(bad_range(zone, page));
- if (prep_new_page(page, order, gfp_flags)) // non-zero result: start over from the "again" label
- goto again;
- return page; // struct page of the first page of the allocated block
- failed:
- local_irq_restore(flags); // both failure paths arrive here with flags still saved
- return NULL;
- }
4) rmqueue_bulk
用于在一次持有 zone->lock 的期间内,从伙伴系统中连续申请 count 个内存块(每块 2^order 页),并把它们加入调用者提供的链表;返回实际申请到的块数。
- /*
- * Obtain a specified number of elements from the buddy allocator, all under
- * a single hold of the lock, for efficiency. Add them to the supplied list.
- * Returns the number of new pages which were placed at *list.
- *
- * Each element is a block of the given order taken with __rmqueue(). The
- * loop stops at the first NULL result, so the return value can be smaller
- * than count. NR_FREE_PAGES is lowered once, by the returned number shifted
- * left by order, before zone->lock is released.
- */
- static int rmqueue_bulk(struct zone *zone, unsigned int order,
- unsigned long count, struct list_head *list,
- int migratetype, int cold)
- {
- int i;
- spin_lock(&zone->lock);
- for (i = 0; i < count; ++i) {
- struct page *page = __rmqueue(zone, order, migratetype);
- if (unlikely(page == NULL))
- break; /* nothing more available: i holds the number obtained so far */
- /*
- * Split buddy pages returned by expand() are received here
- * in physical page order. The page is added to the callers and
- * list and the list head then moves forward. From the callers
- * perspective, the linked list is ordered by page number in
- * some conditions. This is useful for IO devices that can
- * merge IO requests if the physical pages are ordered
- * properly.
- */
- if (likely(cold == 0))
- list_add(&page->lru, list);
- else
- list_add_tail(&page->lru, list);
- set_page_private(page, migratetype); /* records migratetype on the page via set_page_private() */
- list = &page->lru; /* the next insertion is made relative to the page just added */
- }
- __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order)); /* i blocks of 2^order pages each were removed */
- spin_unlock(&zone->lock);
- return i;
- }
5) __rmqueue
用于一次内存申请。
- /*
- * Do the hard work of removing an element from the buddy allocator.
- * Call me with the zone->lock already held.
- *
- * Tries __rmqueue_smallest() for the requested migratetype, then
- * __rmqueue_fallback(), and finally retries __rmqueue_smallest() with
- * MIGRATE_RESERVE. Returns the page found, or NULL if every attempt fails.
- * A request that already uses MIGRATE_RESERVE gets a single
- * __rmqueue_smallest() attempt and no fallback.
- */
- static struct page *__rmqueue(struct zone *zone, unsigned int order,
- int migratetype)
- {
- struct page *page;
- retry_reserve:
- page = __rmqueue_smallest(zone, order, migratetype);
- if (unlikely(!page) && migratetype != MIGRATE_RESERVE) {
- page = __rmqueue_fallback(zone, order, migratetype);
- /*
- * Use MIGRATE_RESERVE rather than fail an allocation. goto
- * is used because __rmqueue_smallest is an inline function
- * and we want just one call site
- */
- if (!page) {
- migratetype = MIGRATE_RESERVE; /* makes the condition above false on the retry, so there is at most one retry */
- goto retry_reserve;
- }
- }
- trace_mm_page_alloc_zone_locked(page, order, migratetype); /* migratetype may have been switched to MIGRATE_RESERVE above */
- return page;
- }
2. 内存释放函数
相关宏定义如下:
-
- #define __free_page(page) __free_pages((page), 0) /* __free_pages() with second argument 0; presumably order 0, i.e. one page */
- #define free_page(addr) free_pages((addr),0) /* free_pages() with second argument 0; presumably order 0, i.e. one page */