I add below a ~100-line code. When compiling the code with
g++ -march=pentium4 -O3 -g -S allocator.cc
(g++ is version 3.4.2)
I get this assembly code:
.loc 1 50 0
jne .L2
.loc 1 93 0
movl 4(%ecx), %ebx // ebx now holds page->prev
movl (%ecx), %eax // eax now holds page->next
.loc 1 97 0
movl _ZN8PagePool9free_listE, %edx // edx now holds free_list
.loc 1 93 0
movl %eax, (%ebx) // page->prev->next = page->next
.loc 1 97 0
movl %edx, (%ecx) // page->next = free_list->next
.loc 1 98 0
movl %ecx, _ZN8PagePool9free_listE // free_list->next = page
.loc 1 94 0
movl (%ecx), %esi // esi now holds page->next
// (BUG: This is actually free_list->next)
movl %ebx, 4(%esi) // page->next->prev =
page->prev
// (BUG: this is actually
// page->next->prev = free_list->next)
The optimization that moved the assembly of line 97 to be before line 94 causes
incorrect value into 4(%esi) (= page->next->prev)
The C++ code:
uncommneting the printf "fixes" this problem since apparently it prevent this
optimization from taking place
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
const size_t granularity_bits = 2;
const size_t granularity = 1 << granularity_bits;
const size_t page_size_bits = 12;
const size_t page_size = 1 << page_size_bits;
const size_t metapage_size_bits = 8;
const size_t metapage_size = 1 << metapage_size_bits;
size_t handled_obj_size(size_t page_free_size) {
return page_free_size;
}
template<class T>
inline T align_down(T val, size_t alignment) {
return val & ~(alignment - 1);
}
// A node of the free list
struct Header {
Header* next;
Header() : next(0) {}
bool is_empty() const { return next == 0; }
void enqueue(Header* obj) {
obj->next = next;
next = obj;
}
};
struct Page {
// The list of pages of the same object-size
Page* prev;
Page* next;
// sizes
size_t alloc_size;
size_t alloc_count;
// List of free slabs in this page
Header free_list;
// pointer to the first unallocated slab
void* unallocated;
Page() : prev(0), next(0), free_list() { }
bool is_empty() const { return (next == this); }
bool is_page_full() const { return free_list.is_empty() && (unallocated ==
0); }
bool is_page_empty() const { return alloc_count == 0; }
void free(void* obj) {
--alloc_count;
free_list.enqueue((Header*) obj);
}
bool is_big_alloc() {
return alloc_size == 0;
}
void* big_free() {
return unallocated;
}
};
struct PagePool {
static Header free_list;
static void free(Page* page) {
free_list.enqueue((Header*) page);
}
};
Header PagePool::free_list;
const size_t num_sizes = page_size / granularity;
const size_t page_free_space = page_size - sizeof(Page);
Page page_lists[num_sizes];
void operator delete(void* ptr) {
if(ptr == 0) {
return;
}
Page* page = (Page*) align_down((ptrdiff_t) ptr, page_size);
if(page->is_big_alloc()) {
void* place = page->big_free();
free(place);
return;
}
if(page->is_page_full()) {
} else {
page->free(ptr);
if(page->is_page_empty()) {
page->next->prev = page->prev;
page->prev->next = page->next;
// fprintf(stderr, "ddd after 3 pnp: %p, ppn: %p \n", page->next->prev,
page->prev->next);
((Header*)page)->next = PagePool::free_list.next;
PagePool::free_list.next = (Header*)page;
}
}
}
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
--
Summary: Bad registers optimization
Product: gcc
Version: 3.4.2
Status: UNCONFIRMED
Severity: critical
Priority: P2
Component: c++
AssignedTo: unassigned at gcc dot gnu dot org
ReportedBy: hayim at dv-networks dot com
CC: gcc-bugs at gcc dot gnu dot org
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=22190